- forward port of the PaX patch to Linux 4.0.9

diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/alpha/include/asm/atomic.h linux-4.0.9-pax/arch/alpha/include/asm/atomic.h
--- linux-4.0.9/arch/alpha/include/asm/atomic.h	2015-03-18 15:21:50.164349252 +0100
+++ linux-4.0.9-pax/arch/alpha/include/asm/atomic.h	2015-04-15 12:13:52.846318626 +0200
@@ -239,4 +239,14 @@ static inline long atomic64_dec_if_posit
 #define atomic_dec(v) atomic_sub(1,(v))
 #define atomic64_dec(v) atomic64_sub(1,(v))
 
+#define atomic64_read_unchecked(v)		atomic64_read(v)
+#define atomic64_set_unchecked(v, i)		atomic64_set((v), (i))
+#define atomic64_add_unchecked(a, v)		atomic64_add((a), (v))
+#define atomic64_add_return_unchecked(a, v)	atomic64_add_return((a), (v))
+#define atomic64_sub_unchecked(a, v)		atomic64_sub((a), (v))
+#define atomic64_inc_unchecked(v)		atomic64_inc(v)
+#define atomic64_inc_return_unchecked(v)	atomic64_inc_return(v)
+#define atomic64_dec_unchecked(v)		atomic64_dec(v)
+#define atomic64_cmpxchg_unchecked(v, o, n)	atomic64_cmpxchg((v), (o), (n))
+
 #endif /* _ALPHA_ATOMIC_H */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/alpha/include/asm/elf.h linux-4.0.9-pax/arch/alpha/include/asm/elf.h
--- linux-4.0.9/arch/alpha/include/asm/elf.h	2015-03-18 15:21:50.164349252 +0100
+++ linux-4.0.9-pax/arch/alpha/include/asm/elf.h	2015-04-15 12:13:52.846318626 +0200
@@ -91,6 +91,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_N
 
 #define ELF_ET_DYN_BASE		(TASK_UNMAPPED_BASE + 0x1000000)
 
+#ifdef CONFIG_PAX_ASLR
+#define PAX_ELF_ET_DYN_BASE	(current->personality & ADDR_LIMIT_32BIT ? 0x10000 : 0x120000000UL)
+
+#define PAX_DELTA_MMAP_LEN	(current->personality & ADDR_LIMIT_32BIT ? 14 : 28)
+#define PAX_DELTA_STACK_LEN	(current->personality & ADDR_LIMIT_32BIT ? 14 : 19)
+#endif
+
 /* $0 is set by ld.so to a pointer to a function which might be 
    registered using atexit.  This provides a mean for the dynamic
    linker to call DT_FINI functions for shared libraries that have
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/alpha/include/asm/pgalloc.h linux-4.0.9-pax/arch/alpha/include/asm/pgalloc.h
--- linux-4.0.9/arch/alpha/include/asm/pgalloc.h	2015-03-18 15:21:50.164349252 +0100
+++ linux-4.0.9-pax/arch/alpha/include/asm/pgalloc.h	2015-04-15 12:13:52.846318626 +0200
@@ -29,6 +29,12 @@ pgd_populate(struct mm_struct *mm, pgd_t
 	pgd_set(pgd, pmd);
 }
 
+static inline void
+pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+{
+	pgd_populate(mm, pgd, pmd);
+}
+
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 
 static inline void
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/alpha/include/asm/pgtable.h linux-4.0.9-pax/arch/alpha/include/asm/pgtable.h
--- linux-4.0.9/arch/alpha/include/asm/pgtable.h	2015-04-13 11:20:46.062618304 +0200
+++ linux-4.0.9-pax/arch/alpha/include/asm/pgtable.h	2015-04-15 12:13:52.846318626 +0200
@@ -101,6 +101,17 @@ struct vm_area_struct;
 #define PAGE_SHARED	__pgprot(_PAGE_VALID | __ACCESS_BITS)
 #define PAGE_COPY	__pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW)
 #define PAGE_READONLY	__pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW)
+
+#ifdef CONFIG_PAX_PAGEEXEC
+# define PAGE_SHARED_NOEXEC	__pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOE)
+# define PAGE_COPY_NOEXEC	__pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW | _PAGE_FOE)
+# define PAGE_READONLY_NOEXEC	__pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW | _PAGE_FOE)
+#else
+# define PAGE_SHARED_NOEXEC	PAGE_SHARED
+# define PAGE_COPY_NOEXEC	PAGE_COPY
+# define PAGE_READONLY_NOEXEC	PAGE_READONLY
+#endif
+
 #define PAGE_KERNEL	__pgprot(_PAGE_VALID | _PAGE_ASM | _PAGE_KRE | _PAGE_KWE)
 
 #define _PAGE_NORMAL(x) __pgprot(_PAGE_VALID | __ACCESS_BITS | (x))
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/alpha/kernel/module.c linux-4.0.9-pax/arch/alpha/kernel/module.c
--- linux-4.0.9/arch/alpha/kernel/module.c	2015-03-18 15:21:50.164349252 +0100
+++ linux-4.0.9-pax/arch/alpha/kernel/module.c	2015-04-15 12:13:52.846318626 +0200
@@ -160,7 +160,7 @@ apply_relocate_add(Elf64_Shdr *sechdrs,
 
 	/* The small sections were sorted to the end of the segment.
 	   The following should definitely cover them.  */
-	gp = (u64)me->module_core + me->core_size - 0x8000;
+	gp = (u64)me->module_core_rw + me->core_size_rw - 0x8000;
 	got = sechdrs[me->arch.gotsecindex].sh_addr;
 
 	for (i = 0; i < n; i++) {
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/alpha/kernel/osf_sys.c linux-4.0.9-pax/arch/alpha/kernel/osf_sys.c
--- linux-4.0.9/arch/alpha/kernel/osf_sys.c	2015-03-18 15:21:50.164349252 +0100
+++ linux-4.0.9-pax/arch/alpha/kernel/osf_sys.c	2015-04-15 12:13:52.846318626 +0200
@@ -1339,6 +1339,10 @@ arch_get_unmapped_area(struct file *filp
 	   merely specific addresses, but regions of memory -- perhaps
 	   this feature should be incorporated into all ports?  */
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(current->mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	if (addr) {
 		addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit);
 		if (addr != (unsigned long) -ENOMEM)
@@ -1346,8 +1350,8 @@ arch_get_unmapped_area(struct file *filp
 	}
 
 	/* Next, try allocating at TASK_UNMAPPED_BASE.  */
-	addr = arch_get_unmapped_area_1 (PAGE_ALIGN(TASK_UNMAPPED_BASE),
-					 len, limit);
+	addr = arch_get_unmapped_area_1 (PAGE_ALIGN(current->mm->mmap_base), len, limit);
+
 	if (addr != (unsigned long) -ENOMEM)
 		return addr;
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/alpha/mm/fault.c linux-4.0.9-pax/arch/alpha/mm/fault.c
--- linux-4.0.9/arch/alpha/mm/fault.c	2015-03-18 15:21:50.164349252 +0100
+++ linux-4.0.9-pax/arch/alpha/mm/fault.c	2015-04-15 12:13:52.846318626 +0200
@@ -53,6 +53,124 @@ __load_new_mm_context(struct mm_struct *
 	__reload_thread(pcb);
 }
 
+#ifdef CONFIG_PAX_PAGEEXEC
+/*
+ * PaX: decide what to do with offenders (regs->pc = fault address)
+ *
+ * returns 1 when task should be killed
+ *         2 when patched PLT trampoline was detected
+ *         3 when unpatched PLT trampoline was detected
+ */
+static int pax_handle_fetch_fault(struct pt_regs *regs)
+{
+
+#ifdef CONFIG_PAX_EMUPLT
+	int err;
+
+	do { /* PaX: patched PLT emulation #1 */
+		unsigned int ldah, ldq, jmp;
+
+		err = get_user(ldah, (unsigned int *)regs->pc);
+		err |= get_user(ldq, (unsigned int *)(regs->pc+4));
+		err |= get_user(jmp, (unsigned int *)(regs->pc+8));
+
+		if (err)
+			break;
+
+		if ((ldah & 0xFFFF0000U) == 0x277B0000U &&
+		    (ldq & 0xFFFF0000U) == 0xA77B0000U &&
+		    jmp == 0x6BFB0000U)
+		{
+			unsigned long r27, addr;
+			unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16;
+			unsigned long addrl = ldq | 0xFFFFFFFFFFFF0000UL;
+
+			addr = regs->r27 + ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL);
+			err = get_user(r27, (unsigned long *)addr);
+			if (err)
+				break;
+
+			regs->r27 = r27;
+			regs->pc = r27;
+			return 2;
+		}
+	} while (0);
+
+	do { /* PaX: patched PLT emulation #2 */
+		unsigned int ldah, lda, br;
+
+		err = get_user(ldah, (unsigned int *)regs->pc);
+		err |= get_user(lda, (unsigned int *)(regs->pc+4));
+		err |= get_user(br, (unsigned int *)(regs->pc+8));
+
+		if (err)
+			break;
+
+		if ((ldah & 0xFFFF0000U) == 0x277B0000U &&
+		    (lda & 0xFFFF0000U) == 0xA77B0000U &&
+		    (br & 0xFFE00000U) == 0xC3E00000U)
+		{
+			unsigned long addr = br | 0xFFFFFFFFFFE00000UL;
+			unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16;
+			unsigned long addrl = lda | 0xFFFFFFFFFFFF0000UL;
+
+			regs->r27 += ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL);
+			regs->pc += 12 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2);
+			return 2;
+		}
+	} while (0);
+
+	do { /* PaX: unpatched PLT emulation */
+		unsigned int br;
+
+		err = get_user(br, (unsigned int *)regs->pc);
+
+		if (!err && (br & 0xFFE00000U) == 0xC3800000U) {
+			unsigned int br2, ldq, nop, jmp;
+			unsigned long addr = br | 0xFFFFFFFFFFE00000UL, resolver;
+
+			addr = regs->pc + 4 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2);
+			err = get_user(br2, (unsigned int *)addr);
+			err |= get_user(ldq, (unsigned int *)(addr+4));
+			err |= get_user(nop, (unsigned int *)(addr+8));
+			err |= get_user(jmp, (unsigned int *)(addr+12));
+			err |= get_user(resolver, (unsigned long *)(addr+16));
+
+			if (err)
+				break;
+
+			if (br2 == 0xC3600000U &&
+			    ldq == 0xA77B000CU &&
+			    nop == 0x47FF041FU &&
+			    jmp == 0x6B7B0000U)
+			{
+				regs->r28 = regs->pc+4;
+				regs->r27 = addr+16;
+				regs->pc = resolver;
+				return 3;
+			}
+		}
+	} while (0);
+#endif
+
+	return 1;
+}
+
+void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
+{
+	unsigned long i;
+
+	printk(KERN_ERR "PAX: bytes at PC: ");
+	for (i = 0; i < 5; i++) {
+		unsigned int c;
+		if (get_user(c, (unsigned int *)pc+i))
+			printk(KERN_CONT "???????? ");
+		else
+			printk(KERN_CONT "%08x ", c);
+	}
+	printk("\n");
+}
+#endif
 
 /*
  * This routine handles page faults.  It determines the address,
@@ -133,8 +251,29 @@ retry:
  good_area:
 	si_code = SEGV_ACCERR;
 	if (cause < 0) {
-		if (!(vma->vm_flags & VM_EXEC))
+		if (!(vma->vm_flags & VM_EXEC)) {
+
+#ifdef CONFIG_PAX_PAGEEXEC
+			if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->pc)
+				goto bad_area;
+
+			up_read(&mm->mmap_sem);
+			switch (pax_handle_fetch_fault(regs)) {
+
+#ifdef CONFIG_PAX_EMUPLT
+			case 2:
+			case 3:
+				return;
+#endif
+
+			}
+			pax_report_fault(regs, (void *)regs->pc, (void *)rdusp());
+			do_group_exit(SIGKILL);
+#else
 			goto bad_area;
+#endif
+
+		}
 	} else if (!cause) {
 		/* Allow reads even for write-only mappings */
 		if (!(vma->vm_flags & (VM_READ | VM_WRITE)))
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/atomic.h linux-4.0.9-pax/arch/arm/include/asm/atomic.h
--- linux-4.0.9/arch/arm/include/asm/atomic.h	2015-03-18 15:21:50.164349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/atomic.h	2015-04-20 21:58:29.247698219 +0200
@@ -18,17 +18,41 @@
 #include <asm/barrier.h>
 #include <asm/cmpxchg.h>
 
+#ifdef CONFIG_GENERIC_ATOMIC64
+#include <asm-generic/atomic64.h>
+#endif
+
 #define ATOMIC_INIT(i)	{ (i) }
 
 #ifdef __KERNEL__
 
+#ifdef CONFIG_THUMB2_KERNEL
+#define REFCOUNT_TRAP_INSN "bkpt	0xf1"
+#else
+#define REFCOUNT_TRAP_INSN "bkpt	0xf103"
+#endif
+
+#define _ASM_EXTABLE(from, to)		\
+"	.pushsection __ex_table,\"a\"\n"\
+"	.align	3\n"			\
+"	.long	" #from ", " #to"\n"	\
+"	.popsection"
+
 /*
  * On ARM, ordinary assignment (str instruction) doesn't clear the local
  * strex/ldrex monitor on some implementations. The reason we can use it for
  * atomic_set() is the clrex or dummy strex done on every exception return.
  */
 #define atomic_read(v)	ACCESS_ONCE((v)->counter)
+static inline int atomic_read_unchecked(const atomic_unchecked_t *v)
+{
+	return ACCESS_ONCE(v->counter);
+}
 #define atomic_set(v,i)	(((v)->counter) = (i))
+static inline void atomic_set_unchecked(atomic_unchecked_t *v, int i)
+{
+	v->counter = i;
+}
 
 #if __LINUX_ARM_ARCH__ >= 6
 
@@ -38,26 +62,50 @@
  * to ensure that the update happens.
  */
 
-#define ATOMIC_OP(op, c_op, asm_op)					\
-static inline void atomic_##op(int i, atomic_t *v)			\
+#ifdef CONFIG_PAX_REFCOUNT
+#define __OVERFLOW_POST			\
+	"	bvc	3f\n"		\
+	"2:	" REFCOUNT_TRAP_INSN "\n"\
+	"3:\n"
+#define __OVERFLOW_POST_RETURN		\
+	"	bvc	3f\n"		\
+"	mov	%0, %1\n"		\
+	"2:	" REFCOUNT_TRAP_INSN "\n"\
+	"3:\n"
+#define __OVERFLOW_EXTABLE		\
+	"4:\n"				\
+	_ASM_EXTABLE(2b, 4b)
+#else
+#define __OVERFLOW_POST
+#define __OVERFLOW_POST_RETURN
+#define __OVERFLOW_EXTABLE
+#endif
+
+#define __ATOMIC_OP(op, suffix, c_op, asm_op, post_op, extable)		\
+static inline void atomic_##op##suffix(int i, atomic##suffix##_t *v)	\
 {									\
 	unsigned long tmp;						\
 	int result;							\
 									\
 	prefetchw(&v->counter);						\
-	__asm__ __volatile__("@ atomic_" #op "\n"			\
+	__asm__ __volatile__("@ atomic_" #op #suffix "\n"		\
 "1:	ldrex	%0, [%3]\n"						\
 "	" #asm_op "	%0, %0, %4\n"					\
+	post_op								\
 "	strex	%1, %0, [%3]\n"						\
 "	teq	%1, #0\n"						\
-"	bne	1b"							\
+"	bne	1b\n"							\
+	extable								\
 	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)		\
 	: "r" (&v->counter), "Ir" (i)					\
 	: "cc");							\
 }									\
 
-#define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
+#define ATOMIC_OP(op, c_op, asm_op) __ATOMIC_OP(op, _unchecked, c_op, asm_op, , )\
+				    __ATOMIC_OP(op, , c_op, asm_op##s, __OVERFLOW_POST, __OVERFLOW_EXTABLE)
+
+#define __ATOMIC_OP_RETURN(op, suffix, c_op, asm_op, post_op, extable)	\
+static inline int atomic_##op##_return##suffix(int i, atomic##suffix##_t *v)\
 {									\
 	unsigned long tmp;						\
 	int result;							\
@@ -65,12 +113,14 @@ static inline int atomic_##op##_return(i
 	smp_mb();							\
 	prefetchw(&v->counter);						\
 									\
-	__asm__ __volatile__("@ atomic_" #op "_return\n"		\
+	__asm__ __volatile__("@ atomic_" #op "_return" #suffix "\n"	\
 "1:	ldrex	%0, [%3]\n"						\
 "	" #asm_op "	%0, %0, %4\n"					\
+	post_op								\
 "	strex	%1, %0, [%3]\n"						\
 "	teq	%1, #0\n"						\
-"	bne	1b"							\
+"	bne	1b\n"							\
+	extable								\
 	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)		\
 	: "r" (&v->counter), "Ir" (i)					\
 	: "cc");							\
@@ -80,6 +130,9 @@ static inline int atomic_##op##_return(i
 	return result;							\
 }
 
+#define ATOMIC_OP_RETURN(op, c_op, asm_op) __ATOMIC_OP_RETURN(op, _unchecked, c_op, asm_op, , )\
+					   __ATOMIC_OP_RETURN(op, , c_op, asm_op##s, __OVERFLOW_POST_RETURN, __OVERFLOW_EXTABLE)
+
 static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 {
 	int oldval;
@@ -115,12 +168,24 @@ static inline int __atomic_add_unless(at
 	__asm__ __volatile__ ("@ atomic_add_unless\n"
 "1:	ldrex	%0, [%4]\n"
 "	teq	%0, %5\n"
-"	beq	2f\n"
-"	add	%1, %0, %6\n"
+"	beq	4f\n"
+"	adds	%1, %0, %6\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+"	bvc	3f\n"
+"2:	" REFCOUNT_TRAP_INSN "\n"
+"3:\n"
+#endif
+
 "	strex	%2, %1, [%4]\n"
 "	teq	%2, #0\n"
 "	bne	1b\n"
-"2:"
+"4:"
+
+#ifdef CONFIG_PAX_REFCOUNT
+	_ASM_EXTABLE(2b, 4b)
+#endif
+
 	: "=&r" (oldval), "=&r" (newval), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "r" (u), "r" (a)
 	: "cc");
@@ -131,14 +196,36 @@ static inline int __atomic_add_unless(at
 	return oldval;
 }
 
+static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *ptr, int old, int new)
+{
+	unsigned long oldval, res;
+
+	smp_mb();
+
+	do {
+		__asm__ __volatile__("@ atomic_cmpxchg_unchecked\n"
+		"ldrex	%1, [%3]\n"
+		"mov	%0, #0\n"
+		"teq	%1, %4\n"
+		"strexeq %0, %5, [%3]\n"
+		    : "=&r" (res), "=&r" (oldval), "+Qo" (ptr->counter)
+		    : "r" (&ptr->counter), "Ir" (old), "r" (new)
+		    : "cc");
+	} while (res);
+
+	smp_mb();
+
+	return oldval;
+}
+
 #else /* ARM_ARCH_6 */
 
 #ifdef CONFIG_SMP
 #error SMP not supported on pre-ARMv6 CPUs
 #endif
 
-#define ATOMIC_OP(op, c_op, asm_op)					\
-static inline void atomic_##op(int i, atomic_t *v)			\
+#define __ATOMIC_OP(op, suffix, c_op, asm_op)				\
+static inline void atomic_##op##suffix(int i, atomic##suffix##_t *v)	\
 {									\
 	unsigned long flags;						\
 									\
@@ -147,8 +234,11 @@ static inline void atomic_##op(int i, at
 	raw_local_irq_restore(flags);					\
 }									\
 
-#define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
+#define ATOMIC_OP(op, c_op, asm_op) __ATOMIC_OP(op, , c_op, asm_op)	\
+				    __ATOMIC_OP(op, _unchecked, c_op, asm_op)
+
+#define __ATOMIC_OP_RETURN(op, suffix, c_op, asm_op)			\
+static inline int atomic_##op##_return##suffix(int i, atomic##suffix##_t *v)\
 {									\
 	unsigned long flags;						\
 	int val;							\
@@ -161,6 +251,9 @@ static inline int atomic_##op##_return(i
 	return val;							\
 }
 
+#define ATOMIC_OP_RETURN(op, c_op, asm_op) __ATOMIC_OP_RETURN(op, , c_op, asm_op)\
+					   __ATOMIC_OP_RETURN(op, _unchecked, c_op, asm_op)
+
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int ret;
@@ -175,6 +268,11 @@ static inline int atomic_cmpxchg(atomic_
 	return ret;
 }
 
+static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *v, int old, int new)
+{
+	return atomic_cmpxchg((atomic_t *)v, old, new);
+}
+
 static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
@@ -196,16 +294,38 @@ ATOMIC_OPS(sub, -=, sub)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
+#undef __ATOMIC_OP_RETURN
 #undef ATOMIC_OP
+#undef __ATOMIC_OP
 
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+static inline int atomic_xchg_unchecked(atomic_unchecked_t *v, int new)
+{
+	return xchg(&v->counter, new);
+}
 
 #define atomic_inc(v)		atomic_add(1, v)
+static inline void atomic_inc_unchecked(atomic_unchecked_t *v)
+{
+	atomic_add_unchecked(1, v);
+}
 #define atomic_dec(v)		atomic_sub(1, v)
+static inline void atomic_dec_unchecked(atomic_unchecked_t *v)
+{
+	atomic_sub_unchecked(1, v);
+}
 
 #define atomic_inc_and_test(v)	(atomic_add_return(1, v) == 0)
+static inline int atomic_inc_and_test_unchecked(atomic_unchecked_t *v)
+{
+	return atomic_add_return_unchecked(1, v) == 0;
+}
 #define atomic_dec_and_test(v)	(atomic_sub_return(1, v) == 0)
 #define atomic_inc_return(v)    (atomic_add_return(1, v))
+static inline int atomic_inc_return_unchecked(atomic_unchecked_t *v)
+{
+	return atomic_add_return_unchecked(1, v);
+}
 #define atomic_dec_return(v)    (atomic_sub_return(1, v))
 #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
 
@@ -216,6 +336,14 @@ typedef struct {
 	long long counter;
 } atomic64_t;
 
+#ifdef CONFIG_PAX_REFCOUNT
+typedef struct {
+	long long counter;
+} atomic64_unchecked_t;
+#else
+typedef atomic64_t atomic64_unchecked_t;
+#endif
+
 #define ATOMIC64_INIT(i) { (i) }
 
 #ifdef CONFIG_ARM_LPAE
@@ -232,6 +360,19 @@ static inline long long atomic64_read(co
 	return result;
 }
 
+static inline long long atomic64_read_unchecked(const atomic64_unchecked_t *v)
+{
+	long long result;
+
+	__asm__ __volatile__("@ atomic64_read_unchecked\n"
+"	ldrd	%0, %H0, [%1]"
+	: "=&r" (result)
+	: "r" (&v->counter), "Qo" (v->counter)
+	);
+
+	return result;
+}
+
 static inline void atomic64_set(atomic64_t *v, long long i)
 {
 	__asm__ __volatile__("@ atomic64_set\n"
@@ -240,6 +381,15 @@ static inline void atomic64_set(atomic64
 	: "r" (&v->counter), "r" (i)
 	);
 }
+
+static inline void atomic64_set_unchecked(atomic64_unchecked_t *v, long long i)
+{
+	__asm__ __volatile__("@ atomic64_set_unchecked\n"
+"	strd	%2, %H2, [%1]"
+	: "=Qo" (v->counter)
+	: "r" (&v->counter), "r" (i)
+	);
+}
 #else
 static inline long long atomic64_read(const atomic64_t *v)
 {
@@ -254,6 +404,19 @@ static inline long long atomic64_read(co
 	return result;
 }
 
+static inline long long atomic64_read_unchecked(const atomic64_unchecked_t *v)
+{
+	long long result;
+
+	__asm__ __volatile__("@ atomic64_read_unchecked\n"
+"	ldrexd	%0, %H0, [%1]"
+	: "=&r" (result)
+	: "r" (&v->counter), "Qo" (v->counter)
+	);
+
+	return result;
+}
+
 static inline void atomic64_set(atomic64_t *v, long long i)
 {
 	long long tmp;
@@ -268,29 +431,57 @@ static inline void atomic64_set(atomic64
 	: "r" (&v->counter), "r" (i)
 	: "cc");
 }
+
+static inline void atomic64_set_unchecked(atomic64_unchecked_t *v, long long i)
+{
+	long long tmp;
+
+	prefetchw(&v->counter);
+	__asm__ __volatile__("@ atomic64_set_unchecked\n"
+"1:	ldrexd	%0, %H0, [%2]\n"
+"	strexd	%0, %3, %H3, [%2]\n"
+"	teq	%0, #0\n"
+"	bne	1b"
+	: "=&r" (tmp), "=Qo" (v->counter)
+	: "r" (&v->counter), "r" (i)
+	: "cc");
+}
 #endif
 
-#define ATOMIC64_OP(op, op1, op2)					\
-static inline void atomic64_##op(long long i, atomic64_t *v)		\
+#undef __OVERFLOW_POST_RETURN
+#define __OVERFLOW_POST_RETURN		\
+	"	bvc	3f\n"		\
+"	mov	%0, %1\n"		\
+"	mov	%H0, %H1\n"		\
+	"2:	" REFCOUNT_TRAP_INSN "\n"\
+	"3:\n"
+
+#define __ATOMIC64_OP(op, suffix, op1, op2, post_op, extable)		\
+static inline void atomic64_##op##suffix(long long i, atomic64##suffix##_t *v)\
 {									\
 	long long result;						\
 	unsigned long tmp;						\
 									\
 	prefetchw(&v->counter);						\
-	__asm__ __volatile__("@ atomic64_" #op "\n"			\
+	__asm__ __volatile__("@ atomic64_" #op #suffix "\n"		\
 "1:	ldrexd	%0, %H0, [%3]\n"					\
 "	" #op1 " %Q0, %Q0, %Q4\n"					\
 "	" #op2 " %R0, %R0, %R4\n"					\
+	post_op								\
 "	strexd	%1, %0, %H0, [%3]\n"					\
 "	teq	%1, #0\n"						\
-"	bne	1b"							\
+"	bne	1b\n"							\
+	extable								\
 	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)		\
 	: "r" (&v->counter), "r" (i)					\
 	: "cc");							\
 }									\
 
-#define ATOMIC64_OP_RETURN(op, op1, op2)				\
-static inline long long atomic64_##op##_return(long long i, atomic64_t *v) \
+#define ATOMIC64_OP(op, op1, op2) __ATOMIC64_OP(op, _unchecked, op1, op2, , ) \
+				  __ATOMIC64_OP(op, , op1, op2##s, __OVERFLOW_POST, __OVERFLOW_EXTABLE)
+
+#define __ATOMIC64_OP_RETURN(op, suffix, op1, op2, post_op, extable)	\
+static inline long long atomic64_##op##_return##suffix(long long i, atomic64##suffix##_t *v) \
 {									\
 	long long result;						\
 	unsigned long tmp;						\
@@ -298,13 +489,15 @@ static inline long long atomic64_##op##_
 	smp_mb();							\
 	prefetchw(&v->counter);						\
 									\
-	__asm__ __volatile__("@ atomic64_" #op "_return\n"		\
+	__asm__ __volatile__("@ atomic64_" #op "_return" #suffix "\n"	\
 "1:	ldrexd	%0, %H0, [%3]\n"					\
 "	" #op1 " %Q0, %Q0, %Q4\n"					\
 "	" #op2 " %R0, %R0, %R4\n"					\
+	post_op								\
 "	strexd	%1, %0, %H0, [%3]\n"					\
 "	teq	%1, #0\n"						\
-"	bne	1b"							\
+"	bne	1b\n"							\
+	extable								\
 	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)		\
 	: "r" (&v->counter), "r" (i)					\
 	: "cc");							\
@@ -314,6 +507,9 @@ static inline long long atomic64_##op##_
 	return result;							\
 }
 
+#define ATOMIC64_OP_RETURN(op, op1, op2) __ATOMIC64_OP_RETURN(op, _unchecked, op1, op2, , ) \
+					 __ATOMIC64_OP_RETURN(op, , op1, op2##s, __OVERFLOW_POST_RETURN, __OVERFLOW_EXTABLE)
+
 #define ATOMIC64_OPS(op, op1, op2)					\
 	ATOMIC64_OP(op, op1, op2)					\
 	ATOMIC64_OP_RETURN(op, op1, op2)
@@ -323,7 +519,12 @@ ATOMIC64_OPS(sub, subs, sbc)
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
+#undef __ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
+#undef __ATOMIC64_OP
+#undef __OVERFLOW_EXTABLE
+#undef __OVERFLOW_POST_RETURN
+#undef __OVERFLOW_POST
 
 static inline long long atomic64_cmpxchg(atomic64_t *ptr, long long old,
 					long long new)
@@ -351,6 +552,31 @@ static inline long long atomic64_cmpxchg
 	return oldval;
 }
 
+static inline long long atomic64_cmpxchg_unchecked(atomic64_unchecked_t *ptr, long long old,
+					long long new)
+{
+	long long oldval;
+	unsigned long res;
+
+	smp_mb();
+
+	do {
+		__asm__ __volatile__("@ atomic64_cmpxchg_unchecked\n"
+		"ldrexd		%1, %H1, [%3]\n"
+		"mov		%0, #0\n"
+		"teq		%1, %4\n"
+		"teqeq		%H1, %H4\n"
+		"strexdeq	%0, %5, %H5, [%3]"
+		: "=&r" (res), "=&r" (oldval), "+Qo" (ptr->counter)
+		: "r" (&ptr->counter), "r" (old), "r" (new)
+		: "cc");
+	} while (res);
+
+	smp_mb();
+
+	return oldval;
+}
+
 static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
 {
 	long long result;
@@ -376,21 +602,35 @@ static inline long long atomic64_xchg(at
 static inline long long atomic64_dec_if_positive(atomic64_t *v)
 {
 	long long result;
-	unsigned long tmp;
+	u64 tmp;
 
 	smp_mb();
 	prefetchw(&v->counter);
 
 	__asm__ __volatile__("@ atomic64_dec_if_positive\n"
-"1:	ldrexd	%0, %H0, [%3]\n"
-"	subs	%Q0, %Q0, #1\n"
-"	sbc	%R0, %R0, #0\n"
+"1:	ldrexd	%1, %H1, [%3]\n"
+"	subs	%Q0, %Q1, #1\n"
+"	sbcs	%R0, %R1, #0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+"	bvc	3f\n"
+"	mov	%Q0, %Q1\n"
+"	mov	%R0, %R1\n"
+"2:	" REFCOUNT_TRAP_INSN "\n"
+"3:\n"
+#endif
+
 "	teq	%R0, #0\n"
-"	bmi	2f\n"
+"	bmi	4f\n"
 "	strexd	%1, %0, %H0, [%3]\n"
 "	teq	%1, #0\n"
 "	bne	1b\n"
-"2:"
+"4:\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+	_ASM_EXTABLE(2b, 4b)
+#endif
+
 	: "=&r" (result), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter)
 	: "cc");
@@ -414,13 +654,25 @@ static inline int atomic64_add_unless(at
 "	teq	%0, %5\n"
 "	teqeq	%H0, %H5\n"
 "	moveq	%1, #0\n"
-"	beq	2f\n"
+"	beq	4f\n"
 "	adds	%Q0, %Q0, %Q6\n"
-"	adc	%R0, %R0, %R6\n"
+"	adcs	%R0, %R0, %R6\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+"	bvc	3f\n"
+"2:	" REFCOUNT_TRAP_INSN "\n"
+"3:\n"
+#endif
+
 "	strexd	%2, %0, %H0, [%4]\n"
 "	teq	%2, #0\n"
 "	bne	1b\n"
-"2:"
+"4:\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+	_ASM_EXTABLE(2b, 4b)
+#endif
+
 	: "=&r" (val), "+r" (ret), "=&r" (tmp), "+Qo" (v->counter)
 	: "r" (&v->counter), "r" (u), "r" (a)
 	: "cc");
@@ -433,10 +685,13 @@ static inline int atomic64_add_unless(at
 
 #define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
 #define atomic64_inc(v)			atomic64_add(1LL, (v))
+#define atomic64_inc_unchecked(v)	atomic64_add_unchecked(1LL, (v))
 #define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
+#define atomic64_inc_return_unchecked(v)	atomic64_add_return_unchecked(1LL, (v))
 #define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
 #define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
 #define atomic64_dec(v)			atomic64_sub(1LL, (v))
+#define atomic64_dec_unchecked(v)	atomic64_sub_unchecked(1LL, (v))
 #define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
 #define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
 #define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1LL, 0LL)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/barrier.h linux-4.0.9-pax/arch/arm/include/asm/barrier.h
--- linux-4.0.9/arch/arm/include/asm/barrier.h	2015-03-18 15:21:50.164349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/barrier.h	2015-04-15 12:13:52.846318626 +0200
@@ -67,7 +67,7 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	ACCESS_ONCE_RW(*p) = (v);					\
 } while (0)
 
 #define smp_load_acquire(p)						\
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/cacheflush.h linux-4.0.9-pax/arch/arm/include/asm/cacheflush.h
--- linux-4.0.9/arch/arm/include/asm/cacheflush.h	2015-03-18 15:21:50.164349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/cacheflush.h	2015-04-15 12:13:52.846318626 +0200
@@ -116,7 +116,7 @@ struct cpu_cache_fns {
 	void (*dma_unmap_area)(const void *, size_t, int);
 
 	void (*dma_flush_range)(const void *, const void *);
-};
+} __no_const;
 
 /*
  * Select the calling method
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/cache.h linux-4.0.9-pax/arch/arm/include/asm/cache.h
--- linux-4.0.9/arch/arm/include/asm/cache.h	2015-03-18 15:21:50.168349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/cache.h	2015-04-15 12:13:52.846318626 +0200
@@ -4,8 +4,10 @@
 #ifndef __ASMARM_CACHE_H
 #define __ASMARM_CACHE_H
 
+#include <linux/const.h>
+
 #define L1_CACHE_SHIFT		CONFIG_ARM_L1_CACHE_SHIFT
-#define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
+#define L1_CACHE_BYTES		(_AC(1,UL) << L1_CACHE_SHIFT)
 
 /*
  * Memory returned by kmalloc() may be used for DMA, so we must make
@@ -24,5 +26,6 @@
 #endif
 
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
+#define __read_only __attribute__ ((__section__(".data..read_only")))
 
 #endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/checksum.h linux-4.0.9-pax/arch/arm/include/asm/checksum.h
--- linux-4.0.9/arch/arm/include/asm/checksum.h	2015-03-18 15:21:50.168349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/checksum.h	2015-04-15 12:13:52.846318626 +0200
@@ -37,7 +37,19 @@ __wsum
 csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum);
 
 __wsum
-csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr);
+__csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr);
+
+static inline __wsum
+csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr)
+{
+	__wsum ret;
+	pax_open_userland();
+	ret = __csum_partial_copy_from_user(src, dst, len, sum, err_ptr);
+	pax_close_userland();
+	return ret;
+}
+
+
 
 /*
  * 	Fold a partial checksum without adding pseudo headers
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/cmpxchg.h linux-4.0.9-pax/arch/arm/include/asm/cmpxchg.h
--- linux-4.0.9/arch/arm/include/asm/cmpxchg.h	2015-03-18 15:21:50.168349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/cmpxchg.h	2015-04-15 12:13:52.846318626 +0200
@@ -104,6 +104,8 @@ static inline unsigned long __xchg(unsig
 
 #define xchg(ptr,x) \
 	((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
+#define xchg_unchecked(ptr,x) \
+	((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
 
 #include <asm-generic/cmpxchg-local.h>
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/domain.h linux-4.0.9-pax/arch/arm/include/asm/domain.h
--- linux-4.0.9/arch/arm/include/asm/domain.h	2015-03-18 15:21:50.168349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/domain.h	2015-04-15 12:13:52.846318626 +0200
@@ -48,18 +48,37 @@
  * Domain types
  */
 #define DOMAIN_NOACCESS	0
-#define DOMAIN_CLIENT	1
 #ifdef CONFIG_CPU_USE_DOMAINS
+#define DOMAIN_USERCLIENT	1
+#define DOMAIN_KERNELCLIENT	1
 #define DOMAIN_MANAGER	3
+#define DOMAIN_VECTORS		DOMAIN_USER
+#else
+
+#ifdef CONFIG_PAX_KERNEXEC
+#define DOMAIN_MANAGER	1
+#define DOMAIN_KERNEXEC	3
 #else
 #define DOMAIN_MANAGER	1
 #endif
 
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+#define DOMAIN_USERCLIENT	0
+#define DOMAIN_UDEREF		1
+#define DOMAIN_VECTORS		DOMAIN_KERNEL
+#else
+#define DOMAIN_USERCLIENT	1
+#define DOMAIN_VECTORS		DOMAIN_USER
+#endif
+#define DOMAIN_KERNELCLIENT	1
+
+#endif
+
 #define domain_val(dom,type)	((type) << (2*(dom)))
 
 #ifndef __ASSEMBLY__
 
-#ifdef CONFIG_CPU_USE_DOMAINS
+#if defined(CONFIG_CPU_USE_DOMAINS) || defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
 static inline void set_domain(unsigned val)
 {
 	asm volatile(
@@ -68,15 +87,7 @@ static inline void set_domain(unsigned v
 	isb();
 }
 
-#define modify_domain(dom,type)					\
-	do {							\
-	struct thread_info *thread = current_thread_info();	\
-	unsigned int domain = thread->cpu_domain;		\
-	domain &= ~domain_val(dom, DOMAIN_MANAGER);		\
-	thread->cpu_domain = domain | domain_val(dom, type);	\
-	set_domain(thread->cpu_domain);				\
-	} while (0)
-
+extern void modify_domain(unsigned int dom, unsigned int type);
 #else
 static inline void set_domain(unsigned val) { }
 static inline void modify_domain(unsigned dom, unsigned type)	{ }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/elf.h linux-4.0.9-pax/arch/arm/include/asm/elf.h
--- linux-4.0.9/arch/arm/include/asm/elf.h	2015-05-07 02:10:36.152265444 +0200
+++ linux-4.0.9-pax/arch/arm/include/asm/elf.h	2015-04-15 12:13:52.846318626 +0200
@@ -115,7 +115,14 @@ int dump_task_regs(struct task_struct *t
    the loader.  We need to make sure that it is out of the way of the program
    that it will "exec", and that there is sufficient room for the brk.  */
 
-#define ELF_ET_DYN_BASE	(TASK_SIZE / 3 * 2)
+#define ELF_ET_DYN_BASE		(TASK_SIZE / 3 * 2)
+
+#ifdef CONFIG_PAX_ASLR
+#define PAX_ELF_ET_DYN_BASE	0x00008000UL
+
+#define PAX_DELTA_MMAP_LEN	((current->personality == PER_LINUX_32BIT) ? 16 : 10)
+#define PAX_DELTA_STACK_LEN	((current->personality == PER_LINUX_32BIT) ? 16 : 10)
+#endif
 
 /* When the program starts, a1 contains a pointer to a function to be 
    registered with atexit, as per the SVR4 ABI.  A value of 0 means we 
@@ -125,10 +132,6 @@ int dump_task_regs(struct task_struct *t
 extern void elf_set_personality(const struct elf32_hdr *);
 #define SET_PERSONALITY(ex)	elf_set_personality(&(ex))
 
-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
 #ifdef CONFIG_MMU
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
 struct linux_binprm;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/fncpy.h linux-4.0.9-pax/arch/arm/include/asm/fncpy.h
--- linux-4.0.9/arch/arm/include/asm/fncpy.h	2015-03-18 15:21:50.168349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/fncpy.h	2015-04-15 12:13:52.846318626 +0200
@@ -81,7 +81,9 @@
 	BUG_ON((uintptr_t)(dest_buf) & (FNCPY_ALIGN - 1) ||		\
 		(__funcp_address & ~(uintptr_t)1 & (FNCPY_ALIGN - 1)));	\
 									\
+	pax_open_kernel();						\
 	memcpy(dest_buf, (void const *)(__funcp_address & ~1), size);	\
+	pax_close_kernel();						\
 	flush_icache_range((unsigned long)(dest_buf),			\
 		(unsigned long)(dest_buf) + (size));			\
 									\
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/futex.h linux-4.0.9-pax/arch/arm/include/asm/futex.h
--- linux-4.0.9/arch/arm/include/asm/futex.h	2015-03-18 15:21:50.168349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/futex.h	2015-04-15 12:13:52.846318626 +0200
@@ -46,6 +46,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
 		return -EFAULT;
 
+	pax_open_userland();
+
 	smp_mb();
 	/* Prefetching cannot fault */
 	prefetchw(uaddr);
@@ -63,6 +65,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
 	: "cc", "memory");
 	smp_mb();
 
+	pax_close_userland();
+
 	*uval = val;
 	return ret;
 }
@@ -93,6 +97,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
 		return -EFAULT;
 
+	pax_open_userland();
+
 	__asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
 	"1:	" TUSER(ldr) "	%1, [%4]\n"
 	"	teq	%1, %2\n"
@@ -103,6 +109,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
 	: "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT)
 	: "cc", "memory");
 
+	pax_close_userland();
+
 	*uval = val;
 	return ret;
 }
@@ -125,6 +133,7 @@ futex_atomic_op_inuser (int encoded_op,
 		return -EFAULT;
 
 	pagefault_disable();	/* implies preempt_disable() */
+	pax_open_userland();
 
 	switch (op) {
 	case FUTEX_OP_SET:
@@ -146,6 +155,7 @@ futex_atomic_op_inuser (int encoded_op,
 		ret = -ENOSYS;
 	}
 
+	pax_close_userland();
 	pagefault_enable();	/* subsumes preempt_enable() */
 
 	if (!ret) {
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/kmap_types.h linux-4.0.9-pax/arch/arm/include/asm/kmap_types.h
--- linux-4.0.9/arch/arm/include/asm/kmap_types.h	2015-03-18 15:21:50.168349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/kmap_types.h	2015-04-15 12:13:52.846318626 +0200
@@ -4,6 +4,6 @@
 /*
  * This is the "bare minimum".  AIO seems to require this.
  */
-#define KM_TYPE_NR 16
+#define KM_TYPE_NR 17
 
 #endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/mach/dma.h linux-4.0.9-pax/arch/arm/include/asm/mach/dma.h
--- linux-4.0.9/arch/arm/include/asm/mach/dma.h	2015-03-18 15:21:50.168349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/mach/dma.h	2015-04-15 12:13:52.846318626 +0200
@@ -22,7 +22,7 @@ struct dma_ops {
 	int	(*residue)(unsigned int, dma_t *);		/* optional */
 	int	(*setspeed)(unsigned int, dma_t *, int);	/* optional */
 	const char *type;
-};
+} __do_const;
 
 struct dma_struct {
 	void		*addr;		/* single DMA address		*/
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/mach/map.h linux-4.0.9-pax/arch/arm/include/asm/mach/map.h
--- linux-4.0.9/arch/arm/include/asm/mach/map.h	2015-03-18 15:21:50.168349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/mach/map.h	2015-04-15 12:13:52.846318626 +0200
@@ -23,17 +23,19 @@ struct map_desc {
 
 /* types 0-3 are defined in asm/io.h */
 enum {
-	MT_UNCACHED = 4,
-	MT_CACHECLEAN,
-	MT_MINICLEAN,
+	MT_UNCACHED_RW = 4,
+	MT_CACHECLEAN_RO,
+	MT_MINICLEAN_RO,
 	MT_LOW_VECTORS,
 	MT_HIGH_VECTORS,
-	MT_MEMORY_RWX,
+	__MT_MEMORY_RWX,
 	MT_MEMORY_RW,
-	MT_ROM,
-	MT_MEMORY_RWX_NONCACHED,
+	MT_MEMORY_RX,
+	MT_ROM_RX,
+	MT_MEMORY_RW_NONCACHED,
+	MT_MEMORY_RX_NONCACHED,
 	MT_MEMORY_RW_DTCM,
-	MT_MEMORY_RWX_ITCM,
+	MT_MEMORY_RX_ITCM,
 	MT_MEMORY_RW_SO,
 	MT_MEMORY_DMA_READY,
 };
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/outercache.h linux-4.0.9-pax/arch/arm/include/asm/outercache.h
--- linux-4.0.9/arch/arm/include/asm/outercache.h	2015-04-13 11:20:50.386618073 +0200
+++ linux-4.0.9-pax/arch/arm/include/asm/outercache.h	2015-04-15 12:13:52.850318625 +0200
@@ -39,7 +39,7 @@ struct outer_cache_fns {
 	/* This is an ARM L2C thing */
 	void (*write_sec)(unsigned long, unsigned);
 	void (*configure)(const struct l2x0_regs *);
-};
+} __no_const;
 
 extern struct outer_cache_fns outer_cache;
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/page.h linux-4.0.9-pax/arch/arm/include/asm/page.h
--- linux-4.0.9/arch/arm/include/asm/page.h	2015-03-18 15:21:50.168349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/page.h	2015-04-15 12:13:52.850318625 +0200
@@ -23,6 +23,7 @@
 
 #else
 
+#include <linux/compiler.h>
 #include <asm/glue.h>
 
 /*
@@ -114,7 +115,7 @@ struct cpu_user_fns {
 	void (*cpu_clear_user_highpage)(struct page *page, unsigned long vaddr);
 	void (*cpu_copy_user_highpage)(struct page *to, struct page *from,
 			unsigned long vaddr, struct vm_area_struct *vma);
-};
+} __no_const;
 
 #ifdef MULTI_USER
 extern struct cpu_user_fns cpu_user;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/pgalloc.h linux-4.0.9-pax/arch/arm/include/asm/pgalloc.h
--- linux-4.0.9/arch/arm/include/asm/pgalloc.h	2015-03-18 15:21:50.168349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/pgalloc.h	2015-04-15 12:13:52.850318625 +0200
@@ -17,6 +17,7 @@
 #include <asm/processor.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
+#include <asm/system_info.h>
 
 #define check_pgt_cache()		do { } while (0)
 
@@ -43,6 +44,11 @@ static inline void pud_populate(struct m
 	set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE));
 }
 
+static inline void pud_populate_kernel(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	pud_populate(mm, pud, pmd);
+}
+
 #else	/* !CONFIG_ARM_LPAE */
 
 /*
@@ -51,6 +57,7 @@ static inline void pud_populate(struct m
 #define pmd_alloc_one(mm,addr)		({ BUG(); ((pmd_t *)2); })
 #define pmd_free(mm, pmd)		do { } while (0)
 #define pud_populate(mm,pmd,pte)	BUG()
+#define pud_populate_kernel(mm,pmd,pte)	BUG()
 
 #endif	/* CONFIG_ARM_LPAE */
 
@@ -128,6 +135,19 @@ static inline void pte_free(struct mm_st
 	__free_page(pte);
 }
 
+static inline void __section_update(pmd_t *pmdp, unsigned long addr, pmdval_t prot)
+{
+#ifdef CONFIG_ARM_LPAE
+	pmdp[0] = __pmd(pmd_val(pmdp[0]) | prot);
+#else
+	if (addr & SECTION_SIZE)
+		pmdp[1] = __pmd(pmd_val(pmdp[1]) | prot);
+	else
+		pmdp[0] = __pmd(pmd_val(pmdp[0]) | prot);
+#endif
+	flush_pmd_entry(pmdp);
+}
+
 static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
 				  pmdval_t prot)
 {
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/pgtable-2level.h linux-4.0.9-pax/arch/arm/include/asm/pgtable-2level.h
--- linux-4.0.9/arch/arm/include/asm/pgtable-2level.h	2015-04-13 11:20:50.398618073 +0200
+++ linux-4.0.9-pax/arch/arm/include/asm/pgtable-2level.h	2015-04-15 12:13:52.850318625 +0200
@@ -127,6 +127,9 @@
 #define L_PTE_SHARED		(_AT(pteval_t, 1) << 10)	/* shared(v6), coherent(xsc3) */
 #define L_PTE_NONE		(_AT(pteval_t, 1) << 11)
 
+/* Two-level page tables only have PXN in the PGD, not in the PTE. */
+#define L_PTE_PXN		(_AT(pteval_t, 0))
+
 /*
  * These are the memory types, defined to be compatible with
  * pre-ARMv6 CPUs cacheable and bufferable bits:   XXCB
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/pgtable-2level-hwdef.h linux-4.0.9-pax/arch/arm/include/asm/pgtable-2level-hwdef.h
--- linux-4.0.9/arch/arm/include/asm/pgtable-2level-hwdef.h	2015-03-18 15:21:50.168349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/pgtable-2level-hwdef.h	2015-04-15 12:13:52.850318625 +0200
@@ -27,7 +27,7 @@
 /*
  *   - section
  */
-#define PMD_SECT_PXN    (_AT(pmdval_t, 1) << 0)     /* v7 */
+#define PMD_SECT_PXN		(_AT(pmdval_t, 1) << 0)     /* v7 */
 #define PMD_SECT_BUFFERABLE	(_AT(pmdval_t, 1) << 2)
 #define PMD_SECT_CACHEABLE	(_AT(pmdval_t, 1) << 3)
 #define PMD_SECT_XN		(_AT(pmdval_t, 1) << 4)		/* v6 */
@@ -39,6 +39,7 @@
 #define PMD_SECT_nG		(_AT(pmdval_t, 1) << 17)	/* v6 */
 #define PMD_SECT_SUPER		(_AT(pmdval_t, 1) << 18)	/* v6 */
 #define PMD_SECT_AF		(_AT(pmdval_t, 0))
+#define PMD_SECT_RDONLY		(_AT(pmdval_t, 0))
 
 #define PMD_SECT_UNCACHED	(_AT(pmdval_t, 0))
 #define PMD_SECT_BUFFERED	(PMD_SECT_BUFFERABLE)
@@ -68,6 +69,7 @@
  *   - extended small page/tiny page
  */
 #define PTE_EXT_XN		(_AT(pteval_t, 1) << 0)		/* v6 */
+#define PTE_EXT_PXN		(_AT(pteval_t, 1) << 2)		/* v7 */
 #define PTE_EXT_AP_MASK		(_AT(pteval_t, 3) << 4)
 #define PTE_EXT_AP0		(_AT(pteval_t, 1) << 4)
 #define PTE_EXT_AP1		(_AT(pteval_t, 2) << 4)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/pgtable-3level.h linux-4.0.9-pax/arch/arm/include/asm/pgtable-3level.h
--- linux-4.0.9/arch/arm/include/asm/pgtable-3level.h	2015-04-13 11:20:50.402618072 +0200
+++ linux-4.0.9-pax/arch/arm/include/asm/pgtable-3level.h	2015-04-15 12:13:52.850318625 +0200
@@ -80,6 +80,7 @@
 #define L_PTE_USER		(_AT(pteval_t, 1) << 6)		/* AP[1] */
 #define L_PTE_SHARED		(_AT(pteval_t, 3) << 8)		/* SH[1:0], inner shareable */
 #define L_PTE_YOUNG		(_AT(pteval_t, 1) << 10)	/* AF */
+#define L_PTE_PXN		(_AT(pteval_t, 1) << 53)	/* PXN */
 #define L_PTE_XN		(_AT(pteval_t, 1) << 54)	/* XN */
 #define L_PTE_DIRTY		(_AT(pteval_t, 1) << 55)
 #define L_PTE_SPECIAL		(_AT(pteval_t, 1) << 56)
@@ -91,10 +92,12 @@
 #define L_PMD_SECT_SPLITTING	(_AT(pmdval_t, 1) << 56)
 #define L_PMD_SECT_NONE		(_AT(pmdval_t, 1) << 57)
 #define L_PMD_SECT_RDONLY	(_AT(pteval_t, 1) << 58)
+#define PMD_SECT_RDONLY		PMD_SECT_AP2
 
 /*
  * To be used in assembly code with the upper page attributes.
  */
+#define L_PTE_PXN_HIGH		(1 << (53 - 32))
 #define L_PTE_XN_HIGH		(1 << (54 - 32))
 #define L_PTE_DIRTY_HIGH	(1 << (55 - 32))
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/pgtable.h linux-4.0.9-pax/arch/arm/include/asm/pgtable.h
--- linux-4.0.9/arch/arm/include/asm/pgtable.h	2015-04-13 11:20:50.418618072 +0200
+++ linux-4.0.9-pax/arch/arm/include/asm/pgtable.h	2015-04-15 12:13:52.850318625 +0200
@@ -33,6 +33,9 @@
 #include <asm/pgtable-2level.h>
 #endif
 
+#define ktla_ktva(addr)		(addr)
+#define ktva_ktla(addr)		(addr)
+
 /*
  * Just any arbitrary offset to the start of the vmalloc VM area: the
  * current 8MB value just means that there will be a 8MB "hole" after the
@@ -48,6 +51,9 @@
 #define LIBRARY_TEXT_START	0x0c000000
 
 #ifndef __ASSEMBLY__
+extern pteval_t __supported_pte_mask;
+extern pmdval_t __supported_pmd_mask;
+
 extern void __pte_error(const char *file, int line, pte_t);
 extern void __pmd_error(const char *file, int line, pmd_t);
 extern void __pgd_error(const char *file, int line, pgd_t);
@@ -56,6 +62,48 @@ extern void __pgd_error(const char *file
 #define pmd_ERROR(pmd)		__pmd_error(__FILE__, __LINE__, pmd)
 #define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd)
 
+#define  __HAVE_ARCH_PAX_OPEN_KERNEL
+#define  __HAVE_ARCH_PAX_CLOSE_KERNEL
+
+#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+#include <asm/domain.h>
+#include <linux/thread_info.h>
+#include <linux/preempt.h>
+
+static inline int test_domain(int domain, int domaintype)
+{
+	return ((current_thread_info()->cpu_domain) & domain_val(domain, 3)) == domain_val(domain, domaintype);
+}
+#endif
+
+#ifdef CONFIG_PAX_KERNEXEC
+static inline unsigned long pax_open_kernel(void) {
+#ifdef CONFIG_ARM_LPAE
+	/* TODO */
+#else
+	preempt_disable();
+	BUG_ON(test_domain(DOMAIN_KERNEL, DOMAIN_KERNEXEC));
+	modify_domain(DOMAIN_KERNEL, DOMAIN_KERNEXEC);
+#endif
+	return 0;
+}
+
+static inline unsigned long pax_close_kernel(void) {
+#ifdef CONFIG_ARM_LPAE
+	/* TODO */
+#else
+	BUG_ON(test_domain(DOMAIN_KERNEL, DOMAIN_MANAGER));
+	/* DOMAIN_MANAGER = "client" under KERNEXEC */
+	modify_domain(DOMAIN_KERNEL, DOMAIN_MANAGER);
+	preempt_enable_no_resched();
+#endif
+	return 0;
+}
+#else
+static inline unsigned long pax_open_kernel(void) { return 0; }
+static inline unsigned long pax_close_kernel(void) { return 0; }
+#endif
+
 /*
  * This is the lowest virtual address we can permit any user space
  * mapping to be mapped at.  This is particularly important for
@@ -75,8 +123,8 @@ extern void __pgd_error(const char *file
 /*
  * The pgprot_* and protection_map entries will be fixed up in runtime
  * to include the cachable and bufferable bits based on memory policy,
- * as well as any architecture dependent bits like global/ASID and SMP
- * shared mapping bits.
+ * as well as any architecture dependent bits like global/ASID, PXN,
+ * and SMP shared mapping bits.
  */
 #define _L_PTE_DEFAULT	L_PTE_PRESENT | L_PTE_YOUNG
 
@@ -307,7 +355,7 @@ static inline pte_t pte_mknexec(pte_t pt
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER |
-		L_PTE_NONE | L_PTE_VALID;
+		L_PTE_NONE | L_PTE_VALID | __supported_pte_mask;
 	pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
 	return pte;
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/psci.h linux-4.0.9-pax/arch/arm/include/asm/psci.h
--- linux-4.0.9/arch/arm/include/asm/psci.h	2015-03-18 15:21:50.168349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/psci.h	2015-04-15 12:13:52.850318625 +0200
@@ -32,7 +32,7 @@ struct psci_operations {
 	int (*affinity_info)(unsigned long target_affinity,
 			unsigned long lowest_affinity_level);
 	int (*migrate_info_type)(void);
-};
+} __no_const;
 
 extern struct psci_operations psci_ops;
 extern struct smp_operations psci_smp_ops;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/smp.h linux-4.0.9-pax/arch/arm/include/asm/smp.h
--- linux-4.0.9/arch/arm/include/asm/smp.h	2015-03-18 15:21:50.168349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/smp.h	2015-04-15 12:13:52.850318625 +0200
@@ -107,7 +107,7 @@ struct smp_operations {
 	int  (*cpu_disable)(unsigned int cpu);
 #endif
 #endif
-};
+} __no_const;
 
 struct of_cpu_method {
 	const char *method;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/thread_info.h linux-4.0.9-pax/arch/arm/include/asm/thread_info.h
--- linux-4.0.9/arch/arm/include/asm/thread_info.h	2015-04-13 11:20:50.434618071 +0200
+++ linux-4.0.9-pax/arch/arm/include/asm/thread_info.h	2015-04-15 12:13:52.850318625 +0200
@@ -77,9 +77,9 @@ struct thread_info {
 	.flags		= 0,						\
 	.preempt_count	= INIT_PREEMPT_COUNT,				\
 	.addr_limit	= KERNEL_DS,					\
-	.cpu_domain	= domain_val(DOMAIN_USER, DOMAIN_MANAGER) |	\
-			  domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) |	\
-			  domain_val(DOMAIN_IO, DOMAIN_CLIENT),		\
+	.cpu_domain	= domain_val(DOMAIN_USER, DOMAIN_USERCLIENT) |	\
+			  domain_val(DOMAIN_KERNEL, DOMAIN_KERNELCLIENT) |	\
+			  domain_val(DOMAIN_IO, DOMAIN_KERNELCLIENT),	\
 }
 
 #define init_thread_info	(init_thread_union.thread_info)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/tls.h linux-4.0.9-pax/arch/arm/include/asm/tls.h
--- linux-4.0.9/arch/arm/include/asm/tls.h	2015-03-18 15:21:50.172349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/asm/tls.h	2015-04-15 12:13:52.850318625 +0200
@@ -3,6 +3,7 @@
 
 #include <linux/compiler.h>
 #include <asm/thread_info.h>
+#include <asm/pgtable.h>
 
 #ifdef __ASSEMBLY__
 #include <asm/asm-offsets.h>
@@ -89,7 +90,9 @@ static inline void set_tls(unsigned long
 			 * at 0xffff0fe0 must be used instead.  (see
 			 * entry-armv.S for details)
 			 */
+			pax_open_kernel();
 			*((unsigned int *)0xffff0ff0) = val;
+			pax_close_kernel();
 #endif
 		}
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/asm/uaccess.h linux-4.0.9-pax/arch/arm/include/asm/uaccess.h
--- linux-4.0.9/arch/arm/include/asm/uaccess.h	2015-04-13 11:20:50.434618071 +0200
+++ linux-4.0.9-pax/arch/arm/include/asm/uaccess.h	2015-04-15 12:13:52.850318625 +0200
@@ -18,6 +18,7 @@
 #include <asm/domain.h>
 #include <asm/unified.h>
 #include <asm/compiler.h>
+#include <asm/pgtable.h>
 
 #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 #include <asm-generic/uaccess-unaligned.h>
@@ -70,11 +71,38 @@ extern int __put_user_bad(void);
 static inline void set_fs(mm_segment_t fs)
 {
 	current_thread_info()->addr_limit = fs;
-	modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER);
+	modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_KERNELCLIENT : DOMAIN_MANAGER);
 }
 
 #define segment_eq(a, b)	((a) == (b))
 
+#define __HAVE_ARCH_PAX_OPEN_USERLAND
+#define __HAVE_ARCH_PAX_CLOSE_USERLAND
+
+static inline void pax_open_userland(void)
+{
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	if (segment_eq(get_fs(), USER_DS)) {
+		BUG_ON(test_domain(DOMAIN_USER, DOMAIN_UDEREF));
+		modify_domain(DOMAIN_USER, DOMAIN_UDEREF);
+	}
+#endif
+
+}
+
+static inline void pax_close_userland(void)
+{
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	if (segment_eq(get_fs(), USER_DS)) {
+		BUG_ON(test_domain(DOMAIN_USER, DOMAIN_NOACCESS));
+		modify_domain(DOMAIN_USER, DOMAIN_NOACCESS);
+	}
+#endif
+
+}
+
 #define __addr_ok(addr) ({ \
 	unsigned long flag; \
 	__asm__("cmp %2, %0; movlo %0, #0" \
@@ -198,8 +226,12 @@ extern int __get_user_64t_4(void *);
 
 #define get_user(x, p)							\
 	({								\
+		int __e;						\
 		might_fault();						\
-		__get_user_check(x, p);					\
+		pax_open_userland();					\
+		__e = __get_user_check((x), (p));			\
+		pax_close_userland();					\
+		__e;							\
 	 })
 
 extern int __put_user_1(void *, unsigned int);
@@ -244,8 +276,12 @@ extern int __put_user_8(void *, unsigned
 
 #define put_user(x, p)							\
 	({								\
+		int __e;						\
 		might_fault();						\
-		__put_user_check(x, p);					\
+		pax_open_userland();					\
+		__e = __put_user_check((x), (p));			\
+		pax_close_userland();					\
+		__e;							\
 	 })
 
 #else /* CONFIG_MMU */
@@ -269,6 +305,7 @@ static inline void set_fs(mm_segment_t f
 
 #endif /* CONFIG_MMU */
 
+#define access_ok_noprefault(type, addr, size) access_ok((type), (addr), (size))
 #define access_ok(type, addr, size)	(__range_ok(addr, size) == 0)
 
 #define user_addr_max() \
@@ -286,13 +323,17 @@ static inline void set_fs(mm_segment_t f
 #define __get_user(x, ptr)						\
 ({									\
 	long __gu_err = 0;						\
+	pax_open_userland();						\
 	__get_user_err((x), (ptr), __gu_err);				\
+	pax_close_userland();						\
 	__gu_err;							\
 })
 
 #define __get_user_error(x, ptr, err)					\
 ({									\
+	pax_open_userland();						\
 	__get_user_err((x), (ptr), err);				\
+	pax_close_userland();						\
 	(void) 0;							\
 })
 
@@ -368,13 +409,17 @@ do {									\
 #define __put_user(x, ptr)						\
 ({									\
 	long __pu_err = 0;						\
+	pax_open_userland();						\
 	__put_user_err((x), (ptr), __pu_err);				\
+	pax_close_userland();						\
 	__pu_err;							\
 })
 
 #define __put_user_error(x, ptr, err)					\
 ({									\
+	pax_open_userland();						\
 	__put_user_err((x), (ptr), err);				\
+	pax_close_userland();						\
 	(void) 0;							\
 })
 
@@ -474,11 +519,44 @@ do {									\
 
 
 #ifdef CONFIG_MMU
-extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n);
-extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n);
+extern unsigned long __must_check ___copy_from_user(void *to, const void __user *from, unsigned long n);
+extern unsigned long __must_check ___copy_to_user(void __user *to, const void *from, unsigned long n);
+
+static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long ret;
+
+	check_object_size(to, n, false);
+	pax_open_userland();
+	ret = ___copy_from_user(to, from, n);
+	pax_close_userland();
+	return ret;
+}
+
+static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	unsigned long ret;
+
+	check_object_size(from, n, true);
+	pax_open_userland();
+	ret = ___copy_to_user(to, from, n);
+	pax_close_userland();
+	return ret;
+}
+
 extern unsigned long __must_check __copy_to_user_std(void __user *to, const void *from, unsigned long n);
-extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n);
+extern unsigned long __must_check ___clear_user(void __user *addr, unsigned long n);
 extern unsigned long __must_check __clear_user_std(void __user *addr, unsigned long n);
+
+static inline unsigned long __must_check __clear_user(void __user *addr, unsigned long n)
+{
+	unsigned long ret;
+	pax_open_userland();
+	ret = ___clear_user(addr, n);
+	pax_close_userland();
+	return ret;
+}
+
 #else
 #define __copy_from_user(to, from, n)	(memcpy(to, (void __force *)from, n), 0)
 #define __copy_to_user(to, from, n)	(memcpy((void __force *)to, from, n), 0)
@@ -487,6 +565,9 @@ extern unsigned long __must_check __clea
 
 static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
 {
+	if ((long)n < 0)
+		return n;
+
 	if (access_ok(VERIFY_READ, from, n))
 		n = __copy_from_user(to, from, n);
 	else /* security hole - plug it */
@@ -496,6 +577,9 @@ static inline unsigned long __must_check
 
 static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n)
 {
+	if ((long)n < 0)
+		return n;
+
 	if (access_ok(VERIFY_WRITE, to, n))
 		n = __copy_to_user(to, from, n);
 	return n;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/include/uapi/asm/ptrace.h linux-4.0.9-pax/arch/arm/include/uapi/asm/ptrace.h
--- linux-4.0.9/arch/arm/include/uapi/asm/ptrace.h	2015-03-18 15:21:50.172349252 +0100
+++ linux-4.0.9-pax/arch/arm/include/uapi/asm/ptrace.h	2015-04-15 12:13:52.850318625 +0200
@@ -92,7 +92,7 @@
  * ARMv7 groups of PSR bits
  */
 #define APSR_MASK	0xf80f0000	/* N, Z, C, V, Q and GE flags */
-#define PSR_ISET_MASK	0x01000010	/* ISA state (J, T) mask */
+#define PSR_ISET_MASK	0x01000020	/* ISA state (J, T) mask */
 #define PSR_IT_MASK	0x0600fc00	/* If-Then execution state mask */
 #define PSR_ENDIAN_MASK	0x00000200	/* Endianness state mask */
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/Kconfig linux-4.0.9-pax/arch/arm/Kconfig
--- linux-4.0.9/arch/arm/Kconfig	2015-04-13 11:20:46.222618296 +0200
+++ linux-4.0.9-pax/arch/arm/Kconfig	2015-04-15 12:13:52.850318625 +0200
@@ -1735,7 +1735,7 @@ config ALIGNMENT_TRAP
 
 config UACCESS_WITH_MEMCPY
 	bool "Use kernel mem{cpy,set}() for {copy_to,clear}_user()"
-	depends on MMU
+	depends on MMU && !PAX_MEMORY_UDEREF
 	default y if CPU_FEROCEON
 	help
 	  Implement faster copy_to_user and clear_user methods for CPU
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kernel/armksyms.c linux-4.0.9-pax/arch/arm/kernel/armksyms.c
--- linux-4.0.9/arch/arm/kernel/armksyms.c	2015-03-18 15:21:50.172349252 +0100
+++ linux-4.0.9-pax/arch/arm/kernel/armksyms.c	2015-04-15 12:13:52.850318625 +0200
@@ -55,7 +55,7 @@ EXPORT_SYMBOL(arm_delay_ops);
 
 	/* networking */
 EXPORT_SYMBOL(csum_partial);
-EXPORT_SYMBOL(csum_partial_copy_from_user);
+EXPORT_SYMBOL(__csum_partial_copy_from_user);
 EXPORT_SYMBOL(csum_partial_copy_nocheck);
 EXPORT_SYMBOL(__csum_ipv6_magic);
 
@@ -91,9 +91,9 @@ EXPORT_SYMBOL(__memzero);
 #ifdef CONFIG_MMU
 EXPORT_SYMBOL(copy_page);
 
-EXPORT_SYMBOL(__copy_from_user);
-EXPORT_SYMBOL(__copy_to_user);
-EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(___copy_from_user);
+EXPORT_SYMBOL(___copy_to_user);
+EXPORT_SYMBOL(___clear_user);
 
 EXPORT_SYMBOL(__get_user_1);
 EXPORT_SYMBOL(__get_user_2);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kernel/entry-armv.S linux-4.0.9-pax/arch/arm/kernel/entry-armv.S
--- linux-4.0.9/arch/arm/kernel/entry-armv.S	2015-04-13 11:20:50.490618068 +0200
+++ linux-4.0.9-pax/arch/arm/kernel/entry-armv.S	2015-04-15 12:13:52.850318625 +0200
@@ -48,6 +48,87 @@
 9997:
 	.endm
 
+	.macro	pax_enter_kernel
+#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+	@ make aligned space for saved DACR
+	sub	sp, sp, #8
+	@ save regs
+	stmdb	sp!, {r1, r2}
+	@ read DACR from cpu_domain into r1
+	mov	r2, sp
+	@ assume 8K pages, since we have to split the immediate in two
+	bic	r2, r2, #(0x1fc0)
+	bic	r2, r2, #(0x3f)
+	ldr	r1, [r2, #TI_CPU_DOMAIN]
+	@ store old DACR on stack
+	str	r1, [sp, #8]
+#ifdef CONFIG_PAX_KERNEXEC
+	@ set type of DOMAIN_KERNEL to DOMAIN_KERNELCLIENT
+	bic	r1, r1, #(domain_val(DOMAIN_KERNEL, 3))
+	orr	r1, r1, #(domain_val(DOMAIN_KERNEL, DOMAIN_KERNELCLIENT))
+#endif
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	@ set current DOMAIN_USER to DOMAIN_NOACCESS
+	bic	r1, r1, #(domain_val(DOMAIN_USER, 3))
+#endif
+	@ write r1 to current_thread_info()->cpu_domain
+	str	r1, [r2, #TI_CPU_DOMAIN]
+	@ write r1 to DACR
+	mcr	p15, 0, r1, c3, c0, 0
+	@ instruction sync
+	instr_sync
+	@ restore regs
+	ldmia	sp!, {r1, r2}
+#endif
+	.endm
+
+	.macro	pax_open_userland
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	@ save regs
+	stmdb	sp!, {r0, r1}
+	@ read DACR from cpu_domain into r1
+	mov	r0, sp
+	@ assume 8K pages, since we have to split the immediate in two
+	bic	r0, r0, #(0x1fc0)
+	bic	r0, r0, #(0x3f)
+	ldr	r1, [r0, #TI_CPU_DOMAIN]
+	@ set current DOMAIN_USER to DOMAIN_CLIENT
+	bic	r1, r1, #(domain_val(DOMAIN_USER, 3))
+	orr	r1, r1, #(domain_val(DOMAIN_USER, DOMAIN_UDEREF))
+	@ write r1 to current_thread_info()->cpu_domain
+	str	r1, [r0, #TI_CPU_DOMAIN]
+	@ write r1 to DACR
+	mcr	p15, 0, r1, c3, c0, 0
+	@ instruction sync
+	instr_sync
+	@ restore regs
+	ldmia	sp!, {r0, r1}
+#endif
+	.endm
+
+	.macro	pax_close_userland
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	@ save regs
+	stmdb	sp!, {r0, r1}
+	@ read DACR from cpu_domain into r1
+	mov	r0, sp
+	@ assume 8K pages, since we have to split the immediate in two
+	bic	r0, r0, #(0x1fc0)
+	bic	r0, r0, #(0x3f)
+	ldr	r1, [r0, #TI_CPU_DOMAIN]
+	@ set current DOMAIN_USER to DOMAIN_NOACCESS
+	bic	r1, r1, #(domain_val(DOMAIN_USER, 3))
+	@ write r1 to current_thread_info()->cpu_domain
+	str	r1, [r0, #TI_CPU_DOMAIN]
+	@ write r1 to DACR
+	mcr	p15, 0, r1, c3, c0, 0
+	@ instruction sync
+	instr_sync
+	@ restore regs
+	ldmia	sp!, {r0, r1}
+#endif
+	.endm
+
 	.macro	pabt_helper
 	@ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5
 #ifdef MULTI_PABORT
@@ -90,11 +171,15 @@
  * Invalid mode handlers
  */
 	.macro	inv_entry, reason
+
+	pax_enter_kernel
+
 	sub	sp, sp, #S_FRAME_SIZE
  ARM(	stmib	sp, {r1 - lr}		)
  THUMB(	stmia	sp, {r0 - r12}		)
  THUMB(	str	sp, [sp, #S_SP]		)
  THUMB(	str	lr, [sp, #S_LR]		)
+
 	mov	r1, #\reason
 	.endm
 
@@ -150,7 +235,11 @@ ENDPROC(__und_invalid)
 	.macro	svc_entry, stack_hole=0, trace=1
  UNWIND(.fnstart		)
  UNWIND(.save {r0 - pc}		)
+
+	pax_enter_kernel
+
 	sub	sp, sp, #(S_FRAME_SIZE + \stack_hole - 4)
+
 #ifdef CONFIG_THUMB2_KERNEL
  SPFIX(	str	r0, [sp]	)	@ temporarily saved
  SPFIX(	mov	r0, sp		)
@@ -165,7 +254,12 @@ ENDPROC(__und_invalid)
 	ldmia	r0, {r3 - r5}
 	add	r7, sp, #S_SP - 4	@ here for interlock avoidance
 	mov	r6, #-1			@  ""  ""      ""       ""
+#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+	@ offset sp by 8 as done in pax_enter_kernel
+	add	r2, sp, #(S_FRAME_SIZE + \stack_hole + 4)
+#else
 	add	r2, sp, #(S_FRAME_SIZE + \stack_hole - 4)
+#endif
  SPFIX(	addeq	r2, r2, #4	)
 	str	r3, [sp, #-4]!		@ save the "real" r0 copied
 					@ from the exception stack
@@ -369,6 +463,9 @@ ENDPROC(__fiq_abt)
 	.macro	usr_entry, trace=1
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)	@ don't unwind the user space
+
+	pax_enter_kernel_user
+
 	sub	sp, sp, #S_FRAME_SIZE
  ARM(	stmib	sp, {r1 - r12}	)
  THUMB(	stmia	sp, {r0 - r12}	)
@@ -479,7 +576,9 @@ __und_usr:
 	tst	r3, #PSR_T_BIT			@ Thumb mode?
 	bne	__und_usr_thumb
 	sub	r4, r2, #4			@ ARM instr at LR - 4
+	pax_open_userland
 1:	ldrt	r0, [r4]
+	pax_close_userland
  ARM_BE8(rev	r0, r0)				@ little endian instruction
 
 	@ r0 = 32-bit ARM instruction which caused the exception
@@ -513,11 +612,15 @@ __und_usr_thumb:
  */
 	.arch	armv6t2
 #endif
+	pax_open_userland
 2:	ldrht	r5, [r4]
+	pax_close_userland
 ARM_BE8(rev16	r5, r5)				@ little endian instruction
 	cmp	r5, #0xe800			@ 32bit instruction if xx != 0
 	blo	__und_usr_fault_16		@ 16bit undefined instruction
+	pax_open_userland
 3:	ldrht	r0, [r2]
+	pax_close_userland
 ARM_BE8(rev16	r0, r0)				@ little endian instruction
 	add	r2, r2, #2			@ r2 is PC + 2, make it PC + 4
 	str	r2, [sp, #S_PC]			@ it's a 2x16bit instr, update
@@ -547,7 +650,8 @@ ENDPROC(__und_usr)
  */
 	.pushsection .fixup, "ax"
 	.align	2
-4:	str     r4, [sp, #S_PC]			@ retry current instruction
+4:	pax_close_userland
+	str     r4, [sp, #S_PC]			@ retry current instruction
 	ret	r9
 	.popsection
 	.pushsection __ex_table,"a"
@@ -767,7 +871,7 @@ ENTRY(__switch_to)
  THUMB(	str	lr, [ip], #4		   )
 	ldr	r4, [r2, #TI_TP_VALUE]
 	ldr	r5, [r2, #TI_TP_VALUE + 4]
-#ifdef CONFIG_CPU_USE_DOMAINS
+#if defined(CONFIG_CPU_USE_DOMAINS) || defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
 	switch_tls r1, r4, r5, r3, r7
@@ -776,7 +880,7 @@ ENTRY(__switch_to)
 	ldr	r8, =__stack_chk_guard
 	ldr	r7, [r7, #TSK_STACK_CANARY]
 #endif
-#ifdef CONFIG_CPU_USE_DOMAINS
+#if defined(CONFIG_CPU_USE_DOMAINS) || defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
 	mcr	p15, 0, r6, c3, c0, 0		@ Set domain register
 #endif
 	mov	r5, r0
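
The pax_enter_kernel/pax_open_userland/pax_close_userland macros above all locate the current thread_info by clearing the low bits of the stack pointer; the two bic constants together are THREAD_SIZE - 1. A standalone sketch of that masking (the sample stack pointer is made up):

#include <stdint.h>
#include <stdio.h>

/*
 * What the "bic rX, rX, #(0x1fc0); bic rX, rX, #(0x3f)" pairs compute:
 * 0x1fc0 | 0x3f == 0x1fff == THREAD_SIZE - 1 for the 8K kernel stacks, so
 * masking the stack pointer yields the base of the current thread's stack,
 * where thread_info (and its cpu_domain word, TI_CPU_DOMAIN) lives.  The
 * constant is split in two because an ARM immediate is an 8-bit value
 * rotated by an even amount.
 */
#define THREAD_SIZE	8192UL

static uintptr_t thread_info_base(uintptr_t sp)
{
	return sp & ~(THREAD_SIZE - 1);
}

int main(void)
{
	uintptr_t sp = 0xc7a13e48UL;	/* made-up kernel stack pointer */

	printf("sp %#lx -> thread_info %#lx\n",
	       (unsigned long)sp, (unsigned long)thread_info_base(sp));
	return 0;
}
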
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kernel/entry-common.S linux-4.0.9-pax/arch/arm/kernel/entry-common.S
--- linux-4.0.9/arch/arm/kernel/entry-common.S	2015-06-15 16:02:22.179183858 +0200
+++ linux-4.0.9-pax/arch/arm/kernel/entry-common.S	2015-06-15 16:02:33.011183834 +0200
@@ -11,18 +11,46 @@
 #include <asm/assembler.h>
 #include <asm/unistd.h>
 #include <asm/ftrace.h>
+#include <asm/domain.h>
 #include <asm/unwind.h>
 
+#include "entry-header.S"
+
 #ifdef CONFIG_NEED_RET_TO_USER
 #include <mach/entry-macro.S>
 #else
 	.macro  arch_ret_to_user, tmp1, tmp2
+#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+	@ save regs
+	stmdb	sp!, {r1, r2}
+	@ read DACR from cpu_domain into r1
+	mov	r2, sp
+	@ assume 8K pages, since we have to split the immediate in two
+	bic	r2, r2, #(0x1fc0)
+	bic	r2, r2, #(0x3f)
+	ldr	r1, [r2, #TI_CPU_DOMAIN]
+#ifdef CONFIG_PAX_KERNEXEC
+	@ set type of DOMAIN_KERNEL to DOMAIN_KERNELCLIENT
+	bic	r1, r1, #(domain_val(DOMAIN_KERNEL, 3))
+	orr	r1, r1, #(domain_val(DOMAIN_KERNEL, DOMAIN_KERNELCLIENT))
+#endif
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	@ set current DOMAIN_USER to DOMAIN_UDEREF
+	bic	r1, r1, #(domain_val(DOMAIN_USER, 3))
+	orr	r1, r1, #(domain_val(DOMAIN_USER, DOMAIN_UDEREF))
+#endif
+	@ write r1 to current_thread_info()->cpu_domain
+	str	r1, [r2, #TI_CPU_DOMAIN]
+	@ write r1 to DACR
+	mcr	p15, 0, r1, c3, c0, 0
+	@ instruction sync
+	instr_sync
+	@ restore regs
+	ldmia	sp!, {r1, r2}
+#endif
 	.endm
 #endif
 
-#include "entry-header.S"
-
-
 	.align	5
 /*
  * This is the fast syscall return path.  We do as little as
@@ -173,6 +201,12 @@ ENTRY(vector_swi)
  USER(	ldr	scno, [lr, #-4]		)	@ get SWI instruction
 #endif
 
+	/*
+	 * do this here to avoid a performance hit of wrapping the code above
+	 * that directly dereferences userland to parse the SWI instruction
+	 */
+	pax_enter_kernel_user
+
 	adr	tbl, sys_call_table		@ load syscall table pointer
 
 #if defined(CONFIG_OABI_COMPAT)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kernel/entry-header.S linux-4.0.9-pax/arch/arm/kernel/entry-header.S
--- linux-4.0.9/arch/arm/kernel/entry-header.S	2015-03-18 15:21:50.172349252 +0100
+++ linux-4.0.9-pax/arch/arm/kernel/entry-header.S	2015-04-15 12:13:52.850318625 +0200
@@ -196,6 +196,60 @@
 	msr	cpsr_c, \rtemp			@ switch back to the SVC mode
 	.endm
 
+	.macro	pax_enter_kernel_user
+#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+	@ save regs
+	stmdb	sp!, {r0, r1}
+	@ read DACR from cpu_domain into r1
+	mov	r0, sp
+	@ assume 8K pages, since we have to split the immediate in two
+	bic	r0, r0, #(0x1fc0)
+	bic	r0, r0, #(0x3f)
+	ldr	r1, [r0, #TI_CPU_DOMAIN]
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	@ set current DOMAIN_USER to DOMAIN_NOACCESS
+	bic	r1, r1, #(domain_val(DOMAIN_USER, 3))
+#endif
+#ifdef CONFIG_PAX_KERNEXEC
+	@ set current DOMAIN_KERNEL to DOMAIN_KERNELCLIENT
+	bic	r1, r1, #(domain_val(DOMAIN_KERNEL, 3))
+	orr	r1, r1, #(domain_val(DOMAIN_KERNEL, DOMAIN_KERNELCLIENT))
+#endif
+	@ write r1 to current_thread_info()->cpu_domain
+	str	r1, [r0, #TI_CPU_DOMAIN]
+	@ write r1 to DACR
+	mcr	p15, 0, r1, c3, c0, 0
+	@ instruction sync
+	instr_sync
+	@ restore regs
+	ldmia	sp!, {r0, r1}
+#endif
+	.endm
+
+	.macro  pax_exit_kernel
+#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+	@ save regs
+	stmdb	sp!, {r0, r1}
+	@ read old DACR from stack into r1
+	ldr	r1, [sp, #(8 + S_SP)]
+	sub	r1, r1, #8
+	ldr	r1, [r1]
+
+	@ write r1 to current_thread_info()->cpu_domain
+	mov	r0, sp
+	@ assume 8K pages, since we have to split the immediate in two
+	bic	r0, r0, #(0x1fc0)
+	bic	r0, r0, #(0x3f)
+	str	r1, [r0, #TI_CPU_DOMAIN]
+	@ write r1 to DACR
+	mcr	p15, 0, r1, c3, c0, 0
+	@ instruction sync
+	instr_sync
+	@ restore regs
+	ldmia	sp!, {r0, r1}
+#endif
+	.endm
+
 #ifndef CONFIG_THUMB2_KERNEL
 	.macro	svc_exit, rpsr, irq = 0
 	.if	\irq != 0
@@ -215,6 +269,9 @@
 	blne	trace_hardirqs_off
 #endif
 	.endif
+
+	pax_exit_kernel
+
 	msr	spsr_cxsf, \rpsr
 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
 	@ We must avoid clrex due to Cortex-A15 erratum #830321
@@ -291,6 +348,9 @@
 	blne	trace_hardirqs_off
 #endif
 	.endif
+
+	pax_exit_kernel
+
 	ldr	lr, [sp, #S_SP]			@ top of the stack
 	ldrd	r0, r1, [sp, #S_LR]		@ calling lr and pc
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kernel/fiq.c linux-4.0.9-pax/arch/arm/kernel/fiq.c
--- linux-4.0.9/arch/arm/kernel/fiq.c	2015-03-18 15:21:50.172349252 +0100
+++ linux-4.0.9-pax/arch/arm/kernel/fiq.c	2015-04-15 12:13:52.850318625 +0200
@@ -95,7 +95,10 @@ void set_fiq_handler(void *start, unsign
 	void *base = vectors_page;
 	unsigned offset = FIQ_OFFSET;
 
+	pax_open_kernel();
 	memcpy(base + offset, start, length);
+	pax_close_kernel();
+
 	if (!cache_is_vipt_nonaliasing())
 		flush_icache_range((unsigned long)base + offset, offset +
 				   length);
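
The pax_open_kernel()/pax_close_kernel() bracket seen here (and again in the tcm, exynos and powerdomain hunks below) temporarily lifts write protection around one legitimate update of otherwise read-only kernel data. A userland analogue using POSIX mmap/mprotect, as a sketch only (error handling omitted, contents made up):

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

/*
 * Data that normally sits in a read-only mapping is made briefly writable
 * for a one-off update, then locked down again.
 */
int main(void)
{
	long pagesz = sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, pagesz, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	strcpy(p, "FIQ handler v1");
	mprotect(p, pagesz, PROT_READ);			/* steady state: RO   */

	mprotect(p, pagesz, PROT_READ | PROT_WRITE);	/* pax_open_kernel()  */
	strcpy(p, "FIQ handler v2");			/* the one-off update */
	mprotect(p, pagesz, PROT_READ);			/* pax_close_kernel() */

	puts(p);
	return 0;
}
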
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kernel/head.S linux-4.0.9-pax/arch/arm/kernel/head.S
--- linux-4.0.9/arch/arm/kernel/head.S	2015-04-13 11:20:50.498618067 +0200
+++ linux-4.0.9-pax/arch/arm/kernel/head.S	2015-04-15 12:13:52.850318625 +0200
@@ -444,7 +444,7 @@ __enable_mmu:
 	mov	r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
 		      domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
 		      domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
-		      domain_val(DOMAIN_IO, DOMAIN_CLIENT))
+		      domain_val(DOMAIN_IO, DOMAIN_KERNELCLIENT))
 	mcr	p15, 0, r5, c3, c0, 0		@ load domain access register
 	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer
 #endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kernel/module.c linux-4.0.9-pax/arch/arm/kernel/module.c
--- linux-4.0.9/arch/arm/kernel/module.c	2015-04-13 11:20:50.570618063 +0200
+++ linux-4.0.9-pax/arch/arm/kernel/module.c	2015-04-15 12:13:52.850318625 +0200
@@ -38,12 +38,39 @@
 #endif
 
 #ifdef CONFIG_MMU
-void *module_alloc(unsigned long size)
+static inline void *__module_alloc(unsigned long size, pgprot_t prot)
 {
+	if (!size || PAGE_ALIGN(size) > MODULES_END - MODULES_VADDR)
+		return NULL;
 	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-				GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+				GFP_KERNEL, prot, 0, NUMA_NO_NODE,
 				__builtin_return_address(0));
 }
+
+void *module_alloc(unsigned long size)
+{
+
+#ifdef CONFIG_PAX_KERNEXEC
+	return __module_alloc(size, PAGE_KERNEL);
+#else
+	return __module_alloc(size, PAGE_KERNEL_EXEC);
+#endif
+
+}
+
+#ifdef CONFIG_PAX_KERNEXEC
+void module_memfree_exec(void *module_region)
+{
+	module_memfree(module_region);
+}
+EXPORT_SYMBOL(module_memfree_exec);
+
+void *module_alloc_exec(unsigned long size)
+{
+	return __module_alloc(size, PAGE_KERNEL_EXEC);
+}
+EXPORT_SYMBOL(module_alloc_exec);
+#endif
 #endif
 
 int
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kernel/patch.c linux-4.0.9-pax/arch/arm/kernel/patch.c
--- linux-4.0.9/arch/arm/kernel/patch.c	2015-04-13 11:20:50.570618063 +0200
+++ linux-4.0.9-pax/arch/arm/kernel/patch.c	2015-04-15 12:13:52.850318625 +0200
@@ -66,6 +66,7 @@ void __kprobes __patch_text_real(void *a
 	else
 		__acquire(&patch_lock);
 
+	pax_open_kernel();
 	if (thumb2 && __opcode_is_thumb16(insn)) {
 		*(u16 *)waddr = __opcode_to_mem_thumb16(insn);
 		size = sizeof(u16);
@@ -97,6 +98,7 @@ void __kprobes __patch_text_real(void *a
 		*(u32 *)waddr = insn;
 		size = sizeof(u32);
 	}
+	pax_close_kernel();
 
 	if (waddr != addr) {
 		flush_kernel_vmap_range(waddr, twopage ? size / 2 : size);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kernel/process.c linux-4.0.9-pax/arch/arm/kernel/process.c
--- linux-4.0.9/arch/arm/kernel/process.c	2015-05-07 02:10:36.356265450 +0200
+++ linux-4.0.9-pax/arch/arm/kernel/process.c	2015-05-07 02:10:56.284266036 +0200
@@ -213,6 +213,7 @@ void machine_power_off(void)
 
 	if (pm_power_off)
 		pm_power_off();
+	BUG();
 }
 
 /*
@@ -226,7 +227,7 @@ void machine_power_off(void)
  * executing pre-reset code, and using RAM that the primary CPU's code wishes
  * to use. Implementing such co-ordination would be essentially impossible.
  */
-void machine_restart(char *cmd)
+__noreturn void machine_restart(char *cmd)
 {
 	local_irq_disable();
 	smp_send_stop();
@@ -430,12 +431,6 @@ unsigned long get_wchan(struct task_stru
 	return 0;
 }
 
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	unsigned long range_end = mm->brk + 0x02000000;
-	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
-}
-
 #ifdef CONFIG_MMU
 #ifdef CONFIG_KUSER_HELPERS
 /*
@@ -451,7 +446,7 @@ static struct vm_area_struct gate_vma =
 
 static int __init gate_vma_init(void)
 {
-	gate_vma.vm_page_prot = PAGE_READONLY_EXEC;
+	gate_vma.vm_page_prot = vm_get_page_prot(gate_vma.vm_flags);
 	return 0;
 }
 arch_initcall(gate_vma_init);
@@ -480,81 +475,13 @@ const char *arch_vma_name(struct vm_area
 	return is_gate_vma(vma) ? "[vectors]" : NULL;
 }
 
-/* If possible, provide a placement hint at a random offset from the
- * stack for the signal page.
- */
-static unsigned long sigpage_addr(const struct mm_struct *mm,
-				  unsigned int npages)
-{
-	unsigned long offset;
-	unsigned long first;
-	unsigned long last;
-	unsigned long addr;
-	unsigned int slots;
-
-	first = PAGE_ALIGN(mm->start_stack);
-
-	last = TASK_SIZE - (npages << PAGE_SHIFT);
-
-	/* No room after stack? */
-	if (first > last)
-		return 0;
-
-	/* Just enough room? */
-	if (first == last)
-		return first;
-
-	slots = ((last - first) >> PAGE_SHIFT) + 1;
-
-	offset = get_random_int() % slots;
-
-	addr = first + (offset << PAGE_SHIFT);
-
-	return addr;
-}
-
-static struct page *signal_page;
-extern struct page *get_signal_page(void);
-
-static const struct vm_special_mapping sigpage_mapping = {
-	.name = "[sigpage]",
-	.pages = &signal_page,
-};
-
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
-	unsigned long addr;
-	unsigned long hint;
-	int ret = 0;
-
-	if (!signal_page)
-		signal_page = get_signal_page();
-	if (!signal_page)
-		return -ENOMEM;
 
 	down_write(&mm->mmap_sem);
-	hint = sigpage_addr(mm, 1);
-	addr = get_unmapped_area(NULL, hint, PAGE_SIZE, 0, 0);
-	if (IS_ERR_VALUE(addr)) {
-		ret = addr;
-		goto up_fail;
-	}
-
-	vma = _install_special_mapping(mm, addr, PAGE_SIZE,
-		VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-		&sigpage_mapping);
-
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
-		goto up_fail;
-	}
-
-	mm->context.sigpage = addr;
-
- up_fail:
+	mm->context.sigpage = (PAGE_OFFSET + (get_random_int() % 0x3FFEFFE0)) & 0xFFFFFFFC;
 	up_write(&mm->mmap_sem);
-	return ret;
+	return 0;
 }
 #endif
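
With the hunk above the signal return trampoline no longer lives in a mapped [sigpage]; mm->context.sigpage becomes a random, word-aligned marker in the kernel half of the address space, never mapped for userland. The do_PrefetchAbort() hunk in arch/arm/mm/fault.c further down recognises returns to that marker and performs the sigreturn on the resulting fault. A worked range for the marker, assuming the common 3G/1G split where PAGE_OFFSET is 0xC0000000:

#include <stdio.h>

/*
 * Range of (PAGE_OFFSET + (get_random_int() % 0x3FFEFFE0)) & 0xFFFFFFFC,
 * i.e. random offsets 0 .. 0x3FFEFFDF, rounded down to word alignment.
 */
#define PAGE_OFFSET	0xC0000000UL

int main(void)
{
	unsigned long lo = (PAGE_OFFSET + 0x0UL) & 0xFFFFFFFC;
	unsigned long hi = (PAGE_OFFSET + 0x3FFEFFDFUL) & 0xFFFFFFFC;

	printf("sigpage marker range: %#010lx .. %#010lx\n", lo, hi);
	return 0;
}
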
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kernel/psci.c linux-4.0.9-pax/arch/arm/kernel/psci.c
--- linux-4.0.9/arch/arm/kernel/psci.c	2015-05-17 23:33:05.113623796 +0200
+++ linux-4.0.9-pax/arch/arm/kernel/psci.c	2015-05-17 23:37:01.317636624 +0200
@@ -26,7 +26,7 @@
 #include <asm/psci.h>
 #include <asm/system_misc.h>
 
-struct psci_operations psci_ops;
+struct psci_operations psci_ops __read_only;
 
 static int (*invoke_psci_fn)(u32, u32, u32, u32);
 typedef int (*psci_initcall_t)(const struct device_node *);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kernel/setup.c linux-4.0.9-pax/arch/arm/kernel/setup.c
--- linux-4.0.9/arch/arm/kernel/setup.c	2015-04-13 11:20:50.602618062 +0200
+++ linux-4.0.9-pax/arch/arm/kernel/setup.c	2015-04-15 12:13:52.854318625 +0200
@@ -105,21 +105,23 @@ EXPORT_SYMBOL(elf_hwcap);
 unsigned int elf_hwcap2 __read_mostly;
 EXPORT_SYMBOL(elf_hwcap2);
 
+pteval_t __supported_pte_mask __read_only;
+pmdval_t __supported_pmd_mask __read_only;
 
 #ifdef MULTI_CPU
-struct processor processor __read_mostly;
+struct processor processor __read_only;
 #endif
 #ifdef MULTI_TLB
-struct cpu_tlb_fns cpu_tlb __read_mostly;
+struct cpu_tlb_fns cpu_tlb __read_only;
 #endif
 #ifdef MULTI_USER
-struct cpu_user_fns cpu_user __read_mostly;
+struct cpu_user_fns cpu_user __read_only;
 #endif
 #ifdef MULTI_CACHE
-struct cpu_cache_fns cpu_cache __read_mostly;
+struct cpu_cache_fns cpu_cache __read_only;
 #endif
 #ifdef CONFIG_OUTER_CACHE
-struct outer_cache_fns outer_cache __read_mostly;
+struct outer_cache_fns outer_cache __read_only;
 EXPORT_SYMBOL(outer_cache);
 #endif
 
@@ -250,9 +252,13 @@ static int __get_cpu_architecture(void)
 		 * Register 0 and check for VMSAv7 or PMSAv7 */
 		unsigned int mmfr0 = read_cpuid_ext(CPUID_EXT_MMFR0);
 		if ((mmfr0 & 0x0000000f) >= 0x00000003 ||
-		    (mmfr0 & 0x000000f0) >= 0x00000030)
+		    (mmfr0 & 0x000000f0) >= 0x00000030) {
 			cpu_arch = CPU_ARCH_ARMv7;
-		else if ((mmfr0 & 0x0000000f) == 0x00000002 ||
+			if ((mmfr0 & 0x0000000f) == 0x00000005 || (mmfr0 & 0x0000000f) == 0x00000004) {
+				__supported_pte_mask |= L_PTE_PXN;
+				__supported_pmd_mask |= PMD_PXNTABLE;
+			}
+		} else if ((mmfr0 & 0x0000000f) == 0x00000002 ||
 			 (mmfr0 & 0x000000f0) == 0x00000020)
 			cpu_arch = CPU_ARCH_ARMv6;
 		else
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kernel/signal.c linux-4.0.9-pax/arch/arm/kernel/signal.c
--- linux-4.0.9/arch/arm/kernel/signal.c	2015-04-13 11:20:50.602618062 +0200
+++ linux-4.0.9-pax/arch/arm/kernel/signal.c	2015-04-15 12:13:52.854318625 +0200
@@ -24,8 +24,6 @@
 
 extern const unsigned long sigreturn_codes[7];
 
-static unsigned long signal_return_offset;
-
 #ifdef CONFIG_CRUNCH
 static int preserve_crunch_context(struct crunch_sigframe __user *frame)
 {
@@ -396,8 +394,7 @@ setup_return(struct pt_regs *regs, struc
 			 * except when the MPU has protected the vectors
 			 * page from PL0
 			 */
-			retcode = mm->context.sigpage + signal_return_offset +
-				  (idx << 2) + thumb;
+			retcode = mm->context.sigpage + (idx << 2) + thumb;
 		} else
 #endif
 		{
@@ -603,33 +600,3 @@ do_work_pending(struct pt_regs *regs, un
 	} while (thread_flags & _TIF_WORK_MASK);
 	return 0;
 }
-
-struct page *get_signal_page(void)
-{
-	unsigned long ptr;
-	unsigned offset;
-	struct page *page;
-	void *addr;
-
-	page = alloc_pages(GFP_KERNEL, 0);
-
-	if (!page)
-		return NULL;
-
-	addr = page_address(page);
-
-	/* Give the signal return code some randomness */
-	offset = 0x200 + (get_random_int() & 0x7fc);
-	signal_return_offset = offset;
-
-	/*
-	 * Copy signal return handlers into the vector page, and
-	 * set sigreturn to be a pointer to these.
-	 */
-	memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes));
-
-	ptr = (unsigned long)addr + offset;
-	flush_icache_range(ptr, ptr + sizeof(sigreturn_codes));
-
-	return page;
-}
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kernel/smp.c linux-4.0.9-pax/arch/arm/kernel/smp.c
--- linux-4.0.9/arch/arm/kernel/smp.c	2015-03-18 15:21:50.176349252 +0100
+++ linux-4.0.9-pax/arch/arm/kernel/smp.c	2015-04-15 12:13:52.854318625 +0200
@@ -76,7 +76,7 @@ enum ipi_msg_type {
 
 static DECLARE_COMPLETION(cpu_running);
 
-static struct smp_operations smp_ops;
+static struct smp_operations smp_ops __read_only;
 
 void __init smp_set_ops(struct smp_operations *ops)
 {
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kernel/tcm.c linux-4.0.9-pax/arch/arm/kernel/tcm.c
--- linux-4.0.9/arch/arm/kernel/tcm.c	2015-03-18 15:21:50.176349252 +0100
+++ linux-4.0.9-pax/arch/arm/kernel/tcm.c	2015-04-15 12:13:52.854318625 +0200
@@ -61,7 +61,7 @@ static struct map_desc itcm_iomap[] __in
 		.virtual	= ITCM_OFFSET,
 		.pfn		= __phys_to_pfn(ITCM_OFFSET),
 		.length		= 0,
-		.type		= MT_MEMORY_RWX_ITCM,
+		.type		= MT_MEMORY_RX_ITCM,
 	}
 };
 
@@ -267,7 +267,9 @@ no_dtcm:
 		start = &__sitcm_text;
 		end   = &__eitcm_text;
 		ram   = &__itcm_start;
+		pax_open_kernel();
 		memcpy(start, ram, itcm_code_sz);
+		pax_close_kernel();
 		pr_debug("CPU ITCM: copied code from %p - %p\n",
 			 start, end);
 		itcm_present = true;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kernel/traps.c linux-4.0.9-pax/arch/arm/kernel/traps.c
--- linux-4.0.9/arch/arm/kernel/traps.c	2015-03-18 15:21:50.176349252 +0100
+++ linux-4.0.9-pax/arch/arm/kernel/traps.c	2015-04-15 12:13:52.854318625 +0200
@@ -880,7 +880,11 @@ void __init early_trap_init(void *vector
 	kuser_init(vectors_base);
 
 	flush_icache_range(vectors, vectors + PAGE_SIZE * 2);
-	modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
+
+#ifndef CONFIG_PAX_MEMORY_UDEREF
+	modify_domain(DOMAIN_USER, DOMAIN_USERCLIENT);
+#endif
+
 #else /* ifndef CONFIG_CPU_V7M */
 	/*
 	 * on V7-M there is no need to copy the vector table to a dedicated
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kernel/vmlinux.lds.S linux-4.0.9-pax/arch/arm/kernel/vmlinux.lds.S
--- linux-4.0.9/arch/arm/kernel/vmlinux.lds.S	2015-03-18 15:21:50.176349252 +0100
+++ linux-4.0.9-pax/arch/arm/kernel/vmlinux.lds.S	2015-04-15 12:13:52.854318625 +0200
@@ -37,7 +37,7 @@
 #endif
 
 #if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \
-	defined(CONFIG_GENERIC_BUG)
+	defined(CONFIG_GENERIC_BUG) || defined(CONFIG_PAX_REFCOUNT)
 #define ARM_EXIT_KEEP(x)	x
 #define ARM_EXIT_DISCARD(x)
 #else
@@ -123,6 +123,8 @@ SECTIONS
 #ifdef CONFIG_DEBUG_RODATA
 	. = ALIGN(1<<SECTION_SHIFT);
 #endif
+	_etext = .;			/* End of text section */
+
 	RO_DATA(PAGE_SIZE)
 
 	. = ALIGN(4);
@@ -153,8 +155,6 @@ SECTIONS
 
 	NOTES
 
-	_etext = .;			/* End of text and rodata section */
-
 #ifndef CONFIG_XIP_KERNEL
 # ifdef CONFIG_ARM_KERNMEM_PERMS
 	. = ALIGN(1<<SECTION_SHIFT);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/kvm/arm.c linux-4.0.9-pax/arch/arm/kvm/arm.c
--- linux-4.0.9/arch/arm/kvm/arm.c	2015-05-07 02:10:36.416265452 +0200
+++ linux-4.0.9-pax/arch/arm/kvm/arm.c	2015-05-07 02:10:56.284266036 +0200
@@ -57,7 +57,7 @@ static unsigned long hyp_default_vectors
 static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
 
 /* The VMID used in the VTTBR */
-static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
+static atomic64_unchecked_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
@@ -358,7 +358,7 @@ void force_vm_exit(const cpumask_t *mask
  */
 static bool need_new_vmid_gen(struct kvm *kvm)
 {
-	return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
+	return unlikely(kvm->arch.vmid_gen != atomic64_read_unchecked(&kvm_vmid_gen));
 }
 
 /**
@@ -391,7 +391,7 @@ static void update_vttbr(struct kvm *kvm
 
 	/* First user of a new VMID generation? */
 	if (unlikely(kvm_next_vmid == 0)) {
-		atomic64_inc(&kvm_vmid_gen);
+		atomic64_inc_unchecked(&kvm_vmid_gen);
 		kvm_next_vmid = 1;
 
 		/*
@@ -408,7 +408,7 @@ static void update_vttbr(struct kvm *kvm
 		kvm_call_hyp(__kvm_flush_vm_context);
 	}
 
-	kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
+	kvm->arch.vmid_gen = atomic64_read_unchecked(&kvm_vmid_gen);
 	kvm->arch.vmid = kvm_next_vmid;
 	kvm_next_vmid++;
 
@@ -1087,7 +1087,7 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struc
 /**
  * Initialize Hyp-mode and memory mappings on all CPUs.
  */
-int kvm_arch_init(void *opaque)
+int kvm_arch_init(const void *opaque)
 {
 	int err;
 	int ret, cpu;
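
kvm_vmid_gen is a generation counter that is allowed to grow without bound, which is why it moves to the _unchecked atomics: with PAX_REFCOUNT enabled those are exempt from the signed-overflow detection applied to the regular atomic counters. A rough userland picture of that split, offered as an assumption-labelled sketch rather than the real atomic implementation:

#include <limits.h>
#include <stdio.h>

/*
 * Checked arithmetic is what you want for reference counts; counters that
 * may legitimately wrap opt out.  In the kernel the detection lives inside
 * the atomic ops themselves, not in helpers like these.
 */
static long checked_add(long v, long inc)
{
	if (v > LONG_MAX - inc) {
		fprintf(stderr, "overflow detected, value pinned\n");
		return v;
	}
	return v + inc;
}

static unsigned long unchecked_add(unsigned long v, unsigned long inc)
{
	return v + inc;		/* wrapping is fine for a generation counter */
}

int main(void)
{
	printf("checked:   %ld\n", checked_add(LONG_MAX, 1));
	printf("unchecked: %lu\n", unchecked_add(~0UL, 1));
	return 0;
}
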
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/lib/clear_user.S linux-4.0.9-pax/arch/arm/lib/clear_user.S
--- linux-4.0.9/arch/arm/lib/clear_user.S	2015-03-18 15:21:50.176349252 +0100
+++ linux-4.0.9-pax/arch/arm/lib/clear_user.S	2015-04-15 12:13:52.854318625 +0200
@@ -12,14 +12,14 @@
 
 		.text
 
-/* Prototype: int __clear_user(void *addr, size_t sz)
+/* Prototype: int ___clear_user(void *addr, size_t sz)
  * Purpose  : clear some user memory
  * Params   : addr - user memory address to clear
  *          : sz   - number of bytes to clear
  * Returns  : number of bytes NOT cleared
  */
 ENTRY(__clear_user_std)
-WEAK(__clear_user)
+WEAK(___clear_user)
 		stmfd	sp!, {r1, lr}
 		mov	r2, #0
 		cmp	r1, #4
@@ -44,7 +44,7 @@ WEAK(__clear_user)
 USER(		strnebt	r2, [r0])
 		mov	r0, #0
 		ldmfd	sp!, {r1, pc}
-ENDPROC(__clear_user)
+ENDPROC(___clear_user)
 ENDPROC(__clear_user_std)
 
 		.pushsection .fixup,"ax"
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/lib/copy_from_user.S linux-4.0.9-pax/arch/arm/lib/copy_from_user.S
--- linux-4.0.9/arch/arm/lib/copy_from_user.S	2015-03-18 15:21:50.176349252 +0100
+++ linux-4.0.9-pax/arch/arm/lib/copy_from_user.S	2015-04-15 12:13:52.854318625 +0200
@@ -17,7 +17,7 @@
 /*
  * Prototype:
  *
- *	size_t __copy_from_user(void *to, const void *from, size_t n)
+ *	size_t ___copy_from_user(void *to, const void *from, size_t n)
  *
  * Purpose:
  *
@@ -89,11 +89,11 @@
 
 	.text
 
-ENTRY(__copy_from_user)
+ENTRY(___copy_from_user)
 
 #include "copy_template.S"
 
-ENDPROC(__copy_from_user)
+ENDPROC(___copy_from_user)
 
 	.pushsection .fixup,"ax"
 	.align 0
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/lib/copy_page.S linux-4.0.9-pax/arch/arm/lib/copy_page.S
--- linux-4.0.9/arch/arm/lib/copy_page.S	2015-03-18 15:21:50.176349252 +0100
+++ linux-4.0.9-pax/arch/arm/lib/copy_page.S	2015-04-15 12:13:52.854318625 +0200
@@ -10,6 +10,7 @@
  *  ASM optimised string functions
  */
 #include <linux/linkage.h>
+#include <linux/const.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/lib/copy_to_user.S linux-4.0.9-pax/arch/arm/lib/copy_to_user.S
--- linux-4.0.9/arch/arm/lib/copy_to_user.S	2015-03-18 15:21:50.176349252 +0100
+++ linux-4.0.9-pax/arch/arm/lib/copy_to_user.S	2015-04-15 12:13:52.854318625 +0200
@@ -17,7 +17,7 @@
 /*
  * Prototype:
  *
- *	size_t __copy_to_user(void *to, const void *from, size_t n)
+ *	size_t ___copy_to_user(void *to, const void *from, size_t n)
  *
  * Purpose:
  *
@@ -93,11 +93,11 @@
 	.text
 
 ENTRY(__copy_to_user_std)
-WEAK(__copy_to_user)
+WEAK(___copy_to_user)
 
 #include "copy_template.S"
 
-ENDPROC(__copy_to_user)
+ENDPROC(___copy_to_user)
 ENDPROC(__copy_to_user_std)
 
 	.pushsection .fixup,"ax"
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/lib/csumpartialcopyuser.S linux-4.0.9-pax/arch/arm/lib/csumpartialcopyuser.S
--- linux-4.0.9/arch/arm/lib/csumpartialcopyuser.S	2015-03-18 15:21:50.176349252 +0100
+++ linux-4.0.9-pax/arch/arm/lib/csumpartialcopyuser.S	2015-04-15 12:13:52.854318625 +0200
@@ -57,8 +57,8 @@
  *  Returns : r0 = checksum, [[sp, #0], #0] = 0 or -EFAULT
  */
 
-#define FN_ENTRY	ENTRY(csum_partial_copy_from_user)
-#define FN_EXIT		ENDPROC(csum_partial_copy_from_user)
+#define FN_ENTRY	ENTRY(__csum_partial_copy_from_user)
+#define FN_EXIT		ENDPROC(__csum_partial_copy_from_user)
 
 #include "csumpartialcopygeneric.S"
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/lib/delay.c linux-4.0.9-pax/arch/arm/lib/delay.c
--- linux-4.0.9/arch/arm/lib/delay.c	2015-03-18 15:21:50.176349252 +0100
+++ linux-4.0.9-pax/arch/arm/lib/delay.c	2015-04-15 12:13:52.854318625 +0200
@@ -29,7 +29,7 @@
 /*
  * Default to the loop-based delay implementation.
  */
-struct arm_delay_ops arm_delay_ops = {
+struct arm_delay_ops arm_delay_ops __read_only = {
 	.delay		= __loop_delay,
 	.const_udelay	= __loop_const_udelay,
 	.udelay		= __loop_udelay,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/lib/uaccess_with_memcpy.c linux-4.0.9-pax/arch/arm/lib/uaccess_with_memcpy.c
--- linux-4.0.9/arch/arm/lib/uaccess_with_memcpy.c	2015-03-18 15:21:50.176349252 +0100
+++ linux-4.0.9-pax/arch/arm/lib/uaccess_with_memcpy.c	2015-04-15 12:13:52.854318625 +0200
@@ -136,7 +136,7 @@ out:
 }
 
 unsigned long
-__copy_to_user(void __user *to, const void *from, unsigned long n)
+___copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	/*
 	 * This test is stubbed out of the main function above to keep
@@ -190,7 +190,7 @@ out:
 	return n;
 }
 
-unsigned long __clear_user(void __user *addr, unsigned long n)
+unsigned long ___clear_user(void __user *addr, unsigned long n)
 {
 	/* See rational for this in __copy_to_user() above. */
 	if (n < 64)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-exynos/suspend.c linux-4.0.9-pax/arch/arm/mach-exynos/suspend.c
--- linux-4.0.9/arch/arm/mach-exynos/suspend.c	2015-06-29 23:02:28.010445600 +0200
+++ linux-4.0.9-pax/arch/arm/mach-exynos/suspend.c	2015-06-29 23:02:33.422445588 +0200
@@ -18,6 +18,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/cpu_pm.h>
 #include <linux/io.h>
+#include <linux/irq.h>
 #include <linux/irqchip/arm-gic.h>
 #include <linux/err.h>
 #include <linux/regulator/machine.h>
@@ -635,8 +636,10 @@ void __init exynos_pm_init(void)
 	tmp |= pm_data->wake_disable_mask;
 	pmu_raw_writel(tmp, S5P_WAKEUP_MASK);
 
-	exynos_pm_syscore_ops.suspend	= pm_data->pm_suspend;
-	exynos_pm_syscore_ops.resume	= pm_data->pm_resume;
+	pax_open_kernel();
+	*(void **)&exynos_pm_syscore_ops.suspend	= pm_data->pm_suspend;
+	*(void **)&exynos_pm_syscore_ops.resume	= pm_data->pm_resume;
+	pax_close_kernel();
 
 	register_syscore_ops(&exynos_pm_syscore_ops);
 	suspend_set_ops(&exynos_suspend_ops);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-keystone/keystone.c linux-4.0.9-pax/arch/arm/mach-keystone/keystone.c
--- linux-4.0.9/arch/arm/mach-keystone/keystone.c	2015-04-13 11:20:51.138618033 +0200
+++ linux-4.0.9-pax/arch/arm/mach-keystone/keystone.c	2015-04-15 12:13:52.854318625 +0200
@@ -27,7 +27,7 @@
 
 #include "keystone.h"
 
-static struct notifier_block platform_nb;
+static notifier_block_no_const platform_nb;
 static unsigned long keystone_dma_pfn_offset __read_mostly;
 
 static int keystone_platform_notifier(struct notifier_block *nb,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-mvebu/coherency.c linux-4.0.9-pax/arch/arm/mach-mvebu/coherency.c
--- linux-4.0.9/arch/arm/mach-mvebu/coherency.c	2015-04-13 11:20:51.206618029 +0200
+++ linux-4.0.9-pax/arch/arm/mach-mvebu/coherency.c	2015-04-15 12:13:52.854318625 +0200
@@ -117,7 +117,7 @@ static void __init armada_370_coherency_
 
 /*
  * This ioremap hook is used on Armada 375/38x to ensure that PCIe
- * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This
+ * memory areas are mapped as MT_UNCACHED_RW instead of MT_DEVICE. This
  * is needed as a workaround for a deadlock issue between the PCIe
  * interface and the cache controller.
  */
@@ -130,7 +130,7 @@ armada_pcie_wa_ioremap_caller(phys_addr_
 	mvebu_mbus_get_pcie_mem_aperture(&pcie_mem);
 
 	if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end)
-		mtype = MT_UNCACHED;
+		mtype = MT_UNCACHED_RW;
 
 	return __arm_ioremap_caller(phys_addr, size, mtype, caller);
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-omap2/board-n8x0.c linux-4.0.9-pax/arch/arm/mach-omap2/board-n8x0.c
--- linux-4.0.9/arch/arm/mach-omap2/board-n8x0.c	2015-03-18 15:21:50.180349252 +0100
+++ linux-4.0.9-pax/arch/arm/mach-omap2/board-n8x0.c	2015-04-15 12:13:52.854318625 +0200
@@ -569,7 +569,7 @@ static int n8x0_menelaus_late_init(struc
 }
 #endif
 
-struct menelaus_platform_data n8x0_menelaus_platform_data __initdata = {
+struct menelaus_platform_data n8x0_menelaus_platform_data __initconst = {
 	.late_init = n8x0_menelaus_late_init,
 };
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-omap2/omap_device.c linux-4.0.9-pax/arch/arm/mach-omap2/omap_device.c
--- linux-4.0.9/arch/arm/mach-omap2/omap_device.c	2015-03-18 15:21:50.180349252 +0100
+++ linux-4.0.9-pax/arch/arm/mach-omap2/omap_device.c	2015-04-15 12:13:52.854318625 +0200
@@ -510,7 +510,7 @@ void omap_device_delete(struct omap_devi
 struct platform_device __init *omap_device_build(const char *pdev_name,
 						 int pdev_id,
 						 struct omap_hwmod *oh,
-						 void *pdata, int pdata_len)
+						 const void *pdata, int pdata_len)
 {
 	struct omap_hwmod *ohs[] = { oh };
 
@@ -538,7 +538,7 @@ struct platform_device __init *omap_devi
 struct platform_device __init *omap_device_build_ss(const char *pdev_name,
 						    int pdev_id,
 						    struct omap_hwmod **ohs,
-						    int oh_cnt, void *pdata,
+						    int oh_cnt, const void *pdata,
 						    int pdata_len)
 {
 	int ret = -ENOMEM;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-omap2/omap_device.h linux-4.0.9-pax/arch/arm/mach-omap2/omap_device.h
--- linux-4.0.9/arch/arm/mach-omap2/omap_device.h	2015-03-18 15:21:50.180349252 +0100
+++ linux-4.0.9-pax/arch/arm/mach-omap2/omap_device.h	2015-04-15 12:13:52.854318625 +0200
@@ -72,12 +72,12 @@ int omap_device_idle(struct platform_dev
 /* Core code interface */
 
 struct platform_device *omap_device_build(const char *pdev_name, int pdev_id,
-					  struct omap_hwmod *oh, void *pdata,
+					  struct omap_hwmod *oh, const void *pdata,
 					  int pdata_len);
 
 struct platform_device *omap_device_build_ss(const char *pdev_name, int pdev_id,
 					 struct omap_hwmod **oh, int oh_cnt,
-					 void *pdata, int pdata_len);
+					 const void *pdata, int pdata_len);
 
 struct omap_device *omap_device_alloc(struct platform_device *pdev,
 				      struct omap_hwmod **ohs, int oh_cnt);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-omap2/omap_hwmod.c linux-4.0.9-pax/arch/arm/mach-omap2/omap_hwmod.c
--- linux-4.0.9/arch/arm/mach-omap2/omap_hwmod.c	2015-04-13 11:20:51.506618013 +0200
+++ linux-4.0.9-pax/arch/arm/mach-omap2/omap_hwmod.c	2015-04-15 12:13:52.858318625 +0200
@@ -193,10 +193,10 @@ struct omap_hwmod_soc_ops {
 	int (*init_clkdm)(struct omap_hwmod *oh);
 	void (*update_context_lost)(struct omap_hwmod *oh);
 	int (*get_context_lost)(struct omap_hwmod *oh);
-};
+} __no_const;
 
 /* soc_ops: adapts the omap_hwmod code to the currently-booted SoC */
-static struct omap_hwmod_soc_ops soc_ops;
+static struct omap_hwmod_soc_ops soc_ops __read_only;
 
 /* omap_hwmod_list contains all registered struct omap_hwmods */
 static LIST_HEAD(omap_hwmod_list);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-omap2/omap-mpuss-lowpower.c linux-4.0.9-pax/arch/arm/mach-omap2/omap-mpuss-lowpower.c
--- linux-4.0.9/arch/arm/mach-omap2/omap-mpuss-lowpower.c	2015-03-18 15:21:50.180349252 +0100
+++ linux-4.0.9-pax/arch/arm/mach-omap2/omap-mpuss-lowpower.c	2015-04-15 12:13:52.858318625 +0200
@@ -86,7 +86,7 @@ struct cpu_pm_ops {
 	void (*resume)(void);
 	void (*scu_prepare)(unsigned int cpu_id, unsigned int cpu_state);
 	void (*hotplug_restart)(void);
-};
+} __no_const;
 
 static DEFINE_PER_CPU(struct omap4_cpu_pm_info, omap4_pm_info);
 static struct powerdomain *mpuss_pd;
@@ -105,7 +105,7 @@ static void dummy_cpu_resume(void)
 static void dummy_scu_prepare(unsigned int cpu_id, unsigned int cpu_state)
 {}
 
-struct cpu_pm_ops omap_pm_ops = {
+static struct cpu_pm_ops omap_pm_ops __read_only = {
 	.finish_suspend		= default_finish_suspend,
 	.resume			= dummy_cpu_resume,
 	.scu_prepare		= dummy_scu_prepare,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-omap2/omap-smp.c linux-4.0.9-pax/arch/arm/mach-omap2/omap-smp.c
--- linux-4.0.9/arch/arm/mach-omap2/omap-smp.c	2015-03-18 15:21:50.180349252 +0100
+++ linux-4.0.9-pax/arch/arm/mach-omap2/omap-smp.c	2015-04-15 12:13:52.858318625 +0200
@@ -19,6 +19,7 @@
 #include <linux/device.h>
 #include <linux/smp.h>
 #include <linux/io.h>
+#include <linux/irq.h>
 #include <linux/irqchip/arm-gic.h>
 
 #include <asm/smp_scu.h>
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-omap2/omap-wakeupgen.c linux-4.0.9-pax/arch/arm/mach-omap2/omap-wakeupgen.c
--- linux-4.0.9/arch/arm/mach-omap2/omap-wakeupgen.c	2015-03-18 15:21:50.180349252 +0100
+++ linux-4.0.9-pax/arch/arm/mach-omap2/omap-wakeupgen.c	2015-04-15 12:13:52.858318625 +0200
@@ -344,7 +344,7 @@ static int irq_cpu_hotplug_notify(struct
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __refdata irq_hotplug_notifier = {
+static struct notifier_block irq_hotplug_notifier = {
 	.notifier_call = irq_cpu_hotplug_notify,
 };
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-omap2/powerdomains43xx_data.c linux-4.0.9-pax/arch/arm/mach-omap2/powerdomains43xx_data.c
--- linux-4.0.9/arch/arm/mach-omap2/powerdomains43xx_data.c	2015-03-18 15:21:50.180349252 +0100
+++ linux-4.0.9-pax/arch/arm/mach-omap2/powerdomains43xx_data.c	2015-04-15 12:13:52.858318625 +0200
@@ -10,6 +10,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <asm/pgtable.h>
 
 #include "powerdomain.h"
 
@@ -129,7 +130,9 @@ static int am43xx_check_vcvp(void)
 
 void __init am43xx_powerdomains_init(void)
 {
-	omap4_pwrdm_operations.pwrdm_has_voltdm = am43xx_check_vcvp;
+	pax_open_kernel();
+	*(void **)&omap4_pwrdm_operations.pwrdm_has_voltdm = am43xx_check_vcvp;
+	pax_close_kernel();
 	pwrdm_register_platform_funcs(&omap4_pwrdm_operations);
 	pwrdm_register_pwrdms(powerdomains_am43xx);
 	pwrdm_complete_init();
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-omap2/wd_timer.c linux-4.0.9-pax/arch/arm/mach-omap2/wd_timer.c
--- linux-4.0.9/arch/arm/mach-omap2/wd_timer.c	2015-03-18 15:21:50.180349252 +0100
+++ linux-4.0.9-pax/arch/arm/mach-omap2/wd_timer.c	2015-04-15 12:13:52.858318625 +0200
@@ -110,7 +110,9 @@ static int __init omap_init_wdt(void)
 	struct omap_hwmod *oh;
 	char *oh_name = "wd_timer2";
 	char *dev_name = "omap_wdt";
-	struct omap_wd_timer_platform_data pdata;
+	static struct omap_wd_timer_platform_data pdata = {
+		.read_reset_sources = prm_read_reset_sources
+	};
 
 	if (!cpu_class_is_omap2() || of_have_populated_dt())
 		return 0;
@@ -121,8 +123,6 @@ static int __init omap_init_wdt(void)
 		return -EINVAL;
 	}
 
-	pdata.read_reset_sources = prm_read_reset_sources;
-
 	pdev = omap_device_build(dev_name, id, oh, &pdata,
 				 sizeof(struct omap_wd_timer_platform_data));
 	WARN(IS_ERR(pdev), "Can't build omap_device for %s:%s.\n",
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-tegra/cpuidle-tegra20.c linux-4.0.9-pax/arch/arm/mach-tegra/cpuidle-tegra20.c
--- linux-4.0.9/arch/arm/mach-tegra/cpuidle-tegra20.c	2015-07-10 20:07:37.535036135 +0200
+++ linux-4.0.9-pax/arch/arm/mach-tegra/cpuidle-tegra20.c	2015-07-10 20:07:47.707035591 +0200
@@ -178,7 +178,7 @@ static int tegra20_idle_lp2_coupled(stru
 	bool entered_lp2 = false;
 
 	if (tegra_pending_sgi())
-		ACCESS_ONCE(abort_flag) = true;
+		ACCESS_ONCE_RW(abort_flag) = true;
 
 	cpuidle_coupled_parallel_barrier(dev, &abort_barrier);
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-tegra/irq.c linux-4.0.9-pax/arch/arm/mach-tegra/irq.c
--- linux-4.0.9/arch/arm/mach-tegra/irq.c	2015-03-18 15:21:50.180349252 +0100
+++ linux-4.0.9-pax/arch/arm/mach-tegra/irq.c	2015-04-15 12:13:52.858318625 +0200
@@ -20,6 +20,7 @@
 #include <linux/cpu_pm.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/irq.h>
 #include <linux/irqchip/arm-gic.h>
 #include <linux/irq.h>
 #include <linux/kernel.h>
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-ux500/pm.c linux-4.0.9-pax/arch/arm/mach-ux500/pm.c
--- linux-4.0.9/arch/arm/mach-ux500/pm.c	2015-03-18 15:21:50.180349252 +0100
+++ linux-4.0.9-pax/arch/arm/mach-ux500/pm.c	2015-04-15 12:13:52.858318625 +0200
@@ -10,6 +10,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/irq.h>
 #include <linux/irqchip/arm-gic.h>
 #include <linux/delay.h>
 #include <linux/io.h>
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-ux500/setup.h linux-4.0.9-pax/arch/arm/mach-ux500/setup.h
--- linux-4.0.9/arch/arm/mach-ux500/setup.h	2015-03-18 15:21:50.180349252 +0100
+++ linux-4.0.9-pax/arch/arm/mach-ux500/setup.h	2015-04-15 12:13:52.858318625 +0200
@@ -33,13 +33,6 @@ extern void ux500_timer_init(void);
 	.type		= MT_DEVICE,		\
 }
 
-#define __MEM_DEV_DESC(x, sz)	{		\
-	.virtual	= IO_ADDRESS(x),	\
-	.pfn		= __phys_to_pfn(x),	\
-	.length		= sz,			\
-	.type		= MT_MEMORY_RWX,		\
-}
-
 extern struct smp_operations ux500_smp_ops;
 extern void ux500_cpu_die(unsigned int cpu);
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mach-zynq/platsmp.c linux-4.0.9-pax/arch/arm/mach-zynq/platsmp.c
--- linux-4.0.9/arch/arm/mach-zynq/platsmp.c	2015-03-18 15:21:50.180349252 +0100
+++ linux-4.0.9-pax/arch/arm/mach-zynq/platsmp.c	2015-04-15 12:13:52.858318625 +0200
@@ -24,6 +24,7 @@
 #include <linux/io.h>
 #include <asm/cacheflush.h>
 #include <asm/smp_scu.h>
+#include <linux/irq.h>
 #include <linux/irqchip/arm-gic.h>
 #include "common.h"
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mm/alignment.c linux-4.0.9-pax/arch/arm/mm/alignment.c
--- linux-4.0.9/arch/arm/mm/alignment.c	2015-03-18 15:21:50.184349252 +0100
+++ linux-4.0.9-pax/arch/arm/mm/alignment.c	2015-04-15 12:13:52.858318625 +0200
@@ -216,10 +216,12 @@ union offset_union {
 #define __get16_unaligned_check(ins,val,addr)			\
 	do {							\
 		unsigned int err = 0, v, a = addr;		\
+		pax_open_userland();				\
 		__get8_unaligned_check(ins,v,a,err);		\
 		val =  v << ((BE) ? 8 : 0);			\
 		__get8_unaligned_check(ins,v,a,err);		\
 		val |= v << ((BE) ? 0 : 8);			\
+		pax_close_userland();				\
 		if (err)					\
 			goto fault;				\
 	} while (0)
@@ -233,6 +235,7 @@ union offset_union {
 #define __get32_unaligned_check(ins,val,addr)			\
 	do {							\
 		unsigned int err = 0, v, a = addr;		\
+		pax_open_userland();				\
 		__get8_unaligned_check(ins,v,a,err);		\
 		val =  v << ((BE) ? 24 :  0);			\
 		__get8_unaligned_check(ins,v,a,err);		\
@@ -241,6 +244,7 @@ union offset_union {
 		val |= v << ((BE) ?  8 : 16);			\
 		__get8_unaligned_check(ins,v,a,err);		\
 		val |= v << ((BE) ?  0 : 24);			\
+		pax_close_userland();				\
 		if (err)					\
 			goto fault;				\
 	} while (0)
@@ -254,6 +258,7 @@ union offset_union {
 #define __put16_unaligned_check(ins,val,addr)			\
 	do {							\
 		unsigned int err = 0, v = val, a = addr;	\
+		pax_open_userland();				\
 		__asm__( FIRST_BYTE_16				\
 	 ARM(	"1:	"ins"	%1, [%2], #1\n"	)		\
 	 THUMB(	"1:	"ins"	%1, [%2]\n"	)		\
@@ -273,6 +278,7 @@ union offset_union {
 		"	.popsection\n"				\
 		: "=r" (err), "=&r" (v), "=&r" (a)		\
 		: "0" (err), "1" (v), "2" (a));			\
+		pax_close_userland();				\
 		if (err)					\
 			goto fault;				\
 	} while (0)
@@ -286,6 +292,7 @@ union offset_union {
 #define __put32_unaligned_check(ins,val,addr)			\
 	do {							\
 		unsigned int err = 0, v = val, a = addr;	\
+		pax_open_userland();				\
 		__asm__( FIRST_BYTE_32				\
 	 ARM(	"1:	"ins"	%1, [%2], #1\n"	)		\
 	 THUMB(	"1:	"ins"	%1, [%2]\n"	)		\
@@ -315,6 +322,7 @@ union offset_union {
 		"	.popsection\n"				\
 		: "=r" (err), "=&r" (v), "=&r" (a)		\
 		: "0" (err), "1" (v), "2" (a));			\
+		pax_close_userland();				\
 		if (err)					\
 			goto fault;				\
 	} while (0)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mm/cache-l2x0.c linux-4.0.9-pax/arch/arm/mm/cache-l2x0.c
--- linux-4.0.9/arch/arm/mm/cache-l2x0.c	2015-04-13 11:20:52.250617974 +0200
+++ linux-4.0.9-pax/arch/arm/mm/cache-l2x0.c	2015-04-15 12:13:52.858318625 +0200
@@ -43,7 +43,7 @@ struct l2c_init_data {
 	void (*save)(void __iomem *);
 	void (*configure)(void __iomem *);
 	struct outer_cache_fns outer_cache;
-};
+} __do_const;
 
 #define CACHE_LINE_SIZE		32
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mm/context.c linux-4.0.9-pax/arch/arm/mm/context.c
--- linux-4.0.9/arch/arm/mm/context.c	2015-03-18 15:21:50.184349252 +0100
+++ linux-4.0.9-pax/arch/arm/mm/context.c	2015-04-15 12:13:52.858318625 +0200
@@ -43,7 +43,7 @@
 #define NUM_USER_ASIDS		ASID_FIRST_VERSION
 
 static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION);
+static atomic64_unchecked_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION);
 static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS);
 
 static DEFINE_PER_CPU(atomic64_t, active_asids);
@@ -178,7 +178,7 @@ static u64 new_context(struct mm_struct
 {
 	static u32 cur_idx = 1;
 	u64 asid = atomic64_read(&mm->context.id);
-	u64 generation = atomic64_read(&asid_generation);
+	u64 generation = atomic64_read_unchecked(&asid_generation);
 
 	if (asid != 0) {
 		/*
@@ -208,7 +208,7 @@ static u64 new_context(struct mm_struct
 	 */
 	asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
 	if (asid == NUM_USER_ASIDS) {
-		generation = atomic64_add_return(ASID_FIRST_VERSION,
+		generation = atomic64_add_return_unchecked(ASID_FIRST_VERSION,
 						 &asid_generation);
 		flush_context(cpu);
 		asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
@@ -240,14 +240,14 @@ void check_and_switch_context(struct mm_
 	cpu_set_reserved_ttbr0();
 
 	asid = atomic64_read(&mm->context.id);
-	if (!((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS)
+	if (!((asid ^ atomic64_read_unchecked(&asid_generation)) >> ASID_BITS)
 	    && atomic64_xchg(&per_cpu(active_asids, cpu), asid))
 		goto switch_mm_fastpath;
 
 	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
 	/* Check that our ASID belongs to the current generation. */
 	asid = atomic64_read(&mm->context.id);
-	if ((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) {
+	if ((asid ^ atomic64_read_unchecked(&asid_generation)) >> ASID_BITS) {
 		asid = new_context(mm, cpu);
 		atomic64_set(&mm->context.id, asid);
 	}
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mm/fault.c linux-4.0.9-pax/arch/arm/mm/fault.c
--- linux-4.0.9/arch/arm/mm/fault.c	2015-04-13 11:20:52.262617973 +0200
+++ linux-4.0.9-pax/arch/arm/mm/fault.c	2015-06-16 23:14:51.813008900 +0200
@@ -25,6 +25,7 @@
 #include <asm/system_misc.h>
 #include <asm/system_info.h>
 #include <asm/tlbflush.h>
+#include <asm/sections.h>
 
 #include "fault.h"
 
@@ -138,6 +139,22 @@ __do_kernel_fault(struct mm_struct *mm,
 	if (fixup_exception(regs))
 		return;
 
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	if (addr < TASK_SIZE)
+		printk(KERN_EMERG "PAX: %s:%d, uid/euid: %u/%u, attempted to access userland memory at %08lx\n", current->comm, task_pid_nr(current),
+				from_kuid_munged(&init_user_ns, current_uid()), from_kuid_munged(&init_user_ns, current_euid()), addr);
+#endif
+
+#ifdef CONFIG_PAX_KERNEXEC
+	if ((fsr & FSR_WRITE) &&
+	    (((unsigned long)_stext <= addr && addr < init_mm.end_code) ||
+	     (MODULES_VADDR <= addr && addr < MODULES_END)))
+	{
+		printk(KERN_EMERG "PAX: %s:%d, uid/euid: %u/%u, attempted to modify kernel code\n", current->comm, task_pid_nr(current),
+				from_kuid_munged(&init_user_ns, current_uid()), from_kuid_munged(&init_user_ns, current_euid()));
+	}
+#endif
+
 	/*
 	 * No handler, we'll have to terminate things with extreme prejudice.
 	 */
@@ -173,6 +190,13 @@ __do_user_fault(struct task_struct *tsk,
 	}
 #endif
 
+#ifdef CONFIG_PAX_PAGEEXEC
+	if (fsr & FSR_LNX_PF) {
+		pax_report_fault(regs, (void *)regs->ARM_pc, (void *)regs->ARM_sp);
+		do_group_exit(SIGKILL);
+	}
+#endif
+
 	tsk->thread.address = addr;
 	tsk->thread.error_code = fsr;
 	tsk->thread.trap_no = 14;
@@ -400,6 +424,33 @@ do_page_fault(unsigned long addr, unsign
 }
 #endif					/* CONFIG_MMU */
 
+#ifdef CONFIG_PAX_PAGEEXEC
+void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
+{
+	long i;
+
+	printk(KERN_ERR "PAX: bytes at PC: ");
+	for (i = 0; i < 20; i++) {
+		unsigned char c;
+		if (get_user(c, (__force unsigned char __user *)pc+i))
+			printk(KERN_CONT "?? ");
+		else
+			printk(KERN_CONT "%02x ", c);
+	}
+	printk("\n");
+
+	printk(KERN_ERR "PAX: bytes at SP-4: ");
+	for (i = -1; i < 20; i++) {
+		unsigned long c;
+		if (get_user(c, (__force unsigned long __user *)sp+i))
+			printk(KERN_CONT "???????? ");
+		else
+			printk(KERN_CONT "%08lx ", c);
+	}
+	printk("\n");
+}
+#endif
+
 /*
  * First Level Translation Fault Handler
  *
@@ -547,9 +598,18 @@ do_DataAbort(unsigned long addr, unsigne
 	const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
 	struct siginfo info;
 
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	if (addr < TASK_SIZE && is_domain_fault(fsr)) {
+		printk(KERN_EMERG "PAX: %s:%d, uid/euid: %u/%u, attempted to access userland memory at %08lx\n", current->comm, task_pid_nr(current),
+				from_kuid_munged(&init_user_ns, current_uid()), from_kuid_munged(&init_user_ns, current_euid()), addr);
+		goto die;
+	}
+#endif
+
 	if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
 		return;
 
+die:
 	pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
 		inf->name, fsr, addr);
 	show_pte(current->mm, addr);
@@ -574,15 +634,101 @@ hook_ifault_code(int nr, int (*fn)(unsig
 	ifsr_info[nr].name = name;
 }
 
+asmlinkage int sys_sigreturn(struct pt_regs *regs);
+asmlinkage int sys_rt_sigreturn(struct pt_regs *regs);
+
 asmlinkage void __exception
 do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
 {
 	const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr);
 	struct siginfo info;
+	unsigned long pc = instruction_pointer(regs);
+
+	if (user_mode(regs)) {
+		unsigned long sigpage = current->mm->context.sigpage;
+
+		if (sigpage <= pc && pc < sigpage + 7*4) {
+			if (pc < sigpage + 3*4)
+				sys_sigreturn(regs);
+			else
+				sys_rt_sigreturn(regs);
+			return;
+		}
+		if (pc == 0xffff0f60UL) {
+			/*
+			 * PaX: __kuser_cmpxchg64 emulation
+			 */
+			// TODO
+			//regs->ARM_pc = regs->ARM_lr;
+			//return;
+		}
+		if (pc == 0xffff0fa0UL) {
+			/*
+			 * PaX: __kuser_memory_barrier emulation
+			 */
+			// dmb(); implied by the exception
+			regs->ARM_pc = regs->ARM_lr;
+			return;
+		}
+		if (pc == 0xffff0fc0UL) {
+			/*
+			 * PaX: __kuser_cmpxchg emulation
+			 */
+			// TODO
+			//long new;
+			//int op;
+
+			//op = FUTEX_OP_SET << 28;
+			//new = futex_atomic_op_inuser(op, regs->ARM_r2);
+			//regs->ARM_r0 = old != new;
+			//regs->ARM_pc = regs->ARM_lr;
+			//return;
+		}
+		if (pc == 0xffff0fe0UL) {
+			/*
+			 * PaX: __kuser_get_tls emulation
+			 */
+			regs->ARM_r0 = current_thread_info()->tp_value[0];
+			regs->ARM_pc = regs->ARM_lr;
+			return;
+		}
+	}
+
+#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+	else if (is_domain_fault(ifsr) || is_xn_fault(ifsr)) {
+		printk(KERN_EMERG "PAX: %s:%d, uid/euid: %u/%u, attempted to execute %s memory at %08lx\n",
+				current->comm, task_pid_nr(current),
+				from_kuid_munged(&init_user_ns, current_uid()),
+				from_kuid_munged(&init_user_ns, current_euid()),
+				pc >= TASK_SIZE ? "non-executable kernel" : "userland", pc);
+		goto die;
+	}
+#endif
+
+#ifdef CONFIG_PAX_REFCOUNT
+	if (fsr_fs(ifsr) == FAULT_CODE_DEBUG) {
+#ifdef CONFIG_THUMB2_KERNEL
+		unsigned short bkpt;
+
+		if (!probe_kernel_address(pc, bkpt) && cpu_to_le16(bkpt) == 0xbef1) {
+#else
+		unsigned int bkpt;
+
+		if (!probe_kernel_address(pc, bkpt) && cpu_to_le32(bkpt) == 0xe12f1073) {
+#endif
+			current->thread.error_code = ifsr;
+			current->thread.trap_no = 0;
+			pax_report_refcount_overflow(regs);
+			fixup_exception(regs);
+			return;
+		}
+	}
+#endif
 
 	if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
 		return;
 
+die:
 	pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
 		inf->name, ifsr, addr);
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mm/fault.h linux-4.0.9-pax/arch/arm/mm/fault.h
--- linux-4.0.9/arch/arm/mm/fault.h	2015-03-18 15:21:50.184349252 +0100
+++ linux-4.0.9-pax/arch/arm/mm/fault.h	2015-04-15 12:13:52.858318625 +0200
@@ -3,6 +3,7 @@
 
 /*
  * Fault status register encodings.  We steal bit 31 for our own purposes.
+ * Set when the FSR value is from an instruction fault.
  */
 #define FSR_LNX_PF		(1 << 31)
 #define FSR_WRITE		(1 << 11)
@@ -22,6 +23,17 @@ static inline int fsr_fs(unsigned int fs
 }
 #endif
 
+/* valid for LPAE and !LPAE */
+static inline int is_xn_fault(unsigned int fsr)
+{
+	return ((fsr_fs(fsr) & 0x3c) == 0xc);
+}
+
+static inline int is_domain_fault(unsigned int fsr)
+{
+	return ((fsr_fs(fsr) & 0xD) == 0x9);
+}
+
 void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
 unsigned long search_exception_table(unsigned long addr);
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mm/init.c linux-4.0.9-pax/arch/arm/mm/init.c
--- linux-4.0.9/arch/arm/mm/init.c	2015-04-13 11:20:52.278617972 +0200
+++ linux-4.0.9-pax/arch/arm/mm/init.c	2015-04-15 12:13:52.858318625 +0200
@@ -755,7 +755,46 @@ void free_tcmmem(void)
 {
 #ifdef CONFIG_HAVE_TCM
 	extern char __tcm_start, __tcm_end;
+#endif
+
+#ifdef CONFIG_PAX_KERNEXEC
+	unsigned long addr;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	int cpu_arch = cpu_architecture();
+	unsigned int cr = get_cr();
+
+	if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
+		/* make pages tables, etc before .text NX */
+		/* make page tables, etc. before .text NX */
+			pgd = pgd_offset_k(addr);
+			pud = pud_offset(pgd, addr);
+			pmd = pmd_offset(pud, addr);
+			__section_update(pmd, addr, PMD_SECT_XN);
+		}
+		/* make init NX */
+		for (addr = (unsigned long)__init_begin; addr < (unsigned long)_sdata; addr += SECTION_SIZE) {
+			pgd = pgd_offset_k(addr);
+			pud = pud_offset(pgd, addr);
+			pmd = pmd_offset(pud, addr);
+			__section_update(pmd, addr, PMD_SECT_XN);
+		}
+		/* make kernel code/rodata RX */
+		for (addr = (unsigned long)_stext; addr < (unsigned long)__init_begin; addr += SECTION_SIZE) {
+			pgd = pgd_offset_k(addr);
+			pud = pud_offset(pgd, addr);
+			pmd = pmd_offset(pud, addr);
+#ifdef CONFIG_ARM_LPAE
+			__section_update(pmd, addr, PMD_SECT_RDONLY);
+#else
+			__section_update(pmd, addr, PMD_SECT_APX|PMD_SECT_AP_WRITE);
+#endif
+		}
+	}
+#endif
 
+#ifdef CONFIG_HAVE_TCM
 	poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start);
 	free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link");
 #endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mm/ioremap.c linux-4.0.9-pax/arch/arm/mm/ioremap.c
--- linux-4.0.9/arch/arm/mm/ioremap.c	2015-03-18 15:21:50.184349252 +0100
+++ linux-4.0.9-pax/arch/arm/mm/ioremap.c	2015-04-15 12:13:52.858318625 +0200
@@ -392,9 +392,9 @@ __arm_ioremap_exec(phys_addr_t phys_addr
 	unsigned int mtype;
 
 	if (cached)
-		mtype = MT_MEMORY_RWX;
+		mtype = MT_MEMORY_RX;
 	else
-		mtype = MT_MEMORY_RWX_NONCACHED;
+		mtype = MT_MEMORY_RX_NONCACHED;
 
 	return __arm_ioremap_caller(phys_addr, size, mtype,
 			__builtin_return_address(0));
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mm/Kconfig linux-4.0.9-pax/arch/arm/mm/Kconfig
--- linux-4.0.9/arch/arm/mm/Kconfig	2015-04-13 11:20:52.250617974 +0200
+++ linux-4.0.9-pax/arch/arm/mm/Kconfig	2015-04-15 12:13:52.858318625 +0200
@@ -446,6 +446,7 @@ config CPU_32v5
 
 config CPU_32v6
 	bool
+	select CPU_USE_DOMAINS if CPU_V6 && MMU && !PAX_KERNEXEC && !PAX_MEMORY_UDEREF
 	select TLS_REG_EMUL if !CPU_32v6K && !MMU
 
 config CPU_32v6K
@@ -600,6 +601,7 @@ config CPU_CP15_MPU
 
 config CPU_USE_DOMAINS
 	bool
+	depends on !ARM_LPAE && !PAX_KERNEXEC && !PAX_MEMORY_UDEREF
 	help
 	  This option enables or disables the use of domain switching
 	  via the set_fs() function.
@@ -798,7 +800,7 @@ config NEED_KUSER_HELPERS
 
 config KUSER_HELPERS
 	bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS
-	depends on MMU
+	depends on MMU && !(CPU_V6 || CPU_V6K || CPU_V7)
 	default y
 	help
 	  Warning: disabling this option may break user programs.
@@ -812,7 +814,7 @@ config KUSER_HELPERS
 	  See Documentation/arm/kernel_user_helpers.txt for details.
 
 	  However, the fixed address nature of these helpers can be used
-	  by ROP (return orientated programming) authors when creating
+	  by ROP (Return Oriented Programming) authors when creating
 	  exploits.
 
 	  If all of the binaries and libraries which run on your platform
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mm/mmap.c linux-4.0.9-pax/arch/arm/mm/mmap.c
--- linux-4.0.9/arch/arm/mm/mmap.c	2015-03-18 15:21:50.184349252 +0100
+++ linux-4.0.9-pax/arch/arm/mm/mmap.c	2015-04-15 12:13:52.862318625 +0200
@@ -81,6 +81,10 @@ arch_get_unmapped_area(struct file *filp
 	if (len > TASK_SIZE)
 		return -ENOMEM;
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	if (addr) {
 		if (do_align)
 			addr = COLOUR_ALIGN(addr, pgoff);
@@ -88,8 +92,7 @@ arch_get_unmapped_area(struct file *filp
 			addr = PAGE_ALIGN(addr);
 
 		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len))
 			return addr;
 	}
 
@@ -132,6 +135,10 @@ arch_get_unmapped_area_topdown(struct fi
 		return addr;
 	}
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	/* requesting a specific address */
 	if (addr) {
 		if (do_align)
@@ -139,8 +146,7 @@ arch_get_unmapped_area_topdown(struct fi
 		else
 			addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr &&
-				(!vma || addr + len <= vma->vm_start))
+		if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len))
 			return addr;
 	}
 
@@ -173,6 +179,10 @@ void arch_pick_mmap_layout(struct mm_str
 {
 	unsigned long random_factor = 0UL;
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	/* 8 bits of randomness in 20 address space bits */
 	if ((current->flags & PF_RANDOMIZE) &&
 	    !(current->personality & ADDR_NO_RANDOMIZE))
@@ -180,9 +190,21 @@ void arch_pick_mmap_layout(struct mm_str
 
 	if (mmap_is_legacy()) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			mm->mmap_base += mm->delta_mmap;
+#endif
+
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
 		mm->mmap_base = mmap_base(random_factor);
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			mm->mmap_base -= mm->delta_mmap + mm->delta_stack;
+#endif
+
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/mm/mmu.c linux-4.0.9-pax/arch/arm/mm/mmu.c
--- linux-4.0.9/arch/arm/mm/mmu.c	2015-06-15 16:02:22.187183858 +0200
+++ linux-4.0.9-pax/arch/arm/mm/mmu.c	2015-06-15 16:02:33.011183834 +0200
@@ -41,6 +41,22 @@
 #include "mm.h"
 #include "tcm.h"
 
+#if defined(CONFIG_CPU_USE_DOMAINS) || defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+void modify_domain(unsigned int dom, unsigned int type)
+{
+	struct thread_info *thread = current_thread_info();
+	unsigned int domain = thread->cpu_domain;
+	/*
+	 * DOMAIN_MANAGER might be defined to some other value,
+	 * so use the arch-defined constant
+	 */
+	domain &= ~domain_val(dom, 3);
+	thread->cpu_domain = domain | domain_val(dom, type);
+	set_domain(thread->cpu_domain);
+}
+EXPORT_SYMBOL(modify_domain);
+#endif
+
 /*
  * empty_zero_page is a special page that is used for
  * zero-initialized data and COW.
@@ -242,7 +258,15 @@ __setup("noalign", noalign_setup);
 #define PROT_PTE_S2_DEVICE	PROT_PTE_DEVICE
 #define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE
 
-static struct mem_type mem_types[] = {
+#ifdef CONFIG_PAX_KERNEXEC
+#define L_PTE_KERNEXEC		L_PTE_RDONLY
+#define PMD_SECT_KERNEXEC	PMD_SECT_RDONLY
+#else
+#define L_PTE_KERNEXEC		L_PTE_DIRTY
+#define PMD_SECT_KERNEXEC	PMD_SECT_AP_WRITE
+#endif
+
+static struct mem_type mem_types[] __read_only = {
 	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
 		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
 				  L_PTE_SHARED,
@@ -271,19 +295,19 @@ static struct mem_type mem_types[] = {
 		.prot_sect	= PROT_SECT_DEVICE,
 		.domain		= DOMAIN_IO,
 	},
-	[MT_UNCACHED] = {
+	[MT_UNCACHED_RW] = {
 		.prot_pte	= PROT_PTE_DEVICE,
 		.prot_l1	= PMD_TYPE_TABLE,
 		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
 		.domain		= DOMAIN_IO,
 	},
-	[MT_CACHECLEAN] = {
-		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
+	[MT_CACHECLEAN_RO] = {
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_RDONLY,
 		.domain    = DOMAIN_KERNEL,
 	},
 #ifndef CONFIG_ARM_LPAE
-	[MT_MINICLEAN] = {
-		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
+	[MT_MINICLEAN_RO] = {
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_MINICACHE | PMD_SECT_XN | PMD_SECT_RDONLY,
 		.domain    = DOMAIN_KERNEL,
 	},
 #endif
@@ -291,15 +315,15 @@ static struct mem_type mem_types[] = {
 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
 				L_PTE_RDONLY,
 		.prot_l1   = PMD_TYPE_TABLE,
-		.domain    = DOMAIN_USER,
+		.domain    = DOMAIN_VECTORS,
 	},
 	[MT_HIGH_VECTORS] = {
 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
 				L_PTE_USER | L_PTE_RDONLY,
 		.prot_l1   = PMD_TYPE_TABLE,
-		.domain    = DOMAIN_USER,
+		.domain    = DOMAIN_VECTORS,
 	},
-	[MT_MEMORY_RWX] = {
+	[__MT_MEMORY_RWX] = {
 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
 		.prot_l1   = PMD_TYPE_TABLE,
 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
@@ -312,17 +336,30 @@ static struct mem_type mem_types[] = {
 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
 		.domain    = DOMAIN_KERNEL,
 	},
-	[MT_ROM] = {
-		.prot_sect = PMD_TYPE_SECT,
+	[MT_MEMORY_RX] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_KERNEXEC,
+		.prot_l1   = PMD_TYPE_TABLE,
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_KERNEXEC,
+		.domain	   = DOMAIN_KERNEL,
+	},
+	[MT_ROM_RX] = {
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_RDONLY,
 		.domain    = DOMAIN_KERNEL,
 	},
-	[MT_MEMORY_RWX_NONCACHED] = {
+	[MT_MEMORY_RW_NONCACHED] = {
 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
 				L_PTE_MT_BUFFERABLE,
 		.prot_l1   = PMD_TYPE_TABLE,
 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
 		.domain    = DOMAIN_KERNEL,
 	},
+	[MT_MEMORY_RX_NONCACHED] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_KERNEXEC |
+				L_PTE_MT_BUFFERABLE,
+		.prot_l1   = PMD_TYPE_TABLE,
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_KERNEXEC,
+		.domain    = DOMAIN_KERNEL,
+	},
 	[MT_MEMORY_RW_DTCM] = {
 		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
 				L_PTE_XN,
@@ -330,9 +367,10 @@ static struct mem_type mem_types[] = {
 		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
 		.domain    = DOMAIN_KERNEL,
 	},
-	[MT_MEMORY_RWX_ITCM] = {
-		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
+	[MT_MEMORY_RX_ITCM] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_KERNEXEC,
 		.prot_l1   = PMD_TYPE_TABLE,
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_KERNEXEC,
 		.domain    = DOMAIN_KERNEL,
 	},
 	[MT_MEMORY_RW_SO] = {
@@ -544,9 +582,14 @@ static void __init build_mem_type_table(
 		 * Mark cache clean areas and XIP ROM read only
 		 * from SVC mode and no access from userspace.
 		 */
-		mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
-		mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
-		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+		mem_types[MT_ROM_RX].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+#ifdef CONFIG_PAX_KERNEXEC
+		mem_types[MT_MEMORY_RX].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+		mem_types[MT_MEMORY_RX_NONCACHED].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+		mem_types[MT_MEMORY_RX_ITCM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+#endif
+		mem_types[MT_MINICLEAN_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+		mem_types[MT_CACHECLEAN_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
 #endif
 
 		/*
@@ -563,13 +606,17 @@ static void __init build_mem_type_table(
 			mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
 			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
 			mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
-			mem_types[MT_MEMORY_RWX].prot_sect |= PMD_SECT_S;
-			mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED;
+			mem_types[__MT_MEMORY_RWX].prot_sect |= PMD_SECT_S;
+			mem_types[__MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED;
 			mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S;
 			mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED;
+			mem_types[MT_MEMORY_RX].prot_sect |= PMD_SECT_S;
+			mem_types[MT_MEMORY_RX].prot_pte |= L_PTE_SHARED;
 			mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
-			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S;
-			mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED;
+			mem_types[MT_MEMORY_RW_NONCACHED].prot_sect |= PMD_SECT_S;
+			mem_types[MT_MEMORY_RW_NONCACHED].prot_pte |= L_PTE_SHARED;
+			mem_types[MT_MEMORY_RX_NONCACHED].prot_sect |= PMD_SECT_S;
+			mem_types[MT_MEMORY_RX_NONCACHED].prot_pte |= L_PTE_SHARED;
 		}
 	}
 
@@ -580,15 +627,20 @@ static void __init build_mem_type_table(
 	if (cpu_arch >= CPU_ARCH_ARMv6) {
 		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
 			/* Non-cacheable Normal is XCB = 001 */
-			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
+			mem_types[MT_MEMORY_RW_NONCACHED].prot_sect |=
+				PMD_SECT_BUFFERED;
+			mem_types[MT_MEMORY_RX_NONCACHED].prot_sect |=
 				PMD_SECT_BUFFERED;
 		} else {
 			/* For both ARMv6 and non-TEX-remapping ARMv7 */
-			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
+			mem_types[MT_MEMORY_RW_NONCACHED].prot_sect |=
+				PMD_SECT_TEX(1);
+			mem_types[MT_MEMORY_RX_NONCACHED].prot_sect |=
 				PMD_SECT_TEX(1);
 		}
 	} else {
-		mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
+		mem_types[MT_MEMORY_RW_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
+		mem_types[MT_MEMORY_RX_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
 	}
 
 #ifdef CONFIG_ARM_LPAE
@@ -609,6 +661,8 @@ static void __init build_mem_type_table(
 	user_pgprot |= PTE_EXT_PXN;
 #endif
 
+	user_pgprot |= __supported_pte_mask;
+
 	for (i = 0; i < 16; i++) {
 		pteval_t v = pgprot_val(protection_map[i]);
 		protection_map[i] = __pgprot(v | user_pgprot);
@@ -626,21 +680,24 @@ static void __init build_mem_type_table(
 
 	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
 	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
-	mem_types[MT_MEMORY_RWX].prot_sect |= ecc_mask | cp->pmd;
-	mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot;
+	mem_types[__MT_MEMORY_RWX].prot_sect |= ecc_mask | cp->pmd;
+	mem_types[__MT_MEMORY_RWX].prot_pte |= kern_pgprot;
 	mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd;
 	mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot;
+	mem_types[MT_MEMORY_RX].prot_sect |= ecc_mask | cp->pmd;
+	mem_types[MT_MEMORY_RX].prot_pte |= kern_pgprot;
 	mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot;
-	mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask;
-	mem_types[MT_ROM].prot_sect |= cp->pmd;
+	mem_types[MT_MEMORY_RW_NONCACHED].prot_sect |= ecc_mask;
+	mem_types[MT_MEMORY_RX_NONCACHED].prot_sect |= ecc_mask;
+	mem_types[MT_ROM_RX].prot_sect |= cp->pmd;
 
 	switch (cp->pmd) {
 	case PMD_SECT_WT:
-		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
+		mem_types[MT_CACHECLEAN_RO].prot_sect |= PMD_SECT_WT;
 		break;
 	case PMD_SECT_WB:
 	case PMD_SECT_WBWA:
-		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
+		mem_types[MT_CACHECLEAN_RO].prot_sect |= PMD_SECT_WB;
 		break;
 	}
 	pr_info("Memory policy: %sData cache %s\n",
@@ -854,7 +911,7 @@ static void __init create_mapping(struct
 		return;
 	}
 
-	if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
+	if ((md->type == MT_DEVICE || md->type == MT_ROM_RX) &&
 	    md->virtual >= PAGE_OFFSET &&
 	    (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) {
 		pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n",
@@ -1218,18 +1275,15 @@ void __init arm_mm_memblock_reserve(void
  * called function.  This means you can't use any function or debugging
  * method which may touch any device, otherwise the kernel _will_ crash.
  */
+
+static char vectors[PAGE_SIZE * 2] __read_only __aligned(PAGE_SIZE);
+
 static void __init devicemaps_init(const struct machine_desc *mdesc)
 {
 	struct map_desc map;
 	unsigned long addr;
-	void *vectors;
 
-	/*
-	 * Allocate the vector page early.
-	 */
-	vectors = early_alloc(PAGE_SIZE * 2);
-
-	early_trap_init(vectors);
+	early_trap_init(&vectors);
 
 	for (addr = VMALLOC_START; addr; addr += PMD_SIZE)
 		pmd_clear(pmd_off_k(addr));
@@ -1242,7 +1296,7 @@ static void __init devicemaps_init(const
 	map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
 	map.virtual = MODULES_VADDR;
 	map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
-	map.type = MT_ROM;
+	map.type = MT_ROM_RX;
 	create_mapping(&map);
 #endif
 
@@ -1253,14 +1307,14 @@ static void __init devicemaps_init(const
 	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
 	map.virtual = FLUSH_BASE;
 	map.length = SZ_1M;
-	map.type = MT_CACHECLEAN;
+	map.type = MT_CACHECLEAN_RO;
 	create_mapping(&map);
 #endif
 #ifdef FLUSH_BASE_MINICACHE
 	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
 	map.virtual = FLUSH_BASE_MINICACHE;
 	map.length = SZ_1M;
-	map.type = MT_MINICLEAN;
+	map.type = MT_MINICLEAN_RO;
 	create_mapping(&map);
 #endif
 
@@ -1269,7 +1323,7 @@ static void __init devicemaps_init(const
 	 * location (0xffff0000).  If we aren't using high-vectors, also
 	 * create a mapping at the low-vectors virtual address.
 	 */
-	map.pfn = __phys_to_pfn(virt_to_phys(vectors));
+	map.pfn = __phys_to_pfn(virt_to_phys(&vectors));
 	map.virtual = 0xffff0000;
 	map.length = PAGE_SIZE;
 #ifdef CONFIG_KUSER_HELPERS
@@ -1329,8 +1383,10 @@ static void __init kmap_init(void)
 static void __init map_lowmem(void)
 {
 	struct memblock_region *reg;
+#ifndef CONFIG_PAX_KERNEXEC
 	phys_addr_t kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
 	phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
+#endif
 
 	/* Map all the lowmem memory banks. */
 	for_each_memblock(memory, reg) {
@@ -1343,11 +1399,48 @@ static void __init map_lowmem(void)
 		if (start >= end)
 			break;
 
+#ifdef CONFIG_PAX_KERNEXEC
+		map.pfn = __phys_to_pfn(start);
+		map.virtual = __phys_to_virt(start);
+		map.length = end - start;
+
+		if (map.virtual <= (unsigned long)_stext && ((unsigned long)_end < (map.virtual + map.length))) {
+			struct map_desc kernel;
+			struct map_desc initmap;
+
+			/* when freeing initmem we will make this RW */
+			initmap.pfn = __phys_to_pfn(__pa(__init_begin));
+			initmap.virtual = (unsigned long)__init_begin;
+			initmap.length = _sdata - __init_begin;
+			initmap.type = __MT_MEMORY_RWX;
+			create_mapping(&initmap);
+
+			/* when freeing initmem we will make this RX */
+			kernel.pfn = __phys_to_pfn(__pa(_stext));
+			kernel.virtual = (unsigned long)_stext;
+			kernel.length = __init_begin - _stext;
+			kernel.type = __MT_MEMORY_RWX;
+			create_mapping(&kernel);
+
+			if (map.virtual < (unsigned long)_stext) {
+				map.length = (unsigned long)_stext - map.virtual;
+				map.type = __MT_MEMORY_RWX;
+				create_mapping(&map);
+			}
+
+			map.pfn = __phys_to_pfn(__pa(_sdata));
+			map.virtual = (unsigned long)_sdata;
+			map.length = end - __pa(_sdata);
+		}
+
+		map.type = MT_MEMORY_RW;
+		create_mapping(&map);
+#else
 		if (end < kernel_x_start) {
 			map.pfn = __phys_to_pfn(start);
 			map.virtual = __phys_to_virt(start);
 			map.length = end - start;
-			map.type = MT_MEMORY_RWX;
+			map.type = __MT_MEMORY_RWX;
 
 			create_mapping(&map);
 		} else if (start >= kernel_x_end) {
@@ -1371,7 +1464,7 @@ static void __init map_lowmem(void)
 			map.pfn = __phys_to_pfn(kernel_x_start);
 			map.virtual = __phys_to_virt(kernel_x_start);
 			map.length = kernel_x_end - kernel_x_start;
-			map.type = MT_MEMORY_RWX;
+			map.type = __MT_MEMORY_RWX;
 
 			create_mapping(&map);
 
@@ -1384,6 +1477,7 @@ static void __init map_lowmem(void)
 				create_mapping(&map);
 			}
 		}
+#endif
 	}
 }
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/net/bpf_jit_32.c linux-4.0.9-pax/arch/arm/net/bpf_jit_32.c
--- linux-4.0.9/arch/arm/net/bpf_jit_32.c	2015-05-17 23:33:05.181623799 +0200
+++ linux-4.0.9-pax/arch/arm/net/bpf_jit_32.c	2015-05-17 23:37:01.329636625 +0200
@@ -20,6 +20,7 @@
 #include <asm/cacheflush.h>
 #include <asm/hwcap.h>
 #include <asm/opcodes.h>
+#include <asm/pgtable.h>
 
 #include "bpf_jit_32.h"
 
@@ -178,8 +179,10 @@ static void jit_fill_hole(void *area, un
 {
 	u32 *ptr;
 	/* We are guaranteed to have aligned memory. */
+	pax_open_kernel();
 	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
 		*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
+	pax_close_kernel();
 }
 
 static void build_prologue(struct jit_ctx *ctx)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/plat-iop/setup.c linux-4.0.9-pax/arch/arm/plat-iop/setup.c
--- linux-4.0.9/arch/arm/plat-iop/setup.c	2015-03-18 15:21:50.184349252 +0100
+++ linux-4.0.9-pax/arch/arm/plat-iop/setup.c	2015-04-15 12:13:52.862318625 +0200
@@ -24,7 +24,7 @@ static struct map_desc iop3xx_std_desc[]
 		.virtual	= IOP3XX_PERIPHERAL_VIRT_BASE,
 		.pfn		= __phys_to_pfn(IOP3XX_PERIPHERAL_PHYS_BASE),
 		.length		= IOP3XX_PERIPHERAL_SIZE,
-		.type		= MT_UNCACHED,
+		.type		= MT_UNCACHED_RW,
 	 },
 };
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm/plat-omap/sram.c linux-4.0.9-pax/arch/arm/plat-omap/sram.c
--- linux-4.0.9/arch/arm/plat-omap/sram.c	2015-03-18 15:21:50.184349252 +0100
+++ linux-4.0.9-pax/arch/arm/plat-omap/sram.c	2015-04-15 12:13:52.862318625 +0200
@@ -93,6 +93,8 @@ void __init omap_map_sram(unsigned long
 	 * Looks like we need to preserve some bootloader code at the
 	 * beginning of SRAM for jumping to flash for reboot to work...
 	 */
+	pax_open_kernel();
 	memset_io(omap_sram_base + omap_sram_skip, 0,
 		  omap_sram_size - omap_sram_skip);
+	pax_close_kernel();
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm64/include/asm/atomic.h linux-4.0.9-pax/arch/arm64/include/asm/atomic.h
--- linux-4.0.9/arch/arm64/include/asm/atomic.h	2014-12-08 21:49:14.680778897 +0100
+++ linux-4.0.9-pax/arch/arm64/include/asm/atomic.h	2015-04-15 12:13:52.862318625 +0200
@@ -252,5 +252,15 @@ static inline int atomic64_add_unless(at
 #define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
 #define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1LL, 0LL)
 
+#define atomic64_read_unchecked(v)		atomic64_read(v)
+#define atomic64_set_unchecked(v, i)		atomic64_set((v), (i))
+#define atomic64_add_unchecked(a, v)		atomic64_add((a), (v))
+#define atomic64_add_return_unchecked(a, v)	atomic64_add_return((a), (v))
+#define atomic64_sub_unchecked(a, v)		atomic64_sub((a), (v))
+#define atomic64_inc_unchecked(v)		atomic64_inc(v)
+#define atomic64_inc_return_unchecked(v)	atomic64_inc_return(v)
+#define atomic64_dec_unchecked(v)		atomic64_dec(v)
+#define atomic64_cmpxchg_unchecked(v, o, n)	atomic64_cmpxchg((v), (o), (n))
+
 #endif
 #endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm64/include/asm/barrier.h linux-4.0.9-pax/arch/arm64/include/asm/barrier.h
--- linux-4.0.9/arch/arm64/include/asm/barrier.h	2015-03-18 15:21:50.188349252 +0100
+++ linux-4.0.9-pax/arch/arm64/include/asm/barrier.h	2015-04-15 12:13:52.862318625 +0200
@@ -44,7 +44,7 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	ACCESS_ONCE_RW(*p) = (v);					\
 } while (0)
 
 #define smp_load_acquire(p)						\
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm64/include/asm/percpu.h linux-4.0.9-pax/arch/arm64/include/asm/percpu.h
--- linux-4.0.9/arch/arm64/include/asm/percpu.h	2015-04-13 11:20:52.666617952 +0200
+++ linux-4.0.9-pax/arch/arm64/include/asm/percpu.h	2015-04-15 12:13:52.862318625 +0200
@@ -135,16 +135,16 @@ static inline void __percpu_write(void *
 {
 	switch (size) {
 	case 1:
-		ACCESS_ONCE(*(u8 *)ptr) = (u8)val;
+		ACCESS_ONCE_RW(*(u8 *)ptr) = (u8)val;
 		break;
 	case 2:
-		ACCESS_ONCE(*(u16 *)ptr) = (u16)val;
+		ACCESS_ONCE_RW(*(u16 *)ptr) = (u16)val;
 		break;
 	case 4:
-		ACCESS_ONCE(*(u32 *)ptr) = (u32)val;
+		ACCESS_ONCE_RW(*(u32 *)ptr) = (u32)val;
 		break;
 	case 8:
-		ACCESS_ONCE(*(u64 *)ptr) = (u64)val;
+		ACCESS_ONCE_RW(*(u64 *)ptr) = (u64)val;
 		break;
 	default:
 		BUILD_BUG();
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm64/include/asm/pgalloc.h linux-4.0.9-pax/arch/arm64/include/asm/pgalloc.h
--- linux-4.0.9/arch/arm64/include/asm/pgalloc.h	2015-02-09 21:11:41.213571337 +0100
+++ linux-4.0.9-pax/arch/arm64/include/asm/pgalloc.h	2015-04-15 12:13:52.862318625 +0200
@@ -46,6 +46,11 @@ static inline void pud_populate(struct m
 	set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE));
 }
 
+static inline void pud_populate_kernel(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	pud_populate(mm, pud, pmd);
+}
+
 #endif	/* CONFIG_ARM64_PGTABLE_LEVELS > 2 */
 
 #if CONFIG_ARM64_PGTABLE_LEVELS > 3
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm64/include/asm/uaccess.h linux-4.0.9-pax/arch/arm64/include/asm/uaccess.h
--- linux-4.0.9/arch/arm64/include/asm/uaccess.h	2015-04-13 11:20:52.774617946 +0200
+++ linux-4.0.9-pax/arch/arm64/include/asm/uaccess.h	2015-04-15 12:13:52.862318625 +0200
@@ -99,6 +99,7 @@ static inline void set_fs(mm_segment_t f
 	flag;								\
 })
 
+#define access_ok_noprefault(type, addr, size) access_ok((type), (addr), (size))
 #define access_ok(type, addr, size)	__range_ok(addr, size)
 #define user_addr_max			get_fs
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/arm64/mm/dma-mapping.c linux-4.0.9-pax/arch/arm64/mm/dma-mapping.c
--- linux-4.0.9/arch/arm64/mm/dma-mapping.c	2015-05-13 20:51:57.552791949 +0200
+++ linux-4.0.9-pax/arch/arm64/mm/dma-mapping.c	2015-05-13 20:52:12.148792741 +0200
@@ -134,7 +134,7 @@ static void __dma_free_coherent(struct d
 					phys_to_page(paddr),
 					size >> PAGE_SHIFT);
 	if (!freed)
-		swiotlb_free_coherent(dev, size, vaddr, dma_handle);
+		swiotlb_free_coherent(dev, size, vaddr, dma_handle, attrs);
 }
 
 static void *__dma_alloc(struct device *dev, size_t size,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/avr32/include/asm/elf.h linux-4.0.9-pax/arch/avr32/include/asm/elf.h
--- linux-4.0.9/arch/avr32/include/asm/elf.h	2015-03-18 15:21:50.188349252 +0100
+++ linux-4.0.9-pax/arch/avr32/include/asm/elf.h	2015-04-15 12:13:52.862318625 +0200
@@ -84,8 +84,14 @@ typedef struct user_fpu_struct elf_fpreg
    the loader.  We need to make sure that it is out of the way of the program
    that it will "exec", and that there is sufficient room for the brk.  */
 
-#define ELF_ET_DYN_BASE         (2 * TASK_SIZE / 3)
+#define ELF_ET_DYN_BASE		(TASK_SIZE / 3 * 2)
 
+#ifdef CONFIG_PAX_ASLR
+#define PAX_ELF_ET_DYN_BASE	0x00001000UL
+
+#define PAX_DELTA_MMAP_LEN	15
+#define PAX_DELTA_STACK_LEN	15
+#endif
 
 /* This yields a mask that user programs can use to figure out what
    instruction set this CPU supports.  This could be done in user space,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/avr32/include/asm/kmap_types.h linux-4.0.9-pax/arch/avr32/include/asm/kmap_types.h
--- linux-4.0.9/arch/avr32/include/asm/kmap_types.h	2015-03-18 15:21:50.188349252 +0100
+++ linux-4.0.9-pax/arch/avr32/include/asm/kmap_types.h	2015-04-15 12:13:52.862318625 +0200
@@ -2,9 +2,9 @@
 #define __ASM_AVR32_KMAP_TYPES_H
 
 #ifdef CONFIG_DEBUG_HIGHMEM
-# define KM_TYPE_NR 29
+# define KM_TYPE_NR 30
 #else
-# define KM_TYPE_NR 14
+# define KM_TYPE_NR 15
 #endif
 
 #endif /* __ASM_AVR32_KMAP_TYPES_H */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/avr32/mm/fault.c linux-4.0.9-pax/arch/avr32/mm/fault.c
--- linux-4.0.9/arch/avr32/mm/fault.c	2015-03-18 15:21:50.188349252 +0100
+++ linux-4.0.9-pax/arch/avr32/mm/fault.c	2015-04-15 12:13:52.862318625 +0200
@@ -41,6 +41,23 @@ static inline int notify_page_fault(stru
 
 int exception_trace = 1;
 
+#ifdef CONFIG_PAX_PAGEEXEC
+void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
+{
+	unsigned long i;
+
+	printk(KERN_ERR "PAX: bytes at PC: ");
+	for (i = 0; i < 20; i++) {
+		unsigned char c;
+		if (get_user(c, (unsigned char *)pc+i))
+			printk(KERN_CONT "???????? ");
+		else
+			printk(KERN_CONT "%02x ", c);
+	}
+	printk("\n");
+}
+#endif
+
 /*
  * This routine handles page faults. It determines the address and the
  * problem, and then passes it off to one of the appropriate routines.
@@ -178,6 +195,16 @@ bad_area:
 	up_read(&mm->mmap_sem);
 
 	if (user_mode(regs)) {
+
+#ifdef CONFIG_PAX_PAGEEXEC
+		if (mm->pax_flags & MF_PAX_PAGEEXEC) {
+			if (ecr == ECR_PROTECTION_X || ecr == ECR_TLB_MISS_X) {
+				pax_report_fault(regs, (void *)regs->pc, (void *)regs->sp);
+				do_group_exit(SIGKILL);
+			}
+		}
+#endif
+
 		if (exception_trace && printk_ratelimit())
 			printk("%s%s[%d]: segfault at %08lx pc %08lx "
 			       "sp %08lx ecr %lu\n",
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/frv/include/asm/atomic.h linux-4.0.9-pax/arch/frv/include/asm/atomic.h
--- linux-4.0.9/arch/frv/include/asm/atomic.h	2015-03-18 15:21:50.188349252 +0100
+++ linux-4.0.9-pax/arch/frv/include/asm/atomic.h	2015-04-15 12:13:52.862318625 +0200
@@ -181,6 +181,16 @@ static inline void atomic64_dec(atomic64
 #define atomic64_cmpxchg(v, old, new)	(__cmpxchg_64(old, new, &(v)->counter))
 #define atomic64_xchg(v, new)		(__xchg_64(new, &(v)->counter))
 
+#define atomic64_read_unchecked(v)		atomic64_read(v)
+#define atomic64_set_unchecked(v, i)		atomic64_set((v), (i))
+#define atomic64_add_unchecked(a, v)		atomic64_add((a), (v))
+#define atomic64_add_return_unchecked(a, v)	atomic64_add_return((a), (v))
+#define atomic64_sub_unchecked(a, v)		atomic64_sub((a), (v))
+#define atomic64_inc_unchecked(v)		atomic64_inc(v)
+#define atomic64_inc_return_unchecked(v)	atomic64_inc_return(v)
+#define atomic64_dec_unchecked(v)		atomic64_dec(v)
+#define atomic64_cmpxchg_unchecked(v, o, n)	atomic64_cmpxchg((v), (o), (n))
+
 static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/frv/include/asm/kmap_types.h linux-4.0.9-pax/arch/frv/include/asm/kmap_types.h
--- linux-4.0.9/arch/frv/include/asm/kmap_types.h	2015-03-18 15:21:50.188349252 +0100
+++ linux-4.0.9-pax/arch/frv/include/asm/kmap_types.h	2015-04-15 12:13:52.862318625 +0200
@@ -2,6 +2,6 @@
 #ifndef _ASM_KMAP_TYPES_H
 #define _ASM_KMAP_TYPES_H
 
-#define KM_TYPE_NR 17
+#define KM_TYPE_NR 18
 
 #endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/frv/mm/elf-fdpic.c linux-4.0.9-pax/arch/frv/mm/elf-fdpic.c
--- linux-4.0.9/arch/frv/mm/elf-fdpic.c	2015-03-18 15:21:50.188349252 +0100
+++ linux-4.0.9-pax/arch/frv/mm/elf-fdpic.c	2015-04-15 12:13:52.862318625 +0200
@@ -73,8 +73,7 @@ unsigned long arch_get_unmapped_area(str
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(current->mm, addr);
-		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len))
 			goto success;
 	}
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/ia64/include/asm/atomic.h linux-4.0.9-pax/arch/ia64/include/asm/atomic.h
--- linux-4.0.9/arch/ia64/include/asm/atomic.h	2015-03-18 15:21:50.188349252 +0100
+++ linux-4.0.9-pax/arch/ia64/include/asm/atomic.h	2015-04-15 12:13:52.862318625 +0200
@@ -193,4 +193,14 @@ atomic64_add_negative (__s64 i, atomic64
 #define atomic64_inc(v)			atomic64_add(1, (v))
 #define atomic64_dec(v)			atomic64_sub(1, (v))
 
+#define atomic64_read_unchecked(v)		atomic64_read(v)
+#define atomic64_set_unchecked(v, i)		atomic64_set((v), (i))
+#define atomic64_add_unchecked(a, v)		atomic64_add((a), (v))
+#define atomic64_add_return_unchecked(a, v)	atomic64_add_return((a), (v))
+#define atomic64_sub_unchecked(a, v)		atomic64_sub((a), (v))
+#define atomic64_inc_unchecked(v)		atomic64_inc(v)
+#define atomic64_inc_return_unchecked(v)	atomic64_inc_return(v)
+#define atomic64_dec_unchecked(v)		atomic64_dec(v)
+#define atomic64_cmpxchg_unchecked(v, o, n)	atomic64_cmpxchg((v), (o), (n))
+
 #endif /* _ASM_IA64_ATOMIC_H */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/ia64/include/asm/barrier.h linux-4.0.9-pax/arch/ia64/include/asm/barrier.h
--- linux-4.0.9/arch/ia64/include/asm/barrier.h	2015-03-18 15:21:50.188349252 +0100
+++ linux-4.0.9-pax/arch/ia64/include/asm/barrier.h	2015-04-15 12:13:52.862318625 +0200
@@ -66,7 +66,7 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	ACCESS_ONCE_RW(*p) = (v);					\
 } while (0)
 
 #define smp_load_acquire(p)						\
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/ia64/include/asm/elf.h linux-4.0.9-pax/arch/ia64/include/asm/elf.h
--- linux-4.0.9/arch/ia64/include/asm/elf.h	2015-03-18 15:21:50.188349252 +0100
+++ linux-4.0.9-pax/arch/ia64/include/asm/elf.h	2015-04-15 12:13:52.862318625 +0200
@@ -42,6 +42,13 @@
  */
 #define ELF_ET_DYN_BASE		(TASK_UNMAPPED_BASE + 0x800000000UL)
 
+#ifdef CONFIG_PAX_ASLR
+#define PAX_ELF_ET_DYN_BASE	(current->personality == PER_LINUX32 ? 0x08048000UL : 0x4000000000000000UL)
+
+#define PAX_DELTA_MMAP_LEN	(current->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13)
+#define PAX_DELTA_STACK_LEN	(current->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13)
+#endif
+
 #define PT_IA_64_UNWIND		0x70000001
 
 /* IA-64 relocations: */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/ia64/include/asm/pgalloc.h linux-4.0.9-pax/arch/ia64/include/asm/pgalloc.h
--- linux-4.0.9/arch/ia64/include/asm/pgalloc.h	2015-03-18 15:21:50.188349252 +0100
+++ linux-4.0.9-pax/arch/ia64/include/asm/pgalloc.h	2015-04-15 12:13:52.862318625 +0200
@@ -39,6 +39,12 @@ pgd_populate(struct mm_struct *mm, pgd_t
 	pgd_val(*pgd_entry) = __pa(pud);
 }
 
+static inline void
+pgd_populate_kernel(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud)
+{
+	pgd_populate(mm, pgd_entry, pud);
+}
+
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	return quicklist_alloc(0, GFP_KERNEL, NULL);
@@ -57,6 +63,12 @@ pud_populate(struct mm_struct *mm, pud_t
 	pud_val(*pud_entry) = __pa(pmd);
 }
 
+static inline void
+pud_populate_kernel(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
+{
+	pud_populate(mm, pud_entry, pmd);
+}
+
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	return quicklist_alloc(0, GFP_KERNEL, NULL);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/ia64/include/asm/pgtable.h linux-4.0.9-pax/arch/ia64/include/asm/pgtable.h
--- linux-4.0.9/arch/ia64/include/asm/pgtable.h	2015-04-13 11:20:53.446617910 +0200
+++ linux-4.0.9-pax/arch/ia64/include/asm/pgtable.h	2015-04-15 12:13:52.862318625 +0200
@@ -12,7 +12,7 @@
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  */
 
-
+#include <linux/const.h>
 #include <asm/mman.h>
 #include <asm/page.h>
 #include <asm/processor.h>
@@ -139,6 +139,17 @@
 #define PAGE_READONLY	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
 #define PAGE_COPY	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
 #define PAGE_COPY_EXEC	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
+
+#ifdef CONFIG_PAX_PAGEEXEC
+# define PAGE_SHARED_NOEXEC	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW)
+# define PAGE_READONLY_NOEXEC	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
+# define PAGE_COPY_NOEXEC	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
+#else
+# define PAGE_SHARED_NOEXEC	PAGE_SHARED
+# define PAGE_READONLY_NOEXEC	PAGE_READONLY
+# define PAGE_COPY_NOEXEC	PAGE_COPY
+#endif
+
 #define PAGE_GATE	__pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX)
 #define PAGE_KERNEL	__pgprot(__DIRTY_BITS  | _PAGE_PL_0 | _PAGE_AR_RWX)
 #define PAGE_KERNELRX	__pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/ia64/include/asm/spinlock.h linux-4.0.9-pax/arch/ia64/include/asm/spinlock.h
--- linux-4.0.9/arch/ia64/include/asm/spinlock.h	2015-03-18 15:21:50.188349252 +0100
+++ linux-4.0.9-pax/arch/ia64/include/asm/spinlock.h	2015-04-15 12:13:52.862318625 +0200
@@ -71,7 +71,7 @@ static __always_inline void __ticket_spi
 	unsigned short	*p = (unsigned short *)&lock->lock + 1, tmp;
 
 	asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p));
-	ACCESS_ONCE(*p) = (tmp + 2) & ~1;
+	ACCESS_ONCE_RW(*p) = (tmp + 2) & ~1;
 }
 
 static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/ia64/include/asm/uaccess.h linux-4.0.9-pax/arch/ia64/include/asm/uaccess.h
--- linux-4.0.9/arch/ia64/include/asm/uaccess.h	2015-04-13 11:20:53.454617909 +0200
+++ linux-4.0.9-pax/arch/ia64/include/asm/uaccess.h	2015-04-15 12:13:52.866318624 +0200
@@ -70,6 +70,7 @@
 	 && ((segment).seg == KERNEL_DS.seg						\
 	     || likely(REGION_OFFSET((unsigned long) (addr)) < RGN_MAP_LIMIT)));	\
 })
+#define access_ok_noprefault(type, addr, size) access_ok((type), (addr), (size))
 #define access_ok(type, addr, size)	__access_ok((addr), (size), get_fs())
 
 /*
@@ -241,12 +242,24 @@ extern unsigned long __must_check __copy
 static inline unsigned long
 __copy_to_user (void __user *to, const void *from, unsigned long count)
 {
+	if (count > INT_MAX)
+		return count;
+
+	if (!__builtin_constant_p(count))
+		check_object_size(from, count, true);
+
 	return __copy_user(to, (__force void __user *) from, count);
 }
 
 static inline unsigned long
 __copy_from_user (void *to, const void __user *from, unsigned long count)
 {
+	if (count > INT_MAX)
+		return count;
+
+	if (!__builtin_constant_p(count))
+		check_object_size(to, count, false);
+
 	return __copy_user((__force void __user *) to, from, count);
 }
 
@@ -256,10 +269,13 @@ __copy_from_user (void *to, const void _
 ({											\
 	void __user *__cu_to = (to);							\
 	const void *__cu_from = (from);							\
-	long __cu_len = (n);								\
+	unsigned long __cu_len = (n);							\
 											\
-	if (__access_ok(__cu_to, __cu_len, get_fs()))					\
+	if (__cu_len <= INT_MAX && __access_ok(__cu_to, __cu_len, get_fs())) {		\
+		if (!__builtin_constant_p(n))						\
+			check_object_size(__cu_from, __cu_len, true);			\
 		__cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len);	\
+	}										\
 	__cu_len;									\
 })
 
@@ -267,11 +283,14 @@ __copy_from_user (void *to, const void _
 ({											\
 	void *__cu_to = (to);								\
 	const void __user *__cu_from = (from);						\
-	long __cu_len = (n);								\
+	unsigned long __cu_len = (n);							\
 											\
 	__chk_user_ptr(__cu_from);							\
-	if (__access_ok(__cu_from, __cu_len, get_fs()))					\
+	if (__cu_len <= INT_MAX && __access_ok(__cu_from, __cu_len, get_fs())) {	\
+		if (!__builtin_constant_p(n))						\
+			check_object_size(__cu_to, __cu_len, false);			\
 		__cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len);	\
+	}										\
 	__cu_len;									\
 })
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/ia64/kernel/module.c linux-4.0.9-pax/arch/ia64/kernel/module.c
--- linux-4.0.9/arch/ia64/kernel/module.c	2015-03-18 15:21:50.188349252 +0100
+++ linux-4.0.9-pax/arch/ia64/kernel/module.c	2015-04-15 12:13:52.866318624 +0200
@@ -492,15 +492,39 @@ module_frob_arch_sections (Elf_Ehdr *ehd
 }
 
 static inline int
+in_init_rx (const struct module *mod, uint64_t addr)
+{
+	return addr - (uint64_t) mod->module_init_rx < mod->init_size_rx;
+}
+
+static inline int
+in_init_rw (const struct module *mod, uint64_t addr)
+{
+	return addr - (uint64_t) mod->module_init_rw < mod->init_size_rw;
+}
+
+static inline int
 in_init (const struct module *mod, uint64_t addr)
 {
-	return addr - (uint64_t) mod->module_init < mod->init_size;
+	return in_init_rx(mod, addr) || in_init_rw(mod, addr);
+}
+
+static inline int
+in_core_rx (const struct module *mod, uint64_t addr)
+{
+	return addr - (uint64_t) mod->module_core_rx < mod->core_size_rx;
+}
+
+static inline int
+in_core_rw (const struct module *mod, uint64_t addr)
+{
+	return addr - (uint64_t) mod->module_core_rw < mod->core_size_rw;
 }
 
 static inline int
 in_core (const struct module *mod, uint64_t addr)
 {
-	return addr - (uint64_t) mod->module_core < mod->core_size;
+	return in_core_rx(mod, addr) || in_core_rw(mod, addr);
 }
 
 static inline int
@@ -683,7 +707,14 @@ do_reloc (struct module *mod, uint8_t r_
 		break;
 
 	      case RV_BDREL:
-		val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core);
+		if (in_init_rx(mod, val))
+			val -= (uint64_t) mod->module_init_rx;
+		else if (in_init_rw(mod, val))
+			val -= (uint64_t) mod->module_init_rw;
+		else if (in_core_rx(mod, val))
+			val -= (uint64_t) mod->module_core_rx;
+		else if (in_core_rw(mod, val))
+			val -= (uint64_t) mod->module_core_rw;
 		break;
 
 	      case RV_LTV:
@@ -818,15 +849,15 @@ apply_relocate_add (Elf64_Shdr *sechdrs,
 		 *     addresses have been selected...
 		 */
 		uint64_t gp;
-		if (mod->core_size > MAX_LTOFF)
+		if (mod->core_size_rx + mod->core_size_rw > MAX_LTOFF)
 			/*
 			 * This takes advantage of fact that SHF_ARCH_SMALL gets allocated
 			 * at the end of the module.
 			 */
-			gp = mod->core_size - MAX_LTOFF / 2;
+			gp = mod->core_size_rx + mod->core_size_rw - MAX_LTOFF / 2;
 		else
-			gp = mod->core_size / 2;
-		gp = (uint64_t) mod->module_core + ((gp + 7) & -8);
+			gp = (mod->core_size_rx + mod->core_size_rw) / 2;
+		gp = (uint64_t) mod->module_core_rx + ((gp + 7) & -8);
 		mod->arch.gp = gp;
 		DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp);
 	}
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/ia64/kernel/palinfo.c linux-4.0.9-pax/arch/ia64/kernel/palinfo.c
--- linux-4.0.9/arch/ia64/kernel/palinfo.c	2015-03-18 15:21:50.188349252 +0100
+++ linux-4.0.9-pax/arch/ia64/kernel/palinfo.c	2015-04-15 12:13:52.866318624 +0200
@@ -980,7 +980,7 @@ static int palinfo_cpu_callback(struct n
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __refdata palinfo_cpu_notifier =
+static struct notifier_block palinfo_cpu_notifier =
 {
 	.notifier_call = palinfo_cpu_callback,
 	.priority = 0,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/ia64/kernel/sys_ia64.c linux-4.0.9-pax/arch/ia64/kernel/sys_ia64.c
--- linux-4.0.9/arch/ia64/kernel/sys_ia64.c	2015-03-18 15:21:50.192349252 +0100
+++ linux-4.0.9-pax/arch/ia64/kernel/sys_ia64.c	2015-04-15 12:13:52.866318624 +0200
@@ -43,6 +43,13 @@ arch_get_unmapped_area (struct file *fil
 	if (REGION_NUMBER(addr) == RGN_HPAGE)
 		addr = 0;
 #endif
+
+#ifdef CONFIG_PAX_RANDMMAP
+	if (mm->pax_flags & MF_PAX_RANDMMAP)
+		addr = mm->free_area_cache;
+	else
+#endif
+
 	if (!addr)
 		addr = TASK_UNMAPPED_BASE;
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/ia64/kernel/vmlinux.lds.S linux-4.0.9-pax/arch/ia64/kernel/vmlinux.lds.S
--- linux-4.0.9/arch/ia64/kernel/vmlinux.lds.S	2015-03-18 15:21:50.192349252 +0100
+++ linux-4.0.9-pax/arch/ia64/kernel/vmlinux.lds.S	2015-04-15 12:13:52.866318624 +0200
@@ -192,7 +192,7 @@ SECTIONS {
 	/* Per-cpu data: */
 	. = ALIGN(PERCPU_PAGE_SIZE);
 	PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu)
-	__phys_per_cpu_start = __per_cpu_load;
+	__phys_per_cpu_start = per_cpu_load;
 	/*
 	 * ensure percpu data fits
 	 * into percpu page size
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/ia64/Makefile linux-4.0.9-pax/arch/ia64/Makefile
--- linux-4.0.9/arch/ia64/Makefile	2015-03-18 15:21:50.192349252 +0100
+++ linux-4.0.9-pax/arch/ia64/Makefile	2015-04-15 12:13:52.866318624 +0200
@@ -98,5 +98,6 @@ endef
 archprepare: make_nr_irqs_h FORCE
 PHONY += make_nr_irqs_h FORCE
 
+make_nr_irqs_h: KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS))
 make_nr_irqs_h: FORCE
 	$(Q)$(MAKE) $(build)=arch/ia64/kernel include/generated/nr-irqs.h
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/ia64/mm/fault.c linux-4.0.9-pax/arch/ia64/mm/fault.c
--- linux-4.0.9/arch/ia64/mm/fault.c	2015-03-18 15:21:50.192349252 +0100
+++ linux-4.0.9-pax/arch/ia64/mm/fault.c	2015-04-15 12:13:52.866318624 +0200
@@ -72,6 +72,23 @@ mapped_kernel_page_is_present (unsigned
 	return pte_present(pte);
 }
 
+#ifdef CONFIG_PAX_PAGEEXEC
+void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
+{
+	unsigned long i;
+
+	printk(KERN_ERR "PAX: bytes at PC: ");
+	for (i = 0; i < 8; i++) {
+		unsigned int c;
+		if (get_user(c, (unsigned int *)pc+i))
+			printk(KERN_CONT "???????? ");
+		else
+			printk(KERN_CONT "%08x ", c);
+	}
+	printk("\n");
+}
+#endif
+
 #	define VM_READ_BIT	0
 #	define VM_WRITE_BIT	1
 #	define VM_EXEC_BIT	2
@@ -151,8 +168,21 @@ retry:
 	if (((isr >> IA64_ISR_R_BIT) & 1UL) && (!(vma->vm_flags & (VM_READ | VM_WRITE))))
 		goto bad_area;
 
-	if ((vma->vm_flags & mask) != mask)
+	if ((vma->vm_flags & mask) != mask) {
+
+#ifdef CONFIG_PAX_PAGEEXEC
+		if (!(vma->vm_flags & VM_EXEC) && (mask & VM_EXEC)) {
+			if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->cr_iip)
+				goto bad_area;
+
+			up_read(&mm->mmap_sem);
+			pax_report_fault(regs, (void *)regs->cr_iip, (void *)regs->r12);
+			do_group_exit(SIGKILL);
+		}
+#endif
+
 		goto bad_area;
+	}
 
 	/*
 	 * If for any reason at all we couldn't handle the fault, make
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/ia64/mm/init.c linux-4.0.9-pax/arch/ia64/mm/init.c
--- linux-4.0.9/arch/ia64/mm/init.c	2015-03-18 15:21:50.192349252 +0100
+++ linux-4.0.9-pax/arch/ia64/mm/init.c	2015-04-15 12:13:52.866318624 +0200
@@ -120,6 +120,19 @@ ia64_init_addr_space (void)
 		vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
 		vma->vm_end = vma->vm_start + PAGE_SIZE;
 		vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
+
+#ifdef CONFIG_PAX_PAGEEXEC
+		if (current->mm->pax_flags & MF_PAX_PAGEEXEC) {
+			vma->vm_flags &= ~VM_EXEC;
+
+#ifdef CONFIG_PAX_MPROTECT
+			if (current->mm->pax_flags & MF_PAX_MPROTECT)
+				vma->vm_flags &= ~VM_MAYEXEC;
+#endif
+
+		}
+#endif
+
 		vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 		down_write(&current->mm->mmap_sem);
 		if (insert_vm_struct(current->mm, vma)) {
@@ -286,7 +299,7 @@ static int __init gate_vma_init(void)
 	gate_vma.vm_start = FIXADDR_USER_START;
 	gate_vma.vm_end = FIXADDR_USER_END;
 	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
-	gate_vma.vm_page_prot = __P101;
+	gate_vma.vm_page_prot = vm_get_page_prot(gate_vma.vm_flags);
 
 	return 0;
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/m32r/lib/usercopy.c linux-4.0.9-pax/arch/m32r/lib/usercopy.c
--- linux-4.0.9/arch/m32r/lib/usercopy.c	2015-03-18 15:21:50.192349252 +0100
+++ linux-4.0.9-pax/arch/m32r/lib/usercopy.c	2015-04-15 12:13:52.866318624 +0200
@@ -14,6 +14,9 @@
 unsigned long
 __generic_copy_to_user(void __user *to, const void *from, unsigned long n)
 {
+	if ((long)n < 0)
+		return n;
+
 	prefetch(from);
 	if (access_ok(VERIFY_WRITE, to, n))
 		__copy_user(to,from,n);
@@ -23,6 +26,9 @@ __generic_copy_to_user(void __user *to,
 unsigned long
 __generic_copy_from_user(void *to, const void __user *from, unsigned long n)
 {
+	if ((long)n < 0)
+		return n;
+
 	prefetchw(to);
 	if (access_ok(VERIFY_READ, from, n))
 		__copy_user_zeroing(to,from,n);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/metag/include/asm/barrier.h linux-4.0.9-pax/arch/metag/include/asm/barrier.h
--- linux-4.0.9/arch/metag/include/asm/barrier.h	2015-03-18 15:21:50.192349252 +0100
+++ linux-4.0.9-pax/arch/metag/include/asm/barrier.h	2015-04-15 12:13:52.866318624 +0200
@@ -90,7 +90,7 @@ static inline void fence(void)
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	ACCESS_ONCE_RW(*p) = (v);					\
 } while (0)
 
 #define smp_load_acquire(p)						\
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/cavium-octeon/dma-octeon.c linux-4.0.9-pax/arch/mips/cavium-octeon/dma-octeon.c
--- linux-4.0.9/arch/mips/cavium-octeon/dma-octeon.c	2015-05-13 20:51:57.616791952 +0200
+++ linux-4.0.9-pax/arch/mips/cavium-octeon/dma-octeon.c	2015-05-13 20:52:12.148792741 +0200
@@ -199,7 +199,7 @@ static void octeon_dma_free_coherent(str
 	if (dma_release_from_coherent(dev, order, vaddr))
 		return;
 
-	swiotlb_free_coherent(dev, size, vaddr, dma_handle);
+	swiotlb_free_coherent(dev, size, vaddr, dma_handle, attrs);
 }
 
 static dma_addr_t octeon_unity_phys_to_dma(struct device *dev, phys_addr_t paddr)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/include/asm/atomic.h linux-4.0.9-pax/arch/mips/include/asm/atomic.h
--- linux-4.0.9/arch/mips/include/asm/atomic.h	2015-04-13 11:20:54.246617867 +0200
+++ linux-4.0.9-pax/arch/mips/include/asm/atomic.h	2015-04-20 22:20:07.523768732 +0200
@@ -22,15 +22,39 @@
 #include <asm/cmpxchg.h>
 #include <asm/war.h>
 
+#ifdef CONFIG_GENERIC_ATOMIC64
+#include <asm-generic/atomic64.h>
+#endif
+
 #define ATOMIC_INIT(i)	  { (i) }
 
+#ifdef CONFIG_64BIT
+#define _ASM_EXTABLE(from, to)		\
+"	.section __ex_table,\"a\"\n"	\
+"	.dword	" #from ", " #to"\n"	\
+"	.previous\n"
+#else
+#define _ASM_EXTABLE(from, to)		\
+"	.section __ex_table,\"a\"\n"	\
+"	.word	" #from ", " #to"\n"	\
+"	.previous\n"
+#endif
+
 /*
  * atomic_read - read atomic variable
  * @v: pointer of type atomic_t
  *
  * Atomically reads the value of @v.
  */
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
+static inline int atomic_read(const atomic_t *v)
+{
+	return ACCESS_ONCE(v->counter);
+}
+
+static inline int atomic_read_unchecked(const atomic_unchecked_t *v)
+{
+	return ACCESS_ONCE(v->counter);
+}
 
 /*
  * atomic_set - set atomic variable
@@ -39,47 +63,77 @@
  *
  * Atomically sets the value of @v to @i.
  */
-#define atomic_set(v, i)		((v)->counter = (i))
+static inline void atomic_set(atomic_t *v, int i)
+{
+	v->counter = i;
+}
+
+static inline void atomic_set_unchecked(atomic_unchecked_t *v, int i)
+{
+	v->counter = i;
+}
+
+#ifdef CONFIG_PAX_REFCOUNT
+#define __OVERFLOW_POST				\
+	"	b	4f		\n"	\
+	"	.set	noreorder	\n"	\
+	"3:	b	5f		\n"	\
+	"	move	%0, %1		\n"	\
+	"	.set	reorder		\n"
+#define __OVERFLOW_EXTABLE	\
+	"3:\n"			\
+	_ASM_EXTABLE(2b, 3b)
+#else
+#define __OVERFLOW_POST
+#define __OVERFLOW_EXTABLE
+#endif
 
-#define ATOMIC_OP(op, c_op, asm_op)					      \
-static __inline__ void atomic_##op(int i, atomic_t * v)			      \
+#define __ATOMIC_OP(op, suffix, asm_op, extable)			      \
+static inline void atomic_##op##suffix(int i, atomic##suffix##_t * v)	      \
 {									      \
 	if (kernel_uses_llsc && R10000_LLSC_WAR) {			      \
 		int temp;						      \
 									      \
 		__asm__ __volatile__(					      \
-		"	.set	arch=r4000				\n"   \
-		"1:	ll	%0, %1		# atomic_" #op "	\n"   \
-		"	" #asm_op " %0, %2				\n"   \
+		"	.set	mips3					\n"   \
+		"1:	ll	%0, %1		# atomic_" #op #suffix "\n"   \
+		"2:	" #asm_op " %0, %2				\n"   \
 		"	sc	%0, %1					\n"   \
 		"	beqzl	%0, 1b					\n"   \
+		extable							      \
 		"	.set	mips0					\n"   \
 		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)	      \
 		: "Ir" (i));						      \
 	} else if (kernel_uses_llsc) {					      \
 		int temp;						      \
 									      \
-		do {							      \
-			__asm__ __volatile__(				      \
-			"	.set	"MIPS_ISA_LEVEL"		\n"   \
-			"	ll	%0, %1		# atomic_" #op "\n"   \
-			"	" #asm_op " %0, %2			\n"   \
-			"	sc	%0, %1				\n"   \
-			"	.set	mips0				\n"   \
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)      \
-			: "Ir" (i));					      \
-		} while (unlikely(!temp));				      \
+		__asm__ __volatile__(					      \
+		"	.set	"MIPS_ISA_LEVEL"			\n"   \
+		"1:	ll	%0, %1		# atomic_" #op #suffix "\n"   \
+		"2:	" #asm_op " %0, %2				\n"   \
+		"	sc	%0, %1					\n"   \
+		"	beqz	%0, 1b					\n"   \
+			extable						      \
+		"	.set	mips0					\n"   \
+		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)	      \
+		: "Ir" (i));						      \
 	} else {							      \
 		unsigned long flags;					      \
 									      \
 		raw_local_irq_save(flags);				      \
-		v->counter c_op i;					      \
+		__asm__ __volatile__(					      \
+		"2:	" #asm_op " %0, %1				\n"   \
+		extable							      \
+		: "+r" (v->counter) : "Ir" (i));			      \
 		raw_local_irq_restore(flags);				      \
 	}								      \
 }
 
-#define ATOMIC_OP_RETURN(op, c_op, asm_op)				      \
-static __inline__ int atomic_##op##_return(int i, atomic_t * v)		      \
+#define ATOMIC_OP(op, asm_op) __ATOMIC_OP(op, _unchecked, asm_op##u, )	      \
+			      __ATOMIC_OP(op, , asm_op, __OVERFLOW_EXTABLE)
+
+#define __ATOMIC_OP_RETURN(op, suffix, asm_op, post_op, extable)	      \
+static inline int atomic_##op##_return##suffix(int i, atomic##suffix##_t * v) \
 {									      \
 	int result;							      \
 									      \
@@ -89,12 +143,15 @@ static __inline__ int atomic_##op##_retu
 		int temp;						      \
 									      \
 		__asm__ __volatile__(					      \
-		"	.set	arch=r4000				\n"   \
-		"1:	ll	%1, %2		# atomic_" #op "_return	\n"   \
-		"	" #asm_op " %0, %1, %3				\n"   \
+		"	.set	mips3					\n"   \
+		"1:	ll	%1, %2	# atomic_" #op "_return" #suffix"\n"  \
+		"2:	" #asm_op " %0, %1, %3				\n"   \
 		"	sc	%0, %2					\n"   \
 		"	beqzl	%0, 1b					\n"   \
-		"	" #asm_op " %0, %1, %3				\n"   \
+		post_op							      \
+		extable							      \
+		"4:	" #asm_op " %0, %1, %3				\n"   \
+		"5:							\n"   \
 		"	.set	mips0					\n"   \
 		: "=&r" (result), "=&r" (temp),				      \
 		  "+" GCC_OFF_SMALL_ASM() (v->counter)			      \
@@ -102,26 +159,33 @@ static __inline__ int atomic_##op##_retu
 	} else if (kernel_uses_llsc) {					      \
 		int temp;						      \
 									      \
-		do {							      \
-			__asm__ __volatile__(				      \
-			"	.set	"MIPS_ISA_LEVEL"		\n"   \
-			"	ll	%1, %2	# atomic_" #op "_return	\n"   \
-			"	" #asm_op " %0, %1, %3			\n"   \
-			"	sc	%0, %2				\n"   \
-			"	.set	mips0				\n"   \
-			: "=&r" (result), "=&r" (temp),			      \
-			  "+" GCC_OFF_SMALL_ASM() (v->counter)		      \
-			: "Ir" (i));					      \
-		} while (unlikely(!result));				      \
+		__asm__ __volatile__(					      \
+		"	.set	"MIPS_ISA_LEVEL"			\n"   \
+		"1:	ll	%1, %2	# atomic_" #op "_return" #suffix "\n" \
+		"2:	" #asm_op " %0, %1, %3				\n"   \
+		"	sc	%0, %2					\n"   \
+		post_op							      \
+		extable							      \
+		"4:	" #asm_op " %0, %1, %3				\n"   \
+		"5:							\n"   \
+		"	.set	mips0					\n"   \
+		: "=&r" (result), "=&r" (temp),				      \
+		  "+" GCC_OFF_SMALL_ASM() (v->counter)			      \
+		: "Ir" (i));						      \
 									      \
-		result = temp; result c_op i;				      \
 	} else {							      \
 		unsigned long flags;					      \
 									      \
 		raw_local_irq_save(flags);				      \
-		result = v->counter;					      \
-		result c_op i;						      \
-		v->counter = result;					      \
+		__asm__ __volatile__(					      \
+		"	lw	%0, %1					\n"   \
+		"2:	" #asm_op " %0, %1, %2				\n"   \
+		"	sw	%0, %1					\n"   \
+		"3:							\n"   \
+		extable							      \
+		: "=&r" (result), "+" GCC_OFF_SMALL_ASM() (v->counter)	      \
+		: "Ir" (i));						      \
 		raw_local_irq_restore(flags);				      \
 	}								      \
 									      \
@@ -130,16 +194,21 @@ static __inline__ int atomic_##op##_retu
 	return result;							      \
 }
 
-#define ATOMIC_OPS(op, c_op, asm_op)					      \
-	ATOMIC_OP(op, c_op, asm_op)					      \
-	ATOMIC_OP_RETURN(op, c_op, asm_op)
+#define ATOMIC_OP_RETURN(op, asm_op) __ATOMIC_OP_RETURN(op, _unchecked, asm_op##u, , )	\
+				     __ATOMIC_OP_RETURN(op, , asm_op, __OVERFLOW_POST, __OVERFLOW_EXTABLE)
+
+#define ATOMIC_OPS(op, asm_op)						      \
+	ATOMIC_OP(op, asm_op)						      \
+	ATOMIC_OP_RETURN(op, asm_op)
 
-ATOMIC_OPS(add, +=, addu)
-ATOMIC_OPS(sub, -=, subu)
+ATOMIC_OPS(add, add)
+ATOMIC_OPS(sub, sub)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
+#undef __ATOMIC_OP_RETURN
 #undef ATOMIC_OP
+#undef __ATOMIC_OP
 
 /*
  * atomic_sub_if_positive - conditionally subtract integer from atomic variable
@@ -149,7 +218,7 @@ ATOMIC_OPS(sub, -=, subu)
  * Atomically test @v and subtract @i if @v is greater or equal than @i.
  * The function returns the old value of @v minus @i.
  */
-static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
+static __inline__ int atomic_sub_if_positive(int i, atomic_t *v)
 {
 	int result;
 
@@ -159,7 +228,7 @@ static __inline__ int atomic_sub_if_posi
 		int temp;
 
 		__asm__ __volatile__(
-		"	.set	arch=r4000				\n"
+		"	.set	"MIPS_ISA_LEVEL"			\n"
 		"1:	ll	%1, %2		# atomic_sub_if_positive\n"
 		"	subu	%0, %1, %3				\n"
 		"	bltz	%0, 1f					\n"
@@ -208,8 +277,26 @@ static __inline__ int atomic_sub_if_posi
 	return result;
 }
 
-#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), (new)))
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	return cmpxchg(&v->counter, old, new);
+}
+
+static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *v, int old,
+					   int new)
+{
+	return cmpxchg(&(v->counter), old, new);
+}
+
+static inline int atomic_xchg(atomic_t *v, int new)
+{
+	return xchg(&v->counter, new);
+}
+
+static inline int atomic_xchg_unchecked(atomic_unchecked_t *v, int new)
+{
+	return xchg(&(v->counter), new);
+}
 
 /**
  * __atomic_add_unless - add unless the number is a given value
@@ -237,6 +324,10 @@ static __inline__ int __atomic_add_unles
 
 #define atomic_dec_return(v) atomic_sub_return(1, (v))
 #define atomic_inc_return(v) atomic_add_return(1, (v))
+static __inline__ int atomic_inc_return_unchecked(atomic_unchecked_t *v)
+{
+	return atomic_add_return_unchecked(1, v);
+}
 
 /*
  * atomic_sub_and_test - subtract value from variable and test result
@@ -258,6 +349,10 @@ static __inline__ int __atomic_add_unles
  * other cases.
  */
 #define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
+static __inline__ int atomic_inc_and_test_unchecked(atomic_unchecked_t *v)
+{
+	return atomic_add_return_unchecked(1, v) == 0;
+}
 
 /*
  * atomic_dec_and_test - decrement by 1 and test
@@ -282,6 +377,10 @@ static __inline__ int __atomic_add_unles
  * Atomically increments @v by 1.
  */
 #define atomic_inc(v) atomic_add(1, (v))
+static __inline__ void atomic_inc_unchecked(atomic_unchecked_t *v)
+{
+	atomic_add_unchecked(1, v);
+}
 
 /*
  * atomic_dec - decrement and test
@@ -290,6 +389,10 @@ static __inline__ int __atomic_add_unles
  * Atomically decrements @v by 1.
  */
 #define atomic_dec(v) atomic_sub(1, (v))
+static __inline__ void atomic_dec_unchecked(atomic_unchecked_t *v)
+{
+	atomic_sub_unchecked(1, v);
+}
 
 /*
  * atomic_add_negative - add and test if negative
@@ -311,54 +414,77 @@ static __inline__ int __atomic_add_unles
  * @v: pointer of type atomic64_t
  *
  */
-#define atomic64_read(v)	ACCESS_ONCE((v)->counter)
+static inline long atomic64_read(const atomic64_t *v)
+{
+	return ACCESS_ONCE(v->counter);
+}
+
+static inline long atomic64_read_unchecked(const atomic64_unchecked_t *v)
+{
+	return ACCESS_ONCE(v->counter);
+}
 
 /*
  * atomic64_set - set atomic variable
  * @v: pointer of type atomic64_t
  * @i: required value
  */
-#define atomic64_set(v, i)	((v)->counter = (i))
+static inline void atomic64_set(atomic64_t *v, long i)
+{
+	v->counter = i;
+}
+
+static inline void atomic64_set_unchecked(atomic64_unchecked_t *v, long i)
+{
+	v->counter = i;
+}
 
-#define ATOMIC64_OP(op, c_op, asm_op)					      \
-static __inline__ void atomic64_##op(long i, atomic64_t * v)		      \
+#define __ATOMIC64_OP(op, suffix, asm_op, extable)			      \
+static inline void atomic64_##op##suffix(long i, atomic64##suffix##_t * v)    \
 {									      \
 	if (kernel_uses_llsc && R10000_LLSC_WAR) {			      \
 		long temp;						      \
 									      \
 		__asm__ __volatile__(					      \
-		"	.set	arch=r4000				\n"   \
-		"1:	lld	%0, %1		# atomic64_" #op "	\n"   \
-		"	" #asm_op " %0, %2				\n"   \
+		"	.set	"MIPS_ISA_LEVEL"			\n"   \
+		"1:	lld	%0, %1		# atomic64_" #op #suffix "\n" \
+		"2:	" #asm_op " %0, %2				\n"   \
 		"	scd	%0, %1					\n"   \
 		"	beqzl	%0, 1b					\n"   \
+		extable							      \
 		"	.set	mips0					\n"   \
 		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)	      \
 		: "Ir" (i));						      \
 	} else if (kernel_uses_llsc) {					      \
 		long temp;						      \
 									      \
-		do {							      \
-			__asm__ __volatile__(				      \
-			"	.set	"MIPS_ISA_LEVEL"		\n"   \
-			"	lld	%0, %1		# atomic64_" #op "\n" \
-			"	" #asm_op " %0, %2			\n"   \
-			"	scd	%0, %1				\n"   \
-			"	.set	mips0				\n"   \
-			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)      \
-			: "Ir" (i));					      \
-		} while (unlikely(!temp));				      \
+		__asm__ __volatile__(					      \
+		"	.set	"MIPS_ISA_LEVEL"			\n"   \
+		"1:	lld	%0, %1		# atomic64_" #op #suffix "\n" \
+		"2:	" #asm_op " %0, %2				\n"   \
+		"	scd	%0, %1					\n"   \
+		"	beqz	%0, 1b					\n"   \
+			extable						      \
+		"	.set	mips0					\n"   \
+		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)	      \
+		: "Ir" (i));						      \
 	} else {							      \
 		unsigned long flags;					      \
 									      \
 		raw_local_irq_save(flags);				      \
-		v->counter c_op i;					      \
+		__asm__ __volatile__(					      \
+		"2:	" #asm_op " %0, %1				\n"   \
+		extable							      \
+		: "+" GCC_OFF_SMALL_ASM() (v->counter) : "Ir" (i));	      \
 		raw_local_irq_restore(flags);				      \
 	}								      \
 }
 
-#define ATOMIC64_OP_RETURN(op, c_op, asm_op)				      \
-static __inline__ long atomic64_##op##_return(long i, atomic64_t * v)	      \
+#define ATOMIC64_OP(op, asm_op) __ATOMIC64_OP(op, _unchecked, asm_op##u, )    \
+				__ATOMIC64_OP(op, , asm_op, __OVERFLOW_EXTABLE)
+
+#define __ATOMIC64_OP_RETURN(op, suffix, asm_op, post_op, extable)	      \
+static inline long atomic64_##op##_return##suffix(long i, atomic64##suffix##_t * v)\
 {									      \
 	long result;							      \
 									      \
@@ -368,12 +494,15 @@ static __inline__ long atomic64_##op##_r
 		long temp;						      \
 									      \
 		__asm__ __volatile__(					      \
-		"	.set	arch=r4000				\n"   \
+		"	.set	mips3					\n"   \
 		"1:	lld	%1, %2		# atomic64_" #op "_return\n"  \
-		"	" #asm_op " %0, %1, %3				\n"   \
+		"2:	" #asm_op " %0, %1, %3				\n"   \
 		"	scd	%0, %2					\n"   \
 		"	beqzl	%0, 1b					\n"   \
-		"	" #asm_op " %0, %1, %3				\n"   \
+		post_op							      \
+		extable							      \
+		"4:	" #asm_op " %0, %1, %3				\n"   \
+		"5:							\n"   \
 		"	.set	mips0					\n"   \
 		: "=&r" (result), "=&r" (temp),				      \
 		  "+" GCC_OFF_SMALL_ASM() (v->counter)			      \
@@ -381,27 +510,35 @@ static __inline__ long atomic64_##op##_r
 	} else if (kernel_uses_llsc) {					      \
 		long temp;						      \
 									      \
-		do {							      \
-			__asm__ __volatile__(				      \
-			"	.set	"MIPS_ISA_LEVEL"		\n"   \
-			"	lld	%1, %2	# atomic64_" #op "_return\n"  \
-			"	" #asm_op " %0, %1, %3			\n"   \
-			"	scd	%0, %2				\n"   \
-			"	.set	mips0				\n"   \
-			: "=&r" (result), "=&r" (temp),			      \
-			  "=" GCC_OFF_SMALL_ASM() (v->counter)		      \
-			: "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter)	      \
-			: "memory");					      \
-		} while (unlikely(!result));				      \
+		__asm__ __volatile__(					      \
+		"	.set	"MIPS_ISA_LEVEL"			\n"   \
+		"1:	lld	%1, %2	# atomic64_" #op "_return" #suffix "\n"\
+		"2:	" #asm_op " %0, %1, %3				\n"   \
+		"	scd	%0, %2					\n"   \
+		"	beqz	%0, 1b					\n"   \
+		post_op							      \
+		extable							      \
+		"4:	" #asm_op " %0, %1, %3				\n"   \
+		"5:							\n"   \
+		"	.set	mips0					\n"   \
+		: "=&r" (result), "=&r" (temp),				      \
+		  "=" GCC_OFF_SMALL_ASM() (v->counter)			      \
+		: "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter)		      \
+		: "memory");						      \
 									      \
 		result = temp; result c_op i;				      \
 	} else {							      \
 		unsigned long flags;					      \
 									      \
 		raw_local_irq_save(flags);				      \
-		result = v->counter;					      \
-		result c_op i;						      \
-		v->counter = result;					      \
+		__asm__ __volatile__(					      \
+		"	ld	%0, %1					\n"   \
+		"2:	" #asm_op " %0, %1, %2				\n"   \
+		"	sd	%0, %1					\n"   \
+		"3:							\n"   \
+		extable							      \
+		: "=&r" (result), "+" GCC_OFF_SMALL_ASM() (v->counter)	      \
+		: "Ir" (i));						      \
 		raw_local_irq_restore(flags);				      \
 	}								      \
 									      \
@@ -410,16 +547,23 @@ static __inline__ long atomic64_##op##_r
 	return result;							      \
 }
 
-#define ATOMIC64_OPS(op, c_op, asm_op)					      \
-	ATOMIC64_OP(op, c_op, asm_op)					      \
-	ATOMIC64_OP_RETURN(op, c_op, asm_op)
+#define ATOMIC64_OP_RETURN(op, asm_op) __ATOMIC64_OP_RETURN(op, _unchecked, asm_op##u, , )	\
+				       __ATOMIC64_OP_RETURN(op, , asm_op, __OVERFLOW_POST, __OVERFLOW_EXTABLE)
+
+#define ATOMIC64_OPS(op, asm_op)						\
+	ATOMIC64_OP(op, asm_op)							\
+	ATOMIC64_OP_RETURN(op, asm_op)
 
-ATOMIC64_OPS(add, +=, daddu)
-ATOMIC64_OPS(sub, -=, dsubu)
+ATOMIC64_OPS(add, dadd)
+ATOMIC64_OPS(sub, dsub)
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
+#undef __ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
+#undef __ATOMIC64_OP
+#undef __OVERFLOW_EXTABLE
+#undef __OVERFLOW_POST
 
 /*
  * atomic64_sub_if_positive - conditionally subtract integer from atomic
@@ -430,7 +574,7 @@ ATOMIC64_OPS(sub, -=, dsubu)
  * Atomically test @v and subtract @i if @v is greater or equal than @i.
  * The function returns the old value of @v minus @i.
  */
-static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
+static __inline__ long atomic64_sub_if_positive(long i, atomic64_t *v)
 {
 	long result;
 
@@ -440,7 +584,7 @@ static __inline__ long atomic64_sub_if_p
 		long temp;
 
 		__asm__ __volatile__(
-		"	.set	arch=r4000				\n"
+		"	.set	"MIPS_ISA_LEVEL"			\n"
 		"1:	lld	%1, %2		# atomic64_sub_if_positive\n"
 		"	dsubu	%0, %1, %3				\n"
 		"	bltz	%0, 1f					\n"
@@ -489,9 +633,26 @@ static __inline__ long atomic64_sub_if_p
 	return result;
 }
 
-#define atomic64_cmpxchg(v, o, n) \
-	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
-#define atomic64_xchg(v, new) (xchg(&((v)->counter), (new)))
+static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
+{
+	return cmpxchg(&v->counter, old, new);
+}
+
+static inline long atomic64_cmpxchg_unchecked(atomic64_unchecked_t *v, long old,
+					      long new)
+{
+	return cmpxchg(&(v->counter), old, new);
+}
+
+static inline long atomic64_xchg(atomic64_t *v, long new)
+{
+	return xchg(&v->counter, new);
+}
+
+static inline long atomic64_xchg_unchecked(atomic64_unchecked_t *v, long new)
+{
+	return xchg(&(v->counter), new);
+}
 
 /**
  * atomic64_add_unless - add unless the number is a given value
@@ -521,6 +682,7 @@ static __inline__ int atomic64_add_unles
 
 #define atomic64_dec_return(v) atomic64_sub_return(1, (v))
 #define atomic64_inc_return(v) atomic64_add_return(1, (v))
+#define atomic64_inc_return_unchecked(v) atomic64_add_return_unchecked(1, (v))
 
 /*
  * atomic64_sub_and_test - subtract value from variable and test result
@@ -542,6 +704,7 @@ static __inline__ int atomic64_add_unles
  * other cases.
  */
 #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
+#define atomic64_inc_and_test_unchecked(v) (atomic64_add_return_unchecked(1, (v)) == 0)
 
 /*
  * atomic64_dec_and_test - decrement by 1 and test
@@ -566,6 +729,7 @@ static __inline__ int atomic64_add_unles
  * Atomically increments @v by 1.
  */
 #define atomic64_inc(v) atomic64_add(1, (v))
+#define atomic64_inc_unchecked(v) atomic64_add_unchecked(1, (v))
 
 /*
  * atomic64_dec - decrement and test
@@ -574,6 +738,7 @@ static __inline__ int atomic64_add_unles
  * Atomically decrements @v by 1.
  */
 #define atomic64_dec(v) atomic64_sub(1, (v))
+#define atomic64_dec_unchecked(v) atomic64_sub_unchecked(1, (v))
 
 /*
  * atomic64_add_negative - add and test if negative
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/include/asm/barrier.h linux-4.0.9-pax/arch/mips/include/asm/barrier.h
--- linux-4.0.9/arch/mips/include/asm/barrier.h	2015-03-18 15:21:50.192349252 +0100
+++ linux-4.0.9-pax/arch/mips/include/asm/barrier.h	2015-04-15 12:13:52.866318624 +0200
@@ -133,7 +133,7 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	ACCESS_ONCE_RW(*p) = (v);					\
 } while (0)
 
 #define smp_load_acquire(p)						\
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/include/asm/elf.h linux-4.0.9-pax/arch/mips/include/asm/elf.h
--- linux-4.0.9/arch/mips/include/asm/elf.h	2015-05-13 20:51:57.644791954 +0200
+++ linux-4.0.9-pax/arch/mips/include/asm/elf.h	2015-05-13 20:52:12.148792741 +0200
@@ -410,15 +410,18 @@ extern const char *__elf_platform;
 #define ELF_ET_DYN_BASE		(TASK_SIZE / 3 * 2)
 #endif
 
+#ifdef CONFIG_PAX_ASLR
+#define PAX_ELF_ET_DYN_BASE	(TASK_IS_32BIT_ADDR ? 0x00400000UL : 0x00400000UL)
+
+#define PAX_DELTA_MMAP_LEN	(TASK_IS_32BIT_ADDR ? 27-PAGE_SHIFT : 36-PAGE_SHIFT)
+#define PAX_DELTA_STACK_LEN	(TASK_IS_32BIT_ADDR ? 27-PAGE_SHIFT : 36-PAGE_SHIFT)
+#endif
+
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
 struct linux_binprm;
 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
 				       int uses_interp);
 
-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
 struct arch_elf_state {
 	int fp_abi;
 	int interp_fp_abi;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/include/asm/exec.h linux-4.0.9-pax/arch/mips/include/asm/exec.h
--- linux-4.0.9/arch/mips/include/asm/exec.h	2015-03-18 15:21:50.192349252 +0100
+++ linux-4.0.9-pax/arch/mips/include/asm/exec.h	2015-04-15 12:13:52.866318624 +0200
@@ -12,6 +12,6 @@
 #ifndef _ASM_EXEC_H
 #define _ASM_EXEC_H
 
-extern unsigned long arch_align_stack(unsigned long sp);
+#define arch_align_stack(x) ((x) & ~0xfUL)
 
 #endif /* _ASM_EXEC_H */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/include/asm/hw_irq.h linux-4.0.9-pax/arch/mips/include/asm/hw_irq.h
--- linux-4.0.9/arch/mips/include/asm/hw_irq.h	2015-03-18 15:21:50.192349252 +0100
+++ linux-4.0.9-pax/arch/mips/include/asm/hw_irq.h	2015-04-15 12:13:52.866318624 +0200
@@ -10,7 +10,7 @@
 
 #include <linux/atomic.h>
 
-extern atomic_t irq_err_count;
+extern atomic_unchecked_t irq_err_count;
 
 /*
  * interrupt-retrigger: NOP for now. This may not be appropriate for all
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/include/asm/local.h linux-4.0.9-pax/arch/mips/include/asm/local.h
--- linux-4.0.9/arch/mips/include/asm/local.h	2015-04-13 11:20:54.490617854 +0200
+++ linux-4.0.9-pax/arch/mips/include/asm/local.h	2015-04-15 12:13:52.866318624 +0200
@@ -13,15 +13,25 @@ typedef struct
 	atomic_long_t a;
 } local_t;
 
+typedef struct {
+	atomic_long_unchecked_t a;
+} local_unchecked_t;
+
 #define LOCAL_INIT(i)	{ ATOMIC_LONG_INIT(i) }
 
 #define local_read(l)	atomic_long_read(&(l)->a)
+#define local_read_unchecked(l)	atomic_long_read_unchecked(&(l)->a)
 #define local_set(l, i) atomic_long_set(&(l)->a, (i))
+#define local_set_unchecked(l, i)	atomic_long_set_unchecked(&(l)->a, (i))
 
 #define local_add(i, l) atomic_long_add((i), (&(l)->a))
+#define local_add_unchecked(i, l) atomic_long_add_unchecked((i), (&(l)->a))
 #define local_sub(i, l) atomic_long_sub((i), (&(l)->a))
+#define local_sub_unchecked(i, l) atomic_long_sub_unchecked((i), (&(l)->a))
 #define local_inc(l)	atomic_long_inc(&(l)->a)
+#define local_inc_unchecked(l)	atomic_long_inc_unchecked(&(l)->a)
 #define local_dec(l)	atomic_long_dec(&(l)->a)
+#define local_dec_unchecked(l)	atomic_long_dec_unchecked(&(l)->a)
 
 /*
  * Same as above, but return the result value
@@ -71,6 +81,51 @@ static __inline__ long local_add_return(
 	return result;
 }
 
+static __inline__ long local_add_return_unchecked(long i, local_unchecked_t * l)
+{
+	unsigned long result;
+
+	if (kernel_uses_llsc && R10000_LLSC_WAR) {
+		unsigned long temp;
+
+		__asm__ __volatile__(
+		"	.set	mips3					\n"
+		"1:"	__LL	"%1, %2		# local_add_return	\n"
+		"	addu	%0, %1, %3				\n"
+			__SC	"%0, %2					\n"
+		"	beqzl	%0, 1b					\n"
+		"	addu	%0, %1, %3				\n"
+		"	.set	mips0					\n"
+		: "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+		: "Ir" (i), "m" (l->a.counter)
+		: "memory");
+	} else if (kernel_uses_llsc) {
+		unsigned long temp;
+
+		__asm__ __volatile__(
+		"	.set	mips3					\n"
+		"1:"	__LL	"%1, %2		# local_add_return	\n"
+		"	addu	%0, %1, %3				\n"
+			__SC	"%0, %2					\n"
+		"	beqz	%0, 1b					\n"
+		"	addu	%0, %1, %3				\n"
+		"	.set	mips0					\n"
+		: "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+		: "Ir" (i), "m" (l->a.counter)
+		: "memory");
+	} else {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		result = l->a.counter;
+		result += i;
+		l->a.counter = result;
+		local_irq_restore(flags);
+	}
+
+	return result;
+}
+
 static __inline__ long local_sub_return(long i, local_t * l)
 {
 	unsigned long result;
@@ -118,6 +173,8 @@ static __inline__ long local_sub_return(
 
 #define local_cmpxchg(l, o, n) \
 	((long)cmpxchg_local(&((l)->a.counter), (o), (n)))
+#define local_cmpxchg_unchecked(l, o, n) \
+	((long)cmpxchg_local(&((l)->a.counter), (o), (n)))
 #define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n)))
 
 /**
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/include/asm/page.h linux-4.0.9-pax/arch/mips/include/asm/page.h
--- linux-4.0.9/arch/mips/include/asm/page.h	2015-03-18 15:21:50.192349252 +0100
+++ linux-4.0.9-pax/arch/mips/include/asm/page.h	2015-04-15 12:13:52.866318624 +0200
@@ -120,7 +120,7 @@ extern void copy_user_highpage(struct pa
   #ifdef CONFIG_CPU_MIPS32
     typedef struct { unsigned long pte_low, pte_high; } pte_t;
     #define pte_val(x)	  ((x).pte_low | ((unsigned long long)(x).pte_high << 32))
-    #define __pte(x)	  ({ pte_t __pte = {(x), ((unsigned long long)(x)) >> 32}; __pte; })
+    #define __pte(x)	  ({ pte_t __pte = {(x), (x) >> 32}; __pte; })
   #else
      typedef struct { unsigned long long pte; } pte_t;
      #define pte_val(x) ((x).pte)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/include/asm/pgalloc.h linux-4.0.9-pax/arch/mips/include/asm/pgalloc.h
--- linux-4.0.9/arch/mips/include/asm/pgalloc.h	2015-03-18 15:21:50.192349252 +0100
+++ linux-4.0.9-pax/arch/mips/include/asm/pgalloc.h	2015-04-15 12:13:52.866318624 +0200
@@ -37,6 +37,11 @@ static inline void pud_populate(struct m
 {
 	set_pud(pud, __pud((unsigned long)pmd));
 }
+
+static inline void pud_populate_kernel(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	pud_populate(mm, pud, pmd);
+}
 #endif
 
 /*
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/include/asm/pgtable.h linux-4.0.9-pax/arch/mips/include/asm/pgtable.h
--- linux-4.0.9/arch/mips/include/asm/pgtable.h	2015-05-13 20:51:57.672791955 +0200
+++ linux-4.0.9-pax/arch/mips/include/asm/pgtable.h	2015-05-13 20:52:12.148792741 +0200
@@ -20,6 +20,9 @@
 #include <asm/io.h>
 #include <asm/pgtable-bits.h>
 
+#define ktla_ktva(addr)		(addr)
+#define ktva_ktla(addr)		(addr)
+
 struct mm_struct;
 struct vm_area_struct;
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/include/asm/uaccess.h linux-4.0.9-pax/arch/mips/include/asm/uaccess.h
--- linux-4.0.9/arch/mips/include/asm/uaccess.h	2015-03-18 15:21:50.196349252 +0100
+++ linux-4.0.9-pax/arch/mips/include/asm/uaccess.h	2015-04-15 12:13:52.870318624 +0200
@@ -130,6 +130,7 @@ extern u64 __ua_limit;
 	__ok == 0;							\
 })
 
+#define access_ok_noprefault(type, addr, size) access_ok((type), (addr), (size))
 #define access_ok(type, addr, size)					\
 	likely(__access_ok((addr), (size), __access_mask))
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/kernel/binfmt_elfn32.c linux-4.0.9-pax/arch/mips/kernel/binfmt_elfn32.c
--- linux-4.0.9/arch/mips/kernel/binfmt_elfn32.c	2015-03-18 15:21:50.196349252 +0100
+++ linux-4.0.9-pax/arch/mips/kernel/binfmt_elfn32.c	2015-04-15 12:13:52.870318624 +0200
@@ -50,6 +50,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_N
 #undef ELF_ET_DYN_BASE
 #define ELF_ET_DYN_BASE		(TASK32_SIZE / 3 * 2)
 
+#ifdef CONFIG_PAX_ASLR
+#define PAX_ELF_ET_DYN_BASE	(TASK_IS_32BIT_ADDR ? 0x00400000UL : 0x00400000UL)
+
+#define PAX_DELTA_MMAP_LEN	(TASK_IS_32BIT_ADDR ? 27-PAGE_SHIFT : 36-PAGE_SHIFT)
+#define PAX_DELTA_STACK_LEN	(TASK_IS_32BIT_ADDR ? 27-PAGE_SHIFT : 36-PAGE_SHIFT)
+#endif
+
 #include <asm/processor.h>
 #include <linux/module.h>
 #include <linux/elfcore.h>
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/kernel/binfmt_elfo32.c linux-4.0.9-pax/arch/mips/kernel/binfmt_elfo32.c
--- linux-4.0.9/arch/mips/kernel/binfmt_elfo32.c	2015-03-18 15:21:50.196349252 +0100
+++ linux-4.0.9-pax/arch/mips/kernel/binfmt_elfo32.c	2015-04-15 12:13:52.870318624 +0200
@@ -70,6 +70,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_N
 #undef ELF_ET_DYN_BASE
 #define ELF_ET_DYN_BASE		(TASK32_SIZE / 3 * 2)
 
+#ifdef CONFIG_PAX_ASLR
+#define PAX_ELF_ET_DYN_BASE	(TASK_IS_32BIT_ADDR ? 0x00400000UL : 0x00400000UL)
+
+#define PAX_DELTA_MMAP_LEN	(TASK_IS_32BIT_ADDR ? 27-PAGE_SHIFT : 36-PAGE_SHIFT)
+#define PAX_DELTA_STACK_LEN	(TASK_IS_32BIT_ADDR ? 27-PAGE_SHIFT : 36-PAGE_SHIFT)
+#endif
+
 #include <asm/processor.h>
 
 #include <linux/module.h>
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/kernel/i8259.c linux-4.0.9-pax/arch/mips/kernel/i8259.c
--- linux-4.0.9/arch/mips/kernel/i8259.c	2015-03-18 15:21:50.196349252 +0100
+++ linux-4.0.9-pax/arch/mips/kernel/i8259.c	2015-04-15 12:13:52.870318624 +0200
@@ -202,7 +202,7 @@ spurious_8259A_irq:
 			printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq);
 			spurious_irq_mask |= irqmask;
 		}
-		atomic_inc(&irq_err_count);
+		atomic_inc_unchecked(&irq_err_count);
 		/*
 		 * Theoretically we do not have to handle this IRQ,
 		 * but in Linux this does not cause problems and is
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/kernel/irq.c linux-4.0.9-pax/arch/mips/kernel/irq.c
--- linux-4.0.9/arch/mips/kernel/irq.c	2015-06-26 10:29:22.446538574 +0200
+++ linux-4.0.9-pax/arch/mips/kernel/irq.c	2015-06-26 10:29:32.594538551 +0200
@@ -76,17 +76,17 @@ void ack_bad_irq(unsigned int irq)
 	printk("unexpected IRQ # %d\n", irq);
 }
 
-atomic_t irq_err_count;
+atomic_unchecked_t irq_err_count;
 
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
-	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
+	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read_unchecked(&irq_err_count));
 	return 0;
 }
 
 asmlinkage void spurious_interrupt(void)
 {
-	atomic_inc(&irq_err_count);
+	atomic_inc_unchecked(&irq_err_count);
 }
 
 void __init init_IRQ(void)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/kernel/irq-gt641xx.c linux-4.0.9-pax/arch/mips/kernel/irq-gt641xx.c
--- linux-4.0.9/arch/mips/kernel/irq-gt641xx.c	2015-03-18 15:21:50.196349252 +0100
+++ linux-4.0.9-pax/arch/mips/kernel/irq-gt641xx.c	2015-04-15 12:13:52.870318624 +0200
@@ -110,7 +110,7 @@ void gt641xx_irq_dispatch(void)
 		}
 	}
 
-	atomic_inc(&irq_err_count);
+	atomic_inc_unchecked(&irq_err_count);
 }
 
 void __init gt641xx_irq_init(void)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/kernel/pm-cps.c linux-4.0.9-pax/arch/mips/kernel/pm-cps.c
--- linux-4.0.9/arch/mips/kernel/pm-cps.c	2015-03-18 15:21:50.196349252 +0100
+++ linux-4.0.9-pax/arch/mips/kernel/pm-cps.c	2015-04-15 12:13:52.870318624 +0200
@@ -172,7 +172,7 @@ int cps_pm_enter_state(enum cps_pm_state
 	nc_core_ready_count = nc_addr;
 
 	/* Ensure ready_count is zero-initialised before the assembly runs */
-	ACCESS_ONCE(*nc_core_ready_count) = 0;
+	ACCESS_ONCE_RW(*nc_core_ready_count) = 0;
 	coupled_barrier(&per_cpu(pm_barrier, core), online);
 
 	/* Run the generated entry code */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/kernel/process.c linux-4.0.9-pax/arch/mips/kernel/process.c
--- linux-4.0.9/arch/mips/kernel/process.c	2015-04-13 11:20:54.738617841 +0200
+++ linux-4.0.9-pax/arch/mips/kernel/process.c	2015-04-15 12:13:52.870318624 +0200
@@ -535,18 +535,6 @@ out:
 	return pc;
 }
 
-/*
- * Don't forget that the stack pointer must be aligned on a 8 bytes
- * boundary for 32-bits ABI and 16 bytes for 64-bits ABI.
- */
-unsigned long arch_align_stack(unsigned long sp)
-{
-	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-		sp -= get_random_int() & ~PAGE_MASK;
-
-	return sp & ALMASK;
-}
-
 static void arch_dump_stack(void *info)
 {
 	struct pt_regs *regs;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/kernel/reset.c linux-4.0.9-pax/arch/mips/kernel/reset.c
--- linux-4.0.9/arch/mips/kernel/reset.c	2015-03-18 15:21:50.196349252 +0100
+++ linux-4.0.9-pax/arch/mips/kernel/reset.c	2015-04-15 12:13:52.870318624 +0200
@@ -13,6 +13,7 @@
 #include <linux/reboot.h>
 
 #include <asm/reboot.h>
+#include <asm/bug.h>
 
 /*
  * Urgs ...  Too many MIPS machines to handle this in a generic way.
@@ -29,16 +30,19 @@ void machine_restart(char *command)
 {
 	if (_machine_restart)
 		_machine_restart(command);
+	BUG();
 }
 
 void machine_halt(void)
 {
 	if (_machine_halt)
 		_machine_halt();
+	BUG();
 }
 
 void machine_power_off(void)
 {
 	if (pm_power_off)
 		pm_power_off();
+	BUG();
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/kernel/sync-r4k.c linux-4.0.9-pax/arch/mips/kernel/sync-r4k.c
--- linux-4.0.9/arch/mips/kernel/sync-r4k.c	2015-03-18 15:21:50.196349252 +0100
+++ linux-4.0.9-pax/arch/mips/kernel/sync-r4k.c	2015-04-15 12:13:52.870318624 +0200
@@ -18,8 +18,8 @@
 #include <asm/mipsregs.h>
 
 static atomic_t count_start_flag = ATOMIC_INIT(0);
-static atomic_t count_count_start = ATOMIC_INIT(0);
-static atomic_t count_count_stop = ATOMIC_INIT(0);
+static atomic_unchecked_t count_count_start = ATOMIC_INIT(0);
+static atomic_unchecked_t count_count_stop = ATOMIC_INIT(0);
 static atomic_t count_reference = ATOMIC_INIT(0);
 
 #define COUNTON 100
@@ -58,13 +58,13 @@ void synchronise_count_master(int cpu)
 
 	for (i = 0; i < NR_LOOPS; i++) {
 		/* slaves loop on '!= 2' */
-		while (atomic_read(&count_count_start) != 1)
+		while (atomic_read_unchecked(&count_count_start) != 1)
 			mb();
-		atomic_set(&count_count_stop, 0);
+		atomic_set_unchecked(&count_count_stop, 0);
 		smp_wmb();
 
 		/* this lets the slaves write their count register */
-		atomic_inc(&count_count_start);
+		atomic_inc_unchecked(&count_count_start);
 
 		/*
 		 * Everyone initialises count in the last loop:
@@ -75,11 +75,11 @@ void synchronise_count_master(int cpu)
 		/*
 		 * Wait for all slaves to leave the synchronization point:
 		 */
-		while (atomic_read(&count_count_stop) != 1)
+		while (atomic_read_unchecked(&count_count_stop) != 1)
 			mb();
-		atomic_set(&count_count_start, 0);
+		atomic_set_unchecked(&count_count_start, 0);
 		smp_wmb();
-		atomic_inc(&count_count_stop);
+		atomic_inc_unchecked(&count_count_stop);
 	}
 	/* Arrange for an interrupt in a short while */
 	write_c0_compare(read_c0_count() + COUNTON);
@@ -112,8 +112,8 @@ void synchronise_count_slave(int cpu)
 	initcount = atomic_read(&count_reference);
 
 	for (i = 0; i < NR_LOOPS; i++) {
-		atomic_inc(&count_count_start);
-		while (atomic_read(&count_count_start) != 2)
+		atomic_inc_unchecked(&count_count_start);
+		while (atomic_read_unchecked(&count_count_start) != 2)
 			mb();
 
 		/*
@@ -122,8 +122,8 @@ void synchronise_count_slave(int cpu)
 		if (i == NR_LOOPS-1)
 			write_c0_count(initcount);
 
-		atomic_inc(&count_count_stop);
-		while (atomic_read(&count_count_stop) != 2)
+		atomic_inc_unchecked(&count_count_stop);
+		while (atomic_read_unchecked(&count_count_stop) != 2)
 			mb();
 	}
 	/* Arrange for an interrupt in a short while */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/kernel/traps.c linux-4.0.9-pax/arch/mips/kernel/traps.c
--- linux-4.0.9/arch/mips/kernel/traps.c	2015-04-13 11:20:54.798617838 +0200
+++ linux-4.0.9-pax/arch/mips/kernel/traps.c	2015-04-15 12:13:52.870318624 +0200
@@ -689,7 +689,18 @@ asmlinkage void do_ov(struct pt_regs *re
 	siginfo_t info;
 
 	prev_state = exception_enter();
-	die_if_kernel("Integer overflow", regs);
+	if (unlikely(!user_mode(regs))) {
+
+#ifdef CONFIG_PAX_REFCOUNT
+		if (fixup_exception(regs)) {
+			pax_report_refcount_overflow(regs);
+			exception_exit(prev_state);
+			return;
+		}
+#endif
+
+		die("Integer overflow", regs);
+	}
 
 	info.si_code = FPE_INTOVF;
 	info.si_signo = SIGFPE;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/kvm/mips.c linux-4.0.9-pax/arch/mips/kvm/mips.c
--- linux-4.0.9/arch/mips/kvm/mips.c	2015-07-10 20:07:37.551036134 +0200
+++ linux-4.0.9-pax/arch/mips/kvm/mips.c	2015-07-10 20:07:47.711035591 +0200
@@ -816,7 +816,7 @@ long kvm_arch_vm_ioctl(struct file *filp
 	return r;
 }
 
-int kvm_arch_init(void *opaque)
+int kvm_arch_init(const void *opaque)
 {
 	if (kvm_mips_callbacks) {
 		kvm_err("kvm: module already exists\n");
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/mm/fault.c linux-4.0.9-pax/arch/mips/mm/fault.c
--- linux-4.0.9/arch/mips/mm/fault.c	2015-04-13 11:20:54.878617833 +0200
+++ linux-4.0.9-pax/arch/mips/mm/fault.c	2015-04-15 12:13:52.870318624 +0200
@@ -31,6 +31,23 @@
 
 int show_unhandled_signals = 1;
 
+#ifdef CONFIG_PAX_PAGEEXEC
+void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
+{
+	unsigned long i;
+
+	printk(KERN_ERR "PAX: bytes at PC: ");
+	for (i = 0; i < 5; i++) {
+		unsigned int c;
+		if (get_user(c, (unsigned int *)pc+i))
+			printk(KERN_CONT "???????? ");
+		else
+			printk(KERN_CONT "%08x ", c);
+	}
+	printk("\n");
+}
+#endif
+
 /*
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
@@ -206,6 +223,14 @@ bad_area:
 bad_area_nosemaphore:
 	/* User mode accesses just cause a SIGSEGV */
 	if (user_mode(regs)) {
+
+#ifdef CONFIG_PAX_PAGEEXEC
+		if (cpu_has_rixi && (mm->pax_flags & MF_PAX_PAGEEXEC) && !write && address == instruction_pointer(regs)) {
+			pax_report_fault(regs, (void *)address, (void *)user_stack_pointer(regs));
+			do_group_exit(SIGKILL);
+		}
+#endif
+
 		tsk->thread.cp0_badvaddr = address;
 		tsk->thread.error_code = write;
 		if (show_unhandled_signals &&
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/mm/mmap.c linux-4.0.9-pax/arch/mips/mm/mmap.c
--- linux-4.0.9/arch/mips/mm/mmap.c	2015-03-18 15:21:50.196349252 +0100
+++ linux-4.0.9-pax/arch/mips/mm/mmap.c	2015-04-15 12:13:52.870318624 +0200
@@ -84,6 +84,11 @@ static unsigned long arch_get_unmapped_a
 		do_color_align = 1;
 
 	/* requesting a specific address */
+
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(current->mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	if (addr) {
 		if (do_color_align)
 			addr = COLOUR_ALIGN(addr, pgoff);
@@ -91,8 +96,7 @@ static unsigned long arch_get_unmapped_a
 			addr = PAGE_ALIGN(addr);
 
 		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len))
 			return addr;
 	}
 
@@ -146,6 +150,10 @@ void arch_pick_mmap_layout(struct mm_str
 {
 	unsigned long random_factor = 0UL;
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	if (current->flags & PF_RANDOMIZE) {
 		random_factor = get_random_int();
 		random_factor = random_factor << PAGE_SHIFT;
@@ -157,38 +165,23 @@ void arch_pick_mmap_layout(struct mm_str
 
 	if (mmap_is_legacy()) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			mm->mmap_base += mm->delta_mmap;
+#endif
+
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
 		mm->mmap_base = mmap_base(random_factor);
-		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-	}
-}
-
-static inline unsigned long brk_rnd(void)
-{
-	unsigned long rnd = get_random_int();
-
-	rnd = rnd << PAGE_SHIFT;
-	/* 8MB for 32bit, 256MB for 64bit */
-	if (TASK_IS_32BIT_ADDR)
-		rnd = rnd & 0x7ffffful;
-	else
-		rnd = rnd & 0xffffffful;
 
-	return rnd;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	unsigned long base = mm->brk;
-	unsigned long ret;
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			mm->mmap_base -= mm->delta_mmap + mm->delta_stack;
+#endif
 
-	ret = PAGE_ALIGN(base + brk_rnd());
-
-	if (ret < mm->brk)
-		return mm->brk;
-
-	return ret;
+		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+	}
 }
 
 int __virt_addr_valid(const volatile void *kaddr)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/sgi-ip27/ip27-nmi.c linux-4.0.9-pax/arch/mips/sgi-ip27/ip27-nmi.c
--- linux-4.0.9/arch/mips/sgi-ip27/ip27-nmi.c	2015-03-18 15:21:50.200349252 +0100
+++ linux-4.0.9-pax/arch/mips/sgi-ip27/ip27-nmi.c	2015-04-15 12:13:52.870318624 +0200
@@ -187,9 +187,9 @@ void
 cont_nmi_dump(void)
 {
 #ifndef REAL_NMI_SIGNAL
-	static atomic_t nmied_cpus = ATOMIC_INIT(0);
+	static atomic_unchecked_t nmied_cpus = ATOMIC_INIT(0);
 
-	atomic_inc(&nmied_cpus);
+	atomic_inc_unchecked(&nmied_cpus);
 #endif
 	/*
 	 * Only allow 1 cpu to proceed
@@ -233,7 +233,7 @@ cont_nmi_dump(void)
 		udelay(10000);
 	}
 #else
-	while (atomic_read(&nmied_cpus) != num_online_cpus());
+	while (atomic_read_unchecked(&nmied_cpus) != num_online_cpus());
 #endif
 
 	/*
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/sni/rm200.c linux-4.0.9-pax/arch/mips/sni/rm200.c
--- linux-4.0.9/arch/mips/sni/rm200.c	2015-03-18 15:21:50.200349252 +0100
+++ linux-4.0.9-pax/arch/mips/sni/rm200.c	2015-04-15 12:13:52.870318624 +0200
@@ -270,7 +270,7 @@ spurious_8259A_irq:
 			       "spurious RM200 8259A interrupt: IRQ%d.\n", irq);
 			spurious_irq_mask |= irqmask;
 		}
-		atomic_inc(&irq_err_count);
+		atomic_inc_unchecked(&irq_err_count);
 		/*
 		 * Theoretically we do not have to handle this IRQ,
 		 * but in Linux this does not cause problems and is
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/vr41xx/common/icu.c linux-4.0.9-pax/arch/mips/vr41xx/common/icu.c
--- linux-4.0.9/arch/mips/vr41xx/common/icu.c	2015-03-18 15:21:50.200349252 +0100
+++ linux-4.0.9-pax/arch/mips/vr41xx/common/icu.c	2015-04-15 12:13:52.870318624 +0200
@@ -653,7 +653,7 @@ static int icu_get_irq(unsigned int irq)
 
 	printk(KERN_ERR "spurious ICU interrupt: %04x,%04x\n", pend1, pend2);
 
-	atomic_inc(&irq_err_count);
+	atomic_inc_unchecked(&irq_err_count);
 
 	return -1;
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/mips/vr41xx/common/irq.c linux-4.0.9-pax/arch/mips/vr41xx/common/irq.c
--- linux-4.0.9/arch/mips/vr41xx/common/irq.c	2015-03-18 15:21:50.200349252 +0100
+++ linux-4.0.9-pax/arch/mips/vr41xx/common/irq.c	2015-04-15 12:13:52.870318624 +0200
@@ -64,7 +64,7 @@ static void irq_dispatch(unsigned int ir
 	irq_cascade_t *cascade;
 
 	if (irq >= NR_IRQS) {
-		atomic_inc(&irq_err_count);
+		atomic_inc_unchecked(&irq_err_count);
 		return;
 	}
 
@@ -84,7 +84,7 @@ static void irq_dispatch(unsigned int ir
 		ret = cascade->get_irq(irq);
 		irq = ret;
 		if (ret < 0)
-			atomic_inc(&irq_err_count);
+			atomic_inc_unchecked(&irq_err_count);
 		else
 			irq_dispatch(irq);
 		if (!irqd_irq_disabled(idata) && chip->irq_unmask)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/parisc/include/asm/atomic.h linux-4.0.9-pax/arch/parisc/include/asm/atomic.h
--- linux-4.0.9/arch/parisc/include/asm/atomic.h	2015-03-18 15:21:50.200349252 +0100
+++ linux-4.0.9-pax/arch/parisc/include/asm/atomic.h	2015-04-15 12:13:52.870318624 +0200
@@ -273,6 +273,16 @@ static inline long atomic64_dec_if_posit
 	return dec;
 }
 
+#define atomic64_read_unchecked(v)		atomic64_read(v)
+#define atomic64_set_unchecked(v, i)		atomic64_set((v), (i))
+#define atomic64_add_unchecked(a, v)		atomic64_add((a), (v))
+#define atomic64_add_return_unchecked(a, v)	atomic64_add_return((a), (v))
+#define atomic64_sub_unchecked(a, v)		atomic64_sub((a), (v))
+#define atomic64_inc_unchecked(v)		atomic64_inc(v)
+#define atomic64_inc_return_unchecked(v)	atomic64_inc_return(v)
+#define atomic64_dec_unchecked(v)		atomic64_dec(v)
+#define atomic64_cmpxchg_unchecked(v, o, n)	atomic64_cmpxchg((v), (o), (n))
+
 #endif /* !CONFIG_64BIT */
 
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/parisc/include/asm/elf.h linux-4.0.9-pax/arch/parisc/include/asm/elf.h
--- linux-4.0.9/arch/parisc/include/asm/elf.h	2015-06-15 16:02:22.211183858 +0200
+++ linux-4.0.9-pax/arch/parisc/include/asm/elf.h	2015-06-15 16:02:33.015183834 +0200
@@ -342,6 +342,13 @@ struct pt_regs;	/* forward declaration..
 
 #define ELF_ET_DYN_BASE         (TASK_UNMAPPED_BASE + 0x01000000)
 
+#ifdef CONFIG_PAX_ASLR
+#define PAX_ELF_ET_DYN_BASE	0x10000UL
+
+#define PAX_DELTA_MMAP_LEN	16
+#define PAX_DELTA_STACK_LEN	16
+#endif
+
 /* This yields a mask that user programs can use to figure out what
    instruction set this CPU supports.  This could be done in user space,
    but it's not easy, and we've already done it here.  */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/parisc/include/asm/pgalloc.h linux-4.0.9-pax/arch/parisc/include/asm/pgalloc.h
--- linux-4.0.9/arch/parisc/include/asm/pgalloc.h	2015-04-13 11:20:55.298617811 +0200
+++ linux-4.0.9-pax/arch/parisc/include/asm/pgalloc.h	2015-04-15 12:13:52.874318624 +0200
@@ -61,6 +61,11 @@ static inline void pgd_populate(struct m
 		        (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT));
 }
 
+static inline void pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+{
+	pgd_populate(mm, pgd, pmd);
+}
+
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT,
@@ -96,6 +101,7 @@ static inline void pmd_free(struct mm_st
 #define pmd_alloc_one(mm, addr)		({ BUG(); ((pmd_t *)2); })
 #define pmd_free(mm, x)			do { } while (0)
 #define pgd_populate(mm, pmd, pte)	BUG()
+#define pgd_populate_kernel(mm, pmd, pte)	BUG()
 
 #endif
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/parisc/include/asm/pgtable.h linux-4.0.9-pax/arch/parisc/include/asm/pgtable.h
--- linux-4.0.9/arch/parisc/include/asm/pgtable.h	2015-04-13 11:20:55.298617811 +0200
+++ linux-4.0.9-pax/arch/parisc/include/asm/pgtable.h	2015-04-15 12:13:52.874318624 +0200
@@ -215,6 +215,17 @@ extern void purge_tlb_entries(struct mm_
 #define PAGE_EXECREAD   __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_EXEC |_PAGE_ACCESSED)
 #define PAGE_COPY       PAGE_EXECREAD
 #define PAGE_RWX        __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED)
+
+#ifdef CONFIG_PAX_PAGEEXEC
+# define PAGE_SHARED_NOEXEC	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_ACCESSED)
+# define PAGE_COPY_NOEXEC	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED)
+# define PAGE_READONLY_NOEXEC	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED)
+#else
+# define PAGE_SHARED_NOEXEC	PAGE_SHARED
+# define PAGE_COPY_NOEXEC	PAGE_COPY
+# define PAGE_READONLY_NOEXEC	PAGE_READONLY
+#endif
+
 #define PAGE_KERNEL	__pgprot(_PAGE_KERNEL)
 #define PAGE_KERNEL_EXEC	__pgprot(_PAGE_KERNEL_EXEC)
 #define PAGE_KERNEL_RWX	__pgprot(_PAGE_KERNEL_RWX)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/parisc/include/asm/uaccess.h linux-4.0.9-pax/arch/parisc/include/asm/uaccess.h
--- linux-4.0.9/arch/parisc/include/asm/uaccess.h	2015-04-13 11:20:55.310617810 +0200
+++ linux-4.0.9-pax/arch/parisc/include/asm/uaccess.h	2015-04-15 12:13:52.874318624 +0200
@@ -243,10 +243,10 @@ static inline unsigned long __must_check
                                           const void __user *from,
                                           unsigned long n)
 {
-        int sz = __compiletime_object_size(to);
+        size_t sz = __compiletime_object_size(to);
         int ret = -EFAULT;
 
-        if (likely(sz == -1 || !__builtin_constant_p(n) || sz >= n))
+        if (likely(sz == (size_t)-1 || !__builtin_constant_p(n) || sz >= n))
                 ret = __copy_from_user(to, from, n);
         else
                 copy_from_user_overflow();
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/parisc/kernel/module.c linux-4.0.9-pax/arch/parisc/kernel/module.c
--- linux-4.0.9/arch/parisc/kernel/module.c	2015-04-13 11:20:55.330617809 +0200
+++ linux-4.0.9-pax/arch/parisc/kernel/module.c	2015-04-15 12:13:52.874318624 +0200
@@ -98,16 +98,38 @@
 
 /* three functions to determine where in the module core
  * or init pieces the location is */
+static inline int in_init_rx(struct module *me, void *loc)
+{
+	return (loc >= me->module_init_rx &&
+		loc < (me->module_init_rx + me->init_size_rx));
+}
+
+static inline int in_init_rw(struct module *me, void *loc)
+{
+	return (loc >= me->module_init_rw &&
+		loc < (me->module_init_rw + me->init_size_rw));
+}
+
 static inline int in_init(struct module *me, void *loc)
 {
-	return (loc >= me->module_init &&
-		loc <= (me->module_init + me->init_size));
+	return in_init_rx(me, loc) || in_init_rw(me, loc);
+}
+
+static inline int in_core_rx(struct module *me, void *loc)
+{
+	return (loc >= me->module_core_rx &&
+		loc < (me->module_core_rx + me->core_size_rx));
+}
+
+static inline int in_core_rw(struct module *me, void *loc)
+{
+	return (loc >= me->module_core_rw &&
+		loc < (me->module_core_rw + me->core_size_rw));
 }
 
 static inline int in_core(struct module *me, void *loc)
 {
-	return (loc >= me->module_core &&
-		loc <= (me->module_core + me->core_size));
+	return in_core_rx(me, loc) || in_core_rw(me, loc);
 }
 
 static inline int in_local(struct module *me, void *loc)
@@ -367,13 +389,13 @@ int module_frob_arch_sections(CONST Elf_
 	}
 
 	/* align things a bit */
-	me->core_size = ALIGN(me->core_size, 16);
-	me->arch.got_offset = me->core_size;
-	me->core_size += gots * sizeof(struct got_entry);
-
-	me->core_size = ALIGN(me->core_size, 16);
-	me->arch.fdesc_offset = me->core_size;
-	me->core_size += fdescs * sizeof(Elf_Fdesc);
+	me->core_size_rw = ALIGN(me->core_size_rw, 16);
+	me->arch.got_offset = me->core_size_rw;
+	me->core_size_rw += gots * sizeof(struct got_entry);
+
+	me->core_size_rw = ALIGN(me->core_size_rw, 16);
+	me->arch.fdesc_offset = me->core_size_rw;
+	me->core_size_rw += fdescs * sizeof(Elf_Fdesc);
 
 	me->arch.got_max = gots;
 	me->arch.fdesc_max = fdescs;
@@ -391,7 +413,7 @@ static Elf64_Word get_got(struct module
 
 	BUG_ON(value == 0);
 
-	got = me->module_core + me->arch.got_offset;
+	got = me->module_core_rw + me->arch.got_offset;
 	for (i = 0; got[i].addr; i++)
 		if (got[i].addr == value)
 			goto out;
@@ -409,7 +431,7 @@ static Elf64_Word get_got(struct module
 #ifdef CONFIG_64BIT
 static Elf_Addr get_fdesc(struct module *me, unsigned long value)
 {
-	Elf_Fdesc *fdesc = me->module_core + me->arch.fdesc_offset;
+	Elf_Fdesc *fdesc = me->module_core_rw + me->arch.fdesc_offset;
 
 	if (!value) {
 		printk(KERN_ERR "%s: zero OPD requested!\n", me->name);
@@ -427,7 +449,7 @@ static Elf_Addr get_fdesc(struct module
 
 	/* Create new one */
 	fdesc->addr = value;
-	fdesc->gp = (Elf_Addr)me->module_core + me->arch.got_offset;
+	fdesc->gp = (Elf_Addr)me->module_core_rw + me->arch.got_offset;
 	return (Elf_Addr)fdesc;
 }
 #endif /* CONFIG_64BIT */
@@ -839,7 +861,7 @@ register_unwind_table(struct module *me,
 
 	table = (unsigned char *)sechdrs[me->arch.unwind_section].sh_addr;
 	end = table + sechdrs[me->arch.unwind_section].sh_size;
-	gp = (Elf_Addr)me->module_core + me->arch.got_offset;
+	gp = (Elf_Addr)me->module_core_rw + me->arch.got_offset;
 
 	DEBUGP("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n",
 	       me->arch.unwind_section, table, end, gp);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/parisc/kernel/sys_parisc.c linux-4.0.9-pax/arch/parisc/kernel/sys_parisc.c
--- linux-4.0.9/arch/parisc/kernel/sys_parisc.c	2015-06-15 16:02:22.211183858 +0200
+++ linux-4.0.9-pax/arch/parisc/kernel/sys_parisc.c	2015-06-15 16:02:33.015183834 +0200
@@ -109,6 +109,10 @@ unsigned long arch_get_unmapped_area(str
 		goto found_addr;
 	}
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	if (addr) {
 		if (do_color_align && last_mmap)
 			addr = COLOR_ALIGN(addr, last_mmap, pgoff);
@@ -170,6 +174,10 @@ arch_get_unmapped_area_topdown(struct fi
 	}
 
 	/* requesting a specific address */
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	if (addr) {
 		if (do_color_align && last_mmap)
 			addr = COLOR_ALIGN(addr, last_mmap, pgoff);
@@ -252,6 +260,13 @@ void arch_pick_mmap_layout(struct mm_str
 	mm->mmap_legacy_base = mmap_legacy_base();
 	mm->mmap_base = mmap_upper_limit();
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (mm->pax_flags & MF_PAX_RANDMMAP) {
+		mm->mmap_legacy_base += mm->delta_mmap;
+		mm->mmap_base -= mm->delta_mmap + mm->delta_stack;
+	}
+#endif
+
 	if (mmap_is_legacy()) {
 		mm->mmap_base = mm->mmap_legacy_base;
 		mm->get_unmapped_area = arch_get_unmapped_area;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/parisc/kernel/traps.c linux-4.0.9-pax/arch/parisc/kernel/traps.c
--- linux-4.0.9/arch/parisc/kernel/traps.c	2015-03-18 15:21:50.200349252 +0100
+++ linux-4.0.9-pax/arch/parisc/kernel/traps.c	2015-04-15 12:13:52.874318624 +0200
@@ -726,9 +726,7 @@ void notrace handle_interruption(int cod
 
 			down_read(&current->mm->mmap_sem);
 			vma = find_vma(current->mm,regs->iaoq[0]);
-			if (vma && (regs->iaoq[0] >= vma->vm_start)
-				&& (vma->vm_flags & VM_EXEC)) {
-
+			if (vma && (regs->iaoq[0] >= vma->vm_start)) {
 				fault_address = regs->iaoq[0];
 				fault_space = regs->iasq[0];
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/parisc/mm/fault.c linux-4.0.9-pax/arch/parisc/mm/fault.c
--- linux-4.0.9/arch/parisc/mm/fault.c	2015-03-18 15:21:50.200349252 +0100
+++ linux-4.0.9-pax/arch/parisc/mm/fault.c	2015-04-15 12:13:52.874318624 +0200
@@ -15,6 +15,7 @@
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/unistd.h>
 
 #include <asm/uaccess.h>
 #include <asm/traps.h>
@@ -50,7 +51,7 @@ int show_unhandled_signals = 1;
 static unsigned long
 parisc_acctyp(unsigned long code, unsigned int inst)
 {
-	if (code == 6 || code == 16)
+	if (code == 6 || code == 7 || code == 16)
 	    return VM_EXEC;
 
 	switch (inst & 0xf0000000) {
@@ -136,6 +137,116 @@ parisc_acctyp(unsigned long code, unsign
 			}
 #endif
 
+#ifdef CONFIG_PAX_PAGEEXEC
+/*
+ * PaX: decide what to do with offenders (instruction_pointer(regs) = fault address)
+ *
+ * returns 1 when task should be killed
+ *         2 when rt_sigreturn trampoline was detected
+ *         3 when unpatched PLT trampoline was detected
+ */
+static int pax_handle_fetch_fault(struct pt_regs *regs)
+{
+
+#ifdef CONFIG_PAX_EMUPLT
+	int err;
+
+	do { /* PaX: unpatched PLT emulation */
+		unsigned int bl, depwi;
+
+		err = get_user(bl, (unsigned int *)instruction_pointer(regs));
+		err |= get_user(depwi, (unsigned int *)(instruction_pointer(regs)+4));
+
+		if (err)
+			break;
+
+		if (bl == 0xEA9F1FDDU && depwi == 0xD6801C1EU) {
+			unsigned int ldw, bv, ldw2, addr = instruction_pointer(regs)-12;
+
+			err = get_user(ldw, (unsigned int *)addr);
+			err |= get_user(bv, (unsigned int *)(addr+4));
+			err |= get_user(ldw2, (unsigned int *)(addr+8));
+
+			if (err)
+				break;
+
+			if (ldw == 0x0E801096U &&
+			    bv == 0xEAC0C000U &&
+			    ldw2 == 0x0E881095U)
+			{
+				unsigned int resolver, map;
+
+				err = get_user(resolver, (unsigned int *)(instruction_pointer(regs)+8));
+				err |= get_user(map, (unsigned int *)(instruction_pointer(regs)+12));
+				if (err)
+					break;
+
+				regs->gr[20] = instruction_pointer(regs)+8;
+				regs->gr[21] = map;
+				regs->gr[22] = resolver;
+				regs->iaoq[0] = resolver | 3UL;
+				regs->iaoq[1] = regs->iaoq[0] + 4;
+				return 3;
+			}
+		}
+	} while (0);
+#endif
+
+#ifdef CONFIG_PAX_EMUTRAMP
+
+#ifndef CONFIG_PAX_EMUSIGRT
+	if (!(current->mm->pax_flags & MF_PAX_EMUTRAMP))
+		return 1;
+#endif
+
+	do { /* PaX: rt_sigreturn emulation */
+		unsigned int ldi1, ldi2, bel, nop;
+
+		err = get_user(ldi1, (unsigned int *)instruction_pointer(regs));
+		err |= get_user(ldi2, (unsigned int *)(instruction_pointer(regs)+4));
+		err |= get_user(bel, (unsigned int *)(instruction_pointer(regs)+8));
+		err |= get_user(nop, (unsigned int *)(instruction_pointer(regs)+12));
+
+		if (err)
+			break;
+
+		if ((ldi1 == 0x34190000U || ldi1 == 0x34190002U) &&
+		    ldi2 == 0x3414015AU &&
+		    bel == 0xE4008200U &&
+		    nop == 0x08000240U)
+		{
+			regs->gr[25] = (ldi1 & 2) >> 1;
+			regs->gr[20] = __NR_rt_sigreturn;
+			regs->gr[31] = regs->iaoq[1] + 16;
+			regs->sr[0] = regs->iasq[1];
+			regs->iaoq[0] = 0x100UL;
+			regs->iaoq[1] = regs->iaoq[0] + 4;
+			regs->iasq[0] = regs->sr[2];
+			regs->iasq[1] = regs->sr[2];
+			return 2;
+		}
+	} while (0);
+#endif
+
+	return 1;
+}
+
+void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
+{
+	unsigned long i;
+
+	printk(KERN_ERR "PAX: bytes at PC: ");
+	for (i = 0; i < 5; i++) {
+		unsigned int c;
+		if (get_user(c, (unsigned int *)pc+i))
+			printk(KERN_CONT "???????? ");
+		else
+			printk(KERN_CONT "%08x ", c);
+	}
+	printk("\n");
+}
+#endif
+
 int fixup_exception(struct pt_regs *regs)
 {
 	const struct exception_table_entry *fix;
@@ -234,8 +345,33 @@ retry:
 
 good_area:
 
-	if ((vma->vm_flags & acc_type) != acc_type)
+	if ((vma->vm_flags & acc_type) != acc_type) {
+
+#ifdef CONFIG_PAX_PAGEEXEC
+		if ((mm->pax_flags & MF_PAX_PAGEEXEC) && (acc_type & VM_EXEC) &&
+		    (address & ~3UL) == instruction_pointer(regs))
+		{
+			up_read(&mm->mmap_sem);
+			switch (pax_handle_fetch_fault(regs)) {
+
+#ifdef CONFIG_PAX_EMUPLT
+			case 3:
+				return;
+#endif
+
+#ifdef CONFIG_PAX_EMUTRAMP
+			case 2:
+				return;
+#endif
+
+			}
+			pax_report_fault(regs, (void *)instruction_pointer(regs), (void *)regs->gr[30]);
+			do_group_exit(SIGKILL);
+		}
+#endif
+
 		goto bad_area;
+	}
 
 	/*
 	 * If for any reason at all we couldn't handle the fault, make
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/include/asm/atomic.h linux-4.0.9-pax/arch/powerpc/include/asm/atomic.h
--- linux-4.0.9/arch/powerpc/include/asm/atomic.h	2015-03-18 15:21:50.204349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/include/asm/atomic.h	2015-04-15 12:13:52.874318624 +0200
@@ -12,6 +12,11 @@
 
 #define ATOMIC_INIT(i)		{ (i) }
 
+#define _ASM_EXTABLE(from, to)			\
+"	.section	__ex_table,\"a\"\n"	\
+	PPC_LONG"	" #from ", " #to"\n"	\
+"	.previous\n"
+
 static __inline__ int atomic_read(const atomic_t *v)
 {
 	int t;
@@ -21,39 +26,80 @@ static __inline__ int atomic_read(const
 	return t;
 }
 
+static __inline__ int atomic_read_unchecked(const atomic_unchecked_t *v)
+{
+	int t;
+
+	__asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
+
+	return t;
+}
+
 static __inline__ void atomic_set(atomic_t *v, int i)
 {
 	__asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
 }
 
-#define ATOMIC_OP(op, asm_op)						\
-static __inline__ void atomic_##op(int a, atomic_t *v)			\
+static __inline__ void atomic_set_unchecked(atomic_unchecked_t *v, int i)
+{
+	__asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
+}
+
+#ifdef CONFIG_PAX_REFCOUNT
+#define __REFCOUNT_OP(op) op##o.
+#define __OVERFLOW_PRE			\
+	"	mcrxr	cr0\n"
+#define __OVERFLOW_POST			\
+	"	bf 4*cr0+so, 3f\n"	\
+	"2:	.long 0x00c00b00\n"	\
+	"3:\n"
+#define __OVERFLOW_EXTABLE \
+	"\n4:\n" \
+	_ASM_EXTABLE(2b, 4b)
+#else
+#define __REFCOUNT_OP(op) op
+#define __OVERFLOW_PRE
+#define __OVERFLOW_POST
+#define __OVERFLOW_EXTABLE
+#endif
+
+#define __ATOMIC_OP(op, suffix, pre_op, asm_op, post_op, extable)	\
+static inline void atomic_##op##suffix(int a, atomic##suffix##_t *v)	\
 {									\
 	int t;								\
 									\
 	__asm__ __volatile__(						\
-"1:	lwarx	%0,0,%3		# atomic_" #op "\n"			\
+"1:	lwarx	%0,0,%3		# atomic_" #op #suffix "\n"		\
+	pre_op								\
 	#asm_op " %0,%2,%0\n"						\
+	post_op								\
 	PPC405_ERR77(0,%3)						\
 "	stwcx.	%0,0,%3 \n"						\
 "	bne-	1b\n"							\
+	extable								\
 	: "=&r" (t), "+m" (v->counter)					\
 	: "r" (a), "r" (&v->counter)					\
 	: "cc");							\
 }									\
 
-#define ATOMIC_OP_RETURN(op, asm_op)					\
-static __inline__ int atomic_##op##_return(int a, atomic_t *v)		\
+#define ATOMIC_OP(op, asm_op) __ATOMIC_OP(op, _unchecked, , asm_op, , )	\
+			      __ATOMIC_OP(op, , __OVERFLOW_PRE, __REFCOUNT_OP(asm_op), __OVERFLOW_POST, __OVERFLOW_EXTABLE)
+
+#define __ATOMIC_OP_RETURN(op, suffix, pre_op, asm_op, post_op, extable)\
+static inline int atomic_##op##_return##suffix(int a, atomic##suffix##_t *v)\
 {									\
 	int t;								\
 									\
 	__asm__ __volatile__(						\
 	PPC_ATOMIC_ENTRY_BARRIER					\
-"1:	lwarx	%0,0,%2		# atomic_" #op "_return\n"		\
+"1:	lwarx	%0,0,%2		# atomic_" #op "_return" #suffix "\n"	\
+	pre_op								\
 	#asm_op " %0,%1,%0\n"						\
+	post_op								\
 	PPC405_ERR77(0,%2)						\
 "	stwcx.	%0,0,%2 \n"						\
 "	bne-	1b\n"							\
+	extable								\
 	PPC_ATOMIC_EXIT_BARRIER						\
 	: "=&r" (t)							\
 	: "r" (a), "r" (&v->counter)					\
@@ -62,6 +108,9 @@ static __inline__ int atomic_##op##_retu
 	return t;							\
 }
 
+#define ATOMIC_OP_RETURN(op, asm_op) __ATOMIC_OP_RETURN(op, _unchecked, , asm_op, , )\
+				     __ATOMIC_OP_RETURN(op, , __OVERFLOW_PRE, __REFCOUNT_OP(asm_op), __OVERFLOW_POST, __OVERFLOW_EXTABLE)
+
 #define ATOMIC_OPS(op, asm_op) ATOMIC_OP(op, asm_op) ATOMIC_OP_RETURN(op, asm_op)
 
 ATOMIC_OPS(add, add)
@@ -69,42 +118,29 @@ ATOMIC_OPS(sub, subf)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
+#undef __ATOMIC_OP_RETURN
 #undef ATOMIC_OP
+#undef __ATOMIC_OP
 
 #define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
 
-static __inline__ void atomic_inc(atomic_t *v)
-{
-	int t;
+/*
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ */
+#define atomic_inc(v) atomic_add(1, (v))
+#define atomic_inc_return(v) atomic_add_return(1, (v))
 
-	__asm__ __volatile__(
-"1:	lwarx	%0,0,%2		# atomic_inc\n\
-	addic	%0,%0,1\n"
-	PPC405_ERR77(0,%2)
-"	stwcx.	%0,0,%2 \n\
-	bne-	1b"
-	: "=&r" (t), "+m" (v->counter)
-	: "r" (&v->counter)
-	: "cc", "xer");
+static inline void atomic_inc_unchecked(atomic_unchecked_t *v)
+{
+	atomic_add_unchecked(1, v);
 }
 
-static __inline__ int atomic_inc_return(atomic_t *v)
+static inline int atomic_inc_return_unchecked(atomic_unchecked_t *v)
 {
-	int t;
-
-	__asm__ __volatile__(
-	PPC_ATOMIC_ENTRY_BARRIER
-"1:	lwarx	%0,0,%1		# atomic_inc_return\n\
-	addic	%0,%0,1\n"
-	PPC405_ERR77(0,%1)
-"	stwcx.	%0,0,%1 \n\
-	bne-	1b"
-	PPC_ATOMIC_EXIT_BARRIER
-	: "=&r" (t)
-	: "r" (&v->counter)
-	: "cc", "xer", "memory");
-
-	return t;
+	return atomic_add_return_unchecked(1, v);
 }
 
 /*
@@ -117,43 +153,38 @@ static __inline__ int atomic_inc_return(
  */
 #define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
 
-static __inline__ void atomic_dec(atomic_t *v)
+static __inline__ int atomic_inc_and_test_unchecked(atomic_unchecked_t *v)
 {
-	int t;
-
-	__asm__ __volatile__(
-"1:	lwarx	%0,0,%2		# atomic_dec\n\
-	addic	%0,%0,-1\n"
-	PPC405_ERR77(0,%2)\
-"	stwcx.	%0,0,%2\n\
-	bne-	1b"
-	: "=&r" (t), "+m" (v->counter)
-	: "r" (&v->counter)
-	: "cc", "xer");
+	return atomic_add_return_unchecked(1, v) == 0;
 }
 
-static __inline__ int atomic_dec_return(atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-	PPC_ATOMIC_ENTRY_BARRIER
-"1:	lwarx	%0,0,%1		# atomic_dec_return\n\
-	addic	%0,%0,-1\n"
-	PPC405_ERR77(0,%1)
-"	stwcx.	%0,0,%1\n\
-	bne-	1b"
-	PPC_ATOMIC_EXIT_BARRIER
-	: "=&r" (t)
-	: "r" (&v->counter)
-	: "cc", "xer", "memory");
+/* 
+ * atomic_dec - decrement atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically decrements @v by 1
+ */
+#define atomic_dec(v) atomic_sub(1, (v))
+#define atomic_dec_return(v) atomic_sub_return(1, (v))
 
-	return t;
+static __inline__ void atomic_dec_unchecked(atomic_unchecked_t *v)
+{
+	atomic_sub_unchecked(1, v);
 }
 
 #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
+static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *v, int old, int new)
+{
+	return cmpxchg(&(v->counter), old, new);
+}
+
+static inline int atomic_xchg_unchecked(atomic_unchecked_t *v, int new) 
+{
+	return xchg(&(v->counter), new);
+}
+
 /**
  * __atomic_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
@@ -171,11 +202,27 @@ static __inline__ int __atomic_add_unles
 	PPC_ATOMIC_ENTRY_BARRIER
 "1:	lwarx	%0,0,%1		# __atomic_add_unless\n\
 	cmpw	0,%0,%3 \n\
-	beq-	2f \n\
-	add	%0,%2,%0 \n"
+	beq-	2f \n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+"	mcrxr	cr0\n"
+"	addo.	%0,%2,%0\n"
+"	bf 4*cr0+so, 4f\n"
+"3:.long " "0x00c00b00""\n"
+"4:\n"
+#else
+	"add	%0,%2,%0 \n"
+#endif
+
 	PPC405_ERR77(0,%2)
 "	stwcx.	%0,0,%1 \n\
 	bne-	1b \n"
+"5:"
+
+#ifdef CONFIG_PAX_REFCOUNT
+	_ASM_EXTABLE(3b, 5b)
+#endif
+
 	PPC_ATOMIC_EXIT_BARRIER
 "	subf	%0,%2,%0 \n\
 2:"
@@ -248,6 +295,11 @@ static __inline__ int atomic_dec_if_posi
 }
 #define atomic_dec_if_positive atomic_dec_if_positive
 
+#define smp_mb__before_atomic_dec()     smp_mb()
+#define smp_mb__after_atomic_dec()      smp_mb()
+#define smp_mb__before_atomic_inc()     smp_mb()
+#define smp_mb__after_atomic_inc()      smp_mb()
+
 #ifdef __powerpc64__
 
 #define ATOMIC64_INIT(i)	{ (i) }
@@ -261,37 +313,60 @@ static __inline__ long atomic64_read(con
 	return t;
 }
 
+static __inline__ long atomic64_read_unchecked(const atomic64_unchecked_t *v)
+{
+	long t;
+
+	__asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
+
+	return t;
+}
+
 static __inline__ void atomic64_set(atomic64_t *v, long i)
 {
 	__asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
 }
 
-#define ATOMIC64_OP(op, asm_op)						\
-static __inline__ void atomic64_##op(long a, atomic64_t *v)		\
+static __inline__ void atomic64_set_unchecked(atomic64_unchecked_t *v, long i)
+{
+	__asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
+}
+
+#define __ATOMIC64_OP(op, suffix, pre_op, asm_op, post_op, extable)	\
+static inline void atomic64_##op##suffix(long a, atomic64##suffix##_t *v)\
 {									\
 	long t;								\
 									\
 	__asm__ __volatile__(						\
 "1:	ldarx	%0,0,%3		# atomic64_" #op "\n"			\
+	pre_op								\
 	#asm_op " %0,%2,%0\n"						\
+	post_op								\
 "	stdcx.	%0,0,%3 \n"						\
 "	bne-	1b\n"							\
+	extable								\
 	: "=&r" (t), "+m" (v->counter)					\
 	: "r" (a), "r" (&v->counter)					\
 	: "cc");							\
 }
 
-#define ATOMIC64_OP_RETURN(op, asm_op)					\
-static __inline__ long atomic64_##op##_return(long a, atomic64_t *v)	\
+#define ATOMIC64_OP(op, asm_op) __ATOMIC64_OP(op, _unchecked, , asm_op, , )	\
+				__ATOMIC64_OP(op, , __OVERFLOW_PRE, __REFCOUNT_OP(asm_op), __OVERFLOW_POST, __OVERFLOW_EXTABLE)
+
+#define __ATOMIC64_OP_RETURN(op, suffix, pre_op, asm_op, post_op, extable)\
+static inline long atomic64_##op##_return##suffix(long a, atomic64##suffix##_t *v)\
 {									\
 	long t;								\
 									\
 	__asm__ __volatile__(						\
 	PPC_ATOMIC_ENTRY_BARRIER					\
 "1:	ldarx	%0,0,%2		# atomic64_" #op "_return\n"		\
+	pre_op								\
 	#asm_op " %0,%1,%0\n"						\
+	post_op								\
 "	stdcx.	%0,0,%2 \n"						\
 "	bne-	1b\n"							\
+	extable								\
 	PPC_ATOMIC_EXIT_BARRIER						\
 	: "=&r" (t)							\
 	: "r" (a), "r" (&v->counter)					\
@@ -300,6 +375,9 @@ static __inline__ long atomic64_##op##_r
 	return t;							\
 }
 
+#define ATOMIC64_OP_RETURN(op, asm_op) __ATOMIC64_OP_RETURN(op, _unchecked, , asm_op, , )\
+				       __ATOMIC64_OP_RETURN(op, , __OVERFLOW_PRE, __REFCOUNT_OP(asm_op), __OVERFLOW_POST, __OVERFLOW_EXTABLE)
+
 #define ATOMIC64_OPS(op, asm_op) ATOMIC64_OP(op, asm_op) ATOMIC64_OP_RETURN(op, asm_op)
 
 ATOMIC64_OPS(add, add)
@@ -307,40 +385,33 @@ ATOMIC64_OPS(sub, subf)
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
+#undef __ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
+#undef __ATOMIC64_OP
+#undef __OVERFLOW_EXTABLE
+#undef __OVERFLOW_POST
+#undef __OVERFLOW_PRE
+#undef __REFCOUNT_OP
 
 #define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
 
-static __inline__ void atomic64_inc(atomic64_t *v)
-{
-	long t;
+/*
+ * atomic64_inc - increment atomic variable
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically increments @v by 1
+ */
+#define atomic64_inc(v) atomic64_add(1, (v))
+#define atomic64_inc_return(v) atomic64_add_return(1, (v))
 
-	__asm__ __volatile__(
-"1:	ldarx	%0,0,%2		# atomic64_inc\n\
-	addic	%0,%0,1\n\
-	stdcx.	%0,0,%2 \n\
-	bne-	1b"
-	: "=&r" (t), "+m" (v->counter)
-	: "r" (&v->counter)
-	: "cc", "xer");
+static inline void atomic64_inc_unchecked(atomic64_unchecked_t *v)
+{
+	atomic64_add_unchecked(1, v);
 }
 
-static __inline__ long atomic64_inc_return(atomic64_t *v)
+static inline long atomic64_inc_return_unchecked(atomic64_unchecked_t *v)
 {
-	long t;
-
-	__asm__ __volatile__(
-	PPC_ATOMIC_ENTRY_BARRIER
-"1:	ldarx	%0,0,%1		# atomic64_inc_return\n\
-	addic	%0,%0,1\n\
-	stdcx.	%0,0,%1 \n\
-	bne-	1b"
-	PPC_ATOMIC_EXIT_BARRIER
-	: "=&r" (t)
-	: "r" (&v->counter)
-	: "cc", "xer", "memory");
-
-	return t;
+	return atomic64_add_return_unchecked(1, v);
 }
 
 /*
@@ -353,36 +424,18 @@ static __inline__ long atomic64_inc_retu
  */
 #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
 
-static __inline__ void atomic64_dec(atomic64_t *v)
-{
-	long t;
-
-	__asm__ __volatile__(
-"1:	ldarx	%0,0,%2		# atomic64_dec\n\
-	addic	%0,%0,-1\n\
-	stdcx.	%0,0,%2\n\
-	bne-	1b"
-	: "=&r" (t), "+m" (v->counter)
-	: "r" (&v->counter)
-	: "cc", "xer");
-}
+/*
+ * atomic64_dec - decrement atomic variable
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically decrements @v by 1
+ */
+#define atomic64_dec(v) atomic64_sub(1, (v))
+#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
 
-static __inline__ long atomic64_dec_return(atomic64_t *v)
+static inline void atomic64_dec_unchecked(atomic64_unchecked_t *v)
 {
-	long t;
-
-	__asm__ __volatile__(
-	PPC_ATOMIC_ENTRY_BARRIER
-"1:	ldarx	%0,0,%1		# atomic64_dec_return\n\
-	addic	%0,%0,-1\n\
-	stdcx.	%0,0,%1\n\
-	bne-	1b"
-	PPC_ATOMIC_EXIT_BARRIER
-	: "=&r" (t)
-	: "r" (&v->counter)
-	: "cc", "xer", "memory");
-
-	return t;
+	atomic64_sub_unchecked(1, v);
 }
 
 #define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
@@ -415,6 +468,16 @@ static __inline__ long atomic64_dec_if_p
 #define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
+static inline long atomic64_cmpxchg_unchecked(atomic64_unchecked_t *v, long old, long new)
+{
+	return cmpxchg(&(v->counter), old, new);
+}
+
+static inline long atomic64_xchg_unchecked(atomic64_unchecked_t *v, long new) 
+{
+	return xchg(&(v->counter), new);
+}
+
 /**
  * atomic64_add_unless - add unless the number is a given value
  * @v: pointer of type atomic64_t
@@ -430,13 +493,29 @@ static __inline__ int atomic64_add_unles
 
 	__asm__ __volatile__ (
 	PPC_ATOMIC_ENTRY_BARRIER
-"1:	ldarx	%0,0,%1		# __atomic_add_unless\n\
+"1:	ldarx	%0,0,%1		# atomic64_add_unless\n\
 	cmpd	0,%0,%3 \n\
-	beq-	2f \n\
-	add	%0,%2,%0 \n"
+	beq-	2f \n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+"	mcrxr	cr0\n"
+"	addo.	%0,%2,%0\n"
+"	bf 4*cr0+so, 4f\n"
+"3:.long " "0x00c00b00""\n"
+"4:\n"
+#else
+	"add	%0,%2,%0 \n"
+#endif
+
 "	stdcx.	%0,0,%1 \n\
 	bne-	1b \n"
 	PPC_ATOMIC_EXIT_BARRIER
+"5:"
+
+#ifdef CONFIG_PAX_REFCOUNT
+	_ASM_EXTABLE(3b, 5b)
+#endif
+
 "	subf	%0,%2,%0 \n\
 2:"
 	: "=&r" (t)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/include/asm/barrier.h linux-4.0.9-pax/arch/powerpc/include/asm/barrier.h
--- linux-4.0.9/arch/powerpc/include/asm/barrier.h	2015-03-18 15:21:50.204349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/include/asm/barrier.h	2015-04-15 12:13:52.874318624 +0200
@@ -76,7 +76,7 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_lwsync();							\
-	ACCESS_ONCE(*p) = (v);						\
+	ACCESS_ONCE_RW(*p) = (v);					\
 } while (0)
 
 #define smp_load_acquire(p)						\
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/include/asm/elf.h linux-4.0.9-pax/arch/powerpc/include/asm/elf.h
--- linux-4.0.9/arch/powerpc/include/asm/elf.h	2015-03-18 15:21:50.204349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/include/asm/elf.h	2015-04-15 12:13:52.874318624 +0200
@@ -30,6 +30,18 @@
 
 #define ELF_ET_DYN_BASE	0x20000000
 
+#ifdef CONFIG_PAX_ASLR
+#define PAX_ELF_ET_DYN_BASE	(0x10000000UL)
+
+#ifdef __powerpc64__
+#define PAX_DELTA_MMAP_LEN	(is_32bit_task() ? 16 : 28)
+#define PAX_DELTA_STACK_LEN	(is_32bit_task() ? 16 : 28)
+#else
+#define PAX_DELTA_MMAP_LEN	15
+#define PAX_DELTA_STACK_LEN	15
+#endif
+#endif
+
 #define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0)
 
 /*
@@ -128,10 +140,6 @@ extern int arch_setup_additional_pages(s
 	(0x7ff >> (PAGE_SHIFT - 12)) : \
 	(0x3ffff >> (PAGE_SHIFT - 12)))
 
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
-
 #ifdef CONFIG_SPU_BASE
 /* Notes used in ET_CORE. Note name is "SPU/<fd>/<filename>". */
 #define NT_SPU		1
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/include/asm/exec.h linux-4.0.9-pax/arch/powerpc/include/asm/exec.h
--- linux-4.0.9/arch/powerpc/include/asm/exec.h	2015-03-18 15:21:50.204349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/include/asm/exec.h	2015-04-15 12:13:52.874318624 +0200
@@ -4,6 +4,6 @@
 #ifndef _ASM_POWERPC_EXEC_H
 #define _ASM_POWERPC_EXEC_H
 
-extern unsigned long arch_align_stack(unsigned long sp);
+#define arch_align_stack(x) ((x) & ~0xfUL)
 
 #endif /* _ASM_POWERPC_EXEC_H */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/include/asm/kmap_types.h linux-4.0.9-pax/arch/powerpc/include/asm/kmap_types.h
--- linux-4.0.9/arch/powerpc/include/asm/kmap_types.h	2015-03-18 15:21:50.204349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/include/asm/kmap_types.h	2015-04-15 12:13:52.874318624 +0200
@@ -10,7 +10,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#define KM_TYPE_NR 16
+#define KM_TYPE_NR 17
 
 #endif	/* __KERNEL__ */
 #endif	/* _ASM_POWERPC_KMAP_TYPES_H */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/include/asm/local.h linux-4.0.9-pax/arch/powerpc/include/asm/local.h
--- linux-4.0.9/arch/powerpc/include/asm/local.h	2015-03-18 15:21:50.204349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/include/asm/local.h	2015-04-15 12:13:52.874318624 +0200
@@ -9,21 +9,65 @@ typedef struct
 	atomic_long_t a;
 } local_t;
 
+typedef struct
+{
+	atomic_long_unchecked_t a;
+} local_unchecked_t;
+
 #define LOCAL_INIT(i)	{ ATOMIC_LONG_INIT(i) }
 
 #define local_read(l)	atomic_long_read(&(l)->a)
+#define local_read_unchecked(l)	atomic_long_read_unchecked(&(l)->a)
 #define local_set(l,i)	atomic_long_set(&(l)->a, (i))
+#define local_set_unchecked(l,i)	atomic_long_set_unchecked(&(l)->a, (i))
 
 #define local_add(i,l)	atomic_long_add((i),(&(l)->a))
+#define local_add_unchecked(i,l)	atomic_long_add_unchecked((i),(&(l)->a))
 #define local_sub(i,l)	atomic_long_sub((i),(&(l)->a))
+#define local_sub_unchecked(i,l)	atomic_long_sub_unchecked((i),(&(l)->a))
 #define local_inc(l)	atomic_long_inc(&(l)->a)
+#define local_inc_unchecked(l)	atomic_long_inc_unchecked(&(l)->a)
 #define local_dec(l)	atomic_long_dec(&(l)->a)
+#define local_dec_unchecked(l)	atomic_long_dec_unchecked(&(l)->a)
 
 static __inline__ long local_add_return(long a, local_t *l)
 {
 	long t;
 
 	__asm__ __volatile__(
+"1:"	PPC_LLARX(%0,0,%2,0) "			# local_add_return\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+"	mcrxr   cr0\n"
+"	addo.	%0,%1,%0\n"
+"	bf 4*cr0+so, 3f\n"
+"2:.long " "0x00c00b00""\n"
+#else
+"	add	%0,%1,%0\n"
+#endif
+
+"3:\n"
+	PPC405_ERR77(0,%2)
+	PPC_STLCX	"%0,0,%2 \n\
+	bne-	1b"
+
+#ifdef CONFIG_PAX_REFCOUNT
+"\n4:\n"
+	_ASM_EXTABLE(2b, 4b)
+#endif
+
+	: "=&r" (t)
+	: "r" (a), "r" (&(l->a.counter))
+	: "cc", "memory");
+
+	return t;
+}
+
+static __inline__ long local_add_return_unchecked(long a, local_unchecked_t *l)
+{
+	long t;
+
+	__asm__ __volatile__(
 "1:"	PPC_LLARX(%0,0,%2,0) "			# local_add_return\n\
 	add	%0,%1,%0\n"
 	PPC405_ERR77(0,%2)
@@ -101,6 +145,8 @@ static __inline__ long local_dec_return(
 
 #define local_cmpxchg(l, o, n) \
 	(cmpxchg_local(&((l)->a.counter), (o), (n)))
+#define local_cmpxchg_unchecked(l, o, n) \
+	(cmpxchg_local(&((l)->a.counter), (o), (n)))
 #define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n)))
 
 /**
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/include/asm/mman.h linux-4.0.9-pax/arch/powerpc/include/asm/mman.h
--- linux-4.0.9/arch/powerpc/include/asm/mman.h	2015-03-18 15:21:50.204349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/include/asm/mman.h	2015-04-15 12:13:52.874318624 +0200
@@ -24,7 +24,7 @@ static inline unsigned long arch_calc_vm
 }
 #define arch_calc_vm_prot_bits(prot) arch_calc_vm_prot_bits(prot)
 
-static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
+static inline pgprot_t arch_vm_get_page_prot(vm_flags_t vm_flags)
 {
 	return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0);
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/include/asm/page_64.h linux-4.0.9-pax/arch/powerpc/include/asm/page_64.h
--- linux-4.0.9/arch/powerpc/include/asm/page_64.h	2015-03-18 15:21:50.204349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/include/asm/page_64.h	2015-04-15 12:13:52.874318624 +0200
@@ -172,15 +172,18 @@ do {						\
  * stack by default, so in the absence of a PT_GNU_STACK program header
  * we turn execute permission off.
  */
-#define VM_STACK_DEFAULT_FLAGS32	(VM_READ | VM_WRITE | VM_EXEC | \
-					 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_STACK_DEFAULT_FLAGS32 \
+	(((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
+	 VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #define VM_STACK_DEFAULT_FLAGS64	(VM_READ | VM_WRITE | \
 					 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
+#ifndef CONFIG_PAX_PAGEEXEC
 #define VM_STACK_DEFAULT_FLAGS \
 	(is_32bit_task() ? \
 	 VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64)
+#endif
 
 #include <asm-generic/getorder.h>
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/include/asm/page.h linux-4.0.9-pax/arch/powerpc/include/asm/page.h
--- linux-4.0.9/arch/powerpc/include/asm/page.h	2015-03-18 15:21:50.204349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/include/asm/page.h	2015-04-15 12:13:52.874318624 +0200
@@ -227,8 +227,9 @@ extern long long virt_phys_offset;
  * and needs to be executable.  This means the whole heap ends
  * up being executable.
  */
-#define VM_DATA_DEFAULT_FLAGS32	(VM_READ | VM_WRITE | VM_EXEC | \
-				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_DEFAULT_FLAGS32 \
+	(((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
+	 VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #define VM_DATA_DEFAULT_FLAGS64	(VM_READ | VM_WRITE | \
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
@@ -256,6 +257,9 @@ extern long long virt_phys_offset;
 #define is_kernel_addr(x)	((x) >= PAGE_OFFSET)
 #endif
 
+#define ktla_ktva(addr)		(addr)
+#define ktva_ktla(addr)		(addr)
+
 #ifndef CONFIG_PPC_BOOK3S_64
 /*
  * Use the top bit of the higher-level page table entries to indicate whether
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/include/asm/pgalloc-64.h linux-4.0.9-pax/arch/powerpc/include/asm/pgalloc-64.h
--- linux-4.0.9/arch/powerpc/include/asm/pgalloc-64.h	2015-03-18 15:21:50.204349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/include/asm/pgalloc-64.h	2015-04-15 12:13:52.874318624 +0200
@@ -54,6 +54,7 @@ static inline void pgd_free(struct mm_st
 #ifndef CONFIG_PPC_64K_PAGES
 
 #define pgd_populate(MM, PGD, PUD)	pgd_set(PGD, PUD)
+#define pgd_populate_kernel(MM, PGD, PUD)	pgd_populate((MM), (PGD), (PUD))
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
@@ -71,6 +72,11 @@ static inline void pud_populate(struct m
 	pud_set(pud, (unsigned long)pmd);
 }
 
+static inline void pud_populate_kernel(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	pud_populate(mm, pud, pmd);
+}
+
 #define pmd_populate(mm, pmd, pte_page) \
 	pmd_populate_kernel(mm, pmd, page_address(pte_page))
 #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte))
@@ -173,6 +179,7 @@ extern void __tlb_remove_table(void *_ta
 #endif
 
 #define pud_populate(mm, pud, pmd)	pud_set(pud, (unsigned long)pmd)
+#define pud_populate_kernel(mm, pud, pmd)	pud_populate((mm), (pud), (pmd))
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 				       pte_t *pte)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/include/asm/pgtable.h linux-4.0.9-pax/arch/powerpc/include/asm/pgtable.h
--- linux-4.0.9/arch/powerpc/include/asm/pgtable.h	2015-04-13 11:20:55.922617778 +0200
+++ linux-4.0.9-pax/arch/powerpc/include/asm/pgtable.h	2015-04-15 12:13:52.874318624 +0200
@@ -2,6 +2,7 @@
 #define _ASM_POWERPC_PGTABLE_H
 #ifdef __KERNEL__
 
+#include <linux/const.h>
 #ifndef __ASSEMBLY__
 #include <linux/mmdebug.h>
 #include <linux/mmzone.h>
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/include/asm/pte-hash32.h linux-4.0.9-pax/arch/powerpc/include/asm/pte-hash32.h
--- linux-4.0.9/arch/powerpc/include/asm/pte-hash32.h	2015-04-13 11:20:55.990617774 +0200
+++ linux-4.0.9-pax/arch/powerpc/include/asm/pte-hash32.h	2015-04-15 12:13:52.874318624 +0200
@@ -20,6 +20,7 @@
 #define _PAGE_HASHPTE	0x002	/* hash_page has made an HPTE for this pte */
 #define _PAGE_USER	0x004	/* usermode access allowed */
 #define _PAGE_GUARDED	0x008	/* G: prohibit speculative access */
+#define _PAGE_EXEC	_PAGE_GUARDED
 #define _PAGE_COHERENT	0x010	/* M: enforce memory coherence (SMP systems) */
 #define _PAGE_NO_CACHE	0x020	/* I: cache inhibit */
 #define _PAGE_WRITETHRU	0x040	/* W: cache write-through */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/include/asm/reg.h linux-4.0.9-pax/arch/powerpc/include/asm/reg.h
--- linux-4.0.9/arch/powerpc/include/asm/reg.h	2015-04-13 11:20:55.998617774 +0200
+++ linux-4.0.9-pax/arch/powerpc/include/asm/reg.h	2015-04-15 12:13:52.874318624 +0200
@@ -253,6 +253,7 @@
 #define SPRN_DBCR	0x136	/* e300 Data Breakpoint Control Reg */
 #define SPRN_DSISR	0x012	/* Data Storage Interrupt Status Register */
 #define   DSISR_NOHPTE		0x40000000	/* no translation found */
+#define   DSISR_GUARDED		0x10000000	/* fetch from guarded storage */
 #define   DSISR_PROTFAULT	0x08000000	/* protection fault */
 #define   DSISR_ISSTORE		0x02000000	/* access was a store */
 #define   DSISR_DABRMATCH	0x00400000	/* hit data breakpoint */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/include/asm/smp.h linux-4.0.9-pax/arch/powerpc/include/asm/smp.h
--- linux-4.0.9/arch/powerpc/include/asm/smp.h	2015-04-13 11:20:56.014617773 +0200
+++ linux-4.0.9-pax/arch/powerpc/include/asm/smp.h	2015-04-15 12:13:52.874318624 +0200
@@ -51,7 +51,7 @@ struct smp_ops_t {
 	int   (*cpu_disable)(void);
 	void  (*cpu_die)(unsigned int nr);
 	int   (*cpu_bootable)(unsigned int nr);
-};
+} __no_const;
 
 extern void smp_send_debugger_break(void);
 extern void start_secondary_resume(void);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/include/asm/spinlock.h linux-4.0.9-pax/arch/powerpc/include/asm/spinlock.h
--- linux-4.0.9/arch/powerpc/include/asm/spinlock.h	2015-03-18 15:21:50.204349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/include/asm/spinlock.h	2015-04-15 12:13:52.878318624 +0200
@@ -204,13 +204,29 @@ static inline long __arch_read_trylock(a
 	__asm__ __volatile__(
 "1:	" PPC_LWARX(%0,0,%1,1) "\n"
 	__DO_SIGN_EXTEND
-"	addic.		%0,%0,1\n\
-	ble-		2f\n"
+
+#ifdef	CONFIG_PAX_REFCOUNT
+"	mcrxr	cr0\n"
+"	addico.		%0,%0,1\n"
+"	bf 4*cr0+so, 3f\n"
+"2:.long " "0x00c00b00""\n"
+#else
+"	addic.		%0,%0,1\n"
+#endif
+
+"3:\n"
+	"ble-		4f\n"
 	PPC405_ERR77(0,%1)
 "	stwcx.		%0,0,%1\n\
 	bne-		1b\n"
 	PPC_ACQUIRE_BARRIER
-"2:"	: "=&r" (tmp)
+"4:"	
+
+#ifdef CONFIG_PAX_REFCOUNT
+	_ASM_EXTABLE(2b,4b)
+#endif
+
+	: "=&r" (tmp)
 	: "r" (&rw->lock)
 	: "cr0", "xer", "memory");
 
@@ -286,11 +302,27 @@ static inline void arch_read_unlock(arch
 	__asm__ __volatile__(
 	"# read_unlock\n\t"
 	PPC_RELEASE_BARRIER
-"1:	lwarx		%0,0,%1\n\
-	addic		%0,%0,-1\n"
+"1:	lwarx		%0,0,%1\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+"	mcrxr 	cr0\n"
+"	addico.		%0,%0,-1\n"
+"	bf 4*cr0+so, 3f\n"
+"2:.long " "0x00c00b00""\n"
+#else
+"	addic.		%0,%0,-1\n"
+#endif
+
+"3:\n"
 	PPC405_ERR77(0,%1)
 "	stwcx.		%0,0,%1\n\
 	bne-		1b"
+
+#ifdef CONFIG_PAX_REFCOUNT
+"\n4:\n"
+	_ASM_EXTABLE(2b, 4b)
+#endif
+
 	: "=&r"(tmp)
 	: "r"(&rw->lock)
 	: "cr0", "xer", "memory");
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/include/asm/uaccess.h linux-4.0.9-pax/arch/powerpc/include/asm/uaccess.h
--- linux-4.0.9/arch/powerpc/include/asm/uaccess.h	2015-03-18 15:21:50.204349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/include/asm/uaccess.h	2015-04-15 12:13:52.878318624 +0200
@@ -58,6 +58,7 @@
 
 #endif
 
+#define access_ok_noprefault(type, addr, size) access_ok((type), (addr), (size))
 #define access_ok(type, addr, size)		\
 	(__chk_user_ptr(addr),			\
 	 __access_ok((__force unsigned long)(addr), (size), get_fs()))
@@ -318,52 +319,6 @@ do {								\
 extern unsigned long __copy_tofrom_user(void __user *to,
 		const void __user *from, unsigned long size);
 
-#ifndef __powerpc64__
-
-static inline unsigned long copy_from_user(void *to,
-		const void __user *from, unsigned long n)
-{
-	unsigned long over;
-
-	if (access_ok(VERIFY_READ, from, n))
-		return __copy_tofrom_user((__force void __user *)to, from, n);
-	if ((unsigned long)from < TASK_SIZE) {
-		over = (unsigned long)from + n - TASK_SIZE;
-		return __copy_tofrom_user((__force void __user *)to, from,
-				n - over) + over;
-	}
-	return n;
-}
-
-static inline unsigned long copy_to_user(void __user *to,
-		const void *from, unsigned long n)
-{
-	unsigned long over;
-
-	if (access_ok(VERIFY_WRITE, to, n))
-		return __copy_tofrom_user(to, (__force void __user *)from, n);
-	if ((unsigned long)to < TASK_SIZE) {
-		over = (unsigned long)to + n - TASK_SIZE;
-		return __copy_tofrom_user(to, (__force void __user *)from,
-				n - over) + over;
-	}
-	return n;
-}
-
-#else /* __powerpc64__ */
-
-#define __copy_in_user(to, from, size) \
-	__copy_tofrom_user((to), (from), (size))
-
-extern unsigned long copy_from_user(void *to, const void __user *from,
-				    unsigned long n);
-extern unsigned long copy_to_user(void __user *to, const void *from,
-				  unsigned long n);
-extern unsigned long copy_in_user(void __user *to, const void __user *from,
-				  unsigned long n);
-
-#endif /* __powerpc64__ */
-
 static inline unsigned long __copy_from_user_inatomic(void *to,
 		const void __user *from, unsigned long n)
 {
@@ -387,6 +342,10 @@ static inline unsigned long __copy_from_
 		if (ret == 0)
 			return 0;
 	}
+
+	if (!__builtin_constant_p(n))
+		check_object_size(to, n, false);
+
 	return __copy_tofrom_user((__force void __user *)to, from, n);
 }
 
@@ -413,6 +372,10 @@ static inline unsigned long __copy_to_us
 		if (ret == 0)
 			return 0;
 	}
+
+	if (!__builtin_constant_p(n))
+		check_object_size(from, n, true);
+
 	return __copy_tofrom_user(to, (__force const void __user *)from, n);
 }
 
@@ -430,6 +393,92 @@ static inline unsigned long __copy_to_us
 	return __copy_to_user_inatomic(to, from, size);
 }
 
+#ifndef __powerpc64__
+
+static inline unsigned long __must_check copy_from_user(void *to,
+		const void __user *from, unsigned long n)
+{
+	unsigned long over;
+
+	if ((long)n < 0)
+		return n;
+
+	if (access_ok(VERIFY_READ, from, n)) {
+		if (!__builtin_constant_p(n))
+			check_object_size(to, n, false);
+		return __copy_tofrom_user((__force void __user *)to, from, n);
+	}
+	if ((unsigned long)from < TASK_SIZE) {
+		over = (unsigned long)from + n - TASK_SIZE;
+		if (!__builtin_constant_p(n - over))
+			check_object_size(to, n - over, false);
+		return __copy_tofrom_user((__force void __user *)to, from,
+				n - over) + over;
+	}
+	return n;
+}
+
+static inline unsigned long __must_check copy_to_user(void __user *to,
+		const void *from, unsigned long n)
+{
+	unsigned long over;
+
+	if ((long)n < 0)
+		return n;
+
+	if (access_ok(VERIFY_WRITE, to, n)) {
+		if (!__builtin_constant_p(n))
+			check_object_size(from, n, true);
+		return __copy_tofrom_user(to, (__force void __user *)from, n);
+	}
+	if ((unsigned long)to < TASK_SIZE) {
+		over = (unsigned long)to + n - TASK_SIZE;
+		if (!__builtin_constant_p(n))
+			check_object_size(from, n - over, true);
+		return __copy_tofrom_user(to, (__force void __user *)from,
+				n - over) + over;
+	}
+	return n;
+}
+
+#else /* __powerpc64__ */
+
+#define __copy_in_user(to, from, size) \
+	__copy_tofrom_user((to), (from), (size))
+
+static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	if ((long)n < 0 || n > INT_MAX)
+		return n;
+
+	if (!__builtin_constant_p(n))
+		check_object_size(to, n, false);
+
+	if (likely(access_ok(VERIFY_READ, from, n)))
+		n = __copy_from_user(to, from, n);
+	else
+		memset(to, 0, n);
+	return n;
+}
+
+static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	if ((long)n < 0 || n > INT_MAX)
+		return n;
+
+	if (likely(access_ok(VERIFY_WRITE, to, n))) {
+		if (!__builtin_constant_p(n))
+			check_object_size(from, n, true);
+		n = __copy_to_user(to, from, n);
+	}
+	return n;
+}
+
+extern unsigned long copy_in_user(void __user *to, const void __user *from,
+				  unsigned long n);
+
+#endif /* __powerpc64__ */
+
 extern unsigned long __clear_user(void __user *addr, unsigned long size);
 
 static inline unsigned long clear_user(void __user *addr, unsigned long size)
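
Together with the usercopy_64.c hunk further down, the uaccess.h change above turns the powerpc copy_from_user()/copy_to_user() pair into inline functions that reject sizes which went negative through a signedness bug, cap 64-bit copies at INT_MAX, and feed non-constant sizes to the object-size check before anything is copied. The rough userspace approximation below shows only the shape of those added checks; check_object_size() is reduced to a stub and memcpy() stands in for __copy_tofrom_user().

#include <limits.h>
#include <stddef.h>
#include <string.h>

/* stub: the real hook verifies slab/stack object bounds elsewhere in the patch */
static void check_object_size(const void *ptr, unsigned long n, int to_user)
{
	(void)ptr; (void)n; (void)to_user;
}

unsigned long copy_to_user_sketch(void *to, const void *from, unsigned long n)
{
	if ((long)n < 0 || n > INT_MAX)		/* catches (size_t)-1 style bugs */
		return n;			/* report "n bytes not copied" */

	if (!__builtin_constant_p(n))
		check_object_size(from, n, 1);

	memcpy(to, from, n);			/* stand-in for __copy_tofrom_user() */
	return 0;
}
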
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/kernel/exceptions-64e.S linux-4.0.9-pax/arch/powerpc/kernel/exceptions-64e.S
--- linux-4.0.9/arch/powerpc/kernel/exceptions-64e.S	2015-03-18 15:21:50.208349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/kernel/exceptions-64e.S	2015-04-15 12:13:52.878318624 +0200
@@ -1010,6 +1010,7 @@ storage_fault_common:
 	std	r14,_DAR(r1)
 	std	r15,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	save_nvgprs
 	mr	r4,r14
 	mr	r5,r15
 	ld	r14,PACA_EXGEN+EX_R14(r13)
@@ -1018,8 +1019,7 @@ storage_fault_common:
 	cmpdi	r3,0
 	bne-	1f
 	b	ret_from_except_lite
-1:	bl	save_nvgprs
-	mr	r5,r3
+1:	mr	r5,r3
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r4,_DAR(r1)
 	bl	bad_page_fault
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/kernel/exceptions-64s.S linux-4.0.9-pax/arch/powerpc/kernel/exceptions-64s.S
--- linux-4.0.9/arch/powerpc/kernel/exceptions-64s.S	2015-04-13 11:20:56.082617769 +0200
+++ linux-4.0.9-pax/arch/powerpc/kernel/exceptions-64s.S	2015-04-15 12:13:52.878318624 +0200
@@ -1599,10 +1599,10 @@ handle_page_fault:
 11:	ld	r4,_DAR(r1)
 	ld	r5,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	save_nvgprs
 	bl	do_page_fault
 	cmpdi	r3,0
 	beq+	12f
-	bl	save_nvgprs
 	mr	r5,r3
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	lwz	r4,_DAR(r1)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/kernel/Makefile linux-4.0.9-pax/arch/powerpc/kernel/Makefile
--- linux-4.0.9/arch/powerpc/kernel/Makefile	2015-03-18 15:21:50.208349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/kernel/Makefile	2015-04-15 12:13:52.878318624 +0200
@@ -15,6 +15,11 @@ CFLAGS_prom_init.o      += -fPIC
 CFLAGS_btext.o		+= -fPIC
 endif
 
+CFLAGS_REMOVE_cputable.o = $(LATENT_ENTROPY_PLUGIN_CFLAGS)
+CFLAGS_REMOVE_prom_init.o = $(LATENT_ENTROPY_PLUGIN_CFLAGS)
+CFLAGS_REMOVE_btext.o = $(LATENT_ENTROPY_PLUGIN_CFLAGS)
+CFLAGS_REMOVE_prom.o = $(LATENT_ENTROPY_PLUGIN_CFLAGS)
+
 ifdef CONFIG_FUNCTION_TRACER
 # Do not trace early boot code
 CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/kernel/module_32.c linux-4.0.9-pax/arch/powerpc/kernel/module_32.c
--- linux-4.0.9/arch/powerpc/kernel/module_32.c	2015-03-18 15:21:50.208349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/kernel/module_32.c	2015-04-15 12:13:52.878318624 +0200
@@ -158,7 +158,7 @@ int module_frob_arch_sections(Elf32_Ehdr
 			me->arch.core_plt_section = i;
 	}
 	if (!me->arch.core_plt_section || !me->arch.init_plt_section) {
-		pr_err("Module doesn't contain .plt or .init.plt sections.\n");
+		pr_err("Module $s doesn't contain .plt or .init.plt sections.\n", me->name);
 		return -ENOEXEC;
 	}
 
@@ -188,11 +188,16 @@ static uint32_t do_plt_call(void *locati
 
 	pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location);
 	/* Init, or core PLT? */
-	if (location >= mod->module_core
-	    && location < mod->module_core + mod->core_size)
+	if ((location >= mod->module_core_rx && location < mod->module_core_rx + mod->core_size_rx) ||
+	    (location >= mod->module_core_rw && location < mod->module_core_rw + mod->core_size_rw))
 		entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr;
-	else
+	else if ((location >= mod->module_init_rx && location < mod->module_init_rx + mod->init_size_rx) ||
+		 (location >= mod->module_init_rw && location < mod->module_init_rw + mod->init_size_rw))
 		entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr;
+	else {
+		printk(KERN_ERR "%s: invalid R_PPC_REL24 entry found\n", mod->name);
+		return ~0UL;
+	}
 
 	/* Find this entry, or if that fails, the next avail. entry */
 	while (entry->jump[0]) {
@@ -296,7 +301,7 @@ int apply_relocate_add(Elf32_Shdr *sechd
 	}
 #ifdef CONFIG_DYNAMIC_FTRACE
 	module->arch.tramp =
-		do_plt_call(module->module_core,
+		do_plt_call(module->module_core_rx,
 			    (unsigned long)ftrace_caller,
 			    sechdrs, module);
 #endif
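
The do_plt_call() change reflects the PaX module layout: the core and init images are split into rx (code) and rw (data) mappings, so a relocation site has to be matched against four ranges, and anything outside them is rejected instead of being silently treated as an init-section call. A simplified, illustrative classifier follows; the structure is a trimmed stand-in for struct module and the field names are invented.

#include <stdbool.h>
#include <stdint.h>

struct mod_layout {
	uintptr_t core_rx, core_rx_size;
	uintptr_t core_rw, core_rw_size;
	uintptr_t init_rx, init_rx_size;
	uintptr_t init_rw, init_rw_size;
};

static bool in_range(uintptr_t addr, uintptr_t base, uintptr_t size)
{
	return addr >= base && addr < base + size;
}

/* returns 1 for a core address, 2 for an init address, 0 if neither */
int classify_location(const struct mod_layout *m, uintptr_t loc)
{
	if (in_range(loc, m->core_rx, m->core_rx_size) ||
	    in_range(loc, m->core_rw, m->core_rw_size))
		return 1;
	if (in_range(loc, m->init_rx, m->init_rx_size) ||
	    in_range(loc, m->init_rw, m->init_rw_size))
		return 2;
	return 0;
}
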
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/kernel/process.c linux-4.0.9-pax/arch/powerpc/kernel/process.c
--- linux-4.0.9/arch/powerpc/kernel/process.c	2015-03-18 15:21:50.208349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/kernel/process.c	2015-04-15 12:13:52.878318624 +0200
@@ -1608,49 +1608,3 @@ void notrace __ppc64_runlatch_off(void)
 	mtspr(SPRN_CTRLT, ctrl);
 }
 #endif /* CONFIG_PPC64 */
-
-unsigned long arch_align_stack(unsigned long sp)
-{
-	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-		sp -= get_random_int() & ~PAGE_MASK;
-	return sp & ~0xf;
-}
-
-static inline unsigned long brk_rnd(void)
-{
-        unsigned long rnd = 0;
-
-	/* 8MB for 32bit, 1GB for 64bit */
-	if (is_32bit_task())
-		rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT)));
-	else
-		rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT)));
-
-	return rnd << PAGE_SHIFT;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	unsigned long base = mm->brk;
-	unsigned long ret;
-
-#ifdef CONFIG_PPC_STD_MMU_64
-	/*
-	 * If we are using 1TB segments and we are allowed to randomise
-	 * the heap, we can put it above 1TB so it is backed by a 1TB
-	 * segment. Otherwise the heap will be in the bottom 1TB
-	 * which always uses 256MB segments and this may result in a
-	 * performance penalty.
-	 */
-	if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
-		base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
-#endif
-
-	ret = PAGE_ALIGN(base + brk_rnd());
-
-	if (ret < mm->brk)
-		return mm->brk;
-
-	return ret;
-}
-
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/kernel/signal_32.c linux-4.0.9-pax/arch/powerpc/kernel/signal_32.c
--- linux-4.0.9/arch/powerpc/kernel/signal_32.c	2015-04-13 11:20:56.114617767 +0200
+++ linux-4.0.9-pax/arch/powerpc/kernel/signal_32.c	2015-04-15 12:13:52.878318624 +0200
@@ -1011,7 +1011,7 @@ int handle_rt_signal32(struct ksignal *k
 	/* Save user registers on the stack */
 	frame = &rt_sf->uc.uc_mcontext;
 	addr = frame;
-	if (vdso32_rt_sigtramp && current->mm->context.vdso_base) {
+	if (vdso32_rt_sigtramp && current->mm->context.vdso_base != ~0UL) {
 		sigret = 0;
 		tramp = current->mm->context.vdso_base + vdso32_rt_sigtramp;
 	} else {
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/kernel/signal_64.c linux-4.0.9-pax/arch/powerpc/kernel/signal_64.c
--- linux-4.0.9/arch/powerpc/kernel/signal_64.c	2015-04-13 11:20:56.114617767 +0200
+++ linux-4.0.9-pax/arch/powerpc/kernel/signal_64.c	2015-04-15 12:13:52.878318624 +0200
@@ -754,7 +754,7 @@ int handle_rt_signal64(struct ksignal *k
 	current->thread.fp_state.fpscr = 0;
 
 	/* Set up to return from userspace. */
-	if (vdso64_rt_sigtramp && current->mm->context.vdso_base) {
+	if (vdso64_rt_sigtramp && current->mm->context.vdso_base != ~0UL) {
 		regs->link = current->mm->context.vdso_base + vdso64_rt_sigtramp;
 	} else {
 		err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/kernel/traps.c linux-4.0.9-pax/arch/powerpc/kernel/traps.c
--- linux-4.0.9/arch/powerpc/kernel/traps.c	2015-04-13 11:20:56.122617767 +0200
+++ linux-4.0.9-pax/arch/powerpc/kernel/traps.c	2015-04-15 12:13:52.878318624 +0200
@@ -36,6 +36,7 @@
 #include <linux/debugfs.h>
 #include <linux/ratelimit.h>
 #include <linux/context_tracking.h>
+#include <linux/uaccess.h>
 
 #include <asm/emulated_ops.h>
 #include <asm/pgtable.h>
@@ -1137,6 +1138,26 @@ void __kprobes program_check_exception(s
 	enum ctx_state prev_state = exception_enter();
 	unsigned int reason = get_reason(regs);
 
+#ifdef CONFIG_PAX_REFCOUNT
+	unsigned int bkpt;
+	const struct exception_table_entry *entry;
+
+	if (reason & REASON_ILLEGAL) {
+		/* Check if PaX bad instruction */
+		if (!probe_kernel_address(regs->nip, bkpt) && bkpt == 0xc00b00) {
+			current->thread.trap_nr = 0;
+			pax_report_refcount_overflow(regs);
+			/* fixup_exception() for PowerPC does not exist, simulate its job */
+			if ((entry = search_exception_tables(regs->nip)) != NULL) {
+				regs->nip = entry->fixup;
+				return;
+			}
+			/* fixup_exception() could not handle */
+			goto bail;
+		}
+	}
+#endif
+
 	/* We can now get here via a FP Unavailable exception if the core
 	 * has no FPU, in that case the reason flags will be 0 */
 
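
The program_check_exception() hook above is what gives the 0x00c00b00 words planted in the atomic, local_t and rwlock fast paths their meaning: the word decodes as an illegal instruction, the handler reads it back with probe_kernel_address(), reports the overflow, and resumes at the address recorded by the matching _ASM_EXTABLE entry. Below is a deliberately simplified model of that lookup; the addresses and the structure layout are invented for the example.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct extable_entry {
	unsigned long insn;	/* address of the trapping instruction */
	unsigned long fixup;	/* address to resume at */
};

/* made-up addresses; the real table is emitted by _ASM_EXTABLE */
static const struct extable_entry extable[] = {
	{ 0x1000, 0x1010 },
};

static const struct extable_entry *search_extable(unsigned long nip)
{
	for (size_t i = 0; i < sizeof(extable) / sizeof(extable[0]); i++)
		if (extable[i].insn == nip)
			return &extable[i];
	return NULL;
}

int main(void)
{
	unsigned long nip = 0x1000;		/* pretend faulting PC */
	uint32_t insn = 0x00c00b00;		/* the PaX marker word */
	const struct extable_entry *e;

	if (insn == 0x00c00b00 && (e = search_extable(nip)) != NULL)
		printf("refcount overflow at %#lx, resuming at %#lx\n",
		       nip, e->fixup);
	return 0;
}
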
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/kernel/vdso.c linux-4.0.9-pax/arch/powerpc/kernel/vdso.c
--- linux-4.0.9/arch/powerpc/kernel/vdso.c	2015-03-18 15:21:50.208349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/kernel/vdso.c	2015-04-15 12:13:52.878318624 +0200
@@ -34,6 +34,7 @@
 #include <asm/vdso.h>
 #include <asm/vdso_datapage.h>
 #include <asm/setup.h>
+#include <asm/mman.h>
 
 #undef DEBUG
 
@@ -220,7 +221,7 @@ int arch_setup_additional_pages(struct l
 	vdso_base = VDSO32_MBASE;
 #endif
 
-	current->mm->context.vdso_base = 0;
+	current->mm->context.vdso_base = ~0UL;
 
 	/* vDSO has a problem and was disabled, just don't "enable" it for the
 	 * process
@@ -240,7 +241,7 @@ int arch_setup_additional_pages(struct l
 	vdso_base = get_unmapped_area(NULL, vdso_base,
 				      (vdso_pages << PAGE_SHIFT) +
 				      ((VDSO_ALIGNMENT - 1) & PAGE_MASK),
-				      0, 0);
+				      0, MAP_PRIVATE | MAP_EXECUTABLE);
 	if (IS_ERR_VALUE(vdso_base)) {
 		rc = vdso_base;
 		goto fail_mmapsem;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/kvm/powerpc.c linux-4.0.9-pax/arch/powerpc/kvm/powerpc.c
--- linux-4.0.9/arch/powerpc/kvm/powerpc.c	2015-04-13 11:20:56.178617764 +0200
+++ linux-4.0.9-pax/arch/powerpc/kvm/powerpc.c	2015-04-15 12:13:52.878318624 +0200
@@ -1402,7 +1402,7 @@ void kvmppc_init_lpid(unsigned long nr_l
 }
 EXPORT_SYMBOL_GPL(kvmppc_init_lpid);
 
-int kvm_arch_init(void *opaque)
+int kvm_arch_init(const void *opaque)
 {
 	return 0;
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/lib/usercopy_64.c linux-4.0.9-pax/arch/powerpc/lib/usercopy_64.c
--- linux-4.0.9/arch/powerpc/lib/usercopy_64.c	2015-03-18 15:21:50.208349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/lib/usercopy_64.c	2015-04-15 12:13:52.878318624 +0200
@@ -9,22 +9,6 @@
 #include <linux/module.h>
 #include <asm/uaccess.h>
 
-unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
-{
-	if (likely(access_ok(VERIFY_READ, from, n)))
-		n = __copy_from_user(to, from, n);
-	else
-		memset(to, 0, n);
-	return n;
-}
-
-unsigned long copy_to_user(void __user *to, const void *from, unsigned long n)
-{
-	if (likely(access_ok(VERIFY_WRITE, to, n)))
-		n = __copy_to_user(to, from, n);
-	return n;
-}
-
 unsigned long copy_in_user(void __user *to, const void __user *from,
 			   unsigned long n)
 {
@@ -35,7 +19,5 @@ unsigned long copy_in_user(void __user *
 	return n;
 }
 
-EXPORT_SYMBOL(copy_from_user);
-EXPORT_SYMBOL(copy_to_user);
 EXPORT_SYMBOL(copy_in_user);
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/mm/fault.c linux-4.0.9-pax/arch/powerpc/mm/fault.c
--- linux-4.0.9/arch/powerpc/mm/fault.c	2015-04-13 11:20:56.202617763 +0200
+++ linux-4.0.9-pax/arch/powerpc/mm/fault.c	2015-04-15 12:13:52.878318624 +0200
@@ -33,6 +33,10 @@
 #include <linux/ratelimit.h>
 #include <linux/context_tracking.h>
 #include <linux/hugetlb.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/compiler.h>
+#include <linux/unistd.h>
 
 #include <asm/firmware.h>
 #include <asm/page.h>
@@ -68,6 +72,33 @@ static inline int notify_page_fault(stru
 }
 #endif
 
+#ifdef CONFIG_PAX_PAGEEXEC
+/*
+ * PaX: decide what to do with offenders (regs->nip = fault address)
+ *
+ * returns 1 when task should be killed
+ */
+static int pax_handle_fetch_fault(struct pt_regs *regs)
+{
+	return 1;
+}
+
+void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
+{
+	unsigned long i;
+
+	printk(KERN_ERR "PAX: bytes at PC: ");
+	for (i = 0; i < 5; i++) {
+		unsigned int c;
+		if (get_user(c, (unsigned int __user *)pc+i))
+			printk(KERN_CONT "???????? ");
+		else
+			printk(KERN_CONT "%08x ", c);
+	}
+	printk("\n");
+}
+#endif
+
 /*
  * Check whether the instruction at regs->nip is a store using
  * an update addressing form which will update r1.
@@ -227,7 +258,7 @@ int __kprobes do_page_fault(struct pt_re
 	 * indicate errors in DSISR but can validly be set in SRR1.
 	 */
 	if (trap == 0x400)
-		error_code &= 0x48200000;
+		error_code &= 0x58200000;
 	else
 		is_write = error_code & DSISR_ISSTORE;
 #else
@@ -383,12 +414,16 @@ good_area:
          * "undefined".  Of those that can be set, this is the only
          * one which seems bad.
          */
-	if (error_code & 0x10000000)
+	if (error_code & DSISR_GUARDED)
                 /* Guarded storage error. */
 		goto bad_area;
 #endif /* CONFIG_8xx */
 
 	if (is_exec) {
+#ifdef CONFIG_PPC_STD_MMU
+		if (error_code & DSISR_GUARDED)
+			goto bad_area;
+#endif
 		/*
 		 * Allow execution from readable areas if the MMU does not
 		 * provide separate controls over reading and executing.
@@ -483,6 +518,23 @@ bad_area:
 bad_area_nosemaphore:
 	/* User mode accesses cause a SIGSEGV */
 	if (user_mode(regs)) {
+
+#ifdef CONFIG_PAX_PAGEEXEC
+		if (mm->pax_flags & MF_PAX_PAGEEXEC) {
+#ifdef CONFIG_PPC_STD_MMU
+			if (is_exec && (error_code & (DSISR_PROTFAULT | DSISR_GUARDED))) {
+#else
+			if (is_exec && regs->nip == address) {
+#endif
+				switch (pax_handle_fetch_fault(regs)) {
+				}
+
+				pax_report_fault(regs, (void *)regs->nip, (void *)regs->gpr[PT_R1]);
+				do_group_exit(SIGKILL);
+			}
+		}
+#endif
+
 		_exception(SIGSEGV, regs, code, address);
 		goto bail;
 	}
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/mm/mmap.c linux-4.0.9-pax/arch/powerpc/mm/mmap.c
--- linux-4.0.9/arch/powerpc/mm/mmap.c	2015-03-18 15:21:50.212349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/mm/mmap.c	2015-04-15 12:13:52.878318624 +0200
@@ -53,10 +53,14 @@ static inline int mmap_is_legacy(void)
 	return sysctl_legacy_va_layout;
 }
 
-static unsigned long mmap_rnd(void)
+static unsigned long mmap_rnd(struct mm_struct *mm)
 {
 	unsigned long rnd = 0;
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	if (current->flags & PF_RANDOMIZE) {
 		/* 8MB for 32bit, 1GB for 64bit */
 		if (is_32bit_task())
@@ -67,7 +71,7 @@ static unsigned long mmap_rnd(void)
 	return rnd << PAGE_SHIFT;
 }
 
-static inline unsigned long mmap_base(void)
+static inline unsigned long mmap_base(struct mm_struct *mm)
 {
 	unsigned long gap = rlimit(RLIMIT_STACK);
 
@@ -76,7 +80,7 @@ static inline unsigned long mmap_base(vo
 	else if (gap > MAX_GAP)
 		gap = MAX_GAP;
 
-	return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd());
+	return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd(mm));
 }
 
 /*
@@ -91,9 +95,21 @@ void arch_pick_mmap_layout(struct mm_str
 	 */
 	if (mmap_is_legacy()) {
 		mm->mmap_base = TASK_UNMAPPED_BASE;
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			mm->mmap_base += mm->delta_mmap;
+#endif
+
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base();
+		mm->mmap_base = mmap_base(mm);
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			mm->mmap_base -= mm->delta_mmap + mm->delta_stack;
+#endif
+
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
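
With MF_PAX_RANDMMAP set, mmap_rnd() above contributes nothing and the randomisation comes from the per-mm delta_mmap/delta_stack offsets chosen when the binary is loaded: the legacy bottom-up base is pushed up by delta_mmap, and the top-down base is pulled down by delta_mmap + delta_stack. The toy calculation below uses invented values; TASK_SIZE, the deltas and the stack gap are examples, and PAGE_ALIGN is omitted because the values are already page aligned.

#include <stdio.h>

#define TASK_SIZE		0x0000400000000000ULL	/* example value */
#define TASK_UNMAPPED_BASE	(TASK_SIZE / 4)		/* example value */

int main(void)
{
	unsigned long long delta_mmap  = 0x12345000ULL;	/* invented, page aligned */
	unsigned long long delta_stack = 0x00067000ULL;	/* invented, page aligned */
	unsigned long long gap = 8ULL << 20;		/* stack rlimit for the example */

	/* legacy (bottom-up) layout: base moves up by delta_mmap */
	unsigned long long legacy_base = TASK_UNMAPPED_BASE + delta_mmap;

	/* top-down layout: base moves down by delta_mmap + delta_stack */
	unsigned long long topdown_base = TASK_SIZE - gap - (delta_mmap + delta_stack);

	printf("legacy base:  %#llx\ntopdown base: %#llx\n", legacy_base, topdown_base);
	return 0;
}
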
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/mm/slice.c linux-4.0.9-pax/arch/powerpc/mm/slice.c
--- linux-4.0.9/arch/powerpc/mm/slice.c	2015-04-13 11:20:56.514617746 +0200
+++ linux-4.0.9-pax/arch/powerpc/mm/slice.c	2015-04-15 12:13:52.882318624 +0200
@@ -105,7 +105,7 @@ static int slice_area_is_free(struct mm_
 	if ((mm->task_size - len) < addr)
 		return 0;
 	vma = find_vma(mm, addr);
-	return (!vma || (addr + len) <= vma->vm_start);
+	return check_heap_stack_gap(vma, addr, len);
 }
 
 static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
@@ -277,6 +277,12 @@ static unsigned long slice_find_area_bot
 	info.align_offset = 0;
 
 	addr = TASK_UNMAPPED_BASE;
+
+#ifdef CONFIG_PAX_RANDMMAP
+	if (mm->pax_flags & MF_PAX_RANDMMAP)
+		addr += mm->delta_mmap;
+#endif
+
 	while (addr < TASK_SIZE) {
 		info.low_limit = addr;
 		if (!slice_scan_available(addr, available, 1, &addr))
@@ -410,6 +416,11 @@ unsigned long slice_get_unmapped_area(un
 	if (fixed && addr > (mm->task_size - len))
 		return -ENOMEM;
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!fixed && (mm->pax_flags & MF_PAX_RANDMMAP))
+		addr = 0;
+#endif
+
 	/* If hint, make sure it matches our alignment restrictions */
 	if (!fixed && addr) {
 		addr = _ALIGN_UP(addr, 1ul << pshift);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/powerpc/platforms/cell/spufs/file.c linux-4.0.9-pax/arch/powerpc/platforms/cell/spufs/file.c
--- linux-4.0.9/arch/powerpc/platforms/cell/spufs/file.c	2015-03-18 15:21:50.212349252 +0100
+++ linux-4.0.9-pax/arch/powerpc/platforms/cell/spufs/file.c	2015-04-15 12:13:52.882318624 +0200
@@ -280,9 +280,9 @@ spufs_mem_mmap_fault(struct vm_area_stru
 	return VM_FAULT_NOPAGE;
 }
 
-static int spufs_mem_mmap_access(struct vm_area_struct *vma,
+static ssize_t spufs_mem_mmap_access(struct vm_area_struct *vma,
 				unsigned long address,
-				void *buf, int len, int write)
+				void *buf, size_t len, int write)
 {
 	struct spu_context *ctx = vma->vm_file->private_data;
 	unsigned long offset = address - vma->vm_start;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/s390/include/asm/atomic.h linux-4.0.9-pax/arch/s390/include/asm/atomic.h
--- linux-4.0.9/arch/s390/include/asm/atomic.h	2015-03-18 15:21:50.212349252 +0100
+++ linux-4.0.9-pax/arch/s390/include/asm/atomic.h	2015-04-15 12:13:52.882318624 +0200
@@ -412,4 +412,14 @@ static inline long long atomic64_dec_if_
 #define atomic64_dec_and_test(_v)	(atomic64_sub_return(1, _v) == 0)
 #define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
 
+#define atomic64_read_unchecked(v)		atomic64_read(v)
+#define atomic64_set_unchecked(v, i)		atomic64_set((v), (i))
+#define atomic64_add_unchecked(a, v)		atomic64_add((a), (v))
+#define atomic64_add_return_unchecked(a, v)	atomic64_add_return((a), (v))
+#define atomic64_sub_unchecked(a, v)		atomic64_sub((a), (v))
+#define atomic64_inc_unchecked(v)		atomic64_inc(v)
+#define atomic64_inc_return_unchecked(v)	atomic64_inc_return(v)
+#define atomic64_dec_unchecked(v)		atomic64_dec(v)
+#define atomic64_cmpxchg_unchecked(v, o, n)	atomic64_cmpxchg((v), (o), (n))
+
 #endif /* __ARCH_S390_ATOMIC__  */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/s390/include/asm/barrier.h linux-4.0.9-pax/arch/s390/include/asm/barrier.h
--- linux-4.0.9/arch/s390/include/asm/barrier.h	2015-03-18 15:21:50.212349252 +0100
+++ linux-4.0.9-pax/arch/s390/include/asm/barrier.h	2015-04-15 12:13:52.882318624 +0200
@@ -42,7 +42,7 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	ACCESS_ONCE_RW(*p) = (v);					\
 } while (0)
 
 #define smp_load_acquire(p)						\
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/s390/include/asm/elf.h linux-4.0.9-pax/arch/s390/include/asm/elf.h
--- linux-4.0.9/arch/s390/include/asm/elf.h	2015-04-13 11:21:00.602617528 +0200
+++ linux-4.0.9-pax/arch/s390/include/asm/elf.h	2015-04-15 12:13:52.882318624 +0200
@@ -163,8 +163,14 @@ extern unsigned int vdso_enabled;
    the loader.  We need to make sure that it is out of the way of the program
    that it will "exec", and that there is sufficient room for the brk.  */
 
-extern unsigned long randomize_et_dyn(void);
-#define ELF_ET_DYN_BASE		randomize_et_dyn()
+#define ELF_ET_DYN_BASE		(STACK_TOP / 3 * 2)
+
+#ifdef CONFIG_PAX_ASLR
+#define PAX_ELF_ET_DYN_BASE	(test_thread_flag(TIF_31BIT) ? 0x10000UL : 0x80000000UL)
+
+#define PAX_DELTA_MMAP_LEN	(test_thread_flag(TIF_31BIT) ? 15 : 26)
+#define PAX_DELTA_STACK_LEN	(test_thread_flag(TIF_31BIT) ? 15 : 26)
+#endif
 
 /* This yields a mask that user programs can use to figure out what
    instruction set this CPU supports. */
@@ -225,9 +231,6 @@ struct linux_binprm;
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
 int arch_setup_additional_pages(struct linux_binprm *, int);
 
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
 void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vxrs);
 
 #endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/s390/include/asm/exec.h linux-4.0.9-pax/arch/s390/include/asm/exec.h
--- linux-4.0.9/arch/s390/include/asm/exec.h	2015-03-18 15:21:50.212349252 +0100
+++ linux-4.0.9-pax/arch/s390/include/asm/exec.h	2015-04-15 12:13:52.882318624 +0200
@@ -7,6 +7,6 @@
 #ifndef __ASM_EXEC_H
 #define __ASM_EXEC_H
 
-extern unsigned long arch_align_stack(unsigned long sp);
+#define arch_align_stack(x) ((x) & ~0xfUL)
 
 #endif /* __ASM_EXEC_H */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/s390/include/asm/uaccess.h linux-4.0.9-pax/arch/s390/include/asm/uaccess.h
--- linux-4.0.9/arch/s390/include/asm/uaccess.h	2015-03-18 15:21:50.212349252 +0100
+++ linux-4.0.9-pax/arch/s390/include/asm/uaccess.h	2015-04-15 12:13:52.882318624 +0200
@@ -59,6 +59,7 @@ static inline int __range_ok(unsigned lo
 	__range_ok((unsigned long)(addr), (size));	\
 })
 
+#define access_ok_noprefault(type, addr, size) access_ok((type), (addr), (size))
 #define access_ok(type, addr, size) __access_ok(addr, size)
 
 /*
@@ -275,6 +276,10 @@ static inline unsigned long __must_check
 copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	might_fault();
+
+	if ((long)n < 0)
+		return n;
+
 	return __copy_to_user(to, from, n);
 }
 
@@ -303,10 +308,14 @@ __compiletime_warning("copy_from_user()
 static inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	unsigned int sz = __compiletime_object_size(to);
+	size_t sz = __compiletime_object_size(to);
 
 	might_fault();
-	if (unlikely(sz != -1 && sz < n)) {
+
+	if ((long)n < 0)
+		return n;
+
+	if (unlikely(sz != (size_t)-1 && sz < n)) {
 		copy_from_user_overflow();
 		return n;
 	}
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/s390/kernel/module.c linux-4.0.9-pax/arch/s390/kernel/module.c
--- linux-4.0.9/arch/s390/kernel/module.c	2015-04-13 11:21:00.878617513 +0200
+++ linux-4.0.9-pax/arch/s390/kernel/module.c	2015-04-15 12:13:52.882318624 +0200
@@ -165,11 +165,11 @@ int module_frob_arch_sections(Elf_Ehdr *
 
 	/* Increase core size by size of got & plt and set start
 	   offsets for got and plt. */
-	me->core_size = ALIGN(me->core_size, 4);
-	me->arch.got_offset = me->core_size;
-	me->core_size += me->arch.got_size;
-	me->arch.plt_offset = me->core_size;
-	me->core_size += me->arch.plt_size;
+	me->core_size_rw = ALIGN(me->core_size_rw, 4);
+	me->arch.got_offset = me->core_size_rw;
+	me->core_size_rw += me->arch.got_size;
+	me->arch.plt_offset = me->core_size_rx;
+	me->core_size_rx += me->arch.plt_size;
 	return 0;
 }
 
@@ -285,7 +285,7 @@ static int apply_rela(Elf_Rela *rela, El
 		if (info->got_initialized == 0) {
 			Elf_Addr *gotent;
 
-			gotent = me->module_core + me->arch.got_offset +
+			gotent = me->module_core_rw + me->arch.got_offset +
 				info->got_offset;
 			*gotent = val;
 			info->got_initialized = 1;
@@ -308,7 +308,7 @@ static int apply_rela(Elf_Rela *rela, El
 			rc = apply_rela_bits(loc, val, 0, 64, 0);
 		else if (r_type == R_390_GOTENT ||
 			 r_type == R_390_GOTPLTENT) {
-			val += (Elf_Addr) me->module_core - loc;
+			val += (Elf_Addr) me->module_core_rw - loc;
 			rc = apply_rela_bits(loc, val, 1, 32, 1);
 		}
 		break;
@@ -321,7 +321,7 @@ static int apply_rela(Elf_Rela *rela, El
 	case R_390_PLTOFF64:	/* 16 bit offset from GOT to PLT. */
 		if (info->plt_initialized == 0) {
 			unsigned int *ip;
-			ip = me->module_core + me->arch.plt_offset +
+			ip = me->module_core_rx + me->arch.plt_offset +
 				info->plt_offset;
 #ifndef CONFIG_64BIT
 			ip[0] = 0x0d105810; /* basr 1,0; l 1,6(1); br 1 */
@@ -346,7 +346,7 @@ static int apply_rela(Elf_Rela *rela, El
 			       val - loc + 0xffffUL < 0x1ffffeUL) ||
 			      (r_type == R_390_PLT32DBL &&
 			       val - loc + 0xffffffffULL < 0x1fffffffeULL)))
-				val = (Elf_Addr) me->module_core +
+				val = (Elf_Addr) me->module_core_rx +
 					me->arch.plt_offset +
 					info->plt_offset;
 			val += rela->r_addend - loc;
@@ -368,7 +368,7 @@ static int apply_rela(Elf_Rela *rela, El
 	case R_390_GOTOFF32:	/* 32 bit offset to GOT.  */
 	case R_390_GOTOFF64:	/* 64 bit offset to GOT. */
 		val = val + rela->r_addend -
-			((Elf_Addr) me->module_core + me->arch.got_offset);
+			((Elf_Addr) me->module_core_rw + me->arch.got_offset);
 		if (r_type == R_390_GOTOFF16)
 			rc = apply_rela_bits(loc, val, 0, 16, 0);
 		else if (r_type == R_390_GOTOFF32)
@@ -378,7 +378,7 @@ static int apply_rela(Elf_Rela *rela, El
 		break;
 	case R_390_GOTPC:	/* 32 bit PC relative offset to GOT. */
 	case R_390_GOTPCDBL:	/* 32 bit PC rel. off. to GOT shifted by 1. */
-		val = (Elf_Addr) me->module_core + me->arch.got_offset +
+		val = (Elf_Addr) me->module_core_rw + me->arch.got_offset +
 			rela->r_addend - loc;
 		if (r_type == R_390_GOTPC)
 			rc = apply_rela_bits(loc, val, 1, 32, 0);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/s390/kernel/process.c linux-4.0.9-pax/arch/s390/kernel/process.c
--- linux-4.0.9/arch/s390/kernel/process.c	2015-04-13 11:21:00.878617513 +0200
+++ linux-4.0.9-pax/arch/s390/kernel/process.c	2015-04-15 12:13:52.882318624 +0200
@@ -227,27 +227,3 @@ unsigned long get_wchan(struct task_stru
 	}
 	return 0;
 }
-
-unsigned long arch_align_stack(unsigned long sp)
-{
-	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-		sp -= get_random_int() & ~PAGE_MASK;
-	return sp & ~0xf;
-}
-
-static inline unsigned long brk_rnd(void)
-{
-	/* 8MB for 32bit, 1GB for 64bit */
-	if (is_32bit_task())
-		return (get_random_int() & 0x7ffUL) << PAGE_SHIFT;
-	else
-		return (get_random_int() & 0x3ffffUL) << PAGE_SHIFT;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	unsigned long ret;
-
-	ret = PAGE_ALIGN(mm->brk + brk_rnd());
-	return (ret > mm->brk) ? ret : mm->brk;
-}
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/s390/mm/mmap.c linux-4.0.9-pax/arch/s390/mm/mmap.c
--- linux-4.0.9/arch/s390/mm/mmap.c	2015-04-13 11:21:01.046617504 +0200
+++ linux-4.0.9-pax/arch/s390/mm/mmap.c	2015-06-15 22:36:29.491131160 +0200
@@ -62,6 +62,12 @@ static inline int mmap_is_legacy(void)
 
 static unsigned long mmap_rnd(void)
 {
+
+#ifdef CONFIG_PAX_RANDMMAP
+	if (current->mm->pax_flags & MF_PAX_RANDMMAP)
+		return 0;
+#endif
+
 	if (!(current->flags & PF_RANDOMIZE))
 		return 0;
 	if (is_32bit_task())
@@ -204,9 +210,21 @@ void arch_pick_mmap_layout(struct mm_str
 	 */
 	if (mmap_is_legacy()) {
 		mm->mmap_base = mmap_base_legacy();
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			mm->mmap_base += mm->delta_mmap;
+#endif
+
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
 		mm->mmap_base = mmap_base();
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			mm->mmap_base -= mm->delta_mmap + mm->delta_stack;
+#endif
+
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
@@ -279,9 +297,21 @@ void arch_pick_mmap_layout(struct mm_str
 	 */
 	if (mmap_is_legacy()) {
 		mm->mmap_base = mmap_base_legacy();
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			mm->mmap_base += mm->delta_mmap;
+#endif
+
 		mm->get_unmapped_area = s390_get_unmapped_area;
 	} else {
 		mm->mmap_base = mmap_base();
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			mm->mmap_base -= mm->delta_mmap + mm->delta_stack;
+#endif
+
 		mm->get_unmapped_area = s390_get_unmapped_area_topdown;
 	}
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/score/include/asm/exec.h linux-4.0.9-pax/arch/score/include/asm/exec.h
--- linux-4.0.9/arch/score/include/asm/exec.h	2015-03-18 15:21:50.212349252 +0100
+++ linux-4.0.9-pax/arch/score/include/asm/exec.h	2015-04-15 12:13:52.882318624 +0200
@@ -1,6 +1,6 @@
 #ifndef _ASM_SCORE_EXEC_H
 #define _ASM_SCORE_EXEC_H
 
-extern unsigned long arch_align_stack(unsigned long sp);
+#define arch_align_stack(x) (x)
 
 #endif /* _ASM_SCORE_EXEC_H */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/score/kernel/process.c linux-4.0.9-pax/arch/score/kernel/process.c
--- linux-4.0.9/arch/score/kernel/process.c	2015-03-18 15:21:50.212349252 +0100
+++ linux-4.0.9-pax/arch/score/kernel/process.c	2015-04-15 12:13:52.882318624 +0200
@@ -116,8 +116,3 @@ unsigned long get_wchan(struct task_stru
 
 	return task_pt_regs(task)->cp0_epc;
 }
-
-unsigned long arch_align_stack(unsigned long sp)
-{
-	return sp;
-}
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sh/mm/mmap.c linux-4.0.9-pax/arch/sh/mm/mmap.c
--- linux-4.0.9/arch/sh/mm/mmap.c	2015-03-18 15:21:50.212349252 +0100
+++ linux-4.0.9-pax/arch/sh/mm/mmap.c	2015-04-15 12:13:52.882318624 +0200
@@ -55,6 +55,10 @@ unsigned long arch_get_unmapped_area(str
 	if (filp || (flags & MAP_SHARED))
 		do_colour_align = 1;
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	if (addr) {
 		if (do_colour_align)
 			addr = COLOUR_ALIGN(addr, pgoff);
@@ -62,14 +66,13 @@ unsigned long arch_get_unmapped_area(str
 			addr = PAGE_ALIGN(addr);
 
 		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len))
 			return addr;
 	}
 
 	info.flags = 0;
 	info.length = len;
-	info.low_limit = TASK_UNMAPPED_BASE;
+	info.low_limit = mm->mmap_base;
 	info.high_limit = TASK_SIZE;
 	info.align_mask = do_colour_align ? (PAGE_MASK & shm_align_mask) : 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
@@ -104,6 +107,10 @@ arch_get_unmapped_area_topdown(struct fi
 	if (filp || (flags & MAP_SHARED))
 		do_colour_align = 1;
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	/* requesting a specific address */
 	if (addr) {
 		if (do_colour_align)
@@ -112,8 +119,7 @@ arch_get_unmapped_area_topdown(struct fi
 			addr = PAGE_ALIGN(addr);
 
 		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len))
 			return addr;
 	}
 
@@ -135,6 +141,12 @@ arch_get_unmapped_area_topdown(struct fi
 		VM_BUG_ON(addr != -ENOMEM);
 		info.flags = 0;
 		info.low_limit = TASK_UNMAPPED_BASE;
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			info.low_limit += mm->delta_mmap;
+#endif
+
 		info.high_limit = TASK_SIZE;
 		addr = vm_unmapped_area(&info);
 	}
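
check_heap_stack_gap() replaces the open-coded "!vma || addr + len <= vma->vm_start" test used here and in the powerpc slice allocator earlier in the patch; besides the plain overlap check it is meant to keep a configurable guard gap below stack VMAs so mappings cannot be placed right up against them. The stand-in below is only a conceptual sketch of that idea; the real helper lives in the common mm parts of the patch and its exact interface and the gap size are assumptions here.

#include <stdbool.h>

struct vma_sketch {
	unsigned long vm_start;
	unsigned long vm_flags;
};

#define VM_GROWSDOWN		0x0100UL
#define HEAP_STACK_GAP		(64UL << 12)	/* example: 64 pages */

bool check_heap_stack_gap_sketch(const struct vma_sketch *vma,
				 unsigned long addr, unsigned long len)
{
	unsigned long gap = 0;

	if (!vma)
		return true;			/* nothing above the candidate range */

	if (vma->vm_flags & VM_GROWSDOWN)	/* keep a hole below stacks */
		gap = HEAP_STACK_GAP;

	return addr + len + gap <= vma->vm_start;
}
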
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/atomic_64.h linux-4.0.9-pax/arch/sparc/include/asm/atomic_64.h
--- linux-4.0.9/arch/sparc/include/asm/atomic_64.h	2015-03-18 15:21:50.212349252 +0100
+++ linux-4.0.9-pax/arch/sparc/include/asm/atomic_64.h	2015-04-15 12:13:52.882318624 +0200
@@ -15,18 +15,38 @@
 #define ATOMIC64_INIT(i)	{ (i) }
 
 #define atomic_read(v)		ACCESS_ONCE((v)->counter)
+static inline int atomic_read_unchecked(const atomic_unchecked_t *v)
+{
+	return ACCESS_ONCE(v->counter);
+}
 #define atomic64_read(v)	ACCESS_ONCE((v)->counter)
+static inline long atomic64_read_unchecked(const atomic64_unchecked_t *v)
+{
+	return ACCESS_ONCE(v->counter);
+}
 
 #define atomic_set(v, i)	(((v)->counter) = i)
+static inline void atomic_set_unchecked(atomic_unchecked_t *v, int i)
+{
+	v->counter = i;
+}
 #define atomic64_set(v, i)	(((v)->counter) = i)
+static inline void atomic64_set_unchecked(atomic64_unchecked_t *v, long i)
+{
+	v->counter = i;
+}
+
+#define __ATOMIC_OP(op, suffix)						\
+void atomic_##op##suffix(int, atomic##suffix##_t *);			\
+void atomic64_##op##suffix(long, atomic64##suffix##_t *);
+
+#define ATOMIC_OP(op) __ATOMIC_OP(op, ) __ATOMIC_OP(op, _unchecked)
 
-#define ATOMIC_OP(op)							\
-void atomic_##op(int, atomic_t *);					\
-void atomic64_##op(long, atomic64_t *);
-
-#define ATOMIC_OP_RETURN(op)						\
-int atomic_##op##_return(int, atomic_t *);				\
-long atomic64_##op##_return(long, atomic64_t *);
+#define __ATOMIC_OP_RETURN(op, suffix)					\
+int atomic_##op##_return##suffix(int, atomic##suffix##_t *);		\
+long atomic64_##op##_return##suffix(long, atomic64##suffix##_t *);
+
+#define ATOMIC_OP_RETURN(op) __ATOMIC_OP_RETURN(op, ) __ATOMIC_OP_RETURN(op, _unchecked)
 
 #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
 
@@ -35,13 +55,23 @@ ATOMIC_OPS(sub)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
+#undef __ATOMIC_OP_RETURN
 #undef ATOMIC_OP
+#undef __ATOMIC_OP
 
 #define atomic_dec_return(v)   atomic_sub_return(1, v)
 #define atomic64_dec_return(v) atomic64_sub_return(1, v)
 
 #define atomic_inc_return(v)   atomic_add_return(1, v)
+static inline int atomic_inc_return_unchecked(atomic_unchecked_t *v)
+{
+	return atomic_add_return_unchecked(1, v);
+}
 #define atomic64_inc_return(v) atomic64_add_return(1, v)
+static inline long atomic64_inc_return_unchecked(atomic64_unchecked_t *v)
+{
+	return atomic64_add_return_unchecked(1, v);
+}
 
 /*
  * atomic_inc_and_test - increment and test
@@ -52,6 +82,10 @@ ATOMIC_OPS(sub)
  * other cases.
  */
 #define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
+static inline int atomic_inc_and_test_unchecked(atomic_unchecked_t *v)
+{
+	return atomic_inc_return_unchecked(v) == 0;
+}
 #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
 
 #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
@@ -61,25 +95,60 @@ ATOMIC_OPS(sub)
 #define atomic64_dec_and_test(v) (atomic64_sub_return(1, v) == 0)
 
 #define atomic_inc(v) atomic_add(1, v)
+static inline void atomic_inc_unchecked(atomic_unchecked_t *v)
+{
+	atomic_add_unchecked(1, v);
+}
 #define atomic64_inc(v) atomic64_add(1, v)
+static inline void atomic64_inc_unchecked(atomic64_unchecked_t *v)
+{
+	atomic64_add_unchecked(1, v);
+}
 
 #define atomic_dec(v) atomic_sub(1, v)
+static inline void atomic_dec_unchecked(atomic_unchecked_t *v)
+{
+	atomic_sub_unchecked(1, v);
+}
 #define atomic64_dec(v) atomic64_sub(1, v)
+static inline void atomic64_dec_unchecked(atomic64_unchecked_t *v)
+{
+	atomic64_sub_unchecked(1, v);
+}
 
 #define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0)
 #define atomic64_add_negative(i, v) (atomic64_add_return(i, v) < 0)
 
 #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
+static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *v, int old, int new)
+{
+	return cmpxchg(&v->counter, old, new);
+}
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+static inline int atomic_xchg_unchecked(atomic_unchecked_t *v, int new)
+{
+	return xchg(&v->counter, new);
+}
 
 static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
-	int c, old;
+	int c, old, new;
 	c = atomic_read(v);
 	for (;;) {
-		if (unlikely(c == (u)))
+		if (unlikely(c == u))
 			break;
-		old = atomic_cmpxchg((v), c, c + (a));
+
+		asm volatile("addcc %2, %0, %0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+			     "tvs %%icc, 6\n"
+#endif
+
+			     : "=r" (new)
+			     : "0" (c), "ir" (a)
+			     : "cc");
+
+		old = atomic_cmpxchg(v, c, new);
 		if (likely(old == c))
 			break;
 		c = old;
@@ -90,20 +159,35 @@ static inline int __atomic_add_unless(at
 #define atomic64_cmpxchg(v, o, n) \
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+static inline long atomic64_xchg_unchecked(atomic64_unchecked_t *v, long new)
+{
+	return xchg(&v->counter, new);
+}
 
 static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
 {
-	long c, old;
+	long c, old, new;
 	c = atomic64_read(v);
 	for (;;) {
-		if (unlikely(c == (u)))
+		if (unlikely(c == u))
 			break;
-		old = atomic64_cmpxchg((v), c, c + (a));
+
+		asm volatile("addcc %2, %0, %0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+			     "tvs %%xcc, 6\n"
+#endif
+
+			     : "=r" (new)
+			     : "0" (c), "ir" (a)
+			     : "cc");
+
+		old = atomic64_cmpxchg(v, c, new);
 		if (likely(old == c))
 			break;
 		c = old;
 	}
-	return c != (u);
+	return c != u;
 }
 
 #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
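
On sparc64 the overflow check rides on the existing cmpxchg loop: the new value is produced by an addcc whose following tvs instruction traps when CONFIG_PAX_REFCOUNT is enabled. The GCC-builtin sketch below mimics only the shape of that retry loop in userspace; abort() stands in for the trap and the function name is made up.

#include <stdbool.h>
#include <stdlib.h>

/* add a to *v unless *v == u; returns nonzero if the add happened */
long add_unless_checked(long *v, long a, long u)
{
	long c = __atomic_load_n(v, __ATOMIC_RELAXED);

	for (;;) {
		long new;

		if (c == u)
			break;
		if (__builtin_add_overflow(c, a, &new))
			abort();		/* kernel: "tvs %xcc, 6" traps */
		if (__atomic_compare_exchange_n(v, &c, new, false,
						__ATOMIC_SEQ_CST,
						__ATOMIC_SEQ_CST))
			break;
		/* on failure c has been reloaded with the current value */
	}
	return c != u;
}
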
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/barrier_64.h linux-4.0.9-pax/arch/sparc/include/asm/barrier_64.h
--- linux-4.0.9/arch/sparc/include/asm/barrier_64.h	2015-03-18 15:21:50.216349253 +0100
+++ linux-4.0.9-pax/arch/sparc/include/asm/barrier_64.h	2015-04-15 12:13:52.882318624 +0200
@@ -60,7 +60,7 @@ do {	__asm__ __volatile__("ba,pt	%%xcc,
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	ACCESS_ONCE_RW(*p) = (v);					\
 } while (0)
 
 #define smp_load_acquire(p)						\
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/cache.h linux-4.0.9-pax/arch/sparc/include/asm/cache.h
--- linux-4.0.9/arch/sparc/include/asm/cache.h	2015-03-18 15:21:50.216349253 +0100
+++ linux-4.0.9-pax/arch/sparc/include/asm/cache.h	2015-04-15 12:13:52.882318624 +0200
@@ -10,7 +10,7 @@
 #define ARCH_SLAB_MINALIGN	__alignof__(unsigned long long)
 
 #define L1_CACHE_SHIFT 5
-#define L1_CACHE_BYTES 32
+#define L1_CACHE_BYTES 32UL
 
 #ifdef CONFIG_SPARC32
 #define SMP_CACHE_BYTES_SHIFT 5
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/elf_32.h linux-4.0.9-pax/arch/sparc/include/asm/elf_32.h
--- linux-4.0.9/arch/sparc/include/asm/elf_32.h	2015-03-18 15:21:50.216349253 +0100
+++ linux-4.0.9-pax/arch/sparc/include/asm/elf_32.h	2015-04-15 12:13:52.882318624 +0200
@@ -114,6 +114,13 @@ typedef struct {
 
 #define ELF_ET_DYN_BASE         (TASK_UNMAPPED_BASE)
 
+#ifdef CONFIG_PAX_ASLR
+#define PAX_ELF_ET_DYN_BASE	0x10000UL
+
+#define PAX_DELTA_MMAP_LEN	16
+#define PAX_DELTA_STACK_LEN	16
+#endif
+
 /* This yields a mask that user programs can use to figure out what
    instruction set this cpu supports.  This can NOT be done in userspace
    on Sparc.  */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/elf_64.h linux-4.0.9-pax/arch/sparc/include/asm/elf_64.h
--- linux-4.0.9/arch/sparc/include/asm/elf_64.h	2015-03-18 15:21:50.216349253 +0100
+++ linux-4.0.9-pax/arch/sparc/include/asm/elf_64.h	2015-04-15 12:13:52.882318624 +0200
@@ -189,6 +189,13 @@ typedef struct {
 #define ELF_ET_DYN_BASE		0x0000010000000000UL
 #define COMPAT_ELF_ET_DYN_BASE	0x0000000070000000UL
 
+#ifdef CONFIG_PAX_ASLR
+#define PAX_ELF_ET_DYN_BASE	(test_thread_flag(TIF_32BIT) ? 0x10000UL : 0x100000UL)
+
+#define PAX_DELTA_MMAP_LEN	(test_thread_flag(TIF_32BIT) ? 14 : 28)
+#define PAX_DELTA_STACK_LEN	(test_thread_flag(TIF_32BIT) ? 15 : 29)
+#endif
+
 extern unsigned long sparc64_elf_hwcap;
 #define ELF_HWCAP	sparc64_elf_hwcap
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/pgalloc_32.h linux-4.0.9-pax/arch/sparc/include/asm/pgalloc_32.h
--- linux-4.0.9/arch/sparc/include/asm/pgalloc_32.h	2015-03-18 15:21:50.216349253 +0100
+++ linux-4.0.9-pax/arch/sparc/include/asm/pgalloc_32.h	2015-04-15 12:13:52.882318624 +0200
@@ -35,6 +35,7 @@ static inline void pgd_set(pgd_t * pgdp,
 }
 
 #define pgd_populate(MM, PGD, PMD)      pgd_set(PGD, PMD)
+#define pgd_populate_kernel(MM, PGD, PMD)      pgd_populate((MM), (PGD), (PMD))
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm,
 				   unsigned long address)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/pgalloc_64.h linux-4.0.9-pax/arch/sparc/include/asm/pgalloc_64.h
--- linux-4.0.9/arch/sparc/include/asm/pgalloc_64.h	2015-03-18 15:21:50.216349253 +0100
+++ linux-4.0.9-pax/arch/sparc/include/asm/pgalloc_64.h	2015-04-15 12:13:52.882318624 +0200
@@ -38,6 +38,7 @@ static inline void __pud_populate(pud_t
 }
 
 #define pud_populate(MM, PUD, PMD)	__pud_populate(PUD, PMD)
+#define pud_populate_kernel(MM, PUD, PMD)	pud_populate((MM), (PUD), (PMD))
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/pgtable_32.h linux-4.0.9-pax/arch/sparc/include/asm/pgtable_32.h
--- linux-4.0.9/arch/sparc/include/asm/pgtable_32.h	2015-04-13 11:21:01.298617491 +0200
+++ linux-4.0.9-pax/arch/sparc/include/asm/pgtable_32.h	2015-04-15 12:13:52.882318624 +0200
@@ -51,6 +51,9 @@ unsigned long __init bootmem_init(unsign
 #define PAGE_SHARED	SRMMU_PAGE_SHARED
 #define PAGE_COPY	SRMMU_PAGE_COPY
 #define PAGE_READONLY	SRMMU_PAGE_RDONLY
+#define PAGE_SHARED_NOEXEC	SRMMU_PAGE_SHARED_NOEXEC
+#define PAGE_COPY_NOEXEC	SRMMU_PAGE_COPY_NOEXEC
+#define PAGE_READONLY_NOEXEC	SRMMU_PAGE_RDONLY_NOEXEC
 #define PAGE_KERNEL	SRMMU_PAGE_KERNEL
 
 /* Top-level page directory - dummy used by init-mm.
@@ -63,18 +66,18 @@ extern unsigned long ptr_in_current_pgd;
 
 /*         xwr */
 #define __P000  PAGE_NONE
-#define __P001  PAGE_READONLY
-#define __P010  PAGE_COPY
-#define __P011  PAGE_COPY
+#define __P001  PAGE_READONLY_NOEXEC
+#define __P010  PAGE_COPY_NOEXEC
+#define __P011  PAGE_COPY_NOEXEC
 #define __P100  PAGE_READONLY
 #define __P101  PAGE_READONLY
 #define __P110  PAGE_COPY
 #define __P111  PAGE_COPY
 
 #define __S000	PAGE_NONE
-#define __S001	PAGE_READONLY
-#define __S010	PAGE_SHARED
-#define __S011	PAGE_SHARED
+#define __S001	PAGE_READONLY_NOEXEC
+#define __S010	PAGE_SHARED_NOEXEC
+#define __S011	PAGE_SHARED_NOEXEC
 #define __S100	PAGE_READONLY
 #define __S101	PAGE_READONLY
 #define __S110	PAGE_SHARED
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/pgtable.h linux-4.0.9-pax/arch/sparc/include/asm/pgtable.h
--- linux-4.0.9/arch/sparc/include/asm/pgtable.h	2015-03-18 15:21:50.216349253 +0100
+++ linux-4.0.9-pax/arch/sparc/include/asm/pgtable.h	2015-04-15 12:13:52.882318624 +0200
@@ -5,4 +5,8 @@
 #else
 #include <asm/pgtable_32.h>
 #endif
+
+#define ktla_ktva(addr)		(addr)
+#define ktva_ktla(addr)		(addr)
+
 #endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/pgtsrmmu.h linux-4.0.9-pax/arch/sparc/include/asm/pgtsrmmu.h
--- linux-4.0.9/arch/sparc/include/asm/pgtsrmmu.h	2015-04-13 11:21:01.298617491 +0200
+++ linux-4.0.9-pax/arch/sparc/include/asm/pgtsrmmu.h	2015-04-15 12:13:52.886318623 +0200
@@ -111,6 +111,11 @@
 				    SRMMU_EXEC | SRMMU_REF)
 #define SRMMU_PAGE_RDONLY  __pgprot(SRMMU_VALID | SRMMU_CACHE | \
 				    SRMMU_EXEC | SRMMU_REF)
+
+#define SRMMU_PAGE_SHARED_NOEXEC	__pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_WRITE | SRMMU_REF)
+#define SRMMU_PAGE_COPY_NOEXEC		__pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_REF)
+#define SRMMU_PAGE_RDONLY_NOEXEC	__pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_REF)
+
 #define SRMMU_PAGE_KERNEL  __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_PRIV | \
 				    SRMMU_DIRTY | SRMMU_REF)
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/setup.h linux-4.0.9-pax/arch/sparc/include/asm/setup.h
--- linux-4.0.9/arch/sparc/include/asm/setup.h	2015-03-18 15:21:50.216349253 +0100
+++ linux-4.0.9-pax/arch/sparc/include/asm/setup.h	2015-04-15 12:13:52.886318623 +0200
@@ -55,8 +55,8 @@ int handle_ldf_stq(u32 insn, struct pt_r
 void handle_ld_nf(u32 insn, struct pt_regs *regs);
 
 /* init_64.c */
-extern atomic_t dcpage_flushes;
-extern atomic_t dcpage_flushes_xcall;
+extern atomic_unchecked_t dcpage_flushes;
+extern atomic_unchecked_t dcpage_flushes_xcall;
 
 extern int sysctl_tsb_ratio;
 #endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/spinlock_64.h linux-4.0.9-pax/arch/sparc/include/asm/spinlock_64.h
--- linux-4.0.9/arch/sparc/include/asm/spinlock_64.h	2015-03-18 15:21:50.216349253 +0100
+++ linux-4.0.9-pax/arch/sparc/include/asm/spinlock_64.h	2015-04-15 12:13:52.886318623 +0200
@@ -92,14 +92,19 @@ static inline void arch_spin_lock_flags(
 
 /* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */
 
-static void inline arch_read_lock(arch_rwlock_t *lock)
+static inline void arch_read_lock(arch_rwlock_t *lock)
 {
 	unsigned long tmp1, tmp2;
 
 	__asm__ __volatile__ (
 "1:	ldsw		[%2], %0\n"
 "	brlz,pn		%0, 2f\n"
-"4:	 add		%0, 1, %1\n"
+"4:	 addcc		%0, 1, %1\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+"	tvs		%%icc, 6\n"
+#endif
+
 "	cas		[%2], %0, %1\n"
 "	cmp		%0, %1\n"
 "	bne,pn		%%icc, 1b\n"
@@ -112,10 +117,10 @@ static void inline arch_read_lock(arch_r
 "	.previous"
 	: "=&r" (tmp1), "=&r" (tmp2)
 	: "r" (lock)
-	: "memory");
+	: "memory", "cc");
 }
 
-static int inline arch_read_trylock(arch_rwlock_t *lock)
+static inline int arch_read_trylock(arch_rwlock_t *lock)
 {
 	int tmp1, tmp2;
 
@@ -123,7 +128,12 @@ static int inline arch_read_trylock(arch
 "1:	ldsw		[%2], %0\n"
 "	brlz,a,pn	%0, 2f\n"
 "	 mov		0, %0\n"
-"	add		%0, 1, %1\n"
+"	addcc		%0, 1, %1\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+"	tvs		%%icc, 6\n"
+#endif
+
 "	cas		[%2], %0, %1\n"
 "	cmp		%0, %1\n"
 "	bne,pn		%%icc, 1b\n"
@@ -136,13 +146,18 @@ static int inline arch_read_trylock(arch
 	return tmp1;
 }
 
-static void inline arch_read_unlock(arch_rwlock_t *lock)
+static inline void arch_read_unlock(arch_rwlock_t *lock)
 {
 	unsigned long tmp1, tmp2;
 
 	__asm__ __volatile__(
 "1:	lduw	[%2], %0\n"
-"	sub	%0, 1, %1\n"
+"	subcc	%0, 1, %1\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+"	tvs	%%icc, 6\n"
+#endif
+
 "	cas	[%2], %0, %1\n"
 "	cmp	%0, %1\n"
 "	bne,pn	%%xcc, 1b\n"
@@ -152,7 +167,7 @@ static void inline arch_read_unlock(arch
 	: "memory");
 }
 
-static void inline arch_write_lock(arch_rwlock_t *lock)
+static inline void arch_write_lock(arch_rwlock_t *lock)
 {
 	unsigned long mask, tmp1, tmp2;
 
@@ -177,7 +192,7 @@ static void inline arch_write_lock(arch_
 	: "memory");
 }
 
-static void inline arch_write_unlock(arch_rwlock_t *lock)
+static inline void arch_write_unlock(arch_rwlock_t *lock)
 {
 	__asm__ __volatile__(
 "	stw		%%g0, [%0]"
@@ -186,7 +201,7 @@ static void inline arch_write_unlock(arc
 	: "memory");
 }
 
-static int inline arch_write_trylock(arch_rwlock_t *lock)
+static inline int arch_write_trylock(arch_rwlock_t *lock)
 {
 	unsigned long mask, tmp1, tmp2, result;
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/thread_info_32.h linux-4.0.9-pax/arch/sparc/include/asm/thread_info_32.h
--- linux-4.0.9/arch/sparc/include/asm/thread_info_32.h	2015-04-13 11:21:01.310617490 +0200
+++ linux-4.0.9-pax/arch/sparc/include/asm/thread_info_32.h	2015-04-15 12:13:52.886318623 +0200
@@ -47,6 +47,7 @@ struct thread_info {
 	struct reg_window32	reg_window[NSWINS];	/* align for ldd! */
 	unsigned long		rwbuf_stkptrs[NSWINS];
 	unsigned long		w_saved;
+	unsigned long		lowest_stack;
 };
 
 /*
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/thread_info_64.h linux-4.0.9-pax/arch/sparc/include/asm/thread_info_64.h
--- linux-4.0.9/arch/sparc/include/asm/thread_info_64.h	2015-04-13 11:21:01.310617490 +0200
+++ linux-4.0.9-pax/arch/sparc/include/asm/thread_info_64.h	2015-04-15 12:13:52.886318623 +0200
@@ -61,6 +61,8 @@ struct thread_info {
 	struct pt_regs		*kern_una_regs;
 	unsigned int		kern_una_insn;
 
+	unsigned long		lowest_stack;
+
 	unsigned long		fpregs[(7 * 256) / sizeof(unsigned long)]
 		__attribute__ ((aligned(64)));
 };
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/uaccess_32.h linux-4.0.9-pax/arch/sparc/include/asm/uaccess_32.h
--- linux-4.0.9/arch/sparc/include/asm/uaccess_32.h	2015-04-13 11:21:01.310617490 +0200
+++ linux-4.0.9-pax/arch/sparc/include/asm/uaccess_32.h	2015-04-15 12:13:52.886318623 +0200
@@ -47,6 +47,7 @@
 #define __user_ok(addr, size) ({ (void)(size); (addr) < STACK_TOP; })
 #define __kernel_ok (segment_eq(get_fs(), KERNEL_DS))
 #define __access_ok(addr, size) (__user_ok((addr) & get_fs().seg, (size)))
+#define access_ok_noprefault(type, addr, size) access_ok((type), (addr), (size))
 #define access_ok(type, addr, size) \
 	({ (void)(type); __access_ok((unsigned long)(addr), size); })
 
@@ -313,27 +314,46 @@ unsigned long __copy_user(void __user *t
 
 static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	if (n && __access_ok((unsigned long) to, n))
+	if ((long)n < 0)
+		return n;
+
+	if (n && __access_ok((unsigned long) to, n)) {
+		if (!__builtin_constant_p(n))
+			check_object_size(from, n, true);
 		return __copy_user(to, (__force void __user *) from, n);
-	else
+	} else
 		return n;
 }
 
 static inline unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
+	if ((long)n < 0)
+		return n;
+
+	if (!__builtin_constant_p(n))
+		check_object_size(from, n, true);
+
 	return __copy_user(to, (__force void __user *) from, n);
 }
 
 static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	if (n && __access_ok((unsigned long) from, n))
+	if ((long)n < 0)
+		return n;
+
+	if (n && __access_ok((unsigned long) from, n)) {
+		if (!__builtin_constant_p(n))
+			check_object_size(to, n, false);
 		return __copy_user((__force void __user *) to, from, n);
-	else
+	} else
 		return n;
 }
 
 static inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
+	if ((long)n < 0)
+		return n;
+
 	return __copy_user((__force void __user *) to, from, n);
 }
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/uaccess_64.h linux-4.0.9-pax/arch/sparc/include/asm/uaccess_64.h
--- linux-4.0.9/arch/sparc/include/asm/uaccess_64.h	2015-04-13 11:21:01.314617490 +0200
+++ linux-4.0.9-pax/arch/sparc/include/asm/uaccess_64.h	2015-04-15 12:13:52.886318623 +0200
@@ -10,6 +10,7 @@
 #include <linux/compiler.h>
 #include <linux/string.h>
 #include <linux/thread_info.h>
+#include <linux/kernel.h>
 #include <asm/asi.h>
 #include <asm/spitfire.h>
 #include <asm-generic/uaccess-unaligned.h>
@@ -54,6 +55,11 @@ static inline int __access_ok(const void
 	return 1;
 }
 
+static inline int access_ok_noprefault(int type, const void __user * addr, unsigned long size)
+{
+	return 1;
+}
+
 static inline int access_ok(int type, const void __user * addr, unsigned long size)
 {
 	return 1;
@@ -228,8 +234,15 @@ unsigned long copy_from_user_fixup(void
 static inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long size)
 {
-	unsigned long ret = ___copy_from_user(to, from, size);
+	unsigned long ret;
 
+	if ((long)size < 0 || size > INT_MAX)
+		return size;
+
+	if (!__builtin_constant_p(size))
+		check_object_size(to, size, false);
+
+	ret = ___copy_from_user(to, from, size);
 	if (unlikely(ret))
 		ret = copy_from_user_fixup(to, from, size);
 
@@ -245,8 +258,15 @@ unsigned long copy_to_user_fixup(void __
 static inline unsigned long __must_check
 copy_to_user(void __user *to, const void *from, unsigned long size)
 {
-	unsigned long ret = ___copy_to_user(to, from, size);
+	unsigned long ret;
+
+	if ((long)size < 0 || size > INT_MAX)
+		return size;
+
+	if (!__builtin_constant_p(size))
+		check_object_size(from, size, true);
 
+	ret = ___copy_to_user(to, from, size);
 	if (unlikely(ret))
 		ret = copy_to_user_fixup(to, from, size);
 	return ret;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/include/asm/uaccess.h linux-4.0.9-pax/arch/sparc/include/asm/uaccess.h
--- linux-4.0.9/arch/sparc/include/asm/uaccess.h	2015-03-18 15:21:50.216349253 +0100
+++ linux-4.0.9-pax/arch/sparc/include/asm/uaccess.h	2015-04-15 12:13:52.886318623 +0200
@@ -1,5 +1,6 @@
 #ifndef ___ASM_SPARC_UACCESS_H
 #define ___ASM_SPARC_UACCESS_H
+
 #if defined(__sparc__) && defined(__arch64__)
 #include <asm/uaccess_64.h>
 #else
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/kernel/Makefile linux-4.0.9-pax/arch/sparc/kernel/Makefile
--- linux-4.0.9/arch/sparc/kernel/Makefile	2015-03-18 15:21:50.216349253 +0100
+++ linux-4.0.9-pax/arch/sparc/kernel/Makefile	2015-04-15 12:13:52.886318623 +0200
@@ -4,7 +4,7 @@
 #
 
 asflags-y := -ansi
-ccflags-y := -Werror
+#ccflags-y := -Werror
 
 extra-y     := head_$(BITS).o
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/kernel/prom_common.c linux-4.0.9-pax/arch/sparc/kernel/prom_common.c
--- linux-4.0.9/arch/sparc/kernel/prom_common.c	2015-03-18 15:21:50.216349253 +0100
+++ linux-4.0.9-pax/arch/sparc/kernel/prom_common.c	2015-04-15 12:13:52.886318623 +0200
@@ -144,7 +144,7 @@ static int __init prom_common_nextprop(p
 
 unsigned int prom_early_allocated __initdata;
 
-static struct of_pdt_ops prom_sparc_ops __initdata = {
+static struct of_pdt_ops prom_sparc_ops __initconst = {
 	.nextprop = prom_common_nextprop,
 	.getproplen = prom_getproplen,
 	.getproperty = prom_getproperty,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/kernel/smp_64.c linux-4.0.9-pax/arch/sparc/kernel/smp_64.c
--- linux-4.0.9/arch/sparc/kernel/smp_64.c	2015-04-13 11:21:01.378617486 +0200
+++ linux-4.0.9-pax/arch/sparc/kernel/smp_64.c	2015-04-15 12:13:52.886318623 +0200
@@ -887,7 +887,7 @@ void smp_flush_dcache_page_impl(struct p
 		return;
 
 #ifdef CONFIG_DEBUG_DCFLUSH
-	atomic_inc(&dcpage_flushes);
+	atomic_inc_unchecked(&dcpage_flushes);
 #endif
 
 	this_cpu = get_cpu();
@@ -911,7 +911,7 @@ void smp_flush_dcache_page_impl(struct p
 			xcall_deliver(data0, __pa(pg_addr),
 				      (u64) pg_addr, cpumask_of(cpu));
 #ifdef CONFIG_DEBUG_DCFLUSH
-			atomic_inc(&dcpage_flushes_xcall);
+			atomic_inc_unchecked(&dcpage_flushes_xcall);
 #endif
 		}
 	}
@@ -930,7 +930,7 @@ void flush_dcache_page_all(struct mm_str
 	preempt_disable();
 
 #ifdef CONFIG_DEBUG_DCFLUSH
-	atomic_inc(&dcpage_flushes);
+	atomic_inc_unchecked(&dcpage_flushes);
 #endif
 	data0 = 0;
 	pg_addr = page_address(page);
@@ -947,7 +947,7 @@ void flush_dcache_page_all(struct mm_str
 		xcall_deliver(data0, __pa(pg_addr),
 			      (u64) pg_addr, cpu_online_mask);
 #ifdef CONFIG_DEBUG_DCFLUSH
-		atomic_inc(&dcpage_flushes_xcall);
+		atomic_inc_unchecked(&dcpage_flushes_xcall);
 #endif
 	}
 	__local_flush_dcache_page(page);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/kernel/sys_sparc_32.c linux-4.0.9-pax/arch/sparc/kernel/sys_sparc_32.c
--- linux-4.0.9/arch/sparc/kernel/sys_sparc_32.c	2015-03-18 15:21:50.216349253 +0100
+++ linux-4.0.9-pax/arch/sparc/kernel/sys_sparc_32.c	2015-04-15 12:13:52.886318623 +0200
@@ -54,7 +54,7 @@ unsigned long arch_get_unmapped_area(str
 	if (len > TASK_SIZE - PAGE_SIZE)
 		return -ENOMEM;
 	if (!addr)
-		addr = TASK_UNMAPPED_BASE;
+		addr = current->mm->mmap_base;
 
 	info.flags = 0;
 	info.length = len;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/kernel/sys_sparc_64.c linux-4.0.9-pax/arch/sparc/kernel/sys_sparc_64.c
--- linux-4.0.9/arch/sparc/kernel/sys_sparc_64.c	2015-04-13 11:21:01.386617486 +0200
+++ linux-4.0.9-pax/arch/sparc/kernel/sys_sparc_64.c	2015-04-15 12:13:52.886318623 +0200
@@ -95,7 +95,7 @@ unsigned long arch_get_unmapped_area(str
 		/* We do not accept a shared mapping if it would violate
 		 * cache aliasing constraints.
 		 */
-		if ((flags & MAP_SHARED) &&
+		if ((filp || (flags & MAP_SHARED)) &&
 		    ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)))
 			return -EINVAL;
 		return addr;
@@ -110,6 +110,10 @@ unsigned long arch_get_unmapped_area(str
 	if (filp || (flags & MAP_SHARED))
 		do_color_align = 1;
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	if (addr) {
 		if (do_color_align)
 			addr = COLOR_ALIGN(addr, pgoff);
@@ -117,14 +121,13 @@ unsigned long arch_get_unmapped_area(str
 			addr = PAGE_ALIGN(addr);
 
 		vma = find_vma(mm, addr);
-		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		if (task_size - len >= addr && check_heap_stack_gap(vma, addr, len))
 			return addr;
 	}
 
 	info.flags = 0;
 	info.length = len;
-	info.low_limit = TASK_UNMAPPED_BASE;
+	info.low_limit = mm->mmap_base;
 	info.high_limit = min(task_size, VA_EXCLUDE_START);
 	info.align_mask = do_color_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
@@ -133,6 +136,12 @@ unsigned long arch_get_unmapped_area(str
 	if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
 		VM_BUG_ON(addr != -ENOMEM);
 		info.low_limit = VA_EXCLUDE_END;
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			info.low_limit += mm->delta_mmap;
+#endif
+
 		info.high_limit = task_size;
 		addr = vm_unmapped_area(&info);
 	}
@@ -159,7 +168,7 @@ arch_get_unmapped_area_topdown(struct fi
 		/* We do not accept a shared mapping if it would violate
 		 * cache aliasing constraints.
 		 */
-		if ((flags & MAP_SHARED) &&
+		if ((filp || (flags & MAP_SHARED)) &&
 		    ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)))
 			return -EINVAL;
 		return addr;
@@ -172,6 +181,10 @@ arch_get_unmapped_area_topdown(struct fi
 	if (filp || (flags & MAP_SHARED))
 		do_color_align = 1;
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	/* requesting a specific address */
 	if (addr) {
 		if (do_color_align)
@@ -180,8 +193,7 @@ arch_get_unmapped_area_topdown(struct fi
 			addr = PAGE_ALIGN(addr);
 
 		vma = find_vma(mm, addr);
-		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		if (task_size - len >= addr && check_heap_stack_gap(vma, addr, len))
 			return addr;
 	}
 
@@ -203,6 +215,12 @@ arch_get_unmapped_area_topdown(struct fi
 		VM_BUG_ON(addr != -ENOMEM);
 		info.flags = 0;
 		info.low_limit = TASK_UNMAPPED_BASE;
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			info.low_limit += mm->delta_mmap;
+#endif
+
 		info.high_limit = STACK_TOP32;
 		addr = vm_unmapped_area(&info);
 	}
@@ -259,10 +277,14 @@ unsigned long get_fb_unmapped_area(struc
 EXPORT_SYMBOL(get_fb_unmapped_area);
 
 /* Essentially the same as PowerPC.  */
-static unsigned long mmap_rnd(void)
+static unsigned long mmap_rnd(struct mm_struct *mm)
 {
 	unsigned long rnd = 0UL;
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	if (current->flags & PF_RANDOMIZE) {
 		unsigned long val = get_random_int();
 		if (test_thread_flag(TIF_32BIT))
@@ -275,7 +297,7 @@ static unsigned long mmap_rnd(void)
 
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
-	unsigned long random_factor = mmap_rnd();
+	unsigned long random_factor = mmap_rnd(mm);
 	unsigned long gap;
 
 	/*
@@ -288,6 +310,12 @@ void arch_pick_mmap_layout(struct mm_str
 	    gap == RLIM_INFINITY ||
 	    sysctl_legacy_va_layout) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			mm->mmap_base += mm->delta_mmap;
+#endif
+
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
 		/* We know it's 32-bit */
@@ -299,6 +327,12 @@ void arch_pick_mmap_layout(struct mm_str
 			gap = (task_size / 6 * 5);
 
 		mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor);
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			mm->mmap_base -= mm->delta_mmap + mm->delta_stack;
+#endif
+
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/kernel/traps_64.c linux-4.0.9-pax/arch/sparc/kernel/traps_64.c
--- linux-4.0.9/arch/sparc/kernel/traps_64.c	2015-04-13 11:21:01.386617486 +0200
+++ linux-4.0.9-pax/arch/sparc/kernel/traps_64.c	2015-04-15 12:13:52.886318623 +0200
@@ -99,6 +99,12 @@ void bad_trap(struct pt_regs *regs, long
 
 	lvl -= 0x100;
 	if (regs->tstate & TSTATE_PRIV) {
+
+#ifdef CONFIG_PAX_REFCOUNT
+		if (lvl == 6)
+			pax_report_refcount_overflow(regs);
+#endif
+
 		sprintf(buffer, "Kernel bad sw trap %lx", lvl);
 		die_if_kernel(buffer, regs);
 	}
@@ -117,11 +123,16 @@ void bad_trap(struct pt_regs *regs, long
 void bad_trap_tl1(struct pt_regs *regs, long lvl)
 {
 	char buffer[32];
-	
+
 	if (notify_die(DIE_TRAP_TL1, "bad trap tl1", regs,
 		       0, lvl, SIGTRAP) == NOTIFY_STOP)
 		return;
 
+#ifdef CONFIG_PAX_REFCOUNT
+	if (lvl == 6)
+		pax_report_refcount_overflow(regs);
+#endif
+
 	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
 
 	sprintf (buffer, "Bad trap %lx at tl>0", lvl);
@@ -1839,8 +1850,8 @@ struct sun4v_error_entry {
 /*0x38*/u64		reserved_5;
 };
 
-static atomic_t sun4v_resum_oflow_cnt = ATOMIC_INIT(0);
-static atomic_t sun4v_nonresum_oflow_cnt = ATOMIC_INIT(0);
+static atomic_unchecked_t sun4v_resum_oflow_cnt = ATOMIC_INIT(0);
+static atomic_unchecked_t sun4v_nonresum_oflow_cnt = ATOMIC_INIT(0);
 
 static const char *sun4v_err_type_to_str(u8 type)
 {
@@ -1932,7 +1943,7 @@ static void sun4v_report_real_raddr(cons
 }
 
 static void sun4v_log_error(struct pt_regs *regs, struct sun4v_error_entry *ent,
-			    int cpu, const char *pfx, atomic_t *ocnt)
+			    int cpu, const char *pfx, atomic_unchecked_t *ocnt)
 {
 	u64 *raw_ptr = (u64 *) ent;
 	u32 attrs;
@@ -1990,8 +2001,8 @@ static void sun4v_log_error(struct pt_re
 
 	show_regs(regs);
 
-	if ((cnt = atomic_read(ocnt)) != 0) {
-		atomic_set(ocnt, 0);
+	if ((cnt = atomic_read_unchecked(ocnt)) != 0) {
+		atomic_set_unchecked(ocnt, 0);
 		wmb();
 		printk("%s: Queue overflowed %d times.\n",
 		       pfx, cnt);
@@ -2048,7 +2059,7 @@ out:
  */
 void sun4v_resum_overflow(struct pt_regs *regs)
 {
-	atomic_inc(&sun4v_resum_oflow_cnt);
+	atomic_inc_unchecked(&sun4v_resum_oflow_cnt);
 }
 
 /* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate.
@@ -2101,7 +2112,7 @@ void sun4v_nonresum_overflow(struct pt_r
 	/* XXX Actually even this can make not that much sense.  Perhaps
 	 * XXX we should just pull the plug and panic directly from here?
 	 */
-	atomic_inc(&sun4v_nonresum_oflow_cnt);
+	atomic_inc_unchecked(&sun4v_nonresum_oflow_cnt);
 }
 
 static void sun4v_tlb_error(struct pt_regs *regs)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/lib/atomic_64.S linux-4.0.9-pax/arch/sparc/lib/atomic_64.S
--- linux-4.0.9/arch/sparc/lib/atomic_64.S	2015-03-18 15:21:50.220349253 +0100
+++ linux-4.0.9-pax/arch/sparc/lib/atomic_64.S	2015-04-15 12:13:52.886318623 +0200
@@ -15,11 +15,22 @@
 	 * a value and does the barriers.
 	 */
 
-#define ATOMIC_OP(op)							\
-ENTRY(atomic_##op) /* %o0 = increment, %o1 = atomic_ptr */		\
+#ifdef CONFIG_PAX_REFCOUNT
+#define __REFCOUNT_OP(op) op##cc
+#define __OVERFLOW_IOP tvs	%icc, 6;
+#define __OVERFLOW_XOP tvs	%xcc, 6;
+#else
+#define __REFCOUNT_OP(op) op
+#define __OVERFLOW_IOP
+#define __OVERFLOW_XOP
+#endif
+
+#define __ATOMIC_OP(op, suffix, asm_op, post_op)			\
+ENTRY(atomic_##op##suffix) /* %o0 = increment, %o1 = atomic_ptr */	\
 	BACKOFF_SETUP(%o2);						\
 1:	lduw	[%o1], %g1;						\
-	op	%g1, %o0, %g7;						\
+	asm_op	%g1, %o0, %g7;						\
+	post_op								\
 	cas	[%o1], %g1, %g7;					\
 	cmp	%g1, %g7;						\
 	bne,pn	%icc, BACKOFF_LABEL(2f, 1b);				\
@@ -29,11 +40,15 @@ ENTRY(atomic_##op) /* %o0 = increment, %
 2:	BACKOFF_SPIN(%o2, %o3, 1b);					\
 ENDPROC(atomic_##op);							\
 
-#define ATOMIC_OP_RETURN(op)						\
-ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */	\
+#define ATOMIC_OP(op) __ATOMIC_OP(op, , op, ) \
+		      __ATOMIC_OP(op, _unchecked, __REFCOUNT_OP(op), __OVERFLOW_IOP)
+
+#define __ATOMIC_OP_RETURN(op, suffix, asm_op, post_op)			\
+ENTRY(atomic_##op##_return##suffix) /* %o0 = increment, %o1 = atomic_ptr */\
 	BACKOFF_SETUP(%o2);						\
 1:	lduw	[%o1], %g1;						\
-	op	%g1, %o0, %g7;						\
+	asm_op	%g1, %o0, %g7;						\
+	post_op								\
 	cas	[%o1], %g1, %g7;					\
 	cmp	%g1, %g7;						\
 	bne,pn	%icc, BACKOFF_LABEL(2f, 1b);				\
@@ -43,6 +58,9 @@ ENTRY(atomic_##op##_return) /* %o0 = inc
 2:	BACKOFF_SPIN(%o2, %o3, 1b);					\
 ENDPROC(atomic_##op##_return);
 
+#define ATOMIC_OP_RETURN(op) __ATOMIC_OP_RETURN(op, , op, ) \
+			     __ATOMIC_OP_RETURN(op, _unchecked, __REFCOUNT_OP(op), __OVERFLOW_IOP)
+
 #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
 
 ATOMIC_OPS(add)
@@ -50,13 +68,16 @@ ATOMIC_OPS(sub)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
+#undef __ATOMIC_OP_RETURN
 #undef ATOMIC_OP
+#undef __ATOMIC_OP
 
-#define ATOMIC64_OP(op)							\
-ENTRY(atomic64_##op) /* %o0 = increment, %o1 = atomic_ptr */		\
+#define __ATOMIC64_OP(op, suffix, asm_op, post_op)			\
+ENTRY(atomic64_##op##suffix) /* %o0 = increment, %o1 = atomic_ptr */	\
 	BACKOFF_SETUP(%o2);						\
 1:	ldx	[%o1], %g1;						\
-	op	%g1, %o0, %g7;						\
+	asm_op	%g1, %o0, %g7;						\
+	post_op								\
 	casx	[%o1], %g1, %g7;					\
 	cmp	%g1, %g7;						\
 	bne,pn	%xcc, BACKOFF_LABEL(2f, 1b);				\
@@ -66,11 +87,15 @@ ENTRY(atomic64_##op) /* %o0 = increment,
 2:	BACKOFF_SPIN(%o2, %o3, 1b);					\
 ENDPROC(atomic64_##op);							\
 
-#define ATOMIC64_OP_RETURN(op)						\
-ENTRY(atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */	\
+#define ATOMIC64_OP(op) __ATOMIC64_OP(op, , op, ) \
+			__ATOMIC64_OP(op, _unchecked, __REFCOUNT_OP(op), __OVERFLOW_XOP)
+
+#define __ATOMIC64_OP_RETURN(op, suffix, asm_op, post_op)		\
+ENTRY(atomic64_##op##_return##suffix) /* %o0 = increment, %o1 = atomic_ptr */\
 	BACKOFF_SETUP(%o2);						\
 1:	ldx	[%o1], %g1;						\
-	op	%g1, %o0, %g7;						\
+	asm_op	%g1, %o0, %g7;						\
+	post_op								\
 	casx	[%o1], %g1, %g7;					\
 	cmp	%g1, %g7;						\
 	bne,pn	%xcc, BACKOFF_LABEL(2f, 1b);				\
@@ -80,6 +105,9 @@ ENTRY(atomic64_##op##_return) /* %o0 = i
 2:	BACKOFF_SPIN(%o2, %o3, 1b);					\
 ENDPROC(atomic64_##op##_return);
 
+#define ATOMIC64_OP_RETURN(op) __ATOMIC64_OP_RETURN(op, , op, ) \
+			       __ATOMIC64_OP_RETURN(op, _unchecked, __REFCOUNT_OP(op), __OVERFLOW_XOP)
+
 #define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op)
 
 ATOMIC64_OPS(add)
@@ -87,7 +115,12 @@ ATOMIC64_OPS(sub)
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
+#undef __ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
+#undef __ATOMIC64_OP
+#undef __OVERFLOW_XOP
+#undef __OVERFLOW_IOP
+#undef __REFCOUNT_OP
 
 ENTRY(atomic64_dec_if_positive) /* %o0 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/lib/ksyms.c linux-4.0.9-pax/arch/sparc/lib/ksyms.c
--- linux-4.0.9/arch/sparc/lib/ksyms.c	2015-03-18 15:21:50.220349253 +0100
+++ linux-4.0.9-pax/arch/sparc/lib/ksyms.c	2015-04-15 12:13:52.886318623 +0200
@@ -101,7 +101,9 @@ EXPORT_SYMBOL(__clear_user);
 /* Atomic counter implementation. */
 #define ATOMIC_OP(op)							\
 EXPORT_SYMBOL(atomic_##op);						\
-EXPORT_SYMBOL(atomic64_##op);
+EXPORT_SYMBOL(atomic_##op##_unchecked);					\
+EXPORT_SYMBOL(atomic64_##op);						\
+EXPORT_SYMBOL(atomic64_##op##_unchecked);
 
 #define ATOMIC_OP_RETURN(op)						\
 EXPORT_SYMBOL(atomic_##op##_return);					\
@@ -110,6 +112,8 @@ EXPORT_SYMBOL(atomic64_##op##_return);
 #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
 
 ATOMIC_OPS(add)
+EXPORT_SYMBOL(atomic_add_return_unchecked);
+EXPORT_SYMBOL(atomic64_add_return_unchecked);
 ATOMIC_OPS(sub)
 
 #undef ATOMIC_OPS
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/lib/Makefile linux-4.0.9-pax/arch/sparc/lib/Makefile
--- linux-4.0.9/arch/sparc/lib/Makefile	2015-03-18 15:21:50.220349253 +0100
+++ linux-4.0.9-pax/arch/sparc/lib/Makefile	2015-04-15 12:13:52.886318623 +0200
@@ -2,7 +2,7 @@
 #
 
 asflags-y := -ansi -DST_DIV0=0x02
-ccflags-y := -Werror
+#ccflags-y := -Werror
 
 lib-$(CONFIG_SPARC32) += ashrdi3.o
 lib-$(CONFIG_SPARC32) += memcpy.o memset.o
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/mm/fault_32.c linux-4.0.9-pax/arch/sparc/mm/fault_32.c
--- linux-4.0.9/arch/sparc/mm/fault_32.c	2015-03-18 15:21:50.220349253 +0100
+++ linux-4.0.9-pax/arch/sparc/mm/fault_32.c	2015-04-15 12:13:52.886318623 +0200
@@ -21,6 +21,9 @@
 #include <linux/perf_event.h>
 #include <linux/interrupt.h>
 #include <linux/kdebug.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/compiler.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -156,6 +159,277 @@ static unsigned long compute_si_addr(str
 	return safe_compute_effective_address(regs, insn);
 }
 
+#ifdef CONFIG_PAX_PAGEEXEC
+#ifdef CONFIG_PAX_DLRESOLVE
+static void pax_emuplt_close(struct vm_area_struct *vma)
+{
+	vma->vm_mm->call_dl_resolve = 0UL;
+}
+
+static int pax_emuplt_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	unsigned int *kaddr;
+
+	vmf->page = alloc_page(GFP_HIGHUSER);
+	if (!vmf->page)
+		return VM_FAULT_OOM;
+
+	kaddr = kmap(vmf->page);
+	memset(kaddr, 0, PAGE_SIZE);
+	kaddr[0] = 0x9DE3BFA8U; /* save */
+	flush_dcache_page(vmf->page);
+	kunmap(vmf->page);
+	return VM_FAULT_MAJOR;
+}
+
+static const struct vm_operations_struct pax_vm_ops = {
+	.close = pax_emuplt_close,
+	.fault = pax_emuplt_fault
+};
+
+static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr)
+{
+	int ret;
+
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
+	vma->vm_mm = current->mm;
+	vma->vm_start = addr;
+	vma->vm_end = addr + PAGE_SIZE;
+	vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC;
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	vma->vm_ops = &pax_vm_ops;
+
+	ret = insert_vm_struct(current->mm, vma);
+	if (ret)
+		return ret;
+
+	++current->mm->total_vm;
+	return 0;
+}
+#endif
+
+/*
+ * PaX: decide what to do with offenders (regs->pc = fault address)
+ *
+ * returns 1 when task should be killed
+ *         2 when patched PLT trampoline was detected
+ *         3 when unpatched PLT trampoline was detected
+ */
+static int pax_handle_fetch_fault(struct pt_regs *regs)
+{
+
+#ifdef CONFIG_PAX_EMUPLT
+	int err;
+
+	do { /* PaX: patched PLT emulation #1 */
+		unsigned int sethi1, sethi2, jmpl;
+
+		err = get_user(sethi1, (unsigned int *)regs->pc);
+		err |= get_user(sethi2, (unsigned int *)(regs->pc+4));
+		err |= get_user(jmpl, (unsigned int *)(regs->pc+8));
+
+		if (err)
+			break;
+
+		if ((sethi1 & 0xFFC00000U) == 0x03000000U &&
+		    (sethi2 & 0xFFC00000U) == 0x03000000U &&
+		    (jmpl & 0xFFFFE000U) == 0x81C06000U)
+		{
+			unsigned int addr;
+
+			regs->u_regs[UREG_G1] = (sethi2 & 0x003FFFFFU) << 10;
+			addr = regs->u_regs[UREG_G1];
+			addr += (((jmpl | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U);
+			regs->pc = addr;
+			regs->npc = addr+4;
+			return 2;
+		}
+	} while (0);
+
+	do { /* PaX: patched PLT emulation #2 */
+		unsigned int ba;
+
+		err = get_user(ba, (unsigned int *)regs->pc);
+
+		if (err)
+			break;
+
+		if ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30480000U) {
+			unsigned int addr;
+
+			if ((ba & 0xFFC00000U) == 0x30800000U)
+				addr = regs->pc + ((((ba | 0xFFC00000U) ^ 0x00200000U) + 0x00200000U) << 2);
+			else
+				addr = regs->pc + ((((ba | 0xFFF80000U) ^ 0x00040000U) + 0x00040000U) << 2);
+			regs->pc = addr;
+			regs->npc = addr+4;
+			return 2;
+		}
+	} while (0);
+
+	do { /* PaX: patched PLT emulation #3 */
+		unsigned int sethi, bajmpl, nop;
+
+		err = get_user(sethi, (unsigned int *)regs->pc);
+		err |= get_user(bajmpl, (unsigned int *)(regs->pc+4));
+		err |= get_user(nop, (unsigned int *)(regs->pc+8));
+
+		if (err)
+			break;
+
+		if ((sethi & 0xFFC00000U) == 0x03000000U &&
+		    ((bajmpl & 0xFFFFE000U) == 0x81C06000U || (bajmpl & 0xFFF80000U) == 0x30480000U) &&
+		    nop == 0x01000000U)
+		{
+			unsigned int addr;
+
+			addr = (sethi & 0x003FFFFFU) << 10;
+			regs->u_regs[UREG_G1] = addr;
+			if ((bajmpl & 0xFFFFE000U) == 0x81C06000U)
+				addr += (((bajmpl | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U);
+			else
+				addr = regs->pc + ((((bajmpl | 0xFFF80000U) ^ 0x00040000U) + 0x00040000U) << 2);
+			regs->pc = addr;
+			regs->npc = addr+4;
+			return 2;
+		}
+	} while (0);
+
+	do { /* PaX: unpatched PLT emulation step 1 */
+		unsigned int sethi, ba, nop;
+
+		err = get_user(sethi, (unsigned int *)regs->pc);
+		err |= get_user(ba, (unsigned int *)(regs->pc+4));
+		err |= get_user(nop, (unsigned int *)(regs->pc+8));
+
+		if (err)
+			break;
+
+		if ((sethi & 0xFFC00000U) == 0x03000000U &&
+		    ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30680000U) &&
+		    nop == 0x01000000U)
+		{
+			unsigned int addr, save, call;
+
+			if ((ba & 0xFFC00000U) == 0x30800000U)
+				addr = regs->pc + 4 + ((((ba | 0xFFC00000U) ^ 0x00200000U) + 0x00200000U) << 2);
+			else
+				addr = regs->pc + 4 + ((((ba | 0xFFF80000U) ^ 0x00040000U) + 0x00040000U) << 2);
+
+			err = get_user(save, (unsigned int *)addr);
+			err |= get_user(call, (unsigned int *)(addr+4));
+			err |= get_user(nop, (unsigned int *)(addr+8));
+			if (err)
+				break;
+
+#ifdef CONFIG_PAX_DLRESOLVE
+			if (save == 0x9DE3BFA8U &&
+			    (call & 0xC0000000U) == 0x40000000U &&
+			    nop == 0x01000000U)
+			{
+				struct vm_area_struct *vma;
+				unsigned long call_dl_resolve;
+
+				down_read(&current->mm->mmap_sem);
+				call_dl_resolve = current->mm->call_dl_resolve;
+				up_read(&current->mm->mmap_sem);
+				if (likely(call_dl_resolve))
+					goto emulate;
+
+				vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+
+				down_write(&current->mm->mmap_sem);
+				if (current->mm->call_dl_resolve) {
+					call_dl_resolve = current->mm->call_dl_resolve;
+					up_write(&current->mm->mmap_sem);
+					if (vma)
+						kmem_cache_free(vm_area_cachep, vma);
+					goto emulate;
+				}
+
+				call_dl_resolve = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE);
+				if (!vma || (call_dl_resolve & ~PAGE_MASK)) {
+					up_write(&current->mm->mmap_sem);
+					if (vma)
+						kmem_cache_free(vm_area_cachep, vma);
+					return 1;
+				}
+
+				if (pax_insert_vma(vma, call_dl_resolve)) {
+					up_write(&current->mm->mmap_sem);
+					kmem_cache_free(vm_area_cachep, vma);
+					return 1;
+				}
+
+				current->mm->call_dl_resolve = call_dl_resolve;
+				up_write(&current->mm->mmap_sem);
+
+emulate:
+				regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10;
+				regs->pc = call_dl_resolve;
+				regs->npc = addr+4;
+				return 3;
+			}
+#endif
+
+			/* PaX: glibc 2.4+ generates sethi/jmpl instead of save/call */
+			if ((save & 0xFFC00000U) == 0x05000000U &&
+			    (call & 0xFFFFE000U) == 0x85C0A000U &&
+			    nop == 0x01000000U)
+			{
+				regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10;
+				regs->u_regs[UREG_G2] = addr + 4;
+				addr = (save & 0x003FFFFFU) << 10;
+				addr += (((call | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U);
+				regs->pc = addr;
+				regs->npc = addr+4;
+				return 3;
+			}
+		}
+	} while (0);
+
+	do { /* PaX: unpatched PLT emulation step 2 */
+		unsigned int save, call, nop;
+
+		err = get_user(save, (unsigned int *)(regs->pc-4));
+		err |= get_user(call, (unsigned int *)regs->pc);
+		err |= get_user(nop, (unsigned int *)(regs->pc+4));
+		if (err)
+			break;
+
+		if (save == 0x9DE3BFA8U &&
+		    (call & 0xC0000000U) == 0x40000000U &&
+		    nop == 0x01000000U)
+		{
+			unsigned int dl_resolve = regs->pc + ((((call | 0xC0000000U) ^ 0x20000000U) + 0x20000000U) << 2);
+
+			regs->u_regs[UREG_RETPC] = regs->pc;
+			regs->pc = dl_resolve;
+			regs->npc = dl_resolve+4;
+			return 3;
+		}
+	} while (0);
+#endif
+
+	return 1;
+}
+
+void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
+{
+	unsigned long i;
+
+	printk(KERN_ERR "PAX: bytes at PC: ");
+	for (i = 0; i < 8; i++) {
+		unsigned int c;
+		if (get_user(c, (unsigned int *)pc+i))
+			printk(KERN_CONT "???????? ");
+		else
+			printk(KERN_CONT "%08x ", c);
+	}
+	printk("\n");
+}
+#endif
+
 static noinline void do_fault_siginfo(int code, int sig, struct pt_regs *regs,
 				      int text_fault)
 {
@@ -226,6 +500,24 @@ good_area:
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
 	} else {
+
+#ifdef CONFIG_PAX_PAGEEXEC
+		if ((mm->pax_flags & MF_PAX_PAGEEXEC) && text_fault && !(vma->vm_flags & VM_EXEC)) {
+			up_read(&mm->mmap_sem);
+			switch (pax_handle_fetch_fault(regs)) {
+
+#ifdef CONFIG_PAX_EMUPLT
+			case 2:
+			case 3:
+				return;
+#endif
+
+			}
+			pax_report_fault(regs, (void *)regs->pc, (void *)regs->u_regs[UREG_FP]);
+			do_group_exit(SIGKILL);
+		}
+#endif
+
 		/* Allow reads even for write-only mappings */
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 			goto bad_area;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/mm/fault_64.c linux-4.0.9-pax/arch/sparc/mm/fault_64.c
--- linux-4.0.9/arch/sparc/mm/fault_64.c	2015-03-18 15:21:50.220349253 +0100
+++ linux-4.0.9-pax/arch/sparc/mm/fault_64.c	2015-04-15 12:13:52.886318623 +0200
@@ -22,6 +22,9 @@
 #include <linux/kdebug.h>
 #include <linux/percpu.h>
 #include <linux/context_tracking.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/compiler.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -279,6 +282,466 @@ static void noinline __kprobes bogus_32b
 	show_regs(regs);
 }
 
+#ifdef CONFIG_PAX_PAGEEXEC
+#ifdef CONFIG_PAX_DLRESOLVE
+static void pax_emuplt_close(struct vm_area_struct *vma)
+{
+	vma->vm_mm->call_dl_resolve = 0UL;
+}
+
+static int pax_emuplt_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	unsigned int *kaddr;
+
+	vmf->page = alloc_page(GFP_HIGHUSER);
+	if (!vmf->page)
+		return VM_FAULT_OOM;
+
+	kaddr = kmap(vmf->page);
+	memset(kaddr, 0, PAGE_SIZE);
+	kaddr[0] = 0x9DE3BFA8U; /* save */
+	flush_dcache_page(vmf->page);
+	kunmap(vmf->page);
+	return VM_FAULT_MAJOR;
+}
+
+static const struct vm_operations_struct pax_vm_ops = {
+	.close = pax_emuplt_close,
+	.fault = pax_emuplt_fault
+};
+
+static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr)
+{
+	int ret;
+
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
+	vma->vm_mm = current->mm;
+	vma->vm_start = addr;
+	vma->vm_end = addr + PAGE_SIZE;
+	vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC;
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	vma->vm_ops = &pax_vm_ops;
+
+	ret = insert_vm_struct(current->mm, vma);
+	if (ret)
+		return ret;
+
+	++current->mm->total_vm;
+	return 0;
+}
+#endif
+
+/*
+ * PaX: decide what to do with offenders (regs->tpc = fault address)
+ *
+ * returns 1 when task should be killed
+ *         2 when patched PLT trampoline was detected
+ *         3 when unpatched PLT trampoline was detected
+ */
+static int pax_handle_fetch_fault(struct pt_regs *regs)
+{
+
+#ifdef CONFIG_PAX_EMUPLT
+	int err;
+
+	do { /* PaX: patched PLT emulation #1 */
+		unsigned int sethi1, sethi2, jmpl;
+
+		err = get_user(sethi1, (unsigned int *)regs->tpc);
+		err |= get_user(sethi2, (unsigned int *)(regs->tpc+4));
+		err |= get_user(jmpl, (unsigned int *)(regs->tpc+8));
+
+		if (err)
+			break;
+
+		if ((sethi1 & 0xFFC00000U) == 0x03000000U &&
+		    (sethi2 & 0xFFC00000U) == 0x03000000U &&
+		    (jmpl & 0xFFFFE000U) == 0x81C06000U)
+		{
+			unsigned long addr;
+
+			regs->u_regs[UREG_G1] = (sethi2 & 0x003FFFFFU) << 10;
+			addr = regs->u_regs[UREG_G1];
+			addr += (((jmpl | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL);
+
+			if (test_thread_flag(TIF_32BIT))
+				addr &= 0xFFFFFFFFUL;
+
+			regs->tpc = addr;
+			regs->tnpc = addr+4;
+			return 2;
+		}
+	} while (0);
+
+	do { /* PaX: patched PLT emulation #2 */
+		unsigned int ba;
+
+		err = get_user(ba, (unsigned int *)regs->tpc);
+
+		if (err)
+			break;
+
+		if ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30480000U) {
+			unsigned long addr;
+
+			if ((ba & 0xFFC00000U) == 0x30800000U)
+				addr = regs->tpc + ((((ba | 0xFFFFFFFFFFC00000UL) ^ 0x00200000UL) + 0x00200000UL) << 2);
+			else
+				addr = regs->tpc + ((((ba | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2);
+
+			if (test_thread_flag(TIF_32BIT))
+				addr &= 0xFFFFFFFFUL;
+
+			regs->tpc = addr;
+			regs->tnpc = addr+4;
+			return 2;
+		}
+	} while (0);
+
+	do { /* PaX: patched PLT emulation #3 */
+		unsigned int sethi, bajmpl, nop;
+
+		err = get_user(sethi, (unsigned int *)regs->tpc);
+		err |= get_user(bajmpl, (unsigned int *)(regs->tpc+4));
+		err |= get_user(nop, (unsigned int *)(regs->tpc+8));
+
+		if (err)
+			break;
+
+		if ((sethi & 0xFFC00000U) == 0x03000000U &&
+		    ((bajmpl & 0xFFFFE000U) == 0x81C06000U || (bajmpl & 0xFFF80000U) == 0x30480000U) &&
+		    nop == 0x01000000U)
+		{
+			unsigned long addr;
+
+			addr = (sethi & 0x003FFFFFU) << 10;
+			regs->u_regs[UREG_G1] = addr;
+			if ((bajmpl & 0xFFFFE000U) == 0x81C06000U)
+				addr += (((bajmpl | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL);
+			else
+				addr = regs->tpc + ((((bajmpl | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2);
+
+			if (test_thread_flag(TIF_32BIT))
+				addr &= 0xFFFFFFFFUL;
+
+			regs->tpc = addr;
+			regs->tnpc = addr+4;
+			return 2;
+		}
+	} while (0);
+
+	do { /* PaX: patched PLT emulation #4 */
+		unsigned int sethi, mov1, call, mov2;
+
+		err = get_user(sethi, (unsigned int *)regs->tpc);
+		err |= get_user(mov1, (unsigned int *)(regs->tpc+4));
+		err |= get_user(call, (unsigned int *)(regs->tpc+8));
+		err |= get_user(mov2, (unsigned int *)(regs->tpc+12));
+
+		if (err)
+			break;
+
+		if ((sethi & 0xFFC00000U) == 0x03000000U &&
+		    mov1 == 0x8210000FU &&
+		    (call & 0xC0000000U) == 0x40000000U &&
+		    mov2 == 0x9E100001U)
+		{
+			unsigned long addr;
+
+			regs->u_regs[UREG_G1] = regs->u_regs[UREG_RETPC];
+			addr = regs->tpc + 4 + ((((call | 0xFFFFFFFFC0000000UL) ^ 0x20000000UL) + 0x20000000UL) << 2);
+
+			if (test_thread_flag(TIF_32BIT))
+				addr &= 0xFFFFFFFFUL;
+
+			regs->tpc = addr;
+			regs->tnpc = addr+4;
+			return 2;
+		}
+	} while (0);
+
+	do { /* PaX: patched PLT emulation #5 */
+		unsigned int sethi, sethi1, sethi2, or1, or2, sllx, jmpl, nop;
+
+		err = get_user(sethi, (unsigned int *)regs->tpc);
+		err |= get_user(sethi1, (unsigned int *)(regs->tpc+4));
+		err |= get_user(sethi2, (unsigned int *)(regs->tpc+8));
+		err |= get_user(or1, (unsigned int *)(regs->tpc+12));
+		err |= get_user(or2, (unsigned int *)(regs->tpc+16));
+		err |= get_user(sllx, (unsigned int *)(regs->tpc+20));
+		err |= get_user(jmpl, (unsigned int *)(regs->tpc+24));
+		err |= get_user(nop, (unsigned int *)(regs->tpc+28));
+
+		if (err)
+			break;
+
+		if ((sethi & 0xFFC00000U) == 0x03000000U &&
+		    (sethi1 & 0xFFC00000U) == 0x03000000U &&
+		    (sethi2 & 0xFFC00000U) == 0x0B000000U &&
+		    (or1 & 0xFFFFE000U) == 0x82106000U &&
+		    (or2 & 0xFFFFE000U) == 0x8A116000U &&
+		    sllx == 0x83287020U &&
+		    jmpl == 0x81C04005U &&
+		    nop == 0x01000000U)
+		{
+			unsigned long addr;
+
+			regs->u_regs[UREG_G1] = ((sethi1 & 0x003FFFFFU) << 10) | (or1 & 0x000003FFU);
+			regs->u_regs[UREG_G1] <<= 32;
+			regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or2 & 0x000003FFU);
+			addr = regs->u_regs[UREG_G1] + regs->u_regs[UREG_G5];
+			regs->tpc = addr;
+			regs->tnpc = addr+4;
+			return 2;
+		}
+	} while (0);
+
+	do { /* PaX: patched PLT emulation #6 */
+		unsigned int sethi, sethi1, sethi2, sllx, or, jmpl, nop;
+
+		err = get_user(sethi, (unsigned int *)regs->tpc);
+		err |= get_user(sethi1, (unsigned int *)(regs->tpc+4));
+		err |= get_user(sethi2, (unsigned int *)(regs->tpc+8));
+		err |= get_user(sllx, (unsigned int *)(regs->tpc+12));
+		err |= get_user(or, (unsigned int *)(regs->tpc+16));
+		err |= get_user(jmpl, (unsigned int *)(regs->tpc+20));
+		err |= get_user(nop, (unsigned int *)(regs->tpc+24));
+
+		if (err)
+			break;
+
+		if ((sethi & 0xFFC00000U) == 0x03000000U &&
+		    (sethi1 & 0xFFC00000U) == 0x03000000U &&
+		    (sethi2 & 0xFFC00000U) == 0x0B000000U &&
+		    sllx == 0x83287020U &&
+		    (or & 0xFFFFE000U) == 0x8A116000U &&
+		    jmpl == 0x81C04005U &&
+		    nop == 0x01000000U)
+		{
+			unsigned long addr;
+
+			regs->u_regs[UREG_G1] = (sethi1 & 0x003FFFFFU) << 10;
+			regs->u_regs[UREG_G1] <<= 32;
+			regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or & 0x3FFU);
+			addr = regs->u_regs[UREG_G1] + regs->u_regs[UREG_G5];
+			regs->tpc = addr;
+			regs->tnpc = addr+4;
+			return 2;
+		}
+	} while (0);
+
+	do { /* PaX: unpatched PLT emulation step 1 */
+		unsigned int sethi, ba, nop;
+
+		err = get_user(sethi, (unsigned int *)regs->tpc);
+		err |= get_user(ba, (unsigned int *)(regs->tpc+4));
+		err |= get_user(nop, (unsigned int *)(regs->tpc+8));
+
+		if (err)
+			break;
+
+		if ((sethi & 0xFFC00000U) == 0x03000000U &&
+		    ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30680000U) &&
+		    nop == 0x01000000U)
+		{
+			unsigned long addr;
+			unsigned int save, call;
+			unsigned int sethi1, sethi2, or1, or2, sllx, add, jmpl;
+
+			if ((ba & 0xFFC00000U) == 0x30800000U)
+				addr = regs->tpc + 4 + ((((ba | 0xFFFFFFFFFFC00000UL) ^ 0x00200000UL) + 0x00200000UL) << 2);
+			else
+				addr = regs->tpc + 4 + ((((ba | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2);
+
+			if (test_thread_flag(TIF_32BIT))
+				addr &= 0xFFFFFFFFUL;
+
+			err = get_user(save, (unsigned int *)addr);
+			err |= get_user(call, (unsigned int *)(addr+4));
+			err |= get_user(nop, (unsigned int *)(addr+8));
+			if (err)
+				break;
+
+#ifdef CONFIG_PAX_DLRESOLVE
+			if (save == 0x9DE3BFA8U &&
+			    (call & 0xC0000000U) == 0x40000000U &&
+			    nop == 0x01000000U)
+			{
+				struct vm_area_struct *vma;
+				unsigned long call_dl_resolve;
+
+				down_read(&current->mm->mmap_sem);
+				call_dl_resolve = current->mm->call_dl_resolve;
+				up_read(&current->mm->mmap_sem);
+				if (likely(call_dl_resolve))
+					goto emulate;
+
+				vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+
+				down_write(&current->mm->mmap_sem);
+				if (current->mm->call_dl_resolve) {
+					call_dl_resolve = current->mm->call_dl_resolve;
+					up_write(&current->mm->mmap_sem);
+					if (vma)
+						kmem_cache_free(vm_area_cachep, vma);
+					goto emulate;
+				}
+
+				call_dl_resolve = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE);
+				if (!vma || (call_dl_resolve & ~PAGE_MASK)) {
+					up_write(&current->mm->mmap_sem);
+					if (vma)
+						kmem_cache_free(vm_area_cachep, vma);
+					return 1;
+				}
+
+				if (pax_insert_vma(vma, call_dl_resolve)) {
+					up_write(&current->mm->mmap_sem);
+					kmem_cache_free(vm_area_cachep, vma);
+					return 1;
+				}
+
+				current->mm->call_dl_resolve = call_dl_resolve;
+				up_write(&current->mm->mmap_sem);
+
+emulate:
+				regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10;
+				regs->tpc = call_dl_resolve;
+				regs->tnpc = addr+4;
+				return 3;
+			}
+#endif
+
+			/* PaX: glibc 2.4+ generates sethi/jmpl instead of save/call */
+			if ((save & 0xFFC00000U) == 0x05000000U &&
+			    (call & 0xFFFFE000U) == 0x85C0A000U &&
+			    nop == 0x01000000U)
+			{
+				regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10;
+				regs->u_regs[UREG_G2] = addr + 4;
+				addr = (save & 0x003FFFFFU) << 10;
+				addr += (((call | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL);
+
+				if (test_thread_flag(TIF_32BIT))
+					addr &= 0xFFFFFFFFUL;
+
+				regs->tpc = addr;
+				regs->tnpc = addr+4;
+				return 3;
+			}
+
+			/* PaX: 64-bit PLT stub */
+			err = get_user(sethi1, (unsigned int *)addr);
+			err |= get_user(sethi2, (unsigned int *)(addr+4));
+			err |= get_user(or1, (unsigned int *)(addr+8));
+			err |= get_user(or2, (unsigned int *)(addr+12));
+			err |= get_user(sllx, (unsigned int *)(addr+16));
+			err |= get_user(add, (unsigned int *)(addr+20));
+			err |= get_user(jmpl, (unsigned int *)(addr+24));
+			err |= get_user(nop, (unsigned int *)(addr+28));
+			if (err)
+				break;
+
+			if ((sethi1 & 0xFFC00000U) == 0x09000000U &&
+			    (sethi2 & 0xFFC00000U) == 0x0B000000U &&
+			    (or1 & 0xFFFFE000U) == 0x88112000U &&
+			    (or2 & 0xFFFFE000U) == 0x8A116000U &&
+			    sllx == 0x89293020U &&
+			    add == 0x8A010005U &&
+			    jmpl == 0x89C14000U &&
+			    nop == 0x01000000U)
+			{
+				regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10;
+				regs->u_regs[UREG_G4] = ((sethi1 & 0x003FFFFFU) << 10) | (or1 & 0x000003FFU);
+				regs->u_regs[UREG_G4] <<= 32;
+				regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or2 & 0x000003FFU);
+				regs->u_regs[UREG_G5] += regs->u_regs[UREG_G4];
+				regs->u_regs[UREG_G4] = addr + 24;
+				addr = regs->u_regs[UREG_G5];
+				regs->tpc = addr;
+				regs->tnpc = addr+4;
+				return 3;
+			}
+		}
+	} while (0);
+
+#ifdef CONFIG_PAX_DLRESOLVE
+	do { /* PaX: unpatched PLT emulation step 2 */
+		unsigned int save, call, nop;
+
+		err = get_user(save, (unsigned int *)(regs->tpc-4));
+		err |= get_user(call, (unsigned int *)regs->tpc);
+		err |= get_user(nop, (unsigned int *)(regs->tpc+4));
+		if (err)
+			break;
+
+		if (save == 0x9DE3BFA8U &&
+		    (call & 0xC0000000U) == 0x40000000U &&
+		    nop == 0x01000000U)
+		{
+			unsigned long dl_resolve = regs->tpc + ((((call | 0xFFFFFFFFC0000000UL) ^ 0x20000000UL) + 0x20000000UL) << 2);
+
+			if (test_thread_flag(TIF_32BIT))
+				dl_resolve &= 0xFFFFFFFFUL;
+
+			regs->u_regs[UREG_RETPC] = regs->tpc;
+			regs->tpc = dl_resolve;
+			regs->tnpc = dl_resolve+4;
+			return 3;
+		}
+	} while (0);
+#endif
+
+	do { /* PaX: patched PLT emulation #7, must be AFTER the unpatched PLT emulation */
+		unsigned int sethi, ba, nop;
+
+		err = get_user(sethi, (unsigned int *)regs->tpc);
+		err |= get_user(ba, (unsigned int *)(regs->tpc+4));
+		err |= get_user(nop, (unsigned int *)(regs->tpc+8));
+
+		if (err)
+			break;
+
+		if ((sethi & 0xFFC00000U) == 0x03000000U &&
+		    (ba & 0xFFF00000U) == 0x30600000U &&
+		    nop == 0x01000000U)
+		{
+			unsigned long addr;
+
+			addr = (sethi & 0x003FFFFFU) << 10;
+			regs->u_regs[UREG_G1] = addr;
+			addr = regs->tpc + ((((ba | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2);
+
+			if (test_thread_flag(TIF_32BIT))
+				addr &= 0xFFFFFFFFUL;
+
+			regs->tpc = addr;
+			regs->tnpc = addr+4;
+			return 2;
+		}
+	} while (0);
+
+#endif
+
+	return 1;
+}
+
+void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
+{
+	unsigned long i;
+
+	printk(KERN_ERR "PAX: bytes at PC: ");
+	for (i = 0; i < 8; i++) {
+		unsigned int c;
+		if (get_user(c, (unsigned int *)pc+i))
+			printk(KERN_CONT "???????? ");
+		else
+			printk(KERN_CONT "%08x ", c);
+	}
+	printk("\n");
+}
+#endif
+
 asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
 {
 	enum ctx_state prev_state = exception_enter();
@@ -353,6 +816,29 @@ retry:
 	if (!vma)
 		goto bad_area;
 
+#ifdef CONFIG_PAX_PAGEEXEC
+	/* PaX: detect ITLB misses on non-exec pages */
+	if ((mm->pax_flags & MF_PAX_PAGEEXEC) && vma->vm_start <= address &&
+	    !(vma->vm_flags & VM_EXEC) && (fault_code & FAULT_CODE_ITLB))
+	{
+		if (address != regs->tpc)
+			goto good_area;
+
+		up_read(&mm->mmap_sem);
+		switch (pax_handle_fetch_fault(regs)) {
+
+#ifdef CONFIG_PAX_EMUPLT
+		case 2:
+		case 3:
+			return;
+#endif
+
+		}
+		pax_report_fault(regs, (void *)regs->tpc, (void *)(regs->u_regs[UREG_FP] + STACK_BIAS));
+		do_group_exit(SIGKILL);
+	}
+#endif
+
 	/* Pure DTLB misses do not tell us whether the fault causing
 	 * load/store/atomic was a write or not, it only says that there
 	 * was no match.  So in such a case we (carefully) read the
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/mm/hugetlbpage.c linux-4.0.9-pax/arch/sparc/mm/hugetlbpage.c
--- linux-4.0.9/arch/sparc/mm/hugetlbpage.c	2015-04-13 11:21:01.402617485 +0200
+++ linux-4.0.9-pax/arch/sparc/mm/hugetlbpage.c	2015-04-15 12:13:52.890318623 +0200
@@ -27,6 +27,7 @@ static unsigned long hugetlb_get_unmappe
 							unsigned long pgoff,
 							unsigned long flags)
 {
+	struct mm_struct *mm = current->mm;
 	unsigned long task_size = TASK_SIZE;
 	struct vm_unmapped_area_info info;
 
@@ -35,7 +36,7 @@ static unsigned long hugetlb_get_unmappe
 
 	info.flags = 0;
 	info.length = len;
-	info.low_limit = TASK_UNMAPPED_BASE;
+	info.low_limit = mm->mmap_base;
 	info.high_limit = min(task_size, VA_EXCLUDE_START);
 	info.align_mask = PAGE_MASK & ~HPAGE_MASK;
 	info.align_offset = 0;
@@ -44,6 +45,12 @@ static unsigned long hugetlb_get_unmappe
 	if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
 		VM_BUG_ON(addr != -ENOMEM);
 		info.low_limit = VA_EXCLUDE_END;
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			info.low_limit += mm->delta_mmap;
+#endif
+
 		info.high_limit = task_size;
 		addr = vm_unmapped_area(&info);
 	}
@@ -82,6 +89,12 @@ hugetlb_get_unmapped_area_topdown(struct
 		VM_BUG_ON(addr != -ENOMEM);
 		info.flags = 0;
 		info.low_limit = TASK_UNMAPPED_BASE;
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			info.low_limit += mm->delta_mmap;
+#endif
+
 		info.high_limit = STACK_TOP32;
 		addr = vm_unmapped_area(&info);
 	}
@@ -111,11 +124,14 @@ hugetlb_get_unmapped_area(struct file *f
 		return addr;
 	}
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	if (addr) {
 		addr = ALIGN(addr, HPAGE_SIZE);
 		vma = find_vma(mm, addr);
-		if (task_size - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		if (task_size - len >= addr && check_heap_stack_gap(vma, addr, len))
 			return addr;
 	}
 	if (mm->get_unmapped_area == arch_get_unmapped_area)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/mm/init_64.c linux-4.0.9-pax/arch/sparc/mm/init_64.c
--- linux-4.0.9/arch/sparc/mm/init_64.c	2015-04-13 11:21:01.402617485 +0200
+++ linux-4.0.9-pax/arch/sparc/mm/init_64.c	2015-04-15 12:13:52.890318623 +0200
@@ -186,9 +186,9 @@ unsigned long sparc64_kern_sec_context _
 int num_kernel_image_mappings;
 
 #ifdef CONFIG_DEBUG_DCFLUSH
-atomic_t dcpage_flushes = ATOMIC_INIT(0);
+atomic_unchecked_t dcpage_flushes = ATOMIC_INIT(0);
 #ifdef CONFIG_SMP
-atomic_t dcpage_flushes_xcall = ATOMIC_INIT(0);
+atomic_unchecked_t dcpage_flushes_xcall = ATOMIC_INIT(0);
 #endif
 #endif
 
@@ -196,7 +196,7 @@ inline void flush_dcache_page_impl(struc
 {
 	BUG_ON(tlb_type == hypervisor);
 #ifdef CONFIG_DEBUG_DCFLUSH
-	atomic_inc(&dcpage_flushes);
+	atomic_inc_unchecked(&dcpage_flushes);
 #endif
 
 #ifdef DCACHE_ALIASING_POSSIBLE
@@ -468,10 +468,10 @@ void mmu_info(struct seq_file *m)
 
 #ifdef CONFIG_DEBUG_DCFLUSH
 	seq_printf(m, "DCPageFlushes\t: %d\n",
-		   atomic_read(&dcpage_flushes));
+		   atomic_read_unchecked(&dcpage_flushes));
 #ifdef CONFIG_SMP
 	seq_printf(m, "DCPageFlushesXC\t: %d\n",
-		   atomic_read(&dcpage_flushes_xcall));
+		   atomic_read_unchecked(&dcpage_flushes_xcall));
 #endif /* CONFIG_SMP */
 #endif /* CONFIG_DEBUG_DCFLUSH */
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/sparc/mm/Makefile linux-4.0.9-pax/arch/sparc/mm/Makefile
--- linux-4.0.9/arch/sparc/mm/Makefile	2015-03-18 15:21:50.220349253 +0100
+++ linux-4.0.9-pax/arch/sparc/mm/Makefile	2015-04-15 12:13:52.890318623 +0200
@@ -2,7 +2,7 @@
 #
 
 asflags-y := -ansi
-ccflags-y := -Werror
+#ccflags-y := -Werror
 
 obj-$(CONFIG_SPARC64)   += ultra.o tlb.o tsb.o gup.o
 obj-y                   += fault_$(BITS).o
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/tile/include/asm/atomic_64.h linux-4.0.9-pax/arch/tile/include/asm/atomic_64.h
--- linux-4.0.9/arch/tile/include/asm/atomic_64.h	2015-03-18 15:21:50.220349253 +0100
+++ linux-4.0.9-pax/arch/tile/include/asm/atomic_64.h	2015-04-15 12:13:52.890318623 +0200
@@ -105,6 +105,16 @@ static inline long atomic64_add_unless(a
 
 #define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
 
+#define atomic64_read_unchecked(v)		atomic64_read(v)
+#define atomic64_set_unchecked(v, i)		atomic64_set((v), (i))
+#define atomic64_add_unchecked(a, v)		atomic64_add((a), (v))
+#define atomic64_add_return_unchecked(a, v)	atomic64_add_return((a), (v))
+#define atomic64_sub_unchecked(a, v)		atomic64_sub((a), (v))
+#define atomic64_inc_unchecked(v)		atomic64_inc(v)
+#define atomic64_inc_return_unchecked(v)	atomic64_inc_return(v)
+#define atomic64_dec_unchecked(v)		atomic64_dec(v)
+#define atomic64_cmpxchg_unchecked(v, o, n)	atomic64_cmpxchg((v), (o), (n))
+
 /* Define this to indicate that cmpxchg is an efficient operation. */
 #define __HAVE_ARCH_CMPXCHG
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/tile/include/asm/uaccess.h linux-4.0.9-pax/arch/tile/include/asm/uaccess.h
--- linux-4.0.9/arch/tile/include/asm/uaccess.h	2015-04-13 11:21:01.418617484 +0200
+++ linux-4.0.9-pax/arch/tile/include/asm/uaccess.h	2015-04-15 12:13:52.890318623 +0200
@@ -417,9 +417,9 @@ static inline unsigned long __must_check
 					  const void __user *from,
 					  unsigned long n)
 {
-	int sz = __compiletime_object_size(to);
+	size_t sz = __compiletime_object_size(to);
 
-	if (likely(sz == -1 || sz >= n))
+	if (likely(sz == (size_t)-1 || sz >= n))
 		n = _copy_from_user(to, from, n);
 	else
 		copy_from_user_overflow();
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/um/include/asm/kmap_types.h linux-4.0.9-pax/arch/um/include/asm/kmap_types.h
--- linux-4.0.9/arch/um/include/asm/kmap_types.h	2015-03-18 15:21:50.224349253 +0100
+++ linux-4.0.9-pax/arch/um/include/asm/kmap_types.h	2015-04-15 12:13:52.890318623 +0200
@@ -8,6 +8,6 @@
 
 /* No more #include "asm/arch/kmap_types.h" ! */
 
-#define KM_TYPE_NR 14
+#define KM_TYPE_NR 15
 
 #endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/um/include/asm/page.h linux-4.0.9-pax/arch/um/include/asm/page.h
--- linux-4.0.9/arch/um/include/asm/page.h	2015-03-18 15:21:50.224349253 +0100
+++ linux-4.0.9-pax/arch/um/include/asm/page.h	2015-04-15 12:13:52.890318623 +0200
@@ -14,6 +14,9 @@
 #define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 
+#define ktla_ktva(addr)			(addr)
+#define ktva_ktla(addr)			(addr)
+
 #ifndef __ASSEMBLY__
 
 struct page;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/um/include/asm/pgtable-3level.h linux-4.0.9-pax/arch/um/include/asm/pgtable-3level.h
--- linux-4.0.9/arch/um/include/asm/pgtable-3level.h	2015-04-13 11:21:01.518617479 +0200
+++ linux-4.0.9-pax/arch/um/include/asm/pgtable-3level.h	2015-04-15 12:13:52.890318623 +0200
@@ -58,6 +58,7 @@
 #define pud_present(x)	(pud_val(x) & _PAGE_PRESENT)
 #define pud_populate(mm, pud, pmd) \
 	set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd)))
+#define pud_populate_kernel(mm, pud, pmd) pud_populate((mm), (pud), (pmd))
 
 #ifdef CONFIG_64BIT
 #define set_pud(pudptr, pudval) set_64bit((u64 *) (pudptr), pud_val(pudval))
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/um/kernel/process.c linux-4.0.9-pax/arch/um/kernel/process.c
--- linux-4.0.9/arch/um/kernel/process.c	2015-03-18 15:21:50.224349253 +0100
+++ linux-4.0.9-pax/arch/um/kernel/process.c	2015-04-15 12:13:52.890318623 +0200
@@ -356,22 +356,6 @@ int singlestepping(void * t)
 	return 2;
 }
 
-/*
- * Only x86 and x86_64 have an arch_align_stack().
- * All other arches have "#define arch_align_stack(x) (x)"
- * in their asm/exec.h
- * As this is included in UML from asm-um/system-generic.h,
- * we can use it to behave as the subarch does.
- */
-#ifndef arch_align_stack
-unsigned long arch_align_stack(unsigned long sp)
-{
-	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-		sp -= get_random_int() % 8192;
-	return sp & ~0xf;
-}
-#endif
-
 unsigned long get_wchan(struct task_struct *p)
 {
 	unsigned long stack_page, sp, ip;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/um/Makefile linux-4.0.9-pax/arch/um/Makefile
--- linux-4.0.9/arch/um/Makefile	2015-03-18 15:21:50.224349253 +0100
+++ linux-4.0.9-pax/arch/um/Makefile	2015-04-15 12:13:52.890318623 +0200
@@ -72,6 +72,10 @@ USER_CFLAGS = $(patsubst $(KERNEL_DEFINE
 	$(patsubst -I%,,$(KBUILD_CFLAGS)))) $(ARCH_INCLUDE) $(MODE_INCLUDE) \
 	$(filter -I%,$(CFLAGS)) -D_FILE_OFFSET_BITS=64 -idirafter include
 
+ifdef CONSTIFY_PLUGIN
+USER_CFLAGS	+= -fplugin-arg-constify_plugin-no-constify
+endif
+
 #This will adjust *FLAGS accordingly to the platform.
 include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS)
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/boot/bitops.h linux-4.0.9-pax/arch/x86/boot/bitops.h
--- linux-4.0.9/arch/x86/boot/bitops.h	2015-03-18 15:21:50.224349253 +0100
+++ linux-4.0.9-pax/arch/x86/boot/bitops.h	2015-04-15 12:13:52.890318623 +0200
@@ -26,7 +26,7 @@ static inline int variable_test_bit(int
 	u8 v;
 	const u32 *p = (const u32 *)addr;
 
-	asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr));
+	asm volatile("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr));
 	return v;
 }
 
@@ -37,7 +37,7 @@ static inline int variable_test_bit(int
 
 static inline void set_bit(int nr, void *addr)
 {
-	asm("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr));
+	asm volatile("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr));
 }
 
 #endif /* BOOT_BITOPS_H */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/boot/boot.h linux-4.0.9-pax/arch/x86/boot/boot.h
--- linux-4.0.9/arch/x86/boot/boot.h	2015-03-18 15:21:50.224349253 +0100
+++ linux-4.0.9-pax/arch/x86/boot/boot.h	2015-04-15 12:13:52.890318623 +0200
@@ -84,7 +84,7 @@ static inline void io_delay(void)
 static inline u16 ds(void)
 {
 	u16 seg;
-	asm("movw %%ds,%0" : "=rm" (seg));
+	asm volatile("movw %%ds,%0" : "=rm" (seg));
 	return seg;
 }
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/boot/compressed/efi_stub_32.S linux-4.0.9-pax/arch/x86/boot/compressed/efi_stub_32.S
--- linux-4.0.9/arch/x86/boot/compressed/efi_stub_32.S	2015-03-18 15:21:50.224349253 +0100
+++ linux-4.0.9-pax/arch/x86/boot/compressed/efi_stub_32.S	2015-04-15 12:13:52.890318623 +0200
@@ -46,16 +46,13 @@ ENTRY(efi_call_phys)
 	 * parameter 2, ..., param n. To make things easy, we save the return
 	 * address of efi_call_phys in a global variable.
 	 */
-	popl	%ecx
-	movl	%ecx, saved_return_addr(%edx)
-	/* get the function pointer into ECX*/
-	popl	%ecx
-	movl	%ecx, efi_rt_function_ptr(%edx)
+	popl	saved_return_addr(%edx)
+	popl	efi_rt_function_ptr(%edx)
 
 	/*
 	 * 3. Call the physical function.
 	 */
-	call	*%ecx
+	call	*efi_rt_function_ptr(%edx)
 
 	/*
 	 * 4. Balance the stack. And because EAX contain the return value,
@@ -67,15 +64,12 @@ ENTRY(efi_call_phys)
 1:	popl	%edx
 	subl	$1b, %edx
 
-	movl	efi_rt_function_ptr(%edx), %ecx
-	pushl	%ecx
+	pushl	efi_rt_function_ptr(%edx)
 
 	/*
 	 * 10. Push the saved return address onto the stack and return.
 	 */
-	movl	saved_return_addr(%edx), %ecx
-	pushl	%ecx
-	ret
+	jmpl	*saved_return_addr(%edx)
 ENDPROC(efi_call_phys)
 .previous
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/boot/compressed/efi_thunk_64.S linux-4.0.9-pax/arch/x86/boot/compressed/efi_thunk_64.S
--- linux-4.0.9/arch/x86/boot/compressed/efi_thunk_64.S	2015-04-13 11:21:01.618617474 +0200
+++ linux-4.0.9-pax/arch/x86/boot/compressed/efi_thunk_64.S	2015-04-15 12:13:52.890318623 +0200
@@ -189,8 +189,8 @@ efi_gdt64:
 	.long	0			/* Filled out by user */
 	.word	0
 	.quad	0x0000000000000000	/* NULL descriptor */
-	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
-	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
+	.quad	0x00af9b000000ffff	/* __KERNEL_CS */
+	.quad	0x00cf93000000ffff	/* __KERNEL_DS */
 	.quad	0x0080890000000000	/* TS descriptor */
 	.quad   0x0000000000000000	/* TS continued */
 efi_gdt64_end:
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/boot/compressed/head_32.S linux-4.0.9-pax/arch/x86/boot/compressed/head_32.S
--- linux-4.0.9/arch/x86/boot/compressed/head_32.S	2015-03-18 15:21:50.224349253 +0100
+++ linux-4.0.9-pax/arch/x86/boot/compressed/head_32.S	2015-04-15 12:13:52.890318623 +0200
@@ -140,10 +140,10 @@ preferred_addr:
 	addl    %eax, %ebx
 	notl	%eax
 	andl    %eax, %ebx
-	cmpl	$LOAD_PHYSICAL_ADDR, %ebx
+	cmpl	$____LOAD_PHYSICAL_ADDR, %ebx
 	jge	1f
 #endif
-	movl	$LOAD_PHYSICAL_ADDR, %ebx
+	movl	$____LOAD_PHYSICAL_ADDR, %ebx
 1:
 
 	/* Target address to relocate to for decompression */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/boot/compressed/head_64.S linux-4.0.9-pax/arch/x86/boot/compressed/head_64.S
--- linux-4.0.9/arch/x86/boot/compressed/head_64.S	2015-03-18 15:21:50.224349253 +0100
+++ linux-4.0.9-pax/arch/x86/boot/compressed/head_64.S	2015-04-15 12:13:52.890318623 +0200
@@ -94,10 +94,10 @@ ENTRY(startup_32)
 	addl	%eax, %ebx
 	notl	%eax
 	andl	%eax, %ebx
-	cmpl	$LOAD_PHYSICAL_ADDR, %ebx
+	cmpl	$____LOAD_PHYSICAL_ADDR, %ebx
 	jge	1f
 #endif
-	movl	$LOAD_PHYSICAL_ADDR, %ebx
+	movl	$____LOAD_PHYSICAL_ADDR, %ebx
 1:
 
 	/* Target address to relocate to for decompression */
@@ -322,10 +322,10 @@ preferred_addr:
 	addq	%rax, %rbp
 	notq	%rax
 	andq	%rax, %rbp
-	cmpq	$LOAD_PHYSICAL_ADDR, %rbp
+	cmpq	$____LOAD_PHYSICAL_ADDR, %rbp
 	jge	1f
 #endif
-	movq	$LOAD_PHYSICAL_ADDR, %rbp
+	movq	$____LOAD_PHYSICAL_ADDR, %rbp
 1:
 
 	/* Target address to relocate to for decompression */
@@ -434,8 +434,8 @@ gdt:
 	.long	gdt
 	.word	0
 	.quad	0x0000000000000000	/* NULL descriptor */
-	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
-	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
+	.quad	0x00af9b000000ffff	/* __KERNEL_CS */
+	.quad	0x00cf93000000ffff	/* __KERNEL_DS */
 	.quad	0x0080890000000000	/* TS descriptor */
 	.quad   0x0000000000000000	/* TS continued */
 gdt_end:
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/boot/compressed/Makefile linux-4.0.9-pax/arch/x86/boot/compressed/Makefile
--- linux-4.0.9/arch/x86/boot/compressed/Makefile	2015-04-13 11:21:01.618617474 +0200
+++ linux-4.0.9-pax/arch/x86/boot/compressed/Makefile	2015-04-15 12:13:52.890318623 +0200
@@ -30,6 +30,9 @@ KBUILD_CFLAGS += $(cflags-y)
 KBUILD_CFLAGS += -mno-mmx -mno-sse
 KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
 KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
+ifdef CONSTIFY_PLUGIN
+KBUILD_CFLAGS += -fplugin-arg-constify_plugin-no-constify
+endif
 
 KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
 GCOV_PROFILE := n
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/boot/compressed/misc.c linux-4.0.9-pax/arch/x86/boot/compressed/misc.c
--- linux-4.0.9/arch/x86/boot/compressed/misc.c	2015-03-18 15:21:50.224349253 +0100
+++ linux-4.0.9-pax/arch/x86/boot/compressed/misc.c	2015-04-15 12:13:52.890318623 +0200
@@ -242,7 +242,7 @@ static void handle_relocations(void *out
 	 * Calculate the delta between where vmlinux was linked to load
 	 * and where it was actually loaded.
 	 */
-	delta = min_addr - LOAD_PHYSICAL_ADDR;
+	delta = min_addr - ____LOAD_PHYSICAL_ADDR;
 	if (!delta) {
 		debug_putstr("No relocation needed... ");
 		return;
@@ -324,7 +324,7 @@ static void parse_elf(void *output)
 	Elf32_Ehdr ehdr;
 	Elf32_Phdr *phdrs, *phdr;
 #endif
-	void *dest;
+	void *dest, *prev;
 	int i;
 
 	memcpy(&ehdr, output, sizeof(ehdr));
@@ -351,13 +351,16 @@ static void parse_elf(void *output)
 		case PT_LOAD:
 #ifdef CONFIG_RELOCATABLE
 			dest = output;
-			dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR);
+			dest += (phdr->p_paddr - ____LOAD_PHYSICAL_ADDR);
 #else
 			dest = (void *)(phdr->p_paddr);
 #endif
 			memcpy(dest,
 			       output + phdr->p_offset,
 			       phdr->p_filesz);
+			if (i)
+				memset(prev, 0xff, dest - prev);
+			prev = dest + phdr->p_filesz;
 			break;
 		default: /* Ignore other PT_* */ break;
 		}
@@ -416,7 +419,7 @@ asmlinkage __visible void *decompress_ke
 		error("Destination address too large");
 #endif
 #ifndef CONFIG_RELOCATABLE
-	if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
+	if ((unsigned long)output != ____LOAD_PHYSICAL_ADDR)
 		error("Wrong destination address");
 #endif
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/boot/cpucheck.c linux-4.0.9-pax/arch/x86/boot/cpucheck.c
--- linux-4.0.9/arch/x86/boot/cpucheck.c	2015-03-18 15:21:50.224349253 +0100
+++ linux-4.0.9-pax/arch/x86/boot/cpucheck.c	2015-04-15 12:13:52.890318623 +0200
@@ -125,9 +125,9 @@ int check_cpu(int *cpu_level_ptr, int *r
 		u32 ecx = MSR_K7_HWCR;
 		u32 eax, edx;
 
-		asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
+		asm volatile("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
 		eax &= ~(1 << 15);
-		asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+		asm volatile("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
 
 		get_cpuflags();	/* Make sure it really did something */
 		err = check_cpuflags();
@@ -140,9 +140,9 @@ int check_cpu(int *cpu_level_ptr, int *r
 		u32 ecx = MSR_VIA_FCR;
 		u32 eax, edx;
 
-		asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
+		asm volatile("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
 		eax |= (1<<1)|(1<<7);
-		asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+		asm volatile("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
 
 		set_bit(X86_FEATURE_CX8, cpu.flags);
 		err = check_cpuflags();
@@ -153,12 +153,12 @@ int check_cpu(int *cpu_level_ptr, int *r
 		u32 eax, edx;
 		u32 level = 1;
 
-		asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
-		asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx));
-		asm("cpuid"
+		asm volatile("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
+		asm volatile("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx));
+		asm volatile("cpuid"
 		    : "+a" (level), "=d" (cpu.flags[0])
 		    : : "ecx", "ebx");
-		asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+		asm volatile("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
 
 		err = check_cpuflags();
 	} else if (err == 0x01 &&
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/boot/header.S linux-4.0.9-pax/arch/x86/boot/header.S
--- linux-4.0.9/arch/x86/boot/header.S	2015-03-18 15:21:50.224349253 +0100
+++ linux-4.0.9-pax/arch/x86/boot/header.S	2015-04-15 12:13:52.890318623 +0200
@@ -438,10 +438,14 @@ setup_data:		.quad 0			# 64-bit physical
 						# single linked list of
 						# struct setup_data
 
-pref_address:		.quad LOAD_PHYSICAL_ADDR	# preferred load addr
+pref_address:		.quad ____LOAD_PHYSICAL_ADDR	# preferred load addr
 
 #define ZO_INIT_SIZE	(ZO__end - ZO_startup_32 + ZO_z_extract_offset)
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC)
+#define VO_INIT_SIZE	(VO__end - VO__text - __PAGE_OFFSET - ____LOAD_PHYSICAL_ADDR)
+#else
 #define VO_INIT_SIZE	(VO__end - VO__text)
+#endif
 #if ZO_INIT_SIZE > VO_INIT_SIZE
 #define INIT_SIZE ZO_INIT_SIZE
 #else
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/boot/Makefile linux-4.0.9-pax/arch/x86/boot/Makefile
--- linux-4.0.9/arch/x86/boot/Makefile	2015-04-13 11:21:01.618617474 +0200
+++ linux-4.0.9-pax/arch/x86/boot/Makefile	2015-04-15 12:13:52.890318623 +0200
@@ -58,6 +58,9 @@ clean-files += cpustr.h
 # ---------------------------------------------------------------------------
 
 KBUILD_CFLAGS	:= $(USERINCLUDE) $(REALMODE_CFLAGS) -D_SETUP
+ifdef CONSTIFY_PLUGIN
+KBUILD_CFLAGS	+= -fplugin-arg-constify_plugin-no-constify
+endif
 KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__
 GCOV_PROFILE := n
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/boot/memory.c linux-4.0.9-pax/arch/x86/boot/memory.c
--- linux-4.0.9/arch/x86/boot/memory.c	2015-03-18 15:21:50.224349253 +0100
+++ linux-4.0.9-pax/arch/x86/boot/memory.c	2015-04-15 12:13:52.890318623 +0200
@@ -19,7 +19,7 @@
 
 static int detect_memory_e820(void)
 {
-	int count = 0;
+	unsigned int count = 0;
 	struct biosregs ireg, oreg;
 	struct e820entry *desc = boot_params.e820_map;
 	static struct e820entry buf; /* static so it is zeroed */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/boot/video.c linux-4.0.9-pax/arch/x86/boot/video.c
--- linux-4.0.9/arch/x86/boot/video.c	2015-03-18 15:21:50.224349253 +0100
+++ linux-4.0.9-pax/arch/x86/boot/video.c	2015-04-15 12:13:52.890318623 +0200
@@ -96,7 +96,7 @@ static void store_mode_params(void)
 static unsigned int get_entry(void)
 {
 	char entry_buf[4];
-	int i, len = 0;
+	unsigned int i, len = 0;
 	int key;
 	unsigned int v;
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/boot/video-vesa.c linux-4.0.9-pax/arch/x86/boot/video-vesa.c
--- linux-4.0.9/arch/x86/boot/video-vesa.c	2015-03-18 15:21:50.224349253 +0100
+++ linux-4.0.9-pax/arch/x86/boot/video-vesa.c	2015-04-15 12:13:52.894318623 +0200
@@ -201,6 +201,7 @@ static void vesa_store_pm_info(void)
 
 	boot_params.screen_info.vesapm_seg = oreg.es;
 	boot_params.screen_info.vesapm_off = oreg.di;
+	boot_params.screen_info.vesapm_size = oreg.cx;
 }
 
 /*
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/aesni-intel_asm.S linux-4.0.9-pax/arch/x86/crypto/aesni-intel_asm.S
--- linux-4.0.9/arch/x86/crypto/aesni-intel_asm.S	2015-04-13 11:21:01.618617474 +0200
+++ linux-4.0.9-pax/arch/x86/crypto/aesni-intel_asm.S	2015-04-15 12:13:52.894318623 +0200
@@ -31,6 +31,7 @@
 
 #include <linux/linkage.h>
 #include <asm/inst.h>
+#include <asm/alternative-asm.h>
 
 /*
  * The following macros are used to move an (un)aligned 16 byte value to/from
@@ -217,7 +218,7 @@ enc:        .octa 0x2
 * num_initial_blocks = b mod 4
 * encrypt the initial num_initial_blocks blocks and apply ghash on
 * the ciphertext
-* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
+* %r10, %r11, %r15, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
 * are clobbered
 * arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified
 */
@@ -227,8 +228,8 @@ enc:        .octa 0x2
 XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
         MOVADQ     SHUF_MASK(%rip), %xmm14
 	mov	   arg7, %r10           # %r10 = AAD
-	mov	   arg8, %r12           # %r12 = aadLen
-	mov	   %r12, %r11
+	mov	   arg8, %r15           # %r15 = aadLen
+	mov	   %r15, %r11
 	pxor	   %xmm\i, %xmm\i
 
 _get_AAD_loop\num_initial_blocks\operation:
@@ -237,17 +238,17 @@ _get_AAD_loop\num_initial_blocks\operati
 	psrldq	   $4, %xmm\i
 	pxor	   \TMP1, %xmm\i
 	add	   $4, %r10
-	sub	   $4, %r12
+	sub	   $4, %r15
 	jne	   _get_AAD_loop\num_initial_blocks\operation
 
 	cmp	   $16, %r11
 	je	   _get_AAD_loop2_done\num_initial_blocks\operation
 
-	mov	   $16, %r12
+	mov	   $16, %r15
 _get_AAD_loop2\num_initial_blocks\operation:
 	psrldq	   $4, %xmm\i
-	sub	   $4, %r12
-	cmp	   %r11, %r12
+	sub	   $4, %r15
+	cmp	   %r11, %r15
 	jne	   _get_AAD_loop2\num_initial_blocks\operation
 
 _get_AAD_loop2_done\num_initial_blocks\operation:
@@ -442,7 +443,7 @@ _initial_blocks_done\num_initial_blocks\
 * num_initial_blocks = b mod 4
 * encrypt the initial num_initial_blocks blocks and apply ghash on
 * the ciphertext
-* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
+* %r10, %r11, %r15, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
 * are clobbered
 * arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified
 */
@@ -452,8 +453,8 @@ _initial_blocks_done\num_initial_blocks\
 XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
         MOVADQ     SHUF_MASK(%rip), %xmm14
 	mov	   arg7, %r10           # %r10 = AAD
-	mov	   arg8, %r12           # %r12 = aadLen
-	mov	   %r12, %r11
+	mov	   arg8, %r15           # %r15 = aadLen
+	mov	   %r15, %r11
 	pxor	   %xmm\i, %xmm\i
 _get_AAD_loop\num_initial_blocks\operation:
 	movd	   (%r10), \TMP1
@@ -461,15 +462,15 @@ _get_AAD_loop\num_initial_blocks\operati
 	psrldq	   $4, %xmm\i
 	pxor	   \TMP1, %xmm\i
 	add	   $4, %r10
-	sub	   $4, %r12
+	sub	   $4, %r15
 	jne	   _get_AAD_loop\num_initial_blocks\operation
 	cmp	   $16, %r11
 	je	   _get_AAD_loop2_done\num_initial_blocks\operation
-	mov	   $16, %r12
+	mov	   $16, %r15
 _get_AAD_loop2\num_initial_blocks\operation:
 	psrldq	   $4, %xmm\i
-	sub	   $4, %r12
-	cmp	   %r11, %r12
+	sub	   $4, %r15
+	cmp	   %r11, %r15
 	jne	   _get_AAD_loop2\num_initial_blocks\operation
 _get_AAD_loop2_done\num_initial_blocks\operation:
 	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
@@ -1280,7 +1281,7 @@ _esb_loop_\@:
 *
 *****************************************************************************/
 ENTRY(aesni_gcm_dec)
-	push	%r12
+	push	%r15
 	push	%r13
 	push	%r14
 	mov	%rsp, %r14
@@ -1290,8 +1291,8 @@ ENTRY(aesni_gcm_dec)
 */
 	sub	$VARIABLE_OFFSET, %rsp
 	and	$~63, %rsp                        # align rsp to 64 bytes
-	mov	%arg6, %r12
-	movdqu	(%r12), %xmm13			  # %xmm13 = HashKey
+	mov	%arg6, %r15
+	movdqu	(%r15), %xmm13			  # %xmm13 = HashKey
         movdqa  SHUF_MASK(%rip), %xmm2
 	PSHUFB_XMM %xmm2, %xmm13
 
@@ -1319,10 +1320,10 @@ ENTRY(aesni_gcm_dec)
 	movdqa %xmm13, HashKey(%rsp)           # store HashKey<<1 (mod poly)
 	mov %arg4, %r13    # save the number of bytes of plaintext/ciphertext
 	and $-16, %r13                      # %r13 = %r13 - (%r13 mod 16)
-	mov %r13, %r12
-	and $(3<<4), %r12
+	mov %r13, %r15
+	and $(3<<4), %r15
 	jz _initial_num_blocks_is_0_decrypt
-	cmp $(2<<4), %r12
+	cmp $(2<<4), %r15
 	jb _initial_num_blocks_is_1_decrypt
 	je _initial_num_blocks_is_2_decrypt
 _initial_num_blocks_is_3_decrypt:
@@ -1372,16 +1373,16 @@ _zero_cipher_left_decrypt:
 	sub $16, %r11
 	add %r13, %r11
 	movdqu (%arg3,%r11,1), %xmm1   # receive the last <16 byte block
-	lea SHIFT_MASK+16(%rip), %r12
-	sub %r13, %r12
+	lea SHIFT_MASK+16(%rip), %r15
+	sub %r13, %r15
 # adjust the shuffle mask pointer to be able to shift 16-%r13 bytes
 # (%r13 is the number of bytes in plaintext mod 16)
-	movdqu (%r12), %xmm2           # get the appropriate shuffle mask
+	movdqu (%r15), %xmm2           # get the appropriate shuffle mask
 	PSHUFB_XMM %xmm2, %xmm1            # right shift 16-%r13 butes
 
 	movdqa  %xmm1, %xmm2
 	pxor %xmm1, %xmm0            # Ciphertext XOR E(K, Yn)
-	movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+	movdqu ALL_F-SHIFT_MASK(%r15), %xmm1
 	# get the appropriate mask to mask out top 16-%r13 bytes of %xmm0
 	pand %xmm1, %xmm0            # mask out top 16-%r13 bytes of %xmm0
 	pand    %xmm1, %xmm2
@@ -1410,9 +1411,9 @@ _less_than_8_bytes_left_decrypt:
 	sub	$1, %r13
 	jne	_less_than_8_bytes_left_decrypt
 _multiple_of_16_bytes_decrypt:
-	mov	arg8, %r12		  # %r13 = aadLen (number of bytes)
-	shl	$3, %r12		  # convert into number of bits
-	movd	%r12d, %xmm15		  # len(A) in %xmm15
+	mov	arg8, %r15		  # %r15 = aadLen (number of bytes)
+	shl	$3, %r15		  # convert into number of bits
+	movd	%r15d, %xmm15		  # len(A) in %xmm15
 	shl	$3, %arg4		  # len(C) in bits (*128)
 	MOVQ_R64_XMM	%arg4, %xmm1
 	pslldq	$8, %xmm15		  # %xmm15 = len(A)||0x0000000000000000
@@ -1451,7 +1452,8 @@ _return_T_done_decrypt:
 	mov	%r14, %rsp
 	pop	%r14
 	pop	%r13
-	pop	%r12
+	pop	%r15
+	pax_force_retaddr
 	ret
 ENDPROC(aesni_gcm_dec)
 
@@ -1540,7 +1542,7 @@ ENDPROC(aesni_gcm_dec)
 * poly = x^128 + x^127 + x^126 + x^121 + 1
 ***************************************************************************/
 ENTRY(aesni_gcm_enc)
-	push	%r12
+	push	%r15
 	push	%r13
 	push	%r14
 	mov	%rsp, %r14
@@ -1550,8 +1552,8 @@ ENTRY(aesni_gcm_enc)
 #
 	sub	$VARIABLE_OFFSET, %rsp
 	and	$~63, %rsp
-	mov	%arg6, %r12
-	movdqu	(%r12), %xmm13
+	mov	%arg6, %r15
+	movdqu	(%r15), %xmm13
         movdqa  SHUF_MASK(%rip), %xmm2
 	PSHUFB_XMM %xmm2, %xmm13
 
@@ -1575,13 +1577,13 @@ ENTRY(aesni_gcm_enc)
 	movdqa	%xmm13, HashKey(%rsp)
 	mov	%arg4, %r13            # %xmm13 holds HashKey<<1 (mod poly)
 	and	$-16, %r13
-	mov	%r13, %r12
+	mov	%r13, %r15
 
         # Encrypt first few blocks
 
-	and	$(3<<4), %r12
+	and	$(3<<4), %r15
 	jz	_initial_num_blocks_is_0_encrypt
-	cmp	$(2<<4), %r12
+	cmp	$(2<<4), %r15
 	jb	_initial_num_blocks_is_1_encrypt
 	je	_initial_num_blocks_is_2_encrypt
 _initial_num_blocks_is_3_encrypt:
@@ -1634,14 +1636,14 @@ _zero_cipher_left_encrypt:
 	sub $16, %r11
 	add %r13, %r11
 	movdqu (%arg3,%r11,1), %xmm1     # receive the last <16 byte blocks
-	lea SHIFT_MASK+16(%rip), %r12
-	sub %r13, %r12
+	lea SHIFT_MASK+16(%rip), %r15
+	sub %r13, %r15
 	# adjust the shuffle mask pointer to be able to shift 16-r13 bytes
 	# (%r13 is the number of bytes in plaintext mod 16)
-	movdqu	(%r12), %xmm2           # get the appropriate shuffle mask
+	movdqu	(%r15), %xmm2           # get the appropriate shuffle mask
 	PSHUFB_XMM	%xmm2, %xmm1            # shift right 16-r13 byte
 	pxor	%xmm1, %xmm0            # Plaintext XOR Encrypt(K, Yn)
-	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
+	movdqu	ALL_F-SHIFT_MASK(%r15), %xmm1
 	# get the appropriate mask to mask out top 16-r13 bytes of xmm0
 	pand	%xmm1, %xmm0            # mask out top 16-r13 bytes of xmm0
         movdqa SHUF_MASK(%rip), %xmm10
@@ -1674,9 +1676,9 @@ _less_than_8_bytes_left_encrypt:
 	sub $1, %r13
 	jne _less_than_8_bytes_left_encrypt
 _multiple_of_16_bytes_encrypt:
-	mov	arg8, %r12    # %r12 = addLen (number of bytes)
-	shl	$3, %r12
-	movd	%r12d, %xmm15       # len(A) in %xmm15
+	mov	arg8, %r15    # %r15 = aadLen (number of bytes)
+	shl	$3, %r15
+	movd	%r15d, %xmm15       # len(A) in %xmm15
 	shl	$3, %arg4               # len(C) in bits (*128)
 	MOVQ_R64_XMM	%arg4, %xmm1
 	pslldq	$8, %xmm15          # %xmm15 = len(A)||0x0000000000000000
@@ -1715,7 +1717,8 @@ _return_T_done_encrypt:
 	mov	%r14, %rsp
 	pop	%r14
 	pop	%r13
-	pop	%r12
+	pop	%r15
+	pax_force_retaddr
 	ret
 ENDPROC(aesni_gcm_enc)
 
@@ -1733,6 +1736,7 @@ _key_expansion_256a:
 	pxor %xmm1, %xmm0
 	movaps %xmm0, (TKEYP)
 	add $0x10, TKEYP
+	pax_force_retaddr
 	ret
 ENDPROC(_key_expansion_128)
 ENDPROC(_key_expansion_256a)
@@ -1759,6 +1763,7 @@ _key_expansion_192a:
 	shufps $0b01001110, %xmm2, %xmm1
 	movaps %xmm1, 0x10(TKEYP)
 	add $0x20, TKEYP
+	pax_force_retaddr
 	ret
 ENDPROC(_key_expansion_192a)
 
@@ -1779,6 +1784,7 @@ _key_expansion_192b:
 
 	movaps %xmm0, (TKEYP)
 	add $0x10, TKEYP
+	pax_force_retaddr
 	ret
 ENDPROC(_key_expansion_192b)
 
@@ -1792,6 +1798,7 @@ _key_expansion_256b:
 	pxor %xmm1, %xmm2
 	movaps %xmm2, (TKEYP)
 	add $0x10, TKEYP
+	pax_force_retaddr
 	ret
 ENDPROC(_key_expansion_256b)
 
@@ -1905,6 +1912,7 @@ ENTRY(aesni_set_key)
 #ifndef __x86_64__
 	popl KEYP
 #endif
+	pax_force_retaddr
 	ret
 ENDPROC(aesni_set_key)
 
@@ -1927,6 +1935,7 @@ ENTRY(aesni_enc)
 	popl KLEN
 	popl KEYP
 #endif
+	pax_force_retaddr
 	ret
 ENDPROC(aesni_enc)
 
@@ -1985,6 +1994,7 @@ _aesni_enc1:
 	AESENC KEY STATE
 	movaps 0x70(TKEYP), KEY
 	AESENCLAST KEY STATE
+	pax_force_retaddr
 	ret
 ENDPROC(_aesni_enc1)
 
@@ -2094,6 +2104,7 @@ _aesni_enc4:
 	AESENCLAST KEY STATE2
 	AESENCLAST KEY STATE3
 	AESENCLAST KEY STATE4
+	pax_force_retaddr
 	ret
 ENDPROC(_aesni_enc4)
 
@@ -2117,6 +2128,7 @@ ENTRY(aesni_dec)
 	popl KLEN
 	popl KEYP
 #endif
+	pax_force_retaddr
 	ret
 ENDPROC(aesni_dec)
 
@@ -2175,6 +2187,7 @@ _aesni_dec1:
 	AESDEC KEY STATE
 	movaps 0x70(TKEYP), KEY
 	AESDECLAST KEY STATE
+	pax_force_retaddr
 	ret
 ENDPROC(_aesni_dec1)
 
@@ -2284,6 +2297,7 @@ _aesni_dec4:
 	AESDECLAST KEY STATE2
 	AESDECLAST KEY STATE3
 	AESDECLAST KEY STATE4
+	pax_force_retaddr
 	ret
 ENDPROC(_aesni_dec4)
 
@@ -2342,6 +2356,7 @@ ENTRY(aesni_ecb_enc)
 	popl KEYP
 	popl LEN
 #endif
+	pax_force_retaddr
 	ret
 ENDPROC(aesni_ecb_enc)
 
@@ -2401,6 +2416,7 @@ ENTRY(aesni_ecb_dec)
 	popl KEYP
 	popl LEN
 #endif
+	pax_force_retaddr
 	ret
 ENDPROC(aesni_ecb_dec)
 
@@ -2443,6 +2459,7 @@ ENTRY(aesni_cbc_enc)
 	popl LEN
 	popl IVP
 #endif
+	pax_force_retaddr
 	ret
 ENDPROC(aesni_cbc_enc)
 
@@ -2534,6 +2551,7 @@ ENTRY(aesni_cbc_dec)
 	popl LEN
 	popl IVP
 #endif
+	pax_force_retaddr
 	ret
 ENDPROC(aesni_cbc_dec)
 
@@ -2561,6 +2579,7 @@ _aesni_inc_init:
 	mov $1, TCTR_LOW
 	MOVQ_R64_XMM TCTR_LOW INC
 	MOVQ_R64_XMM CTR TCTR_LOW
+	pax_force_retaddr
 	ret
 ENDPROC(_aesni_inc_init)
 
@@ -2590,6 +2609,7 @@ _aesni_inc:
 .Linc_low:
 	movaps CTR, IV
 	PSHUFB_XMM BSWAP_MASK IV
+	pax_force_retaddr
 	ret
 ENDPROC(_aesni_inc)
 
@@ -2651,6 +2671,7 @@ ENTRY(aesni_ctr_enc)
 .Lctr_enc_ret:
 	movups IV, (IVP)
 .Lctr_enc_just_ret:
+	pax_force_retaddr
 	ret
 ENDPROC(aesni_ctr_enc)
 
@@ -2777,6 +2798,7 @@ ENTRY(aesni_xts_crypt8)
 	pxor INC, STATE4
 	movdqu STATE4, 0x70(OUTP)
 
+	pax_force_retaddr
 	ret
 ENDPROC(aesni_xts_crypt8)
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/aes-x86_64-asm_64.S linux-4.0.9-pax/arch/x86/crypto/aes-x86_64-asm_64.S
--- linux-4.0.9/arch/x86/crypto/aes-x86_64-asm_64.S	2015-03-18 15:21:50.228349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/aes-x86_64-asm_64.S	2015-04-15 12:13:52.894318623 +0200
@@ -8,6 +8,8 @@
  * including this sentence is retained in full.
  */
 
+#include <asm/alternative-asm.h>
+
 .extern crypto_ft_tab
 .extern crypto_it_tab
 .extern crypto_fl_tab
@@ -70,6 +72,8 @@
 	je	B192;			\
 	leaq	32(r9),r9;
 
+#define ret	pax_force_retaddr; ret
+
 #define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
 	movq	r1,r2;			\
 	movq	r3,r4;			\
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/blowfish-x86_64-asm_64.S linux-4.0.9-pax/arch/x86/crypto/blowfish-x86_64-asm_64.S
--- linux-4.0.9/arch/x86/crypto/blowfish-x86_64-asm_64.S	2015-03-18 15:21:50.228349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/blowfish-x86_64-asm_64.S	2015-04-15 12:13:52.894318623 +0200
@@ -21,6 +21,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 
 .file "blowfish-x86_64-asm.S"
 .text
@@ -149,9 +150,11 @@ ENTRY(__blowfish_enc_blk)
 	jnz .L__enc_xor;
 
 	write_block();
+	pax_force_retaddr
 	ret;
 .L__enc_xor:
 	xor_block();
+	pax_force_retaddr
 	ret;
 ENDPROC(__blowfish_enc_blk)
 
@@ -183,6 +186,7 @@ ENTRY(blowfish_dec_blk)
 
 	movq %r11, %rbp;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(blowfish_dec_blk)
 
@@ -334,6 +338,7 @@ ENTRY(__blowfish_enc_blk_4way)
 
 	popq %rbx;
 	popq %rbp;
+	pax_force_retaddr
 	ret;
 
 .L__enc_xor4:
@@ -341,6 +346,7 @@ ENTRY(__blowfish_enc_blk_4way)
 
 	popq %rbx;
 	popq %rbp;
+	pax_force_retaddr
 	ret;
 ENDPROC(__blowfish_enc_blk_4way)
 
@@ -375,5 +381,6 @@ ENTRY(blowfish_dec_blk_4way)
 	popq %rbx;
 	popq %rbp;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(blowfish_dec_blk_4way)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/camellia-aesni-avx2-asm_64.S linux-4.0.9-pax/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
--- linux-4.0.9/arch/x86/crypto/camellia-aesni-avx2-asm_64.S	2015-03-18 15:21:50.228349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/camellia-aesni-avx2-asm_64.S	2015-04-15 12:13:52.894318623 +0200
@@ -11,6 +11,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
@@ -230,6 +231,7 @@ roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_
 	roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
 		  %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15,
 		  %rcx, (%r9));
+	pax_force_retaddr
 	ret;
 ENDPROC(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
 
@@ -238,6 +240,7 @@ roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_
 	roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3,
 		  %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11,
 		  %rax, (%r9));
+	pax_force_retaddr
 	ret;
 ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 
@@ -820,6 +823,7 @@ __camellia_enc_blk32:
 		    %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
 		    %ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax));
 
+	pax_force_retaddr
 	ret;
 
 .align 8
@@ -905,6 +909,7 @@ __camellia_dec_blk32:
 		    %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
 		    %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax));
 
+	pax_force_retaddr
 	ret;
 
 .align 8
@@ -948,6 +953,7 @@ ENTRY(camellia_ecb_enc_32way)
 
 	vzeroupper;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(camellia_ecb_enc_32way)
 
@@ -980,6 +986,7 @@ ENTRY(camellia_ecb_dec_32way)
 
 	vzeroupper;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(camellia_ecb_dec_32way)
 
@@ -1046,6 +1053,7 @@ ENTRY(camellia_cbc_dec_32way)
 
 	vzeroupper;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(camellia_cbc_dec_32way)
 
@@ -1184,6 +1192,7 @@ ENTRY(camellia_ctr_32way)
 
 	vzeroupper;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(camellia_ctr_32way)
 
@@ -1349,6 +1358,7 @@ camellia_xts_crypt_32way:
 
 	vzeroupper;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(camellia_xts_crypt_32way)
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/camellia-aesni-avx-asm_64.S linux-4.0.9-pax/arch/x86/crypto/camellia-aesni-avx-asm_64.S
--- linux-4.0.9/arch/x86/crypto/camellia-aesni-avx-asm_64.S	2015-03-18 15:21:50.228349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/camellia-aesni-avx-asm_64.S	2015-04-15 12:13:52.894318623 +0200
@@ -16,6 +16,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
@@ -191,6 +192,7 @@ roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_
 	roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
 		  %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
 		  %rcx, (%r9));
+	pax_force_retaddr
 	ret;
 ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
 
@@ -199,6 +201,7 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_
 	roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3,
 		  %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
 		  %rax, (%r9));
+	pax_force_retaddr
 	ret;
 ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 
@@ -780,6 +783,7 @@ __camellia_enc_blk16:
 		    %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
 		    %xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax));
 
+	pax_force_retaddr
 	ret;
 
 .align 8
@@ -865,6 +869,7 @@ __camellia_dec_blk16:
 		    %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
 		    %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax));
 
+	pax_force_retaddr
 	ret;
 
 .align 8
@@ -904,6 +909,7 @@ ENTRY(camellia_ecb_enc_16way)
 		     %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
 		     %xmm8, %rsi);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(camellia_ecb_enc_16way)
 
@@ -932,6 +938,7 @@ ENTRY(camellia_ecb_dec_16way)
 		     %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
 		     %xmm8, %rsi);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(camellia_ecb_dec_16way)
 
@@ -981,6 +988,7 @@ ENTRY(camellia_cbc_dec_16way)
 		     %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
 		     %xmm8, %rsi);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(camellia_cbc_dec_16way)
 
@@ -1092,6 +1100,7 @@ ENTRY(camellia_ctr_16way)
 		     %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
 		     %xmm8, %rsi);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(camellia_ctr_16way)
 
@@ -1234,6 +1243,7 @@ camellia_xts_crypt_16way:
 		     %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
 		     %xmm8, %rsi);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(camellia_xts_crypt_16way)
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/camellia-x86_64-asm_64.S linux-4.0.9-pax/arch/x86/crypto/camellia-x86_64-asm_64.S
--- linux-4.0.9/arch/x86/crypto/camellia-x86_64-asm_64.S	2015-03-18 15:21:50.228349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/camellia-x86_64-asm_64.S	2015-04-15 12:13:52.894318623 +0200
@@ -21,6 +21,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 
 .file "camellia-x86_64-asm_64.S"
 .text
@@ -228,12 +229,14 @@ ENTRY(__camellia_enc_blk)
 	enc_outunpack(mov, RT1);
 
 	movq RRBP, %rbp;
+	pax_force_retaddr
 	ret;
 
 .L__enc_xor:
 	enc_outunpack(xor, RT1);
 
 	movq RRBP, %rbp;
+	pax_force_retaddr
 	ret;
 ENDPROC(__camellia_enc_blk)
 
@@ -272,6 +275,7 @@ ENTRY(camellia_dec_blk)
 	dec_outunpack();
 
 	movq RRBP, %rbp;
+	pax_force_retaddr
 	ret;
 ENDPROC(camellia_dec_blk)
 
@@ -463,6 +467,7 @@ ENTRY(__camellia_enc_blk_2way)
 
 	movq RRBP, %rbp;
 	popq %rbx;
+	pax_force_retaddr
 	ret;
 
 .L__enc2_xor:
@@ -470,6 +475,7 @@ ENTRY(__camellia_enc_blk_2way)
 
 	movq RRBP, %rbp;
 	popq %rbx;
+	pax_force_retaddr
 	ret;
 ENDPROC(__camellia_enc_blk_2way)
 
@@ -510,5 +516,6 @@ ENTRY(camellia_dec_blk_2way)
 
 	movq RRBP, %rbp;
 	movq RXOR, %rbx;
+	pax_force_retaddr
 	ret;
 ENDPROC(camellia_dec_blk_2way)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/cast5-avx-x86_64-asm_64.S linux-4.0.9-pax/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
--- linux-4.0.9/arch/x86/crypto/cast5-avx-x86_64-asm_64.S	2015-03-18 15:21:50.228349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/cast5-avx-x86_64-asm_64.S	2015-04-15 12:13:52.894318623 +0200
@@ -24,6 +24,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 
 .file "cast5-avx-x86_64-asm_64.S"
 
@@ -281,6 +282,7 @@ __cast5_enc_blk16:
 	outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
 	outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(__cast5_enc_blk16)
 
@@ -352,6 +354,7 @@ __cast5_dec_blk16:
 	outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
 	outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
 
+	pax_force_retaddr
 	ret;
 
 .L__skip_dec:
@@ -388,6 +391,7 @@ ENTRY(cast5_ecb_enc_16way)
 	vmovdqu RR4, (6*4*4)(%r11);
 	vmovdqu RL4, (7*4*4)(%r11);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(cast5_ecb_enc_16way)
 
@@ -420,6 +424,7 @@ ENTRY(cast5_ecb_dec_16way)
 	vmovdqu RR4, (6*4*4)(%r11);
 	vmovdqu RL4, (7*4*4)(%r11);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(cast5_ecb_dec_16way)
 
@@ -430,10 +435,10 @@ ENTRY(cast5_cbc_dec_16way)
 	 *	%rdx: src
 	 */
 
-	pushq %r12;
+	pushq %r14;
 
 	movq %rsi, %r11;
-	movq %rdx, %r12;
+	movq %rdx, %r14;
 
 	vmovdqu (0*16)(%rdx), RL1;
 	vmovdqu (1*16)(%rdx), RR1;
@@ -447,16 +452,16 @@ ENTRY(cast5_cbc_dec_16way)
 	call __cast5_dec_blk16;
 
 	/* xor with src */
-	vmovq (%r12), RX;
+	vmovq (%r14), RX;
 	vpshufd $0x4f, RX, RX;
 	vpxor RX, RR1, RR1;
-	vpxor 0*16+8(%r12), RL1, RL1;
-	vpxor 1*16+8(%r12), RR2, RR2;
-	vpxor 2*16+8(%r12), RL2, RL2;
-	vpxor 3*16+8(%r12), RR3, RR3;
-	vpxor 4*16+8(%r12), RL3, RL3;
-	vpxor 5*16+8(%r12), RR4, RR4;
-	vpxor 6*16+8(%r12), RL4, RL4;
+	vpxor 0*16+8(%r14), RL1, RL1;
+	vpxor 1*16+8(%r14), RR2, RR2;
+	vpxor 2*16+8(%r14), RL2, RL2;
+	vpxor 3*16+8(%r14), RR3, RR3;
+	vpxor 4*16+8(%r14), RL3, RL3;
+	vpxor 5*16+8(%r14), RR4, RR4;
+	vpxor 6*16+8(%r14), RL4, RL4;
 
 	vmovdqu RR1, (0*16)(%r11);
 	vmovdqu RL1, (1*16)(%r11);
@@ -467,8 +472,9 @@ ENTRY(cast5_cbc_dec_16way)
 	vmovdqu RR4, (6*16)(%r11);
 	vmovdqu RL4, (7*16)(%r11);
 
-	popq %r12;
+	popq %r14;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(cast5_cbc_dec_16way)
 
@@ -480,10 +486,10 @@ ENTRY(cast5_ctr_16way)
 	 *	%rcx: iv (big endian, 64bit)
 	 */
 
-	pushq %r12;
+	pushq %r14;
 
 	movq %rsi, %r11;
-	movq %rdx, %r12;
+	movq %rdx, %r14;
 
 	vpcmpeqd RTMP, RTMP, RTMP;
 	vpsrldq $8, RTMP, RTMP; /* low: -1, high: 0 */
@@ -523,14 +529,14 @@ ENTRY(cast5_ctr_16way)
 	call __cast5_enc_blk16;
 
 	/* dst = src ^ iv */
-	vpxor (0*16)(%r12), RR1, RR1;
-	vpxor (1*16)(%r12), RL1, RL1;
-	vpxor (2*16)(%r12), RR2, RR2;
-	vpxor (3*16)(%r12), RL2, RL2;
-	vpxor (4*16)(%r12), RR3, RR3;
-	vpxor (5*16)(%r12), RL3, RL3;
-	vpxor (6*16)(%r12), RR4, RR4;
-	vpxor (7*16)(%r12), RL4, RL4;
+	vpxor (0*16)(%r14), RR1, RR1;
+	vpxor (1*16)(%r14), RL1, RL1;
+	vpxor (2*16)(%r14), RR2, RR2;
+	vpxor (3*16)(%r14), RL2, RL2;
+	vpxor (4*16)(%r14), RR3, RR3;
+	vpxor (5*16)(%r14), RL3, RL3;
+	vpxor (6*16)(%r14), RR4, RR4;
+	vpxor (7*16)(%r14), RL4, RL4;
 	vmovdqu RR1, (0*16)(%r11);
 	vmovdqu RL1, (1*16)(%r11);
 	vmovdqu RR2, (2*16)(%r11);
@@ -540,7 +546,8 @@ ENTRY(cast5_ctr_16way)
 	vmovdqu RR4, (6*16)(%r11);
 	vmovdqu RL4, (7*16)(%r11);
 
-	popq %r12;
+	popq %r14;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(cast5_ctr_16way)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/cast6-avx-x86_64-asm_64.S linux-4.0.9-pax/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
--- linux-4.0.9/arch/x86/crypto/cast6-avx-x86_64-asm_64.S	2015-03-18 15:21:50.228349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/cast6-avx-x86_64-asm_64.S	2015-04-15 12:13:52.894318623 +0200
@@ -24,6 +24,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 #include "glue_helper-asm-avx.S"
 
 .file "cast6-avx-x86_64-asm_64.S"
@@ -295,6 +296,7 @@ __cast6_enc_blk8:
 	outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
 	outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(__cast6_enc_blk8)
 
@@ -340,6 +342,7 @@ __cast6_dec_blk8:
 	outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
 	outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(__cast6_dec_blk8)
 
@@ -358,6 +361,7 @@ ENTRY(cast6_ecb_enc_8way)
 
 	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(cast6_ecb_enc_8way)
 
@@ -376,6 +380,7 @@ ENTRY(cast6_ecb_dec_8way)
 
 	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(cast6_ecb_dec_8way)
 
@@ -386,19 +391,20 @@ ENTRY(cast6_cbc_dec_8way)
 	 *	%rdx: src
 	 */
 
-	pushq %r12;
+	pushq %r14;
 
 	movq %rsi, %r11;
-	movq %rdx, %r12;
+	movq %rdx, %r14;
 
 	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
 	call __cast6_dec_blk8;
 
-	store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+	store_cbc_8way(%r14, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
-	popq %r12;
+	popq %r14;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(cast6_cbc_dec_8way)
 
@@ -410,20 +416,21 @@ ENTRY(cast6_ctr_8way)
 	 *	%rcx: iv (little endian, 128bit)
 	 */
 
-	pushq %r12;
+	pushq %r14;
 
 	movq %rsi, %r11;
-	movq %rdx, %r12;
+	movq %rdx, %r14;
 
 	load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
 		      RD2, RX, RKR, RKM);
 
 	call __cast6_enc_blk8;
 
-	store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+	store_ctr_8way(%r14, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
-	popq %r12;
+	popq %r14;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(cast6_ctr_8way)
 
@@ -446,6 +453,7 @@ ENTRY(cast6_xts_enc_8way)
 	/* dst <= regs xor IVs(in dst) */
 	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(cast6_xts_enc_8way)
 
@@ -468,5 +476,6 @@ ENTRY(cast6_xts_dec_8way)
 	/* dst <= regs xor IVs(in dst) */
 	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(cast6_xts_dec_8way)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/crc32c-pcl-intel-asm_64.S linux-4.0.9-pax/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
--- linux-4.0.9/arch/x86/crypto/crc32c-pcl-intel-asm_64.S	2015-03-18 15:21:50.228349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/crc32c-pcl-intel-asm_64.S	2015-05-23 20:30:13.829575315 +0200
@@ -45,6 +45,7 @@
 
 #include <asm/inst.h>
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 
 ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
 
@@ -309,6 +310,7 @@ do_return:
 	popq    %rsi
 	popq    %rdi
 	popq    %rbx
+	pax_force_retaddr
         ret
 
         ################################################################
@@ -330,7 +332,7 @@ ENDPROC(crc_pcl)
 	## PCLMULQDQ tables
 	## Table is 128 entries x 2 words (8 bytes) each
 	################################################################
-.section	.rotata, "a", %progbits
+.section	.rodata, "a", %progbits
 .align 8
 K_table:
 	.long 0x493c7d27, 0x00000001
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/ghash-clmulni-intel_asm.S linux-4.0.9-pax/arch/x86/crypto/ghash-clmulni-intel_asm.S
--- linux-4.0.9/arch/x86/crypto/ghash-clmulni-intel_asm.S	2015-03-18 15:21:50.228349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/ghash-clmulni-intel_asm.S	2015-04-15 12:13:52.894318623 +0200
@@ -18,6 +18,7 @@
 
 #include <linux/linkage.h>
 #include <asm/inst.h>
+#include <asm/alternative-asm.h>
 
 .data
 
@@ -89,6 +90,7 @@ __clmul_gf128mul_ble:
 	psrlq $1, T2
 	pxor T2, T1
 	pxor T1, DATA
+	pax_force_retaddr
 	ret
 ENDPROC(__clmul_gf128mul_ble)
 
@@ -101,6 +103,7 @@ ENTRY(clmul_ghash_mul)
 	call __clmul_gf128mul_ble
 	PSHUFB_XMM BSWAP DATA
 	movups DATA, (%rdi)
+	pax_force_retaddr
 	ret
 ENDPROC(clmul_ghash_mul)
 
@@ -128,5 +131,6 @@ ENTRY(clmul_ghash_update)
 	PSHUFB_XMM BSWAP DATA
 	movups DATA, (%rdi)
 .Lupdate_just_ret:
+	pax_force_retaddr
 	ret
 ENDPROC(clmul_ghash_update)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/salsa20-x86_64-asm_64.S linux-4.0.9-pax/arch/x86/crypto/salsa20-x86_64-asm_64.S
--- linux-4.0.9/arch/x86/crypto/salsa20-x86_64-asm_64.S	2015-03-18 15:21:50.228349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/salsa20-x86_64-asm_64.S	2015-04-15 12:13:52.894318623 +0200
@@ -1,4 +1,5 @@
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 
 # enter salsa20_encrypt_bytes
 ENTRY(salsa20_encrypt_bytes)
@@ -789,6 +790,7 @@ ENTRY(salsa20_encrypt_bytes)
 	add	%r11,%rsp
 	mov	%rdi,%rax
 	mov	%rsi,%rdx
+	pax_force_retaddr
 	ret
 #   bytesatleast65:
 ._bytesatleast65:
@@ -889,6 +891,7 @@ ENTRY(salsa20_keysetup)
 	add	%r11,%rsp
 	mov	%rdi,%rax
 	mov	%rsi,%rdx
+	pax_force_retaddr
 	ret
 ENDPROC(salsa20_keysetup)
 
@@ -914,5 +917,6 @@ ENTRY(salsa20_ivsetup)
 	add	%r11,%rsp
 	mov	%rdi,%rax
 	mov	%rsi,%rdx
+	pax_force_retaddr
 	ret
 ENDPROC(salsa20_ivsetup)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/serpent-avx2-asm_64.S linux-4.0.9-pax/arch/x86/crypto/serpent-avx2-asm_64.S
--- linux-4.0.9/arch/x86/crypto/serpent-avx2-asm_64.S	2015-03-18 15:21:50.232349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/serpent-avx2-asm_64.S	2015-04-15 12:13:52.894318623 +0200
@@ -15,6 +15,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 #include "glue_helper-asm-avx2.S"
 
 .file "serpent-avx2-asm_64.S"
@@ -610,6 +611,7 @@ __serpent_enc_blk16:
 	write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
 	write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(__serpent_enc_blk16)
 
@@ -664,6 +666,7 @@ __serpent_dec_blk16:
 	write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
 	write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(__serpent_dec_blk16)
 
@@ -684,6 +687,7 @@ ENTRY(serpent_ecb_enc_16way)
 
 	vzeroupper;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(serpent_ecb_enc_16way)
 
@@ -704,6 +708,7 @@ ENTRY(serpent_ecb_dec_16way)
 
 	vzeroupper;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(serpent_ecb_dec_16way)
 
@@ -725,6 +730,7 @@ ENTRY(serpent_cbc_dec_16way)
 
 	vzeroupper;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(serpent_cbc_dec_16way)
 
@@ -748,6 +754,7 @@ ENTRY(serpent_ctr_16way)
 
 	vzeroupper;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(serpent_ctr_16way)
 
@@ -772,6 +779,7 @@ ENTRY(serpent_xts_enc_16way)
 
 	vzeroupper;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(serpent_xts_enc_16way)
 
@@ -796,5 +804,6 @@ ENTRY(serpent_xts_dec_16way)
 
 	vzeroupper;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(serpent_xts_dec_16way)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/serpent-avx-x86_64-asm_64.S linux-4.0.9-pax/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
--- linux-4.0.9/arch/x86/crypto/serpent-avx-x86_64-asm_64.S	2015-03-18 15:21:50.232349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/serpent-avx-x86_64-asm_64.S	2015-04-15 12:13:52.894318623 +0200
@@ -24,6 +24,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 #include "glue_helper-asm-avx.S"
 
 .file "serpent-avx-x86_64-asm_64.S"
@@ -618,6 +619,7 @@ __serpent_enc_blk8_avx:
 	write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
 	write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(__serpent_enc_blk8_avx)
 
@@ -672,6 +674,7 @@ __serpent_dec_blk8_avx:
 	write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
 	write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(__serpent_dec_blk8_avx)
 
@@ -688,6 +691,7 @@ ENTRY(serpent_ecb_enc_8way_avx)
 
 	store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(serpent_ecb_enc_8way_avx)
 
@@ -704,6 +708,7 @@ ENTRY(serpent_ecb_dec_8way_avx)
 
 	store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(serpent_ecb_dec_8way_avx)
 
@@ -720,6 +725,7 @@ ENTRY(serpent_cbc_dec_8way_avx)
 
 	store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(serpent_cbc_dec_8way_avx)
 
@@ -738,6 +744,7 @@ ENTRY(serpent_ctr_8way_avx)
 
 	store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(serpent_ctr_8way_avx)
 
@@ -758,6 +765,7 @@ ENTRY(serpent_xts_enc_8way_avx)
 	/* dst <= regs xor IVs(in dst) */
 	store_xts_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(serpent_xts_enc_8way_avx)
 
@@ -778,5 +786,6 @@ ENTRY(serpent_xts_dec_8way_avx)
 	/* dst <= regs xor IVs(in dst) */
 	store_xts_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(serpent_xts_dec_8way_avx)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S linux-4.0.9-pax/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
--- linux-4.0.9/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S	2015-03-18 15:21:50.232349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S	2015-04-15 12:13:52.894318623 +0200
@@ -25,6 +25,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 
 .file "serpent-sse2-x86_64-asm_64.S"
 .text
@@ -690,12 +691,14 @@ ENTRY(__serpent_enc_blk_8way)
 	write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
 	write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
 
+	pax_force_retaddr
 	ret;
 
 .L__enc_xor8:
 	xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
 	xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(__serpent_enc_blk_8way)
 
@@ -750,5 +753,6 @@ ENTRY(serpent_dec_blk_8way)
 	write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2);
 	write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(serpent_dec_blk_8way)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/sha1_ssse3_asm.S linux-4.0.9-pax/arch/x86/crypto/sha1_ssse3_asm.S
--- linux-4.0.9/arch/x86/crypto/sha1_ssse3_asm.S	2015-03-18 15:21:50.232349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/sha1_ssse3_asm.S	2015-04-15 12:13:52.894318623 +0200
@@ -29,6 +29,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 
 #define CTX	%rdi	// arg1
 #define BUF	%rsi	// arg2
@@ -75,9 +76,9 @@
 
 	push	%rbx
 	push	%rbp
-	push	%r12
+	push	%r14
 
-	mov	%rsp, %r12
+	mov	%rsp, %r14
 	sub	$64, %rsp		# allocate workspace
 	and	$~15, %rsp		# align stack
 
@@ -99,11 +100,12 @@
 	xor	%rax, %rax
 	rep stosq
 
-	mov	%r12, %rsp		# deallocate workspace
+	mov	%r14, %rsp		# deallocate workspace
 
-	pop	%r12
+	pop	%r14
 	pop	%rbp
 	pop	%rbx
+	pax_force_retaddr
 	ret
 
 	ENDPROC(\name)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/sha256-avx2-asm.S linux-4.0.9-pax/arch/x86/crypto/sha256-avx2-asm.S
--- linux-4.0.9/arch/x86/crypto/sha256-avx2-asm.S	2015-03-18 15:21:50.232349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/sha256-avx2-asm.S	2015-04-15 12:13:52.898318623 +0200
@@ -50,6 +50,7 @@
 
 #ifdef CONFIG_AS_AVX2
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 
 ## assume buffers not aligned
 #define	VMOVDQ vmovdqu
@@ -720,6 +721,7 @@ done_hash:
 	popq	%r12
 	popq	%rbp
 	popq	%rbx
+	pax_force_retaddr
 	ret
 ENDPROC(sha256_transform_rorx)
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/sha256-avx-asm.S linux-4.0.9-pax/arch/x86/crypto/sha256-avx-asm.S
--- linux-4.0.9/arch/x86/crypto/sha256-avx-asm.S	2015-03-18 15:21:50.232349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/sha256-avx-asm.S	2015-04-15 12:13:52.898318623 +0200
@@ -49,6 +49,7 @@
 
 #ifdef CONFIG_AS_AVX
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 
 ## assume buffers not aligned
 #define    VMOVDQ vmovdqu
@@ -460,6 +461,7 @@ done_hash:
 	popq    %r13
 	popq    %rbp
 	popq    %rbx
+	pax_force_retaddr
 	ret
 ENDPROC(sha256_transform_avx)
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/sha256-ssse3-asm.S linux-4.0.9-pax/arch/x86/crypto/sha256-ssse3-asm.S
--- linux-4.0.9/arch/x86/crypto/sha256-ssse3-asm.S	2015-03-18 15:21:50.232349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/sha256-ssse3-asm.S	2015-04-15 12:13:52.898318623 +0200
@@ -47,6 +47,7 @@
 ########################################################################
 
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 
 ## assume buffers not aligned
 #define    MOVDQ movdqu
@@ -471,6 +472,7 @@ done_hash:
 	popq    %rbp
 	popq    %rbx
 
+	pax_force_retaddr
 	ret
 ENDPROC(sha256_transform_ssse3)
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/sha512-avx2-asm.S linux-4.0.9-pax/arch/x86/crypto/sha512-avx2-asm.S
--- linux-4.0.9/arch/x86/crypto/sha512-avx2-asm.S	2015-03-18 15:21:50.232349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/sha512-avx2-asm.S	2015-04-15 12:13:52.898318623 +0200
@@ -51,6 +51,7 @@
 
 #ifdef CONFIG_AS_AVX2
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 
 .text
 
@@ -678,6 +679,7 @@ done_hash:
 
 	# Restore Stack Pointer
 	mov	frame_RSPSAVE(%rsp), %rsp
+	pax_force_retaddr
 	ret
 ENDPROC(sha512_transform_rorx)
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/sha512-avx-asm.S linux-4.0.9-pax/arch/x86/crypto/sha512-avx-asm.S
--- linux-4.0.9/arch/x86/crypto/sha512-avx-asm.S	2015-03-18 15:21:50.232349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/sha512-avx-asm.S	2015-04-15 12:13:52.898318623 +0200
@@ -49,6 +49,7 @@
 
 #ifdef CONFIG_AS_AVX
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 
 .text
 
@@ -364,6 +365,7 @@ updateblock:
 	mov	frame_RSPSAVE(%rsp), %rsp
 
 nowork:
+	pax_force_retaddr
 	ret
 ENDPROC(sha512_transform_avx)
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/sha512-ssse3-asm.S linux-4.0.9-pax/arch/x86/crypto/sha512-ssse3-asm.S
--- linux-4.0.9/arch/x86/crypto/sha512-ssse3-asm.S	2015-03-18 15:21:50.232349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/sha512-ssse3-asm.S	2015-04-15 12:13:52.898318623 +0200
@@ -48,6 +48,7 @@
 ########################################################################
 
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 
 .text
 
@@ -363,6 +364,7 @@ updateblock:
 	mov	frame_RSPSAVE(%rsp), %rsp
 
 nowork:
+	pax_force_retaddr
 	ret
 ENDPROC(sha512_transform_ssse3)
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/twofish-avx-x86_64-asm_64.S linux-4.0.9-pax/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
--- linux-4.0.9/arch/x86/crypto/twofish-avx-x86_64-asm_64.S	2015-03-18 15:21:50.232349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/twofish-avx-x86_64-asm_64.S	2015-04-15 12:13:52.898318623 +0200
@@ -24,6 +24,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 #include "glue_helper-asm-avx.S"
 
 .file "twofish-avx-x86_64-asm_64.S"
@@ -284,6 +285,7 @@ __twofish_enc_blk8:
 	outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
 	outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(__twofish_enc_blk8)
 
@@ -324,6 +326,7 @@ __twofish_dec_blk8:
 	outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
 	outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(__twofish_dec_blk8)
 
@@ -342,6 +345,7 @@ ENTRY(twofish_ecb_enc_8way)
 
 	store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(twofish_ecb_enc_8way)
 
@@ -360,6 +364,7 @@ ENTRY(twofish_ecb_dec_8way)
 
 	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(twofish_ecb_dec_8way)
 
@@ -370,19 +375,20 @@ ENTRY(twofish_cbc_dec_8way)
 	 *	%rdx: src
 	 */
 
-	pushq %r12;
+	pushq %r14;
 
 	movq %rsi, %r11;
-	movq %rdx, %r12;
+	movq %rdx, %r14;
 
 	load_8way(%rdx, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
 
 	call __twofish_dec_blk8;
 
-	store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+	store_cbc_8way(%r14, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
-	popq %r12;
+	popq %r14;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(twofish_cbc_dec_8way)
 
@@ -394,20 +400,21 @@ ENTRY(twofish_ctr_8way)
 	 *	%rcx: iv (little endian, 128bit)
 	 */
 
-	pushq %r12;
+	pushq %r14;
 
 	movq %rsi, %r11;
-	movq %rdx, %r12;
+	movq %rdx, %r14;
 
 	load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
 		      RD2, RX0, RX1, RY0);
 
 	call __twofish_enc_blk8;
 
-	store_ctr_8way(%r12, %r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
+	store_ctr_8way(%r14, %r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
 
-	popq %r12;
+	popq %r14;
 
+	pax_force_retaddr
 	ret;
 ENDPROC(twofish_ctr_8way)
 
@@ -430,6 +437,7 @@ ENTRY(twofish_xts_enc_8way)
 	/* dst <= regs xor IVs(in dst) */
 	store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(twofish_xts_enc_8way)
 
@@ -452,5 +460,6 @@ ENTRY(twofish_xts_dec_8way)
 	/* dst <= regs xor IVs(in dst) */
 	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	pax_force_retaddr
 	ret;
 ENDPROC(twofish_xts_dec_8way)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/twofish-x86_64-asm_64-3way.S linux-4.0.9-pax/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
--- linux-4.0.9/arch/x86/crypto/twofish-x86_64-asm_64-3way.S	2015-03-18 15:21:50.232349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/twofish-x86_64-asm_64-3way.S	2015-04-15 12:13:52.898318623 +0200
@@ -21,6 +21,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 
 .file "twofish-x86_64-asm-3way.S"
 .text
@@ -258,6 +259,7 @@ ENTRY(__twofish_enc_blk_3way)
 	popq %r13;
 	popq %r14;
 	popq %r15;
+	pax_force_retaddr
 	ret;
 
 .L__enc_xor3:
@@ -269,6 +271,7 @@ ENTRY(__twofish_enc_blk_3way)
 	popq %r13;
 	popq %r14;
 	popq %r15;
+	pax_force_retaddr
 	ret;
 ENDPROC(__twofish_enc_blk_3way)
 
@@ -308,5 +311,6 @@ ENTRY(twofish_dec_blk_3way)
 	popq %r13;
 	popq %r14;
 	popq %r15;
+	pax_force_retaddr
 	ret;
 ENDPROC(twofish_dec_blk_3way)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/crypto/twofish-x86_64-asm_64.S linux-4.0.9-pax/arch/x86/crypto/twofish-x86_64-asm_64.S
--- linux-4.0.9/arch/x86/crypto/twofish-x86_64-asm_64.S	2015-03-18 15:21:50.232349253 +0100
+++ linux-4.0.9-pax/arch/x86/crypto/twofish-x86_64-asm_64.S	2015-04-15 12:13:52.898318623 +0200
@@ -22,6 +22,7 @@
 
 #include <linux/linkage.h>
 #include <asm/asm-offsets.h>
+#include <asm/alternative-asm.h>
 
 #define a_offset	0
 #define b_offset	4
@@ -265,6 +266,7 @@ ENTRY(twofish_enc_blk)
 
 	popq	R1
 	movq	$1,%rax
+	pax_force_retaddr
 	ret
 ENDPROC(twofish_enc_blk)
 
@@ -317,5 +319,6 @@ ENTRY(twofish_dec_blk)
 
 	popq	R1
 	movq	$1,%rax
+	pax_force_retaddr
 	ret
 ENDPROC(twofish_dec_blk)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/ia32/ia32entry.S linux-4.0.9-pax/arch/x86/ia32/ia32entry.S
--- linux-4.0.9/arch/x86/ia32/ia32entry.S	2015-04-13 11:21:01.622617473 +0200
+++ linux-4.0.9-pax/arch/x86/ia32/ia32entry.S	2015-04-15 12:13:52.898318623 +0200
@@ -15,8 +15,10 @@
 #include <asm/irqflags.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
+#include <asm/pgtable.h>
 #include <linux/linkage.h>
 #include <linux/err.h>
+#include <asm/alternative-asm.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
 #include <linux/elf-em.h>
@@ -62,12 +64,12 @@
 	 */
 	.macro LOAD_ARGS32 offset, _r9=0
 	.if \_r9
-	movl \offset+16(%rsp),%r9d
+	movl \offset+R9(%rsp),%r9d
 	.endif
-	movl \offset+40(%rsp),%ecx
-	movl \offset+48(%rsp),%edx
-	movl \offset+56(%rsp),%esi
-	movl \offset+64(%rsp),%edi
+	movl \offset+RCX(%rsp),%ecx
+	movl \offset+RDX(%rsp),%edx
+	movl \offset+RSI(%rsp),%esi
+	movl \offset+RDI(%rsp),%edi
 	movl %eax,%eax			/* zero extension */
 	.endm
 	
@@ -96,6 +98,32 @@ ENTRY(native_irq_enable_sysexit)
 ENDPROC(native_irq_enable_sysexit)
 #endif
 
+	.macro pax_enter_kernel_user
+	pax_set_fptr_mask
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	call pax_enter_kernel_user
+#endif
+	.endm
+
+	.macro pax_exit_kernel_user
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	call pax_exit_kernel_user
+#endif
+#ifdef CONFIG_PAX_RANDKSTACK
+	pushq %rax
+	pushq %r11
+	call pax_randomize_kstack
+	popq %r11
+	popq %rax
+#endif
+	.endm
+
+	.macro pax_erase_kstack
+#ifdef CONFIG_PAX_MEMORY_STACKLEAK
+	call pax_erase_kstack
+#endif
+	.endm
+
 /*
  * 32bit SYSENTER instruction entry.
  *
@@ -122,12 +150,6 @@ ENTRY(ia32_sysenter_target)
 	CFI_REGISTER	rsp,rbp
 	SWAPGS_UNSAFE_STACK
 	movq	PER_CPU_VAR(kernel_stack), %rsp
-	addq	$(KERNEL_STACK_OFFSET),%rsp
-	/*
-	 * No need to follow this irqs on/off section: the syscall
-	 * disabled irqs, here we enable it straight after entry:
-	 */
-	ENABLE_INTERRUPTS(CLBR_NONE)
  	movl	%ebp,%ebp		/* zero extension */
 	pushq_cfi $__USER32_DS
 	/*CFI_REL_OFFSET ss,0*/
@@ -135,23 +157,46 @@ ENTRY(ia32_sysenter_target)
 	CFI_REL_OFFSET rsp,0
 	pushfq_cfi
 	/*CFI_REL_OFFSET rflags,0*/
-	movl	TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
-	CFI_REGISTER rip,r10
+	orl	$X86_EFLAGS_IF,(%rsp)
+	GET_THREAD_INFO(%r11)
+	movl	TI_sysenter_return(%r11), %r11d
+	CFI_REGISTER rip,r11
 	pushq_cfi $__USER32_CS
 	/*CFI_REL_OFFSET cs,0*/
 	movl	%eax, %eax
-	pushq_cfi %r10
+	pushq_cfi %r11
 	CFI_REL_OFFSET rip,0
 	pushq_cfi %rax
 	cld
 	SAVE_ARGS 0,1,0
+	pax_enter_kernel_user
+
+#ifdef CONFIG_PAX_RANDKSTACK
+	pax_erase_kstack
+#endif
+
+	/*
+	 * No need to follow this irqs on/off section: the syscall
+	 * disabled irqs, here we enable it straight after entry:
+	 */
+	ENABLE_INTERRUPTS(CLBR_NONE)
  	/* no need to do an access_ok check here because rbp has been
  	   32bit zero extended */ 
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	addq	pax_user_shadow_base,%rbp
+	ASM_PAX_OPEN_USERLAND
+#endif
+
 	ASM_STAC
 1:	movl	(%rbp),%ebp
 	_ASM_EXTABLE(1b,ia32_badarg)
 	ASM_CLAC
 
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	ASM_PAX_CLOSE_USERLAND
+#endif
+
 	/*
 	 * Sysenter doesn't filter flags, so we need to clear NT
 	 * ourselves.  To save a few cycles, we can check whether
@@ -161,8 +206,9 @@ ENTRY(ia32_sysenter_target)
 	jnz sysenter_fix_flags
 sysenter_flags_fixed:
 
-	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	GET_THREAD_INFO(%r11)
+	orl    $TS_COMPAT,TI_status(%r11)
+	testl  $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r11)
 	CFI_REMEMBER_STATE
 	jnz  sysenter_tracesys
 	cmpq	$(IA32_NR_syscalls-1),%rax
@@ -172,14 +218,17 @@ sysenter_do_call:
 sysenter_dispatch:
 	call	*ia32_sys_call_table(,%rax,8)
 	movq	%rax,RAX-ARGOFFSET(%rsp)
+	GET_THREAD_INFO(%r11)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl	$_TIF_ALLWORK_MASK,TI_flags(%r11)
 	jnz	sysexit_audit
 sysexit_from_sys_call:
-	andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	pax_exit_kernel_user
+	pax_erase_kstack
+	andl	$~TS_COMPAT,TI_status(%r11)
 	/* clear IF, that popfq doesn't enable interrupts early */
-	andl	$~0x200,EFLAGS-ARGOFFSET(%rsp)
+	andl	$~X86_EFLAGS_IF,EFLAGS-ARGOFFSET(%rsp)
 	movl	RIP-ARGOFFSET(%rsp),%edx		/* User %eip */
 	CFI_REGISTER rip,rdx
 	RESTORE_ARGS 0,24,0,0,0,0
@@ -205,6 +254,9 @@ sysexit_from_sys_call:
 	movl %ebx,%esi			/* 2nd arg: 1st syscall arg */
 	movl %eax,%edi			/* 1st arg: syscall number */
 	call __audit_syscall_entry
+
+	pax_erase_kstack
+
 	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall number */
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
@@ -216,7 +268,7 @@ sysexit_from_sys_call:
 	.endm
 
 	.macro auditsys_exit exit
-	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r11)
 	jnz ia32_ret_from_sys_call
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
@@ -227,11 +279,12 @@ sysexit_from_sys_call:
 1:	setbe %al		/* 1 if error, 0 if not */
 	movzbl %al,%edi		/* zero-extend that into %edi */
 	call __audit_syscall_exit
+	GET_THREAD_INFO(%r11)
 	movq RAX-ARGOFFSET(%rsp),%rax	/* reload syscall return value */
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl %edi,TI_flags(%r11)
 	jz \exit
 	CLEAR_RREGS -ARGOFFSET
 	jmp int_with_check
@@ -253,7 +306,7 @@ sysenter_fix_flags:
 
 sysenter_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r11)
 	jz	sysenter_auditsys
 #endif
 	SAVE_REST
@@ -265,6 +318,9 @@ sysenter_tracesys:
 	RESTORE_REST
 	cmpq	$(IA32_NR_syscalls-1),%rax
 	ja	int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
+
+	pax_erase_kstack
+
 	jmp	sysenter_do_call
 	CFI_ENDPROC
 ENDPROC(ia32_sysenter_target)
@@ -292,19 +348,25 @@ ENDPROC(ia32_sysenter_target)
 ENTRY(ia32_cstar_target)
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
+	CFI_DEF_CFA	rsp,0
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	SWAPGS_UNSAFE_STACK
 	movl	%esp,%r8d
 	CFI_REGISTER	rsp,r8
 	movq	PER_CPU_VAR(kernel_stack),%rsp
+	SAVE_ARGS 8*6,0,0
+	pax_enter_kernel_user
+
+#ifdef CONFIG_PAX_RANDKSTACK
+	pax_erase_kstack
+#endif
+
 	/*
 	 * No need to follow this irqs on/off section: the syscall
 	 * disabled irqs and here we enable it straight after entry:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_ARGS 8,0,0
 	movl 	%eax,%eax	/* zero extension */
 	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
 	movq	%rcx,RIP-ARGOFFSET(%rsp)
@@ -320,12 +382,25 @@ ENTRY(ia32_cstar_target)
 	/* no need to do an access_ok check here because r8 has been
 	   32bit zero extended */ 
 	/* hardware stack frame is complete now */	
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	ASM_PAX_OPEN_USERLAND
+	movq	pax_user_shadow_base,%r8
+	addq	RSP-ARGOFFSET(%rsp),%r8
+#endif
+
 	ASM_STAC
 1:	movl	(%r8),%r9d
 	_ASM_EXTABLE(1b,ia32_badarg)
 	ASM_CLAC
-	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	ASM_PAX_CLOSE_USERLAND
+#endif
+
+	GET_THREAD_INFO(%r11)
+	orl   $TS_COMPAT,TI_status(%r11)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r11)
 	CFI_REMEMBER_STATE
 	jnz   cstar_tracesys
 	cmpq $IA32_NR_syscalls-1,%rax
@@ -335,13 +410,16 @@ cstar_do_call:
 cstar_dispatch:
 	call *ia32_sys_call_table(,%rax,8)
 	movq %rax,RAX-ARGOFFSET(%rsp)
+	GET_THREAD_INFO(%r11)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $_TIF_ALLWORK_MASK,TI_flags(%r11)
 	jnz sysretl_audit
 sysretl_from_sys_call:
-	andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	RESTORE_ARGS 0,-ARG_SKIP,0,0,0
+	pax_exit_kernel_user
+	pax_erase_kstack
+	andl $~TS_COMPAT,TI_status(%r11)
+	RESTORE_ARGS 0,-ORIG_RAX,0,0,0
 	movl RIP-ARGOFFSET(%rsp),%ecx
 	CFI_REGISTER rip,rcx
 	movl EFLAGS-ARGOFFSET(%rsp),%r11d	
@@ -368,7 +446,7 @@ sysretl_audit:
 
 cstar_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r11)
 	jz cstar_auditsys
 #endif
 	xchgl %r9d,%ebp
@@ -382,11 +460,19 @@ cstar_tracesys:
 	xchgl %ebp,%r9d
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
+
+	pax_erase_kstack
+
 	jmp cstar_do_call
 END(ia32_cstar_target)
 				
 ia32_badarg:
 	ASM_CLAC
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	ASM_PAX_CLOSE_USERLAND
+#endif
+
 	movq $-EFAULT,%rax
 	jmp ia32_sysret
 	CFI_ENDPROC
@@ -423,19 +509,26 @@ ENTRY(ia32_syscall)
 	CFI_REL_OFFSET	rip,RIP-RIP
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	SWAPGS
-	/*
-	 * No need to follow this irqs on/off section: the syscall
-	 * disabled irqs and here we enable it straight after entry:
-	 */
-	ENABLE_INTERRUPTS(CLBR_NONE)
 	movl %eax,%eax
 	pushq_cfi %rax
 	cld
 	/* note the registers are not zero extended to the sf.
 	   this could be a problem. */
 	SAVE_ARGS 0,1,0
-	orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	pax_enter_kernel_user
+
+#ifdef CONFIG_PAX_RANDKSTACK
+	pax_erase_kstack
+#endif
+
+	/*
+	 * No need to follow this irqs on/off section: the syscall
+	 * disabled irqs and here we enable it straight after entry:
+	 */
+	ENABLE_INTERRUPTS(CLBR_NONE)
+	GET_THREAD_INFO(%r11)
+	orl   $TS_COMPAT,TI_status(%r11)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r11)
 	jnz ia32_tracesys
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
@@ -458,6 +551,9 @@ ia32_tracesys:
 	RESTORE_REST
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja  int_ret_from_sys_call	/* ia32_tracesys has set RAX(%rsp) */
+
+	pax_erase_kstack
+
 	jmp ia32_do_call
 END(ia32_syscall)
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/ia32/ia32_signal.c linux-4.0.9-pax/arch/x86/ia32/ia32_signal.c
--- linux-4.0.9/arch/x86/ia32/ia32_signal.c	2015-04-13 11:21:01.622617473 +0200
+++ linux-4.0.9-pax/arch/x86/ia32/ia32_signal.c	2015-04-15 12:13:52.898318623 +0200
@@ -218,7 +218,7 @@ asmlinkage long sys32_sigreturn(void)
 	if (__get_user(set.sig[0], &frame->sc.oldmask)
 	    || (_COMPAT_NSIG_WORDS > 1
 		&& __copy_from_user((((char *) &set.sig) + 4),
-				    &frame->extramask,
+				    frame->extramask,
 				    sizeof(frame->extramask))))
 		goto badframe;
 
@@ -338,7 +338,7 @@ static void __user *get_sigframe(struct
 	sp -= frame_size;
 	/* Align the stack pointer according to the i386 ABI,
 	 * i.e. so that on function entry ((sp + 4) & 15) == 0. */
-	sp = ((sp + 4) & -16ul) - 4;
+	sp = ((sp - 12) & -16ul) - 4;
 	return (void __user *) sp;
 }
 
@@ -383,10 +383,10 @@ int ia32_setup_frame(int sig, struct ksi
 	} else {
 		/* Return stub is in 32bit vsyscall page */
 		if (current->mm->context.vdso)
-			restorer = current->mm->context.vdso +
-				selected_vdso32->sym___kernel_sigreturn;
+			restorer = (void __force_user *)(current->mm->context.vdso +
+				selected_vdso32->sym___kernel_sigreturn);
 		else
-			restorer = &frame->retcode;
+			restorer = frame->retcode;
 	}
 
 	put_user_try {
@@ -396,7 +396,7 @@ int ia32_setup_frame(int sig, struct ksi
 		 * These are actually not used anymore, but left because some
 		 * gdb versions depend on them as a marker.
 		 */
-		put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode);
+		put_user_ex(*((const u64 *)&code), (u64 __user *)frame->retcode);
 	} put_user_catch(err);
 
 	if (err)
@@ -438,7 +438,7 @@ int ia32_setup_rt_frame(int sig, struct
 		0xb8,
 		__NR_ia32_rt_sigreturn,
 		0x80cd,
-		0,
+		0
 	};
 
 	frame = get_sigframe(ksig, regs, sizeof(*frame), &fpstate);
@@ -461,16 +461,19 @@ int ia32_setup_rt_frame(int sig, struct
 
 		if (ksig->ka.sa.sa_flags & SA_RESTORER)
 			restorer = ksig->ka.sa.sa_restorer;
+		else if (current->mm->context.vdso)
+			/* Return stub is in 32bit vsyscall page */
+			restorer = (void __force_user *)(current->mm->context.vdso +
+				selected_vdso32->sym___kernel_rt_sigreturn);
 		else
-			restorer = current->mm->context.vdso +
-				selected_vdso32->sym___kernel_rt_sigreturn;
+			restorer = frame->retcode;
 		put_user_ex(ptr_to_compat(restorer), &frame->pretcode);
 
 		/*
 		 * Not actually used anymore, but left because some gdb
 		 * versions need it.
 		 */
-		put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode);
+		put_user_ex(*((const u64 *)&code), (u64 __user *)frame->retcode);
 	} put_user_catch(err);
 
 	err |= copy_siginfo_to_user32(&frame->info, &ksig->info);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/ia32/sys_ia32.c linux-4.0.9-pax/arch/x86/ia32/sys_ia32.c
--- linux-4.0.9/arch/x86/ia32/sys_ia32.c	2015-03-18 15:21:50.236349253 +0100
+++ linux-4.0.9-pax/arch/x86/ia32/sys_ia32.c	2015-04-15 12:13:52.898318623 +0200
@@ -69,8 +69,8 @@ asmlinkage long sys32_ftruncate64(unsign
  */
 static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
 {
-	typeof(ubuf->st_uid) uid = 0;
-	typeof(ubuf->st_gid) gid = 0;
+	typeof(((struct stat64 *)0)->st_uid) uid = 0;
+	typeof(((struct stat64 *)0)->st_gid) gid = 0;
 	SET_UID(uid, from_kuid_munged(current_user_ns(), stat->uid));
 	SET_GID(gid, from_kgid_munged(current_user_ns(), stat->gid));
 	if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) ||
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/alternative-asm.h linux-4.0.9-pax/arch/x86/include/asm/alternative-asm.h
--- linux-4.0.9/arch/x86/include/asm/alternative-asm.h	2015-03-18 15:21:50.236349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/alternative-asm.h	2015-04-15 12:13:52.898318623 +0200
@@ -18,6 +18,45 @@
 	.endm
 #endif
 
+#ifdef KERNEXEC_PLUGIN
+	.macro pax_force_retaddr_bts rip=0
+	btsq $63,\rip(%rsp)
+	.endm
+#ifdef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_BTS
+	.macro pax_force_retaddr rip=0, reload=0
+	btsq $63,\rip(%rsp)
+	.endm
+	.macro pax_force_fptr ptr
+	btsq $63,\ptr
+	.endm
+	.macro pax_set_fptr_mask
+	.endm
+#endif
+#ifdef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR
+	.macro pax_force_retaddr rip=0, reload=0
+	.if \reload
+	pax_set_fptr_mask
+	.endif
+	orq %r12,\rip(%rsp)
+	.endm
+	.macro pax_force_fptr ptr
+	orq %r12,\ptr
+	.endm
+	.macro pax_set_fptr_mask
+	movabs $0x8000000000000000,%r12
+	.endm
+#endif
+#else
+	.macro pax_force_retaddr rip=0, reload=0
+	.endm
+	.macro pax_force_fptr ptr
+	.endm
+	.macro pax_force_retaddr_bts rip=0
+	.endm
+	.macro pax_set_fptr_mask
+	.endm
+#endif
+
 .macro altinstruction_entry orig alt feature orig_len alt_len
 	.long \orig - .
 	.long \alt - .
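
The pax_force_retaddr/pax_force_fptr macros above reduce to setting bit 63 of the saved return address or function pointer, either directly (btsq $63) or by ORing in the 0x8000000000000000 mask kept in %r12. A kernel text address already has that bit set, so the operation is a no-op there, while a userland address would become non-canonical and fault if control ever returned to it. A minimal userspace sketch of the mask's effect (the helper name and example addresses are illustrative, not taken from the patch):

	/* Illustrative only: mirrors the bit-63 mask used by pax_force_retaddr. */
	#include <stdio.h>

	#define PAX_RETADDR_MASK 0x8000000000000000UL

	static unsigned long force_retaddr(unsigned long rip)
	{
		return rip | PAX_RETADDR_MASK;	/* same effect as btsq $63 on the saved rip */
	}

	int main(void)
	{
		unsigned long kernel_rip = 0xffffffff81000000UL;	/* bit 63 already set */
		unsigned long user_rip   = 0x0000000000400000UL;	/* bit 63 clear */

		printf("kernel rip %#lx -> %#lx (unchanged)\n",
		       kernel_rip, force_retaddr(kernel_rip));
		printf("user   rip %#lx -> %#lx (non-canonical on x86-64)\n",
		       user_rip, force_retaddr(user_rip));
		return 0;
	}
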
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/alternative.h linux-4.0.9-pax/arch/x86/include/asm/alternative.h
--- linux-4.0.9/arch/x86/include/asm/alternative.h	2015-03-18 15:21:50.236349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/alternative.h	2015-04-15 12:13:52.898318623 +0200
@@ -106,7 +106,7 @@ static inline int alternatives_text_rese
 	".pushsection .discard,\"aw\",@progbits\n"			\
 	DISCARD_ENTRY(1)						\
 	".popsection\n"							\
-	".pushsection .altinstr_replacement, \"ax\"\n"			\
+	".pushsection .altinstr_replacement, \"a\"\n"			\
 	ALTINSTR_REPLACEMENT(newinstr, feature, 1)			\
 	".popsection"
 
@@ -120,7 +120,7 @@ static inline int alternatives_text_rese
 	DISCARD_ENTRY(1)						\
 	DISCARD_ENTRY(2)						\
 	".popsection\n"							\
-	".pushsection .altinstr_replacement, \"ax\"\n"			\
+	".pushsection .altinstr_replacement, \"a\"\n"			\
 	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)			\
 	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)			\
 	".popsection"
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/apic.h linux-4.0.9-pax/arch/x86/include/asm/apic.h
--- linux-4.0.9/arch/x86/include/asm/apic.h	2015-04-13 11:21:01.622617473 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/apic.h	2015-04-15 12:13:52.898318623 +0200
@@ -45,7 +45,7 @@ static inline void generic_apic_probe(vo
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
-extern unsigned int apic_verbosity;
+extern int apic_verbosity;
 extern int local_apic_timer_c2_ok;
 
 extern int disable_apic;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/apm.h linux-4.0.9-pax/arch/x86/include/asm/apm.h
--- linux-4.0.9/arch/x86/include/asm/apm.h	2015-03-18 15:21:50.236349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/apm.h	2015-04-15 12:13:52.898318623 +0200
@@ -34,7 +34,7 @@ static inline void apm_bios_call_asm(u32
 	__asm__ __volatile__(APM_DO_ZERO_SEGS
 		"pushl %%edi\n\t"
 		"pushl %%ebp\n\t"
-		"lcall *%%cs:apm_bios_entry\n\t"
+		"lcall *%%ss:apm_bios_entry\n\t"
 		"setc %%al\n\t"
 		"popl %%ebp\n\t"
 		"popl %%edi\n\t"
@@ -58,7 +58,7 @@ static inline u8 apm_bios_call_simple_as
 	__asm__ __volatile__(APM_DO_ZERO_SEGS
 		"pushl %%edi\n\t"
 		"pushl %%ebp\n\t"
-		"lcall *%%cs:apm_bios_entry\n\t"
+		"lcall *%%ss:apm_bios_entry\n\t"
 		"setc %%bl\n\t"
 		"popl %%ebp\n\t"
 		"popl %%edi\n\t"
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/atomic64_32.h linux-4.0.9-pax/arch/x86/include/asm/atomic64_32.h
--- linux-4.0.9/arch/x86/include/asm/atomic64_32.h	2015-03-18 15:21:50.236349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/atomic64_32.h	2015-04-15 12:13:52.898318623 +0200
@@ -12,6 +12,14 @@ typedef struct {
 	u64 __aligned(8) counter;
 } atomic64_t;
 
+#ifdef CONFIG_PAX_REFCOUNT
+typedef struct {
+	u64 __aligned(8) counter;
+} atomic64_unchecked_t;
+#else
+typedef atomic64_t atomic64_unchecked_t;
+#endif
+
 #define ATOMIC64_INIT(val)	{ (val) }
 
 #define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...)
@@ -37,21 +45,31 @@ typedef struct {
 	ATOMIC64_DECL_ONE(sym##_386)
 
 ATOMIC64_DECL_ONE(add_386);
+ATOMIC64_DECL_ONE(add_unchecked_386);
 ATOMIC64_DECL_ONE(sub_386);
+ATOMIC64_DECL_ONE(sub_unchecked_386);
 ATOMIC64_DECL_ONE(inc_386);
+ATOMIC64_DECL_ONE(inc_unchecked_386);
 ATOMIC64_DECL_ONE(dec_386);
+ATOMIC64_DECL_ONE(dec_unchecked_386);
 #endif
 
 #define alternative_atomic64(f, out, in...) \
 	__alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in)
 
 ATOMIC64_DECL(read);
+ATOMIC64_DECL(read_unchecked);
 ATOMIC64_DECL(set);
+ATOMIC64_DECL(set_unchecked);
 ATOMIC64_DECL(xchg);
 ATOMIC64_DECL(add_return);
+ATOMIC64_DECL(add_return_unchecked);
 ATOMIC64_DECL(sub_return);
+ATOMIC64_DECL(sub_return_unchecked);
 ATOMIC64_DECL(inc_return);
+ATOMIC64_DECL(inc_return_unchecked);
 ATOMIC64_DECL(dec_return);
+ATOMIC64_DECL(dec_return_unchecked);
 ATOMIC64_DECL(dec_if_positive);
 ATOMIC64_DECL(inc_not_zero);
 ATOMIC64_DECL(add_unless);
@@ -77,6 +95,21 @@ static inline long long atomic64_cmpxchg
 }
 
 /**
+ * atomic64_cmpxchg_unchecked - cmpxchg atomic64 variable
+ * @p: pointer to type atomic64_unchecked_t
+ * @o: expected value
+ * @n: new value
+ *
+ * Atomically sets @v to @n if it was equal to @o and returns
+ * the old value.
+ */
+
+static inline long long atomic64_cmpxchg_unchecked(atomic64_unchecked_t *v, long long o, long long n)
+{
+	return cmpxchg64(&v->counter, o, n);
+}
+
+/**
  * atomic64_xchg - xchg atomic64 variable
  * @v: pointer to type atomic64_t
  * @n: value to assign
@@ -112,6 +145,22 @@ static inline void atomic64_set(atomic64
 }
 
 /**
+ * atomic64_set_unchecked - set atomic64 variable
+ * @v: pointer to type atomic64_unchecked_t
+ * @n: value to assign
+ *
+ * Atomically sets the value of @v to @n.
+ */
+static inline void atomic64_set_unchecked(atomic64_unchecked_t *v, long long i)
+{
+	unsigned high = (unsigned)(i >> 32);
+	unsigned low = (unsigned)i;
+	alternative_atomic64(set, /* no output */,
+			     "S" (v), "b" (low), "c" (high)
+			     : "eax", "edx", "memory");
+}
+
+/**
  * atomic64_read - read atomic64 variable
  * @v: pointer to type atomic64_t
  *
@@ -125,6 +174,19 @@ static inline long long atomic64_read(co
  }
 
 /**
+ * atomic64_read_unchecked - read atomic64 variable
+ * @v: pointer to type atomic64_unchecked_t
+ *
+ * Atomically reads the value of @v and returns it.
+ */
+static inline long long __intentional_overflow(-1) atomic64_read_unchecked(atomic64_unchecked_t *v)
+{
+	long long r;
+	alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
+	return r;
+ }
+
+/**
  * atomic64_add_return - add and return
  * @i: integer value to add
  * @v: pointer to type atomic64_t
@@ -139,6 +201,21 @@ static inline long long atomic64_add_ret
 	return i;
 }
 
+/**
+ * atomic64_add_return_unchecked - add and return
+ * @i: integer value to add
+ * @v: pointer to type atomic64_unchecked_t
+ *
+ * Atomically adds @i to @v and returns @i + *@v
+ */
+static inline long long atomic64_add_return_unchecked(long long i, atomic64_unchecked_t *v)
+{
+	alternative_atomic64(add_return_unchecked,
+			     ASM_OUTPUT2("+A" (i), "+c" (v)),
+			     ASM_NO_INPUT_CLOBBER("memory"));
+	return i;
+}
+
 /*
  * Other variants with different arithmetic operators:
  */
@@ -158,6 +235,14 @@ static inline long long atomic64_inc_ret
 	return a;
 }
 
+static inline long long atomic64_inc_return_unchecked(atomic64_unchecked_t *v)
+{
+	long long a;
+	alternative_atomic64(inc_return_unchecked, "=&A" (a),
+			     "S" (v) : "memory", "ecx");
+	return a;
+}
+
 static inline long long atomic64_dec_return(atomic64_t *v)
 {
 	long long a;
@@ -179,6 +264,21 @@ static inline long long atomic64_add(lon
 			       ASM_OUTPUT2("+A" (i), "+c" (v)),
 			       ASM_NO_INPUT_CLOBBER("memory"));
 	return i;
+}
+
+/**
+ * atomic64_add_unchecked - add integer to atomic64 variable
+ * @i: integer value to add
+ * @v: pointer to type atomic64_unchecked_t
+ *
+ * Atomically adds @i to @v.
+ */
+static inline long long atomic64_add_unchecked(long long i, atomic64_unchecked_t *v)
+{
+	__alternative_atomic64(add_unchecked, add_return_unchecked,
+			       ASM_OUTPUT2("+A" (i), "+c" (v)),
+			       ASM_NO_INPUT_CLOBBER("memory"));
+	return i;
 }
 
 /**
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/atomic64_64.h linux-4.0.9-pax/arch/x86/include/asm/atomic64_64.h
--- linux-4.0.9/arch/x86/include/asm/atomic64_64.h	2015-03-18 15:21:50.236349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/atomic64_64.h	2015-04-15 12:13:52.898318623 +0200
@@ -22,6 +22,18 @@ static inline long atomic64_read(const a
 }
 
 /**
+ * atomic64_read_unchecked - read atomic64 variable
+ * @v: pointer of type atomic64_unchecked_t
+ *
+ * Atomically reads the value of @v.
+ * Doesn't imply a read memory barrier.
+ */
+static inline long __intentional_overflow(-1) atomic64_read_unchecked(const atomic64_unchecked_t *v)
+{
+	return ACCESS_ONCE((v)->counter);
+}
+
+/**
  * atomic64_set - set atomic64 variable
  * @v: pointer to type atomic64_t
  * @i: required value
@@ -34,6 +46,18 @@ static inline void atomic64_set(atomic64
 }
 
 /**
+ * atomic64_set_unchecked - set atomic64 variable
+ * @v: pointer to type atomic64_unchecked_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic64_set_unchecked(atomic64_unchecked_t *v, long i)
+{
+	v->counter = i;
+}
+
+/**
  * atomic64_add - add integer to atomic64 variable
  * @i: integer value to add
  * @v: pointer to type atomic64_t
@@ -42,6 +66,28 @@ static inline void atomic64_set(atomic64
  */
 static inline void atomic64_add(long i, atomic64_t *v)
 {
+	asm volatile(LOCK_PREFIX "addq %1,%0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     LOCK_PREFIX "subq %1,%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
+		     : "=m" (v->counter)
+		     : "er" (i), "m" (v->counter));
+}
+
+/**
+ * atomic64_add_unchecked - add integer to atomic64 variable
+ * @i: integer value to add
+ * @v: pointer to type atomic64_unchecked_t
+ *
+ * Atomically adds @i to @v.
+ */
+static inline void atomic64_add_unchecked(long i, atomic64_unchecked_t *v)
+{
 	asm volatile(LOCK_PREFIX "addq %1,%0"
 		     : "=m" (v->counter)
 		     : "er" (i), "m" (v->counter));
@@ -56,7 +102,29 @@ static inline void atomic64_add(long i,
  */
 static inline void atomic64_sub(long i, atomic64_t *v)
 {
-	asm volatile(LOCK_PREFIX "subq %1,%0"
+	asm volatile(LOCK_PREFIX "subq %1,%0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     LOCK_PREFIX "addq %1,%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
+		     : "=m" (v->counter)
+		     : "er" (i), "m" (v->counter));
+}
+
+/**
+ * atomic64_sub_unchecked - subtract the atomic64 variable
+ * @i: integer value to subtract
+ * @v: pointer to type atomic64_unchecked_t
+ *
+ * Atomically subtracts @i from @v.
+ */
+static inline void atomic64_sub_unchecked(long i, atomic64_unchecked_t *v)
+{
+	asm volatile(LOCK_PREFIX "subq %1,%0\n"
 		     : "=m" (v->counter)
 		     : "er" (i), "m" (v->counter));
 }
@@ -72,7 +140,7 @@ static inline void atomic64_sub(long i,
  */
 static inline int atomic64_sub_and_test(long i, atomic64_t *v)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", "e");
+	GEN_BINARY_RMWcc(LOCK_PREFIX "subq", LOCK_PREFIX "addq", v->counter, "er", i, "%0", "e");
 }
 
 /**
@@ -83,6 +151,27 @@ static inline int atomic64_sub_and_test(
  */
 static inline void atomic64_inc(atomic64_t *v)
 {
+	asm volatile(LOCK_PREFIX "incq %0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     LOCK_PREFIX "decq %0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
+		     : "=m" (v->counter)
+		     : "m" (v->counter));
+}
+
+/**
+ * atomic64_inc_unchecked - increment atomic64 variable
+ * @v: pointer to type atomic64_unchecked_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic64_inc_unchecked(atomic64_unchecked_t *v)
+{
 	asm volatile(LOCK_PREFIX "incq %0"
 		     : "=m" (v->counter)
 		     : "m" (v->counter));
@@ -96,7 +185,28 @@ static inline void atomic64_inc(atomic64
  */
 static inline void atomic64_dec(atomic64_t *v)
 {
-	asm volatile(LOCK_PREFIX "decq %0"
+	asm volatile(LOCK_PREFIX "decq %0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     LOCK_PREFIX "incq %0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
+		     : "=m" (v->counter)
+		     : "m" (v->counter));
+}
+
+/**
+ * atomic64_dec_unchecked - decrement atomic64 variable
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically decrements @v by 1.
+ */
+static inline void atomic64_dec_unchecked(atomic64_unchecked_t *v)
+{
+	asm volatile(LOCK_PREFIX "decq %0\n"
 		     : "=m" (v->counter)
 		     : "m" (v->counter));
 }
@@ -111,7 +221,7 @@ static inline void atomic64_dec(atomic64
  */
 static inline int atomic64_dec_and_test(atomic64_t *v)
 {
-	GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e");
+	GEN_UNARY_RMWcc(LOCK_PREFIX "decq", LOCK_PREFIX "incq", v->counter, "%0", "e");
 }
 
 /**
@@ -124,7 +234,7 @@ static inline int atomic64_dec_and_test(
  */
 static inline int atomic64_inc_and_test(atomic64_t *v)
 {
-	GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e");
+	GEN_UNARY_RMWcc(LOCK_PREFIX "incq", LOCK_PREFIX "decq", v->counter, "%0", "e");
 }
 
 /**
@@ -138,7 +248,7 @@ static inline int atomic64_inc_and_test(
  */
 static inline int atomic64_add_negative(long i, atomic64_t *v)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", "s");
+	GEN_BINARY_RMWcc(LOCK_PREFIX "addq", LOCK_PREFIX "subq",  v->counter, "er", i, "%0", "s");
 }
 
 /**
@@ -150,6 +260,18 @@ static inline int atomic64_add_negative(
  */
 static inline long atomic64_add_return(long i, atomic64_t *v)
 {
+	return i + xadd_check_overflow(&v->counter, i);
+}
+
+/**
+ * atomic64_add_return_unchecked - add and return
+ * @i: integer value to add
+ * @v: pointer to type atomic64_unchecked_t
+ *
+ * Atomically adds @i to @v and returns @i + @v
+ */
+static inline long atomic64_add_return_unchecked(long i, atomic64_unchecked_t *v)
+{
 	return i + xadd(&v->counter, i);
 }
 
@@ -159,6 +281,10 @@ static inline long atomic64_sub_return(l
 }
 
 #define atomic64_inc_return(v)  (atomic64_add_return(1, (v)))
+static inline long atomic64_inc_return_unchecked(atomic64_unchecked_t *v)
+{
+	return atomic64_add_return_unchecked(1, v);
+}
 #define atomic64_dec_return(v)  (atomic64_sub_return(1, (v)))
 
 static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
@@ -166,6 +292,11 @@ static inline long atomic64_cmpxchg(atom
 	return cmpxchg(&v->counter, old, new);
 }
 
+static inline long atomic64_cmpxchg_unchecked(atomic64_unchecked_t *v, long old, long new)
+{
+	return cmpxchg(&v->counter, old, new);
+}
+
 static inline long atomic64_xchg(atomic64_t *v, long new)
 {
 	return xchg(&v->counter, new);
@@ -182,17 +313,30 @@ static inline long atomic64_xchg(atomic6
  */
 static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
 {
-	long c, old;
+	long c, old, new;
 	c = atomic64_read(v);
 	for (;;) {
-		if (unlikely(c == (u)))
+		if (unlikely(c == u))
 			break;
-		old = atomic64_cmpxchg((v), c, c + (a));
+
+		asm volatile("add %2,%0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+			     "jno 0f\n"
+			     "sub %2,%0\n"
+			     "int $4\n0:\n"
+			     _ASM_EXTABLE(0b, 0b)
+#endif
+
+			     : "=r" (new)
+			     : "0" (c), "ir" (a));
+
+		old = atomic64_cmpxchg(v, c, new);
 		if (likely(old == c))
 			break;
 		c = old;
 	}
-	return c != (u);
+	return c != u;
 }
 
 #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
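
The checked 64-bit operations above all follow one pattern: perform the LOCK-prefixed op, skip ahead with "jno" when no signed overflow occurred, otherwise undo the op and raise "int $4" (the overflow exception), leaving the counter at its pre-overflow value; the *_unchecked variants keep the plain wrap-around semantics. A rough userspace analogue of that policy, using __builtin_add_overflow and an error path in place of the undo-plus-#OF trap (names are illustrative, not from the patch):

	/* Rough analogue of the PAX_REFCOUNT add: detect signed overflow, leave the
	 * counter untouched and report, instead of the patch's undo + "int $4". */
	#include <stdio.h>
	#include <limits.h>

	typedef struct { long counter; } atomic64_sketch_t;

	static void atomic64_add_checked(long i, atomic64_sketch_t *v)
	{
		long result;

		if (__builtin_add_overflow(v->counter, i, &result)) {
			fprintf(stderr, "refcount overflow detected, counter unchanged\n");
			return;
		}
		v->counter = result;	/* the real code is a single LOCK-ed RMW, not a load/store pair */
	}

	int main(void)
	{
		atomic64_sketch_t refs = { LONG_MAX };

		atomic64_add_checked(1, &refs);	/* reports overflow, counter stays LONG_MAX */
		printf("counter = %ld\n", refs.counter);
		return 0;
	}
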
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/atomic.h linux-4.0.9-pax/arch/x86/include/asm/atomic.h
--- linux-4.0.9/arch/x86/include/asm/atomic.h	2015-03-18 15:21:50.236349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/atomic.h	2015-04-15 12:13:52.898318623 +0200
@@ -28,6 +28,17 @@ static inline int atomic_read(const atom
 }
 
 /**
+ * atomic_read_unchecked - read atomic variable
+ * @v: pointer of type atomic_unchecked_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline int __intentional_overflow(-1) atomic_read_unchecked(const atomic_unchecked_t *v)
+{
+	return ACCESS_ONCE((v)->counter);
+}
+
+/**
  * atomic_set - set atomic variable
  * @v: pointer of type atomic_t
  * @i: required value
@@ -40,6 +51,18 @@ static inline void atomic_set(atomic_t *
 }
 
 /**
+ * atomic_set_unchecked - set atomic variable
+ * @v: pointer of type atomic_unchecked_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic_set_unchecked(atomic_unchecked_t *v, int i)
+{
+	v->counter = i;
+}
+
+/**
  * atomic_add - add integer to atomic variable
  * @i: integer value to add
  * @v: pointer of type atomic_t
@@ -48,7 +71,29 @@ static inline void atomic_set(atomic_t *
  */
 static inline void atomic_add(int i, atomic_t *v)
 {
-	asm volatile(LOCK_PREFIX "addl %1,%0"
+	asm volatile(LOCK_PREFIX "addl %1,%0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     LOCK_PREFIX "subl %1,%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
+		     : "+m" (v->counter)
+		     : "ir" (i));
+}
+
+/**
+ * atomic_add_unchecked - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_unchecked_t
+ *
+ * Atomically adds @i to @v.
+ */
+static inline void atomic_add_unchecked(int i, atomic_unchecked_t *v)
+{
+	asm volatile(LOCK_PREFIX "addl %1,%0\n"
 		     : "+m" (v->counter)
 		     : "ir" (i));
 }
@@ -62,7 +107,29 @@ static inline void atomic_add(int i, ato
  */
 static inline void atomic_sub(int i, atomic_t *v)
 {
-	asm volatile(LOCK_PREFIX "subl %1,%0"
+	asm volatile(LOCK_PREFIX "subl %1,%0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     LOCK_PREFIX "addl %1,%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
+		     : "+m" (v->counter)
+		     : "ir" (i));
+}
+
+/**
+ * atomic_sub_unchecked - subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_unchecked_t
+ *
+ * Atomically subtracts @i from @v.
+ */
+static inline void atomic_sub_unchecked(int i, atomic_unchecked_t *v)
+{
+	asm volatile(LOCK_PREFIX "subl %1,%0\n"
 		     : "+m" (v->counter)
 		     : "ir" (i));
 }
@@ -78,7 +145,7 @@ static inline void atomic_sub(int i, ato
  */
 static inline int atomic_sub_and_test(int i, atomic_t *v)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e");
+	GEN_BINARY_RMWcc(LOCK_PREFIX "subl", LOCK_PREFIX "addl",  v->counter, "er", i, "%0", "e");
 }
 
 /**
@@ -89,7 +156,27 @@ static inline int atomic_sub_and_test(in
  */
 static inline void atomic_inc(atomic_t *v)
 {
-	asm volatile(LOCK_PREFIX "incl %0"
+	asm volatile(LOCK_PREFIX "incl %0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     LOCK_PREFIX "decl %0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
+		     : "+m" (v->counter));
+}
+
+/**
+ * atomic_inc_unchecked - increment atomic variable
+ * @v: pointer of type atomic_unchecked_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic_inc_unchecked(atomic_unchecked_t *v)
+{
+	asm volatile(LOCK_PREFIX "incl %0\n"
 		     : "+m" (v->counter));
 }
 
@@ -101,7 +188,27 @@ static inline void atomic_inc(atomic_t *
  */
 static inline void atomic_dec(atomic_t *v)
 {
-	asm volatile(LOCK_PREFIX "decl %0"
+	asm volatile(LOCK_PREFIX "decl %0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     LOCK_PREFIX "incl %0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
+		     : "+m" (v->counter));
+}
+
+/**
+ * atomic_dec_unchecked - decrement atomic variable
+ * @v: pointer of type atomic_unchecked_t
+ *
+ * Atomically decrements @v by 1.
+ */
+static inline void atomic_dec_unchecked(atomic_unchecked_t *v)
+{
+	asm volatile(LOCK_PREFIX "decl %0\n"
 		     : "+m" (v->counter));
 }
 
@@ -115,7 +222,7 @@ static inline void atomic_dec(atomic_t *
  */
 static inline int atomic_dec_and_test(atomic_t *v)
 {
-	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
+	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", LOCK_PREFIX "incl", v->counter, "%0", "e");
 }
 
 /**
@@ -128,7 +235,20 @@ static inline int atomic_dec_and_test(at
  */
 static inline int atomic_inc_and_test(atomic_t *v)
 {
-	GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e");
+	GEN_UNARY_RMWcc(LOCK_PREFIX "incl", LOCK_PREFIX "decl", v->counter, "%0", "e");
+}
+
+/**
+ * atomic_inc_and_test_unchecked - increment and test
+ * @v: pointer of type atomic_unchecked_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int atomic_inc_and_test_unchecked(atomic_unchecked_t *v)
+{
+	GEN_UNARY_RMWcc_unchecked(LOCK_PREFIX "incl", v->counter, "%0", "e");
 }
 
 /**
@@ -142,7 +262,7 @@ static inline int atomic_inc_and_test(at
  */
 static inline int atomic_add_negative(int i, atomic_t *v)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s");
+	GEN_BINARY_RMWcc(LOCK_PREFIX "addl", LOCK_PREFIX "subl", v->counter, "er", i, "%0", "s");
 }
 
 /**
@@ -152,7 +272,19 @@ static inline int atomic_add_negative(in
  *
  * Atomically adds @i to @v and returns @i + @v
  */
-static inline int atomic_add_return(int i, atomic_t *v)
+static inline int __intentional_overflow(-1) atomic_add_return(int i, atomic_t *v)
+{
+	return i + xadd_check_overflow(&v->counter, i);
+}
+
+/**
+ * atomic_add_return_unchecked - add integer and return
+ * @i: integer value to add
+ * @v: pointer of type atomic_unchecked_t
+ *
+ * Atomically adds @i to @v and returns @i + @v
+ */
+static inline int atomic_add_return_unchecked(int i, atomic_unchecked_t *v)
 {
 	return i + xadd(&v->counter, i);
 }
@@ -164,15 +296,24 @@ static inline int atomic_add_return(int
  *
  * Atomically subtracts @i from @v and returns @v - @i
  */
-static inline int atomic_sub_return(int i, atomic_t *v)
+static inline int __intentional_overflow(-1) atomic_sub_return(int i, atomic_t *v)
 {
 	return atomic_add_return(-i, v);
 }
 
 #define atomic_inc_return(v)  (atomic_add_return(1, v))
+static inline int atomic_inc_return_unchecked(atomic_unchecked_t *v)
+{
+	return atomic_add_return_unchecked(1, v);
+}
 #define atomic_dec_return(v)  (atomic_sub_return(1, v))
 
-static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+static inline int __intentional_overflow(-1) atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	return cmpxchg(&v->counter, old, new);
+}
+
+static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *v, int old, int new)
 {
 	return cmpxchg(&v->counter, old, new);
 }
@@ -182,6 +323,11 @@ static inline int atomic_xchg(atomic_t *
 	return xchg(&v->counter, new);
 }
 
+static inline int atomic_xchg_unchecked(atomic_unchecked_t *v, int new)
+{
+	return xchg(&v->counter, new);
+}
+
 /**
  * __atomic_add_unless - add unless the number is already a given value
  * @v: pointer of type atomic_t
@@ -193,12 +339,25 @@ static inline int atomic_xchg(atomic_t *
  */
 static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
-	int c, old;
+	int c, old, new;
 	c = atomic_read(v);
 	for (;;) {
-		if (unlikely(c == (u)))
+		if (unlikely(c == u))
 			break;
-		old = atomic_cmpxchg((v), c, c + (a));
+
+		asm volatile("addl %2,%0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+			     "jno 0f\n"
+			     "subl %2,%0\n"
+			     "int $4\n0:\n"
+			     _ASM_EXTABLE(0b, 0b)
+#endif
+
+			     : "=r" (new)
+			     : "0" (c), "ir" (a));
+
+		old = atomic_cmpxchg(v, c, new);
 		if (likely(old == c))
 			break;
 		c = old;
@@ -207,6 +366,49 @@ static inline int __atomic_add_unless(at
 }
 
 /**
+ * atomic_inc_not_zero_hint - increment if not null
+ * @v: pointer of type atomic_t
+ * @hint: probable value of the atomic before the increment
+ *
+ * This version of atomic_inc_not_zero() gives a hint of probable
+ * value of the atomic. This helps processor to not read the memory
+ * before doing the atomic read/modify/write cycle, lowering
+ * number of bus transactions on some arches.
+ *
+ * Returns: 0 if increment was not done, 1 otherwise.
+ */
+#define atomic_inc_not_zero_hint atomic_inc_not_zero_hint
+static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint)
+{
+	int val, c = hint, new;
+
+	/* sanity test, should be removed by compiler if hint is a constant */
+	if (!hint)
+		return __atomic_add_unless(v, 1, 0);
+
+	do {
+		asm volatile("incl %0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+			     "jno 0f\n"
+			     "decl %0\n"
+			     "int $4\n0:\n"
+			     _ASM_EXTABLE(0b, 0b)
+#endif
+
+			     : "=r" (new)
+			     : "0" (c));
+
+		val = atomic_cmpxchg(v, c, new);
+		if (val == c)
+			return 1;
+		c = val;
+	} while (c);
+
+	return 0;
+}
+
+/**
  * atomic_inc_short - increment of a short integer
  * @v: pointer to type int
  *
@@ -220,14 +422,37 @@ static inline short int atomic_inc_short
 }
 
 /* These are x86-specific, used by some header files */
-#define atomic_clear_mask(mask, addr)				\
-	asm volatile(LOCK_PREFIX "andl %0,%1"			\
-		     : : "r" (~(mask)), "m" (*(addr)) : "memory")
-
-#define atomic_set_mask(mask, addr)				\
-	asm volatile(LOCK_PREFIX "orl %0,%1"			\
-		     : : "r" ((unsigned)(mask)), "m" (*(addr))	\
-		     : "memory")
+static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	asm volatile(LOCK_PREFIX "andl %1,%0"
+		     : "+m" (v->counter)
+		     : "r" (~(mask))
+		     : "memory");
+}
+
+static inline void atomic_clear_mask_unchecked(unsigned int mask, atomic_unchecked_t *v)
+{
+	asm volatile(LOCK_PREFIX "andl %1,%0"
+		     : "+m" (v->counter)
+		     : "r" (~(mask))
+		     : "memory");
+}
+
+static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	asm volatile(LOCK_PREFIX "orl %1,%0"
+		     : "+m" (v->counter)
+		     : "r" (mask)
+		     : "memory");
+}
+
+static inline void atomic_set_mask_unchecked(unsigned int mask, atomic_unchecked_t *v)
+{
+	asm volatile(LOCK_PREFIX "orl %1,%0"
+		     : "+m" (v->counter)
+		     : "r" (mask)
+		     : "memory");
+}
 
 #ifdef CONFIG_X86_32
 # include <asm/atomic64_32.h>
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/barrier.h linux-4.0.9-pax/arch/x86/include/asm/barrier.h
--- linux-4.0.9/arch/x86/include/asm/barrier.h	2015-03-18 15:21:50.236349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/barrier.h	2015-04-15 12:13:52.898318623 +0200
@@ -57,7 +57,7 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	ACCESS_ONCE_RW(*p) = (v);					\
 } while (0)
 
 #define smp_load_acquire(p)						\
@@ -74,7 +74,7 @@ do {									\
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	ACCESS_ONCE_RW(*p) = (v);					\
 } while (0)
 
 #define smp_load_acquire(p)						\
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/bitops.h linux-4.0.9-pax/arch/x86/include/asm/bitops.h
--- linux-4.0.9/arch/x86/include/asm/bitops.h	2015-03-18 15:21:50.236349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/bitops.h	2015-04-15 12:13:52.902318623 +0200
@@ -50,7 +50,7 @@
  * a mask operation on a byte.
  */
 #define IS_IMMEDIATE(nr)		(__builtin_constant_p(nr))
-#define CONST_MASK_ADDR(nr, addr)	BITOP_ADDR((void *)(addr) + ((nr)>>3))
+#define CONST_MASK_ADDR(nr, addr)	BITOP_ADDR((volatile void *)(addr) + ((nr)>>3))
 #define CONST_MASK(nr)			(1 << ((nr) & 7))
 
 /**
@@ -203,7 +203,7 @@ static inline void change_bit(long nr, v
  */
 static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c");
+	GEN_BINARY_RMWcc_unchecked(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c");
 }
 
 /**
@@ -249,7 +249,7 @@ static inline int __test_and_set_bit(lon
  */
 static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c");
+	GEN_BINARY_RMWcc_unchecked(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c");
 }
 
 /**
@@ -302,7 +302,7 @@ static inline int __test_and_change_bit(
  */
 static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-	GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c");
+	GEN_BINARY_RMWcc_unchecked(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c");
 }
 
 static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
@@ -343,7 +343,7 @@ static int test_bit(int nr, const volati
  *
  * Undefined if no bit exists, so code should check against 0 first.
  */
-static inline unsigned long __ffs(unsigned long word)
+static inline unsigned long __intentional_overflow(-1) __ffs(unsigned long word)
 {
 	asm("rep; bsf %1,%0"
 		: "=r" (word)
@@ -357,7 +357,7 @@ static inline unsigned long __ffs(unsign
  *
  * Undefined if no zero exists, so code should check against ~0UL first.
  */
-static inline unsigned long ffz(unsigned long word)
+static inline unsigned long __intentional_overflow(-1) ffz(unsigned long word)
 {
 	asm("rep; bsf %1,%0"
 		: "=r" (word)
@@ -371,7 +371,7 @@ static inline unsigned long ffz(unsigned
  *
  * Undefined if no set bit exists, so code should check against 0 first.
  */
-static inline unsigned long __fls(unsigned long word)
+static inline unsigned long __intentional_overflow(-1) __fls(unsigned long word)
 {
 	asm("bsr %1,%0"
 	    : "=r" (word)
@@ -434,7 +434,7 @@ static inline int ffs(int x)
  * set bit if value is nonzero. The last (most significant) bit is
  * at position 32.
  */
-static inline int fls(int x)
+static inline int __intentional_overflow(-1) fls(int x)
 {
 	int r;
 
@@ -476,7 +476,7 @@ static inline int fls(int x)
  * at position 64.
  */
 #ifdef CONFIG_X86_64
-static __always_inline int fls64(__u64 x)
+static __always_inline __intentional_overflow(-1) int fls64(__u64 x)
 {
 	int bitpos = -1;
 	/*
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/boot.h linux-4.0.9-pax/arch/x86/include/asm/boot.h
--- linux-4.0.9/arch/x86/include/asm/boot.h	2015-03-18 15:21:50.236349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/boot.h	2015-04-15 12:13:52.902318623 +0200
@@ -6,10 +6,15 @@
 #include <uapi/asm/boot.h>
 
 /* Physical address where kernel should be loaded. */
-#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
+#define ____LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
 				+ (CONFIG_PHYSICAL_ALIGN - 1)) \
 				& ~(CONFIG_PHYSICAL_ALIGN - 1))
 
+#ifndef __ASSEMBLY__
+extern unsigned char __LOAD_PHYSICAL_ADDR[];
+#define LOAD_PHYSICAL_ADDR ((unsigned long)__LOAD_PHYSICAL_ADDR)
+#endif
+
 /* Minimum kernel alignment, as a power of two */
 #ifdef CONFIG_X86_64
 #define MIN_KERNEL_ALIGN_LG2	PMD_SHIFT
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/cache.h linux-4.0.9-pax/arch/x86/include/asm/cache.h
--- linux-4.0.9/arch/x86/include/asm/cache.h	2015-03-18 15:21:50.236349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/cache.h	2015-04-15 12:13:52.902318623 +0200
@@ -5,12 +5,13 @@
 
 /* L1 cache line size */
 #define L1_CACHE_SHIFT	(CONFIG_X86_L1_CACHE_SHIFT)
-#define L1_CACHE_BYTES	(1 << L1_CACHE_SHIFT)
+#define L1_CACHE_BYTES	(_AC(1,UL) << L1_CACHE_SHIFT)
 
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
+#define __read_only __attribute__((__section__(".data..read_only")))
 
 #define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT
-#define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT)
+#define INTERNODE_CACHE_BYTES (_AC(1,UL) << INTERNODE_CACHE_SHIFT)
 
 #ifdef CONFIG_X86_VSMP
 #ifdef CONFIG_SMP
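
cache.h now also carries a __read_only attribute that places a variable into a dedicated ".data..read_only" section; the attribute itself only selects the output section, and whatever write protection applies to that section is set up elsewhere in the patch, outside this excerpt. A minimal usage sketch (the variable is hypothetical):

	/* Hypothetical usage sketch: __read_only only chooses the output section. */
	#include <stdio.h>

	#define __read_only __attribute__((__section__(".data..read_only")))

	static int default_policy __read_only = 1;	/* hypothetical example variable */

	int main(void)
	{
		printf("default_policy = %d\n", default_policy);
		return 0;
	}
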
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/calling.h linux-4.0.9-pax/arch/x86/include/asm/calling.h
--- linux-4.0.9/arch/x86/include/asm/calling.h	2015-04-13 11:21:01.622617473 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/calling.h	2015-04-15 12:13:52.902318623 +0200
@@ -82,106 +82,117 @@ For 32-bit we have the following convent
 #define RSP		152
 #define SS		160
 
-#define ARGOFFSET	R11
+#define ARGOFFSET	R15
 
 	.macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
-	subq  $9*8+\addskip, %rsp
-	CFI_ADJUST_CFA_OFFSET	9*8+\addskip
-	movq_cfi rdi, 8*8
-	movq_cfi rsi, 7*8
-	movq_cfi rdx, 6*8
+	subq  $ORIG_RAX-ARGOFFSET+\addskip, %rsp
+	CFI_ADJUST_CFA_OFFSET	ORIG_RAX-ARGOFFSET+\addskip
+	movq_cfi rdi, RDI
+	movq_cfi rsi, RSI
+	movq_cfi rdx, RDX
 
 	.if \save_rcx
-	movq_cfi rcx, 5*8
+	movq_cfi rcx, RCX
 	.endif
 
 	.if \rax_enosys
-	movq $-ENOSYS, 4*8(%rsp)
+	movq $-ENOSYS, RAX(%rsp)
 	.else
-	movq_cfi rax, 4*8
+	movq_cfi rax, RAX
 	.endif
 
 	.if \save_r891011
-	movq_cfi r8,  3*8
-	movq_cfi r9,  2*8
-	movq_cfi r10, 1*8
-	movq_cfi r11, 0*8
+	movq_cfi r8,  R8
+	movq_cfi r9,  R9
+	movq_cfi r10, R10
+	movq_cfi r11, R11
 	.endif
 
+#ifdef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR
+	movq_cfi r12, R12
+#endif
+
 	.endm
 
-#define ARG_SKIP	(9*8)
+#define ARG_SKIP	ORIG_RAX
 
 	.macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \
 			    rstor_r8910=1, rstor_rdx=1
+
+#ifdef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR
+	movq_cfi_restore R12, r12
+#endif
+
 	.if \rstor_r11
-	movq_cfi_restore 0*8, r11
+	movq_cfi_restore R11, r11
 	.endif
 
 	.if \rstor_r8910
-	movq_cfi_restore 1*8, r10
-	movq_cfi_restore 2*8, r9
-	movq_cfi_restore 3*8, r8
+	movq_cfi_restore R10, r10
+	movq_cfi_restore R9, r9
+	movq_cfi_restore R8, r8
 	.endif
 
 	.if \rstor_rax
-	movq_cfi_restore 4*8, rax
+	movq_cfi_restore RAX, rax
 	.endif
 
 	.if \rstor_rcx
-	movq_cfi_restore 5*8, rcx
+	movq_cfi_restore RCX, rcx
 	.endif
 
 	.if \rstor_rdx
-	movq_cfi_restore 6*8, rdx
+	movq_cfi_restore RDX, rdx
 	.endif
 
-	movq_cfi_restore 7*8, rsi
-	movq_cfi_restore 8*8, rdi
+	movq_cfi_restore RSI, rsi
+	movq_cfi_restore RDI, rdi
 
-	.if ARG_SKIP+\addskip > 0
-	addq $ARG_SKIP+\addskip, %rsp
-	CFI_ADJUST_CFA_OFFSET	-(ARG_SKIP+\addskip)
+	.if ORIG_RAX+\addskip > 0
+	addq $ORIG_RAX+\addskip, %rsp
+	CFI_ADJUST_CFA_OFFSET	-(ORIG_RAX+\addskip)
 	.endif
 	.endm
 
-	.macro LOAD_ARGS offset, skiprax=0
-	movq \offset(%rsp),    %r11
-	movq \offset+8(%rsp),  %r10
-	movq \offset+16(%rsp), %r9
-	movq \offset+24(%rsp), %r8
-	movq \offset+40(%rsp), %rcx
-	movq \offset+48(%rsp), %rdx
-	movq \offset+56(%rsp), %rsi
-	movq \offset+64(%rsp), %rdi
+	.macro LOAD_ARGS skiprax=0
+	movq R11(%rsp),    %r11
+	movq R10(%rsp),  %r10
+	movq R9(%rsp), %r9
+	movq R8(%rsp), %r8
+	movq RCX(%rsp), %rcx
+	movq RDX(%rsp), %rdx
+	movq RSI(%rsp), %rsi
+	movq RDI(%rsp), %rdi
 	.if \skiprax
 	.else
-	movq \offset+72(%rsp), %rax
+	movq ORIG_RAX(%rsp), %rax
 	.endif
 	.endm
 
-#define REST_SKIP	(6*8)
-
 	.macro SAVE_REST
-	subq $REST_SKIP, %rsp
-	CFI_ADJUST_CFA_OFFSET	REST_SKIP
-	movq_cfi rbx, 5*8
-	movq_cfi rbp, 4*8
-	movq_cfi r12, 3*8
-	movq_cfi r13, 2*8
-	movq_cfi r14, 1*8
-	movq_cfi r15, 0*8
+	movq_cfi rbx, RBX
+	movq_cfi rbp, RBP
+
+#ifndef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR
+	movq_cfi r12, R12
+#endif
+
+	movq_cfi r13, R13
+	movq_cfi r14, R14
+	movq_cfi r15, R15
 	.endm
 
 	.macro RESTORE_REST
-	movq_cfi_restore 0*8, r15
-	movq_cfi_restore 1*8, r14
-	movq_cfi_restore 2*8, r13
-	movq_cfi_restore 3*8, r12
-	movq_cfi_restore 4*8, rbp
-	movq_cfi_restore 5*8, rbx
-	addq $REST_SKIP, %rsp
-	CFI_ADJUST_CFA_OFFSET	-(REST_SKIP)
+	movq_cfi_restore R15, r15
+	movq_cfi_restore R14, r14
+	movq_cfi_restore R13, r13
+
+#ifndef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR
+	movq_cfi_restore R12, r12
+#endif
+
+	movq_cfi_restore RBP, rbp
+	movq_cfi_restore RBX, rbx
 	.endm
 
 	.macro SAVE_ALL
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/checksum_32.h linux-4.0.9-pax/arch/x86/include/asm/checksum_32.h
--- linux-4.0.9/arch/x86/include/asm/checksum_32.h	2015-03-18 15:21:50.236349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/checksum_32.h	2015-04-15 12:13:52.902318623 +0200
@@ -31,6 +31,14 @@ asmlinkage __wsum csum_partial_copy_gene
 					    int len, __wsum sum,
 					    int *src_err_ptr, int *dst_err_ptr);
 
+asmlinkage __wsum csum_partial_copy_generic_to_user(const void *src, void *dst,
+						  int len, __wsum sum,
+						  int *src_err_ptr, int *dst_err_ptr);
+
+asmlinkage __wsum csum_partial_copy_generic_from_user(const void *src, void *dst,
+						  int len, __wsum sum,
+						  int *src_err_ptr, int *dst_err_ptr);
+
 /*
  *	Note: when you get a NULL pointer exception here this means someone
  *	passed in an incorrect kernel address to one of these functions.
@@ -53,7 +61,7 @@ static inline __wsum csum_partial_copy_f
 
 	might_sleep();
 	stac();
-	ret = csum_partial_copy_generic((__force void *)src, dst,
+	ret = csum_partial_copy_generic_from_user((__force void *)src, dst,
 					len, sum, err_ptr, NULL);
 	clac();
 
@@ -187,7 +195,7 @@ static inline __wsum csum_and_copy_to_us
 	might_sleep();
 	if (access_ok(VERIFY_WRITE, dst, len)) {
 		stac();
-		ret = csum_partial_copy_generic(src, (__force void *)dst,
+		ret = csum_partial_copy_generic_to_user(src, (__force void *)dst,
 						len, sum, NULL, err_ptr);
 		clac();
 		return ret;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/cmpxchg.h linux-4.0.9-pax/arch/x86/include/asm/cmpxchg.h
--- linux-4.0.9/arch/x86/include/asm/cmpxchg.h	2015-03-18 15:21:50.236349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/cmpxchg.h	2015-04-15 12:13:52.902318623 +0200
@@ -16,8 +16,12 @@ extern void __cmpxchg_wrong_size(void)
 	__compiletime_error("Bad argument size for cmpxchg");
 extern void __xadd_wrong_size(void)
 	__compiletime_error("Bad argument size for xadd");
+extern void __xadd_check_overflow_wrong_size(void)
+	__compiletime_error("Bad argument size for xadd_check_overflow");
 extern void __add_wrong_size(void)
 	__compiletime_error("Bad argument size for add");
+extern void __add_check_overflow_wrong_size(void)
+	__compiletime_error("Bad argument size for add_check_overflow");
 
 /*
  * Constants for operation sizes. On 32-bit, the 64-bit size it set to
@@ -69,6 +73,38 @@ extern void __add_wrong_size(void)
 		__ret;							\
 	})
 
+#ifdef CONFIG_PAX_REFCOUNT
+#define __xchg_op_check_overflow(ptr, arg, op, lock)			\
+	({								\
+	        __typeof__ (*(ptr)) __ret = (arg);			\
+		switch (sizeof(*(ptr))) {				\
+		case __X86_CASE_L:					\
+			asm volatile (lock #op "l %0, %1\n"		\
+				      "jno 0f\n"			\
+				      "mov %0,%1\n"			\
+				      "int $4\n0:\n"			\
+				      _ASM_EXTABLE(0b, 0b)		\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_Q:					\
+			asm volatile (lock #op "q %q0, %1\n"		\
+				      "jno 0f\n"			\
+				      "mov %0,%1\n"			\
+				      "int $4\n0:\n"			\
+				      _ASM_EXTABLE(0b, 0b)		\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		default:						\
+			__ ## op ## _check_overflow_wrong_size();	\
+		}							\
+		__ret;							\
+	})
+#else
+#define __xchg_op_check_overflow(ptr, arg, op, lock) __xchg_op(ptr, arg, op, lock)
+#endif
+
 /*
  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
  * Since this is generally used to protect other memory information, we
@@ -167,6 +203,9 @@ extern void __add_wrong_size(void)
 #define xadd_sync(ptr, inc)	__xadd((ptr), (inc), "lock; ")
 #define xadd_local(ptr, inc)	__xadd((ptr), (inc), "")
 
+#define __xadd_check_overflow(ptr, inc, lock)	__xchg_op_check_overflow((ptr), (inc), xadd, lock)
+#define xadd_check_overflow(ptr, inc)		__xadd_check_overflow((ptr), (inc), LOCK_PREFIX)
+
 #define __add(ptr, inc, lock)						\
 	({								\
 	        __typeof__ (*(ptr)) __ret = (inc);			\
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/compat.h linux-4.0.9-pax/arch/x86/include/asm/compat.h
--- linux-4.0.9/arch/x86/include/asm/compat.h	2015-03-18 15:21:50.236349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/compat.h	2015-04-15 12:13:52.902318623 +0200
@@ -41,7 +41,7 @@ typedef s64 __attribute__((aligned(4)))
 typedef u32		compat_uint_t;
 typedef u32		compat_ulong_t;
 typedef u64 __attribute__((aligned(4))) compat_u64;
-typedef u32		compat_uptr_t;
+typedef u32		__user compat_uptr_t;
 
 struct compat_timespec {
 	compat_time_t	tv_sec;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/cpufeature.h linux-4.0.9-pax/arch/x86/include/asm/cpufeature.h
--- linux-4.0.9/arch/x86/include/asm/cpufeature.h	2015-04-13 11:21:01.622617473 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/cpufeature.h	2015-04-15 12:13:52.902318623 +0200
@@ -213,7 +213,7 @@
 #define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
 #define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
 #define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
-
+#define X86_FEATURE_STRONGUDEREF (8*32+31) /* PaX PCID based strong UDEREF */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
 #define X86_FEATURE_FSGSBASE	( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
@@ -221,7 +221,7 @@
 #define X86_FEATURE_BMI1	( 9*32+ 3) /* 1st group bit manipulation extensions */
 #define X86_FEATURE_HLE		( 9*32+ 4) /* Hardware Lock Elision */
 #define X86_FEATURE_AVX2	( 9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_SMEP	( 9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_SMEP	( 9*32+ 7) /* Supervisor Mode Execution Prevention */
 #define X86_FEATURE_BMI2	( 9*32+ 8) /* 2nd group bit manipulation extensions */
 #define X86_FEATURE_ERMS	( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
 #define X86_FEATURE_INVPCID	( 9*32+10) /* Invalidate Processor Context ID */
@@ -390,6 +390,7 @@ extern const char * const x86_bug_flags[
 #define cpu_has_eager_fpu	boot_cpu_has(X86_FEATURE_EAGER_FPU)
 #define cpu_has_topoext		boot_cpu_has(X86_FEATURE_TOPOEXT)
 #define cpu_has_bpext		boot_cpu_has(X86_FEATURE_BPEXT)
+#define cpu_has_pcid		boot_cpu_has(X86_FEATURE_PCID)
 
 #if __GNUC__ >= 4
 extern void warn_pre_alternatives(void);
@@ -441,7 +442,8 @@ static __always_inline __pure bool __sta
 
 #ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
 	t_warn:
-		warn_pre_alternatives();
+		if (bit != X86_FEATURE_PCID && bit != X86_FEATURE_INVPCID)
+			warn_pre_alternatives();
 		return false;
 #endif
 
@@ -461,7 +463,7 @@ static __always_inline __pure bool __sta
 			     ".section .discard,\"aw\",@progbits\n"
 			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
 			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
+			     ".section .altinstr_replacement,\"a\"\n"
 			     "3: movb $1,%0\n"
 			     "4:\n"
 			     ".previous\n"
@@ -498,7 +500,7 @@ static __always_inline __pure bool _stat
 			 " .byte 2b - 1b\n"		/* src len */
 			 " .byte 4f - 3f\n"		/* repl len */
 			 ".previous\n"
-			 ".section .altinstr_replacement,\"ax\"\n"
+			 ".section .altinstr_replacement,\"a\"\n"
 			 "3: .byte 0xe9\n .long %l[t_no] - 2b\n"
 			 "4:\n"
 			 ".previous\n"
@@ -531,7 +533,7 @@ static __always_inline __pure bool _stat
 			     ".section .discard,\"aw\",@progbits\n"
 			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
 			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
+			     ".section .altinstr_replacement,\"a\"\n"
 			     "3: movb $0,%0\n"
 			     "4:\n"
 			     ".previous\n"
@@ -545,7 +547,7 @@ static __always_inline __pure bool _stat
 			     ".section .discard,\"aw\",@progbits\n"
 			     " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
 			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
+			     ".section .altinstr_replacement,\"a\"\n"
 			     "5: movb $1,%0\n"
 			     "6:\n"
 			     ".previous\n"
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/desc_defs.h linux-4.0.9-pax/arch/x86/include/asm/desc_defs.h
--- linux-4.0.9/arch/x86/include/asm/desc_defs.h	2015-03-18 15:21:50.240349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/desc_defs.h	2015-04-15 12:13:52.902318623 +0200
@@ -31,6 +31,12 @@ struct desc_struct {
 			unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1;
 			unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8;
 		};
+		struct {
+			u16 offset_low;
+			u16 seg;
+			unsigned reserved: 8, type: 4, s: 1, dpl: 2, p: 1;
+			unsigned offset_high: 16;
+		} gate;
 	};
 } __attribute__((packed));
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/desc.h linux-4.0.9-pax/arch/x86/include/asm/desc.h
--- linux-4.0.9/arch/x86/include/asm/desc.h	2015-03-18 15:21:50.240349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/desc.h	2015-04-15 12:13:52.902318623 +0200
@@ -4,6 +4,7 @@
 #include <asm/desc_defs.h>
 #include <asm/ldt.h>
 #include <asm/mmu.h>
+#include <asm/pgtable.h>
 
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -17,6 +18,7 @@ static inline void fill_ldt(struct desc_
 
 	desc->type		= (info->read_exec_only ^ 1) << 1;
 	desc->type	       |= info->contents << 2;
+	desc->type	       |= info->seg_not_present ^ 1;
 
 	desc->s			= 1;
 	desc->dpl		= 0x3;
@@ -35,19 +37,14 @@ static inline void fill_ldt(struct desc_
 }
 
 extern struct desc_ptr idt_descr;
-extern gate_desc idt_table[];
-extern struct desc_ptr debug_idt_descr;
-extern gate_desc debug_idt_table[];
-
-struct gdt_page {
-	struct desc_struct gdt[GDT_ENTRIES];
-} __attribute__((aligned(PAGE_SIZE)));
-
-DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
+extern gate_desc idt_table[IDT_ENTRIES];
+extern const struct desc_ptr debug_idt_descr;
+extern gate_desc debug_idt_table[IDT_ENTRIES];
 
+extern struct desc_struct cpu_gdt_table[NR_CPUS][PAGE_SIZE / sizeof(struct desc_struct)];
 static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
 {
-	return per_cpu(gdt_page, cpu).gdt;
+	return cpu_gdt_table[cpu];
 }
 
 #ifdef CONFIG_X86_64
@@ -72,8 +69,14 @@ static inline void pack_gate(gate_desc *
 			     unsigned long base, unsigned dpl, unsigned flags,
 			     unsigned short seg)
 {
-	gate->a = (seg << 16) | (base & 0xffff);
-	gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8);
+	gate->gate.offset_low	= base;
+	gate->gate.seg		= seg;
+	gate->gate.reserved	= 0;
+	gate->gate.type		= type;
+	gate->gate.s		= 0;
+	gate->gate.dpl		= dpl;
+	gate->gate.p		= 1;
+	gate->gate.offset_high	= base >> 16;
 }
 
 #endif
@@ -118,12 +121,16 @@ static inline void paravirt_free_ldt(str
 
 static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate)
 {
+	pax_open_kernel();
 	memcpy(&idt[entry], gate, sizeof(*gate));
+	pax_close_kernel();
 }
 
 static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, const void *desc)
 {
+	pax_open_kernel();
 	memcpy(&ldt[entry], desc, 8);
+	pax_close_kernel();
 }
 
 static inline void
@@ -137,7 +144,9 @@ native_write_gdt_entry(struct desc_struc
 	default:	size = sizeof(*gdt);		break;
 	}
 
+	pax_open_kernel();
 	memcpy(&gdt[entry], desc, size);
+	pax_close_kernel();
 }
 
 static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
@@ -210,7 +219,9 @@ static inline void native_set_ldt(const
 
 static inline void native_load_tr_desc(void)
 {
+	pax_open_kernel();
 	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+	pax_close_kernel();
 }
 
 static inline void native_load_gdt(const struct desc_ptr *dtr)
@@ -247,8 +258,10 @@ static inline void native_load_tls(struc
 	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
 	unsigned int i;
 
+	pax_open_kernel();
 	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
 		gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
+	pax_close_kernel();
 }
 
 /* This intentionally ignores lm, since 32-bit apps don't have that field. */
@@ -295,7 +308,7 @@ static inline void load_LDT(mm_context_t
 	preempt_enable();
 }
 
-static inline unsigned long get_desc_base(const struct desc_struct *desc)
+static inline unsigned long __intentional_overflow(-1) get_desc_base(const struct desc_struct *desc)
 {
 	return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
 }
@@ -319,7 +332,7 @@ static inline void set_desc_limit(struct
 }
 
 #ifdef CONFIG_X86_64
-static inline void set_nmi_gate(int gate, void *addr)
+static inline void set_nmi_gate(int gate, const void *addr)
 {
 	gate_desc s;
 
@@ -329,14 +342,14 @@ static inline void set_nmi_gate(int gate
 #endif
 
 #ifdef CONFIG_TRACING
-extern struct desc_ptr trace_idt_descr;
-extern gate_desc trace_idt_table[];
+extern const struct desc_ptr trace_idt_descr;
+extern gate_desc trace_idt_table[IDT_ENTRIES];
 static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
 {
 	write_idt_entry(trace_idt_table, entry, gate);
 }
 
-static inline void _trace_set_gate(int gate, unsigned type, void *addr,
+static inline void _trace_set_gate(int gate, unsigned type, const void *addr,
 				   unsigned dpl, unsigned ist, unsigned seg)
 {
 	gate_desc s;
@@ -356,7 +369,7 @@ static inline void write_trace_idt_entry
 #define _trace_set_gate(gate, type, addr, dpl, ist, seg)
 #endif
 
-static inline void _set_gate(int gate, unsigned type, void *addr,
+static inline void _set_gate(int gate, unsigned type, const void *addr,
 			     unsigned dpl, unsigned ist, unsigned seg)
 {
 	gate_desc s;
@@ -379,9 +392,9 @@ static inline void _set_gate(int gate, u
 #define set_intr_gate(n, addr)						\
 	do {								\
 		BUG_ON((unsigned)n > 0xFF);				\
-		_set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0,	\
+		_set_gate(n, GATE_INTERRUPT, (const void *)addr, 0, 0,	\
 			  __KERNEL_CS);					\
-		_trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
+		_trace_set_gate(n, GATE_INTERRUPT, (const void *)trace_##addr,\
 				0, 0, __KERNEL_CS);			\
 	} while (0)
 
@@ -409,19 +422,19 @@ static inline void alloc_system_vector(i
 /*
  * This routine sets up an interrupt gate at directory privilege level 3.
  */
-static inline void set_system_intr_gate(unsigned int n, void *addr)
+static inline void set_system_intr_gate(unsigned int n, const void *addr)
 {
 	BUG_ON((unsigned)n > 0xFF);
 	_set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
 }
 
-static inline void set_system_trap_gate(unsigned int n, void *addr)
+static inline void set_system_trap_gate(unsigned int n, const void *addr)
 {
 	BUG_ON((unsigned)n > 0xFF);
 	_set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS);
 }
 
-static inline void set_trap_gate(unsigned int n, void *addr)
+static inline void set_trap_gate(unsigned int n, const void *addr)
 {
 	BUG_ON((unsigned)n > 0xFF);
 	_set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS);
@@ -430,16 +443,16 @@ static inline void set_trap_gate(unsigne
 static inline void set_task_gate(unsigned int n, unsigned int gdt_entry)
 {
 	BUG_ON((unsigned)n > 0xFF);
-	_set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3));
+	_set_gate(n, GATE_TASK, (const void *)0, 0, 0, (gdt_entry<<3));
 }
 
-static inline void set_intr_gate_ist(int n, void *addr, unsigned ist)
+static inline void set_intr_gate_ist(int n, const void *addr, unsigned ist)
 {
 	BUG_ON((unsigned)n > 0xFF);
 	_set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS);
 }
 
-static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
+static inline void set_system_intr_gate_ist(int n, const void *addr, unsigned ist)
 {
 	BUG_ON((unsigned)n > 0xFF);
 	_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
@@ -511,4 +524,17 @@ static inline void load_current_idt(void
 	else
 		load_idt((const struct desc_ptr *)&idt_descr);
 }
+
+#ifdef CONFIG_X86_32
+static inline void set_user_cs(unsigned long base, unsigned long limit, int cpu)
+{
+	struct desc_struct d;
+
+	if (likely(limit))
+		limit = (limit - 1UL) >> PAGE_SHIFT;
+	pack_descriptor(&d, base, limit, 0xFB, 0xC);
+	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_DEFAULT_USER_CS, &d, DESCTYPE_S);
+}
+#endif
+
 #endif /* _ASM_X86_DESC_H */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/div64.h linux-4.0.9-pax/arch/x86/include/asm/div64.h
--- linux-4.0.9/arch/x86/include/asm/div64.h	2015-03-18 15:21:50.240349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/div64.h	2015-04-15 12:13:52.902318623 +0200
@@ -39,7 +39,7 @@
 	__mod;							\
 })
 
-static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+static inline u64 __intentional_overflow(-1) div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
 {
 	union {
 		u64 v64;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/elf.h linux-4.0.9-pax/arch/x86/include/asm/elf.h
--- linux-4.0.9/arch/x86/include/asm/elf.h	2015-03-18 15:21:50.240349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/elf.h	2015-04-15 12:13:52.902318623 +0200
@@ -75,9 +75,6 @@ typedef struct user_fxsr_struct elf_fpxr
 
 #include <asm/vdso.h>
 
-#ifdef CONFIG_X86_64
-extern unsigned int vdso64_enabled;
-#endif
 #if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
 extern unsigned int vdso32_enabled;
 #endif
@@ -249,7 +246,25 @@ extern int force_personality32;
    the loader.  We need to make sure that it is out of the way of the program
    that it will "exec", and that there is sufficient room for the brk.  */
 
+#ifdef CONFIG_PAX_SEGMEXEC
+#define ELF_ET_DYN_BASE		((current->mm->pax_flags & MF_PAX_SEGMEXEC) ? SEGMEXEC_TASK_SIZE/3*2 : TASK_SIZE/3*2)
+#else
 #define ELF_ET_DYN_BASE		(TASK_SIZE / 3 * 2)
+#endif
+
+#ifdef CONFIG_PAX_ASLR
+#ifdef CONFIG_X86_32
+#define PAX_ELF_ET_DYN_BASE	0x10000000UL
+
+#define PAX_DELTA_MMAP_LEN	(current->mm->pax_flags & MF_PAX_SEGMEXEC ? 15 : 16)
+#define PAX_DELTA_STACK_LEN	(current->mm->pax_flags & MF_PAX_SEGMEXEC ? 15 : 16)
+#else
+#define PAX_ELF_ET_DYN_BASE	0x400000UL
+
+#define PAX_DELTA_MMAP_LEN	((test_thread_flag(TIF_ADDR32)) ? 16 : TASK_SIZE_MAX_SHIFT - PAGE_SHIFT - 3)
+#define PAX_DELTA_STACK_LEN	((test_thread_flag(TIF_ADDR32)) ? 16 : TASK_SIZE_MAX_SHIFT - PAGE_SHIFT - 3)
+#endif
+#endif
 
 /* This yields a mask that user programs can use to figure out what
    instruction set this CPU supports.  This could be done in user space,
@@ -298,17 +313,13 @@ do {									\
 
 #define ARCH_DLINFO							\
 do {									\
-	if (vdso64_enabled)						\
-		NEW_AUX_ENT(AT_SYSINFO_EHDR,				\
-			    (unsigned long __force)current->mm->context.vdso); \
+	NEW_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso);	\
 } while (0)
 
 /* As a historical oddity, the x32 and x86_64 vDSOs are controlled together. */
 #define ARCH_DLINFO_X32							\
 do {									\
-	if (vdso64_enabled)						\
-		NEW_AUX_ENT(AT_SYSINFO_EHDR,				\
-			    (unsigned long __force)current->mm->context.vdso); \
+	NEW_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso);	\
 } while (0)
 
 #define AT_SYSINFO		32
@@ -323,10 +334,10 @@ else									\
 
 #endif /* !CONFIG_X86_32 */
 
-#define VDSO_CURRENT_BASE	((unsigned long)current->mm->context.vdso)
+#define VDSO_CURRENT_BASE	(current->mm->context.vdso)
 
 #define VDSO_ENTRY							\
-	((unsigned long)current->mm->context.vdso +			\
+	(current->mm->context.vdso +					\
 	 selected_vdso32->sym___kernel_vsyscall)
 
 struct linux_binprm;
@@ -338,9 +349,6 @@ extern int compat_arch_setup_additional_
 					      int uses_interp);
 #define compat_arch_setup_additional_pages compat_arch_setup_additional_pages
 
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
 /*
  * True on X86_32 or when emulating IA32 on X86_64
  */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/emergency-restart.h linux-4.0.9-pax/arch/x86/include/asm/emergency-restart.h
--- linux-4.0.9/arch/x86/include/asm/emergency-restart.h	2015-03-18 15:21:50.240349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/emergency-restart.h	2015-04-15 12:13:52.902318623 +0200
@@ -1,6 +1,6 @@
 #ifndef _ASM_X86_EMERGENCY_RESTART_H
 #define _ASM_X86_EMERGENCY_RESTART_H
 
-extern void machine_emergency_restart(void);
+extern void machine_emergency_restart(void) __noreturn;
 
 #endif /* _ASM_X86_EMERGENCY_RESTART_H */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/fpu-internal.h linux-4.0.9-pax/arch/x86/include/asm/fpu-internal.h
--- linux-4.0.9/arch/x86/include/asm/fpu-internal.h	2015-04-13 11:21:01.622617473 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/fpu-internal.h	2015-04-15 12:13:52.902318623 +0200
@@ -124,8 +124,11 @@ static inline void sanitize_i387_state(s
 #define user_insn(insn, output, input...)				\
 ({									\
 	int err;							\
+	pax_open_userland();						\
 	asm volatile(ASM_STAC "\n"					\
-		     "1:" #insn "\n\t"					\
+		     "1:"						\
+		     __copyuser_seg					\
+		     #insn "\n\t"					\
 		     "2: " ASM_CLAC "\n"				\
 		     ".section .fixup,\"ax\"\n"				\
 		     "3:  movl $-1,%[err]\n"				\
@@ -134,6 +137,7 @@ static inline void sanitize_i387_state(s
 		     _ASM_EXTABLE(1b, 3b)				\
 		     : [err] "=r" (err), output				\
 		     : "0"(0), input);					\
+	pax_close_userland();						\
 	err;								\
 })
 
@@ -300,7 +304,7 @@ static inline int restore_fpu_checking(s
 			"fnclex\n\t"
 			"emms\n\t"
 			"fildl %P[addr]"	/* set F?P to defined value */
-			: : [addr] "m" (tsk->thread.fpu.has_fpu));
+			: : [addr] "m" (init_tss[raw_smp_processor_id()].x86_tss.sp0));
 	}
 
 	return fpu_restore_checking(&tsk->thread.fpu);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/futex.h linux-4.0.9-pax/arch/x86/include/asm/futex.h
--- linux-4.0.9/arch/x86/include/asm/futex.h	2015-03-18 15:21:50.240349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/futex.h	2015-04-15 12:13:52.902318623 +0200
@@ -12,6 +12,7 @@
 #include <asm/smap.h>
 
 #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg)	\
+	typecheck(u32 __user *, uaddr);				\
 	asm volatile("\t" ASM_STAC "\n"				\
 		     "1:\t" insn "\n"				\
 		     "2:\t" ASM_CLAC "\n"			\
@@ -20,15 +21,16 @@
 		     "\tjmp\t2b\n"				\
 		     "\t.previous\n"				\
 		     _ASM_EXTABLE(1b, 3b)			\
-		     : "=r" (oldval), "=r" (ret), "+m" (*uaddr)	\
+		     : "=r" (oldval), "=r" (ret), "+m" (*(u32 __user *)____m(uaddr))	\
 		     : "i" (-EFAULT), "0" (oparg), "1" (0))
 
 #define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg)	\
+	typecheck(u32 __user *, uaddr);				\
 	asm volatile("\t" ASM_STAC "\n"				\
 		     "1:\tmovl	%2, %0\n"			\
 		     "\tmovl\t%0, %3\n"				\
 		     "\t" insn "\n"				\
-		     "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"	\
+		     "2:\t" LOCK_PREFIX __copyuser_seg"cmpxchgl %3, %2\n"	\
 		     "\tjnz\t1b\n"				\
 		     "3:\t" ASM_CLAC "\n"			\
 		     "\t.section .fixup,\"ax\"\n"		\
@@ -38,7 +40,7 @@
 		     _ASM_EXTABLE(1b, 4b)			\
 		     _ASM_EXTABLE(2b, 4b)			\
 		     : "=&a" (oldval), "=&r" (ret),		\
-		       "+m" (*uaddr), "=&r" (tem)		\
+		       "+m" (*(u32 __user *)____m(uaddr)), "=&r" (tem)	\
 		     : "r" (oparg), "i" (-EFAULT), "1" (0))
 
 static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
@@ -57,12 +59,13 @@ static inline int futex_atomic_op_inuser
 
 	pagefault_disable();
 
+	pax_open_userland();
 	switch (op) {
 	case FUTEX_OP_SET:
-		__futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
+		__futex_atomic_op1(__copyuser_seg"xchgl %0, %2", ret, oldval, uaddr, oparg);
 		break;
 	case FUTEX_OP_ADD:
-		__futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval,
+		__futex_atomic_op1(LOCK_PREFIX __copyuser_seg"xaddl %0, %2", ret, oldval,
 				   uaddr, oparg);
 		break;
 	case FUTEX_OP_OR:
@@ -77,6 +80,7 @@ static inline int futex_atomic_op_inuser
 	default:
 		ret = -ENOSYS;
 	}
+	pax_close_userland();
 
 	pagefault_enable();
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/hw_irq.h linux-4.0.9-pax/arch/x86/include/asm/hw_irq.h
--- linux-4.0.9/arch/x86/include/asm/hw_irq.h	2015-03-18 15:21:50.240349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/hw_irq.h	2015-04-15 12:13:52.902318623 +0200
@@ -160,8 +160,8 @@ static inline void unlock_vector_lock(vo
 #endif	/* CONFIG_X86_LOCAL_APIC */
 
 /* Statistics */
-extern atomic_t irq_err_count;
-extern atomic_t irq_mis_count;
+extern atomic_unchecked_t irq_err_count;
+extern atomic_unchecked_t irq_mis_count;
 
 /* EISA */
 extern void eisa_set_level_irq(unsigned int irq);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/i8259.h linux-4.0.9-pax/arch/x86/include/asm/i8259.h
--- linux-4.0.9/arch/x86/include/asm/i8259.h	2015-03-18 15:21:50.240349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/i8259.h	2015-04-15 12:13:52.902318623 +0200
@@ -62,7 +62,7 @@ struct legacy_pic {
 	void (*init)(int auto_eoi);
 	int (*irq_pending)(unsigned int irq);
 	void (*make_irq)(unsigned int irq);
-};
+} __do_const;
 
 extern struct legacy_pic *legacy_pic;
 extern struct legacy_pic null_legacy_pic;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/io.h linux-4.0.9-pax/arch/x86/include/asm/io.h
--- linux-4.0.9/arch/x86/include/asm/io.h	2015-03-18 15:21:50.240349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/io.h	2015-04-15 12:13:52.902318623 +0200
@@ -52,12 +52,12 @@ static inline void name(type val, volati
 "m" (*(volatile type __force *)addr) barrier); }
 
 build_mmio_read(readb, "b", unsigned char, "=q", :"memory")
-build_mmio_read(readw, "w", unsigned short, "=r", :"memory")
-build_mmio_read(readl, "l", unsigned int, "=r", :"memory")
+build_mmio_read(__intentional_overflow(-1) readw, "w", unsigned short, "=r", :"memory")
+build_mmio_read(__intentional_overflow(-1) readl, "l", unsigned int, "=r", :"memory")
 
 build_mmio_read(__readb, "b", unsigned char, "=q", )
-build_mmio_read(__readw, "w", unsigned short, "=r", )
-build_mmio_read(__readl, "l", unsigned int, "=r", )
+build_mmio_read(__intentional_overflow(-1) __readw, "w", unsigned short, "=r", )
+build_mmio_read(__intentional_overflow(-1) __readl, "l", unsigned int, "=r", )
 
 build_mmio_write(writeb, "b", unsigned char, "q", :"memory")
 build_mmio_write(writew, "w", unsigned short, "r", :"memory")
@@ -189,7 +189,7 @@ static inline void __iomem *ioremap(reso
 	return ioremap_nocache(offset, size);
 }
 
-extern void iounmap(volatile void __iomem *addr);
+extern void iounmap(const volatile void __iomem *addr);
 
 extern void set_iounmap_nonlazy(void);
 
@@ -199,6 +199,17 @@ extern void set_iounmap_nonlazy(void);
 
 #include <linux/vmalloc.h>
 
+#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
+static inline int valid_phys_addr_range(unsigned long addr, size_t count)
+{
+	return ((addr + count + PAGE_SIZE - 1) >> PAGE_SHIFT) < (1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) ? 1 : 0;
+}
+
+static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t count)
+{
+	return (pfn + (count >> PAGE_SHIFT)) < (1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) ? 1 : 0;
+}
+
 /*
  * Convert a virtual cached pointer to an uncached pointer
  */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/irqflags.h linux-4.0.9-pax/arch/x86/include/asm/irqflags.h
--- linux-4.0.9/arch/x86/include/asm/irqflags.h	2015-03-18 15:21:50.240349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/irqflags.h	2015-04-15 12:13:52.902318623 +0200
@@ -141,6 +141,11 @@ static inline notrace unsigned long arch
 	sti;					\
 	sysexit
 
+#define GET_CR0_INTO_RDI		mov %cr0, %rdi
+#define SET_RDI_INTO_CR0		mov %rdi, %cr0
+#define GET_CR3_INTO_RDI		mov %cr3, %rdi
+#define SET_RDI_INTO_CR3		mov %rdi, %cr3
+
 #else
 #define INTERRUPT_RETURN		iret
 #define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/kprobes.h linux-4.0.9-pax/arch/x86/include/asm/kprobes.h
--- linux-4.0.9/arch/x86/include/asm/kprobes.h	2015-03-18 15:21:50.240349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/kprobes.h	2015-04-15 12:13:52.902318623 +0200
@@ -37,13 +37,8 @@ typedef u8 kprobe_opcode_t;
 #define RELATIVEJUMP_SIZE 5
 #define RELATIVECALL_OPCODE 0xe8
 #define RELATIVE_ADDR_SIZE 4
-#define MAX_STACK_SIZE 64
-#define MIN_STACK_SIZE(ADDR)					       \
-	(((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \
-			      THREAD_SIZE - (unsigned long)(ADDR)))    \
-	 ? (MAX_STACK_SIZE)					       \
-	 : (((unsigned long)current_thread_info()) +		       \
-	    THREAD_SIZE - (unsigned long)(ADDR)))
+#define MAX_STACK_SIZE 64UL
+#define MIN_STACK_SIZE(ADDR)	min(MAX_STACK_SIZE, current->thread.sp0 - (unsigned long)(ADDR))
 
 #define flush_insn_slot(p)	do { } while (0)
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/local.h linux-4.0.9-pax/arch/x86/include/asm/local.h
--- linux-4.0.9/arch/x86/include/asm/local.h	2015-03-18 15:21:50.240349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/local.h	2015-04-15 12:13:52.902318623 +0200
@@ -10,33 +10,97 @@ typedef struct {
 	atomic_long_t a;
 } local_t;
 
+typedef struct {
+	atomic_long_unchecked_t a;
+} local_unchecked_t;
+
 #define LOCAL_INIT(i)	{ ATOMIC_LONG_INIT(i) }
 
 #define local_read(l)	atomic_long_read(&(l)->a)
+#define local_read_unchecked(l)	atomic_long_read_unchecked(&(l)->a)
 #define local_set(l, i)	atomic_long_set(&(l)->a, (i))
+#define local_set_unchecked(l, i)	atomic_long_set_unchecked(&(l)->a, (i))
 
 static inline void local_inc(local_t *l)
 {
-	asm volatile(_ASM_INC "%0"
+	asm volatile(_ASM_INC "%0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     _ASM_DEC "%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
+		     : "+m" (l->a.counter));
+}
+
+static inline void local_inc_unchecked(local_unchecked_t *l)
+{
+	asm volatile(_ASM_INC "%0\n"
 		     : "+m" (l->a.counter));
 }
 
 static inline void local_dec(local_t *l)
 {
-	asm volatile(_ASM_DEC "%0"
+	asm volatile(_ASM_DEC "%0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     _ASM_INC "%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
+		     : "+m" (l->a.counter));
+}
+
+static inline void local_dec_unchecked(local_unchecked_t *l)
+{
+	asm volatile(_ASM_DEC "%0\n"
 		     : "+m" (l->a.counter));
 }
 
 static inline void local_add(long i, local_t *l)
 {
-	asm volatile(_ASM_ADD "%1,%0"
+	asm volatile(_ASM_ADD "%1,%0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     _ASM_SUB "%1,%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
+		     : "+m" (l->a.counter)
+		     : "ir" (i));
+}
+
+static inline void local_add_unchecked(long i, local_unchecked_t *l)
+{
+	asm volatile(_ASM_ADD "%1,%0\n"
 		     : "+m" (l->a.counter)
 		     : "ir" (i));
 }
 
 static inline void local_sub(long i, local_t *l)
 {
-	asm volatile(_ASM_SUB "%1,%0"
+	asm volatile(_ASM_SUB "%1,%0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     _ASM_ADD "%1,%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
+		     : "+m" (l->a.counter)
+		     : "ir" (i));
+}
+
+static inline void local_sub_unchecked(long i, local_unchecked_t *l)
+{
+	asm volatile(_ASM_SUB "%1,%0\n"
 		     : "+m" (l->a.counter)
 		     : "ir" (i));
 }
@@ -52,7 +116,7 @@ static inline void local_sub(long i, loc
  */
 static inline int local_sub_and_test(long i, local_t *l)
 {
-	GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", "e");
+	GEN_BINARY_RMWcc(_ASM_SUB, _ASM_ADD, l->a.counter, "er", i, "%0", "e");
 }
 
 /**
@@ -65,7 +129,7 @@ static inline int local_sub_and_test(lon
  */
 static inline int local_dec_and_test(local_t *l)
 {
-	GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e");
+	GEN_UNARY_RMWcc(_ASM_DEC, _ASM_INC, l->a.counter, "%0", "e");
 }
 
 /**
@@ -78,7 +142,7 @@ static inline int local_dec_and_test(loc
  */
 static inline int local_inc_and_test(local_t *l)
 {
-	GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e");
+	GEN_UNARY_RMWcc(_ASM_INC, _ASM_DEC, l->a.counter, "%0", "e");
 }
 
 /**
@@ -92,7 +156,7 @@ static inline int local_inc_and_test(loc
  */
 static inline int local_add_negative(long i, local_t *l)
 {
-	GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", "s");
+	GEN_BINARY_RMWcc(_ASM_ADD, _ASM_SUB, l->a.counter, "er", i, "%0", "s");
 }
 
 /**
@@ -105,6 +169,30 @@ static inline int local_add_negative(lon
 static inline long local_add_return(long i, local_t *l)
 {
 	long __i = i;
+	asm volatile(_ASM_XADD "%0, %1\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     _ASM_MOV "%0,%1\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
+		     : "+r" (i), "+m" (l->a.counter)
+		     : : "memory");
+	return i + __i;
+}
+
+/**
+ * local_add_return_unchecked - add and return
+ * @i: integer value to add
+ * @l: pointer to type local_unchecked_t
+ *
+ * Atomically adds @i to @l and returns @i + @l
+ */
+static inline long local_add_return_unchecked(long i, local_unchecked_t *l)
+{
+	long __i = i;
 	asm volatile(_ASM_XADD "%0, %1;"
 		     : "+r" (i), "+m" (l->a.counter)
 		     : : "memory");
@@ -121,6 +209,8 @@ static inline long local_sub_return(long
 
 #define local_cmpxchg(l, o, n) \
 	(cmpxchg_local(&((l)->a.counter), (o), (n)))
+#define local_cmpxchg_unchecked(l, o, n) \
+	(cmpxchg_local(&((l)->a.counter), (o), (n)))
 /* Always has a lock prefix */
 #define local_xchg(l, n) (xchg(&((l)->a.counter), (n)))
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/mman.h linux-4.0.9-pax/arch/x86/include/asm/mman.h
--- linux-4.0.9/arch/x86/include/asm/mman.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/mman.h	2015-04-15 12:13:52.902318623 +0200
@@ -0,0 +1,15 @@
+#ifndef _X86_MMAN_H
+#define _X86_MMAN_H
+
+#include <uapi/asm/mman.h>
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_X86_32
+#define arch_mmap_check	i386_mmap_check
+int i386_mmap_check(unsigned long addr, unsigned long len, unsigned long flags);
+#endif
+#endif
+#endif
+
+#endif /* _X86_MMAN_H */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/mmu_context.h linux-4.0.9-pax/arch/x86/include/asm/mmu_context.h
--- linux-4.0.9/arch/x86/include/asm/mmu_context.h	2015-04-13 11:21:01.662617471 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/mmu_context.h	2015-06-28 00:12:23.510445227 +0200
@@ -42,6 +42,20 @@ void destroy_context(struct mm_struct *m
 
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+	if (!(static_cpu_has(X86_FEATURE_PCID))) {
+		unsigned int i;
+		pgd_t *pgd;
+
+		pax_open_kernel();
+		pgd = get_cpu_pgd(smp_processor_id(), kernel);
+		for (i = USER_PGD_PTRS; i < 2 * USER_PGD_PTRS; ++i)
+			set_pgd_batched(pgd+i, native_make_pgd(0));
+		pax_close_kernel();
+	}
+#endif
+
 #ifdef CONFIG_SMP
 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
 		this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
@@ -52,16 +66,59 @@ static inline void switch_mm(struct mm_s
 			     struct task_struct *tsk)
 {
 	unsigned cpu = smp_processor_id();
+#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) && (defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC))
+	int tlbstate = TLBSTATE_OK;
+#endif
 
 	if (likely(prev != next)) {
 #ifdef CONFIG_SMP
+#if defined(CONFIG_X86_32) && (defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC))
+		tlbstate = this_cpu_read(cpu_tlbstate.state);
+#endif
 		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
 		this_cpu_write(cpu_tlbstate.active_mm, next);
 #endif
 		cpumask_set_cpu(cpu, mm_cpumask(next));
 
 		/* Re-load page tables */
+#ifdef CONFIG_PAX_PER_CPU_PGD
+		pax_open_kernel();
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+		if (static_cpu_has(X86_FEATURE_PCID))
+			__clone_user_pgds(get_cpu_pgd(cpu, user), next->pgd);
+		else
+#endif
+
+		__clone_user_pgds(get_cpu_pgd(cpu, kernel), next->pgd);
+		__shadow_user_pgds(get_cpu_pgd(cpu, kernel) + USER_PGD_PTRS, next->pgd);
+		pax_close_kernel();
+		BUG_ON((__pa(get_cpu_pgd(cpu, kernel)) | PCID_KERNEL) != (read_cr3() & __PHYSICAL_MASK) && (__pa(get_cpu_pgd(cpu, user)) | PCID_USER) != (read_cr3() & __PHYSICAL_MASK));
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+		if (static_cpu_has(X86_FEATURE_PCID)) {
+			if (static_cpu_has(X86_FEATURE_INVPCID)) {
+				u64 descriptor[2];
+				descriptor[0] = PCID_USER;
+				asm volatile(__ASM_INVPCID : : "d"(&descriptor), "a"(INVPCID_SINGLE_CONTEXT) : "memory");
+				if (!static_cpu_has(X86_FEATURE_STRONGUDEREF)) {
+					descriptor[0] = PCID_KERNEL;
+					asm volatile(__ASM_INVPCID : : "d"(&descriptor), "a"(INVPCID_SINGLE_CONTEXT) : "memory");
+				}
+			} else {
+				write_cr3(__pa(get_cpu_pgd(cpu, user)) | PCID_USER);
+				if (static_cpu_has(X86_FEATURE_STRONGUDEREF))
+					write_cr3(__pa(get_cpu_pgd(cpu, kernel)) | PCID_KERNEL | PCID_NOFLUSH);
+				else
+					write_cr3(__pa(get_cpu_pgd(cpu, kernel)) | PCID_KERNEL);
+			}
+		} else
+#endif
+
+			load_cr3(get_cpu_pgd(cpu, kernel));
+#else
 		load_cr3(next->pgd);
+#endif
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 
 		/* Stop flush ipis for the previous mm */
@@ -84,9 +141,67 @@ static inline void switch_mm(struct mm_s
 		 */
 		if (unlikely(prev->context.ldt != next->context.ldt))
 			load_LDT_nolock(&next->context);
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP)
+		if (!(__supported_pte_mask & _PAGE_NX)) {
+			smp_mb__before_atomic();
+			cpumask_clear_cpu(cpu, &prev->context.cpu_user_cs_mask);
+			smp_mb__after_atomic();
+			cpumask_set_cpu(cpu, &next->context.cpu_user_cs_mask);
+		}
+#endif
+
+#if defined(CONFIG_X86_32) && (defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC))
+		if (unlikely(prev->context.user_cs_base != next->context.user_cs_base ||
+			     prev->context.user_cs_limit != next->context.user_cs_limit))
+			set_user_cs(next->context.user_cs_base, next->context.user_cs_limit, cpu);
+#ifdef CONFIG_SMP
+		else if (unlikely(tlbstate != TLBSTATE_OK))
+			set_user_cs(next->context.user_cs_base, next->context.user_cs_limit, cpu);
+#endif
+#endif
+
 	}
+	else {
+
+#ifdef CONFIG_PAX_PER_CPU_PGD
+		pax_open_kernel();
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+		if (static_cpu_has(X86_FEATURE_PCID))
+			__clone_user_pgds(get_cpu_pgd(cpu, user), next->pgd);
+		else
+#endif
+
+		__clone_user_pgds(get_cpu_pgd(cpu, kernel), next->pgd);
+		__shadow_user_pgds(get_cpu_pgd(cpu, kernel) + USER_PGD_PTRS, next->pgd);
+		pax_close_kernel();
+		BUG_ON((__pa(get_cpu_pgd(cpu, kernel)) | PCID_KERNEL) != (read_cr3() & __PHYSICAL_MASK) && (__pa(get_cpu_pgd(cpu, user)) | PCID_USER) != (read_cr3() & __PHYSICAL_MASK));
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+		if (static_cpu_has(X86_FEATURE_PCID)) {
+			if (static_cpu_has(X86_FEATURE_INVPCID)) {
+				u64 descriptor[2];
+				descriptor[0] = PCID_USER;
+				asm volatile(__ASM_INVPCID : : "d"(&descriptor), "a"(INVPCID_SINGLE_CONTEXT) : "memory");
+				if (!static_cpu_has(X86_FEATURE_STRONGUDEREF)) {
+					descriptor[0] = PCID_KERNEL;
+					asm volatile(__ASM_INVPCID : : "d"(&descriptor), "a"(INVPCID_SINGLE_CONTEXT) : "memory");
+				}
+			} else {
+				write_cr3(__pa(get_cpu_pgd(cpu, user)) | PCID_USER);
+				if (static_cpu_has(X86_FEATURE_STRONGUDEREF))
+					write_cr3(__pa(get_cpu_pgd(cpu, kernel)) | PCID_KERNEL | PCID_NOFLUSH);
+				else
+					write_cr3(__pa(get_cpu_pgd(cpu, kernel)) | PCID_KERNEL);
+			}
+		} else
+#endif
+
+			load_cr3(get_cpu_pgd(cpu, kernel));
+#endif
+
 #ifdef CONFIG_SMP
-	  else {
 		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
 		BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
 
@@ -103,13 +218,30 @@ static inline void switch_mm(struct mm_s
 			 * tlb flush IPI delivery. We must reload CR3
 			 * to make sure to use no freed page tables.
 			 */
+
+#ifndef CONFIG_PAX_PER_CPU_PGD
 			load_cr3(next->pgd);
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+#endif
+
 			load_mm_cr4(next);
 			load_LDT_nolock(&next->context);
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC)
+			if (!(__supported_pte_mask & _PAGE_NX))
+				cpumask_set_cpu(cpu, &next->context.cpu_user_cs_mask);
+#endif
+
+#if defined(CONFIG_X86_32) && (defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC))
+#ifdef CONFIG_PAX_PAGEEXEC
+			if (!((next->pax_flags & MF_PAX_PAGEEXEC) && (__supported_pte_mask & _PAGE_NX)))
+#endif
+				set_user_cs(next->context.user_cs_base, next->context.user_cs_limit, cpu);
+#endif
+
 		}
-	}
 #endif
+	}
 }
 
 #define activate_mm(prev, next)			\
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/mmu.h linux-4.0.9-pax/arch/x86/include/asm/mmu.h
--- linux-4.0.9/arch/x86/include/asm/mmu.h	2015-04-13 11:21:01.662617471 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/mmu.h	2015-04-15 12:13:52.906318622 +0200
@@ -9,7 +9,7 @@
  * we put the segment information here.
  */
 typedef struct {
-	void *ldt;
+	struct desc_struct *ldt;
 	int size;
 
 #ifdef CONFIG_X86_64
@@ -18,7 +18,19 @@ typedef struct {
 #endif
 
 	struct mutex lock;
-	void __user *vdso;
+	unsigned long vdso;
+
+#ifdef CONFIG_X86_32
+#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)
+	unsigned long user_cs_base;
+	unsigned long user_cs_limit;
+
+#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP)
+	cpumask_t cpu_user_cs_mask;
+#endif
+
+#endif
+#endif
 
 	atomic_t perf_rdpmc_allowed;	/* nonzero if rdpmc is allowed */
 } mm_context_t;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/module.h linux-4.0.9-pax/arch/x86/include/asm/module.h
--- linux-4.0.9/arch/x86/include/asm/module.h	2015-03-18 15:21:50.244349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/module.h	2015-04-15 12:13:52.906318622 +0200
@@ -5,6 +5,7 @@
 
 #ifdef CONFIG_X86_64
 /* X86_64 does not define MODULE_PROC_FAMILY */
+#define MODULE_PROC_FAMILY ""
 #elif defined CONFIG_M486
 #define MODULE_PROC_FAMILY "486 "
 #elif defined CONFIG_M586
@@ -57,8 +58,20 @@
 #error unknown processor family
 #endif
 
-#ifdef CONFIG_X86_32
-# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY
+#ifdef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_BTS
+#define MODULE_PAX_KERNEXEC "KERNEXEC_BTS "
+#elif defined(CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR)
+#define MODULE_PAX_KERNEXEC "KERNEXEC_OR "
+#else
+#define MODULE_PAX_KERNEXEC ""
 #endif
 
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+#define MODULE_PAX_UDEREF "UDEREF "
+#else
+#define MODULE_PAX_UDEREF ""
+#endif
+
+#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_PAX_KERNEXEC MODULE_PAX_UDEREF
+
 #endif /* _ASM_X86_MODULE_H */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/nmi.h linux-4.0.9-pax/arch/x86/include/asm/nmi.h
--- linux-4.0.9/arch/x86/include/asm/nmi.h	2015-03-18 15:21:50.244349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/nmi.h	2015-04-15 12:13:52.906318622 +0200
@@ -36,26 +36,35 @@ enum {
 
 typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *);
 
+struct nmiaction;
+
+struct nmiwork {
+	const struct nmiaction	*action;
+	u64			max_duration;
+	struct irq_work		irq_work;
+};
+
 struct nmiaction {
 	struct list_head	list;
 	nmi_handler_t		handler;
-	u64			max_duration;
-	struct irq_work		irq_work;
 	unsigned long		flags;
 	const char		*name;
-};
+	struct nmiwork		*work;
+} __do_const;
 
 #define register_nmi_handler(t, fn, fg, n, init...)	\
 ({							\
-	static struct nmiaction init fn##_na = {	\
+	static struct nmiwork fn##_nw;			\
+	static const struct nmiaction init fn##_na = {	\
 		.handler = (fn),			\
 		.name = (n),				\
 		.flags = (fg),				\
+		.work = &fn##_nw,			\
 	};						\
 	__register_nmi_handler((t), &fn##_na);		\
 })
 
-int __register_nmi_handler(unsigned int, struct nmiaction *);
+int __register_nmi_handler(unsigned int, const struct nmiaction *);
 
 void unregister_nmi_handler(unsigned int, const char *);
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/page_64.h linux-4.0.9-pax/arch/x86/include/asm/page_64.h
--- linux-4.0.9/arch/x86/include/asm/page_64.h	2015-03-18 15:21:50.244349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/page_64.h	2015-04-15 12:13:52.906318622 +0200
@@ -7,9 +7,9 @@
 
 /* duplicated to the one in bootmem.h */
 extern unsigned long max_pfn;
-extern unsigned long phys_base;
+extern const unsigned long phys_base;
 
-static inline unsigned long __phys_addr_nodebug(unsigned long x)
+static inline unsigned long __intentional_overflow(-1) __phys_addr_nodebug(unsigned long x)
 {
 	unsigned long y = x - __START_KERNEL_map;
 
@@ -20,8 +20,8 @@ static inline unsigned long __phys_addr_
 }
 
 #ifdef CONFIG_DEBUG_VIRTUAL
-extern unsigned long __phys_addr(unsigned long);
-extern unsigned long __phys_addr_symbol(unsigned long);
+extern unsigned long __intentional_overflow(-1) __phys_addr(unsigned long);
+extern unsigned long __intentional_overflow(-1) __phys_addr_symbol(unsigned long);
 #else
 #define __phys_addr(x)		__phys_addr_nodebug(x)
 #define __phys_addr_symbol(x) \
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/page.h linux-4.0.9-pax/arch/x86/include/asm/page.h
--- linux-4.0.9/arch/x86/include/asm/page.h	2015-03-18 15:21:50.244349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/page.h	2015-04-15 12:13:52.906318622 +0200
@@ -52,6 +52,7 @@ static inline void copy_user_page(void *
 	__phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))
 
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
+#define __early_va(x)		((void *)((unsigned long)(x)+__START_KERNEL_map - phys_base))
 
 #define __boot_va(x)		__va(x)
 #define __boot_pa(x)		__pa(x)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/paravirt.h linux-4.0.9-pax/arch/x86/include/asm/paravirt.h
--- linux-4.0.9/arch/x86/include/asm/paravirt.h	2015-04-13 11:21:01.662617471 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/paravirt.h	2015-04-15 12:13:52.906318622 +0200
@@ -560,7 +560,7 @@ static inline pmd_t __pmd(pmdval_t val)
 	return (pmd_t) { ret };
 }
 
-static inline pmdval_t pmd_val(pmd_t pmd)
+static inline __intentional_overflow(-1) pmdval_t pmd_val(pmd_t pmd)
 {
 	pmdval_t ret;
 
@@ -626,6 +626,18 @@ static inline void set_pgd(pgd_t *pgdp,
 			    val);
 }
 
+static inline void set_pgd_batched(pgd_t *pgdp, pgd_t pgd)
+{
+	pgdval_t val = native_pgd_val(pgd);
+
+	if (sizeof(pgdval_t) > sizeof(long))
+		PVOP_VCALL3(pv_mmu_ops.set_pgd_batched, pgdp,
+			    val, (u64)val >> 32);
+	else
+		PVOP_VCALL2(pv_mmu_ops.set_pgd_batched, pgdp,
+			    val);
+}
+
 static inline void pgd_clear(pgd_t *pgdp)
 {
 	set_pgd(pgdp, __pgd(0));
@@ -710,6 +722,21 @@ static inline void __set_fixmap(unsigned
 	pv_mmu_ops.set_fixmap(idx, phys, flags);
 }
 
+#ifdef CONFIG_PAX_KERNEXEC
+static inline unsigned long pax_open_kernel(void)
+{
+	return PVOP_CALL0(unsigned long, pv_mmu_ops.pax_open_kernel);
+}
+
+static inline unsigned long pax_close_kernel(void)
+{
+	return PVOP_CALL0(unsigned long, pv_mmu_ops.pax_close_kernel);
+}
+#else
+static inline unsigned long pax_open_kernel(void) { return 0; }
+static inline unsigned long pax_close_kernel(void) { return 0; }
+#endif
+
 #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
 
 static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
@@ -906,7 +933,7 @@ extern void default_banner(void);
 
 #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
 #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
-#define PARA_INDIRECT(addr)	*%cs:addr
+#define PARA_INDIRECT(addr)	*%ss:addr
 #endif
 
 #define INTERRUPT_RETURN						\
@@ -981,6 +1008,21 @@ extern void default_banner(void);
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
 		  CLBR_NONE,						\
 		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
+
+#define GET_CR0_INTO_RDI				\
+	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0);	\
+	mov %rax,%rdi
+
+#define SET_RDI_INTO_CR0				\
+	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_write_cr0)
+
+#define GET_CR3_INTO_RDI				\
+	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr3);	\
+	mov %rax,%rdi
+
+#define SET_RDI_INTO_CR3				\
+	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_write_cr3)
+
 #endif	/* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/paravirt_types.h linux-4.0.9-pax/arch/x86/include/asm/paravirt_types.h
--- linux-4.0.9/arch/x86/include/asm/paravirt_types.h	2015-03-18 15:21:50.244349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/paravirt_types.h	2015-04-15 12:13:52.906318622 +0200
@@ -84,7 +84,7 @@ struct pv_init_ops {
 	 */
 	unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
 			  unsigned long addr, unsigned len);
-};
+} __no_const;
 
 
 struct pv_lazy_ops {
@@ -98,7 +98,7 @@ struct pv_time_ops {
 	unsigned long long (*sched_clock)(void);
 	unsigned long long (*steal_clock)(int cpu);
 	unsigned long (*get_tsc_khz)(void);
-};
+} __no_const;
 
 struct pv_cpu_ops {
 	/* hooks for various privileged instructions */
@@ -192,7 +192,7 @@ struct pv_cpu_ops {
 
 	void (*start_context_switch)(struct task_struct *prev);
 	void (*end_context_switch)(struct task_struct *next);
-};
+} __no_const;
 
 struct pv_irq_ops {
 	/*
@@ -223,7 +223,7 @@ struct pv_apic_ops {
 				 unsigned long start_eip,
 				 unsigned long start_esp);
 #endif
-};
+} __no_const;
 
 struct pv_mmu_ops {
 	unsigned long (*read_cr2)(void);
@@ -313,6 +313,7 @@ struct pv_mmu_ops {
 	struct paravirt_callee_save make_pud;
 
 	void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
+	void (*set_pgd_batched)(pgd_t *pudp, pgd_t pgdval);
 #endif	/* PAGETABLE_LEVELS == 4 */
 #endif	/* PAGETABLE_LEVELS >= 3 */
 
@@ -324,6 +325,12 @@ struct pv_mmu_ops {
 	   an mfn.  We can tell which is which from the index. */
 	void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
 			   phys_addr_t phys, pgprot_t flags);
+
+#ifdef CONFIG_PAX_KERNEXEC
+	unsigned long (*pax_open_kernel)(void);
+	unsigned long (*pax_close_kernel)(void);
+#endif
+
 };
 
 struct arch_spinlock;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/pgalloc.h linux-4.0.9-pax/arch/x86/include/asm/pgalloc.h
--- linux-4.0.9/arch/x86/include/asm/pgalloc.h	2015-03-18 15:21:50.244349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/pgalloc.h	2015-04-15 12:13:52.906318622 +0200
@@ -63,6 +63,13 @@ static inline void pmd_populate_kernel(s
 				       pmd_t *pmd, pte_t *pte)
 {
 	paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT);
+	set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
+}
+
+static inline void pmd_populate_user(struct mm_struct *mm,
+				       pmd_t *pmd, pte_t *pte)
+{
+	paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT);
 	set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
 }
 
@@ -108,12 +115,22 @@ static inline void __pmd_free_tlb(struct
 
 #ifdef CONFIG_X86_PAE
 extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd);
+static inline void pud_populate_kernel(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
+{
+	pud_populate(mm, pudp, pmd);
+}
 #else	/* !CONFIG_X86_PAE */
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
 	paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
 	set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
 }
+
+static inline void pud_populate_kernel(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
+	set_pud(pud, __pud(_KERNPG_TABLE | __pa(pmd)));
+}
 #endif	/* CONFIG_X86_PAE */
 
 #if PAGETABLE_LEVELS > 3
@@ -123,6 +140,12 @@ static inline void pgd_populate(struct m
 	set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
 }
 
+static inline void pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+	paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
+	set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(pud)));
+}
+
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/pgtable-2level.h linux-4.0.9-pax/arch/x86/include/asm/pgtable-2level.h
--- linux-4.0.9/arch/x86/include/asm/pgtable-2level.h	2015-04-13 11:21:01.662617471 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/pgtable-2level.h	2015-04-15 12:13:52.906318622 +0200
@@ -18,7 +18,9 @@ static inline void native_set_pte(pte_t
 
 static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
+	pax_open_kernel();
 	*pmdp = pmd;
+	pax_close_kernel();
 }
 
 static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/pgtable_32.h linux-4.0.9-pax/arch/x86/include/asm/pgtable_32.h
--- linux-4.0.9/arch/x86/include/asm/pgtable_32.h	2015-03-18 15:21:50.244349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/pgtable_32.h	2015-04-15 12:13:52.906318622 +0200
@@ -25,9 +25,6 @@
 struct mm_struct;
 struct vm_area_struct;
 
-extern pgd_t swapper_pg_dir[1024];
-extern pgd_t initial_page_table[1024];
-
 static inline void pgtable_cache_init(void) { }
 static inline void check_pgt_cache(void) { }
 void paging_init(void);
@@ -45,6 +42,12 @@ void paging_init(void);
 # include <asm/pgtable-2level.h>
 #endif
 
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+extern pgd_t initial_page_table[PTRS_PER_PGD];
+#ifdef CONFIG_X86_PAE
+extern pmd_t swapper_pm_dir[PTRS_PER_PGD][PTRS_PER_PMD];
+#endif
+
 #if defined(CONFIG_HIGHPTE)
 #define pte_offset_map(dir, address)					\
 	((pte_t *)kmap_atomic(pmd_page(*(dir))) +		\
@@ -59,12 +62,17 @@ void paging_init(void);
 /* Clear a kernel PTE and flush it from the TLB */
 #define kpte_clear_flush(ptep, vaddr)		\
 do {						\
+	pax_open_kernel();			\
 	pte_clear(&init_mm, (vaddr), (ptep));	\
+	pax_close_kernel();			\
 	__flush_tlb_one((vaddr));		\
 } while (0)
 
 #endif /* !__ASSEMBLY__ */
 
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
 /*
  * kern_addr_valid() is (1) for FLATMEM and (0) for
  * SPARSEMEM and DISCONTIGMEM
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/pgtable_32_types.h linux-4.0.9-pax/arch/x86/include/asm/pgtable_32_types.h
--- linux-4.0.9/arch/x86/include/asm/pgtable_32_types.h	2015-03-18 15:21:50.244349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/pgtable_32_types.h	2015-04-15 12:13:52.906318622 +0200
@@ -8,7 +8,7 @@
  */
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level_types.h>
-# define PMD_SIZE	(1UL << PMD_SHIFT)
+# define PMD_SIZE	(_AC(1, UL) << PMD_SHIFT)
 # define PMD_MASK	(~(PMD_SIZE - 1))
 #else
 # include <asm/pgtable-2level_types.h>
@@ -46,6 +46,19 @@ extern bool __vmalloc_start_set; /* set
 # define VMALLOC_END	(FIXADDR_START - 2 * PAGE_SIZE)
 #endif
 
+#ifdef CONFIG_PAX_KERNEXEC
+#ifndef __ASSEMBLY__
+extern unsigned char MODULES_EXEC_VADDR[];
+extern unsigned char MODULES_EXEC_END[];
+#endif
+#include <asm/boot.h>
+#define ktla_ktva(addr)		(addr + LOAD_PHYSICAL_ADDR + PAGE_OFFSET)
+#define ktva_ktla(addr)		(addr - LOAD_PHYSICAL_ADDR - PAGE_OFFSET)
+#else
+#define ktla_ktva(addr)		(addr)
+#define ktva_ktla(addr)		(addr)
+#endif
+
 #define MODULES_VADDR	VMALLOC_START
 #define MODULES_END	VMALLOC_END
 #define MODULES_LEN	(MODULES_VADDR - MODULES_END)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/pgtable-3level.h linux-4.0.9-pax/arch/x86/include/asm/pgtable-3level.h
--- linux-4.0.9/arch/x86/include/asm/pgtable-3level.h	2015-04-13 11:21:01.662617471 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/pgtable-3level.h	2015-04-15 12:13:52.906318622 +0200
@@ -92,12 +92,16 @@ static inline void native_set_pte_atomic
 
 static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
+	pax_open_kernel();
 	set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd));
+	pax_close_kernel();
 }
 
 static inline void native_set_pud(pud_t *pudp, pud_t pud)
 {
+	pax_open_kernel();
 	set_64bit((unsigned long long *)(pudp), native_pud_val(pud));
+	pax_close_kernel();
 }
 
 /*
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/pgtable_64.h linux-4.0.9-pax/arch/x86/include/asm/pgtable_64.h
--- linux-4.0.9/arch/x86/include/asm/pgtable_64.h	2015-04-13 11:21:01.662617471 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/pgtable_64.h	2015-05-17 22:43:48.913463237 +0200
@@ -16,11 +16,16 @@
 
 extern pud_t level3_kernel_pgt[512];
 extern pud_t level3_ident_pgt[512];
+extern pud_t level3_vmalloc_start_pgt[512];
+extern pud_t level3_vmalloc_end_pgt[512];
+extern pud_t level3_vmemmap_pgt[512];
+extern pud_t level2_vmemmap_pgt[512];
 extern pmd_t level2_kernel_pgt[512];
 extern pmd_t level2_fixmap_pgt[512];
-extern pmd_t level2_ident_pgt[512];
-extern pte_t level1_fixmap_pgt[512];
-extern pgd_t init_level4_pgt[];
+extern pmd_t level2_ident_pgt[2][512];
+extern pte_t level1_fixmap_pgt[3][512];
+extern pte_t level1_vsyscall_pgt[512];
+extern pgd_t init_level4_pgt[512];
 
 #define swapper_pg_dir init_level4_pgt
 
@@ -62,7 +67,9 @@ static inline void native_set_pte_atomic
 
 static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
+	pax_open_kernel();
 	*pmdp = pmd;
+	pax_close_kernel();
 }
 
 static inline void native_pmd_clear(pmd_t *pmd)
@@ -98,7 +105,9 @@ static inline pmd_t native_pmdp_get_and_
 
 static inline void native_set_pud(pud_t *pudp, pud_t pud)
 {
+	pax_open_kernel();
 	*pudp = pud;
+	pax_close_kernel();
 }
 
 static inline void native_pud_clear(pud_t *pud)
@@ -108,6 +117,13 @@ static inline void native_pud_clear(pud_
 
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
+	pax_open_kernel();
+	*pgdp = pgd;
+	pax_close_kernel();
+}
+
+static inline void native_set_pgd_batched(pgd_t *pgdp, pgd_t pgd)
+{
 	*pgdp = pgd;
 }
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/pgtable_64_types.h linux-4.0.9-pax/arch/x86/include/asm/pgtable_64_types.h
--- linux-4.0.9/arch/x86/include/asm/pgtable_64_types.h	2015-03-18 15:21:50.244349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/pgtable_64_types.h	2015-04-15 12:13:52.906318622 +0200
@@ -61,11 +61,16 @@ typedef struct { pteval_t pte; } pte_t;
 #define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
 #define MODULES_END      _AC(0xffffffffff000000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
+#define MODULES_EXEC_VADDR MODULES_VADDR
+#define MODULES_EXEC_END MODULES_END
 #define ESPFIX_PGD_ENTRY _AC(-2, UL)
 #define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
 #define EFI_VA_START	 ( -4 * (_AC(1, UL) << 30))
 #define EFI_VA_END	 (-68 * (_AC(1, UL) << 30))
 
+#define ktla_ktva(addr)		(addr)
+#define ktva_ktla(addr)		(addr)
+
 #define EARLY_DYNAMIC_PAGE_TABLES	64
 
 #endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/pgtable.h linux-4.0.9-pax/arch/x86/include/asm/pgtable.h
--- linux-4.0.9/arch/x86/include/asm/pgtable.h	2015-04-13 11:21:01.662617471 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/pgtable.h	2015-05-12 16:11:09.811820581 +0200
@@ -47,6 +47,7 @@ extern struct mm_struct *pgd_page_get_mm
 
 #ifndef __PAGETABLE_PUD_FOLDED
 #define set_pgd(pgdp, pgd)		native_set_pgd(pgdp, pgd)
+#define set_pgd_batched(pgdp, pgd)	native_set_pgd_batched(pgdp, pgd)
 #define pgd_clear(pgd)			native_pgd_clear(pgd)
 #endif
 
@@ -84,12 +85,53 @@ extern struct mm_struct *pgd_page_get_mm
 
 #define arch_end_context_switch(prev)	do {} while(0)
 
+#define pax_open_kernel()	native_pax_open_kernel()
+#define pax_close_kernel()	native_pax_close_kernel()
 #endif	/* CONFIG_PARAVIRT */
 
+#define  __HAVE_ARCH_PAX_OPEN_KERNEL
+#define  __HAVE_ARCH_PAX_CLOSE_KERNEL
+
+#ifdef CONFIG_PAX_KERNEXEC
+static inline unsigned long native_pax_open_kernel(void)
+{
+	unsigned long cr0;
+
+	preempt_disable();
+	barrier();
+	cr0 = read_cr0() ^ X86_CR0_WP;
+	BUG_ON(cr0 & X86_CR0_WP);
+	write_cr0(cr0);
+	barrier();
+	return cr0 ^ X86_CR0_WP;
+}
+
+static inline unsigned long native_pax_close_kernel(void)
+{
+	unsigned long cr0;
+
+	barrier();
+	cr0 = read_cr0() ^ X86_CR0_WP;
+	BUG_ON(!(cr0 & X86_CR0_WP));
+	write_cr0(cr0);
+	barrier();
+	preempt_enable_no_resched();
+	return cr0 ^ X86_CR0_WP;
+}
+#else
+static inline unsigned long native_pax_open_kernel(void) { return 0; }
+static inline unsigned long native_pax_close_kernel(void) { return 0; }
+#endif
+
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
  */
+static inline int pte_user(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_USER;
+}
+
 static inline int pte_dirty(pte_t pte)
 {
 	return pte_flags(pte) & _PAGE_DIRTY;
@@ -150,6 +192,11 @@ static inline unsigned long pud_pfn(pud_
 	return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
+static inline unsigned long pgd_pfn(pgd_t pgd)
+{
+	return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
 #define pte_page(pte)	pfn_to_page(pte_pfn(pte))
 
 static inline int pmd_large(pmd_t pte)
@@ -203,9 +250,29 @@ static inline pte_t pte_wrprotect(pte_t
 	return pte_clear_flags(pte, _PAGE_RW);
 }
 
+static inline pte_t pte_mkread(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_USER);
+}
+
 static inline pte_t pte_mkexec(pte_t pte)
 {
-	return pte_clear_flags(pte, _PAGE_NX);
+#ifdef CONFIG_X86_PAE
+	if (__supported_pte_mask & _PAGE_NX)
+		return pte_clear_flags(pte, _PAGE_NX);
+	else
+#endif
+		return pte_set_flags(pte, _PAGE_USER);
+}
+
+static inline pte_t pte_exprotect(pte_t pte)
+{
+#ifdef CONFIG_X86_PAE
+	if (__supported_pte_mask & _PAGE_NX)
+		return pte_set_flags(pte, _PAGE_NX);
+	else
+#endif
+		return pte_clear_flags(pte, _PAGE_USER);
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
@@ -420,6 +487,16 @@ pte_t *populate_extra_pte(unsigned long
 #endif
 
 #ifndef __ASSEMBLY__
+
+#ifdef CONFIG_PAX_PER_CPU_PGD
+extern pgd_t cpu_pgd[NR_CPUS][2][PTRS_PER_PGD];
+enum cpu_pgd_type {kernel = 0, user = 1};
+static inline pgd_t *get_cpu_pgd(unsigned int cpu, enum cpu_pgd_type type)
+{
+	return cpu_pgd[cpu][type];
+}
+#endif
+
 #include <linux/mm_types.h>
 #include <linux/mmdebug.h>
 #include <linux/log2.h>
@@ -571,7 +648,7 @@ static inline unsigned long pud_page_vad
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pud_page(pud)		pfn_to_page(pud_val(pud) >> PAGE_SHIFT)
+#define pud_page(pud)		pfn_to_page((pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT)
 
 /* Find an entry in the second-level page table.. */
 static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
@@ -611,7 +688,7 @@ static inline unsigned long pgd_page_vad
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pgd_page(pgd)		pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
+#define pgd_page(pgd)		pfn_to_page((pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT)
 
 /* to find an entry in a page-table-directory. */
 static inline unsigned long pud_index(unsigned long address)
@@ -626,7 +703,7 @@ static inline pud_t *pud_offset(pgd_t *p
 
 static inline int pgd_bad(pgd_t pgd)
 {
-	return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
+	return (pgd_flags(pgd) & ~(_PAGE_USER | _PAGE_NX)) != _KERNPG_TABLE;
 }
 
 static inline int pgd_none(pgd_t pgd)
@@ -649,7 +726,12 @@ static inline int pgd_none(pgd_t pgd)
  * pgd_offset() returns a (pgd_t *)
  * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
  */
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
+#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
+
+#ifdef CONFIG_PAX_PER_CPU_PGD
+#define pgd_offset_cpu(cpu, type, address) (get_cpu_pgd(cpu, type) + pgd_index(address))
+#endif
+
 /*
  * a shortcut which implies the use of the kernel's pgd, instead
  * of a process's
@@ -660,6 +742,25 @@ static inline int pgd_none(pgd_t pgd)
 #define KERNEL_PGD_BOUNDARY	pgd_index(PAGE_OFFSET)
 #define KERNEL_PGD_PTRS		(PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
 
+#ifdef CONFIG_X86_32
+#define USER_PGD_PTRS		KERNEL_PGD_BOUNDARY
+#else
+#define TASK_SIZE_MAX_SHIFT CONFIG_TASK_SIZE_MAX_SHIFT
+#define USER_PGD_PTRS		(_AC(1,UL) << (TASK_SIZE_MAX_SHIFT - PGDIR_SHIFT))
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+#ifdef __ASSEMBLY__
+#define pax_user_shadow_base	pax_user_shadow_base(%rip)
+#else
+extern unsigned long pax_user_shadow_base;
+extern pgdval_t clone_pgd_mask;
+#endif
+#else
+#define pax_user_shadow_base	(0UL)
+#endif
+
+#endif
+
 #ifndef __ASSEMBLY__
 
 extern int direct_gbpages;
@@ -826,11 +927,24 @@ static inline void pmdp_set_wrprotect(st
  * dst and src can be on the same page, but the range must not overlap,
  * and must not cross a page boundary.
  */
-static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+static inline void clone_pgd_range(pgd_t *dst, const pgd_t *src, int count)
 {
-       memcpy(dst, src, count * sizeof(pgd_t));
+	pax_open_kernel();
+	while (count--)
+		*dst++ = *src++;
+	pax_close_kernel();
 }
 
+#ifdef CONFIG_PAX_PER_CPU_PGD
+extern void __clone_user_pgds(pgd_t *dst, const pgd_t *src);
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+extern void __shadow_user_pgds(pgd_t *dst, const pgd_t *src);
+#else
+static inline void __shadow_user_pgds(pgd_t *dst, const pgd_t *src) {}
+#endif
+
 #define PTE_SHIFT ilog2(PTRS_PER_PTE)
 static inline int page_level_shift(enum pg_level level)
 {
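Note on the pgtable.h hunks above: native_pax_open_kernel()/native_pax_close_kernel() implement the KERNEXEC write window by toggling CR0.WP with preemption disabled; the XOR plus BUG_ON catches an unbalanced or nested open/close. A minimal sketch of the intended pairing, assuming a hypothetical read-only kernel variable:

	/* minimal sketch, not part of the patch */
	static void patch_ro_word(unsigned long *ro_ptr, unsigned long val)
	{
		pax_open_kernel();	/* clears CR0.WP, preemption off */
		*ro_ptr = val;		/* store succeeds despite the RO mapping */
		pax_close_kernel();	/* restores CR0.WP, preemption back on */
	}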
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/pgtable_types.h linux-4.0.9-pax/arch/x86/include/asm/pgtable_types.h
--- linux-4.0.9/arch/x86/include/asm/pgtable_types.h	2015-04-13 11:21:01.666617471 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/pgtable_types.h	2015-04-15 12:13:52.906318622 +0200
@@ -85,8 +85,10 @@
 
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 #define _PAGE_NX	(_AT(pteval_t, 1) << _PAGE_BIT_NX)
-#else
+#elif defined(CONFIG_KMEMCHECK) || defined(CONFIG_MEM_SOFT_DIRTY)
 #define _PAGE_NX	(_AT(pteval_t, 0))
+#else
+#define _PAGE_NX	(_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
 #endif
 
 #define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
@@ -141,6 +143,9 @@ enum page_cache_mode {
 #define PAGE_READONLY_EXEC	__pgprot(_PAGE_PRESENT | _PAGE_USER |	\
 					 _PAGE_ACCESSED)
 
+#define PAGE_READONLY_NOEXEC PAGE_READONLY
+#define PAGE_SHARED_NOEXEC PAGE_SHARED
+
 #define __PAGE_KERNEL_EXEC						\
 	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
 #define __PAGE_KERNEL		(__PAGE_KERNEL_EXEC | _PAGE_NX)
@@ -148,7 +153,7 @@ enum page_cache_mode {
 #define __PAGE_KERNEL_RO		(__PAGE_KERNEL & ~_PAGE_RW)
 #define __PAGE_KERNEL_RX		(__PAGE_KERNEL_EXEC & ~_PAGE_RW)
 #define __PAGE_KERNEL_NOCACHE		(__PAGE_KERNEL | _PAGE_NOCACHE)
-#define __PAGE_KERNEL_VSYSCALL		(__PAGE_KERNEL_RX | _PAGE_USER)
+#define __PAGE_KERNEL_VSYSCALL		(__PAGE_KERNEL_RO | _PAGE_USER)
 #define __PAGE_KERNEL_VVAR		(__PAGE_KERNEL_RO | _PAGE_USER)
 #define __PAGE_KERNEL_LARGE		(__PAGE_KERNEL | _PAGE_PSE)
 #define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
@@ -194,7 +199,7 @@ enum page_cache_mode {
 #ifdef CONFIG_X86_64
 #define __PAGE_KERNEL_IDENT_LARGE_EXEC	__PAGE_KERNEL_LARGE_EXEC
 #else
-#define PTE_IDENT_ATTR	 0x003		/* PRESENT+RW */
+#define PTE_IDENT_ATTR	 0x063		/* PRESENT+RW+DIRTY+ACCESSED */
 #define PDE_IDENT_ATTR	 0x063		/* PRESENT+RW+DIRTY+ACCESSED */
 #define PGD_IDENT_ATTR	 0x001		/* PRESENT (no other attributes) */
 #endif
@@ -233,7 +238,17 @@ static inline pgdval_t pgd_flags(pgd_t p
 {
 	return native_pgd_val(pgd) & PTE_FLAGS_MASK;
 }
+#endif
 
+#if PAGETABLE_LEVELS == 3
+#include <asm-generic/pgtable-nopud.h>
+#endif
+
+#if PAGETABLE_LEVELS == 2
+#include <asm-generic/pgtable-nopmd.h>
+#endif
+
+#ifndef __ASSEMBLY__
 #if PAGETABLE_LEVELS > 3
 typedef struct { pudval_t pud; } pud_t;
 
@@ -247,8 +262,6 @@ static inline pudval_t native_pud_val(pu
 	return pud.pud;
 }
 #else
-#include <asm-generic/pgtable-nopud.h>
-
 static inline pudval_t native_pud_val(pud_t pud)
 {
 	return native_pgd_val(pud.pgd);
@@ -268,8 +281,6 @@ static inline pmdval_t native_pmd_val(pm
 	return pmd.pmd;
 }
 #else
-#include <asm-generic/pgtable-nopmd.h>
-
 static inline pmdval_t native_pmd_val(pmd_t pmd)
 {
 	return native_pgd_val(pmd.pud.pgd);
@@ -362,7 +373,6 @@ typedef struct page *pgtable_t;
 
 extern pteval_t __supported_pte_mask;
 extern void set_nx(void);
-extern int nx_enabled;
 
 #define pgprot_writecombine	pgprot_writecombine
 extern pgprot_t pgprot_writecombine(pgprot_t prot);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/preempt.h linux-4.0.9-pax/arch/x86/include/asm/preempt.h
--- linux-4.0.9/arch/x86/include/asm/preempt.h	2015-03-18 15:21:50.244349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/preempt.h	2015-04-15 12:13:52.906318622 +0200
@@ -84,7 +84,7 @@ static __always_inline void __preempt_co
  */
 static __always_inline bool __preempt_count_dec_and_test(void)
 {
-	GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
+	GEN_UNARY_RMWcc("decl", "incl", __preempt_count, __percpu_arg(0), "e");
 }
 
 /*
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/processor.h linux-4.0.9-pax/arch/x86/include/asm/processor.h
--- linux-4.0.9/arch/x86/include/asm/processor.h	2015-04-13 11:21:01.678617470 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/processor.h	2015-04-15 12:13:52.906318622 +0200
@@ -198,9 +198,21 @@ static inline void native_cpuid(unsigned
 	    : "memory");
 }
 
+/* invpcid (%rdx),%rax */
+#define __ASM_INVPCID ".byte 0x66,0x0f,0x38,0x82,0x02"
+
+#define INVPCID_SINGLE_ADDRESS	0UL
+#define INVPCID_SINGLE_CONTEXT	1UL
+#define INVPCID_ALL_GLOBAL	2UL
+#define INVPCID_ALL_NONGLOBAL	3UL
+
+#define PCID_KERNEL		0UL
+#define PCID_USER		1UL
+#define PCID_NOFLUSH		(1UL << 63)
+
 static inline void load_cr3(pgd_t *pgdir)
 {
-	write_cr3(__pa(pgdir));
+	write_cr3(__pa(pgdir) | PCID_KERNEL);
 }
 
 #ifdef CONFIG_X86_32
@@ -282,7 +294,7 @@ struct tss_struct {
 
 } ____cacheline_aligned;
 
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss);
+extern struct tss_struct init_tss[NR_CPUS];
 
 /*
  * Save the original ist values for checking stack pointers during debugging
@@ -479,6 +491,7 @@ struct thread_struct {
 	unsigned short		ds;
 	unsigned short		fsindex;
 	unsigned short		gsindex;
+	unsigned short		ss;
 #endif
 #ifdef CONFIG_X86_32
 	unsigned long		ip;
@@ -805,11 +818,18 @@ static inline void spin_lock_prefetch(co
  */
 #define TASK_SIZE		PAGE_OFFSET
 #define TASK_SIZE_MAX		TASK_SIZE
+
+#ifdef CONFIG_PAX_SEGMEXEC
+#define SEGMEXEC_TASK_SIZE	(TASK_SIZE / 2)
+#define STACK_TOP		((current->mm->pax_flags & MF_PAX_SEGMEXEC)?SEGMEXEC_TASK_SIZE:TASK_SIZE)
+#else
 #define STACK_TOP		TASK_SIZE
-#define STACK_TOP_MAX		STACK_TOP
+#endif
+
+#define STACK_TOP_MAX		TASK_SIZE
 
 #define INIT_THREAD  {							  \
-	.sp0			= sizeof(init_stack) + (long)&init_stack, \
+	.sp0			= sizeof(init_stack) + (long)&init_stack - 8, \
 	.vm86_info		= NULL,					  \
 	.sysenter_cs		= __KERNEL_CS,				  \
 	.io_bitmap_ptr		= NULL,					  \
@@ -823,7 +843,7 @@ static inline void spin_lock_prefetch(co
  */
 #define INIT_TSS  {							  \
 	.x86_tss = {							  \
-		.sp0		= sizeof(init_stack) + (long)&init_stack, \
+		.sp0		= sizeof(init_stack) + (long)&init_stack - 8, \
 		.ss0		= __KERNEL_DS,				  \
 		.ss1		= __KERNEL_CS,				  \
 		.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,		  \
@@ -834,11 +854,7 @@ static inline void spin_lock_prefetch(co
 extern unsigned long thread_saved_pc(struct task_struct *tsk);
 
 #define THREAD_SIZE_LONGS      (THREAD_SIZE/sizeof(unsigned long))
-#define KSTK_TOP(info)                                                 \
-({                                                                     \
-       unsigned long *__ptr = (unsigned long *)(info);                 \
-       (unsigned long)(&__ptr[THREAD_SIZE_LONGS]);                     \
-})
+#define KSTK_TOP(info)         ((container_of(info, struct task_struct, tinfo))->thread.sp0)
 
 /*
  * The below -8 is to reserve 8 bytes on top of the ring0 stack.
@@ -853,7 +869,7 @@ extern unsigned long thread_saved_pc(str
 #define task_pt_regs(task)                                             \
 ({                                                                     \
        struct pt_regs *__regs__;                                       \
-       __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \
+       __regs__ = (struct pt_regs *)((task)->thread.sp0);              \
        __regs__ - 1;                                                   \
 })
 
@@ -869,13 +885,13 @@ extern unsigned long thread_saved_pc(str
  * particular problem by preventing anything from being mapped
  * at the maximum canonical address.
  */
-#define TASK_SIZE_MAX	((1UL << 47) - PAGE_SIZE)
+#define TASK_SIZE_MAX	((1UL << TASK_SIZE_MAX_SHIFT) - PAGE_SIZE)
 
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
 #define IA32_PAGE_OFFSET	((current->personality & ADDR_LIMIT_3GB) ? \
-					0xc0000000 : 0xFFFFe000)
+					0xc0000000 : 0xFFFFf000)
 
 #define TASK_SIZE		(test_thread_flag(TIF_ADDR32) ? \
 					IA32_PAGE_OFFSET : TASK_SIZE_MAX)
@@ -886,11 +902,11 @@ extern unsigned long thread_saved_pc(str
 #define STACK_TOP_MAX		TASK_SIZE_MAX
 
 #define INIT_THREAD  { \
-	.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+	.sp0 = (unsigned long)&init_stack + sizeof(init_stack) - 16 \
 }
 
 #define INIT_TSS  { \
-	.x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+	.x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) - 16 \
 }
 
 /*
@@ -918,6 +934,10 @@ extern void start_thread(struct pt_regs
  */
 #define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 3))
 
+#ifdef CONFIG_PAX_SEGMEXEC
+#define SEGMEXEC_TASK_UNMAPPED_BASE	(PAGE_ALIGN(SEGMEXEC_TASK_SIZE / 3))
+#endif
+
 #define KSTK_EIP(task)		(task_pt_regs(task)->ip)
 
 /* Get/set a process' ability to use the timestamp counter instruction */
@@ -962,7 +982,7 @@ static inline uint32_t hypervisor_cpuid_
 	return 0;
 }
 
-extern unsigned long arch_align_stack(unsigned long sp);
+#define arch_align_stack(x) ((x) & ~0xfUL)
 extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
 
 void default_idle(void);
@@ -972,6 +992,6 @@ bool xen_set_default_idle(void);
 #define xen_set_default_idle 0
 #endif
 
-void stop_this_cpu(void *dummy);
+void stop_this_cpu(void *dummy) __noreturn;
 void df_debug(struct pt_regs *regs, long error_code);
 #endif /* _ASM_X86_PROCESSOR_H */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/ptrace.h linux-4.0.9-pax/arch/x86/include/asm/ptrace.h
--- linux-4.0.9/arch/x86/include/asm/ptrace.h	2015-03-18 15:21:50.244349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/ptrace.h	2015-04-15 12:13:52.906318622 +0200
@@ -89,28 +89,29 @@ static inline unsigned long regs_return_
 }
 
 /*
- * user_mode_vm(regs) determines whether a register set came from user mode.
+ * user_mode(regs) determines whether a register set came from user mode.
  * This is true if V8086 mode was enabled OR if the register set was from
  * protected mode with RPL-3 CS value.  This tricky test checks that with
  * one comparison.  Many places in the kernel can bypass this full check
- * if they have already ruled out V8086 mode, so user_mode(regs) can be used.
+ * if they have already ruled out V8086 mode, so user_mode_novm(regs) can
+ * be used.
  */
-static inline int user_mode(struct pt_regs *regs)
+static inline int user_mode_novm(struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_32
 	return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL;
 #else
-	return !!(regs->cs & 3);
+	return !!(regs->cs & SEGMENT_RPL_MASK);
 #endif
 }
 
-static inline int user_mode_vm(struct pt_regs *regs)
+static inline int user_mode(struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_32
 	return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >=
 		USER_RPL;
 #else
-	return user_mode(regs);
+	return user_mode_novm(regs);
 #endif
 }
 
@@ -126,15 +127,16 @@ static inline int v8086_mode(struct pt_r
 #ifdef CONFIG_X86_64
 static inline bool user_64bit_mode(struct pt_regs *regs)
 {
+	unsigned long cs = regs->cs & 0xffff;
 #ifndef CONFIG_PARAVIRT
 	/*
 	 * On non-paravirt systems, this is the only long mode CPL 3
 	 * selector.  We do not allow long mode selectors in the LDT.
 	 */
-	return regs->cs == __USER_CS;
+	return cs == __USER_CS;
 #else
 	/* Headers are too twisted for this to go in paravirt.h. */
-	return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
+	return cs == __USER_CS || cs == pv_info.extra_user_64bit_cs;
 #endif
 }
 
@@ -185,9 +187,11 @@ static inline unsigned long regs_get_reg
 	 * Traps from the kernel do not save sp and ss.
 	 * Use the helper function to retrieve sp.
 	 */
-	if (offset == offsetof(struct pt_regs, sp) &&
-	    regs->cs == __KERNEL_CS)
-		return kernel_stack_pointer(regs);
+	if (offset == offsetof(struct pt_regs, sp)) {
+		unsigned long cs = regs->cs & 0xffff;
+		if (cs == __KERNEL_CS || cs == __KERNEXEC_KERNEL_CS)
+			return kernel_stack_pointer(regs);
+	}
 #endif
 	return *(unsigned long *)((unsigned long)regs + offset);
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/qrwlock.h linux-4.0.9-pax/arch/x86/include/asm/qrwlock.h
--- linux-4.0.9/arch/x86/include/asm/qrwlock.h	2015-03-18 15:21:50.248349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/qrwlock.h	2015-04-15 12:13:52.906318622 +0200
@@ -7,8 +7,8 @@
 #define queue_write_unlock queue_write_unlock
 static inline void queue_write_unlock(struct qrwlock *lock)
 {
-        barrier();
-        ACCESS_ONCE(*(u8 *)&lock->cnts) = 0;
+	barrier();
+	ACCESS_ONCE_RW(*(u8 *)&lock->cnts) = 0;
 }
 #endif
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/realmode.h linux-4.0.9-pax/arch/x86/include/asm/realmode.h
--- linux-4.0.9/arch/x86/include/asm/realmode.h	2015-03-18 15:21:50.248349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/realmode.h	2015-04-15 12:13:52.906318622 +0200
@@ -22,16 +22,14 @@ struct real_mode_header {
 #endif
 	/* APM/BIOS reboot */
 	u32	machine_real_restart_asm;
-#ifdef CONFIG_X86_64
 	u32	machine_real_restart_seg;
-#endif
 };
 
 /* This must match data at trampoline_32/64.S */
 struct trampoline_header {
 #ifdef CONFIG_X86_32
 	u32 start;
-	u16 gdt_pad;
+	u16 boot_cs;
 	u16 gdt_limit;
 	u32 gdt_base;
 #else
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/reboot.h linux-4.0.9-pax/arch/x86/include/asm/reboot.h
--- linux-4.0.9/arch/x86/include/asm/reboot.h	2015-03-18 15:21:50.248349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/reboot.h	2015-04-15 12:13:52.906318622 +0200
@@ -6,13 +6,13 @@
 struct pt_regs;
 
 struct machine_ops {
-	void (*restart)(char *cmd);
-	void (*halt)(void);
-	void (*power_off)(void);
+	void (* __noreturn restart)(char *cmd);
+	void (* __noreturn halt)(void);
+	void (* __noreturn power_off)(void);
 	void (*shutdown)(void);
 	void (*crash_shutdown)(struct pt_regs *);
-	void (*emergency_restart)(void);
-};
+	void (* __noreturn emergency_restart)(void);
+} __no_const;
 
 extern struct machine_ops machine_ops;
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/rmwcc.h linux-4.0.9-pax/arch/x86/include/asm/rmwcc.h
--- linux-4.0.9/arch/x86/include/asm/rmwcc.h	2015-03-18 15:21:50.248349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/rmwcc.h	2015-04-15 12:13:52.910318622 +0200
@@ -3,7 +3,34 @@
 
 #ifdef CC_HAVE_ASM_GOTO
 
-#define __GEN_RMWcc(fullop, var, cc, ...)				\
+#ifdef CONFIG_PAX_REFCOUNT
+#define __GEN_RMWcc(fullop, fullantiop, var, cc, ...)			\
+do {									\
+	asm_volatile_goto (fullop					\
+			";jno 0f\n"					\
+			fullantiop					\
+			";int $4\n0:\n"					\
+			_ASM_EXTABLE(0b, 0b)				\
+			 ";j" cc " %l[cc_label]"			\
+			: : "m" (var), ## __VA_ARGS__ 			\
+			: "memory" : cc_label);				\
+	return 0;							\
+cc_label:								\
+	return 1;							\
+} while (0)
+#else
+#define __GEN_RMWcc(fullop, fullantiop, var, cc, ...)			\
+do {									\
+	asm_volatile_goto (fullop ";j" cc " %l[cc_label]"		\
+			: : "m" (var), ## __VA_ARGS__ 			\
+			: "memory" : cc_label);				\
+	return 0;							\
+cc_label:								\
+	return 1;							\
+} while (0)
+#endif
+
+#define __GEN_RMWcc_unchecked(fullop, var, cc, ...)			\
 do {									\
 	asm_volatile_goto (fullop "; j" cc " %l[cc_label]"		\
 			: : "m" (var), ## __VA_ARGS__ 			\
@@ -13,15 +40,46 @@ cc_label:								\
 	return 1;							\
 } while (0)
 
-#define GEN_UNARY_RMWcc(op, var, arg0, cc) 				\
-	__GEN_RMWcc(op " " arg0, var, cc)
+#define GEN_UNARY_RMWcc(op, antiop, var, arg0, cc) 			\
+	__GEN_RMWcc(op " " arg0, antiop " " arg0, var, cc)
+
+#define GEN_UNARY_RMWcc_unchecked(op, var, arg0, cc) 			\
+	__GEN_RMWcc_unchecked(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, antiop, var, vcon, val, arg0, cc)		\
+	__GEN_RMWcc(op " %1, " arg0, antiop " %1, " arg0, var, cc, vcon (val))
 
-#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)			\
-	__GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val))
+#define GEN_BINARY_RMWcc_unchecked(op, var, vcon, val, arg0, cc)	\
+	__GEN_RMWcc_unchecked(op " %1, " arg0, var, cc, vcon (val))
 
 #else /* !CC_HAVE_ASM_GOTO */
 
-#define __GEN_RMWcc(fullop, var, cc, ...)				\
+#ifdef CONFIG_PAX_REFCOUNT
+#define __GEN_RMWcc(fullop, fullantiop, var, cc, ...)			\
+do {									\
+	char c;								\
+	asm volatile (fullop 						\
+			";jno 0f\n"					\
+			fullantiop					\
+			";int $4\n0:\n"					\
+			_ASM_EXTABLE(0b, 0b)				\
+			"; set" cc " %1"				\
+			: "+m" (var), "=qm" (c)				\
+			: __VA_ARGS__ : "memory");			\
+	return c != 0;							\
+} while (0)
+#else
+#define __GEN_RMWcc(fullop, fullantiop, var, cc, ...)			\
+do {									\
+	char c;								\
+	asm volatile (fullop "; set" cc " %1"				\
+			: "+m" (var), "=qm" (c)				\
+			: __VA_ARGS__ : "memory");			\
+	return c != 0;							\
+} while (0)
+#endif
+
+#define __GEN_RMWcc_unchecked(fullop, var, cc, ...)			\
 do {									\
 	char c;								\
 	asm volatile (fullop "; set" cc " %1"				\
@@ -30,11 +88,17 @@ do {									\
 	return c != 0;							\
 } while (0)
 
-#define GEN_UNARY_RMWcc(op, var, arg0, cc)				\
-	__GEN_RMWcc(op " " arg0, var, cc)
+#define GEN_UNARY_RMWcc(op, antiop, var, arg0, cc)			\
+	__GEN_RMWcc(op " " arg0, antiop " " arg0, var, cc)
+
+#define GEN_UNARY_RMWcc_unchecked(op, var, arg0, cc)			\
+	__GEN_RMWcc_unchecked(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, antiop, var, vcon, val, arg0, cc)		\
+	__GEN_RMWcc(op " %2, " arg0, antiop " %2, " arg0, var, cc, vcon (val))
 
-#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)			\
-	__GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val))
+#define GEN_BINARY_RMWcc_unchecked(op, var, vcon, val, arg0, cc)	\
+	__GEN_RMWcc_unchecked(op " %2, " arg0, var, cc, vcon (val))
 
 #endif /* CC_HAVE_ASM_GOTO */
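Note on the rmwcc.h hunks above: with CONFIG_PAX_REFCOUNT every checked RMWcc takes an "antiop": the generated code performs the operation, skips ahead on no overflow ("jno"), otherwise undoes the update with the anti-operation and raises the overflow exception ("int $4"), which the _ASM_EXTABLE entry turns into a handled fault. A self-contained sketch of that sequence on a plain int, mirroring the non-asm-goto variant (illustrative only; the real macros operate on the caller's operand):

	/* illustrative sketch of the checked-decrement pattern */
	static inline int dec_and_test_checked(int *v)
	{
		unsigned char c;

		asm volatile("decl %0\n\t"
			     "jno 0f\n\t"
			     "incl %0\n\t"	/* roll back the overflowed decrement */
			     "int $4\n"		/* report the overflow */
			     "0:\n\t"
			     "sete %1"
			     : "+m" (*v), "=qm" (c)
			     : : "memory", "cc");
		return c != 0;
	}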
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/rwsem.h linux-4.0.9-pax/arch/x86/include/asm/rwsem.h
--- linux-4.0.9/arch/x86/include/asm/rwsem.h	2015-03-18 15:21:50.248349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/rwsem.h	2015-04-15 12:13:52.910318622 +0200
@@ -64,6 +64,14 @@ static inline void __down_read(struct rw
 {
 	asm volatile("# beginning down_read\n\t"
 		     LOCK_PREFIX _ASM_INC "(%1)\n\t"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     LOCK_PREFIX _ASM_DEC "(%1)\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
 		     /* adds 0x00000001 */
 		     "  jns        1f\n"
 		     "  call call_rwsem_down_read_failed\n"
@@ -85,6 +93,14 @@ static inline int __down_read_trylock(st
 		     "1:\n\t"
 		     "  mov          %1,%2\n\t"
 		     "  add          %3,%2\n\t"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     "sub %3,%2\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
 		     "  jle	     2f\n\t"
 		     LOCK_PREFIX "  cmpxchg  %2,%0\n\t"
 		     "  jnz	     1b\n\t"
@@ -104,6 +120,14 @@ static inline void __down_write_nested(s
 	long tmp;
 	asm volatile("# beginning down_write\n\t"
 		     LOCK_PREFIX "  xadd      %1,(%2)\n\t"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     "mov %1,(%2)\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
 		     /* adds 0xffff0001, returns the old value */
 		     "  test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t"
 		     /* was the active mask 0 before? */
@@ -155,6 +179,14 @@ static inline void __up_read(struct rw_s
 	long tmp;
 	asm volatile("# beginning __up_read\n\t"
 		     LOCK_PREFIX "  xadd      %1,(%2)\n\t"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     "mov %1,(%2)\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
 		     /* subtracts 1, returns the old value */
 		     "  jns        1f\n\t"
 		     "  call call_rwsem_wake\n" /* expects old value in %edx */
@@ -173,6 +205,14 @@ static inline void __up_write(struct rw_
 	long tmp;
 	asm volatile("# beginning __up_write\n\t"
 		     LOCK_PREFIX "  xadd      %1,(%2)\n\t"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     "mov %1,(%2)\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
 		     /* subtracts 0xffff0001, returns the old value */
 		     "  jns        1f\n\t"
 		     "  call call_rwsem_wake\n" /* expects old value in %edx */
@@ -190,6 +230,14 @@ static inline void __downgrade_write(str
 {
 	asm volatile("# beginning __downgrade_write\n\t"
 		     LOCK_PREFIX _ASM_ADD "%2,(%1)\n\t"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     LOCK_PREFIX _ASM_SUB "%2,(%1)\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
 		     /*
 		      * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386)
 		      *     0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64)
@@ -208,7 +256,15 @@ static inline void __downgrade_write(str
  */
 static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
 {
-	asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
+	asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0\n"
+
+#ifdef CONFIG_PAX_REFCOUNT
+		     "jno 0f\n"
+		     LOCK_PREFIX _ASM_SUB "%1,%0\n"
+		     "int $4\n0:\n"
+		     _ASM_EXTABLE(0b, 0b)
+#endif
+
 		     : "+m" (sem->count)
 		     : "er" (delta));
 }
@@ -218,7 +274,7 @@ static inline void rwsem_atomic_add(long
  */
 static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
 {
-	return delta + xadd(&sem->count, delta);
+	return delta + xadd_check_overflow(&sem->count, delta);
 }
 
 #endif /* __KERNEL__ */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/segment.h linux-4.0.9-pax/arch/x86/include/asm/segment.h
--- linux-4.0.9/arch/x86/include/asm/segment.h	2015-06-26 10:29:22.454538574 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/segment.h	2015-06-26 10:29:32.594538551 +0200
@@ -73,10 +73,15 @@
  *  26 - ESPFIX small SS
  *  27 - per-cpu			[ offset to per-cpu data area ]
  *  28 - stack_canary-20		[ for stack protector ]
- *  29 - unused
- *  30 - unused
+ *  29 - PCI BIOS CS
+ *  30 - PCI BIOS DS
  *  31 - TSS for double fault handler
  */
+#define GDT_ENTRY_KERNEXEC_EFI_CS	(1)
+#define GDT_ENTRY_KERNEXEC_EFI_DS	(2)
+#define __KERNEXEC_EFI_CS	(GDT_ENTRY_KERNEXEC_EFI_CS*8)
+#define __KERNEXEC_EFI_DS	(GDT_ENTRY_KERNEXEC_EFI_DS*8)
+
 #define GDT_ENTRY_TLS_MIN	6
 #define GDT_ENTRY_TLS_MAX 	(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
 
@@ -88,6 +93,8 @@
 
 #define GDT_ENTRY_KERNEL_CS		(GDT_ENTRY_KERNEL_BASE+0)
 
+#define GDT_ENTRY_KERNEXEC_KERNEL_CS	(4)
+
 #define GDT_ENTRY_KERNEL_DS		(GDT_ENTRY_KERNEL_BASE+1)
 
 #define GDT_ENTRY_TSS			(GDT_ENTRY_KERNEL_BASE+4)
@@ -113,6 +120,12 @@
 #define __KERNEL_STACK_CANARY		0
 #endif
 
+#define GDT_ENTRY_PCIBIOS_CS		(GDT_ENTRY_KERNEL_BASE+17)
+#define __PCIBIOS_CS (GDT_ENTRY_PCIBIOS_CS * 8)
+
+#define GDT_ENTRY_PCIBIOS_DS		(GDT_ENTRY_KERNEL_BASE+18)
+#define __PCIBIOS_DS (GDT_ENTRY_PCIBIOS_DS * 8)
+
 #define GDT_ENTRY_DOUBLEFAULT_TSS	31
 
 /*
@@ -140,7 +153,7 @@
  */
 
 /* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
-#define SEGMENT_IS_PNP_CODE(x)   (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
+#define SEGMENT_IS_PNP_CODE(x)   (((x) & 0xFFFCU) == PNP_CS32 || ((x) & 0xFFFCU) == PNP_CS16)
 
 
 #else
@@ -164,6 +177,8 @@
 #define __USER32_CS   (GDT_ENTRY_DEFAULT_USER32_CS*8+3)
 #define __USER32_DS	__USER_DS
 
+#define GDT_ENTRY_KERNEXEC_KERNEL_CS 7
+
 #define GDT_ENTRY_TSS 8	/* needs two entries */
 #define GDT_ENTRY_LDT 10 /* needs two entries */
 #define GDT_ENTRY_TLS_MIN 12
@@ -172,6 +187,8 @@
 #define GDT_ENTRY_PER_CPU 15	/* Abused to load per CPU data from limit */
 #define __PER_CPU_SEG	(GDT_ENTRY_PER_CPU * 8 + 3)
 
+#define GDT_ENTRY_UDEREF_KERNEL_DS 16
+
 /* TLS indexes for 64bit - hardcoded in arch_prctl */
 #define FS_TLS 0
 #define GS_TLS 1
@@ -179,12 +196,14 @@
 #define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
 #define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
 
-#define GDT_ENTRIES 16
+#define GDT_ENTRIES 17
 
 #endif
 
 #define __KERNEL_CS	(GDT_ENTRY_KERNEL_CS*8)
+#define __KERNEXEC_KERNEL_CS	(GDT_ENTRY_KERNEXEC_KERNEL_CS*8)
 #define __KERNEL_DS	(GDT_ENTRY_KERNEL_DS*8)
+#define __UDEREF_KERNEL_DS	(GDT_ENTRY_UDEREF_KERNEL_DS*8)
 #define __USER_DS	(GDT_ENTRY_DEFAULT_USER_DS*8+3)
 #define __USER_CS	(GDT_ENTRY_DEFAULT_USER_CS*8+3)
 #ifndef CONFIG_PARAVIRT
@@ -267,7 +286,7 @@ static inline unsigned long get_limit(un
 {
 	unsigned long __limit;
 	asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
-	return __limit + 1;
+	return __limit;
 }
 
 #endif /* !__ASSEMBLY__ */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/smap.h linux-4.0.9-pax/arch/x86/include/asm/smap.h
--- linux-4.0.9/arch/x86/include/asm/smap.h	2015-03-18 15:21:50.248349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/smap.h	2015-04-15 12:13:52.910318622 +0200
@@ -25,11 +25,40 @@
 
 #include <asm/alternative-asm.h>
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+#define ASM_PAX_OPEN_USERLAND					\
+	661: jmp 663f;						\
+	.pushsection .altinstr_replacement, "a" ;		\
+	662: pushq %rax; nop;					\
+	.popsection ;						\
+	.pushsection .altinstructions, "a" ;			\
+	altinstruction_entry 661b, 662b, X86_FEATURE_STRONGUDEREF, 2, 2;\
+	.popsection ;						\
+	call __pax_open_userland;				\
+	popq %rax;						\
+	663:
+
+#define ASM_PAX_CLOSE_USERLAND					\
+	661: jmp 663f;						\
+	.pushsection .altinstr_replacement, "a" ;		\
+	662: pushq %rax; nop;					\
+	.popsection;						\
+	.pushsection .altinstructions, "a" ;			\
+	altinstruction_entry 661b, 662b, X86_FEATURE_STRONGUDEREF, 2, 2;\
+	.popsection;						\
+	call __pax_close_userland;				\
+	popq %rax;						\
+	663:
+#else
+#define ASM_PAX_OPEN_USERLAND
+#define ASM_PAX_CLOSE_USERLAND
+#endif
+
 #ifdef CONFIG_X86_SMAP
 
 #define ASM_CLAC							\
 	661: ASM_NOP3 ;							\
-	.pushsection .altinstr_replacement, "ax" ;			\
+	.pushsection .altinstr_replacement, "a" ;			\
 	662: __ASM_CLAC ;						\
 	.popsection ;							\
 	.pushsection .altinstructions, "a" ;				\
@@ -38,7 +67,7 @@
 
 #define ASM_STAC							\
 	661: ASM_NOP3 ;							\
-	.pushsection .altinstr_replacement, "ax" ;			\
+	.pushsection .altinstr_replacement, "a" ;			\
 	662: __ASM_STAC ;						\
 	.popsection ;							\
 	.pushsection .altinstructions, "a" ;				\
@@ -56,6 +85,37 @@
 
 #include <asm/alternative.h>
 
+#define __HAVE_ARCH_PAX_OPEN_USERLAND
+#define __HAVE_ARCH_PAX_CLOSE_USERLAND
+
+extern void __pax_open_userland(void);
+static __always_inline unsigned long pax_open_userland(void)
+{
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+	asm volatile(ALTERNATIVE(ASM_NOP5, "call %P[open]", X86_FEATURE_STRONGUDEREF)
+		:
+		: [open] "i" (__pax_open_userland)
+		: "memory", "rax");
+#endif
+
+	return 0;
+}
+
+extern void __pax_close_userland(void);
+static __always_inline unsigned long pax_close_userland(void)
+{
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+	asm volatile(ALTERNATIVE(ASM_NOP5, "call %P[close]", X86_FEATURE_STRONGUDEREF)
+		:
+		: [close] "i" (__pax_close_userland)
+		: "memory", "rax");
+#endif
+
+	return 0;
+}
+
 #ifdef CONFIG_X86_SMAP
 
 static __always_inline void clac(void)
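Note on the smap.h hunk above: pax_open_userland()/pax_close_userland() are patched in via ALTERNATIVE, so they cost a 5-byte NOP unless X86_FEATURE_STRONGUDEREF is set, in which case they call the __pax_{open,close}_userland helpers. The uaccess primitives later in the patch call them internally; a sketch of the pairing around a hypothetical raw accessor:

	/* sketch only; get_user()/put_user() already open the window themselves */
	static inline int peek_user_long(const unsigned long __user *p, unsigned long *out)
	{
		int err;

		pax_open_userland();	/* NOP unless STRONGUDEREF is active */
		err = __get_user(*out, p);
		pax_close_userland();
		return err;
	}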
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/smp.h linux-4.0.9-pax/arch/x86/include/asm/smp.h
--- linux-4.0.9/arch/x86/include/asm/smp.h	2015-03-18 15:21:50.248349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/smp.h	2015-04-15 12:13:52.910318622 +0200
@@ -35,7 +35,7 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_
 /* cpus sharing the last level cache: */
 DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
-DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
+DECLARE_PER_CPU_READ_MOSTLY(unsigned int, cpu_number);
 
 static inline struct cpumask *cpu_sibling_mask(int cpu)
 {
@@ -78,7 +78,7 @@ struct smp_ops {
 
 	void (*send_call_func_ipi)(const struct cpumask *mask);
 	void (*send_call_func_single_ipi)(int cpu);
-};
+} __no_const;
 
 /* Globals due to paravirt */
 extern void set_cpu_sibling_map(int cpu);
@@ -191,14 +191,8 @@ extern unsigned disabled_cpus;
 extern int safe_smp_processor_id(void);
 
 #elif defined(CONFIG_X86_64_SMP)
-#define raw_smp_processor_id() (this_cpu_read(cpu_number))
-
-#define stack_smp_processor_id()					\
-({								\
-	struct thread_info *ti;						\
-	__asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK));	\
-	ti->cpu;							\
-})
+#define raw_smp_processor_id()		(this_cpu_read(cpu_number))
+#define stack_smp_processor_id()	raw_smp_processor_id()
 #define safe_smp_processor_id()		smp_processor_id()
 
 #endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/stackprotector.h linux-4.0.9-pax/arch/x86/include/asm/stackprotector.h
--- linux-4.0.9/arch/x86/include/asm/stackprotector.h	2015-03-18 15:21:50.248349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/stackprotector.h	2015-04-15 12:13:52.910318622 +0200
@@ -47,7 +47,7 @@
  * head_32 for boot CPU and setup_per_cpu_areas() for others.
  */
 #define GDT_STACK_CANARY_INIT						\
-	[GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18),
+	[GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x17),
 
 /*
  * Initialize the stackprotector canary value.
@@ -112,7 +112,7 @@ static inline void setup_stack_canary_se
 
 static inline void load_stack_canary_segment(void)
 {
-#ifdef CONFIG_X86_32
+#if defined(CONFIG_X86_32) && !defined(CONFIG_PAX_MEMORY_UDEREF)
 	asm volatile ("mov %0, %%gs" : : "r" (0));
 #endif
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/stacktrace.h linux-4.0.9-pax/arch/x86/include/asm/stacktrace.h
--- linux-4.0.9/arch/x86/include/asm/stacktrace.h	2015-03-18 15:21:50.248349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/stacktrace.h	2015-04-15 12:13:52.910318622 +0200
@@ -11,28 +11,20 @@
 
 extern int kstack_depth_to_print;
 
-struct thread_info;
+struct task_struct;
 struct stacktrace_ops;
 
-typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo,
-				      unsigned long *stack,
-				      unsigned long bp,
-				      const struct stacktrace_ops *ops,
-				      void *data,
-				      unsigned long *end,
-				      int *graph);
-
-extern unsigned long
-print_context_stack(struct thread_info *tinfo,
-		    unsigned long *stack, unsigned long bp,
-		    const struct stacktrace_ops *ops, void *data,
-		    unsigned long *end, int *graph);
-
-extern unsigned long
-print_context_stack_bp(struct thread_info *tinfo,
-		       unsigned long *stack, unsigned long bp,
-		       const struct stacktrace_ops *ops, void *data,
-		       unsigned long *end, int *graph);
+typedef unsigned long walk_stack_t(struct task_struct *task,
+				   void *stack_start,
+				   unsigned long *stack,
+				   unsigned long bp,
+				   const struct stacktrace_ops *ops,
+				   void *data,
+				   unsigned long *end,
+				   int *graph);
+
+extern walk_stack_t print_context_stack;
+extern walk_stack_t print_context_stack_bp;
 
 /* Generic stack tracer with callbacks */
 
@@ -40,7 +32,7 @@ struct stacktrace_ops {
 	void (*address)(void *data, unsigned long address, int reliable);
 	/* On negative return stop dumping */
 	int (*stack)(void *data, char *name);
-	walk_stack_t	walk_stack;
+	walk_stack_t	*walk_stack;
 };
 
 void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/switch_to.h linux-4.0.9-pax/arch/x86/include/asm/switch_to.h
--- linux-4.0.9/arch/x86/include/asm/switch_to.h	2015-03-18 15:21:50.248349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/switch_to.h	2015-04-15 12:13:52.910318622 +0200
@@ -112,7 +112,7 @@ do {									\
 	     "call __switch_to\n\t"					  \
 	     "movq "__percpu_arg([current_task])",%%rsi\n\t"		  \
 	     __switch_canary						  \
-	     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
+	     "movq "__percpu_arg([thread_info])",%%r8\n\t"		  \
 	     "movq %%rax,%%rdi\n\t" 					  \
 	     "testl  %[_tif_fork],%P[ti_flags](%%r8)\n\t"		  \
 	     "jnz   ret_from_fork\n\t"					  \
@@ -123,7 +123,7 @@ do {									\
 	       [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
 	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	  \
 	       [_tif_fork] "i" (_TIF_FORK),			  	  \
-	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
+	       [thread_info] "m" (current_tinfo),			  \
 	       [current_task] "m" (current_task)			  \
 	       __switch_canary_iparam					  \
 	     : "memory", "cc" __EXTRA_CLOBBER)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/thread_info.h linux-4.0.9-pax/arch/x86/include/asm/thread_info.h
--- linux-4.0.9/arch/x86/include/asm/thread_info.h	2015-04-13 11:21:01.686617470 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/thread_info.h	2015-04-15 12:13:52.910318622 +0200
@@ -24,7 +24,6 @@ struct exec_domain;
 #include <linux/atomic.h>
 
 struct thread_info {
-	struct task_struct	*task;		/* main task structure */
 	struct exec_domain	*exec_domain;	/* execution domain */
 	__u32			flags;		/* low level flags */
 	__u32			status;		/* thread synchronous flags */
@@ -32,13 +31,13 @@ struct thread_info {
 	int			saved_preempt_count;
 	mm_segment_t		addr_limit;
 	void __user		*sysenter_return;
+	unsigned long		lowest_stack;
 	unsigned int		sig_on_uaccess_error:1;
 	unsigned int		uaccess_err:1;	/* uaccess failed */
 };
 
-#define INIT_THREAD_INFO(tsk)			\
+#define INIT_THREAD_INFO			\
 {						\
-	.task		= &tsk,			\
 	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
@@ -46,7 +45,7 @@ struct thread_info {
 	.addr_limit	= KERNEL_DS,		\
 }
 
-#define init_thread_info	(init_thread_union.thread_info)
+#define init_thread_info	(init_thread_union.stack)
 #define init_stack		(init_thread_union.stack)
 
 #else /* !__ASSEMBLY__ */
@@ -145,7 +144,6 @@ struct thread_info {
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
 
 #define STACK_WARN		(THREAD_SIZE/8)
-#define KERNEL_STACK_OFFSET	(5*(BITS_PER_LONG/8))
 
 /*
  * macros/functions for gaining access to the thread information structure
@@ -156,12 +154,11 @@ struct thread_info {
 
 DECLARE_PER_CPU(unsigned long, kernel_stack);
 
+DECLARE_PER_CPU(struct thread_info *, current_tinfo);
+
 static inline struct thread_info *current_thread_info(void)
 {
-	struct thread_info *ti;
-	ti = (void *)(this_cpu_read_stable(kernel_stack) +
-		      KERNEL_STACK_OFFSET - THREAD_SIZE);
-	return ti;
+	return this_cpu_read_stable(current_tinfo);
 }
 
 static inline unsigned long current_stack_pointer(void)
@@ -179,14 +176,7 @@ static inline unsigned long current_stac
 
 /* how to get the thread information struct from ASM */
 #define GET_THREAD_INFO(reg) \
-	_ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
-	_ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ;
-
-/*
- * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
- * a certain register (to be used in assembler memory operands).
- */
-#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
+	_ASM_MOV PER_CPU_VAR(current_tinfo),reg ;
 
 #endif
 
@@ -242,5 +232,12 @@ static inline bool is_ia32_task(void)
 extern void arch_task_cache_init(void);
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 extern void arch_release_task_struct(struct task_struct *tsk);
+
+#define __HAVE_THREAD_FUNCTIONS
+#define task_thread_info(task)	(&(task)->tinfo)
+#define task_stack_page(task)	((task)->stack)
+#define setup_thread_stack(p, org) do {} while (0)
+#define end_of_stack(p) ((unsigned long *)task_stack_page(p) + 1)
+
 #endif
 #endif /* _ASM_X86_THREAD_INFO_H */
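Note on the thread_info.h hunks above: current_thread_info() no longer derives the thread_info address by masking the stack pointer; thread_info loses its task back-pointer and is reached through the per-cpu current_tinfo pointer, with task_thread_info() mapping to &task->tinfo. A sketch of how that per-cpu pointer would be kept in sync, presumably in the context-switch path (hypothetical helper name):

	/* hypothetical illustration of keeping current_tinfo in sync */
	static inline void update_current_tinfo(struct task_struct *next)
	{
		this_cpu_write(current_tinfo, task_thread_info(next));
	}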
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/tlbflush.h linux-4.0.9-pax/arch/x86/include/asm/tlbflush.h
--- linux-4.0.9/arch/x86/include/asm/tlbflush.h	2015-04-13 11:21:01.686617470 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/tlbflush.h	2015-04-15 12:13:52.910318622 +0200
@@ -86,18 +86,44 @@ static inline void cr4_set_bits_and_upda
 
 static inline void __native_flush_tlb(void)
 {
+	if (static_cpu_has(X86_FEATURE_INVPCID)) {
+		u64 descriptor[2];
+
+		descriptor[0] = PCID_KERNEL;
+		asm volatile(__ASM_INVPCID : : "d"(&descriptor), "a"(INVPCID_ALL_NONGLOBAL) : "memory");
+		return;
+	}
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+	if (static_cpu_has(X86_FEATURE_PCID)) {
+		unsigned int cpu = raw_get_cpu();
+
+		native_write_cr3(__pa(get_cpu_pgd(cpu, user)) | PCID_USER);
+		native_write_cr3(__pa(get_cpu_pgd(cpu, kernel)) | PCID_KERNEL);
+		raw_put_cpu_no_resched();
+		return;
+	}
+#endif
+
 	native_write_cr3(native_read_cr3());
 }
 
 static inline void __native_flush_tlb_global_irq_disabled(void)
 {
-	unsigned long cr4;
+	if (static_cpu_has(X86_FEATURE_INVPCID)) {
+		u64 descriptor[2];
 
-	cr4 = this_cpu_read(cpu_tlbstate.cr4);
-	/* clear PGE */
-	native_write_cr4(cr4 & ~X86_CR4_PGE);
-	/* write old PGE again and flush TLBs */
-	native_write_cr4(cr4);
+		descriptor[0] = PCID_KERNEL;
+		asm volatile(__ASM_INVPCID : : "d"(&descriptor), "a"(INVPCID_ALL_GLOBAL) : "memory");
+	} else {
+		unsigned long cr4;
+
+		cr4 = this_cpu_read(cpu_tlbstate.cr4);
+		/* clear PGE */
+		native_write_cr4(cr4 & ~X86_CR4_PGE);
+		/* write old PGE again and flush TLBs */
+		native_write_cr4(cr4);
+	}
 }
 
 static inline void __native_flush_tlb_global(void)
@@ -118,6 +144,41 @@ static inline void __native_flush_tlb_gl
 
 static inline void __native_flush_tlb_single(unsigned long addr)
 {
+	if (static_cpu_has(X86_FEATURE_INVPCID)) {
+		u64 descriptor[2];
+
+		descriptor[0] = PCID_KERNEL;
+		descriptor[1] = addr;
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+		if (!static_cpu_has(X86_FEATURE_STRONGUDEREF) || addr >= TASK_SIZE_MAX) {
+			if (addr < TASK_SIZE_MAX)
+				descriptor[1] += pax_user_shadow_base;
+			asm volatile(__ASM_INVPCID : : "d"(&descriptor), "a"(INVPCID_SINGLE_ADDRESS) : "memory");
+		}
+
+		descriptor[0] = PCID_USER;
+		descriptor[1] = addr;
+#endif
+
+		asm volatile(__ASM_INVPCID : : "d"(&descriptor), "a"(INVPCID_SINGLE_ADDRESS) : "memory");
+		return;
+	}
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+	if (static_cpu_has(X86_FEATURE_PCID)) {
+		unsigned int cpu = raw_get_cpu();
+
+		native_write_cr3(__pa(get_cpu_pgd(cpu, user)) | PCID_USER | PCID_NOFLUSH);
+		asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+		native_write_cr3(__pa(get_cpu_pgd(cpu, kernel)) | PCID_KERNEL | PCID_NOFLUSH);
+		raw_put_cpu_no_resched();
+
+		if (!static_cpu_has(X86_FEATURE_STRONGUDEREF) && addr < TASK_SIZE_MAX)
+			addr += pax_user_shadow_base;
+	}
+#endif
+
 	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
 }
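Note on the tlbflush.h hunks above: the flush helpers now prefer INVPCID when available; a 16-byte descriptor supplies the PCID (and, for single-address invalidations, the linear address), while the type constant in %rax selects the scope. A minimal sketch following the descriptor layout used in the hunk:

	/* minimal sketch of a single-address invalidation for the kernel PCID */
	static inline void invpcid_flush_addr_kernel(unsigned long addr)
	{
		u64 descriptor[2];

		descriptor[0] = PCID_KERNEL;	/* PCID in the low bits */
		descriptor[1] = addr;		/* linear address to invalidate */
		asm volatile(__ASM_INVPCID	/* invpcid (%rdx),%rax */
			     : : "d"(&descriptor), "a"(INVPCID_SINGLE_ADDRESS)
			     : "memory");
	}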
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/uaccess_32.h linux-4.0.9-pax/arch/x86/include/asm/uaccess_32.h
--- linux-4.0.9/arch/x86/include/asm/uaccess_32.h	2015-03-18 15:21:50.248349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/uaccess_32.h	2015-04-15 12:13:52.910318622 +0200
@@ -40,9 +40,14 @@ unsigned long __must_check __copy_from_u
  * anything, so this is accurate.
  */
 
-static __always_inline unsigned long __must_check
+static __always_inline __size_overflow(3) unsigned long __must_check
 __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 {
+	if ((long)n < 0)
+		return n;
+
+	check_object_size(from, n, true);
+
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
 
@@ -82,12 +87,16 @@ static __always_inline unsigned long __m
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	might_fault();
+
 	return __copy_to_user_inatomic(to, from, n);
 }
 
-static __always_inline unsigned long
+static __always_inline __size_overflow(3) unsigned long
 __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 {
+	if ((long)n < 0)
+		return n;
+
 	/* Avoid zeroing the tail if the copy fails..
 	 * If 'n' is constant and 1, 2, or 4, we do still zero on a failure,
 	 * but as the zeroing behaviour is only significant when n is not
@@ -137,6 +146,12 @@ static __always_inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	might_fault();
+
+	if ((long)n < 0)
+		return n;
+
+	check_object_size(to, n, false);
+
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
 
@@ -159,6 +174,10 @@ static __always_inline unsigned long __c
 				const void __user *from, unsigned long n)
 {
 	might_fault();
+
+	if ((long)n < 0)
+		return n;
+
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
 
@@ -181,7 +200,10 @@ static __always_inline unsigned long
 __copy_from_user_inatomic_nocache(void *to, const void __user *from,
 				  unsigned long n)
 {
-       return __copy_from_user_ll_nocache_nozero(to, from, n);
+	if ((long)n < 0)
+		return n;
+
+	return __copy_from_user_ll_nocache_nozero(to, from, n);
 }
 
 #endif /* _ASM_X86_UACCESS_32_H */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/uaccess_64.h linux-4.0.9-pax/arch/x86/include/asm/uaccess_64.h
--- linux-4.0.9/arch/x86/include/asm/uaccess_64.h	2015-04-13 11:21:01.686617470 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/uaccess_64.h	2015-04-15 12:19:32.850300472 +0200
@@ -10,6 +10,9 @@
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
 #include <asm/page.h>
+#include <asm/pgtable.h>
+
+#define set_fs(x)	(current_thread_info()->addr_limit = (x))
 
 /*
  * Copy To/From Userspace
@@ -23,8 +26,8 @@ copy_user_generic_string(void *to, const
 __must_check unsigned long
 copy_user_generic_unrolled(void *to, const void *from, unsigned len);
 
-static __always_inline __must_check unsigned long
-copy_user_generic(void *to, const void *from, unsigned len)
+static __always_inline __must_check __size_overflow(3) unsigned long
+copy_user_generic(void *to, const void *from, unsigned long len)
 {
 	unsigned ret;
 
@@ -46,121 +49,170 @@ copy_user_generic(void *to, const void *
 }
 
 __must_check unsigned long
-copy_in_user(void __user *to, const void __user *from, unsigned len);
+copy_in_user(void __user *to, const void __user *from, unsigned long len);
 
 static __always_inline __must_check
-int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
+unsigned long __copy_from_user_nocheck(void *dst, const void __user *src, unsigned long size)
 {
-	int ret = 0;
+	size_t sz = __compiletime_object_size(dst);
+	unsigned ret = 0;
+
+	if (size > INT_MAX)
+		return size;
+
+	check_object_size(dst, size, false);
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	if (!access_ok_noprefault(VERIFY_READ, src, size))
+		return size;
+#endif
+
+	if (unlikely(sz != (size_t)-1 && sz < size)) {
+		 if(__builtin_constant_p(size))
+			copy_from_user_overflow();
+		else
+			__copy_from_user_overflow(sz, size);
+		return size;
+	}
 
 	if (!__builtin_constant_p(size))
-		return copy_user_generic(dst, (__force void *)src, size);
+		return copy_user_generic(dst, (__force_kernel const void *)____m(src), size);
 	switch (size) {
-	case 1:__get_user_asm(*(u8 *)dst, (u8 __user *)src,
+	case 1:__get_user_asm(*(u8 *)dst, (const u8 __user *)src,
 			      ret, "b", "b", "=q", 1);
 		return ret;
-	case 2:__get_user_asm(*(u16 *)dst, (u16 __user *)src,
+	case 2:__get_user_asm(*(u16 *)dst, (const u16 __user *)src,
 			      ret, "w", "w", "=r", 2);
 		return ret;
-	case 4:__get_user_asm(*(u32 *)dst, (u32 __user *)src,
+	case 4:__get_user_asm(*(u32 *)dst, (const u32 __user *)src,
 			      ret, "l", "k", "=r", 4);
 		return ret;
-	case 8:__get_user_asm(*(u64 *)dst, (u64 __user *)src,
+	case 8:__get_user_asm(*(u64 *)dst, (const u64 __user *)src,
 			      ret, "q", "", "=r", 8);
 		return ret;
 	case 10:
-		__get_user_asm(*(u64 *)dst, (u64 __user *)src,
+		__get_user_asm(*(u64 *)dst, (const u64 __user *)src,
 			       ret, "q", "", "=r", 10);
 		if (unlikely(ret))
 			return ret;
 		__get_user_asm(*(u16 *)(8 + (char *)dst),
-			       (u16 __user *)(8 + (char __user *)src),
+			       (const u16 __user *)(8 + (const char __user *)src),
 			       ret, "w", "w", "=r", 2);
 		return ret;
 	case 16:
-		__get_user_asm(*(u64 *)dst, (u64 __user *)src,
+		__get_user_asm(*(u64 *)dst, (const u64 __user *)src,
 			       ret, "q", "", "=r", 16);
 		if (unlikely(ret))
 			return ret;
 		__get_user_asm(*(u64 *)(8 + (char *)dst),
-			       (u64 __user *)(8 + (char __user *)src),
+			       (const u64 __user *)(8 + (const char __user *)src),
 			       ret, "q", "", "=r", 8);
 		return ret;
 	default:
-		return copy_user_generic(dst, (__force void *)src, size);
+		return copy_user_generic(dst, (__force_kernel const void *)____m(src), size);
 	}
 }
 
 static __always_inline __must_check
-int __copy_from_user(void *dst, const void __user *src, unsigned size)
+unsigned long __copy_from_user(void *dst, const void __user *src, unsigned long size)
 {
 	might_fault();
 	return __copy_from_user_nocheck(dst, src, size);
 }
 
 static __always_inline __must_check
-int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size)
+unsigned long __copy_to_user_nocheck(void __user *dst, const void *src, unsigned long size)
 {
-	int ret = 0;
+	size_t sz = __compiletime_object_size(src);
+	unsigned ret = 0;
+
+	if (size > INT_MAX)
+		return size;
+
+	check_object_size(src, size, true);
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	if (!access_ok_noprefault(VERIFY_WRITE, dst, size))
+		return size;
+#endif
+
+	if (unlikely(sz != (size_t)-1 && sz < size)) {
+		 if(__builtin_constant_p(size))
+			copy_to_user_overflow();
+		else
+			__copy_to_user_overflow(sz, size);
+		return size;
+	}
 
 	if (!__builtin_constant_p(size))
-		return copy_user_generic((__force void *)dst, src, size);
+		return copy_user_generic((__force_kernel void *)____m(dst), src, size);
 	switch (size) {
-	case 1:__put_user_asm(*(u8 *)src, (u8 __user *)dst,
+	case 1:__put_user_asm(*(const u8 *)src, (u8 __user *)dst,
 			      ret, "b", "b", "iq", 1);
 		return ret;
-	case 2:__put_user_asm(*(u16 *)src, (u16 __user *)dst,
+	case 2:__put_user_asm(*(const u16 *)src, (u16 __user *)dst,
 			      ret, "w", "w", "ir", 2);
 		return ret;
-	case 4:__put_user_asm(*(u32 *)src, (u32 __user *)dst,
+	case 4:__put_user_asm(*(const u32 *)src, (u32 __user *)dst,
 			      ret, "l", "k", "ir", 4);
 		return ret;
-	case 8:__put_user_asm(*(u64 *)src, (u64 __user *)dst,
+	case 8:__put_user_asm(*(const u64 *)src, (u64 __user *)dst,
 			      ret, "q", "", "er", 8);
 		return ret;
 	case 10:
-		__put_user_asm(*(u64 *)src, (u64 __user *)dst,
+		__put_user_asm(*(const u64 *)src, (u64 __user *)dst,
 			       ret, "q", "", "er", 10);
 		if (unlikely(ret))
 			return ret;
 		asm("":::"memory");
-		__put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst,
+		__put_user_asm(4[(const u16 *)src], 4 + (u16 __user *)dst,
 			       ret, "w", "w", "ir", 2);
 		return ret;
 	case 16:
-		__put_user_asm(*(u64 *)src, (u64 __user *)dst,
+		__put_user_asm(*(const u64 *)src, (u64 __user *)dst,
 			       ret, "q", "", "er", 16);
 		if (unlikely(ret))
 			return ret;
 		asm("":::"memory");
-		__put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst,
+		__put_user_asm(1[(const u64 *)src], 1 + (u64 __user *)dst,
 			       ret, "q", "", "er", 8);
 		return ret;
 	default:
-		return copy_user_generic((__force void *)dst, src, size);
+		return copy_user_generic((__force_kernel void *)____m(dst), src, size);
 	}
 }
 
 static __always_inline __must_check
-int __copy_to_user(void __user *dst, const void *src, unsigned size)
+unsigned long __copy_to_user(void __user *dst, const void *src, unsigned long size)
 {
 	might_fault();
 	return __copy_to_user_nocheck(dst, src, size);
 }
 
 static __always_inline __must_check
-int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
+unsigned long __copy_in_user(void __user *dst, const void __user *src, unsigned size)
 {
-	int ret = 0;
+	unsigned ret = 0;
 
 	might_fault();
+
+	if (size > INT_MAX)
+		return size;
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	if (!access_ok_noprefault(VERIFY_READ, src, size))
+		return size;
+	if (!access_ok_noprefault(VERIFY_WRITE, dst, size))
+		return size;
+#endif
+
 	if (!__builtin_constant_p(size))
-		return copy_user_generic((__force void *)dst,
-					 (__force void *)src, size);
+		return copy_user_generic((__force_kernel void *)____m(dst),
+					 (__force_kernel const void *)____m(src), size);
 	switch (size) {
 	case 1: {
 		u8 tmp;
-		__get_user_asm(tmp, (u8 __user *)src,
+		__get_user_asm(tmp, (const u8 __user *)src,
 			       ret, "b", "b", "=q", 1);
 		if (likely(!ret))
 			__put_user_asm(tmp, (u8 __user *)dst,
@@ -169,7 +221,7 @@ int __copy_in_user(void __user *dst, con
 	}
 	case 2: {
 		u16 tmp;
-		__get_user_asm(tmp, (u16 __user *)src,
+		__get_user_asm(tmp, (const u16 __user *)src,
 			       ret, "w", "w", "=r", 2);
 		if (likely(!ret))
 			__put_user_asm(tmp, (u16 __user *)dst,
@@ -179,7 +231,7 @@ int __copy_in_user(void __user *dst, con
 
 	case 4: {
 		u32 tmp;
-		__get_user_asm(tmp, (u32 __user *)src,
+		__get_user_asm(tmp, (const u32 __user *)src,
 			       ret, "l", "k", "=r", 4);
 		if (likely(!ret))
 			__put_user_asm(tmp, (u32 __user *)dst,
@@ -188,7 +240,7 @@ int __copy_in_user(void __user *dst, con
 	}
 	case 8: {
 		u64 tmp;
-		__get_user_asm(tmp, (u64 __user *)src,
+		__get_user_asm(tmp, (const u64 __user *)src,
 			       ret, "q", "", "=r", 8);
 		if (likely(!ret))
 			__put_user_asm(tmp, (u64 __user *)dst,
@@ -196,41 +248,58 @@ int __copy_in_user(void __user *dst, con
 		return ret;
 	}
 	default:
-		return copy_user_generic((__force void *)dst,
-					 (__force void *)src, size);
+		return copy_user_generic((__force_kernel void *)____m(dst),
+					 (__force_kernel const void *)____m(src), size);
 	}
 }
 
-static __must_check __always_inline int
-__copy_from_user_inatomic(void *dst, const void __user *src, unsigned size)
+static __must_check __always_inline unsigned long
+__copy_from_user_inatomic(void *dst, const void __user *src, unsigned long size)
 {
 	return __copy_from_user_nocheck(dst, src, size);
 }
 
-static __must_check __always_inline int
-__copy_to_user_inatomic(void __user *dst, const void *src, unsigned size)
+static __must_check __always_inline unsigned long
+__copy_to_user_inatomic(void __user *dst, const void *src, unsigned long size)
 {
 	return __copy_to_user_nocheck(dst, src, size);
 }
 
-extern long __copy_user_nocache(void *dst, const void __user *src,
-				unsigned size, int zerorest);
+extern unsigned long __copy_user_nocache(void *dst, const void __user *src,
+				unsigned long size, int zerorest);
 
-static inline int
-__copy_from_user_nocache(void *dst, const void __user *src, unsigned size)
+static inline unsigned long
+__copy_from_user_nocache(void *dst, const void __user *src, unsigned long size)
 {
 	might_fault();
+
+	if (size > INT_MAX)
+		return size;
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	if (!access_ok_noprefault(VERIFY_READ, src, size))
+		return size;
+#endif
+
 	return __copy_user_nocache(dst, src, size, 1);
 }
 
-static inline int
+static inline unsigned long
 __copy_from_user_inatomic_nocache(void *dst, const void __user *src,
-				  unsigned size)
+				  unsigned long size)
 {
+	if (size > INT_MAX)
+		return size;
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	if (!access_ok_noprefault(VERIFY_READ, src, size))
+		return size;
+#endif
+
 	return __copy_user_nocache(dst, src, size, 0);
 }
 
 unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len);
+copy_user_handle_tail(char __user *to, char __user *from, unsigned long len) __size_overflow(3);
 
 #endif /* _ASM_X86_UACCESS_64_H */
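Note on the uaccess_64.h hunks above: the 64-bit copy helpers now return unsigned long, reject lengths above INT_MAX, compare against the compile-time destination object size, and (under UDEREF) re-check the user range before copying, so a rejected copy reports the full length as not copied. A sketch of the resulting caller convention, using a hypothetical wrapper:

	/* hypothetical caller; any non-zero return is treated as bytes not copied */
	static int fetch_user_buf(void *dst, size_t dst_len,
				  const void __user *src, unsigned long len)
	{
		if (len > dst_len)
			return -EINVAL;
		if (__copy_from_user(dst, src, len))
			return -EFAULT;
		return 0;
	}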
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/uaccess.h linux-4.0.9-pax/arch/x86/include/asm/uaccess.h
--- linux-4.0.9/arch/x86/include/asm/uaccess.h	2015-04-13 11:21:01.686617470 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/uaccess.h	2015-04-15 12:13:52.910318622 +0200
@@ -7,6 +7,7 @@
 #include <linux/compiler.h>
 #include <linux/thread_info.h>
 #include <linux/string.h>
+#include <linux/spinlock.h>
 #include <asm/asm.h>
 #include <asm/page.h>
 #include <asm/smap.h>
@@ -29,7 +30,12 @@
 
 #define get_ds()	(KERNEL_DS)
 #define get_fs()	(current_thread_info()->addr_limit)
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_MEMORY_UDEREF)
+void __set_fs(mm_segment_t x);
+void set_fs(mm_segment_t x);
+#else
 #define set_fs(x)	(current_thread_info()->addr_limit = (x))
+#endif
 
 #define segment_eq(a, b)	((a).seg == (b).seg)
 
@@ -85,8 +91,36 @@ static inline bool __chk_range_not_ok(un
  * checks that the pointer is in the user space range - after calling
  * this function, memory access functions may still return -EFAULT.
  */
-#define access_ok(type, addr, size) \
-	likely(!__range_not_ok(addr, size, user_addr_max()))
+extern int _cond_resched(void);
+#define access_ok_noprefault(type, addr, size) (likely(!__range_not_ok(addr, size, user_addr_max())))
+#define access_ok(type, addr, size)					\
+({									\
+	unsigned long __size = size;					\
+	unsigned long __addr = (unsigned long)addr;			\
+	bool __ret_ao = __range_not_ok(__addr, __size, user_addr_max()) == 0;\
+	if (__ret_ao && __size) {					\
+		unsigned long __addr_ao = __addr & PAGE_MASK;		\
+		unsigned long __end_ao = __addr + __size - 1;		\
+		if (unlikely((__end_ao ^ __addr_ao) & PAGE_MASK)) {	\
+			while (__addr_ao <= __end_ao) {			\
+				char __c_ao;				\
+				__addr_ao += PAGE_SIZE;			\
+				if (__size > PAGE_SIZE)			\
+					_cond_resched();		\
+				if (__get_user(__c_ao, (char __user *)__addr))	\
+					break;				\
+				if (type != VERIFY_WRITE) {		\
+					__addr = __addr_ao;		\
+					continue;			\
+				}					\
+				if (__put_user(__c_ao, (char __user *)__addr))	\
+					break;				\
+				__addr = __addr_ao;			\
+			}						\
+		}							\
+	}								\
+	__ret_ao;							\
+})
 
 /*
  * The exception table consists of pairs of addresses relative to the
@@ -134,11 +168,13 @@ extern int __get_user_8(void);
 extern int __get_user_bad(void);
 
 /*
- * This is a type: either unsigned long, if the argument fits into
- * that type, or otherwise unsigned long long.
+ * This is a type: either (un)signed int, if the argument fits into
+ * that type, or otherwise (un)signed long long.
  */
 #define __inttype(x) \
-__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
+__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0U),		\
+	__builtin_choose_expr(__type_is_unsigned(__typeof__(x)), 0ULL, 0LL),\
+	__builtin_choose_expr(__type_is_unsigned(__typeof__(x)), 0U, 0)))
 
 /**
  * get_user: - Get a simple variable from user space.
@@ -176,10 +212,12 @@ __typeof__(__builtin_choose_expr(sizeof(
 	register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX);		\
 	__chk_user_ptr(ptr);						\
 	might_fault();							\
+	pax_open_userland();						\
 	asm volatile("call __get_user_%P3"				\
 		     : "=a" (__ret_gu), "=r" (__val_gu)			\
 		     : "0" (ptr), "i" (sizeof(*(ptr))));		\
 	(x) = (__force __typeof__(*(ptr))) __val_gu;			\
+	pax_close_userland();						\
 	__ret_gu;							\
 })
 
@@ -187,13 +225,21 @@ __typeof__(__builtin_choose_expr(sizeof(
 	asm volatile("call __put_user_" #size : "=a" (__ret_pu)	\
 		     : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
 
-
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_MEMORY_UDEREF)
+#define __copyuser_seg "gs;"
+#define __COPYUSER_SET_ES "pushl %%gs; popl %%es\n"
+#define __COPYUSER_RESTORE_ES "pushl %%ss; popl %%es\n"
+#else
+#define __copyuser_seg
+#define __COPYUSER_SET_ES
+#define __COPYUSER_RESTORE_ES
+#endif
 
 #ifdef CONFIG_X86_32
 #define __put_user_asm_u64(x, addr, err, errret)			\
 	asm volatile(ASM_STAC "\n"					\
-		     "1:	movl %%eax,0(%2)\n"			\
-		     "2:	movl %%edx,4(%2)\n"			\
+		     "1:	"__copyuser_seg"movl %%eax,0(%2)\n"	\
+		     "2:	"__copyuser_seg"movl %%edx,4(%2)\n"	\
 		     "3: " ASM_CLAC "\n"				\
 		     ".section .fixup,\"ax\"\n"				\
 		     "4:	movl %3,%0\n"				\
@@ -206,8 +252,8 @@ __typeof__(__builtin_choose_expr(sizeof(
 
 #define __put_user_asm_ex_u64(x, addr)					\
 	asm volatile(ASM_STAC "\n"					\
-		     "1:	movl %%eax,0(%1)\n"			\
-		     "2:	movl %%edx,4(%1)\n"			\
+		     "1:	"__copyuser_seg"movl %%eax,0(%1)\n"	\
+		     "2:	"__copyuser_seg"movl %%edx,4(%1)\n"	\
 		     "3: " ASM_CLAC "\n"				\
 		     _ASM_EXTABLE_EX(1b, 2b)				\
 		     _ASM_EXTABLE_EX(2b, 3b)				\
@@ -257,7 +303,8 @@ extern void __put_user_8(void);
 	__typeof__(*(ptr)) __pu_val;				\
 	__chk_user_ptr(ptr);					\
 	might_fault();						\
-	__pu_val = x;						\
+	__pu_val = (x);						\
+	pax_open_userland();					\
 	switch (sizeof(*(ptr))) {				\
 	case 1:							\
 		__put_user_x(1, __pu_val, ptr, __ret_pu);	\
@@ -275,6 +322,7 @@ extern void __put_user_8(void);
 		__put_user_x(X, __pu_val, ptr, __ret_pu);	\
 		break;						\
 	}							\
+	pax_close_userland();					\
 	__ret_pu;						\
 })
 
@@ -355,8 +403,10 @@ do {									\
 } while (0)
 
 #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret)	\
+do {									\
+	pax_open_userland();						\
 	asm volatile(ASM_STAC "\n"					\
-		     "1:	mov"itype" %2,%"rtype"1\n"		\
+		     "1:	"__copyuser_seg"mov"itype" %2,%"rtype"1\n"\
 		     "2: " ASM_CLAC "\n"				\
 		     ".section .fixup,\"ax\"\n"				\
 		     "3:	mov %3,%0\n"				\
@@ -364,8 +414,10 @@ do {									\
 		     "	jmp 2b\n"					\
 		     ".previous\n"					\
 		     _ASM_EXTABLE(1b, 3b)				\
-		     : "=r" (err), ltype(x)				\
-		     : "m" (__m(addr)), "i" (errret), "0" (err))
+		     : "=r" (err), ltype (x)				\
+		     : "m" (__m(addr)), "i" (errret), "0" (err));	\
+	pax_close_userland();						\
+} while (0)
 
 #define __get_user_size_ex(x, ptr, size)				\
 do {									\
@@ -389,7 +441,7 @@ do {									\
 } while (0)
 
 #define __get_user_asm_ex(x, addr, itype, rtype, ltype)			\
-	asm volatile("1:	mov"itype" %1,%"rtype"0\n"		\
+	asm volatile("1:	"__copyuser_seg"mov"itype" %1,%"rtype"0\n"\
 		     "2:\n"						\
 		     _ASM_EXTABLE_EX(1b, 2b)				\
 		     : ltype(x) : "m" (__m(addr)))
@@ -406,13 +458,24 @@ do {									\
 	int __gu_err;							\
 	unsigned long __gu_val;						\
 	__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT);	\
-	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
+	(x) = (__typeof__(*(ptr)))__gu_val;				\
 	__gu_err;							\
 })
 
 /* FIXME: this hack is definitely wrong -AK */
 struct __large_struct { unsigned long buf[100]; };
-#define __m(x) (*(struct __large_struct __user *)(x))
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+#define ____m(x)					\
+({							\
+	unsigned long ____x = (unsigned long)(x);	\
+	if (____x < pax_user_shadow_base)		\
+		____x += pax_user_shadow_base;		\
+	(typeof(x))____x;				\
+})
+#else
+#define ____m(x) (x)
+#endif
+#define __m(x) (*(struct __large_struct __user *)____m(x))
 
 /*
  * Tell gcc we read from memory instead of writing: this is because
@@ -420,8 +483,10 @@ struct __large_struct { unsigned long bu
  * aliasing issues.
  */
 #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret)	\
+do {									\
+	pax_open_userland();						\
 	asm volatile(ASM_STAC "\n"					\
-		     "1:	mov"itype" %"rtype"1,%2\n"		\
+		     "1:	"__copyuser_seg"mov"itype" %"rtype"1,%2\n"\
 		     "2: " ASM_CLAC "\n"				\
 		     ".section .fixup,\"ax\"\n"				\
 		     "3:	mov %3,%0\n"				\
@@ -429,10 +494,12 @@ struct __large_struct { unsigned long bu
 		     ".previous\n"					\
 		     _ASM_EXTABLE(1b, 3b)				\
 		     : "=r"(err)					\
-		     : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err))
+		     : ltype (x), "m" (__m(addr)), "i" (errret), "0" (err));\
+	pax_close_userland();						\
+} while (0)
 
 #define __put_user_asm_ex(x, addr, itype, rtype, ltype)			\
-	asm volatile("1:	mov"itype" %"rtype"0,%1\n"		\
+	asm volatile("1:	"__copyuser_seg"mov"itype" %"rtype"0,%1\n"\
 		     "2:\n"						\
 		     _ASM_EXTABLE_EX(1b, 2b)				\
 		     : : ltype(x), "m" (__m(addr)))
@@ -442,11 +509,13 @@ struct __large_struct { unsigned long bu
  */
 #define uaccess_try	do {						\
 	current_thread_info()->uaccess_err = 0;				\
+	pax_open_userland();						\
 	stac();								\
 	barrier();
 
 #define uaccess_catch(err)						\
 	clac();								\
+	pax_close_userland();						\
 	(err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0);	\
 } while (0)
 
@@ -471,8 +540,12 @@ struct __large_struct { unsigned long bu
  * On error, the variable @x is set to zero.
  */
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+#define __get_user(x, ptr)	get_user((x), (ptr))
+#else
 #define __get_user(x, ptr)						\
 	__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
+#endif
 
 /**
  * __put_user: - Write a simple value into user space, with less checking.
@@ -494,8 +567,12 @@ struct __large_struct { unsigned long bu
  * Returns zero on success, or -EFAULT on error.
  */
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+#define __put_user(x, ptr)	put_user((x), (ptr))
+#else
 #define __put_user(x, ptr)						\
 	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
+#endif
 
 #define __get_user_unaligned __get_user
 #define __put_user_unaligned __put_user
@@ -513,7 +590,7 @@ struct __large_struct { unsigned long bu
 #define get_user_ex(x, ptr)	do {					\
 	unsigned long __gue_val;					\
 	__get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr))));	\
-	(x) = (__force __typeof__(*(ptr)))__gue_val;			\
+	(x) = (__typeof__(*(ptr)))__gue_val;				\
 } while (0)
 
 #define put_user_try		uaccess_try
@@ -531,7 +608,7 @@ extern __must_check long strlen_user(con
 extern __must_check long strnlen_user(const char __user *str, long n);
 
 unsigned long __must_check clear_user(void __user *mem, unsigned long len);
-unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
+unsigned long __must_check __clear_user(void __user *mem, unsigned long len) __size_overflow(2);
 
 extern void __cmpxchg_wrong_size(void)
 	__compiletime_error("Bad argument size for cmpxchg");
@@ -542,18 +619,19 @@ extern void __cmpxchg_wrong_size(void)
 	__typeof__(ptr) __uval = (uval);				\
 	__typeof__(*(ptr)) __old = (old);				\
 	__typeof__(*(ptr)) __new = (new);				\
+	pax_open_userland();						\
 	switch (size) {							\
 	case 1:								\
 	{								\
 		asm volatile("\t" ASM_STAC "\n"				\
-			"1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n"		\
+			"1:\t" LOCK_PREFIX __copyuser_seg"cmpxchgb %4, %2\n"\
 			"2:\t" ASM_CLAC "\n"				\
 			"\t.section .fixup, \"ax\"\n"			\
 			"3:\tmov     %3, %0\n"				\
 			"\tjmp     2b\n"				\
 			"\t.previous\n"					\
 			_ASM_EXTABLE(1b, 3b)				\
-			: "+r" (__ret), "=a" (__old), "+m" (*(ptr))	\
+			: "+r" (__ret), "=a" (__old), "+m" (*____m(ptr))\
 			: "i" (-EFAULT), "q" (__new), "1" (__old)	\
 			: "memory"					\
 		);							\
@@ -562,14 +640,14 @@ extern void __cmpxchg_wrong_size(void)
 	case 2:								\
 	{								\
 		asm volatile("\t" ASM_STAC "\n"				\
-			"1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n"		\
+			"1:\t" LOCK_PREFIX __copyuser_seg"cmpxchgw %4, %2\n"\
 			"2:\t" ASM_CLAC "\n"				\
 			"\t.section .fixup, \"ax\"\n"			\
 			"3:\tmov     %3, %0\n"				\
 			"\tjmp     2b\n"				\
 			"\t.previous\n"					\
 			_ASM_EXTABLE(1b, 3b)				\
-			: "+r" (__ret), "=a" (__old), "+m" (*(ptr))	\
+			: "+r" (__ret), "=a" (__old), "+m" (*____m(ptr))\
 			: "i" (-EFAULT), "r" (__new), "1" (__old)	\
 			: "memory"					\
 		);							\
@@ -578,14 +656,14 @@ extern void __cmpxchg_wrong_size(void)
 	case 4:								\
 	{								\
 		asm volatile("\t" ASM_STAC "\n"				\
-			"1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"		\
+			"1:\t" LOCK_PREFIX __copyuser_seg"cmpxchgl %4, %2\n"\
 			"2:\t" ASM_CLAC "\n"				\
 			"\t.section .fixup, \"ax\"\n"			\
 			"3:\tmov     %3, %0\n"				\
 			"\tjmp     2b\n"				\
 			"\t.previous\n"					\
 			_ASM_EXTABLE(1b, 3b)				\
-			: "+r" (__ret), "=a" (__old), "+m" (*(ptr))	\
+			: "+r" (__ret), "=a" (__old), "+m" (*____m(ptr))\
 			: "i" (-EFAULT), "r" (__new), "1" (__old)	\
 			: "memory"					\
 		);							\
@@ -597,14 +675,14 @@ extern void __cmpxchg_wrong_size(void)
 			__cmpxchg_wrong_size();				\
 									\
 		asm volatile("\t" ASM_STAC "\n"				\
-			"1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n"		\
+			"1:\t" LOCK_PREFIX __copyuser_seg"cmpxchgq %4, %2\n"\
 			"2:\t" ASM_CLAC "\n"				\
 			"\t.section .fixup, \"ax\"\n"			\
 			"3:\tmov     %3, %0\n"				\
 			"\tjmp     2b\n"				\
 			"\t.previous\n"					\
 			_ASM_EXTABLE(1b, 3b)				\
-			: "+r" (__ret), "=a" (__old), "+m" (*(ptr))	\
+			: "+r" (__ret), "=a" (__old), "+m" (*____m(ptr))\
 			: "i" (-EFAULT), "r" (__new), "1" (__old)	\
 			: "memory"					\
 		);							\
@@ -613,6 +691,7 @@ extern void __cmpxchg_wrong_size(void)
 	default:							\
 		__cmpxchg_wrong_size();					\
 	}								\
+	pax_close_userland();						\
 	*__uval = __old;						\
 	__ret;								\
 })
@@ -636,17 +715,6 @@ extern struct movsl_mask {
 
 #define ARCH_HAS_NOCACHE_UACCESS 1
 
-#ifdef CONFIG_X86_32
-# include <asm/uaccess_32.h>
-#else
-# include <asm/uaccess_64.h>
-#endif
-
-unsigned long __must_check _copy_from_user(void *to, const void __user *from,
-					   unsigned n);
-unsigned long __must_check _copy_to_user(void __user *to, const void *from,
-					 unsigned n);
-
 #ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
 # define copy_user_diag __compiletime_error
 #else
@@ -656,7 +724,7 @@ unsigned long __must_check _copy_to_user
 extern void copy_user_diag("copy_from_user() buffer size is too small")
 copy_from_user_overflow(void);
 extern void copy_user_diag("copy_to_user() buffer size is too small")
-copy_to_user_overflow(void) __asm__("copy_from_user_overflow");
+copy_to_user_overflow(void);
 
 #undef copy_user_diag
 
@@ -669,7 +737,7 @@ __copy_from_user_overflow(void) __asm__(
 
 extern void
 __compiletime_warning("copy_to_user() buffer size is not provably correct")
-__copy_to_user_overflow(void) __asm__("copy_from_user_overflow");
+__copy_to_user_overflow(void) __asm__("copy_to_user_overflow");
 #define __copy_to_user_overflow(size, count) __copy_to_user_overflow()
 
 #else
@@ -684,10 +752,16 @@ __copy_from_user_overflow(int size, unsi
 
 #endif
 
+#ifdef CONFIG_X86_32
+# include <asm/uaccess_32.h>
+#else
+# include <asm/uaccess_64.h>
+#endif
+
 static inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	int sz = __compiletime_object_size(to);
+	size_t sz = __compiletime_object_size(to);
 
 	might_fault();
 
@@ -709,12 +783,15 @@ copy_from_user(void *to, const void __us
 	 * case, and do only runtime checking for non-constant sizes.
 	 */
 
-	if (likely(sz < 0 || sz >= n))
-		n = _copy_from_user(to, from, n);
-	else if(__builtin_constant_p(n))
-		copy_from_user_overflow();
-	else
-		__copy_from_user_overflow(sz, n);
+	if (likely(sz != (size_t)-1 && sz < n)) {
+		if (__builtin_constant_p(n))
+			copy_from_user_overflow();
+		else
+			__copy_from_user_overflow(sz, n);
+	} else if (access_ok(VERIFY_READ, from, n))
+		n = __copy_from_user(to, from, n);
+	else if ((long)n > 0)
+		memset(to, 0, n);
 
 	return n;
 }
@@ -722,17 +799,18 @@ copy_from_user(void *to, const void __us
 static inline unsigned long __must_check
 copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	int sz = __compiletime_object_size(from);
+	size_t sz = __compiletime_object_size(from);
 
 	might_fault();
 
 	/* See the comment in copy_from_user() above. */
-	if (likely(sz < 0 || sz >= n))
-		n = _copy_to_user(to, from, n);
-	else if(__builtin_constant_p(n))
-		copy_to_user_overflow();
-	else
-		__copy_to_user_overflow(sz, n);
+	if (likely(sz != (size_t)-1 && sz < n)) {
+		if (__builtin_constant_p(n))
+			copy_to_user_overflow();
+		else
+			__copy_to_user_overflow(sz, n);
+	} else if (access_ok(VERIFY_WRITE, to, n))
+		n = __copy_to_user(to, from, n);
 
 	return n;
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/word-at-a-time.h linux-4.0.9-pax/arch/x86/include/asm/word-at-a-time.h
--- linux-4.0.9/arch/x86/include/asm/word-at-a-time.h	2015-03-18 15:21:50.248349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/word-at-a-time.h	2015-04-15 12:13:52.910318622 +0200
@@ -11,7 +11,7 @@
  * and shift, for example.
  */
 struct word_at_a_time {
-	const unsigned long one_bits, high_bits;
+	unsigned long one_bits, high_bits;
 };
 
 #define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/x86_init.h linux-4.0.9-pax/arch/x86/include/asm/x86_init.h
--- linux-4.0.9/arch/x86/include/asm/x86_init.h	2015-03-18 15:21:50.248349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/asm/x86_init.h	2015-04-15 12:13:52.910318622 +0200
@@ -129,7 +129,7 @@ struct x86_init_ops {
 	struct x86_init_timers		timers;
 	struct x86_init_iommu		iommu;
 	struct x86_init_pci		pci;
-};
+} __no_const;
 
 /**
  * struct x86_cpuinit_ops - platform specific cpu hotplug setups
@@ -140,7 +140,7 @@ struct x86_cpuinit_ops {
 	void (*setup_percpu_clockev)(void);
 	void (*early_percpu_clock_init)(void);
 	void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
-};
+} __no_const;
 
 struct timespec;
 
@@ -168,7 +168,7 @@ struct x86_platform_ops {
 	void (*save_sched_clock_state)(void);
 	void (*restore_sched_clock_state)(void);
 	void (*apic_post_init)(void);
-};
+} __no_const;
 
 struct pci_dev;
 struct msi_msg;
@@ -182,7 +182,7 @@ struct x86_msi_ops {
 	void (*teardown_msi_irqs)(struct pci_dev *dev);
 	void (*restore_msi_irqs)(struct pci_dev *dev);
 	int  (*setup_hpet_msi)(unsigned int irq, unsigned int id);
-};
+} __no_const;
 
 struct IO_APIC_route_entry;
 struct io_apic_irq_attr;
@@ -203,7 +203,7 @@ struct x86_io_apic_ops {
 				       unsigned int destination, int vector,
 				       struct io_apic_irq_attr *attr);
 	void		(*eoi_ioapic_pin)(int apic, int pin, int vector);
-};
+} __no_const;
 
 extern struct x86_init_ops x86_init;
 extern struct x86_cpuinit_ops x86_cpuinit;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/xen/page.h linux-4.0.9-pax/arch/x86/include/asm/xen/page.h
--- linux-4.0.9/arch/x86/include/asm/xen/page.h	2015-04-13 11:21:01.702617469 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/xen/page.h	2015-04-15 12:13:52.910318622 +0200
@@ -82,7 +82,7 @@ static inline int xen_safe_read_ulong(un
  * - get_phys_to_machine() is to be called by __pfn_to_mfn() only in special
  *   cases needing an extended handling.
  */
-static inline unsigned long __pfn_to_mfn(unsigned long pfn)
+static inline unsigned long __intentional_overflow(-1) __pfn_to_mfn(unsigned long pfn)
 {
 	unsigned long mfn;
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/asm/xsave.h linux-4.0.9-pax/arch/x86/include/asm/xsave.h
--- linux-4.0.9/arch/x86/include/asm/xsave.h	2015-04-13 11:21:01.702617469 +0200
+++ linux-4.0.9-pax/arch/x86/include/asm/xsave.h	2015-04-15 12:13:52.910318622 +0200
@@ -223,12 +223,16 @@ static inline int xsave_user(struct xsav
 	if (unlikely(err))
 		return -EFAULT;
 
+	pax_open_userland();
 	__asm__ __volatile__(ASM_STAC "\n"
-			     "1:"XSAVE"\n"
+			     "1:"
+			     __copyuser_seg
+			     XSAVE"\n"
 			     "2: " ASM_CLAC "\n"
 			     xstate_fault
 			     : "D" (buf), "a" (-1), "d" (-1), "0" (0)
 			     : "memory");
+	pax_close_userland();
 	return err;
 }
 
@@ -238,16 +242,20 @@ static inline int xsave_user(struct xsav
 static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
 {
 	int err = 0;
-	struct xsave_struct *xstate = ((__force struct xsave_struct *)buf);
+	struct xsave_struct *xstate = ((__force_kernel struct xsave_struct *)buf);
 	u32 lmask = mask;
 	u32 hmask = mask >> 32;
 
+	pax_open_userland();
 	__asm__ __volatile__(ASM_STAC "\n"
-			     "1:"XRSTOR"\n"
+			     "1:"
+			     __copyuser_seg
+			     XRSTOR"\n"
 			     "2: " ASM_CLAC "\n"
 			     xstate_fault
 			     : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0)
 			     : "memory");	/* memory required? */
+	pax_close_userland();
 	return err;
 }
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/uapi/asm/e820.h linux-4.0.9-pax/arch/x86/include/uapi/asm/e820.h
--- linux-4.0.9/arch/x86/include/uapi/asm/e820.h	2015-03-18 15:21:50.252349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/uapi/asm/e820.h	2015-04-15 12:13:52.910318622 +0200
@@ -58,7 +58,7 @@ struct e820map {
 #define ISA_START_ADDRESS	0xa0000
 #define ISA_END_ADDRESS		0x100000
 
-#define BIOS_BEGIN		0x000a0000
+#define BIOS_BEGIN		0x000c0000
 #define BIOS_END		0x00100000
 
 #define BIOS_ROM_BASE		0xffe00000
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/include/uapi/asm/ptrace-abi.h linux-4.0.9-pax/arch/x86/include/uapi/asm/ptrace-abi.h
--- linux-4.0.9/arch/x86/include/uapi/asm/ptrace-abi.h	2015-03-18 15:21:50.252349253 +0100
+++ linux-4.0.9-pax/arch/x86/include/uapi/asm/ptrace-abi.h	2015-04-15 12:13:52.910318622 +0200
@@ -49,7 +49,6 @@
 #define EFLAGS 144
 #define RSP 152
 #define SS 160
-#define ARGOFFSET R11
 #endif /* __ASSEMBLY__ */
 
 /* top of stack page */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/Kconfig linux-4.0.9-pax/arch/x86/Kconfig
--- linux-4.0.9/arch/x86/Kconfig	2015-07-10 20:07:37.563036133 +0200
+++ linux-4.0.9-pax/arch/x86/Kconfig	2015-07-10 20:07:47.715035591 +0200
@@ -132,7 +132,7 @@ config X86
 	select RTC_LIB
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
-	select HAVE_CC_STACKPROTECTOR
+	select HAVE_CC_STACKPROTECTOR if X86_64 || !PAX_MEMORY_UDEREF
 	select GENERIC_CPU_AUTOPROBE
 	select HAVE_ARCH_AUDITSYSCALL
 	select ARCH_SUPPORTS_ATOMIC_RMW
@@ -266,7 +266,7 @@ config X86_HT
 
 config X86_32_LAZY_GS
 	def_bool y
-	depends on X86_32 && !CC_STACKPROTECTOR
+	depends on X86_32 && !CC_STACKPROTECTOR && !PAX_MEMORY_UDEREF
 
 config ARCH_HWEIGHT_CFLAGS
 	string
@@ -1274,7 +1274,7 @@ config PAGE_OFFSET
 	hex
 	default 0xB0000000 if VMSPLIT_3G_OPT
 	default 0x80000000 if VMSPLIT_2G
-	default 0x78000000 if VMSPLIT_2G_OPT
+	default 0x70000000 if VMSPLIT_2G_OPT
 	default 0x40000000 if VMSPLIT_1G
 	default 0xC0000000
 	depends on X86_32
@@ -1900,7 +1900,9 @@ config X86_NEED_RELOCS
 
 config PHYSICAL_ALIGN
 	hex "Alignment value to which kernel should be aligned"
-	default "0x200000"
+	default "0x1000000"
+	range 0x200000 0x1000000 if PAX_KERNEXEC && X86_PAE
+	range 0x400000 0x1000000 if PAX_KERNEXEC && !X86_PAE
 	range 0x2000 0x1000000 if X86_32
 	range 0x200000 0x1000000 if X86_64
 	---help---
@@ -1983,6 +1985,7 @@ config COMPAT_VDSO
 	def_bool n
 	prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)"
 	depends on X86_32 || IA32_EMULATION
+	depends on !PAX_PAGEEXEC && !PAX_SEGMEXEC && !PAX_KERNEXEC && !PAX_MEMORY_UDEREF
 	---help---
 	  Certain buggy versions of glibc will crash if they are
 	  presented with a 32-bit vDSO that is not mapped at the address
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/Kconfig.cpu linux-4.0.9-pax/arch/x86/Kconfig.cpu
--- linux-4.0.9/arch/x86/Kconfig.cpu	2015-03-18 15:21:50.252349253 +0100
+++ linux-4.0.9-pax/arch/x86/Kconfig.cpu	2015-04-15 12:13:52.910318622 +0200
@@ -319,7 +319,7 @@ config X86_PPRO_FENCE
 
 config X86_F00F_BUG
 	def_bool y
-	depends on M586MMX || M586TSC || M586 || M486
+	depends on (M586MMX || M586TSC || M586 || M486) && !PAX_KERNEXEC
 
 config X86_INVD_BUG
 	def_bool y
@@ -327,7 +327,7 @@ config X86_INVD_BUG
 
 config X86_ALIGNMENT_16
 	def_bool y
-	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
+	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK8 || MK7 || MK6 || MCORE2 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
 
 config X86_INTEL_USERCOPY
 	def_bool y
@@ -369,7 +369,7 @@ config X86_CMPXCHG64
 # generates cmov.
 config X86_CMOV
 	def_bool y
-	depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
+	depends on (MK8 || MK7 || MCORE2 || MPSC || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
 
 config X86_MINIMUM_CPU_FAMILY
 	int
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/Kconfig.debug linux-4.0.9-pax/arch/x86/Kconfig.debug
--- linux-4.0.9/arch/x86/Kconfig.debug	2015-04-13 11:21:01.618617474 +0200
+++ linux-4.0.9-pax/arch/x86/Kconfig.debug	2015-04-15 12:13:52.914318622 +0200
@@ -93,7 +93,7 @@ config EFI_PGT_DUMP
 config DEBUG_RODATA
 	bool "Write protect kernel read-only data structures"
 	default y
-	depends on DEBUG_KERNEL
+	depends on DEBUG_KERNEL && BROKEN
 	---help---
 	  Mark the kernel read-only data as write-protected in the pagetables,
 	  in order to catch accidental (and incorrect) writes to such const
@@ -111,7 +111,7 @@ config DEBUG_RODATA_TEST
 
 config DEBUG_SET_MODULE_RONX
 	bool "Set loadable kernel module data as NX and text as RO"
-	depends on MODULES
+	depends on MODULES && BROKEN
 	---help---
 	  This option helps catch unintended modifications to loadable
 	  kernel module's text and read-only data. It also prevents execution
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/acpi/boot.c linux-4.0.9-pax/arch/x86/kernel/acpi/boot.c
--- linux-4.0.9/arch/x86/kernel/acpi/boot.c	2015-04-13 11:21:01.738617467 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/acpi/boot.c	2015-04-15 12:13:52.914318622 +0200
@@ -1361,7 +1361,7 @@ static void __init acpi_reduced_hw_init(
  * If your system is blacklisted here, but you find that acpi=force
  * works for you, please contact linux-acpi@vger.kernel.org
  */
-static struct dmi_system_id __initdata acpi_dmi_table[] = {
+static const struct dmi_system_id __initconst acpi_dmi_table[] = {
 	/*
 	 * Boxes that need ACPI disabled
 	 */
@@ -1436,7 +1436,7 @@ static struct dmi_system_id __initdata a
 };
 
 /* second table for DMI checks that should run after early-quirks */
-static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
+static const struct dmi_system_id __initconst acpi_dmi_table_late[] = {
 	/*
 	 * HP laptops which use a DSDT reporting as HP/SB400/10000,
 	 * which includes some code which overrides all temperature
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/acpi/sleep.c linux-4.0.9-pax/arch/x86/kernel/acpi/sleep.c
--- linux-4.0.9/arch/x86/kernel/acpi/sleep.c	2015-04-13 11:21:01.738617467 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/acpi/sleep.c	2015-04-15 12:13:52.914318622 +0200
@@ -99,8 +99,12 @@ int x86_acpi_suspend_lowlevel(void)
 #else /* CONFIG_64BIT */
 #ifdef CONFIG_SMP
 	stack_start = (unsigned long)temp_stack + sizeof(temp_stack);
+
+	pax_open_kernel();
 	early_gdt_descr.address =
 			(unsigned long)get_cpu_gdt_table(smp_processor_id());
+	pax_close_kernel();
+
 	initial_gs = per_cpu_offset(smp_processor_id());
 #endif
 	initial_code = (unsigned long)wakeup_long64;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/acpi/wakeup_32.S linux-4.0.9-pax/arch/x86/kernel/acpi/wakeup_32.S
--- linux-4.0.9/arch/x86/kernel/acpi/wakeup_32.S	2015-03-18 15:21:50.252349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/acpi/wakeup_32.S	2015-04-15 12:13:52.914318622 +0200
@@ -29,13 +29,11 @@ wakeup_pmode_return:
 	# and restore the stack ... but you need gdt for this to work
 	movl	saved_context_esp, %esp
 
-	movl	%cs:saved_magic, %eax
-	cmpl	$0x12345678, %eax
+	cmpl	$0x12345678, saved_magic
 	jne	bogus_magic
 
 	# jump to place where we left off
-	movl	saved_eip, %eax
-	jmp	*%eax
+	jmp	*(saved_eip)
 
 bogus_magic:
 	jmp	bogus_magic
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/alternative.c linux-4.0.9-pax/arch/x86/kernel/alternative.c
--- linux-4.0.9/arch/x86/kernel/alternative.c	2015-03-18 15:21:50.252349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/alternative.c	2015-07-10 19:58:51.935064198 +0200
@@ -268,6 +268,13 @@ void __init_or_module apply_alternatives
 	 */
 	for (a = start; a < end; a++) {
 		instr = (u8 *)&a->instr_offset + a->instr_offset;
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC)
+		instr += ____LOAD_PHYSICAL_ADDR - LOAD_PHYSICAL_ADDR;
+		if (instr < (u8 *)_text || (u8 *)_einittext <= instr)
+			instr -= ____LOAD_PHYSICAL_ADDR - LOAD_PHYSICAL_ADDR;
+#endif
+
 		replacement = (u8 *)&a->repl_offset + a->repl_offset;
 		BUG_ON(a->replacementlen > a->instrlen);
 		BUG_ON(a->instrlen > sizeof(insnbuf));
@@ -284,6 +291,11 @@ void __init_or_module apply_alternatives
 		add_nops(insnbuf + a->replacementlen,
 			 a->instrlen - a->replacementlen);
 
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC)
+		if (instr < (u8 *)_text || (u8 *)_einittext <= instr)
+			instr = ktva_ktla(instr);
+#endif
+
 		text_poke_early(instr, insnbuf, a->instrlen);
 	}
 }
@@ -299,10 +311,16 @@ static void alternatives_smp_lock(const
 	for (poff = start; poff < end; poff++) {
 		u8 *ptr = (u8 *)poff + *poff;
 
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC)
+		ptr += ____LOAD_PHYSICAL_ADDR - LOAD_PHYSICAL_ADDR;
+		if (ptr < (u8 *)_text || (u8 *)_einittext <= ptr)
+			ptr -= ____LOAD_PHYSICAL_ADDR - LOAD_PHYSICAL_ADDR;
+#endif
+
 		if (!*poff || ptr < text || ptr >= text_end)
 			continue;
 		/* turn DS segment override prefix into lock prefix */
-		if (*ptr == 0x3e)
+		if (*ktla_ktva(ptr) == 0x3e)
 			text_poke(ptr, ((unsigned char []){0xf0}), 1);
 	}
 	mutex_unlock(&text_mutex);
@@ -317,10 +335,16 @@ static void alternatives_smp_unlock(cons
 	for (poff = start; poff < end; poff++) {
 		u8 *ptr = (u8 *)poff + *poff;
 
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC)
+		ptr += ____LOAD_PHYSICAL_ADDR - LOAD_PHYSICAL_ADDR;
+		if (ptr < (u8 *)_text || (u8 *)_einittext <= ptr)
+			ptr -= ____LOAD_PHYSICAL_ADDR - LOAD_PHYSICAL_ADDR;
+#endif
+
 		if (!*poff || ptr < text || ptr >= text_end)
 			continue;
 		/* turn lock prefix into DS segment override prefix */
-		if (*ptr == 0xf0)
+		if (*ktla_ktva(ptr) == 0xf0)
 			text_poke(ptr, ((unsigned char []){0x3E}), 1);
 	}
 	mutex_unlock(&text_mutex);
@@ -457,7 +481,7 @@ void __init_or_module apply_paravirt(str
 
 		BUG_ON(p->len > MAX_PATCH_LEN);
 		/* prep the buffer with the original instructions */
-		memcpy(insnbuf, p->instr, p->len);
+		memcpy(insnbuf, ktla_ktva(p->instr), p->len);
 		used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
 					 (unsigned long)p->instr, p->len);
 
@@ -504,7 +528,7 @@ void __init alternative_instructions(voi
 	if (!uniproc_patched || num_possible_cpus() == 1)
 		free_init_pages("SMP alternatives",
 				(unsigned long)__smp_locks,
-				(unsigned long)__smp_locks_end);
+				PAGE_ALIGN((unsigned long)__smp_locks_end));
 #endif
 
 	apply_paravirt(__parainstructions, __parainstructions_end);
@@ -524,13 +548,17 @@ void __init alternative_instructions(voi
  * instructions. And on the local CPU you need to be protected again NMI or MCE
  * handlers seeing an inconsistent instruction while you patch.
  */
-void *__init_or_module text_poke_early(void *addr, const void *opcode,
+void *__kprobes text_poke_early(void *addr, const void *opcode,
 					      size_t len)
 {
 	unsigned long flags;
 	local_irq_save(flags);
-	memcpy(addr, opcode, len);
+
+	pax_open_kernel();
+	memcpy(ktla_ktva(addr), opcode, len);
 	sync_core();
+	pax_close_kernel();
+
 	local_irq_restore(flags);
 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
 	   that causes hangs on some VIA CPUs. */
@@ -552,36 +580,22 @@ void *__init_or_module text_poke_early(v
  */
 void *text_poke(void *addr, const void *opcode, size_t len)
 {
-	unsigned long flags;
-	char *vaddr;
+	unsigned char *vaddr = ktla_ktva(addr);
 	struct page *pages[2];
-	int i;
+	size_t i;
 
 	if (!core_kernel_text((unsigned long)addr)) {
-		pages[0] = vmalloc_to_page(addr);
-		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
+		pages[0] = vmalloc_to_page(vaddr);
+		pages[1] = vmalloc_to_page(vaddr + PAGE_SIZE);
 	} else {
-		pages[0] = virt_to_page(addr);
+		pages[0] = virt_to_page(vaddr);
 		WARN_ON(!PageReserved(pages[0]));
-		pages[1] = virt_to_page(addr + PAGE_SIZE);
+		pages[1] = virt_to_page(vaddr + PAGE_SIZE);
 	}
 	BUG_ON(!pages[0]);
-	local_irq_save(flags);
-	set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
-	if (pages[1])
-		set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
-	vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
-	memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
-	clear_fixmap(FIX_TEXT_POKE0);
-	if (pages[1])
-		clear_fixmap(FIX_TEXT_POKE1);
-	local_flush_tlb();
-	sync_core();
-	/* Could also do a CLFLUSH here to speed up CPU recovery; but
-	   that causes hangs on some VIA CPUs. */
+	text_poke_early(addr, opcode, len);
 	for (i = 0; i < len; i++)
-		BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
-	local_irq_restore(flags);
+		BUG_ON((vaddr)[i] != ((const unsigned char *)opcode)[i]);
 	return addr;
 }
 
@@ -601,7 +615,7 @@ int poke_int3_handler(struct pt_regs *re
 	if (likely(!bp_patching_in_progress))
 		return 0;
 
-	if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr)
+	if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
 		return 0;
 
 	/* set up the specified breakpoint handler */
@@ -635,7 +649,7 @@ int poke_int3_handler(struct pt_regs *re
  */
 void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
 {
-	unsigned char int3 = 0xcc;
+	const unsigned char int3 = 0xcc;
 
 	bp_int3_handler = handler;
 	bp_int3_addr = (u8 *)addr + sizeof(int3);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/apic/apic.c linux-4.0.9-pax/arch/x86/kernel/apic/apic.c
--- linux-4.0.9/arch/x86/kernel/apic/apic.c	2015-04-13 11:21:01.746617467 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/apic/apic.c	2015-04-15 12:13:52.914318622 +0200
@@ -171,7 +171,7 @@ int first_system_vector = FIRST_SYSTEM_V
 /*
  * Debug level, exported for io_apic.c
  */
-unsigned int apic_verbosity;
+int apic_verbosity;
 
 int pic_mode;
 
@@ -1918,7 +1918,7 @@ static inline void __smp_error_interrupt
 		apic_write(APIC_ESR, 0);
 	v = apic_read(APIC_ESR);
 	ack_APIC_irq();
-	atomic_inc(&irq_err_count);
+	atomic_inc_unchecked(&irq_err_count);
 
 	apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x",
 		    smp_processor_id(), v);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/apic/apic_flat_64.c linux-4.0.9-pax/arch/x86/kernel/apic/apic_flat_64.c
--- linux-4.0.9/arch/x86/kernel/apic/apic_flat_64.c	2015-03-18 15:21:50.252349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/apic/apic_flat_64.c	2015-04-15 12:13:52.914318622 +0200
@@ -154,7 +154,7 @@ static int flat_probe(void)
 	return 1;
 }
 
-static struct apic apic_flat =  {
+static struct apic apic_flat __read_only =  {
 	.name				= "flat",
 	.probe				= flat_probe,
 	.acpi_madt_oem_check		= flat_acpi_madt_oem_check,
@@ -260,7 +260,7 @@ static int physflat_probe(void)
 	return 0;
 }
 
-static struct apic apic_physflat =  {
+static struct apic apic_physflat __read_only =  {
 
 	.name				= "physical flat",
 	.probe				= physflat_probe,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/apic/apic_noop.c linux-4.0.9-pax/arch/x86/kernel/apic/apic_noop.c
--- linux-4.0.9/arch/x86/kernel/apic/apic_noop.c	2015-03-18 15:21:50.252349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/apic/apic_noop.c	2015-04-15 12:13:52.914318622 +0200
@@ -108,7 +108,7 @@ static void noop_apic_write(u32 reg, u32
 	WARN_ON_ONCE(cpu_has_apic && !disable_apic);
 }
 
-struct apic apic_noop = {
+struct apic apic_noop __read_only = {
 	.name				= "noop",
 	.probe				= noop_probe,
 	.acpi_madt_oem_check		= NULL,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/apic/bigsmp_32.c linux-4.0.9-pax/arch/x86/kernel/apic/bigsmp_32.c
--- linux-4.0.9/arch/x86/kernel/apic/bigsmp_32.c	2015-03-18 15:21:50.252349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/apic/bigsmp_32.c	2015-04-15 12:13:52.914318622 +0200
@@ -147,7 +147,7 @@ static int probe_bigsmp(void)
 	return dmi_bigsmp;
 }
 
-static struct apic apic_bigsmp = {
+static struct apic apic_bigsmp __read_only = {
 
 	.name				= "bigsmp",
 	.probe				= probe_bigsmp,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/apic/io_apic.c linux-4.0.9-pax/arch/x86/kernel/apic/io_apic.c
--- linux-4.0.9/arch/x86/kernel/apic/io_apic.c	2015-04-13 11:21:01.746617467 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/apic/io_apic.c	2015-04-30 13:41:56.534966842 +0200
@@ -1862,7 +1862,7 @@ int native_ioapic_set_affinity(struct ir
 	return ret;
 }
 
-atomic_t irq_mis_count;
+atomic_unchecked_t irq_mis_count;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
@@ -2003,7 +2003,7 @@ static void ack_ioapic_level(struct irq_
 	 * at the cpu.
 	 */
 	if (!(v & (1 << (i & 0x1f)))) {
-		atomic_inc(&irq_mis_count);
+		atomic_inc_unchecked(&irq_mis_count);
 
 		eoi_ioapic_irq(irq, cfg);
 	}
@@ -2011,7 +2011,7 @@ static void ack_ioapic_level(struct irq_
 	ioapic_irqd_unmask(data, cfg, masked);
 }
 
-static struct irq_chip ioapic_chip __read_mostly = {
+static struct irq_chip ioapic_chip = {
 	.name			= "IO-APIC",
 	.irq_startup		= startup_ioapic_irq,
 	.irq_mask		= mask_ioapic_irq,
@@ -2070,7 +2070,7 @@ static void ack_lapic_irq(struct irq_dat
 	ack_APIC_irq();
 }
 
-static struct irq_chip lapic_chip __read_mostly = {
+static struct irq_chip lapic_chip = {
 	.name		= "local-APIC",
 	.irq_mask	= mask_lapic_irq,
 	.irq_unmask	= unmask_lapic_irq,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/apic/probe_32.c linux-4.0.9-pax/arch/x86/kernel/apic/probe_32.c
--- linux-4.0.9/arch/x86/kernel/apic/probe_32.c	2015-03-18 15:21:50.256349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/apic/probe_32.c	2015-04-15 12:13:52.914318622 +0200
@@ -72,7 +72,7 @@ static int probe_default(void)
 	return 1;
 }
 
-static struct apic apic_default = {
+static struct apic apic_default __read_only = {
 
 	.name				= "default",
 	.probe				= probe_default,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/apic/vector.c linux-4.0.9-pax/arch/x86/kernel/apic/vector.c
--- linux-4.0.9/arch/x86/kernel/apic/vector.c	2015-03-18 15:21:50.256349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/apic/vector.c	2015-04-15 12:13:52.914318622 +0200
@@ -21,7 +21,7 @@
 
 static DEFINE_RAW_SPINLOCK(vector_lock);
 
-void lock_vector_lock(void)
+void lock_vector_lock(void) __acquires(vector_lock)
 {
 	/* Used to the online set of cpus does not change
 	 * during assign_irq_vector.
@@ -29,7 +29,7 @@ void lock_vector_lock(void)
 	raw_spin_lock(&vector_lock);
 }
 
-void unlock_vector_lock(void)
+void unlock_vector_lock(void) __releases(vector_lock)
 {
 	raw_spin_unlock(&vector_lock);
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/apic/x2apic_cluster.c linux-4.0.9-pax/arch/x86/kernel/apic/x2apic_cluster.c
--- linux-4.0.9/arch/x86/kernel/apic/x2apic_cluster.c	2015-03-18 15:21:50.256349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/apic/x2apic_cluster.c	2015-04-15 12:13:52.914318622 +0200
@@ -182,7 +182,7 @@ update_clusterinfo(struct notifier_block
 	return notifier_from_errno(err);
 }
 
-static struct notifier_block __refdata x2apic_cpu_notifier = {
+static struct notifier_block x2apic_cpu_notifier = {
 	.notifier_call = update_clusterinfo,
 };
 
@@ -234,7 +234,7 @@ static void cluster_vector_allocation_do
 		cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
 }
 
-static struct apic apic_x2apic_cluster = {
+static struct apic apic_x2apic_cluster __read_only = {
 
 	.name				= "cluster x2apic",
 	.probe				= x2apic_cluster_probe,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/apic/x2apic_phys.c linux-4.0.9-pax/arch/x86/kernel/apic/x2apic_phys.c
--- linux-4.0.9/arch/x86/kernel/apic/x2apic_phys.c	2015-03-18 15:21:50.256349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/apic/x2apic_phys.c	2015-04-15 12:13:52.914318622 +0200
@@ -88,7 +88,7 @@ static int x2apic_phys_probe(void)
 	return apic == &apic_x2apic_phys;
 }
 
-static struct apic apic_x2apic_phys = {
+static struct apic apic_x2apic_phys __read_only = {
 
 	.name				= "physical x2apic",
 	.probe				= x2apic_phys_probe,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/apic/x2apic_uv_x.c linux-4.0.9-pax/arch/x86/kernel/apic/x2apic_uv_x.c
--- linux-4.0.9/arch/x86/kernel/apic/x2apic_uv_x.c	2015-03-18 15:21:50.256349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/apic/x2apic_uv_x.c	2015-04-15 12:13:52.914318622 +0200
@@ -348,7 +348,7 @@ static int uv_probe(void)
 	return apic == &apic_x2apic_uv_x;
 }
 
-static struct apic __refdata apic_x2apic_uv_x = {
+static struct apic apic_x2apic_uv_x __read_only = {
 
 	.name				= "UV large system",
 	.probe				= uv_probe,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/apm_32.c linux-4.0.9-pax/arch/x86/kernel/apm_32.c
--- linux-4.0.9/arch/x86/kernel/apm_32.c	2015-03-18 15:21:50.256349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/apm_32.c	2015-04-30 00:51:09.160291617 +0200
@@ -432,7 +432,7 @@ static DEFINE_MUTEX(apm_mutex);
  * This is for buggy BIOS's that refer to (real mode) segment 0x40
  * even though they are called in protected mode.
  */
-static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092,
+static const struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4093,
 			(unsigned long)__va(0x400UL), PAGE_SIZE - 0x400 - 1);
 
 static const char driver_version[] = "1.16ac";	/* no spaces */
@@ -610,7 +610,10 @@ static long __apm_bios_call(void *_call)
 	BUG_ON(cpu != 0);
 	gdt = get_cpu_gdt_table(cpu);
 	save_desc_40 = gdt[0x40 / 8];
+
+	pax_open_kernel();
 	gdt[0x40 / 8] = bad_bios_desc;
+	pax_close_kernel();
 
 	apm_irq_save(flags);
 	APM_DO_SAVE_SEGS;
@@ -619,7 +622,11 @@ static long __apm_bios_call(void *_call)
 			  &call->esi);
 	APM_DO_RESTORE_SEGS;
 	apm_irq_restore(flags);
+
+	pax_open_kernel();
 	gdt[0x40 / 8] = save_desc_40;
+	pax_close_kernel();
+
 	put_cpu();
 
 	return call->eax & 0xff;
@@ -686,7 +693,10 @@ static long __apm_bios_call_simple(void
 	BUG_ON(cpu != 0);
 	gdt = get_cpu_gdt_table(cpu);
 	save_desc_40 = gdt[0x40 / 8];
+
+	pax_open_kernel();
 	gdt[0x40 / 8] = bad_bios_desc;
+	pax_close_kernel();
 
 	apm_irq_save(flags);
 	APM_DO_SAVE_SEGS;
@@ -694,7 +704,11 @@ static long __apm_bios_call_simple(void
 					 &call->eax);
 	APM_DO_RESTORE_SEGS;
 	apm_irq_restore(flags);
+
+	pax_open_kernel();
 	gdt[0x40 / 8] = save_desc_40;
+	pax_close_kernel();
+
 	put_cpu();
 	return error;
 }
@@ -2039,7 +2053,7 @@ static int __init swab_apm_power_in_minu
 	return 0;
 }
 
-static struct dmi_system_id __initdata apm_dmi_table[] = {
+static const struct dmi_system_id __initconst apm_dmi_table[] = {
 	{
 		print_if_true,
 		KERN_WARNING "IBM T23 - BIOS 1.03b+ and controller firmware 1.02+ may be needed for Linux APM.",
@@ -2349,12 +2363,15 @@ static int __init apm_init(void)
 	 * code to that CPU.
 	 */
 	gdt = get_cpu_gdt_table(0);
+
+	pax_open_kernel();
 	set_desc_base(&gdt[APM_CS >> 3],
 		 (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4));
 	set_desc_base(&gdt[APM_CS_16 >> 3],
 		 (unsigned long)__va((unsigned long)apm_info.bios.cseg_16 << 4));
 	set_desc_base(&gdt[APM_DS >> 3],
 		 (unsigned long)__va((unsigned long)apm_info.bios.dseg << 4));
+	pax_close_kernel();
 
 	proc_create("apm", 0, NULL, &apm_file_ops);
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/asm-offsets_64.c linux-4.0.9-pax/arch/x86/kernel/asm-offsets_64.c
--- linux-4.0.9/arch/x86/kernel/asm-offsets_64.c	2015-03-18 15:21:50.256349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/asm-offsets_64.c	2015-04-15 12:13:52.914318622 +0200
@@ -80,6 +80,7 @@ int main(void)
 	BLANK();
 #undef ENTRY
 
+	DEFINE(TSS_size, sizeof(struct tss_struct));
 	OFFSET(TSS_ist, tss_struct, x86_tss.ist);
 	BLANK();
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/asm-offsets.c linux-4.0.9-pax/arch/x86/kernel/asm-offsets.c
--- linux-4.0.9/arch/x86/kernel/asm-offsets.c	2015-03-18 15:21:50.256349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/asm-offsets.c	2015-04-15 12:13:52.914318622 +0200
@@ -32,6 +32,8 @@ void common(void) {
 	OFFSET(TI_flags, thread_info, flags);
 	OFFSET(TI_status, thread_info, status);
 	OFFSET(TI_addr_limit, thread_info, addr_limit);
+	OFFSET(TI_lowest_stack, thread_info, lowest_stack);
+	DEFINE(TI_task_thread_sp0, offsetof(struct task_struct, thread.sp0) - offsetof(struct task_struct, tinfo));
 
 	BLANK();
 	OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
@@ -52,8 +54,26 @@ void common(void) {
 	OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
 	OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
 	OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
+
+#ifdef CONFIG_PAX_KERNEXEC
+	OFFSET(PV_CPU_write_cr0, pv_cpu_ops, write_cr0);
+#endif
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	OFFSET(PV_MMU_read_cr3, pv_mmu_ops, read_cr3);
+	OFFSET(PV_MMU_write_cr3, pv_mmu_ops, write_cr3);
+#ifdef CONFIG_X86_64
+	OFFSET(PV_MMU_set_pgd_batched, pv_mmu_ops, set_pgd_batched);
+#endif
 #endif
 
+#endif
+
+	BLANK();
+	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
+	DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT);
+	DEFINE(THREAD_SIZE_asm, THREAD_SIZE);
+
 #ifdef CONFIG_XEN
 	BLANK();
 	OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/amd.c linux-4.0.9-pax/arch/x86/kernel/cpu/amd.c
--- linux-4.0.9/arch/x86/kernel/cpu/amd.c	2015-04-13 11:21:01.746617467 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/amd.c	2015-04-15 12:13:52.914318622 +0200
@@ -717,7 +717,7 @@ static void init_amd(struct cpuinfo_x86
 static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
 {
 	/* AMD errata T13 (order #21922) */
-	if ((c->x86 == 6)) {
+	if (c->x86 == 6) {
 		/* Duron Rev A0 */
 		if (c->x86_model == 3 && c->x86_mask == 0)
 			size = 64;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/common.c linux-4.0.9-pax/arch/x86/kernel/cpu/common.c
--- linux-4.0.9/arch/x86/kernel/cpu/common.c	2015-04-13 11:21:01.746617467 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/common.c	2015-04-15 12:13:52.914318622 +0200
@@ -91,60 +91,6 @@ static const struct cpu_dev default_cpu
 
 static const struct cpu_dev *this_cpu = &default_cpu;
 
-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
-#ifdef CONFIG_X86_64
-	/*
-	 * We need valid kernel segments for data and code in long mode too
-	 * IRET will check the segment types  kkeil 2000/10/28
-	 * Also sysret mandates a special GDT layout
-	 *
-	 * TLS descriptors are currently at a different place compared to i386.
-	 * Hopefully nobody expects them at a fixed place (Wine?)
-	 */
-	[GDT_ENTRY_KERNEL32_CS]		= GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
-	[GDT_ENTRY_KERNEL_CS]		= GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
-	[GDT_ENTRY_KERNEL_DS]		= GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
-	[GDT_ENTRY_DEFAULT_USER32_CS]	= GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff),
-	[GDT_ENTRY_DEFAULT_USER_DS]	= GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff),
-	[GDT_ENTRY_DEFAULT_USER_CS]	= GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff),
-#else
-	[GDT_ENTRY_KERNEL_CS]		= GDT_ENTRY_INIT(0xc09a, 0, 0xfffff),
-	[GDT_ENTRY_KERNEL_DS]		= GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
-	[GDT_ENTRY_DEFAULT_USER_CS]	= GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff),
-	[GDT_ENTRY_DEFAULT_USER_DS]	= GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff),
-	/*
-	 * Segments used for calling PnP BIOS have byte granularity.
-	 * They code segments and data segments have fixed 64k limits,
-	 * the transfer segment sizes are set at run time.
-	 */
-	/* 32-bit code */
-	[GDT_ENTRY_PNPBIOS_CS32]	= GDT_ENTRY_INIT(0x409a, 0, 0xffff),
-	/* 16-bit code */
-	[GDT_ENTRY_PNPBIOS_CS16]	= GDT_ENTRY_INIT(0x009a, 0, 0xffff),
-	/* 16-bit data */
-	[GDT_ENTRY_PNPBIOS_DS]		= GDT_ENTRY_INIT(0x0092, 0, 0xffff),
-	/* 16-bit data */
-	[GDT_ENTRY_PNPBIOS_TS1]		= GDT_ENTRY_INIT(0x0092, 0, 0),
-	/* 16-bit data */
-	[GDT_ENTRY_PNPBIOS_TS2]		= GDT_ENTRY_INIT(0x0092, 0, 0),
-	/*
-	 * The APM segments have byte granularity and their bases
-	 * are set at run time.  All have 64k limits.
-	 */
-	/* 32-bit code */
-	[GDT_ENTRY_APMBIOS_BASE]	= GDT_ENTRY_INIT(0x409a, 0, 0xffff),
-	/* 16-bit code */
-	[GDT_ENTRY_APMBIOS_BASE+1]	= GDT_ENTRY_INIT(0x009a, 0, 0xffff),
-	/* data */
-	[GDT_ENTRY_APMBIOS_BASE+2]	= GDT_ENTRY_INIT(0x4092, 0, 0xffff),
-
-	[GDT_ENTRY_ESPFIX_SS]		= GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
-	[GDT_ENTRY_PERCPU]		= GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
-	GDT_STACK_CANARY_INIT
-#endif
-} };
-EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
-
 static int __init x86_xsave_setup(char *s)
 {
 	if (strlen(s))
@@ -306,6 +252,59 @@ static __always_inline void setup_smap(s
 	}
 }
 
+#ifdef CONFIG_X86_64
+static __init int setup_disable_pcid(char *arg)
+{
+	setup_clear_cpu_cap(X86_FEATURE_PCID);
+	setup_clear_cpu_cap(X86_FEATURE_INVPCID);
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	if (clone_pgd_mask != ~(pgdval_t)0UL)
+		pax_user_shadow_base = 1UL << TASK_SIZE_MAX_SHIFT;
+#endif
+
+	return 1;
+}
+__setup("nopcid", setup_disable_pcid);
+
+static void setup_pcid(struct cpuinfo_x86 *c)
+{
+	if (!cpu_has(c, X86_FEATURE_PCID)) {
+		clear_cpu_cap(c, X86_FEATURE_INVPCID);
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+		if (clone_pgd_mask != ~(pgdval_t)0UL) {
+			pax_open_kernel();
+			pax_user_shadow_base = 1UL << TASK_SIZE_MAX_SHIFT;
+			pax_close_kernel();
+			printk("PAX: slow and weak UDEREF enabled\n");
+		} else
+			printk("PAX: UDEREF disabled\n");
+#endif
+
+		return;
+	}
+
+	printk("PAX: PCID detected\n");
+	cr4_set_bits(X86_CR4_PCIDE);
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	pax_open_kernel();
+	clone_pgd_mask = ~(pgdval_t)0UL;
+	pax_close_kernel();
+	if (pax_user_shadow_base)
+		printk("PAX: weak UDEREF enabled\n");
+	else {
+		set_cpu_cap(c, X86_FEATURE_STRONGUDEREF);
+		printk("PAX: strong UDEREF enabled\n");
+	}
+#endif
+
+	if (cpu_has(c, X86_FEATURE_INVPCID))
+		printk("PAX: INVPCID detected\n");
+}
+#endif
+
 /*
  * Some CPU features depend on higher CPUID levels, which may not always
  * be available due to CPUID level capping or broken virtualization
@@ -406,7 +405,7 @@ void switch_to_new_gdt(int cpu)
 {
 	struct desc_ptr gdt_descr;
 
-	gdt_descr.address = (long)get_cpu_gdt_table(cpu);
+	gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 	/* Reload the per-cpu base */
@@ -897,6 +896,20 @@ static void identify_cpu(struct cpuinfo_
 	setup_smep(c);
 	setup_smap(c);
 
+#ifdef CONFIG_X86_32
+#ifdef CONFIG_PAX_PAGEEXEC
+	if (!(__supported_pte_mask & _PAGE_NX))
+		clear_cpu_cap(c, X86_FEATURE_PSE);
+#endif
+#if defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+	clear_cpu_cap(c, X86_FEATURE_SEP);
+#endif
+#endif
+
+#ifdef CONFIG_X86_64
+	setup_pcid(c);
+#endif
+
 	/*
 	 * The vendor-specific functions might have changed features.
 	 * Now we do "generic changes."
@@ -979,7 +992,7 @@ static void syscall32_cpu_init(void)
 void enable_sep_cpu(void)
 {
 	int cpu = get_cpu();
-	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	struct tss_struct *tss = init_tss + cpu;
 
 	if (!boot_cpu_has(X86_FEATURE_SEP)) {
 		put_cpu();
@@ -1117,14 +1130,16 @@ static __init int setup_disablecpuid(cha
 }
 __setup("clearcpuid=", setup_disablecpuid);
 
+DEFINE_PER_CPU(struct thread_info *, current_tinfo) = &init_task.tinfo;
+EXPORT_PER_CPU_SYMBOL(current_tinfo);
+
 DEFINE_PER_CPU(unsigned long, kernel_stack) =
-	(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+	(unsigned long)&init_thread_union - 16 + THREAD_SIZE;
 EXPORT_PER_CPU_SYMBOL(kernel_stack);
 
 #ifdef CONFIG_X86_64
-struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
-struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
-				    (unsigned long) debug_idt_table };
+struct desc_ptr idt_descr __read_only = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
+const struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) debug_idt_table };
 
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE) __visible;
@@ -1307,7 +1322,7 @@ void cpu_init(void)
 	 */
 	load_ucode_ap();
 
-	t = &per_cpu(init_tss, cpu);
+	t = init_tss + cpu;
 	oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
@@ -1339,7 +1354,6 @@ void cpu_init(void)
 	wrmsrl(MSR_KERNEL_GS_BASE, 0);
 	barrier();
 
-	x86_configure_nx();
 	x2apic_setup();
 
 	/*
@@ -1391,7 +1405,7 @@ void cpu_init(void)
 {
 	int cpu = smp_processor_id();
 	struct task_struct *curr = current;
-	struct tss_struct *t = &per_cpu(init_tss, cpu);
+	struct tss_struct *t = init_tss + cpu;
 	struct thread_struct *thread = &curr->thread;
 
 	wait_for_master_cpu(cpu);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/intel_cacheinfo.c linux-4.0.9-pax/arch/x86/kernel/cpu/intel_cacheinfo.c
--- linux-4.0.9/arch/x86/kernel/cpu/intel_cacheinfo.c	2015-04-13 11:21:01.758617466 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/intel_cacheinfo.c	2015-04-15 12:13:52.914318622 +0200
@@ -1024,6 +1024,22 @@ static struct attribute *default_attrs[]
 };
 
 #ifdef CONFIG_AMD_NB
+static struct attribute *default_attrs_amd_nb[] = {
+	&type.attr,
+	&level.attr,
+	&coherency_line_size.attr,
+	&physical_line_partition.attr,
+	&ways_of_associativity.attr,
+	&number_of_sets.attr,
+	&size.attr,
+	&shared_cpu_map.attr,
+	&shared_cpu_list.attr,
+	NULL,
+	NULL,
+	NULL,
+	NULL
+};
+
 static struct attribute **amd_l3_attrs(void)
 {
 	static struct attribute **attrs;
@@ -1034,18 +1050,7 @@ static struct attribute **amd_l3_attrs(v
 
 	n = ARRAY_SIZE(default_attrs);
 
-	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
-		n += 2;
-
-	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
-		n += 1;
-
-	attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
-	if (attrs == NULL)
-		return attrs = default_attrs;
-
-	for (n = 0; default_attrs[n]; n++)
-		attrs[n] = default_attrs[n];
+	attrs = default_attrs_amd_nb;
 
 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
 		attrs[n++] = &cache_disable_0.attr;
@@ -1096,6 +1101,13 @@ static struct kobj_type ktype_cache = {
 	.default_attrs	= default_attrs,
 };
 
+#ifdef CONFIG_AMD_NB
+static struct kobj_type ktype_cache_amd_nb = {
+	.sysfs_ops	= &sysfs_ops,
+	.default_attrs	= default_attrs_amd_nb,
+};
+#endif
+
 static struct kobj_type ktype_percpu_entry = {
 	.sysfs_ops	= &sysfs_ops,
 };
@@ -1161,20 +1173,26 @@ static int cache_add_dev(struct device *
 		return retval;
 	}
 
+#ifdef CONFIG_AMD_NB
+	amd_l3_attrs();
+#endif
+
 	for (i = 0; i < num_cache_leaves; i++) {
+		struct kobj_type *ktype;
+
 		this_object = INDEX_KOBJECT_PTR(cpu, i);
 		this_object->cpu = cpu;
 		this_object->index = i;
 
 		this_leaf = CPUID4_INFO_IDX(cpu, i);
 
-		ktype_cache.default_attrs = default_attrs;
+		ktype = &ktype_cache;
 #ifdef CONFIG_AMD_NB
 		if (this_leaf->base.nb)
-			ktype_cache.default_attrs = amd_l3_attrs();
+			ktype = &ktype_cache_amd_nb;
 #endif
 		retval = kobject_init_and_add(&(this_object->kobj),
-					      &ktype_cache,
+					      ktype,
 					      per_cpu(ici_cache_kobject, cpu),
 					      "index%1lu", i);
 		if (unlikely(retval)) {
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/Makefile linux-4.0.9-pax/arch/x86/kernel/cpu/Makefile
--- linux-4.0.9/arch/x86/kernel/cpu/Makefile	2015-03-18 15:21:50.256349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/Makefile	2015-04-15 12:13:52.918318622 +0200
@@ -8,10 +8,6 @@ CFLAGS_REMOVE_common.o = -pg
 CFLAGS_REMOVE_perf_event.o = -pg
 endif
 
-# Make sure load_percpu_segment has no stackprotector
-nostackp := $(call cc-option, -fno-stack-protector)
-CFLAGS_common.o		:= $(nostackp)
-
 obj-y			:= intel_cacheinfo.o scattered.o topology.o
 obj-y			+= common.o
 obj-y			+= rdrand.o
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/mcheck/mce.c linux-4.0.9-pax/arch/x86/kernel/cpu/mcheck/mce.c
--- linux-4.0.9/arch/x86/kernel/cpu/mcheck/mce.c	2015-06-15 16:02:22.267183858 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/mcheck/mce.c	2015-06-15 16:02:33.019183834 +0200
@@ -47,6 +47,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
+#include <asm/local.h>
 
 #include "mce-internal.h"
 
@@ -258,7 +259,7 @@ static void print_mce(struct mce *m)
 			!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
 				m->cs, m->ip);
 
-		if (m->cs == __KERNEL_CS)
+		if (m->cs == __KERNEL_CS || m->cs == __KERNEXEC_KERNEL_CS)
 			print_symbol("{%s}", m->ip);
 		pr_cont("\n");
 	}
@@ -291,10 +292,10 @@ static void print_mce(struct mce *m)
 
 #define PANIC_TIMEOUT 5 /* 5 seconds */
 
-static atomic_t mce_panicked;
+static atomic_unchecked_t mce_panicked;
 
 static int fake_panic;
-static atomic_t mce_fake_panicked;
+static atomic_unchecked_t mce_fake_panicked;
 
 /* Panic in progress. Enable interrupts and wait for final IPI */
 static void wait_for_panic(void)
@@ -318,7 +319,7 @@ static void mce_panic(const char *msg, s
 		/*
 		 * Make sure only one CPU runs in machine check panic
 		 */
-		if (atomic_inc_return(&mce_panicked) > 1)
+		if (atomic_inc_return_unchecked(&mce_panicked) > 1)
 			wait_for_panic();
 		barrier();
 
@@ -326,7 +327,7 @@ static void mce_panic(const char *msg, s
 		console_verbose();
 	} else {
 		/* Don't log too much for fake panic */
-		if (atomic_inc_return(&mce_fake_panicked) > 1)
+		if (atomic_inc_return_unchecked(&mce_fake_panicked) > 1)
 			return;
 	}
 	/* First print corrected ones that are still unlogged */
@@ -365,7 +366,7 @@ static void mce_panic(const char *msg, s
 	if (!fake_panic) {
 		if (panic_timeout == 0)
 			panic_timeout = mca_cfg.panic_timeout;
-		panic(msg);
+		panic("%s", msg);
 	} else
 		pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
 }
@@ -746,7 +747,7 @@ static int mce_timed_out(u64 *t, const c
 	 * might have been modified by someone else.
 	 */
 	rmb();
-	if (atomic_read(&mce_panicked))
+	if (atomic_read_unchecked(&mce_panicked))
 		wait_for_panic();
 	if (!mca_cfg.monarch_timeout)
 		goto out;
@@ -1672,7 +1673,7 @@ static void unexpected_machine_check(str
 }
 
 /* Call the installed machine check handler for this CPU setup. */
-void (*machine_check_vector)(struct pt_regs *, long error_code) =
+void (*machine_check_vector)(struct pt_regs *, long error_code) __read_only =
 						unexpected_machine_check;
 
 /*
@@ -1695,7 +1696,9 @@ void mcheck_cpu_init(struct cpuinfo_x86
 		return;
 	}
 
+	pax_open_kernel();
 	machine_check_vector = do_machine_check;
+	pax_close_kernel();
 
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(c);
@@ -1709,7 +1712,7 @@ void mcheck_cpu_init(struct cpuinfo_x86
  */
 
 static DEFINE_SPINLOCK(mce_chrdev_state_lock);
-static int mce_chrdev_open_count;	/* #times opened */
+static local_t mce_chrdev_open_count;	/* #times opened */
 static int mce_chrdev_open_exclu;	/* already open exclusive? */
 
 static int mce_chrdev_open(struct inode *inode, struct file *file)
@@ -1717,7 +1720,7 @@ static int mce_chrdev_open(struct inode
 	spin_lock(&mce_chrdev_state_lock);
 
 	if (mce_chrdev_open_exclu ||
-	    (mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
+	    (local_read(&mce_chrdev_open_count) && (file->f_flags & O_EXCL))) {
 		spin_unlock(&mce_chrdev_state_lock);
 
 		return -EBUSY;
@@ -1725,7 +1728,7 @@ static int mce_chrdev_open(struct inode
 
 	if (file->f_flags & O_EXCL)
 		mce_chrdev_open_exclu = 1;
-	mce_chrdev_open_count++;
+	local_inc(&mce_chrdev_open_count);
 
 	spin_unlock(&mce_chrdev_state_lock);
 
@@ -1736,7 +1739,7 @@ static int mce_chrdev_release(struct ino
 {
 	spin_lock(&mce_chrdev_state_lock);
 
-	mce_chrdev_open_count--;
+	local_dec(&mce_chrdev_open_count);
 	mce_chrdev_open_exclu = 0;
 
 	spin_unlock(&mce_chrdev_state_lock);
@@ -2411,7 +2414,7 @@ static __init void mce_init_banks(void)
 
 	for (i = 0; i < mca_cfg.banks; i++) {
 		struct mce_bank *b = &mce_banks[i];
-		struct device_attribute *a = &b->attr;
+		device_attribute_no_const *a = &b->attr;
 
 		sysfs_attr_init(&a->attr);
 		a->attr.name	= b->attrname;
@@ -2518,7 +2521,7 @@ struct dentry *mce_get_debugfs_dir(void)
 static void mce_reset(void)
 {
 	cpu_missing = 0;
-	atomic_set(&mce_fake_panicked, 0);
+	atomic_set_unchecked(&mce_fake_panicked, 0);
 	atomic_set(&mce_executing, 0);
 	atomic_set(&mce_callin, 0);
 	atomic_set(&global_nwo, 0);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/mcheck/p5.c linux-4.0.9-pax/arch/x86/kernel/cpu/mcheck/p5.c
--- linux-4.0.9/arch/x86/kernel/cpu/mcheck/p5.c	2015-04-13 11:21:01.758617466 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/mcheck/p5.c	2015-04-15 12:13:52.918318622 +0200
@@ -12,6 +12,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
+#include <asm/pgtable.h>
 
 /* By default disabled */
 int mce_p5_enabled __read_mostly;
@@ -55,7 +56,9 @@ void intel_p5_mcheck_init(struct cpuinfo
 	if (!cpu_has(c, X86_FEATURE_MCE))
 		return;
 
+	pax_open_kernel();
 	machine_check_vector = pentium_machine_check;
+	pax_close_kernel();
 	/* Make sure the vector pointer is visible before we enable MCEs: */
 	wmb();
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/mcheck/winchip.c linux-4.0.9-pax/arch/x86/kernel/cpu/mcheck/winchip.c
--- linux-4.0.9/arch/x86/kernel/cpu/mcheck/winchip.c	2015-04-13 11:21:01.758617466 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/mcheck/winchip.c	2015-04-15 12:13:52.918318622 +0200
@@ -11,6 +11,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
+#include <asm/pgtable.h>
 
 /* Machine check handler for WinChip C6: */
 static void winchip_machine_check(struct pt_regs *regs, long error_code)
@@ -28,7 +29,9 @@ void winchip_mcheck_init(struct cpuinfo_
 {
 	u32 lo, hi;
 
+	pax_open_kernel();
 	machine_check_vector = winchip_machine_check;
+	pax_close_kernel();
 	/* Make sure the vector pointer is visible before we enable MCEs: */
 	wmb();
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/microcode/core.c linux-4.0.9-pax/arch/x86/kernel/cpu/microcode/core.c
--- linux-4.0.9/arch/x86/kernel/cpu/microcode/core.c	2015-03-18 15:21:50.260349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/microcode/core.c	2015-04-15 12:13:52.918318622 +0200
@@ -518,7 +518,7 @@ mc_cpu_callback(struct notifier_block *n
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __refdata mc_cpu_notifier = {
+static struct notifier_block mc_cpu_notifier = {
 	.notifier_call	= mc_cpu_callback,
 };
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/microcode/intel.c linux-4.0.9-pax/arch/x86/kernel/cpu/microcode/intel.c
--- linux-4.0.9/arch/x86/kernel/cpu/microcode/intel.c	2015-04-13 11:21:01.758617466 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/microcode/intel.c	2015-04-15 12:13:52.918318622 +0200
@@ -298,13 +298,13 @@ static enum ucode_state request_microcod
 
 static int get_ucode_user(void *to, const void *from, size_t n)
 {
-	return copy_from_user(to, from, n);
+	return copy_from_user(to, (const void __force_user *)from, n);
 }
 
 static enum ucode_state
 request_microcode_user(int cpu, const void __user *buf, size_t size)
 {
-	return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
+	return generic_load_microcode(cpu, (__force_kernel void *)buf, size, &get_ucode_user);
 }
 
 static void microcode_fini_cpu(int cpu)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/mtrr/main.c linux-4.0.9-pax/arch/x86/kernel/cpu/mtrr/main.c
--- linux-4.0.9/arch/x86/kernel/cpu/mtrr/main.c	2015-03-18 15:21:50.260349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/mtrr/main.c	2015-04-15 12:13:52.918318622 +0200
@@ -66,7 +66,7 @@ static DEFINE_MUTEX(mtrr_mutex);
 u64 size_or_mask, size_and_mask;
 static bool mtrr_aps_delayed_init;
 
-static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
+static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __read_only;
 
 const struct mtrr_ops *mtrr_if;
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/mtrr/mtrr.h linux-4.0.9-pax/arch/x86/kernel/cpu/mtrr/mtrr.h
--- linux-4.0.9/arch/x86/kernel/cpu/mtrr/mtrr.h	2015-03-18 15:21:50.260349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/mtrr/mtrr.h	2015-04-15 12:13:52.918318622 +0200
@@ -25,7 +25,7 @@ struct mtrr_ops {
 	int	(*validate_add_page)(unsigned long base, unsigned long size,
 				     unsigned int type);
 	int	(*have_wrcomb)(void);
-};
+} __do_const;
 
 extern int generic_get_free_region(unsigned long base, unsigned long size,
 				   int replace_reg);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/perf_event_amd_iommu.c linux-4.0.9-pax/arch/x86/kernel/cpu/perf_event_amd_iommu.c
--- linux-4.0.9/arch/x86/kernel/cpu/perf_event_amd_iommu.c	2015-03-18 15:21:50.260349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/perf_event_amd_iommu.c	2015-04-15 12:13:52.918318622 +0200
@@ -402,7 +402,7 @@ static void perf_iommu_del(struct perf_e
 static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
 {
 	struct attribute **attrs;
-	struct attribute_group *attr_group;
+	attribute_group_no_const *attr_group;
 	int i = 0, j;
 
 	while (amd_iommu_v2_event_descs[i].attr.attr.name)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/perf_event.c linux-4.0.9-pax/arch/x86/kernel/cpu/perf_event.c
--- linux-4.0.9/arch/x86/kernel/cpu/perf_event.c	2015-04-13 11:21:01.782617465 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/perf_event.c	2015-04-15 12:13:52.918318622 +0200
@@ -1376,7 +1376,7 @@ static void __init pmu_check_apic(void)
 
 }
 
-static struct attribute_group x86_pmu_format_group = {
+static attribute_group_no_const x86_pmu_format_group = {
 	.name = "format",
 	.attrs = NULL,
 };
@@ -1475,7 +1475,7 @@ static struct attribute *events_attr[] =
 	NULL,
 };
 
-static struct attribute_group x86_pmu_events_group = {
+static attribute_group_no_const x86_pmu_events_group = {
 	.name = "events",
 	.attrs = events_attr,
 };
@@ -2037,7 +2037,7 @@ static unsigned long get_segment_base(un
 		if (idx > GDT_ENTRIES)
 			return 0;
 
-		desc = raw_cpu_ptr(gdt_page.gdt);
+		desc = get_cpu_gdt_table(smp_processor_id());
 	}
 
 	return get_desc_base(desc + idx);
@@ -2127,7 +2127,7 @@ perf_callchain_user(struct perf_callchai
 			break;
 
 		perf_callchain_store(entry, frame.return_address);
-		fp = frame.next_frame;
+		fp = (const void __force_user *)frame.next_frame;
 	}
 }
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/perf_event_intel.c linux-4.0.9-pax/arch/x86/kernel/cpu/perf_event_intel.c
--- linux-4.0.9/arch/x86/kernel/cpu/perf_event_intel.c	2015-04-13 11:21:01.790617464 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/perf_event_intel.c	2015-04-15 12:13:52.918318622 +0200
@@ -2353,10 +2353,10 @@ __init int intel_pmu_init(void)
 		x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
 
 	if (boot_cpu_has(X86_FEATURE_PDCM)) {
-		u64 capabilities;
+		u64 capabilities = x86_pmu.intel_cap.capabilities;
 
-		rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
-		x86_pmu.intel_cap.capabilities = capabilities;
+		if (rdmsrl_safe(MSR_IA32_PERF_CAPABILITIES, &x86_pmu.intel_cap.capabilities))
+			x86_pmu.intel_cap.capabilities = capabilities;
 	}
 
 	intel_ds_init();
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/perf_event_intel_rapl.c linux-4.0.9-pax/arch/x86/kernel/cpu/perf_event_intel_rapl.c
--- linux-4.0.9/arch/x86/kernel/cpu/perf_event_intel_rapl.c	2015-06-15 16:02:22.267183858 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/perf_event_intel_rapl.c	2015-06-15 16:02:33.019183834 +0200
@@ -465,7 +465,7 @@ static struct attribute *rapl_events_hsw
 	NULL,
 };
 
-static struct attribute_group rapl_pmu_events_group = {
+static attribute_group_no_const rapl_pmu_events_group __read_only = {
 	.name = "events",
 	.attrs = NULL, /* patched at runtime */
 };
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/perf_event_intel_uncore.c linux-4.0.9-pax/arch/x86/kernel/cpu/perf_event_intel_uncore.c
--- linux-4.0.9/arch/x86/kernel/cpu/perf_event_intel_uncore.c	2015-03-18 15:21:50.260349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/perf_event_intel_uncore.c	2015-04-15 12:13:52.918318622 +0200
@@ -733,7 +733,7 @@ static void __init uncore_types_exit(str
 static int __init uncore_type_init(struct intel_uncore_type *type)
 {
 	struct intel_uncore_pmu *pmus;
-	struct attribute_group *attr_group;
+	attribute_group_no_const *attr_group;
 	struct attribute **attrs;
 	int i, j;
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpu/perf_event_intel_uncore.h linux-4.0.9-pax/arch/x86/kernel/cpu/perf_event_intel_uncore.h
--- linux-4.0.9/arch/x86/kernel/cpu/perf_event_intel_uncore.h	2015-03-18 15:21:50.260349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/cpu/perf_event_intel_uncore.h	2015-04-15 12:13:52.918318622 +0200
@@ -114,7 +114,7 @@ struct intel_uncore_box {
 struct uncore_event_desc {
 	struct kobj_attribute attr;
 	const char *config;
-};
+} __do_const;
 
 ssize_t uncore_event_show(struct kobject *kobj,
 			  struct kobj_attribute *attr, char *buf);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/cpuid.c linux-4.0.9-pax/arch/x86/kernel/cpuid.c
--- linux-4.0.9/arch/x86/kernel/cpuid.c	2015-03-18 15:21:50.260349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/cpuid.c	2015-04-15 12:13:52.918318622 +0200
@@ -170,7 +170,7 @@ static int cpuid_class_cpu_callback(stru
 	return notifier_from_errno(err);
 }
 
-static struct notifier_block __refdata cpuid_class_cpu_notifier =
+static struct notifier_block cpuid_class_cpu_notifier =
 {
 	.notifier_call = cpuid_class_cpu_callback,
 };
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/crash.c linux-4.0.9-pax/arch/x86/kernel/crash.c
--- linux-4.0.9/arch/x86/kernel/crash.c	2015-03-18 15:21:50.260349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/crash.c	2015-04-15 12:13:52.918318622 +0200
@@ -105,7 +105,7 @@ static void kdump_nmi_callback(int cpu,
 #ifdef CONFIG_X86_32
 	struct pt_regs fixed_regs;
 
-	if (!user_mode_vm(regs)) {
+	if (!user_mode(regs)) {
 		crash_fixup_ss_esp(&fixed_regs, regs);
 		regs = &fixed_regs;
 	}
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/crash_dump_64.c linux-4.0.9-pax/arch/x86/kernel/crash_dump_64.c
--- linux-4.0.9/arch/x86/kernel/crash_dump_64.c	2015-03-18 15:21:50.260349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/crash_dump_64.c	2015-04-15 12:13:52.918318622 +0200
@@ -36,7 +36,7 @@ ssize_t copy_oldmem_page(unsigned long p
 		return -ENOMEM;
 
 	if (userbuf) {
-		if (copy_to_user(buf, vaddr + offset, csize)) {
+		if (copy_to_user((char __force_user *)buf, vaddr + offset, csize)) {
 			iounmap(vaddr);
 			return -EFAULT;
 		}
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/doublefault.c linux-4.0.9-pax/arch/x86/kernel/doublefault.c
--- linux-4.0.9/arch/x86/kernel/doublefault.c	2015-03-18 15:21:50.264349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/doublefault.c	2015-04-15 12:13:52.918318622 +0200
@@ -12,7 +12,7 @@
 
 #define DOUBLEFAULT_STACKSIZE (1024)
 static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
-#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
+#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE-2)
 
 #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)
 
@@ -22,7 +22,7 @@ static void doublefault_fn(void)
 	unsigned long gdt, tss;
 
 	native_store_gdt(&gdt_desc);
-	gdt = gdt_desc.address;
+	gdt = (unsigned long)gdt_desc.address;
 
 	printk(KERN_EMERG "PANIC: double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
 
@@ -59,10 +59,10 @@ struct tss_struct doublefault_tss __cach
 		/* 0x2 bit is always set */
 		.flags		= X86_EFLAGS_SF | 0x2,
 		.sp		= STACK_START,
-		.es		= __USER_DS,
+		.es		= __KERNEL_DS,
 		.cs		= __KERNEL_CS,
 		.ss		= __KERNEL_DS,
-		.ds		= __USER_DS,
+		.ds		= __KERNEL_DS,
 		.fs		= __KERNEL_PERCPU,
 
 		.__cr3		= __pa_nodebug(swapper_pg_dir),
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/dumpstack_32.c linux-4.0.9-pax/arch/x86/kernel/dumpstack_32.c
--- linux-4.0.9/arch/x86/kernel/dumpstack_32.c	2015-03-18 15:21:50.264349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/dumpstack_32.c	2015-06-29 01:02:29.106427990 +0200
@@ -61,15 +61,14 @@ void dump_trace(struct task_struct *task
 		bp = stack_frame(task, regs);
 
 	for (;;) {
-		struct thread_info *context;
+		void *stack_start = (void *)((unsigned long)stack & ~(THREAD_SIZE-1));
 		void *end_stack;
 
 		end_stack = is_hardirq_stack(stack, cpu);
 		if (!end_stack)
 			end_stack = is_softirq_stack(stack, cpu);
 
-		context = task_thread_info(task);
-		bp = ops->walk_stack(context, stack, bp, ops, data,
+		bp = ops->walk_stack(task, stack_start, stack, bp, ops, data,
 				     end_stack, &graph);
 
 		/* Stop if not on irq stack */
@@ -123,27 +122,28 @@ void show_regs(struct pt_regs *regs)
 	int i;
 
 	show_regs_print_info(KERN_EMERG);
-	__show_regs(regs, !user_mode_vm(regs));
+	__show_regs(regs, !user_mode(regs));
 
 	/*
 	 * When in-kernel, we also print out the stack and code at the
 	 * time of the fault..
 	 */
-	if (!user_mode_vm(regs)) {
+	if (!user_mode(regs)) {
 		unsigned int code_prologue = code_bytes * 43 / 64;
 		unsigned int code_len = code_bytes;
 		unsigned char c;
 		u8 *ip;
+		unsigned long cs_base = get_desc_base(&get_cpu_gdt_table(0)[(0xffff & regs->cs) >> 3]);
 
 		pr_emerg("Stack:\n");
 		show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
 
 		pr_emerg("Code:");
 
-		ip = (u8 *)regs->ip - code_prologue;
+		ip = (u8 *)regs->ip - code_prologue + cs_base;
 		if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
 			/* try starting at IP */
-			ip = (u8 *)regs->ip;
+			ip = (u8 *)regs->ip + cs_base;
 			code_len = code_len - code_prologue + 1;
 		}
 		for (i = 0; i < code_len; i++, ip++) {
@@ -152,7 +152,7 @@ void show_regs(struct pt_regs *regs)
 				pr_cont("  Bad EIP value.");
 				break;
 			}
-			if (ip == (u8 *)regs->ip)
+			if (ip == (u8 *)regs->ip + cs_base)
 				pr_cont(" <%02x>", c);
 			else
 				pr_cont(" %02x", c);
@@ -165,6 +165,7 @@ int is_valid_bugaddr(unsigned long ip)
 {
 	unsigned short ud2;
 
+	ip = ktla_ktva(ip);
 	if (ip < PAGE_OFFSET)
 		return 0;
 	if (probe_kernel_address((unsigned short *)ip, ud2))
@@ -172,3 +173,15 @@ int is_valid_bugaddr(unsigned long ip)
 
 	return ud2 == 0x0b0f;
 }
+
+#ifdef CONFIG_PAX_MEMORY_STACKLEAK
+void __used pax_check_alloca(unsigned long size)
+{
+	unsigned long sp = (unsigned long)&sp, stack_left;
+
+	/* all kernel stacks are of the same size */
+	stack_left = sp & (THREAD_SIZE - 1);
+	BUG_ON(stack_left < 256 || size >= stack_left - 256);
+}
+EXPORT_SYMBOL(pax_check_alloca);
+#endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/dumpstack_64.c linux-4.0.9-pax/arch/x86/kernel/dumpstack_64.c
--- linux-4.0.9/arch/x86/kernel/dumpstack_64.c	2015-03-18 15:21:50.264349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/dumpstack_64.c	2015-06-29 01:02:41.026427964 +0200
@@ -153,12 +153,12 @@ void dump_trace(struct task_struct *task
 		const struct stacktrace_ops *ops, void *data)
 {
 	const unsigned cpu = get_cpu();
-	struct thread_info *tinfo;
 	unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
 	unsigned long dummy;
 	unsigned used = 0;
 	int graph = 0;
 	int done = 0;
+	void *stack_start;
 
 	if (!task)
 		task = current;
@@ -179,7 +179,6 @@ void dump_trace(struct task_struct *task
 	 * current stack address. If the stacks consist of nested
 	 * exceptions
 	 */
-	tinfo = task_thread_info(task);
 	while (!done) {
 		unsigned long *stack_end;
 		enum stack_type stype;
@@ -202,7 +201,7 @@ void dump_trace(struct task_struct *task
 			if (ops->stack(data, id) < 0)
 				break;
 
-			bp = ops->walk_stack(tinfo, stack, bp, ops,
+			bp = ops->walk_stack(task, stack_end - EXCEPTION_STKSZ, stack, bp, ops,
 					     data, stack_end, &graph);
 			ops->stack(data, "<EOE>");
 			/*
@@ -210,6 +209,8 @@ void dump_trace(struct task_struct *task
 			 * second-to-last pointer (index -2 to end) in the
 			 * exception stack:
 			 */
+			if ((u16)stack_end[-1] != __KERNEL_DS)
+				goto out;
 			stack = (unsigned long *) stack_end[-2];
 			done = 0;
 			break;
@@ -218,7 +219,7 @@ void dump_trace(struct task_struct *task
 
 			if (ops->stack(data, "IRQ") < 0)
 				break;
-			bp = ops->walk_stack(tinfo, stack, bp,
+			bp = ops->walk_stack(task, irq_stack, stack, bp,
 				     ops, data, stack_end, &graph);
 			/*
 			 * We link to the next stack (which would be
@@ -240,7 +241,9 @@ void dump_trace(struct task_struct *task
 	/*
 	 * This handles the process stack:
 	 */
-	bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
+	stack_start = (void *)((unsigned long)stack & ~(THREAD_SIZE-1));
+	bp = ops->walk_stack(task, stack_start, stack, bp, ops, data, NULL, &graph);
+out:
 	put_cpu();
 }
 EXPORT_SYMBOL(dump_trace);
@@ -344,8 +347,55 @@ int is_valid_bugaddr(unsigned long ip)
 {
 	unsigned short ud2;
 
-	if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2)))
+	if (probe_kernel_address((unsigned short *)ip, ud2))
 		return 0;
 
 	return ud2 == 0x0b0f;
 }
+
+#ifdef CONFIG_PAX_MEMORY_STACKLEAK
+void __used pax_check_alloca(unsigned long size)
+{
+	unsigned long sp = (unsigned long)&sp, stack_start, stack_end;
+	unsigned cpu, used;
+	char *id;
+
+	/* check the process stack first */
+	stack_start = (unsigned long)task_stack_page(current);
+	stack_end = stack_start + THREAD_SIZE;
+	if (likely(stack_start <= sp && sp < stack_end)) {
+		unsigned long stack_left = sp & (THREAD_SIZE - 1);
+		BUG_ON(stack_left < 256 || size >= stack_left - 256);
+		return;
+	}
+
+	cpu = get_cpu();
+
+	/* check the irq stacks */
+	stack_end = (unsigned long)per_cpu(irq_stack_ptr, cpu);
+	stack_start = stack_end - IRQ_STACK_SIZE;
+	if (stack_start <= sp && sp < stack_end) {
+		unsigned long stack_left = sp & (IRQ_STACK_SIZE - 1);
+		put_cpu();
+		BUG_ON(stack_left < 256 || size >= stack_left - 256);
+		return;
+	}
+
+	/* check the exception stacks */
+	used = 0;
+	stack_end = (unsigned long)in_exception_stack(cpu, sp, &used, &id);
+	stack_start = stack_end - EXCEPTION_STKSZ;
+	if (stack_end && stack_start <= sp && sp < stack_end) {
+		unsigned long stack_left = sp & (EXCEPTION_STKSZ - 1);
+		put_cpu();
+		BUG_ON(stack_left < 256 || size >= stack_left - 256);
+		return;
+	}
+
+	put_cpu();
+
+	/* unknown stack */
+	BUG();
+}
+EXPORT_SYMBOL(pax_check_alloca);
+#endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/dumpstack.c linux-4.0.9-pax/arch/x86/kernel/dumpstack.c
--- linux-4.0.9/arch/x86/kernel/dumpstack.c	2015-04-13 11:21:01.790617464 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/dumpstack.c	2015-04-15 12:13:52.918318622 +0200
@@ -40,16 +40,14 @@ void printk_address(unsigned long addres
 static void
 print_ftrace_graph_addr(unsigned long addr, void *data,
 			const struct stacktrace_ops *ops,
-			struct thread_info *tinfo, int *graph)
+			struct task_struct *task, int *graph)
 {
-	struct task_struct *task;
 	unsigned long ret_addr;
 	int index;
 
 	if (addr != (unsigned long)return_to_handler)
 		return;
 
-	task = tinfo->task;
 	index = task->curr_ret_stack;
 
 	if (!task->ret_stack || index < *graph)
@@ -66,7 +64,7 @@ print_ftrace_graph_addr(unsigned long ad
 static inline void
 print_ftrace_graph_addr(unsigned long addr, void *data,
 			const struct stacktrace_ops *ops,
-			struct thread_info *tinfo, int *graph)
+			struct task_struct *task, int *graph)
 { }
 #endif
 
@@ -77,10 +75,8 @@ print_ftrace_graph_addr(unsigned long ad
  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
  */
 
-static inline int valid_stack_ptr(struct thread_info *tinfo,
-			void *p, unsigned int size, void *end)
+static inline int valid_stack_ptr(void *t, void *p, unsigned int size, void *end)
 {
-	void *t = tinfo;
 	if (end) {
 		if (p < end && p >= (end-THREAD_SIZE))
 			return 1;
@@ -91,14 +87,14 @@ static inline int valid_stack_ptr(struct
 }
 
 unsigned long
-print_context_stack(struct thread_info *tinfo,
+print_context_stack(struct task_struct *task, void *stack_start,
 		unsigned long *stack, unsigned long bp,
 		const struct stacktrace_ops *ops, void *data,
 		unsigned long *end, int *graph)
 {
 	struct stack_frame *frame = (struct stack_frame *)bp;
 
-	while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
+	while (valid_stack_ptr(stack_start, stack, sizeof(*stack), end)) {
 		unsigned long addr;
 
 		addr = *stack;
@@ -110,7 +106,7 @@ print_context_stack(struct thread_info *
 			} else {
 				ops->address(data, addr, 0);
 			}
-			print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
+			print_ftrace_graph_addr(addr, data, ops, task, graph);
 		}
 		stack++;
 	}
@@ -119,7 +115,7 @@ print_context_stack(struct thread_info *
 EXPORT_SYMBOL_GPL(print_context_stack);
 
 unsigned long
-print_context_stack_bp(struct thread_info *tinfo,
+print_context_stack_bp(struct task_struct *task, void *stack_start,
 		       unsigned long *stack, unsigned long bp,
 		       const struct stacktrace_ops *ops, void *data,
 		       unsigned long *end, int *graph)
@@ -127,7 +123,7 @@ print_context_stack_bp(struct thread_inf
 	struct stack_frame *frame = (struct stack_frame *)bp;
 	unsigned long *ret_addr = &frame->return_address;
 
-	while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) {
+	while (valid_stack_ptr(stack_start, ret_addr, sizeof(*ret_addr), end)) {
 		unsigned long addr = *ret_addr;
 
 		if (!__kernel_text_address(addr))
@@ -136,7 +132,7 @@ print_context_stack_bp(struct thread_inf
 		ops->address(data, addr, 1);
 		frame = frame->next_frame;
 		ret_addr = &frame->return_address;
-		print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
+		print_ftrace_graph_addr(addr, data, ops, task, graph);
 	}
 
 	return (unsigned long)frame;
@@ -155,7 +151,7 @@ static int print_trace_stack(void *data,
 static void print_trace_address(void *data, unsigned long addr, int reliable)
 {
 	touch_nmi_watchdog();
-	printk(data);
+	printk("%s", (char *)data);
 	printk_stack_address(addr, reliable);
 }
 
@@ -246,7 +242,7 @@ void oops_end(unsigned long flags, struc
 		panic("Fatal exception in interrupt");
 	if (panic_on_oops)
 		panic("Fatal exception");
-	do_exit(signr);
+	do_group_exit(signr);
 }
 NOKPROBE_SYMBOL(oops_end);
 
@@ -278,7 +274,7 @@ int __die(const char *str, struct pt_reg
 	print_modules();
 	show_regs(regs);
 #ifdef CONFIG_X86_32
-	if (user_mode_vm(regs)) {
+	if (user_mode(regs)) {
 		sp = regs->sp;
 		ss = regs->ss & 0xffff;
 	} else {
@@ -307,7 +303,7 @@ void die(const char *str, struct pt_regs
 	unsigned long flags = oops_begin();
 	int sig = SIGSEGV;
 
-	if (!user_mode_vm(regs))
+	if (!user_mode(regs))
 		report_bug(regs->ip, regs);
 
 	if (__die(str, regs, err))
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/e820.c linux-4.0.9-pax/arch/x86/kernel/e820.c
--- linux-4.0.9/arch/x86/kernel/e820.c	2015-04-13 11:21:01.790617464 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/e820.c	2015-04-15 12:13:52.922318622 +0200
@@ -794,8 +794,8 @@ unsigned long __init e820_end_of_low_ram
 
 static void early_panic(char *msg)
 {
-	early_printk(msg);
-	panic(msg);
+	early_printk("%s", msg);
+	panic("%s", msg);
 }
 
 static int userdef __initdata;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/early_printk.c linux-4.0.9-pax/arch/x86/kernel/early_printk.c
--- linux-4.0.9/arch/x86/kernel/early_printk.c	2015-04-13 11:21:01.790617464 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/early_printk.c	2015-04-15 12:13:52.922318622 +0200
@@ -7,6 +7,7 @@
 #include <linux/pci_regs.h>
 #include <linux/pci_ids.h>
 #include <linux/errno.h>
+#include <linux/sched.h>
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/fcntl.h>
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/entry_32.S linux-4.0.9-pax/arch/x86/kernel/entry_32.S
--- linux-4.0.9/arch/x86/kernel/entry_32.S	2015-04-13 11:21:01.790617464 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/entry_32.S	2015-04-15 12:13:52.922318622 +0200
@@ -177,13 +177,154 @@
 	/*CFI_REL_OFFSET gs, PT_GS*/
 .endm
 .macro SET_KERNEL_GS reg
+
+#ifdef CONFIG_CC_STACKPROTECTOR
 	movl $(__KERNEL_STACK_CANARY), \reg
+#elif defined(CONFIG_PAX_MEMORY_UDEREF)
+	movl $(__USER_DS), \reg
+#else
+	xorl \reg, \reg
+#endif
+
 	movl \reg, %gs
 .endm
 
 #endif	/* CONFIG_X86_32_LAZY_GS */
 
-.macro SAVE_ALL
+.macro pax_enter_kernel
+#ifdef CONFIG_PAX_KERNEXEC
+	call pax_enter_kernel
+#endif
+.endm
+
+.macro pax_exit_kernel
+#ifdef CONFIG_PAX_KERNEXEC
+	call pax_exit_kernel
+#endif
+.endm
+
+#ifdef CONFIG_PAX_KERNEXEC
+ENTRY(pax_enter_kernel)
+#ifdef CONFIG_PARAVIRT
+	pushl %eax
+	pushl %ecx
+	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0)
+	mov %eax, %esi
+#else
+	mov %cr0, %esi
+#endif
+	bts $16, %esi
+	jnc 1f
+	mov %cs, %esi
+	cmp $__KERNEL_CS, %esi
+	jz 3f
+	ljmp $__KERNEL_CS, $3f
+1:	ljmp $__KERNEXEC_KERNEL_CS, $2f
+2:
+#ifdef CONFIG_PARAVIRT
+	mov %esi, %eax
+	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_write_cr0)
+#else
+	mov %esi, %cr0
+#endif
+3:
+#ifdef CONFIG_PARAVIRT
+	popl %ecx
+	popl %eax
+#endif
+	ret
+ENDPROC(pax_enter_kernel)
+
+ENTRY(pax_exit_kernel)
+#ifdef CONFIG_PARAVIRT
+	pushl %eax
+	pushl %ecx
+#endif
+	mov %cs, %esi
+	cmp $__KERNEXEC_KERNEL_CS, %esi
+	jnz 2f
+#ifdef CONFIG_PARAVIRT
+	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0);
+	mov %eax, %esi
+#else
+	mov %cr0, %esi
+#endif
+	btr $16, %esi
+	ljmp $__KERNEL_CS, $1f
+1:
+#ifdef CONFIG_PARAVIRT
+	mov %esi, %eax
+	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_write_cr0);
+#else
+	mov %esi, %cr0
+#endif
+2:
+#ifdef CONFIG_PARAVIRT
+	popl %ecx
+	popl %eax
+#endif
+	ret
+ENDPROC(pax_exit_kernel)
+#endif
+
+	.macro pax_erase_kstack
+#ifdef CONFIG_PAX_MEMORY_STACKLEAK
+	call pax_erase_kstack
+#endif
+	.endm
+
+#ifdef CONFIG_PAX_MEMORY_STACKLEAK
+/*
+ * ebp: thread_info
+ */
+ENTRY(pax_erase_kstack)
+	pushl %edi
+	pushl %ecx
+	pushl %eax
+
+	mov TI_lowest_stack(%ebp), %edi
+	mov $0xB4DD00D5, %eax
+	std
+
+1:	mov %edi, %ecx
+	and $THREAD_SIZE_asm - 1, %ecx
+	shr $2, %ecx
+	repne scasl
+	jecxz 2f
+
+	cmp $2*16, %ecx
+	jc 2f
+
+	mov $2*16, %ecx
+	repe scasl
+	jecxz 2f
+	jne 1b
+
+2:	cld
+	or $2*4, %edi
+	mov %esp, %ecx
+	sub %edi, %ecx
+
+	cmp $THREAD_SIZE_asm, %ecx
+	jb 3f
+	ud2
+3:
+
+	shr $2, %ecx
+	rep stosl
+
+	mov TI_task_thread_sp0(%ebp), %edi
+	sub $128, %edi
+	mov %edi, TI_lowest_stack(%ebp)
+
+	popl %eax
+	popl %ecx
+	popl %edi
+	ret
+ENDPROC(pax_erase_kstack)
+#endif
+
+.macro __SAVE_ALL _DS
 	cld
 	PUSH_GS
 	pushl_cfi %fs
@@ -206,7 +347,7 @@
 	CFI_REL_OFFSET ecx, 0
 	pushl_cfi %ebx
 	CFI_REL_OFFSET ebx, 0
-	movl $(__USER_DS), %edx
+	movl $\_DS, %edx
 	movl %edx, %ds
 	movl %edx, %es
 	movl $(__KERNEL_PERCPU), %edx
@@ -214,6 +355,15 @@
 	SET_KERNEL_GS %edx
 .endm
 
+.macro SAVE_ALL
+#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+	__SAVE_ALL __KERNEL_DS
+	pax_enter_kernel
+#else
+	__SAVE_ALL __USER_DS
+#endif
+.endm
+
 .macro RESTORE_INT_REGS
 	popl_cfi %ebx
 	CFI_RESTORE ebx
@@ -297,7 +447,7 @@ ENTRY(ret_from_fork)
 	popfl_cfi
 	jmp syscall_exit
 	CFI_ENDPROC
-END(ret_from_fork)
+ENDPROC(ret_from_fork)
 
 ENTRY(ret_from_kernel_thread)
 	CFI_STARTPROC
@@ -340,7 +490,15 @@ ret_from_intr:
 	andl $SEGMENT_RPL_MASK, %eax
 #endif
 	cmpl $USER_RPL, %eax
+
+#ifdef CONFIG_PAX_KERNEXEC
+	jae resume_userspace
+
+	pax_exit_kernel
+	jmp resume_kernel
+#else
 	jb resume_kernel		# not returning to v8086 or userspace
+#endif
 
 ENTRY(resume_userspace)
 	LOCKDEP_SYS_EXIT
@@ -352,8 +510,8 @@ ENTRY(resume_userspace)
 	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done on
 					# int/exception return?
 	jne work_pending
-	jmp restore_all
-END(ret_from_exception)
+	jmp restore_all_pax
+ENDPROC(ret_from_exception)
 
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
@@ -365,7 +523,7 @@ need_resched:
 	jz restore_all
 	call preempt_schedule_irq
 	jmp need_resched
-END(resume_kernel)
+ENDPROC(resume_kernel)
 #endif
 	CFI_ENDPROC
 
@@ -395,30 +553,45 @@ sysenter_past_esp:
 	/*CFI_REL_OFFSET cs, 0*/
 	/*
 	 * Push current_thread_info()->sysenter_return to the stack.
-	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
-	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
 	 */
-	pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
+	pushl_cfi $0
 	CFI_REL_OFFSET eip, 0
 
 	pushl_cfi %eax
 	SAVE_ALL
+	GET_THREAD_INFO(%ebp)
+	movl TI_sysenter_return(%ebp),%ebp
+	movl %ebp,PT_EIP(%esp)
 	ENABLE_INTERRUPTS(CLBR_NONE)
 
 /*
  * Load the potential sixth argument from user stack.
  * Careful about security.
  */
+	movl PT_OLDESP(%esp),%ebp
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	mov PT_OLDSS(%esp),%ds
+1:	movl %ds:(%ebp),%ebp
+	push %ss
+	pop %ds
+#else
 	cmpl $__PAGE_OFFSET-3,%ebp
 	jae syscall_fault
 	ASM_STAC
 1:	movl (%ebp),%ebp
 	ASM_CLAC
+#endif
+
 	movl %ebp,PT_EBP(%esp)
 	_ASM_EXTABLE(1b,syscall_fault)
 
 	GET_THREAD_INFO(%ebp)
 
+#ifdef CONFIG_PAX_RANDKSTACK
+	pax_erase_kstack
+#endif
+
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 	jnz sysenter_audit
 sysenter_do_call:
@@ -434,12 +607,24 @@ sysenter_after_call:
 	testl $_TIF_ALLWORK_MASK, %ecx
 	jne sysexit_audit
 sysenter_exit:
+
+#ifdef CONFIG_PAX_RANDKSTACK
+	pushl_cfi %eax
+	movl %esp, %eax
+	call pax_randomize_kstack
+	popl_cfi %eax
+#endif
+
+	pax_erase_kstack
+
 /* if something modifies registers it must also disable sysexit */
 	movl PT_EIP(%esp), %edx
 	movl PT_OLDESP(%esp), %ecx
 	xorl %ebp,%ebp
 	TRACE_IRQS_ON
 1:	mov  PT_FS(%esp), %fs
+2:	mov  PT_DS(%esp), %ds
+3:	mov  PT_ES(%esp), %es
 	PTGS_TO_GS
 	ENABLE_INTERRUPTS_SYSEXIT
 
@@ -453,6 +638,9 @@ sysenter_audit:
 	pushl_cfi PT_ESI(%esp)		/* a3: 5th arg */
 	pushl_cfi PT_EDX+4(%esp)	/* a2: 4th arg */
 	call __audit_syscall_entry
+
+	pax_erase_kstack
+
 	popl_cfi %ecx /* get that remapped edx off the stack */
 	popl_cfi %ecx /* get that remapped esi off the stack */
 	movl PT_EAX(%esp),%eax		/* reload syscall number */
@@ -479,10 +667,16 @@ sysexit_audit:
 
 	CFI_ENDPROC
 .pushsection .fixup,"ax"
-2:	movl $0,PT_FS(%esp)
+4:	movl $0,PT_FS(%esp)
+	jmp 1b
+5:	movl $0,PT_DS(%esp)
+	jmp 1b
+6:	movl $0,PT_ES(%esp)
 	jmp 1b
 .popsection
-	_ASM_EXTABLE(1b,2b)
+	_ASM_EXTABLE(1b,4b)
+	_ASM_EXTABLE(2b,5b)
+	_ASM_EXTABLE(3b,6b)
 	PTGS_TO_GS_EX
 ENDPROC(ia32_sysenter_target)
 
@@ -493,6 +687,11 @@ ENTRY(system_call)
 	pushl_cfi %eax			# save orig_eax
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
+
+#ifdef CONFIG_PAX_RANDKSTACK
+	pax_erase_kstack
+#endif
+
 					# system call tracing in operation / emulation
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 	jnz syscall_trace_entry
@@ -512,6 +711,15 @@ syscall_exit:
 	testl $_TIF_ALLWORK_MASK, %ecx	# current->work
 	jne syscall_exit_work
 
+restore_all_pax:
+
+#ifdef CONFIG_PAX_RANDKSTACK
+	movl %esp, %eax
+	call pax_randomize_kstack
+#endif
+
+	pax_erase_kstack
+
 restore_all:
 	TRACE_IRQS_IRET
 restore_all_notrace:
@@ -566,14 +774,34 @@ ldt_ss:
  * compensating for the offset by changing to the ESPFIX segment with
  * a base address that matches for the difference.
  */
-#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
+#define GDT_ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)(%ebx)
 	mov %esp, %edx			/* load kernel esp */
 	mov PT_OLDESP(%esp), %eax	/* load userspace esp */
 	mov %dx, %ax			/* eax: new kernel esp */
 	sub %eax, %edx			/* offset (low word is 0) */
+#ifdef CONFIG_SMP
+	movl PER_CPU_VAR(cpu_number), %ebx
+	shll $PAGE_SHIFT_asm, %ebx
+	addl $cpu_gdt_table, %ebx
+#else
+	movl $cpu_gdt_table, %ebx
+#endif
 	shr $16, %edx
-	mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
-	mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
+
+#ifdef CONFIG_PAX_KERNEXEC
+	mov %cr0, %esi
+	btr $16, %esi
+	mov %esi, %cr0
+#endif
+
+	mov %dl, 4 + GDT_ESPFIX_SS /* bits 16..23 */
+	mov %dh, 7 + GDT_ESPFIX_SS /* bits 24..31 */
+
+#ifdef CONFIG_PAX_KERNEXEC
+	bts $16, %esi
+	mov %esi, %cr0
+#endif
+
 	pushl_cfi $__ESPFIX_SS
 	pushl_cfi %eax			/* new kernel esp */
 	/* Disable interrupts, but do not irqtrace this section: we
@@ -603,20 +831,18 @@ work_resched:
 	movl TI_flags(%ebp), %ecx
 	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
 					# than syscall tracing?
-	jz restore_all
+	jz restore_all_pax
 	testb $_TIF_NEED_RESCHED, %cl
 	jnz work_resched
 
 work_notifysig:				# deal with pending signals and
 					# notify-resume requests
+	movl %esp, %eax
 #ifdef CONFIG_VM86
 	testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
-	movl %esp, %eax
 	jne work_notifysig_v86		# returning to kernel-space or
 					# vm86-space
 1:
-#else
-	movl %esp, %eax
 #endif
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
@@ -637,7 +863,7 @@ work_notifysig_v86:
 	movl %eax, %esp
 	jmp 1b
 #endif
-END(work_pending)
+ENDPROC(work_pending)
 
 	# perform syscall exit tracing
 	ALIGN
@@ -645,11 +871,14 @@ syscall_trace_entry:
 	movl $-ENOSYS,PT_EAX(%esp)
 	movl %esp, %eax
 	call syscall_trace_enter
+
+	pax_erase_kstack
+
 	/* What it returned is what we'll actually use.  */
 	cmpl $(NR_syscalls), %eax
 	jnae syscall_call
 	jmp syscall_exit
-END(syscall_trace_entry)
+ENDPROC(syscall_trace_entry)
 
 	# perform syscall exit tracing
 	ALIGN
@@ -662,26 +891,30 @@ syscall_exit_work:
 	movl %esp, %eax
 	call syscall_trace_leave
 	jmp resume_userspace
-END(syscall_exit_work)
+ENDPROC(syscall_exit_work)
 	CFI_ENDPROC
 
 	RING0_INT_FRAME			# can't unwind into user space anyway
 syscall_fault:
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	push %ss
+	pop %ds
+#endif
 	ASM_CLAC
 	GET_THREAD_INFO(%ebp)
 	movl $-EFAULT,PT_EAX(%esp)
 	jmp resume_userspace
-END(syscall_fault)
+ENDPROC(syscall_fault)
 
 syscall_badsys:
 	movl $-ENOSYS,%eax
 	jmp syscall_after_call
-END(syscall_badsys)
+ENDPROC(syscall_badsys)
 
 sysenter_badsys:
 	movl $-ENOSYS,%eax
 	jmp sysenter_after_call
-END(sysenter_badsys)
+ENDPROC(sysenter_badsys)
 	CFI_ENDPROC
 
 .macro FIXUP_ESPFIX_STACK
@@ -694,8 +927,15 @@ END(sysenter_badsys)
  */
 #ifdef CONFIG_X86_ESPFIX32
 	/* fixup the stack */
-	mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
-	mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
+#ifdef CONFIG_SMP
+	movl PER_CPU_VAR(cpu_number), %ebx
+	shll $PAGE_SHIFT_asm, %ebx
+	addl $cpu_gdt_table, %ebx
+#else
+	movl $cpu_gdt_table, %ebx
+#endif
+	mov 4 + GDT_ESPFIX_SS, %al /* bits 16..23 */
+	mov 7 + GDT_ESPFIX_SS, %ah /* bits 24..31 */
 	shl $16, %eax
 	addl %esp, %eax			/* the adjusted stack pointer */
 	pushl_cfi $__KERNEL_DS
@@ -751,7 +991,7 @@ vector=vector+1
   .endr
 2:	jmp common_interrupt
 .endr
-END(irq_entries_start)
+ENDPROC(irq_entries_start)
 
 .previous
 END(interrupt)
@@ -808,7 +1048,7 @@ ENTRY(coprocessor_error)
 	pushl_cfi $do_coprocessor_error
 	jmp error_code
 	CFI_ENDPROC
-END(coprocessor_error)
+ENDPROC(coprocessor_error)
 
 ENTRY(simd_coprocessor_error)
 	RING0_INT_FRAME
@@ -821,7 +1061,7 @@ ENTRY(simd_coprocessor_error)
 .section .altinstructions,"a"
 	altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
 .previous
-.section .altinstr_replacement,"ax"
+.section .altinstr_replacement,"a"
 663:	pushl $do_simd_coprocessor_error
 664:
 .previous
@@ -830,7 +1070,7 @@ ENTRY(simd_coprocessor_error)
 #endif
 	jmp error_code
 	CFI_ENDPROC
-END(simd_coprocessor_error)
+ENDPROC(simd_coprocessor_error)
 
 ENTRY(device_not_available)
 	RING0_INT_FRAME
@@ -839,18 +1079,18 @@ ENTRY(device_not_available)
 	pushl_cfi $do_device_not_available
 	jmp error_code
 	CFI_ENDPROC
-END(device_not_available)
+ENDPROC(device_not_available)
 
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_iret)
 	iret
 	_ASM_EXTABLE(native_iret, iret_exc)
-END(native_iret)
+ENDPROC(native_iret)
 
 ENTRY(native_irq_enable_sysexit)
 	sti
 	sysexit
-END(native_irq_enable_sysexit)
+ENDPROC(native_irq_enable_sysexit)
 #endif
 
 ENTRY(overflow)
@@ -860,7 +1100,7 @@ ENTRY(overflow)
 	pushl_cfi $do_overflow
 	jmp error_code
 	CFI_ENDPROC
-END(overflow)
+ENDPROC(overflow)
 
 ENTRY(bounds)
 	RING0_INT_FRAME
@@ -869,7 +1109,7 @@ ENTRY(bounds)
 	pushl_cfi $do_bounds
 	jmp error_code
 	CFI_ENDPROC
-END(bounds)
+ENDPROC(bounds)
 
 ENTRY(invalid_op)
 	RING0_INT_FRAME
@@ -878,7 +1118,7 @@ ENTRY(invalid_op)
 	pushl_cfi $do_invalid_op
 	jmp error_code
 	CFI_ENDPROC
-END(invalid_op)
+ENDPROC(invalid_op)
 
 ENTRY(coprocessor_segment_overrun)
 	RING0_INT_FRAME
@@ -887,7 +1127,7 @@ ENTRY(coprocessor_segment_overrun)
 	pushl_cfi $do_coprocessor_segment_overrun
 	jmp error_code
 	CFI_ENDPROC
-END(coprocessor_segment_overrun)
+ENDPROC(coprocessor_segment_overrun)
 
 ENTRY(invalid_TSS)
 	RING0_EC_FRAME
@@ -895,7 +1135,7 @@ ENTRY(invalid_TSS)
 	pushl_cfi $do_invalid_TSS
 	jmp error_code
 	CFI_ENDPROC
-END(invalid_TSS)
+ENDPROC(invalid_TSS)
 
 ENTRY(segment_not_present)
 	RING0_EC_FRAME
@@ -903,7 +1143,7 @@ ENTRY(segment_not_present)
 	pushl_cfi $do_segment_not_present
 	jmp error_code
 	CFI_ENDPROC
-END(segment_not_present)
+ENDPROC(segment_not_present)
 
 ENTRY(stack_segment)
 	RING0_EC_FRAME
@@ -911,7 +1151,7 @@ ENTRY(stack_segment)
 	pushl_cfi $do_stack_segment
 	jmp error_code
 	CFI_ENDPROC
-END(stack_segment)
+ENDPROC(stack_segment)
 
 ENTRY(alignment_check)
 	RING0_EC_FRAME
@@ -919,7 +1159,7 @@ ENTRY(alignment_check)
 	pushl_cfi $do_alignment_check
 	jmp error_code
 	CFI_ENDPROC
-END(alignment_check)
+ENDPROC(alignment_check)
 
 ENTRY(divide_error)
 	RING0_INT_FRAME
@@ -928,7 +1168,7 @@ ENTRY(divide_error)
 	pushl_cfi $do_divide_error
 	jmp error_code
 	CFI_ENDPROC
-END(divide_error)
+ENDPROC(divide_error)
 
 #ifdef CONFIG_X86_MCE
 ENTRY(machine_check)
@@ -938,7 +1178,7 @@ ENTRY(machine_check)
 	pushl_cfi machine_check_vector
 	jmp error_code
 	CFI_ENDPROC
-END(machine_check)
+ENDPROC(machine_check)
 #endif
 
 ENTRY(spurious_interrupt_bug)
@@ -948,7 +1188,7 @@ ENTRY(spurious_interrupt_bug)
 	pushl_cfi $do_spurious_interrupt_bug
 	jmp error_code
 	CFI_ENDPROC
-END(spurious_interrupt_bug)
+ENDPROC(spurious_interrupt_bug)
 
 #ifdef CONFIG_XEN
 /* Xen doesn't set %esp to be precisely what the normal sysenter
@@ -1057,7 +1297,7 @@ BUILD_INTERRUPT3(hyperv_callback_vector,
 
 ENTRY(mcount)
 	ret
-END(mcount)
+ENDPROC(mcount)
 
 ENTRY(ftrace_caller)
 	pushl %eax
@@ -1087,7 +1327,7 @@ ftrace_graph_call:
 .globl ftrace_stub
 ftrace_stub:
 	ret
-END(ftrace_caller)
+ENDPROC(ftrace_caller)
 
 ENTRY(ftrace_regs_caller)
 	pushf	/* push flags before compare (in cs location) */
@@ -1185,7 +1425,7 @@ trace:
 	popl %ecx
 	popl %eax
 	jmp ftrace_stub
-END(mcount)
+ENDPROC(mcount)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 #endif /* CONFIG_FUNCTION_TRACER */
 
@@ -1203,7 +1443,7 @@ ENTRY(ftrace_graph_caller)
 	popl %ecx
 	popl %eax
 	ret
-END(ftrace_graph_caller)
+ENDPROC(ftrace_graph_caller)
 
 .globl return_to_handler
 return_to_handler:
@@ -1264,15 +1504,18 @@ error_code:
 	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
 	REG_TO_PTGS %ecx
 	SET_KERNEL_GS %ecx
-	movl $(__USER_DS), %ecx
+	movl $(__KERNEL_DS), %ecx
 	movl %ecx, %ds
 	movl %ecx, %es
+
+	pax_enter_kernel
+
 	TRACE_IRQS_OFF
 	movl %esp,%eax			# pt_regs pointer
 	call *%edi
 	jmp ret_from_exception
 	CFI_ENDPROC
-END(page_fault)
+ENDPROC(page_fault)
 
 /*
  * Debug traps and NMI can happen at the one SYSENTER instruction
@@ -1315,7 +1558,7 @@ debug_stack_correct:
 	call do_debug
 	jmp ret_from_exception
 	CFI_ENDPROC
-END(debug)
+ENDPROC(debug)
 
 /*
  * NMI is doubly nasty. It can happen _while_ we're handling
@@ -1355,6 +1598,9 @@ nmi_stack_correct:
 	xorl %edx,%edx		# zero error code
 	movl %esp,%eax		# pt_regs pointer
 	call do_nmi
+
+	pax_exit_kernel
+
 	jmp restore_all_notrace
 	CFI_ENDPROC
 
@@ -1392,13 +1638,16 @@ nmi_espfix_stack:
 	FIXUP_ESPFIX_STACK		# %eax == %esp
 	xorl %edx,%edx			# zero error code
 	call do_nmi
+
+	pax_exit_kernel
+
 	RESTORE_REGS
 	lss 12+4(%esp), %esp		# back to espfix stack
 	CFI_ADJUST_CFA_OFFSET -24
 	jmp irq_return
 #endif
 	CFI_ENDPROC
-END(nmi)
+ENDPROC(nmi)
 
 ENTRY(int3)
 	RING0_INT_FRAME
@@ -1411,14 +1660,14 @@ ENTRY(int3)
 	call do_int3
 	jmp ret_from_exception
 	CFI_ENDPROC
-END(int3)
+ENDPROC(int3)
 
 ENTRY(general_protection)
 	RING0_EC_FRAME
 	pushl_cfi $do_general_protection
 	jmp error_code
 	CFI_ENDPROC
-END(general_protection)
+ENDPROC(general_protection)
 
 #ifdef CONFIG_KVM_GUEST
 ENTRY(async_page_fault)
@@ -1427,6 +1676,6 @@ ENTRY(async_page_fault)
 	pushl_cfi $do_async_page_fault
 	jmp error_code
 	CFI_ENDPROC
-END(async_page_fault)
+ENDPROC(async_page_fault)
 #endif
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/entry_64.S linux-4.0.9-pax/arch/x86/kernel/entry_64.S
--- linux-4.0.9/arch/x86/kernel/entry_64.S	2015-04-13 11:21:01.794617464 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/entry_64.S	2015-06-29 02:20:31.882417555 +0200
@@ -59,6 +59,8 @@
 #include <asm/smap.h>
 #include <asm/pgtable_types.h>
 #include <linux/err.h>
+#include <asm/pgtable.h>
+#include <asm/alternative-asm.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
 #include <linux/elf-em.h>
@@ -81,6 +83,431 @@ ENTRY(native_usergs_sysret64)
 ENDPROC(native_usergs_sysret64)
 #endif /* CONFIG_PARAVIRT */
 
+	.macro ljmpq sel, off
+#if defined(CONFIG_MPSC) || defined(CONFIG_MCORE2) || defined (CONFIG_MATOM)
+	.byte 0x48; ljmp *1234f(%rip)
+	.pushsection .rodata
+	.align 16
+	1234: .quad \off; .word \sel
+	.popsection
+#else
+	pushq $\sel
+	pushq $\off
+	lretq
+#endif
+	.endm
+
+	.macro pax_enter_kernel
+	pax_set_fptr_mask
+#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+	call pax_enter_kernel
+#endif
+	.endm
+
+	.macro pax_exit_kernel
+#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+	call pax_exit_kernel
+#endif
+
+	.endm
+
+#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+ENTRY(pax_enter_kernel)
+	pushq %rdi
+
+#ifdef CONFIG_PARAVIRT
+	PV_SAVE_REGS(CLBR_RDI)
+#endif
+
+#ifdef CONFIG_PAX_KERNEXEC
+	GET_CR0_INTO_RDI
+	bts $X86_CR0_WP_BIT,%rdi
+	jnc 3f
+	mov %cs,%edi
+	cmp $__KERNEL_CS,%edi
+	jnz 2f
+1:
+#endif
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	661: jmp 111f
+	.pushsection .altinstr_replacement, "a"
+	662: ASM_NOP2
+	.popsection
+	.pushsection .altinstructions, "a"
+	altinstruction_entry 661b, 662b, X86_FEATURE_PCID, 2, 2
+	.popsection
+	GET_CR3_INTO_RDI
+	cmp $0,%dil
+	jnz 112f
+	mov $__KERNEL_DS,%edi
+	mov %edi,%ss
+	jmp 111f
+112:	cmp $1,%dil
+	jz 113f
+	ud2
+113:	sub $4097,%rdi
+	bts $63,%rdi
+	SET_RDI_INTO_CR3
+	mov $__UDEREF_KERNEL_DS,%edi
+	mov %edi,%ss
+111:
+#endif
+
+#ifdef CONFIG_PARAVIRT
+	PV_RESTORE_REGS(CLBR_RDI)
+#endif
+
+	popq %rdi
+	pax_force_retaddr
+	retq
+
+#ifdef CONFIG_PAX_KERNEXEC
+2:	ljmpq __KERNEL_CS,1b
+3:	ljmpq __KERNEXEC_KERNEL_CS,4f
+4:	SET_RDI_INTO_CR0
+	jmp 1b
+#endif
+ENDPROC(pax_enter_kernel)
+
+ENTRY(pax_exit_kernel)
+	pushq %rdi
+
+#ifdef CONFIG_PARAVIRT
+	PV_SAVE_REGS(CLBR_RDI)
+#endif
+
+#ifdef CONFIG_PAX_KERNEXEC
+	mov %cs,%rdi
+	cmp $__KERNEXEC_KERNEL_CS,%edi
+	jz 2f
+	GET_CR0_INTO_RDI
+	bts $X86_CR0_WP_BIT,%rdi
+	jnc 4f
+1:
+#endif
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	661: jmp 111f
+	.pushsection .altinstr_replacement, "a"
+	662: ASM_NOP2
+	.popsection
+	.pushsection .altinstructions, "a"
+	altinstruction_entry 661b, 662b, X86_FEATURE_PCID, 2, 2
+	.popsection
+	mov %ss,%edi
+	cmp $__UDEREF_KERNEL_DS,%edi
+	jnz 111f
+	GET_CR3_INTO_RDI
+	cmp $0,%dil
+	jz 112f
+	ud2
+112:	add $4097,%rdi
+	bts $63,%rdi
+	SET_RDI_INTO_CR3
+	mov $__KERNEL_DS,%edi
+	mov %edi,%ss
+111:
+#endif
+
+#ifdef CONFIG_PARAVIRT
+	PV_RESTORE_REGS(CLBR_RDI);
+#endif
+
+	popq %rdi
+	pax_force_retaddr
+	retq
+
+#ifdef CONFIG_PAX_KERNEXEC
+2:	GET_CR0_INTO_RDI
+	btr $X86_CR0_WP_BIT,%rdi
+	jnc 4f
+	ljmpq __KERNEL_CS,3f
+3:	SET_RDI_INTO_CR0
+	jmp 1b
+4:	ud2
+	jmp 4b
+#endif
+ENDPROC(pax_exit_kernel)
+#endif
+
+	.macro pax_enter_kernel_user
+	pax_set_fptr_mask
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	call pax_enter_kernel_user
+#endif
+	.endm
+
+	.macro pax_exit_kernel_user
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	call pax_exit_kernel_user
+#endif
+#ifdef CONFIG_PAX_RANDKSTACK
+	pushq %rax
+	pushq %r11
+	call pax_randomize_kstack
+	popq %r11
+	popq %rax
+#endif
+	.endm
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+ENTRY(pax_enter_kernel_user)
+	pushq %rdi
+	pushq %rbx
+
+#ifdef CONFIG_PARAVIRT
+	PV_SAVE_REGS(CLBR_RDI)
+#endif
+
+	661: jmp 111f
+	.pushsection .altinstr_replacement, "a"
+	662: ASM_NOP2
+	.popsection
+	.pushsection .altinstructions, "a"
+	altinstruction_entry 661b, 662b, X86_FEATURE_PCID, 2, 2
+	.popsection
+	GET_CR3_INTO_RDI
+	cmp $1,%dil
+	jnz 4f
+	sub $4097,%rdi
+	bts $63,%rdi
+	SET_RDI_INTO_CR3
+	jmp 3f
+111:
+
+	GET_CR3_INTO_RDI
+	mov %rdi,%rbx
+	add $__START_KERNEL_map,%rbx
+	sub phys_base(%rip),%rbx
+
+#ifdef CONFIG_PARAVIRT
+	cmpl $0, pv_info+PARAVIRT_enabled
+	jz 1f
+	pushq %rdi
+	i = 0
+	.rept USER_PGD_PTRS
+	mov i*8(%rbx),%rsi
+	mov $0,%sil
+	lea i*8(%rbx),%rdi
+	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_set_pgd_batched)
+	i = i + 1
+	.endr
+	popq %rdi
+	jmp 2f
+1:
+#endif
+
+	i = 0
+	.rept USER_PGD_PTRS
+	movb $0,i*8(%rbx)
+	i = i + 1
+	.endr
+
+2:	SET_RDI_INTO_CR3
+
+#ifdef CONFIG_PAX_KERNEXEC
+	GET_CR0_INTO_RDI
+	bts $X86_CR0_WP_BIT,%rdi
+	SET_RDI_INTO_CR0
+#endif
+
+3:
+
+#ifdef CONFIG_PARAVIRT
+	PV_RESTORE_REGS(CLBR_RDI)
+#endif
+
+	popq %rbx
+	popq %rdi
+	pax_force_retaddr
+	retq
+4:	ud2
+ENDPROC(pax_enter_kernel_user)
+
+ENTRY(pax_exit_kernel_user)
+	pushq %rdi
+	pushq %rbx
+
+#ifdef CONFIG_PARAVIRT
+	PV_SAVE_REGS(CLBR_RDI)
+#endif
+
+	GET_CR3_INTO_RDI
+	661: jmp 1f
+	.pushsection .altinstr_replacement, "a"
+	662: ASM_NOP2
+	.popsection
+	.pushsection .altinstructions, "a"
+	altinstruction_entry 661b, 662b, X86_FEATURE_PCID, 2, 2
+	.popsection
+	cmp $0,%dil
+	jnz 3f
+	add $4097,%rdi
+	bts $63,%rdi
+	SET_RDI_INTO_CR3
+	jmp 2f
+1:
+
+	mov %rdi,%rbx
+
+#ifdef CONFIG_PAX_KERNEXEC
+	GET_CR0_INTO_RDI
+	btr $X86_CR0_WP_BIT,%rdi
+	jnc 3f
+	SET_RDI_INTO_CR0
+#endif
+
+	add $__START_KERNEL_map,%rbx
+	sub phys_base(%rip),%rbx
+
+#ifdef CONFIG_PARAVIRT
+	cmpl $0, pv_info+PARAVIRT_enabled
+	jz 1f
+	i = 0
+	.rept USER_PGD_PTRS
+	mov i*8(%rbx),%rsi
+	mov $0x67,%sil
+	lea i*8(%rbx),%rdi
+	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_set_pgd_batched)
+	i = i + 1
+	.endr
+	jmp 2f
+1:
+#endif
+
+	i = 0
+	.rept USER_PGD_PTRS
+	movb $0x67,i*8(%rbx)
+	i = i + 1
+	.endr
+2:
+
+#ifdef CONFIG_PARAVIRT
+	PV_RESTORE_REGS(CLBR_RDI)
+#endif
+
+	popq %rbx
+	popq %rdi
+	pax_force_retaddr
+	retq
+3:	ud2
+ENDPROC(pax_exit_kernel_user)
+#endif
+
+	.macro pax_enter_kernel_nmi
+	pax_set_fptr_mask
+
+#ifdef CONFIG_PAX_KERNEXEC
+	GET_CR0_INTO_RDI
+	bts $X86_CR0_WP_BIT,%rdi
+	jc 110f
+	SET_RDI_INTO_CR0
+	or $2,%ebx
+110:
+#endif
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	661: jmp 111f
+	.pushsection .altinstr_replacement, "a"
+	662: ASM_NOP2
+	.popsection
+	.pushsection .altinstructions, "a"
+	altinstruction_entry 661b, 662b, X86_FEATURE_PCID, 2, 2
+	.popsection
+	GET_CR3_INTO_RDI
+	cmp $0,%dil
+	jz 111f
+	sub $4097,%rdi
+	or $4,%ebx
+	bts $63,%rdi
+	SET_RDI_INTO_CR3
+	mov $__UDEREF_KERNEL_DS,%edi
+	mov %edi,%ss
+111:
+#endif
+	.endm
+
+	.macro pax_exit_kernel_nmi
+#ifdef CONFIG_PAX_KERNEXEC
+	btr $1,%ebx
+	jnc 110f
+	GET_CR0_INTO_RDI
+	btr $X86_CR0_WP_BIT,%rdi
+	SET_RDI_INTO_CR0
+110:
+#endif
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	btr $2,%ebx
+	jnc 111f
+	GET_CR3_INTO_RDI
+	add $4097,%rdi
+	bts $63,%rdi
+	SET_RDI_INTO_CR3
+	mov $__KERNEL_DS,%edi
+	mov %edi,%ss
+111:
+#endif
+	.endm
+
+	.macro pax_erase_kstack
+#ifdef CONFIG_PAX_MEMORY_STACKLEAK
+	call pax_erase_kstack
+#endif
+	.endm
+
+#ifdef CONFIG_PAX_MEMORY_STACKLEAK
+ENTRY(pax_erase_kstack)
+	pushq %rdi
+	pushq %rcx
+	pushq %rax
+	pushq %r11
+
+	GET_THREAD_INFO(%r11)
+	mov TI_lowest_stack(%r11), %rdi
+	mov $0xB4DD00D5BADBABE5, %rax
+	std
+
+1:	mov %edi, %ecx
+	and $THREAD_SIZE_asm - 1, %ecx
+	shr $3, %ecx
+	repne scasq
+	jecxz 2f
+
+	cmp $2*8, %ecx
+	jc 2f
+
+	mov $2*8, %ecx
+	repe scasq
+	jecxz 2f
+	jne 1b
+
+2:	cld
+	or $2*8, %rdi
+	mov %esp, %ecx
+	sub %edi, %ecx
+
+	cmp $THREAD_SIZE_asm, %rcx
+	jb 3f
+	ud2
+3:
+
+	shr $3, %ecx
+	rep stosq
+
+	mov TI_task_thread_sp0(%r11), %rdi
+	sub $256, %rdi
+	mov %rdi, TI_lowest_stack(%r11)
+
+	popq %r11
+	popq %rax
+	popq %rcx
+	popq %rdi
+	pax_force_retaddr
+	ret
+ENDPROC(pax_erase_kstack)
+#endif
 
 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
 #ifdef CONFIG_TRACE_IRQFLAGS
@@ -117,7 +544,7 @@ ENDPROC(native_usergs_sysret64)
 .endm
 
 .macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
-	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
+	bt   $X86_EFLAGS_IF_BIT,EFLAGS-\offset(%rsp)	/* interrupts off? */
 	jnc  1f
 	TRACE_IRQS_ON_DEBUG
 1:
@@ -243,9 +670,52 @@ ENTRY(save_paranoid)
 	js 1f	/* negative -> in kernel */
 	SWAPGS
 	xorl %ebx,%ebx
-1:	ret
+1:
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	testb $3, CS+8(%rsp)
+	jnz 1f
+	pax_enter_kernel
+	jmp 2f
+1:	pax_enter_kernel_user
+2:
+#else
+	pax_enter_kernel
+#endif
+	pax_force_retaddr
+	ret
+	CFI_ENDPROC
+ENDPROC(save_paranoid)
+
+ENTRY(save_paranoid_nmi)
+	XCPT_FRAME 1 RDI+8
+	cld
+	movq_cfi rdi, RDI+8
+	movq_cfi rsi, RSI+8
+	movq_cfi rdx, RDX+8
+	movq_cfi rcx, RCX+8
+	movq_cfi rax, RAX+8
+	movq_cfi r8, R8+8
+	movq_cfi r9, R9+8
+	movq_cfi r10, R10+8
+	movq_cfi r11, R11+8
+	movq_cfi rbx, RBX+8
+	movq_cfi rbp, RBP+8
+	movq_cfi r12, R12+8
+	movq_cfi r13, R13+8
+	movq_cfi r14, R14+8
+	movq_cfi r15, R15+8
+	movl $1,%ebx
+	movl $MSR_GS_BASE,%ecx
+	rdmsr
+	testl %edx,%edx
+	js 1f	/* negative -> in kernel */
+	SWAPGS
+	xorl %ebx,%ebx
+1:	pax_enter_kernel_nmi
+	pax_force_retaddr
+	ret
 	CFI_ENDPROC
-END(save_paranoid)
+ENDPROC(save_paranoid_nmi)
 
 /*
  * A newly forked process directly context switches into this address.
@@ -266,7 +736,7 @@ ENTRY(ret_from_fork)
 
 	RESTORE_REST
 
-	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
+	testb $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
 	jz   1f
 
 	/*
@@ -279,15 +749,13 @@ ENTRY(ret_from_fork)
 	jmp  int_ret_from_sys_call
 
 1:
-	subq $REST_SKIP, %rsp	# leave space for volatiles
-	CFI_ADJUST_CFA_OFFSET	REST_SKIP
 	movq %rbp, %rdi
 	call *%rbx
 	movl $0, RAX(%rsp)
 	RESTORE_REST
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
-END(ret_from_fork)
+ENDPROC(ret_from_fork)
 
 /*
  * System call entry. Up to 6 arguments in registers are supported.
@@ -324,7 +792,7 @@ END(ret_from_fork)
 ENTRY(system_call)
 	CFI_STARTPROC	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
+	CFI_DEF_CFA	rsp,0
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	SWAPGS_UNSAFE_STACK
@@ -337,16 +805,23 @@ GLOBAL(system_call_after_swapgs)
 
 	movq	%rsp,PER_CPU_VAR(old_rsp)
 	movq	PER_CPU_VAR(kernel_stack),%rsp
+	SAVE_ARGS 8*6, 0, rax_enosys=1
+	pax_enter_kernel_user
+
+#ifdef CONFIG_PAX_RANDKSTACK
+	pax_erase_kstack
+#endif
+
 	/*
 	 * No need to follow this irqs off/on section - it's straight
 	 * and short:
 	 */
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	SAVE_ARGS 8, 0, rax_enosys=1
 	movq_cfi rax,(ORIG_RAX-ARGOFFSET)
 	movq  %rcx,RIP-ARGOFFSET(%rsp)
 	CFI_REL_OFFSET rip,RIP-ARGOFFSET
-	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	GET_THREAD_INFO(%rcx)
+	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
 	jnz tracesys
 system_call_fastpath:
 #if __SYSCALL_MASK == ~0
@@ -376,10 +851,13 @@ ret_from_sys_call:
 	 * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
 	 * very bad.
 	 */
-	testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+	GET_THREAD_INFO(%rcx)
+	testl $_TIF_ALLWORK_MASK,TI_flags(%rcx)
 	jnz int_ret_from_sys_call_fixup	/* Go the the slow path */
 
 	CFI_REMEMBER_STATE
+	pax_exit_kernel_user
+	pax_erase_kstack
 	/*
 	 * sysretq will re-enable interrupts:
 	 */
@@ -399,12 +877,15 @@ int_ret_from_sys_call_fixup:
 
 	/* Do syscall tracing */
 tracesys:
-	leaq -REST_SKIP(%rsp), %rdi
+	movq %rsp, %rdi
 	movq $AUDIT_ARCH_X86_64, %rsi
 	call syscall_trace_enter_phase1
 	test %rax, %rax
 	jnz tracesys_phase2		/* if needed, run the slow path */
-	LOAD_ARGS 0			/* else restore clobbered regs */
+
+	pax_erase_kstack
+
+	LOAD_ARGS			/* else restore clobbered regs */
 	jmp system_call_fastpath	/*      and return to the fast path */
 
 tracesys_phase2:
@@ -415,12 +896,14 @@ tracesys_phase2:
 	movq %rax,%rdx
 	call syscall_trace_enter_phase2
 
+	pax_erase_kstack
+
 	/*
 	 * Reload arg registers from stack in case ptrace changed them.
 	 * We don't reload %rax because syscall_trace_entry_phase2() returned
 	 * the value it wants us to use in the table lookup.
 	 */
-	LOAD_ARGS ARGOFFSET, 1
+	LOAD_ARGS 1
 	RESTORE_REST
 #if __SYSCALL_MASK == ~0
 	cmpq $__NR_syscall_max,%rax
@@ -451,7 +934,9 @@ GLOBAL(int_with_check)
 	andl %edi,%edx
 	jnz   int_careful
 	andl    $~TS_COMPAT,TI_status(%rcx)
-	jmp   retint_swapgs
+	pax_exit_kernel_user
+	pax_erase_kstack
+	jmp   retint_swapgs_pax
 
 	/* Either reschedule or signal or syscall exit tracking needed. */
 	/* First do a reschedule test. */
@@ -497,7 +982,7 @@ int_restore_rest:
 	TRACE_IRQS_OFF
 	jmp int_with_check
 	CFI_ENDPROC
-END(system_call)
+ENDPROC(system_call)
 
 	.macro FORK_LIKE func
 ENTRY(stub_\func)
@@ -510,9 +995,10 @@ ENTRY(stub_\func)
 	DEFAULT_FRAME 0 8		/* offset 8: return address */
 	call sys_\func
 	RESTORE_TOP_OF_STACK %r11, 8
-	ret $REST_SKIP		/* pop extended registers */
+	pax_force_retaddr
+	ret
 	CFI_ENDPROC
-END(stub_\func)
+ENDPROC(stub_\func)
 	.endm
 
 	.macro FIXED_FRAME label,func
@@ -522,9 +1008,10 @@ ENTRY(\label)
 	FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
 	call \func
 	RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
-END(\label)
+ENDPROC(\label)
 	.endm
 
 	FORK_LIKE  clone
@@ -543,7 +1030,7 @@ ENTRY(stub_execve)
 	RESTORE_REST
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
-END(stub_execve)
+ENDPROC(stub_execve)
 
 ENTRY(stub_execveat)
 	CFI_STARTPROC
@@ -557,7 +1044,7 @@ ENTRY(stub_execveat)
 	RESTORE_REST
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
-END(stub_execveat)
+ENDPROC(stub_execveat)
 
 /*
  * sigreturn is special because it needs to restore all registers on return.
@@ -574,7 +1061,7 @@ ENTRY(stub_rt_sigreturn)
 	RESTORE_REST
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
-END(stub_rt_sigreturn)
+ENDPROC(stub_rt_sigreturn)
 
 #ifdef CONFIG_X86_X32_ABI
 ENTRY(stub_x32_rt_sigreturn)
@@ -588,7 +1075,7 @@ ENTRY(stub_x32_rt_sigreturn)
 	RESTORE_REST
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
-END(stub_x32_rt_sigreturn)
+ENDPROC(stub_x32_rt_sigreturn)
 
 ENTRY(stub_x32_execve)
 	CFI_STARTPROC
@@ -602,7 +1089,7 @@ ENTRY(stub_x32_execve)
 	RESTORE_REST
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
-END(stub_x32_execve)
+ENDPROC(stub_x32_execve)
 
 ENTRY(stub_x32_execveat)
 	CFI_STARTPROC
@@ -616,7 +1103,7 @@ ENTRY(stub_x32_execveat)
 	RESTORE_REST
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
-END(stub_x32_execveat)
+ENDPROC(stub_x32_execveat)
 
 #endif
 
@@ -653,7 +1140,7 @@ vector=vector+1
 2:	jmp common_interrupt
 .endr
 	CFI_ENDPROC
-END(irq_entries_start)
+ENDPROC(irq_entries_start)
 
 .previous
 END(interrupt)
@@ -670,28 +1157,29 @@ END(interrupt)
 /* 0(%rsp): ~(interrupt number) */
 	.macro interrupt func
 	/* reserve pt_regs for scratch regs and rbp */
-	subq $ORIG_RAX-RBP, %rsp
-	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
+	subq $ORIG_RAX, %rsp
+	CFI_ADJUST_CFA_OFFSET ORIG_RAX
 	cld
-	/* start from rbp in pt_regs and jump over */
-	movq_cfi rdi, (RDI-RBP)
-	movq_cfi rsi, (RSI-RBP)
-	movq_cfi rdx, (RDX-RBP)
-	movq_cfi rcx, (RCX-RBP)
-	movq_cfi rax, (RAX-RBP)
-	movq_cfi  r8,  (R8-RBP)
-	movq_cfi  r9,  (R9-RBP)
-	movq_cfi r10, (R10-RBP)
-	movq_cfi r11, (R11-RBP)
+	/* start from r15 in pt_regs and jump over */
+	movq_cfi rdi, RDI
+	movq_cfi rsi, RSI
+	movq_cfi rdx, RDX
+	movq_cfi rcx, RCX
+	movq_cfi rax, RAX
+	movq_cfi  r8,  R8
+	movq_cfi  r9,  R9
+	movq_cfi r10, R10
+	movq_cfi r11, R11
+	movq_cfi r12, R12
 
 	/* Save rbp so that we can unwind from get_irq_regs() */
-	movq_cfi rbp, 0
+	movq_cfi rbp, RBP
 
 	/* Save previous stack value */
 	movq %rsp, %rsi
 
-	leaq -RBP(%rsp),%rdi	/* arg1 for handler */
-	testl $3, CS-RBP(%rsi)
+	movq %rsp,%rdi	/* arg1 for handler */
+	testb $3, CS(%rsi)
 	je 1f
 	SWAPGS
 	/*
@@ -711,6 +1199,18 @@ END(interrupt)
 			0x06 /* DW_OP_deref */, \
 			0x08 /* DW_OP_const1u */, SS+8-RBP, \
 			0x22 /* DW_OP_plus */
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	testb $3, CS(%rdi)
+	jnz 1f
+	pax_enter_kernel
+	jmp 2f
+1:	pax_enter_kernel_user
+2:
+#else
+	pax_enter_kernel
+#endif
+
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
 
@@ -735,14 +1235,14 @@ ret_from_intr:
 
 	/* Restore saved previous stack */
 	popq %rsi
-	CFI_DEF_CFA rsi,SS+8-RBP	/* reg/off reset after def_cfa_expr */
-	leaq ARGOFFSET-RBP(%rsi), %rsp
+	CFI_DEF_CFA rsi,SS+8	/* reg/off reset after def_cfa_expr */
+	movq %rsi, %rsp
 	CFI_DEF_CFA_REGISTER	rsp
-	CFI_ADJUST_CFA_OFFSET	RBP-ARGOFFSET
+	CFI_ADJUST_CFA_OFFSET	-ARGOFFSET
 
 exit_intr:
 	GET_THREAD_INFO(%rcx)
-	testl $3,CS-ARGOFFSET(%rsp)
+	testb $3,CS-ARGOFFSET(%rsp)
 	je retint_kernel
 
 	/* Interrupt came from user space */
@@ -764,14 +1264,16 @@ retint_swapgs:		/* return to user-space
 	 * The iretq could re-enable interrupts:
 	 */
 	DISABLE_INTERRUPTS(CLBR_ANY)
+	pax_exit_kernel_user
+retint_swapgs_pax:
 	TRACE_IRQS_IRETQ
 
 	/*
 	 * Try to use SYSRET instead of IRET if we're returning to
 	 * a completely clean 64-bit userspace context.
 	 */
-	movq (RCX-R11)(%rsp), %rcx
-	cmpq %rcx,(RIP-R11)(%rsp)		/* RCX == RIP */
+	movq (RCX-ARGOFFSET)(%rsp), %rcx
+	cmpq %rcx,(RIP-ARGOFFSET)(%rsp)		/* RCX == RIP */
 	jne opportunistic_sysret_failed
 
 	/*
@@ -792,7 +1294,7 @@ retint_swapgs:		/* return to user-space
 	shr $__VIRTUAL_MASK_SHIFT, %rcx
 	jnz opportunistic_sysret_failed
 
-	cmpq $__USER_CS,(CS-R11)(%rsp)		/* CS must match SYSRET */
+	cmpq $__USER_CS,(CS-ARGOFFSET)(%rsp)	/* CS must match SYSRET */
 	jne opportunistic_sysret_failed
 
 	movq (R11-ARGOFFSET)(%rsp), %r11
@@ -838,6 +1340,27 @@ opportunistic_sysret_failed:
 
 retint_restore_args:	/* return to kernel space */
 	DISABLE_INTERRUPTS(CLBR_ANY)
+	pax_exit_kernel
+
+#if defined(CONFIG_EFI) && defined(CONFIG_PAX_KERNEXEC)
+	/* This is a quirk to allow IRQs/NMIs/MCEs during early EFI setup,
+	 * namely calling EFI runtime services with a phys mapping. We start
+	 * off with NOPs and patch in the real instrumentation (BTS/OR)
+	 * before starting any userland process, and even before starting
+	 * up the APs.
+	 */
+	.pushsection .altinstr_replacement, "a"
+	601: pax_force_retaddr (RIP-ARGOFFSET)
+	602:
+	.popsection
+	603: .fill 602b-601b, 1, 0x90
+	.pushsection .altinstructions, "a"
+	altinstruction_entry 603b, 601b, X86_FEATURE_ALWAYS, 602b-601b, 602b-601b
+	.popsection
+#else
+	pax_force_retaddr (RIP-ARGOFFSET)
+#endif
+
 	/*
 	 * The iretq could re-enable interrupts:
 	 */
@@ -875,15 +1398,15 @@ native_irq_return_ldt:
 	SWAPGS
 	movq PER_CPU_VAR(espfix_waddr),%rdi
 	movq %rax,(0*8)(%rdi)	/* RAX */
-	movq (2*8)(%rsp),%rax	/* RIP */
+	movq (2*8 + RIP-RIP)(%rsp),%rax	/* RIP */
 	movq %rax,(1*8)(%rdi)
-	movq (3*8)(%rsp),%rax	/* CS */
+	movq (2*8 + CS-RIP)(%rsp),%rax	/* CS */
 	movq %rax,(2*8)(%rdi)
-	movq (4*8)(%rsp),%rax	/* RFLAGS */
+	movq (2*8 + EFLAGS-RIP)(%rsp),%rax	/* RFLAGS */
 	movq %rax,(3*8)(%rdi)
-	movq (6*8)(%rsp),%rax	/* SS */
+	movq (2*8 + SS-RIP)(%rsp),%rax	/* SS */
 	movq %rax,(5*8)(%rdi)
-	movq (5*8)(%rsp),%rax	/* RSP */
+	movq (2*8 + RSP-RIP)(%rsp),%rax	/* RSP */
 	movq %rax,(4*8)(%rdi)
 	andl $0xffff0000,%eax
 	popq_cfi %rdi
@@ -937,7 +1460,7 @@ ENTRY(retint_kernel)
 	jmp exit_intr
 #endif
 	CFI_ENDPROC
-END(common_interrupt)
+ENDPROC(common_interrupt)
 
 /*
  * APIC interrupts.
@@ -951,7 +1474,7 @@ ENTRY(\sym)
 	interrupt \do_sym
 	jmp ret_from_intr
 	CFI_ENDPROC
-END(\sym)
+ENDPROC(\sym)
 .endm
 
 #ifdef CONFIG_TRACING
@@ -1024,7 +1547,7 @@ apicinterrupt IRQ_WORK_VECTOR \
 /*
  * Exception entry points.
  */
-#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
+#define INIT_TSS_IST(x) (TSS_ist + ((x) - 1) * 8)(%r13)
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
@@ -1080,6 +1603,12 @@ ENTRY(\sym)
 	.endif
 
 	.if \shift_ist != -1
+#ifdef CONFIG_SMP
+	imul $TSS_size, PER_CPU_VAR(cpu_number), %r13d
+	lea init_tss(%r13), %r13
+#else
+	lea init_tss(%rip), %r13
+#endif
 	subq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
 	.endif
 
@@ -1126,7 +1655,7 @@ ENTRY(\sym)
 	.endif
 
 	CFI_ENDPROC
-END(\sym)
+ENDPROC(\sym)
 .endm
 
 #ifdef CONFIG_TRACING
@@ -1167,9 +1696,10 @@ gs_change:
 2:	mfence		/* workaround */
 	SWAPGS
 	popfq_cfi
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
-END(native_load_gs_index)
+ENDPROC(native_load_gs_index)
 
 	_ASM_EXTABLE(gs_change,bad_gs)
 	.section .fixup,"ax"
@@ -1197,9 +1727,10 @@ ENTRY(do_softirq_own_stack)
 	CFI_DEF_CFA_REGISTER	rsp
 	CFI_ADJUST_CFA_OFFSET   -8
 	decl PER_CPU_VAR(irq_count)
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
-END(do_softirq_own_stack)
+ENDPROC(do_softirq_own_stack)
 
 #ifdef CONFIG_XEN
 idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
@@ -1240,7 +1771,7 @@ ENTRY(xen_do_hypervisor_callback)   # do
 #endif
 	jmp  error_exit
 	CFI_ENDPROC
-END(xen_do_hypervisor_callback)
+ENDPROC(xen_do_hypervisor_callback)
 
 /*
  * Hypervisor uses this for application faults while it executes.
@@ -1299,7 +1830,7 @@ ENTRY(xen_failsafe_callback)
 	SAVE_ALL
 	jmp error_exit
 	CFI_ENDPROC
-END(xen_failsafe_callback)
+ENDPROC(xen_failsafe_callback)
 
 apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
 	xen_hvm_callback_vector xen_evtchn_do_upcall
@@ -1344,18 +1875,25 @@ ENTRY(paranoid_exit)
 	DEFAULT_FRAME
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF_DEBUG
-	testl %ebx,%ebx				/* swapgs needed? */
+	testl $1,%ebx				/* swapgs needed? */
 	jnz paranoid_restore
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	pax_exit_kernel_user
+#else
+	pax_exit_kernel
+#endif
 	TRACE_IRQS_IRETQ 0
 	SWAPGS_UNSAFE_STACK
 	RESTORE_ALL 8
 	INTERRUPT_RETURN
 paranoid_restore:
+	pax_exit_kernel
 	TRACE_IRQS_IRETQ_DEBUG 0
 	RESTORE_ALL 8
+	pax_force_retaddr_bts
 	INTERRUPT_RETURN
 	CFI_ENDPROC
-END(paranoid_exit)
+ENDPROC(paranoid_exit)
 
 /*
  * Exception entry point. This expects an error code/orig_rax on the stack.
@@ -1382,12 +1920,23 @@ ENTRY(error_entry)
 	movq %r14, R14+8(%rsp)
 	movq %r15, R15+8(%rsp)
 	xorl %ebx,%ebx
-	testl $3,CS+8(%rsp)
+	testb $3,CS+8(%rsp)
 	je error_kernelspace
 error_swapgs:
 	SWAPGS
 error_sti:
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	testb $3, CS+8(%rsp)
+	jnz 1f
+	pax_enter_kernel
+	jmp 2f
+1:	pax_enter_kernel_user
+2:
+#else
+	pax_enter_kernel
+#endif
 	TRACE_IRQS_OFF
+	pax_force_retaddr
 	ret
 
 /*
@@ -1422,7 +1971,7 @@ error_bad_iret:
 	decl %ebx	/* Return to usergs */
 	jmp error_sti
 	CFI_ENDPROC
-END(error_entry)
+ENDPROC(error_entry)
 
 
 /* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
@@ -1433,7 +1982,7 @@ ENTRY(error_exit)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
-	testl %eax,%eax
+	testl $1,%eax
 	jne retint_kernel
 	LOCKDEP_SYS_EXIT_IRQ
 	movl TI_flags(%rcx),%edx
@@ -1442,7 +1991,7 @@ ENTRY(error_exit)
 	jnz retint_careful
 	jmp retint_swapgs
 	CFI_ENDPROC
-END(error_exit)
+ENDPROC(error_exit)
 
 /*
  * Test if a given stack is an NMI stack or not.
@@ -1500,9 +2049,11 @@ ENTRY(nmi)
 	 * If %cs was not the kernel segment, then the NMI triggered in user
 	 * space, which means it is definitely not nested.
 	 */
+	cmpl $__KERNEXEC_KERNEL_CS, 16(%rsp)
+	je 1f
 	cmpl $__KERNEL_CS, 16(%rsp)
 	jne first_nmi
-
+1:
 	/*
 	 * Check the special variable on the stack to see if NMIs are
 	 * executing.
@@ -1536,8 +2087,7 @@ nested_nmi:
 
 1:
 	/* Set up the interrupted NMIs stack to jump to repeat_nmi */
-	leaq -1*8(%rsp), %rdx
-	movq %rdx, %rsp
+	subq $8, %rsp
 	CFI_ADJUST_CFA_OFFSET 1*8
 	leaq -10*8(%rsp), %rdx
 	pushq_cfi $__KERNEL_DS
@@ -1555,6 +2105,7 @@ nested_nmi_out:
 	CFI_RESTORE rdx
 
 	/* No need to check faults here */
+#	pax_force_retaddr_bts
 	INTERRUPT_RETURN
 
 	CFI_RESTORE_STATE
@@ -1651,13 +2202,13 @@ end_repeat_nmi:
 	subq $ORIG_RAX-R15, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
 	/*
-	 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
+	 * Use save_paranoid_nmi to handle SWAPGS, but no need to use paranoid_exit
 	 * as we should not be calling schedule in NMI context.
 	 * Even with normal interrupts enabled. An NMI should not be
 	 * setting NEED_RESCHED or anything that normal interrupts and
 	 * exceptions might do.
 	 */
-	call save_paranoid
+	call save_paranoid_nmi
 	DEFAULT_FRAME 0
 
 	/*
@@ -1667,9 +2218,9 @@ end_repeat_nmi:
 	 * NMI itself takes a page fault, the page fault that was preempted
 	 * will read the information from the NMI page fault and not the
 	 * origin fault. Save it off and restore it if it changes.
-	 * Use the r12 callee-saved register.
+	 * Use the r13 callee-saved register.
 	 */
-	movq %cr2, %r12
+	movq %cr2, %r13
 
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	movq %rsp,%rdi
@@ -1678,29 +2229,34 @@ end_repeat_nmi:
 
 	/* Did the NMI take a page fault? Restore cr2 if it did */
 	movq %cr2, %rcx
-	cmpq %rcx, %r12
+	cmpq %rcx, %r13
 	je 1f
-	movq %r12, %cr2
+	movq %r13, %cr2
 1:
 	
-	testl %ebx,%ebx				/* swapgs needed? */
+	testl $1,%ebx				/* swapgs needed? */
 	jnz nmi_restore
 nmi_swapgs:
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
+	pax_exit_kernel_nmi
 	/* Pop the extra iret frame at once */
 	RESTORE_ALL 6*8
+	testb $3, 8(%rsp)
+	jnz 1f
+	pax_force_retaddr_bts
+1:
 
 	/* Clear the NMI executing stack variable */
 	movq $0, 5*8(%rsp)
 	jmp irq_return
 	CFI_ENDPROC
-END(nmi)
+ENDPROC(nmi)
 
 ENTRY(ignore_sysret)
 	CFI_STARTPROC
 	mov $-ENOSYS,%eax
 	sysret
 	CFI_ENDPROC
-END(ignore_sysret)
+ENDPROC(ignore_sysret)
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/espfix_64.c linux-4.0.9-pax/arch/x86/kernel/espfix_64.c
--- linux-4.0.9/arch/x86/kernel/espfix_64.c	2015-03-18 15:21:50.268349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/espfix_64.c	2015-04-15 12:13:52.922318622 +0200
@@ -70,8 +70,7 @@ static DEFINE_MUTEX(espfix_init_mutex);
 #define ESPFIX_MAX_PAGES  DIV_ROUND_UP(CONFIG_NR_CPUS, ESPFIX_STACKS_PER_PAGE)
 static void *espfix_pages[ESPFIX_MAX_PAGES];
 
-static __page_aligned_bss pud_t espfix_pud_page[PTRS_PER_PUD]
-	__aligned(PAGE_SIZE);
+static __page_aligned_rodata pud_t espfix_pud_page[PTRS_PER_PUD];
 
 static unsigned int page_random, slot_random;
 
@@ -122,11 +121,17 @@ static void init_espfix_random(void)
 void __init init_espfix_bsp(void)
 {
 	pgd_t *pgd_p;
+	unsigned long index = pgd_index(ESPFIX_BASE_ADDR);
 
 	/* Install the espfix pud into the kernel page directory */
-	pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
+	pgd_p = &init_level4_pgt[index];
 	pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
 
+#ifdef CONFIG_PAX_PER_CPU_PGD
+	clone_pgd_range(get_cpu_pgd(0, kernel) + index, swapper_pg_dir + index, 1);
+	clone_pgd_range(get_cpu_pgd(0, user) + index, swapper_pg_dir + index, 1);
+#endif
+
 	/* Randomize the locations */
 	init_espfix_random();
 
@@ -194,7 +199,7 @@ void init_espfix_ap(void)
 		set_pte(&pte_p[n*PTE_STRIDE], pte);
 
 	/* Job is done for this CPU and any CPU which shares this page */
-	ACCESS_ONCE(espfix_pages[page]) = stack_page;
+	ACCESS_ONCE_RW(espfix_pages[page]) = stack_page;
 
 unlock_done:
 	mutex_unlock(&espfix_init_mutex);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/ftrace.c linux-4.0.9-pax/arch/x86/kernel/ftrace.c
--- linux-4.0.9/arch/x86/kernel/ftrace.c	2015-03-18 15:21:50.268349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/ftrace.c	2015-06-27 23:44:01.806449019 +0200
@@ -89,7 +89,7 @@ static unsigned long text_ip_addr(unsign
 	 * kernel identity mapping to modify code.
 	 */
 	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
-		ip = (unsigned long)__va(__pa_symbol(ip));
+		ip = (unsigned long)__va(__pa_symbol(ktla_ktva(ip)));
 
 	return ip;
 }
@@ -105,6 +105,8 @@ ftrace_modify_code_direct(unsigned long
 {
 	unsigned char replaced[MCOUNT_INSN_SIZE];
 
+	ip = ktla_ktva(ip);
+
 	/*
 	 * Note: Due to modules and __init, code can
 	 *  disappear and change, we need to protect against faulting
@@ -230,7 +232,7 @@ static int update_ftrace_func(unsigned l
 	unsigned char old[MCOUNT_INSN_SIZE];
 	int ret;
 
-	memcpy(old, (void *)ip, MCOUNT_INSN_SIZE);
+	memcpy(old, (void *)ktla_ktva(ip), MCOUNT_INSN_SIZE);
 
 	ftrace_update_func = ip;
 	/* Make sure the breakpoints see the ftrace_update_func update */
@@ -311,7 +313,7 @@ static int add_break(unsigned long ip, c
 	unsigned char replaced[MCOUNT_INSN_SIZE];
 	unsigned char brk = BREAKPOINT_INSTRUCTION;
 
-	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+	if (probe_kernel_read(replaced, (void *)ktla_ktva(ip), MCOUNT_INSN_SIZE))
 		return -EFAULT;
 
 	/* Make sure it is what we expect it to be */
@@ -670,11 +672,11 @@ static unsigned char *ftrace_jmp_replace
 /* Module allocation simplifies allocating memory for code */
 static inline void *alloc_tramp(unsigned long size)
 {
-	return module_alloc(size);
+	return module_alloc_exec(size);
 }
 static inline void tramp_free(void *tramp)
 {
-	module_memfree(tramp);
+	module_memfree_exec(tramp);
 }
 #else
 /* Trampolines can only be created if modules are supported */
@@ -753,7 +755,9 @@ create_trampoline(struct ftrace_ops *ops
 	*tramp_size = size + MCOUNT_INSN_SIZE + sizeof(void *);
 
 	/* Copy ftrace_caller onto the trampoline memory */
+	pax_open_kernel();
 	ret = probe_kernel_read(trampoline, (void *)start_offset, size);
+	pax_close_kernel();
 	if (WARN_ON(ret < 0)) {
 		tramp_free(trampoline);
 		return 0;
@@ -763,6 +767,7 @@ create_trampoline(struct ftrace_ops *ops
 
 	/* The trampoline ends with a jmp to ftrace_return */
 	jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_return);
+	pax_open_kernel();
 	memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE);
 
 	/*
@@ -775,6 +780,7 @@ create_trampoline(struct ftrace_ops *ops
 
 	ptr = (unsigned long *)(trampoline + size + MCOUNT_INSN_SIZE);
 	*ptr = (unsigned long)ops;
+	pax_close_kernel();
 
 	op_offset -= start_offset;
 	memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE);
@@ -792,7 +798,9 @@ create_trampoline(struct ftrace_ops *ops
 	op_ptr.offset = offset;
 
 	/* put in the new offset to the ftrace_ops */
+	pax_open_kernel();
 	memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);
+	pax_close_kernel();
 
 	/* ALLOC_TRAMP flags lets us know we created it */
 	ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/head_32.S linux-4.0.9-pax/arch/x86/kernel/head_32.S
--- linux-4.0.9/arch/x86/kernel/head_32.S	2015-06-26 10:29:22.454538574 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/head_32.S	2015-06-26 10:29:32.598538551 +0200
@@ -26,6 +26,12 @@
 /* Physical address */
 #define pa(X) ((X) - __PAGE_OFFSET)
 
+#ifdef CONFIG_PAX_KERNEXEC
+#define ta(X) (X)
+#else
+#define ta(X) ((X) - __PAGE_OFFSET)
+#endif
+
 /*
  * References to members of the new_cpu_data structure.
  */
@@ -55,11 +61,7 @@
  * and smaller than max_low_pfn, otherwise it will waste some page table entries
  */
 
-#if PTRS_PER_PMD > 1
-#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
-#else
-#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
-#endif
+#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PTE)
 
 /* Number of possible pages in the lowmem region */
 LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT)
@@ -78,6 +80,12 @@ INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_P
 RESERVE_BRK(pagetables, INIT_MAP_SIZE)
 
 /*
+ * Real beginning of normal "text" segment
+ */
+ENTRY(stext)
+ENTRY(_stext)
+
+/*
  * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
  * %esi points to the real-mode code as a 32-bit pointer.
  * CS and DS must be 4 GB flat segments, but we don't depend on
@@ -85,6 +93,13 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
  * can.
  */
 __HEAD
+
+#ifdef CONFIG_PAX_KERNEXEC
+	jmp startup_32
+/* PaX: fill first page in .text with int3 to catch NULL derefs in kernel mode */
+.fill PAGE_SIZE-5,1,0xcc
+#endif
+
 ENTRY(startup_32)
 	movl pa(stack_start),%ecx
 	
@@ -106,6 +121,59 @@ ENTRY(startup_32)
 2:
 	leal -__PAGE_OFFSET(%ecx),%esp
 
+#ifdef CONFIG_SMP
+	movl $pa(cpu_gdt_table),%edi
+	movl $__per_cpu_load,%eax
+	movw %ax,GDT_ENTRY_PERCPU * 8 + 2(%edi)
+	rorl $16,%eax
+	movb %al,GDT_ENTRY_PERCPU * 8 + 4(%edi)
+	movb %ah,GDT_ENTRY_PERCPU * 8 + 7(%edi)
+	movl $__per_cpu_end - 1,%eax
+	subl $__per_cpu_start,%eax
+	movw %ax,GDT_ENTRY_PERCPU * 8 + 0(%edi)
+#endif
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	movl $NR_CPUS,%ecx
+	movl $pa(cpu_gdt_table),%edi
+1:
+	movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c09700),GDT_ENTRY_KERNEL_DS * 8 + 4(%edi)
+	movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c0fb00),GDT_ENTRY_DEFAULT_USER_CS * 8 + 4(%edi)
+	movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c0f300),GDT_ENTRY_DEFAULT_USER_DS * 8 + 4(%edi)
+	addl $PAGE_SIZE_asm,%edi
+	loop 1b
+#endif
+
+#ifdef CONFIG_PAX_KERNEXEC
+	movl $pa(boot_gdt),%edi
+	movl $__LOAD_PHYSICAL_ADDR,%eax
+	movw %ax,GDT_ENTRY_BOOT_CS * 8 + 2(%edi)
+	rorl $16,%eax
+	movb %al,GDT_ENTRY_BOOT_CS * 8 + 4(%edi)
+	movb %ah,GDT_ENTRY_BOOT_CS * 8 + 7(%edi)
+	rorl $16,%eax
+
+	ljmp $(__BOOT_CS),$1f
+1:
+
+	movl $NR_CPUS,%ecx
+	movl $pa(cpu_gdt_table),%edi
+	addl $__PAGE_OFFSET,%eax
+1:
+	movb $0xc0,GDT_ENTRY_KERNEL_CS * 8 + 6(%edi)
+	movb $0xc0,GDT_ENTRY_KERNEXEC_KERNEL_CS * 8 + 6(%edi)
+	movw %ax,GDT_ENTRY_KERNEL_CS * 8 + 2(%edi)
+	movw %ax,GDT_ENTRY_KERNEXEC_KERNEL_CS * 8 + 2(%edi)
+	rorl $16,%eax
+	movb %al,GDT_ENTRY_KERNEL_CS * 8 + 4(%edi)
+	movb %al,GDT_ENTRY_KERNEXEC_KERNEL_CS * 8 + 4(%edi)
+	movb %ah,GDT_ENTRY_KERNEL_CS * 8 + 7(%edi)
+	movb %ah,GDT_ENTRY_KERNEXEC_KERNEL_CS * 8 + 7(%edi)
+	rorl $16,%eax
+	addl $PAGE_SIZE_asm,%edi
+	loop 1b
+#endif
+
 /*
  * Clear BSS first so that there are no surprises...
  */
@@ -201,8 +269,11 @@ ENTRY(startup_32)
 	movl %eax, pa(max_pfn_mapped)
 
 	/* Do early initialization of the fixmap area */
-	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
-	movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
+#ifdef CONFIG_COMPAT_VDSO
+	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR+_PAGE_USER,pa(initial_pg_pmd+0x1000*KPMDS-8)
+#else
+	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,pa(initial_pg_pmd+0x1000*KPMDS-8)
+#endif
 #else	/* Not PAE */
 
 page_pde_offset = (__PAGE_OFFSET >> 20);
@@ -232,8 +303,11 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
 	movl %eax, pa(max_pfn_mapped)
 
 	/* Do early initialization of the fixmap area */
-	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
-	movl %eax,pa(initial_page_table+0xffc)
+#ifdef CONFIG_COMPAT_VDSO
+	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR+_PAGE_USER,pa(initial_page_table+0xffc)
+#else
+	movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,pa(initial_page_table+0xffc)
+#endif
 #endif
 
 #ifdef CONFIG_PARAVIRT
@@ -247,9 +321,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
 	cmpl $num_subarch_entries, %eax
 	jae bad_subarch
 
-	movl pa(subarch_entries)(,%eax,4), %eax
-	subl $__PAGE_OFFSET, %eax
-	jmp *%eax
+	jmp *pa(subarch_entries)(,%eax,4)
 
 bad_subarch:
 WEAK(lguest_entry)
@@ -261,10 +333,10 @@ WEAK(xen_entry)
 	__INITDATA
 
 subarch_entries:
-	.long default_entry		/* normal x86/PC */
-	.long lguest_entry		/* lguest hypervisor */
-	.long xen_entry			/* Xen hypervisor */
-	.long default_entry		/* Moorestown MID */
+	.long ta(default_entry)		/* normal x86/PC */
+	.long ta(lguest_entry)		/* lguest hypervisor */
+	.long ta(xen_entry)		/* Xen hypervisor */
+	.long ta(default_entry)		/* Moorestown MID */
 num_subarch_entries = (. - subarch_entries) / 4
 .previous
 #else
@@ -354,6 +426,7 @@ default_entry:
 	movl pa(mmu_cr4_features),%eax
 	movl %eax,%cr4
 
+#ifdef CONFIG_X86_PAE
 	testb $X86_CR4_PAE, %al		# check if PAE is enabled
 	jz enable_paging
 
@@ -382,6 +455,9 @@ default_entry:
 	/* Make changes effective */
 	wrmsr
 
+	btsl $_PAGE_BIT_NX-32,pa(__supported_pte_mask+4)
+#endif
+
 enable_paging:
 
 /*
@@ -449,14 +525,20 @@ is486:
 1:	movl $(__KERNEL_DS),%eax	# reload all the segment registers
 	movl %eax,%ss			# after changing gdt.
 
-	movl $(__USER_DS),%eax		# DS/ES contains default USER segment
+#	movl $(__KERNEL_DS),%eax	# DS/ES contains default KERNEL segment
 	movl %eax,%ds
 	movl %eax,%es
 
 	movl $(__KERNEL_PERCPU), %eax
 	movl %eax,%fs			# set this cpu's percpu
 
+#ifdef CONFIG_CC_STACKPROTECTOR
 	movl $(__KERNEL_STACK_CANARY),%eax
+#elif defined(CONFIG_PAX_MEMORY_UDEREF)
+	movl $(__USER_DS),%eax
+#else
+	xorl %eax,%eax
+#endif
 	movl %eax,%gs
 
 	xorl %eax,%eax			# Clear LDT
@@ -513,8 +595,11 @@ setup_once:
 	 * relocation.  Manually set base address in stack canary
 	 * segment descriptor.
 	 */
-	movl $gdt_page,%eax
+	movl $cpu_gdt_table,%eax
 	movl $stack_canary,%ecx
+#ifdef CONFIG_SMP
+	addl $__per_cpu_load,%ecx
+#endif
 	movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
 	shrl $16, %ecx
 	movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
@@ -551,7 +636,7 @@ early_idt_handler_common:
 	cmpl $2,(%esp)		# X86_TRAP_NMI
 	je is_nmi		# Ignore NMI
 
-	cmpl $2,%ss:early_recursion_flag
+	cmpl $1,%ss:early_recursion_flag
 	je hlt_loop
 	incl %ss:early_recursion_flag
 
@@ -589,8 +674,8 @@ early_idt_handler_common:
 	pushl (20+6*4)(%esp)	/* trapno */
 	pushl $fault_msg
 	call printk
-#endif
 	call dump_stack
+#endif
 hlt_loop:
 	hlt
 	jmp hlt_loop
@@ -610,8 +695,11 @@ ENDPROC(early_idt_handler_common)
 /* This is the default interrupt "handler" :-) */
 	ALIGN
 ignore_int:
-	cld
 #ifdef CONFIG_PRINTK
+	cmpl $2,%ss:early_recursion_flag
+	je hlt_loop
+	incl %ss:early_recursion_flag
+	cld
 	pushl %eax
 	pushl %ecx
 	pushl %edx
@@ -620,9 +708,6 @@ ignore_int:
 	movl $(__KERNEL_DS),%eax
 	movl %eax,%ds
 	movl %eax,%es
-	cmpl $2,early_recursion_flag
-	je hlt_loop
-	incl early_recursion_flag
 	pushl 16(%esp)
 	pushl 24(%esp)
 	pushl 32(%esp)
@@ -656,29 +741,34 @@ ENTRY(setup_once_ref)
 /*
  * BSS section
  */
-__PAGE_ALIGNED_BSS
-	.align PAGE_SIZE
 #ifdef CONFIG_X86_PAE
+.section .initial_pg_pmd,"a",@progbits
 initial_pg_pmd:
 	.fill 1024*KPMDS,4,0
 #else
+.section .initial_page_table,"a",@progbits
 ENTRY(initial_page_table)
 	.fill 1024,4,0
 #endif
+.section .initial_pg_fixmap,"a",@progbits
 initial_pg_fixmap:
 	.fill 1024,4,0
+.section .empty_zero_page,"a",@progbits
 ENTRY(empty_zero_page)
 	.fill 4096,1,0
+.section .swapper_pg_dir,"a",@progbits
 ENTRY(swapper_pg_dir)
+#ifdef CONFIG_X86_PAE
+	.fill 4,8,0
+#else
 	.fill 1024,4,0
+#endif
 
 /*
  * This starts the data section.
  */
 #ifdef CONFIG_X86_PAE
-__PAGE_ALIGNED_DATA
-	/* Page-aligned for the benefit of paravirt? */
-	.align PAGE_SIZE
+.section .initial_page_table,"a",@progbits
 ENTRY(initial_page_table)
 	.long	pa(initial_pg_pmd+PGD_IDENT_ATTR),0	/* low identity map */
 # if KPMDS == 3
@@ -697,12 +787,20 @@ ENTRY(initial_page_table)
 #  error "Kernel PMDs should be 1, 2 or 3"
 # endif
 	.align PAGE_SIZE		/* needs to be page-sized too */
+
+#ifdef CONFIG_PAX_PER_CPU_PGD
+ENTRY(cpu_pgd)
+	.rept 2*NR_CPUS
+	.fill	4,8,0
+	.endr
+#endif
+
 #endif
 
 .data
 .balign 4
 ENTRY(stack_start)
-	.long init_thread_union+THREAD_SIZE
+	.long init_thread_union+THREAD_SIZE-8
 
 __INITRODATA
 int_msg:
@@ -730,7 +828,7 @@ fault_msg:
  * segment size, and 32-bit linear address value:
  */
 
-	.data
+.section .rodata,"a",@progbits
 .globl boot_gdt_descr
 .globl idt_descr
 
@@ -739,7 +837,7 @@ fault_msg:
 	.word 0				# 32 bit align gdt_desc.address
 boot_gdt_descr:
 	.word __BOOT_DS+7
-	.long boot_gdt - __PAGE_OFFSET
+	.long pa(boot_gdt)
 
 	.word 0				# 32-bit align idt_desc.address
 idt_descr:
@@ -750,7 +848,7 @@ idt_descr:
 	.word 0				# 32 bit align gdt_desc.address
 ENTRY(early_gdt_descr)
 	.word GDT_ENTRIES*8-1
-	.long gdt_page			/* Overwritten for secondary CPUs */
+	.long cpu_gdt_table		/* Overwritten for secondary CPUs */
 
 /*
  * The boot_gdt must mirror the equivalent in setup.S and is
@@ -759,5 +857,65 @@ ENTRY(early_gdt_descr)
 	.align L1_CACHE_BYTES
 ENTRY(boot_gdt)
 	.fill GDT_ENTRY_BOOT_CS,8,0
-	.quad 0x00cf9a000000ffff	/* kernel 4GB code at 0x00000000 */
-	.quad 0x00cf92000000ffff	/* kernel 4GB data at 0x00000000 */
+	.quad 0x00cf9b000000ffff	/* kernel 4GB code at 0x00000000 */
+	.quad 0x00cf93000000ffff	/* kernel 4GB data at 0x00000000 */
+
+	.align PAGE_SIZE_asm
+ENTRY(cpu_gdt_table)
+	.rept NR_CPUS
+	.quad 0x0000000000000000	/* NULL descriptor */
+	.quad 0x0000000000000000	/* 0x0b reserved */
+	.quad 0x0000000000000000	/* 0x13 reserved */
+	.quad 0x0000000000000000	/* 0x1b reserved */
+
+#ifdef CONFIG_PAX_KERNEXEC
+	.quad 0x00cf9b000000ffff	/* 0x20 alternate kernel 4GB code at 0x00000000 */
+#else
+	.quad 0x0000000000000000	/* 0x20 unused */
+#endif
+
+	.quad 0x0000000000000000	/* 0x28 unused */
+	.quad 0x0000000000000000	/* 0x33 TLS entry 1 */
+	.quad 0x0000000000000000	/* 0x3b TLS entry 2 */
+	.quad 0x0000000000000000	/* 0x43 TLS entry 3 */
+	.quad 0x0000000000000000	/* 0x4b reserved */
+	.quad 0x0000000000000000	/* 0x53 reserved */
+	.quad 0x0000000000000000	/* 0x5b reserved */
+
+	.quad 0x00cf9b000000ffff	/* 0x60 kernel 4GB code at 0x00000000 */
+	.quad 0x00cf93000000ffff	/* 0x68 kernel 4GB data at 0x00000000 */
+	.quad 0x00cffb000000ffff	/* 0x73 user 4GB code at 0x00000000 */
+	.quad 0x00cff3000000ffff	/* 0x7b user 4GB data at 0x00000000 */
+
+	.quad 0x0000000000000000	/* 0x80 TSS descriptor */
+	.quad 0x0000000000000000	/* 0x88 LDT descriptor */
+
+	/*
+	 * Segments used for calling PnP BIOS have byte granularity.
+	 * The code segments and data segments have fixed 64k limits,
+	 * the transfer segment sizes are set at run time.
+	 */
+	.quad 0x00409b000000ffff	/* 0x90 32-bit code */
+	.quad 0x00009b000000ffff	/* 0x98 16-bit code */
+	.quad 0x000093000000ffff	/* 0xa0 16-bit data */
+	.quad 0x0000930000000000	/* 0xa8 16-bit data */
+	.quad 0x0000930000000000	/* 0xb0 16-bit data */
+
+	/*
+	 * The APM segments have byte granularity and their bases
+	 * are set at run time.  All have 64k limits.
+	 */
+	.quad 0x00409b000000ffff	/* 0xb8 APM CS    code */
+	.quad 0x00009b000000ffff	/* 0xc0 APM CS 16 code (16 bit) */
+	.quad 0x004093000000ffff	/* 0xc8 APM DS    data */
+
+	.quad 0x00c093000000ffff	/* 0xd0 - ESPFIX SS */
+	.quad 0x0040930000000000	/* 0xd8 - PERCPU */
+	.quad 0x0040910000000017	/* 0xe0 - STACK_CANARY */
+	.quad 0x0000000000000000	/* 0xe8 - PCIBIOS_CS */
+	.quad 0x0000000000000000	/* 0xf0 - PCIBIOS_DS */
+	.quad 0x0000000000000000	/* 0xf8 - GDT entry 31: double-fault TSS */
+
+	/* Be sure this is zeroed to avoid false validations in Xen */
+	.fill PAGE_SIZE_asm - GDT_SIZE,1,0
+	.endr
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/head64.c linux-4.0.9-pax/arch/x86/kernel/head64.c
--- linux-4.0.9/arch/x86/kernel/head64.c	2015-06-26 10:29:22.454538574 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/head64.c	2015-06-26 10:29:32.594538551 +0200
@@ -68,12 +68,12 @@ again:
 	pgd = *pgd_p;
 
 	/*
-	 * The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
-	 * critical -- __PAGE_OFFSET would point us back into the dynamic
+	 * The use of __early_va rather than __va here is critical:
+	 * __va would point us back into the dynamic
 	 * range and we might end up looping forever...
 	 */
 	if (pgd)
-		pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
+		pud_p = (pudval_t *)(__early_va(pgd & PTE_PFN_MASK));
 	else {
 		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
 			reset_early_page_tables();
@@ -83,13 +83,13 @@ again:
 		pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
 		for (i = 0; i < PTRS_PER_PUD; i++)
 			pud_p[i] = 0;
-		*pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
+		*pgd_p = (pgdval_t)__pa(pud_p) + _KERNPG_TABLE;
 	}
 	pud_p += pud_index(address);
 	pud = *pud_p;
 
 	if (pud)
-		pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
+		pmd_p = (pmdval_t *)(__early_va(pud & PTE_PFN_MASK));
 	else {
 		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
 			reset_early_page_tables();
@@ -99,7 +99,7 @@ again:
 		pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
 		for (i = 0; i < PTRS_PER_PMD; i++)
 			pmd_p[i] = 0;
-		*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
+		*pud_p = (pudval_t)__pa(pmd_p) + _KERNPG_TABLE;
 	}
 	pmd = (physaddr & PMD_MASK) + early_pmd_flags;
 	pmd_p[pmd_index(address)] = pmd;
@@ -180,7 +180,6 @@ asmlinkage __visible void __init x86_64_
 	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
 		early_printk("Kernel alive\n");
 
-	clear_page(init_level4_pgt);
 	/* set init_level4_pgt kernel high mapping*/
 	init_level4_pgt[511] = early_level4_pgt[511];
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/head_64.S linux-4.0.9-pax/arch/x86/kernel/head_64.S
--- linux-4.0.9/arch/x86/kernel/head_64.S	2015-06-26 10:29:22.454538574 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/head_64.S	2015-06-26 10:29:32.598538551 +0200
@@ -20,6 +20,8 @@
 #include <asm/processor-flags.h>
 #include <asm/percpu.h>
 #include <asm/nops.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/asm-offsets.h>
@@ -41,6 +43,12 @@ L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET
 L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
 L4_START_KERNEL = pgd_index(__START_KERNEL_map)
 L3_START_KERNEL = pud_index(__START_KERNEL_map)
+L4_VMALLOC_START = pgd_index(VMALLOC_START)
+L3_VMALLOC_START = pud_index(VMALLOC_START)
+L4_VMALLOC_END = pgd_index(VMALLOC_END)
+L3_VMALLOC_END = pud_index(VMALLOC_END)
+L4_VMEMMAP_START = pgd_index(VMEMMAP_START)
+L3_VMEMMAP_START = pud_index(VMEMMAP_START)
 
 	.text
 	__HEAD
@@ -89,11 +97,26 @@ startup_64:
 	 * Fixup the physical addresses in the page table
 	 */
 	addq	%rbp, early_level4_pgt + (L4_START_KERNEL*8)(%rip)
+	addq	%rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip)
+	addq	%rbp, init_level4_pgt + (L4_VMALLOC_START*8)(%rip)
+	addq	%rbp, init_level4_pgt + (L4_VMALLOC_END*8)(%rip)
+	addq	%rbp, init_level4_pgt + (L4_VMEMMAP_START*8)(%rip)
+	addq	%rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip)
 
-	addq	%rbp, level3_kernel_pgt + (510*8)(%rip)
-	addq	%rbp, level3_kernel_pgt + (511*8)(%rip)
+	addq	%rbp, level3_ident_pgt + (0*8)(%rip)
+#ifndef CONFIG_XEN
+	addq	%rbp, level3_ident_pgt + (1*8)(%rip)
+#endif
+
+	addq	%rbp, level3_vmemmap_pgt + (L3_VMEMMAP_START*8)(%rip)
 
+	addq	%rbp, level3_kernel_pgt + (L3_START_KERNEL*8)(%rip)
+	addq	%rbp, level3_kernel_pgt + ((L3_START_KERNEL+1)*8)(%rip)
+
+	addq	%rbp, level2_fixmap_pgt + (504*8)(%rip)
+	addq	%rbp, level2_fixmap_pgt + (505*8)(%rip)
 	addq	%rbp, level2_fixmap_pgt + (506*8)(%rip)
+	addq	%rbp, level2_fixmap_pgt + (507*8)(%rip)
 
 	/*
 	 * Set up the identity mapping for the switchover.  These
@@ -174,11 +197,12 @@ ENTRY(secondary_startup_64)
 	 * after the boot processor executes this code.
 	 */
 
+	orq	$-1, %rbp
 	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
 1:
 
-	/* Enable PAE mode and PGE */
-	movl	$(X86_CR4_PAE | X86_CR4_PGE), %ecx
+	/* Enable PAE mode and PSE/PGE */
+	movl	$(X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE), %ecx
 	movq	%rcx, %cr4
 
 	/* Setup early boot stage 4 level pagetables. */
@@ -199,10 +223,21 @@ ENTRY(secondary_startup_64)
 	movl	$MSR_EFER, %ecx
 	rdmsr
 	btsl	$_EFER_SCE, %eax	/* Enable System Call */
-	btl	$20,%edi		/* No Execute supported? */
+	btl	$(X86_FEATURE_NX & 31),%edi	/* No Execute supported? */
 	jnc     1f
 	btsl	$_EFER_NX, %eax
+	cmpq	$-1, %rbp
+	je	1f
 	btsq	$_PAGE_BIT_NX,early_pmd_flags(%rip)
+	btsq	$_PAGE_BIT_NX, init_level4_pgt + 8*L4_PAGE_OFFSET(%rip)
+	btsq	$_PAGE_BIT_NX, init_level4_pgt + 8*L4_VMALLOC_START(%rip)
+	btsq	$_PAGE_BIT_NX, init_level4_pgt + 8*L4_VMALLOC_END(%rip)
+	btsq	$_PAGE_BIT_NX, init_level4_pgt + 8*L4_VMEMMAP_START(%rip)
+	btsq	$_PAGE_BIT_NX, level2_fixmap_pgt + 8*504(%rip)
+	btsq	$_PAGE_BIT_NX, level2_fixmap_pgt + 8*505(%rip)
+	btsq	$_PAGE_BIT_NX, level2_fixmap_pgt + 8*506(%rip)
+	btsq	$_PAGE_BIT_NX, level2_fixmap_pgt + 8*507(%rip)
+	btsq	$_PAGE_BIT_NX, __supported_pte_mask(%rip)
 1:	wrmsr				/* Make changes effective */
 
 	/* Setup cr0 */
@@ -282,6 +317,7 @@ ENTRY(secondary_startup_64)
 	 *	REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
 	 *		address given in m16:64.
 	 */
+	pax_set_fptr_mask
 	movq	initial_code(%rip),%rax
 	pushq	$0		# fake return address to stop unwinder
 	pushq	$__KERNEL_CS	# set correct cs
@@ -313,7 +349,7 @@ ENDPROC(start_cpu0)
 	.quad	INIT_PER_CPU_VAR(irq_stack_union)
 
 	GLOBAL(stack_start)
-	.quad  init_thread_union+THREAD_SIZE-8
+	.quad  init_thread_union+THREAD_SIZE-16
 	.word  0
 	__FINITDATA
 
@@ -393,7 +429,7 @@ early_idt_handler_common:
 	call dump_stack
 #ifdef CONFIG_KALLSYMS	
 	leaq early_idt_ripmsg(%rip),%rdi
-	movq 40(%rsp),%rsi	# %rip again
+	movq 88(%rsp),%rsi	# %rip again
 	call __print_symbol
 #endif
 #endif /* EARLY_PRINTK */
@@ -422,6 +458,7 @@ ENDPROC(early_idt_handler_common)
 early_recursion_flag:
 	.long 0
 
+	.section .rodata,"a",@progbits
 #ifdef CONFIG_EARLY_PRINTK
 early_idt_msg:
 	.asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
@@ -449,29 +486,52 @@ NEXT_PAGE(early_level4_pgt)
 NEXT_PAGE(early_dynamic_pgts)
 	.fill	512*EARLY_DYNAMIC_PAGE_TABLES,8,0
 
-	.data
+	.section .rodata,"a",@progbits
 
-#ifndef CONFIG_XEN
 NEXT_PAGE(init_level4_pgt)
-	.fill	512,8,0
-#else
-NEXT_PAGE(init_level4_pgt)
-	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
 	.org    init_level4_pgt + L4_PAGE_OFFSET*8, 0
 	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+	.org	init_level4_pgt + L4_VMALLOC_START*8, 0
+	.quad	level3_vmalloc_start_pgt - __START_KERNEL_map + _KERNPG_TABLE
+	.org	init_level4_pgt + L4_VMALLOC_END*8, 0
+	.quad	level3_vmalloc_end_pgt - __START_KERNEL_map + _KERNPG_TABLE
+	.org	init_level4_pgt + L4_VMEMMAP_START*8, 0
+	.quad	level3_vmemmap_pgt - __START_KERNEL_map + _KERNPG_TABLE
 	.org    init_level4_pgt + L4_START_KERNEL*8, 0
 	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
 	.quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
 
+#ifdef CONFIG_PAX_PER_CPU_PGD
+NEXT_PAGE(cpu_pgd)
+	.rept 2*NR_CPUS
+	.fill	512,8,0
+	.endr
+#endif
+
 NEXT_PAGE(level3_ident_pgt)
 	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+#ifdef CONFIG_XEN
 	.fill	511, 8, 0
+#else
+	.quad	level2_ident_pgt + PAGE_SIZE - __START_KERNEL_map + _KERNPG_TABLE
+	.fill	510,8,0
+#endif
+
+NEXT_PAGE(level3_vmalloc_start_pgt)
+	.fill	512,8,0
+
+NEXT_PAGE(level3_vmalloc_end_pgt)
+	.fill	512,8,0
+
+NEXT_PAGE(level3_vmemmap_pgt)
+	.fill	L3_VMEMMAP_START,8,0
+	.quad	level2_vmemmap_pgt - __START_KERNEL_map + _KERNPG_TABLE
+
 NEXT_PAGE(level2_ident_pgt)
-	/* Since I easily can, map the first 1G.
+	/* Since I easily can, map the first 2G.
 	 * Don't set NX because code runs from these pages.
 	 */
-	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
-#endif
+	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, 2*PTRS_PER_PMD)
 
 NEXT_PAGE(level3_kernel_pgt)
 	.fill	L3_START_KERNEL,8,0
@@ -479,6 +539,9 @@ NEXT_PAGE(level3_kernel_pgt)
 	.quad	level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
 	.quad	level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
 
+NEXT_PAGE(level2_vmemmap_pgt)
+	.fill	512,8,0
+
 NEXT_PAGE(level2_kernel_pgt)
 	/*
 	 * 512 MB kernel mapping. We spend a full page on this pagetable
@@ -494,23 +557,61 @@ NEXT_PAGE(level2_kernel_pgt)
 		KERNEL_IMAGE_SIZE/PMD_SIZE)
 
 NEXT_PAGE(level2_fixmap_pgt)
-	.fill	506,8,0
-	.quad	level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
-	/* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
-	.fill	5,8,0
+	.fill	504,8,0
+	.quad	level1_fixmap_pgt - __START_KERNEL_map + 0 * PAGE_SIZE + _PAGE_TABLE
+	.quad	level1_fixmap_pgt - __START_KERNEL_map + 1 * PAGE_SIZE + _PAGE_TABLE
+	.quad	level1_fixmap_pgt - __START_KERNEL_map + 2 * PAGE_SIZE + _PAGE_TABLE
+	.quad	level1_vsyscall_pgt - __START_KERNEL_map + _PAGE_TABLE
+	/* 6MB reserved for vsyscalls + a 2MB hole = 3 + 1 entries */
+	.fill	4,8,0
 
 NEXT_PAGE(level1_fixmap_pgt)
+	.fill	3*512,8,0
+
+NEXT_PAGE(level1_vsyscall_pgt)
 	.fill	512,8,0
 
 #undef PMDS
 
-	.data
+	.align PAGE_SIZE
+ENTRY(cpu_gdt_table)
+	.rept NR_CPUS
+	.quad	0x0000000000000000	/* NULL descriptor */
+	.quad	0x00cf9b000000ffff	/* __KERNEL32_CS */
+	.quad	0x00af9b000000ffff	/* __KERNEL_CS */
+	.quad	0x00cf93000000ffff	/* __KERNEL_DS */
+	.quad	0x00cffb000000ffff	/* __USER32_CS */
+	.quad	0x00cff3000000ffff	/* __USER_DS, __USER32_DS  */
+	.quad	0x00affb000000ffff	/* __USER_CS */
+
+#ifdef CONFIG_PAX_KERNEXEC
+	.quad	0x00af9b000000ffff	/* __KERNEXEC_KERNEL_CS */
+#else
+	.quad	0x0			/* unused */
+#endif
+
+	.quad	0,0			/* TSS */
+	.quad	0,0			/* LDT */
+	.quad	0,0,0			/* three TLS descriptors */
+	.quad	0x0000f40000000000	/* node/CPU stored in limit */
+	/* asm/segment.h:GDT_ENTRIES must match this */
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	.quad	0x00cf93000000ffff	/* __UDEREF_KERNEL_DS */
+#else
+	.quad	0x0			/* unused */
+#endif
+
+	/* zero the remaining page */
+	.fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
+	.endr
+
 	.align 16
 	.globl early_gdt_descr
 early_gdt_descr:
 	.word	GDT_ENTRIES*8-1
 early_gdt_descr_base:
-	.quad	INIT_PER_CPU_VAR(gdt_page)
+	.quad	cpu_gdt_table
 
 ENTRY(phys_base)
 	/* This must match the first entry in level2_kernel_pgt */
@@ -534,8 +635,8 @@ NEXT_PAGE(kasan_zero_pud)
 
 
 #include "../../x86/xen/xen-head.S"
-	
-	__PAGE_ALIGNED_BSS
+
+	.section .rodata,"a",@progbits
 NEXT_PAGE(empty_zero_page)
 	.skip PAGE_SIZE
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/i386_ksyms_32.c linux-4.0.9-pax/arch/x86/kernel/i386_ksyms_32.c
--- linux-4.0.9/arch/x86/kernel/i386_ksyms_32.c	2015-03-18 15:21:50.268349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/i386_ksyms_32.c	2015-04-15 12:13:52.922318622 +0200
@@ -20,8 +20,12 @@ extern void cmpxchg8b_emu(void);
 EXPORT_SYMBOL(cmpxchg8b_emu);
 #endif
 
+EXPORT_SYMBOL_GPL(cpu_gdt_table);
+
 /* Networking helper routines. */
 EXPORT_SYMBOL(csum_partial_copy_generic);
+EXPORT_SYMBOL(csum_partial_copy_generic_to_user);
+EXPORT_SYMBOL(csum_partial_copy_generic_from_user);
 
 EXPORT_SYMBOL(__get_user_1);
 EXPORT_SYMBOL(__get_user_2);
@@ -44,3 +48,11 @@ EXPORT_SYMBOL(___preempt_schedule);
 EXPORT_SYMBOL(___preempt_schedule_context);
 #endif
 #endif
+
+#ifdef CONFIG_PAX_KERNEXEC
+EXPORT_SYMBOL(__LOAD_PHYSICAL_ADDR);
+#endif
+
+#ifdef CONFIG_PAX_PER_CPU_PGD
+EXPORT_SYMBOL(cpu_pgd);
+#endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/i387.c linux-4.0.9-pax/arch/x86/kernel/i387.c
--- linux-4.0.9/arch/x86/kernel/i387.c	2015-06-15 16:02:22.267183858 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/i387.c	2015-06-15 16:02:33.019183834 +0200
@@ -68,7 +68,7 @@ static inline bool interrupted_kernel_fp
 static inline bool interrupted_user_mode(void)
 {
 	struct pt_regs *regs = get_irq_regs();
-	return regs && user_mode_vm(regs);
+	return regs && user_mode(regs);
 }
 
 /*
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/i8259.c linux-4.0.9-pax/arch/x86/kernel/i8259.c
--- linux-4.0.9/arch/x86/kernel/i8259.c	2015-03-18 15:21:50.268349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/i8259.c	2015-04-15 12:13:52.922318622 +0200
@@ -110,7 +110,7 @@ static int i8259A_irq_pending(unsigned i
 static void make_8259A_irq(unsigned int irq)
 {
 	disable_irq_nosync(irq);
-	io_apic_irqs &= ~(1<<irq);
+	io_apic_irqs &= ~(1UL<<irq);
 	irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq);
 	enable_irq(irq);
 }
@@ -208,7 +208,7 @@ spurious_8259A_irq:
 			       "spurious 8259A interrupt: IRQ%d.\n", irq);
 			spurious_irq_mask |= irqmask;
 		}
-		atomic_inc(&irq_err_count);
+		atomic_inc_unchecked(&irq_err_count);
 		/*
 		 * Theoretically we do not have to handle this IRQ,
 		 * but in Linux this does not cause problems and is
@@ -349,14 +349,16 @@ static void init_8259A(int auto_eoi)
 	/* (slave's support for AEOI in flat mode is to be investigated) */
 	outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
 
+	pax_open_kernel();
 	if (auto_eoi)
 		/*
 		 * In AEOI mode we just have to mask the interrupt
 		 * when acking.
 		 */
-		i8259A_chip.irq_mask_ack = disable_8259A_irq;
+		*(void **)&i8259A_chip.irq_mask_ack = disable_8259A_irq;
 	else
-		i8259A_chip.irq_mask_ack = mask_and_ack_8259A;
+		*(void **)&i8259A_chip.irq_mask_ack = mask_and_ack_8259A;
+	pax_close_kernel();
 
 	udelay(100);		/* wait for 8259A to initialize */
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/io_delay.c linux-4.0.9-pax/arch/x86/kernel/io_delay.c
--- linux-4.0.9/arch/x86/kernel/io_delay.c	2015-03-18 15:21:50.268349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/io_delay.c	2015-04-15 12:13:52.922318622 +0200
@@ -58,7 +58,7 @@ static int __init dmi_io_delay_0xed_port
  * Quirk table for systems that misbehave (lock up, etc.) if port
  * 0x80 is used:
  */
-static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
+static const struct dmi_system_id __initconst io_delay_0xed_port_dmi_table[] = {
 	{
 		.callback	= dmi_io_delay_0xed_port,
 		.ident		= "Compaq Presario V6000",
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/ioport.c linux-4.0.9-pax/arch/x86/kernel/ioport.c
--- linux-4.0.9/arch/x86/kernel/ioport.c	2015-03-18 15:21:50.268349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/ioport.c	2015-04-15 12:13:52.922318622 +0200
@@ -54,7 +54,7 @@ asmlinkage long sys_ioperm(unsigned long
 	 * because the ->io_bitmap_max value must match the bitmap
 	 * contents:
 	 */
-	tss = &per_cpu(init_tss, get_cpu());
+	tss = init_tss + get_cpu();
 
 	if (turn_on)
 		bitmap_clear(t->io_bitmap_ptr, from, num);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/irq_32.c linux-4.0.9-pax/arch/x86/kernel/irq_32.c
--- linux-4.0.9/arch/x86/kernel/irq_32.c	2015-04-13 11:21:01.850617461 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/irq_32.c	2015-04-15 12:13:52.922318622 +0200
@@ -39,7 +39,7 @@ static int check_stack_overflow(void)
 	__asm__ __volatile__("andl %%esp,%0" :
 			     "=r" (sp) : "0" (THREAD_SIZE - 1));
 
-	return sp < (sizeof(struct thread_info) + STACK_WARN);
+	return sp < STACK_WARN;
 }
 
 static void print_stack_overflow(void)
@@ -77,10 +77,9 @@ static inline void *current_stack(void)
 static inline int
 execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
 {
-	struct irq_stack *curstk, *irqstk;
+	struct irq_stack *irqstk;
 	u32 *isp, *prev_esp, arg1, arg2;
 
-	curstk = (struct irq_stack *) current_stack();
 	irqstk = __this_cpu_read(hardirq_stack);
 
 	/*
@@ -89,15 +88,19 @@ execute_on_irq_stack(int overflow, struc
 	 * handler) we can't do that and just have to keep using the
 	 * current stack (which is the irq stack already after all)
 	 */
-	if (unlikely(curstk == irqstk))
+	if (unlikely((void *)current_stack_pointer - (void *)irqstk < THREAD_SIZE))
 		return 0;
 
-	isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));
+	isp = (u32 *) ((char *)irqstk + sizeof(*irqstk) - 8);
 
 	/* Save the next esp at the bottom of the stack */
 	prev_esp = (u32 *)irqstk;
 	*prev_esp = current_stack_pointer();
 
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	__set_fs(MAKE_MM_SEG(0));
+#endif
+
 	if (unlikely(overflow))
 		call_on_stack(print_stack_overflow, isp);
 
@@ -108,6 +111,11 @@ execute_on_irq_stack(int overflow, struc
 		     :  "0" (irq),   "1" (desc),  "2" (isp),
 			"D" (desc->handle_irq)
 		     : "memory", "cc", "ecx");
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	__set_fs(current_thread_info()->addr_limit);
+#endif
+
 	return 1;
 }
 
@@ -116,32 +124,18 @@ execute_on_irq_stack(int overflow, struc
  */
 void irq_ctx_init(int cpu)
 {
-	struct irq_stack *irqstk;
-
 	if (per_cpu(hardirq_stack, cpu))
 		return;
 
-	irqstk = page_address(alloc_pages_node(cpu_to_node(cpu),
-					       THREADINFO_GFP,
-					       THREAD_SIZE_ORDER));
-	per_cpu(hardirq_stack, cpu) = irqstk;
-
-	irqstk = page_address(alloc_pages_node(cpu_to_node(cpu),
-					       THREADINFO_GFP,
-					       THREAD_SIZE_ORDER));
-	per_cpu(softirq_stack, cpu) = irqstk;
-
-	printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
-	       cpu, per_cpu(hardirq_stack, cpu),  per_cpu(softirq_stack, cpu));
+	per_cpu(hardirq_stack, cpu) = page_address(alloc_pages_node(cpu_to_node(cpu), THREADINFO_GFP, THREAD_SIZE_ORDER));
+	per_cpu(softirq_stack, cpu) = page_address(alloc_pages_node(cpu_to_node(cpu), THREADINFO_GFP, THREAD_SIZE_ORDER));
 }
 
 void do_softirq_own_stack(void)
 {
-	struct thread_info *curstk;
 	struct irq_stack *irqstk;
 	u32 *isp, *prev_esp;
 
-	curstk = current_stack();
 	irqstk = __this_cpu_read(softirq_stack);
 
 	/* build the stack frame on the softirq stack */
@@ -151,7 +145,16 @@ void do_softirq_own_stack(void)
 	prev_esp = (u32 *)irqstk;
 	*prev_esp = current_stack_pointer();
 
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	__set_fs(MAKE_MM_SEG(0));
+#endif
+
 	call_on_stack(__do_softirq, isp);
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	__set_fs(current_thread_info()->addr_limit);
+#endif
+
 }
 
 bool handle_irq(unsigned irq, struct pt_regs *regs)
@@ -165,7 +168,7 @@ bool handle_irq(unsigned irq, struct pt_
 	if (unlikely(!desc))
 		return false;
 
-	if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
+	if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
 		if (unlikely(overflow))
 			print_stack_overflow();
 		desc->handle_irq(irq, desc);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/irq_64.c linux-4.0.9-pax/arch/x86/kernel/irq_64.c
--- linux-4.0.9/arch/x86/kernel/irq_64.c	2015-03-18 15:21:50.268349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/irq_64.c	2015-04-15 12:13:52.922318622 +0200
@@ -44,7 +44,7 @@ static inline void stack_overflow_check(
 	u64 estack_top, estack_bottom;
 	u64 curbase = (u64)task_stack_page(current);
 
-	if (user_mode_vm(regs))
+	if (user_mode(regs))
 		return;
 
 	if (regs->sp >= curbase + sizeof(struct thread_info) +
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/irq.c linux-4.0.9-pax/arch/x86/kernel/irq.c
--- linux-4.0.9/arch/x86/kernel/irq.c	2015-04-13 11:21:01.850617461 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/irq.c	2015-04-15 12:13:52.926318621 +0200
@@ -22,7 +22,7 @@
 #define CREATE_TRACE_POINTS
 #include <asm/trace/irq_vectors.h>
 
-atomic_t irq_err_count;
+atomic_unchecked_t irq_err_count;
 
 /* Function pointer for generic interrupt vector handling */
 void (*x86_platform_ipi_callback)(void) = NULL;
@@ -132,9 +132,9 @@ int arch_show_interrupts(struct seq_file
 		seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count);
 	seq_puts(p, "  Hypervisor callback interrupts\n");
 #endif
-	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
+	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read_unchecked(&irq_err_count));
 #if defined(CONFIG_X86_IO_APIC)
-	seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
+	seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read_unchecked(&irq_mis_count));
 #endif
 	return 0;
 }
@@ -174,7 +174,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 
 u64 arch_irq_stat(void)
 {
-	u64 sum = atomic_read(&irq_err_count);
+	u64 sum = atomic_read_unchecked(&irq_err_count);
 	return sum;
 }
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/jump_label.c linux-4.0.9-pax/arch/x86/kernel/jump_label.c
--- linux-4.0.9/arch/x86/kernel/jump_label.c	2015-03-18 15:21:50.268349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/jump_label.c	2015-04-15 12:13:52.926318621 +0200
@@ -51,7 +51,7 @@ static void __jump_label_transform(struc
 			 * Jump label is enabled for the first time.
 			 * So we expect a default_nop...
 			 */
-			if (unlikely(memcmp((void *)entry->code, default_nop, 5)
+			if (unlikely(memcmp((void *)ktla_ktva(entry->code), default_nop, 5)
 				     != 0))
 				bug_at((void *)entry->code, __LINE__);
 		} else {
@@ -59,7 +59,7 @@ static void __jump_label_transform(struc
 			 * ...otherwise expect an ideal_nop. Otherwise
 			 * something went horribly wrong.
 			 */
-			if (unlikely(memcmp((void *)entry->code, ideal_nop, 5)
+			if (unlikely(memcmp((void *)ktla_ktva(entry->code), ideal_nop, 5)
 				     != 0))
 				bug_at((void *)entry->code, __LINE__);
 		}
@@ -75,13 +75,13 @@ static void __jump_label_transform(struc
 		 * are converting the default nop to the ideal nop.
 		 */
 		if (init) {
-			if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
+			if (unlikely(memcmp((void *)ktla_ktva(entry->code), default_nop, 5) != 0))
 				bug_at((void *)entry->code, __LINE__);
 		} else {
 			code.jump = 0xe9;
 			code.offset = entry->target -
 				(entry->code + JUMP_LABEL_NOP_SIZE);
-			if (unlikely(memcmp((void *)entry->code, &code, 5) != 0))
+			if (unlikely(memcmp((void *)ktla_ktva(entry->code), &code, 5) != 0))
 				bug_at((void *)entry->code, __LINE__);
 		}
 		memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/kgdb.c linux-4.0.9-pax/arch/x86/kernel/kgdb.c
--- linux-4.0.9/arch/x86/kernel/kgdb.c	2015-04-13 11:21:01.850617461 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/kgdb.c	2015-04-15 12:13:52.926318621 +0200
@@ -126,11 +126,11 @@ char *dbg_get_reg(int regno, void *mem,
 #ifdef CONFIG_X86_32
 	switch (regno) {
 	case GDB_SS:
-		if (!user_mode_vm(regs))
+		if (!user_mode(regs))
 			*(unsigned long *)mem = __KERNEL_DS;
 		break;
 	case GDB_SP:
-		if (!user_mode_vm(regs))
+		if (!user_mode(regs))
 			*(unsigned long *)mem = kernel_stack_pointer(regs);
 		break;
 	case GDB_GS:
@@ -228,7 +228,10 @@ static void kgdb_correct_hw_break(void)
 		bp->attr.bp_addr = breakinfo[breakno].addr;
 		bp->attr.bp_len = breakinfo[breakno].len;
 		bp->attr.bp_type = breakinfo[breakno].type;
-		info->address = breakinfo[breakno].addr;
+		if (breakinfo[breakno].type == X86_BREAKPOINT_EXECUTE)
+			info->address = ktla_ktva(breakinfo[breakno].addr);
+		else
+			info->address = breakinfo[breakno].addr;
 		info->len = breakinfo[breakno].len;
 		info->type = breakinfo[breakno].type;
 		val = arch_install_hw_breakpoint(bp);
@@ -475,12 +478,12 @@ int kgdb_arch_handle_exception(int e_vec
 	case 'k':
 		/* clear the trace bit */
 		linux_regs->flags &= ~X86_EFLAGS_TF;
-		atomic_set(&kgdb_cpu_doing_single_step, -1);
+		atomic_set_unchecked(&kgdb_cpu_doing_single_step, -1);
 
 		/* set the trace bit if we're stepping */
 		if (remcomInBuffer[0] == 's') {
 			linux_regs->flags |= X86_EFLAGS_TF;
-			atomic_set(&kgdb_cpu_doing_single_step,
+			atomic_set_unchecked(&kgdb_cpu_doing_single_step,
 				   raw_smp_processor_id());
 		}
 
@@ -545,7 +548,7 @@ static int __kgdb_notify(struct die_args
 
 	switch (cmd) {
 	case DIE_DEBUG:
-		if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
+		if (atomic_read_unchecked(&kgdb_cpu_doing_single_step) != -1) {
 			if (user_mode(regs))
 				return single_step_cont(regs, args);
 			break;
@@ -750,11 +753,11 @@ int kgdb_arch_set_breakpoint(struct kgdb
 #endif /* CONFIG_DEBUG_RODATA */
 
 	bpt->type = BP_BREAKPOINT;
-	err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+	err = probe_kernel_read(bpt->saved_instr, ktla_ktva((char *)bpt->bpt_addr),
 				BREAK_INSTR_SIZE);
 	if (err)
 		return err;
-	err = probe_kernel_write((char *)bpt->bpt_addr,
+	err = probe_kernel_write(ktla_ktva((char *)bpt->bpt_addr),
 				 arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
 #ifdef CONFIG_DEBUG_RODATA
 	if (!err)
@@ -767,7 +770,7 @@ int kgdb_arch_set_breakpoint(struct kgdb
 		return -EBUSY;
 	text_poke((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
 		  BREAK_INSTR_SIZE);
-	err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
+	err = probe_kernel_read(opc, ktla_ktva((char *)bpt->bpt_addr), BREAK_INSTR_SIZE);
 	if (err)
 		return err;
 	if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
@@ -792,13 +795,13 @@ int kgdb_arch_remove_breakpoint(struct k
 	if (mutex_is_locked(&text_mutex))
 		goto knl_write;
 	text_poke((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE);
-	err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
+	err = probe_kernel_read(opc, ktla_ktva((char *)bpt->bpt_addr), BREAK_INSTR_SIZE);
 	if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
 		goto knl_write;
 	return err;
 knl_write:
 #endif /* CONFIG_DEBUG_RODATA */
-	return probe_kernel_write((char *)bpt->bpt_addr,
+	return probe_kernel_write(ktla_ktva((char *)bpt->bpt_addr),
 				  (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
 }
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/kprobes/core.c linux-4.0.9-pax/arch/x86/kernel/kprobes/core.c
--- linux-4.0.9/arch/x86/kernel/kprobes/core.c	2015-06-29 23:02:28.014445600 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/kprobes/core.c	2015-06-29 23:03:38.902445442 +0200
@@ -120,9 +120,12 @@ __synthesize_relative_insn(void *from, v
 		s32 raddr;
 	} __packed *insn;
 
-	insn = (struct __arch_relative_insn *)from;
+	insn = (struct __arch_relative_insn *)ktla_ktva(from);
+
+	pax_open_kernel();
 	insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
 	insn->op = op;
+	pax_close_kernel();
 }
 
 /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
@@ -168,7 +171,7 @@ int can_boost(kprobe_opcode_t *opcodes)
 	kprobe_opcode_t opcode;
 	kprobe_opcode_t *orig_opcodes = opcodes;
 
-	if (search_exception_tables((unsigned long)opcodes))
+	if (search_exception_tables(ktva_ktla((unsigned long)opcodes)))
 		return 0;	/* Page fault may occur on this address. */
 
 retry:
@@ -260,12 +263,12 @@ __recover_probed_insn(kprobe_opcode_t *b
 	 * Fortunately, we know that the original code is the ideal 5-byte
 	 * long NOP.
 	 */
-	memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	memcpy(buf, (void *)ktla_ktva(addr), MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
 	if (faddr)
 		memcpy(buf, ideal_nops[NOP_ATOMIC5], 5);
 	else
 		buf[0] = kp->opcode;
-	return (unsigned long)buf;
+	return ktva_ktla((unsigned long)buf);
 }
 
 /*
@@ -367,7 +370,9 @@ int __copy_instruction(u8 *dest, u8 *src
 	/* Another subsystem puts a breakpoint, failed to recover */
 	if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
 		return 0;
+	pax_open_kernel();
 	memcpy(dest, insn.kaddr, length);
+	pax_close_kernel();
 
 #ifdef CONFIG_X86_64
 	if (insn_rip_relative(&insn)) {
@@ -394,7 +399,9 @@ int __copy_instruction(u8 *dest, u8 *src
 			return 0;
 		}
 		disp = (u8 *) dest + insn_offset_displacement(&insn);
+		pax_open_kernel();
 		*(s32 *) disp = (s32) newdisp;
+		pax_close_kernel();
 	}
 #endif
 	return length;
@@ -536,7 +543,7 @@ static void setup_singlestep(struct kpro
 		 * nor set current_kprobe, because it doesn't use single
 		 * stepping.
 		 */
-		regs->ip = (unsigned long)p->ainsn.insn;
+		regs->ip = ktva_ktla((unsigned long)p->ainsn.insn);
 		preempt_enable_no_resched();
 		return;
 	}
@@ -553,9 +560,9 @@ static void setup_singlestep(struct kpro
 	regs->flags &= ~X86_EFLAGS_IF;
 	/* single step inline if the instruction is an int3 */
 	if (p->opcode == BREAKPOINT_INSTRUCTION)
-		regs->ip = (unsigned long)p->addr;
+		regs->ip = ktla_ktva((unsigned long)p->addr);
 	else
-		regs->ip = (unsigned long)p->ainsn.insn;
+		regs->ip = ktva_ktla((unsigned long)p->ainsn.insn);
 }
 NOKPROBE_SYMBOL(setup_singlestep);
 
@@ -605,7 +612,7 @@ int kprobe_int3_handler(struct pt_regs *
 	struct kprobe *p;
 	struct kprobe_ctlblk *kcb;
 
-	if (user_mode_vm(regs))
+	if (user_mode(regs))
 		return 0;
 
 	addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
@@ -640,7 +647,7 @@ int kprobe_int3_handler(struct pt_regs *
 				setup_singlestep(p, regs, kcb, 0);
 			return 1;
 		}
-	} else if (*addr != BREAKPOINT_INSTRUCTION) {
+	} else if (*(kprobe_opcode_t *)ktla_ktva((unsigned long)addr) != BREAKPOINT_INSTRUCTION) {
 		/*
 		 * The breakpoint instruction was removed right
 		 * after we hit it.  Another cpu has removed
@@ -687,6 +694,9 @@ static void __used kretprobe_trampoline_
 			"	movq %rax, 152(%rsp)\n"
 			RESTORE_REGS_STRING
 			"	popfq\n"
+#ifdef KERNEXEC_PLUGIN
+			"	btsq $63,(%rsp)\n"
+#endif
 #else
 			"	pushf\n"
 			SAVE_REGS_STRING
@@ -827,7 +837,7 @@ static void resume_execution(struct kpro
 			     struct kprobe_ctlblk *kcb)
 {
 	unsigned long *tos = stack_addr(regs);
-	unsigned long copy_ip = (unsigned long)p->ainsn.insn;
+	unsigned long copy_ip = ktva_ktla((unsigned long)p->ainsn.insn);
 	unsigned long orig_ip = (unsigned long)p->addr;
 	kprobe_opcode_t *insn = p->ainsn.insn;
 
@@ -1010,7 +1020,7 @@ int kprobe_exceptions_notify(struct noti
 	struct die_args *args = data;
 	int ret = NOTIFY_DONE;
 
-	if (args->regs && user_mode_vm(args->regs))
+	if (args->regs && user_mode(args->regs))
 		return ret;
 
 	if (val == DIE_GPF) {
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/kprobes/opt.c linux-4.0.9-pax/arch/x86/kernel/kprobes/opt.c
--- linux-4.0.9/arch/x86/kernel/kprobes/opt.c	2015-04-13 11:21:01.850617461 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/kprobes/opt.c	2015-04-15 12:13:52.926318621 +0200
@@ -79,6 +79,7 @@ found:
 /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
 static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
 {
+	pax_open_kernel();
 #ifdef CONFIG_X86_64
 	*addr++ = 0x48;
 	*addr++ = 0xbf;
@@ -86,6 +87,7 @@ static void synthesize_set_arg1(kprobe_o
 	*addr++ = 0xb8;
 #endif
 	*(unsigned long *)addr = val;
+	pax_close_kernel();
 }
 
 asm (
@@ -342,7 +344,7 @@ int arch_prepare_optimized_kprobe(struct
 	 * Verify if the address gap is in 2GB range, because this uses
 	 * a relative jump.
 	 */
-	rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
+	rel = (long)op->optinsn.insn - ktla_ktva((long)op->kp.addr) + RELATIVEJUMP_SIZE;
 	if (abs(rel) > 0x7fffffff) {
 		__arch_remove_optimized_kprobe(op, 0);
 		return -ERANGE;
@@ -359,16 +361,18 @@ int arch_prepare_optimized_kprobe(struct
 	op->optinsn.size = ret;
 
 	/* Copy arch-dep-instance from template */
-	memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
+	pax_open_kernel();
+	memcpy(buf, ktla_ktva(&optprobe_template_entry), TMPL_END_IDX);
+	pax_close_kernel();
 
 	/* Set probe information */
 	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
 
 	/* Set probe function call */
-	synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
+	synthesize_relcall(ktva_ktla(buf) + TMPL_CALL_IDX, optimized_callback);
 
 	/* Set returning jmp instruction at the tail of out-of-line buffer */
-	synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
+	synthesize_reljump(ktva_ktla(buf) + TMPL_END_IDX + op->optinsn.size,
 			   (u8 *)op->kp.addr + op->optinsn.size);
 
 	flush_icache_range((unsigned long) buf,
@@ -393,7 +397,7 @@ void arch_optimize_kprobes(struct list_h
 		WARN_ON(kprobe_disabled(&op->kp));
 
 		/* Backup instructions which will be replaced by jump address */
-		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
+		memcpy(op->optinsn.copied_insn, ktla_ktva(op->kp.addr) + INT3_SIZE,
 		       RELATIVE_ADDR_SIZE);
 
 		insn_buf[0] = RELATIVEJUMP_OPCODE;
@@ -441,7 +445,7 @@ int setup_detour_execution(struct kprobe
 		/* This kprobe is really able to run optimized path. */
 		op = container_of(p, struct optimized_kprobe, kp);
 		/* Detour through copied instructions */
-		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
+		regs->ip = ktva_ktla((unsigned long)op->optinsn.insn) + TMPL_END_IDX;
 		if (!reenter)
 			reset_current_kprobe();
 		preempt_enable_no_resched();
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/ksysfs.c linux-4.0.9-pax/arch/x86/kernel/ksysfs.c
--- linux-4.0.9/arch/x86/kernel/ksysfs.c	2015-03-18 15:21:50.272349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/ksysfs.c	2015-04-15 12:13:52.926318621 +0200
@@ -184,7 +184,7 @@ out:
 
 static struct kobj_attribute type_attr = __ATTR_RO(type);
 
-static struct bin_attribute data_attr = {
+static bin_attribute_no_const data_attr __read_only = {
 	.attr = {
 		.name = "data",
 		.mode = S_IRUGO,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/ldt.c linux-4.0.9-pax/arch/x86/kernel/ldt.c
--- linux-4.0.9/arch/x86/kernel/ldt.c	2015-03-18 15:21:50.272349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/ldt.c	2015-06-28 00:12:23.510445227 +0200
@@ -66,13 +66,13 @@ static int alloc_ldt(mm_context_t *pc, i
 	if (reload) {
 #ifdef CONFIG_SMP
 		preempt_disable();
-		load_LDT(pc);
+		load_LDT_nolock(pc);
 		if (!cpumask_equal(mm_cpumask(current->mm),
 				   cpumask_of(smp_processor_id())))
 			smp_call_function(flush_ldt, current->mm, 1);
 		preempt_enable();
 #else
-		load_LDT(pc);
+		load_LDT_nolock(pc);
 #endif
 	}
 	if (oldsize) {
@@ -94,7 +94,7 @@ static inline int copy_ldt(mm_context_t
 		return err;
 
 	for (i = 0; i < old->size; i++)
-		write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
+		write_ldt_entry(new->ldt, i, old->ldt + i);
 	return 0;
 }
 
@@ -115,6 +115,24 @@ int init_new_context(struct task_struct
 		retval = copy_ldt(&mm->context, &old_mm->context);
 		mutex_unlock(&old_mm->context.lock);
 	}
+
+	if (tsk == current) {
+		mm->context.vdso = 0;
+
+#ifdef CONFIG_X86_32
+#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)
+		mm->context.user_cs_base = 0UL;
+		mm->context.user_cs_limit = ~0UL;
+
+#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP)
+		cpumask_clear(&mm->context.cpu_user_cs_mask);
+#endif
+
+#endif
+#endif
+
+	}
+
 	return retval;
 }
 
@@ -229,6 +247,13 @@ static int write_ldt(void __user *ptr, u
 		}
 	}
 
+#ifdef CONFIG_PAX_SEGMEXEC
+	if ((mm->pax_flags & MF_PAX_SEGMEXEC) && (ldt_info.contents & MODIFY_LDT_CONTENTS_CODE)) {
+		error = -EINVAL;
+		goto out_unlock;
+	}
+#endif
+
 	if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
 		error = -EINVAL;
 		goto out_unlock;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/livepatch.c linux-4.0.9-pax/arch/x86/kernel/livepatch.c
--- linux-4.0.9/arch/x86/kernel/livepatch.c	2015-04-13 11:21:01.854617461 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/livepatch.c	2015-04-15 12:13:52.926318621 +0200
@@ -41,9 +41,10 @@ int klp_write_module_reloc(struct module
 	int ret, numpages, size = 4;
 	bool readonly;
 	unsigned long val;
-	unsigned long core = (unsigned long)mod->module_core;
-	unsigned long core_ro_size = mod->core_ro_size;
-	unsigned long core_size = mod->core_size;
+	unsigned long core_rx = (unsigned long)mod->module_core_rx;
+	unsigned long core_rw = (unsigned long)mod->module_core_rw;
+	unsigned long core_size_rx = mod->core_size_rx;
+	unsigned long core_size_rw = mod->core_size_rw;
 
 	switch (type) {
 	case R_X86_64_NONE:
@@ -66,11 +67,12 @@ int klp_write_module_reloc(struct module
 		return -EINVAL;
 	}
 
-	if (loc < core || loc >= core + core_size)
+	if ((loc < core_rx || loc >= core_rx + core_size_rx) &&
+	    (loc < core_rw || loc >= core_rw + core_size_rw))
 		/* loc does not point to any symbol inside the module */
 		return -EINVAL;
 
-	if (loc < core + core_ro_size)
+	if (loc < core_rx + core_size_rx)
 		readonly = true;
 	else
 		readonly = false;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/machine_kexec_32.c linux-4.0.9-pax/arch/x86/kernel/machine_kexec_32.c
--- linux-4.0.9/arch/x86/kernel/machine_kexec_32.c	2015-03-18 15:21:50.272349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/machine_kexec_32.c	2015-04-15 12:13:52.926318621 +0200
@@ -26,7 +26,7 @@
 #include <asm/cacheflush.h>
 #include <asm/debugreg.h>
 
-static void set_idt(void *newidt, __u16 limit)
+static void set_idt(struct desc_struct *newidt, __u16 limit)
 {
 	struct desc_ptr curidt;
 
@@ -38,7 +38,7 @@ static void set_idt(void *newidt, __u16
 }
 
 
-static void set_gdt(void *newgdt, __u16 limit)
+static void set_gdt(struct desc_struct *newgdt, __u16 limit)
 {
 	struct desc_ptr curgdt;
 
@@ -216,7 +216,7 @@ void machine_kexec(struct kimage *image)
 	}
 
 	control_page = page_address(image->control_code_page);
-	memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
+	memcpy(control_page, (void *)ktla_ktva((unsigned long)relocate_kernel), KEXEC_CONTROL_CODE_MAX_SIZE);
 
 	relocate_kernel_ptr = control_page;
 	page_list[PA_CONTROL_PAGE] = __pa(control_page);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/Makefile linux-4.0.9-pax/arch/x86/kernel/Makefile
--- linux-4.0.9/arch/x86/kernel/Makefile	2015-04-13 11:21:01.738617467 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/Makefile	2015-04-15 12:13:52.926318621 +0200
@@ -28,7 +28,7 @@ obj-y			+= time.o ioport.o ldt.o dumpsta
 obj-y			+= setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK)  += irq_work.o
 obj-y			+= probe_roms.o
-obj-$(CONFIG_X86_32)	+= i386_ksyms_32.o
+obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= mcount_64.o
 obj-y			+= syscall_$(BITS).o vsyscall_gtod.o
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/mcount_64.S linux-4.0.9-pax/arch/x86/kernel/mcount_64.S
--- linux-4.0.9/arch/x86/kernel/mcount_64.S	2015-03-18 15:21:50.272349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/mcount_64.S	2015-04-15 12:13:52.926318621 +0200
@@ -7,7 +7,7 @@
 #include <linux/linkage.h>
 #include <asm/ptrace.h>
 #include <asm/ftrace.h>
-
+#include <asm/alternative-asm.h>
 
 	.code64
 	.section .entry.text, "ax"
@@ -148,8 +148,9 @@
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 ENTRY(function_hook)
+	pax_force_retaddr
 	retq
-END(function_hook)
+ENDPROC(function_hook)
 
 ENTRY(ftrace_caller)
 	/* save_mcount_regs fills in first two parameters */
@@ -181,8 +182,9 @@ GLOBAL(ftrace_graph_call)
 #endif
 
 GLOBAL(ftrace_stub)
+	pax_force_retaddr
 	retq
-END(ftrace_caller)
+ENDPROC(ftrace_caller)
 
 ENTRY(ftrace_regs_caller)
 	/* Save the current flags before any operations that can change them */
@@ -253,7 +255,7 @@ GLOBAL(ftrace_regs_caller_end)
 
 	jmp ftrace_return
 
-END(ftrace_regs_caller)
+ENDPROC(ftrace_regs_caller)
 
 
 #else /* ! CONFIG_DYNAMIC_FTRACE */
@@ -272,18 +274,20 @@ fgraph_trace:
 #endif
 
 GLOBAL(ftrace_stub)
+	pax_force_retaddr
 	retq
 
 trace:
 	/* save_mcount_regs fills in first two parameters */
 	save_mcount_regs
 
+	pax_force_fptr ftrace_trace_function
 	call   *ftrace_trace_function
 
 	restore_mcount_regs
 
 	jmp fgraph_trace
-END(function_hook)
+ENDPROC(function_hook)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 #endif /* CONFIG_FUNCTION_TRACER */
 
@@ -305,8 +309,9 @@ ENTRY(ftrace_graph_caller)
 
 	restore_mcount_regs
 
+	pax_force_retaddr
 	retq
-END(ftrace_graph_caller)
+ENDPROC(ftrace_graph_caller)
 
 GLOBAL(return_to_handler)
 	subq  $24, %rsp
@@ -322,5 +327,7 @@ GLOBAL(return_to_handler)
 	movq 8(%rsp), %rdx
 	movq (%rsp), %rax
 	addq $24, %rsp
+	pax_force_fptr %rdi
 	jmp *%rdi
+ENDPROC(return_to_handler)
 #endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/module.c linux-4.0.9-pax/arch/x86/kernel/module.c
--- linux-4.0.9/arch/x86/kernel/module.c	2015-04-13 11:21:01.854617461 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/module.c	2015-04-15 12:13:52.926318621 +0200
@@ -82,17 +82,17 @@ static unsigned long int get_module_load
 }
 #endif
 
-void *module_alloc(unsigned long size)
+static inline void *__module_alloc(unsigned long size, pgprot_t prot)
 {
 	void *p;
 
-	if (PAGE_ALIGN(size) > MODULES_LEN)
+	if (!size || PAGE_ALIGN(size) > MODULES_LEN)
 		return NULL;
 
 	p = __vmalloc_node_range(size, MODULE_ALIGN,
 				    MODULES_VADDR + get_module_load_offset(),
-				    MODULES_END, GFP_KERNEL | __GFP_HIGHMEM,
-				    PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+				    MODULES_END, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+				    prot, 0, NUMA_NO_NODE,
 				    __builtin_return_address(0));
 	if (p && (kasan_module_alloc(p, size) < 0)) {
 		vfree(p);
@@ -102,6 +102,51 @@ void *module_alloc(unsigned long size)
 	return p;
 }
 
+void *module_alloc(unsigned long size)
+{
+
+#ifdef CONFIG_PAX_KERNEXEC
+	return __module_alloc(size, PAGE_KERNEL);
+#else
+	return __module_alloc(size, PAGE_KERNEL_EXEC);
+#endif
+
+}
+
+#ifdef CONFIG_PAX_KERNEXEC
+#ifdef CONFIG_X86_32
+void *module_alloc_exec(unsigned long size)
+{
+	struct vm_struct *area;
+
+	if (size == 0)
+		return NULL;
+
+	area = __get_vm_area(size, VM_ALLOC, (unsigned long)&MODULES_EXEC_VADDR, (unsigned long)&MODULES_EXEC_END);
+	return area ? area->addr : NULL;
+}
+EXPORT_SYMBOL(module_alloc_exec);
+
+void module_memfree_exec(void *module_region)
+{
+	vunmap(module_region);
+}
+EXPORT_SYMBOL(module_memfree_exec);
+#else
+void module_memfree_exec(void *module_region)
+{
+	module_memfree(module_region);
+}
+EXPORT_SYMBOL(module_memfree_exec);
+
+void *module_alloc_exec(unsigned long size)
+{
+	return __module_alloc(size, PAGE_KERNEL_RX);
+}
+EXPORT_SYMBOL(module_alloc_exec);
+#endif
+#endif
+
 #ifdef CONFIG_X86_32
 int apply_relocate(Elf32_Shdr *sechdrs,
 		   const char *strtab,
@@ -112,14 +157,16 @@ int apply_relocate(Elf32_Shdr *sechdrs,
 	unsigned int i;
 	Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr;
 	Elf32_Sym *sym;
-	uint32_t *location;
+	uint32_t *plocation, location;
 
 	DEBUGP("Applying relocate section %u to %u\n",
 	       relsec, sechdrs[relsec].sh_info);
 	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
 		/* This is where to make the change */
-		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
-			+ rel[i].r_offset;
+		plocation = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset;
+		location = (uint32_t)plocation;
+		if (sechdrs[sechdrs[relsec].sh_info].sh_flags & SHF_EXECINSTR)
+			plocation = ktla_ktva((void *)plocation);
 		/* This is the symbol it is referring to.  Note that all
 		   undefined symbols have been resolved.  */
 		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
@@ -128,11 +175,15 @@ int apply_relocate(Elf32_Shdr *sechdrs,
 		switch (ELF32_R_TYPE(rel[i].r_info)) {
 		case R_386_32:
 			/* We add the value into the location given */
-			*location += sym->st_value;
+			pax_open_kernel();
+			*plocation += sym->st_value;
+			pax_close_kernel();
 			break;
 		case R_386_PC32:
 			/* Add the value, subtract its position */
-			*location += sym->st_value - (uint32_t)location;
+			pax_open_kernel();
+			*plocation += sym->st_value - location;
+			pax_close_kernel();
 			break;
 		default:
 			pr_err("%s: Unknown relocation: %u\n",
@@ -177,21 +228,30 @@ int apply_relocate_add(Elf64_Shdr *sechd
 		case R_X86_64_NONE:
 			break;
 		case R_X86_64_64:
+			pax_open_kernel();
 			*(u64 *)loc = val;
+			pax_close_kernel();
 			break;
 		case R_X86_64_32:
+			pax_open_kernel();
 			*(u32 *)loc = val;
+			pax_close_kernel();
 			if (val != *(u32 *)loc)
 				goto overflow;
 			break;
 		case R_X86_64_32S:
+			pax_open_kernel();
 			*(s32 *)loc = val;
+			pax_close_kernel();
 			if ((s64)val != *(s32 *)loc)
 				goto overflow;
 			break;
 		case R_X86_64_PC32:
 			val -= (u64)loc;
+			pax_open_kernel();
 			*(u32 *)loc = val;
+			pax_close_kernel();
+
 #if 0
 			if ((s64)val != *(s32 *)loc)
 				goto overflow;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/msr.c linux-4.0.9-pax/arch/x86/kernel/msr.c
--- linux-4.0.9/arch/x86/kernel/msr.c	2015-03-18 15:21:50.272349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/msr.c	2015-04-15 12:13:52.926318621 +0200
@@ -235,7 +235,7 @@ static int msr_class_cpu_callback(struct
 	return notifier_from_errno(err);
 }
 
-static struct notifier_block __refdata msr_class_cpu_notifier = {
+static struct notifier_block msr_class_cpu_notifier = {
 	.notifier_call = msr_class_cpu_callback,
 };
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/nmi.c linux-4.0.9-pax/arch/x86/kernel/nmi.c
--- linux-4.0.9/arch/x86/kernel/nmi.c	2015-03-18 15:21:50.272349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/nmi.c	2015-04-15 12:13:52.926318621 +0200
@@ -98,16 +98,16 @@ fs_initcall(nmi_warning_debugfs);
 
 static void nmi_max_handler(struct irq_work *w)
 {
-	struct nmiaction *a = container_of(w, struct nmiaction, irq_work);
+	struct nmiwork *n = container_of(w, struct nmiwork, irq_work);
 	int remainder_ns, decimal_msecs;
-	u64 whole_msecs = ACCESS_ONCE(a->max_duration);
+	u64 whole_msecs = ACCESS_ONCE(n->max_duration);
 
 	remainder_ns = do_div(whole_msecs, (1000 * 1000));
 	decimal_msecs = remainder_ns / 1000;
 
 	printk_ratelimited(KERN_INFO
 		"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
-		a->handler, whole_msecs, decimal_msecs);
+		n->action->handler, whole_msecs, decimal_msecs);
 }
 
 static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
@@ -134,11 +134,11 @@ static int nmi_handle(unsigned int type,
 		delta = sched_clock() - delta;
 		trace_nmi_handler(a->handler, (int)delta, thishandled);
 
-		if (delta < nmi_longest_ns || delta < a->max_duration)
+		if (delta < nmi_longest_ns || delta < a->work->max_duration)
 			continue;
 
-		a->max_duration = delta;
-		irq_work_queue(&a->irq_work);
+		a->work->max_duration = delta;
+		irq_work_queue(&a->work->irq_work);
 	}
 
 	rcu_read_unlock();
@@ -148,7 +148,7 @@ static int nmi_handle(unsigned int type,
 }
 NOKPROBE_SYMBOL(nmi_handle);
 
-int __register_nmi_handler(unsigned int type, struct nmiaction *action)
+int __register_nmi_handler(unsigned int type, const struct nmiaction *action)
 {
 	struct nmi_desc *desc = nmi_to_desc(type);
 	unsigned long flags;
@@ -156,7 +156,8 @@ int __register_nmi_handler(unsigned int
 	if (!action->handler)
 		return -EINVAL;
 
-	init_irq_work(&action->irq_work, nmi_max_handler);
+	action->work->action = action;
+	init_irq_work(&action->work->irq_work, nmi_max_handler);
 
 	spin_lock_irqsave(&desc->lock, flags);
 
@@ -174,9 +175,9 @@ int __register_nmi_handler(unsigned int
 	 * event confuses some handlers (kdump uses this flag)
 	 */
 	if (action->flags & NMI_FLAG_FIRST)
-		list_add_rcu(&action->list, &desc->head);
+		pax_list_add_rcu((struct list_head *)&action->list, &desc->head);
 	else
-		list_add_tail_rcu(&action->list, &desc->head);
+		pax_list_add_tail_rcu((struct list_head *)&action->list, &desc->head);
 	
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
@@ -199,7 +200,7 @@ void unregister_nmi_handler(unsigned int
 		if (!strcmp(n->name, name)) {
 			WARN(in_nmi(),
 				"Trying to free NMI (%s) from NMI context!\n", n->name);
-			list_del_rcu(&n->list);
+			pax_list_del_rcu((struct list_head *)&n->list);
 			break;
 		}
 	}
@@ -528,6 +529,17 @@ static inline void nmi_nesting_postproce
 dotraplinkage notrace void
 do_nmi(struct pt_regs *regs, long error_code)
 {
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC)
+	if (!user_mode(regs)) {
+		unsigned long cs = regs->cs & 0xFFFF;
+		unsigned long ip = ktva_ktla(regs->ip);
+
+		if ((cs == __KERNEL_CS || cs == __KERNEXEC_KERNEL_CS) && ip <= (unsigned long)_etext)
+			regs->ip = ip;
+	}
+#endif
+
 	nmi_nesting_preprocess(regs);
 
 	nmi_enter();
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/nmi_selftest.c linux-4.0.9-pax/arch/x86/kernel/nmi_selftest.c
--- linux-4.0.9/arch/x86/kernel/nmi_selftest.c	2015-03-18 15:21:50.272349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/nmi_selftest.c	2015-04-15 12:13:52.926318621 +0200
@@ -43,7 +43,7 @@ static void __init init_nmi_testsuite(vo
 {
 	/* trap all the unknown NMIs we may generate */
 	register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk",
-			__initdata);
+			__initconst);
 }
 
 static void __init cleanup_nmi_testsuite(void)
@@ -66,7 +66,7 @@ static void __init test_nmi_ipi(struct c
 	unsigned long timeout;
 
 	if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback,
-				 NMI_FLAG_FIRST, "nmi_selftest", __initdata)) {
+				 NMI_FLAG_FIRST, "nmi_selftest", __initconst)) {
 		nmi_fail = FAILURE;
 		return;
 	}
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/paravirt.c linux-4.0.9-pax/arch/x86/kernel/paravirt.c
--- linux-4.0.9/arch/x86/kernel/paravirt.c	2015-03-18 15:21:50.272349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/paravirt.c	2015-04-15 12:13:52.926318621 +0200
@@ -56,6 +56,9 @@ u64 _paravirt_ident_64(u64 x)
 {
 	return x;
 }
+#if defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE)
+PV_CALLEE_SAVE_REGS_THUNK(_paravirt_ident_64);
+#endif
 
 void __init default_banner(void)
 {
@@ -142,16 +145,20 @@ unsigned paravirt_patch_default(u8 type,
 
 	if (opfunc == NULL)
 		/* If there's no function, patch it with a ud2a (BUG) */
-		ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
-	else if (opfunc == _paravirt_nop)
+		ret = paravirt_patch_insns(insnbuf, len, ktva_ktla(ud2a), ud2a+sizeof(ud2a));
+	else if (opfunc == (void *)_paravirt_nop)
 		/* If the operation is a nop, then nop the callsite */
 		ret = paravirt_patch_nop();
 
 	/* identity functions just return their single argument */
-	else if (opfunc == _paravirt_ident_32)
+	else if (opfunc == (void *)_paravirt_ident_32)
 		ret = paravirt_patch_ident_32(insnbuf, len);
-	else if (opfunc == _paravirt_ident_64)
+	else if (opfunc == (void *)_paravirt_ident_64)
+		ret = paravirt_patch_ident_64(insnbuf, len);
+#if defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE)
+	else if (opfunc == (void *)__raw_callee_save__paravirt_ident_64)
 		ret = paravirt_patch_ident_64(insnbuf, len);
+#endif
 
 	else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
 		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
@@ -176,7 +183,7 @@ unsigned paravirt_patch_insns(void *insn
 	if (insn_len > len || start == NULL)
 		insn_len = len;
 	else
-		memcpy(insnbuf, start, insn_len);
+		memcpy(insnbuf, ktla_ktva(start), insn_len);
 
 	return insn_len;
 }
@@ -300,7 +307,7 @@ enum paravirt_lazy_mode paravirt_get_laz
 	return this_cpu_read(paravirt_lazy_mode);
 }
 
-struct pv_info pv_info = {
+struct pv_info pv_info __read_only = {
 	.name = "bare hardware",
 	.paravirt_enabled = 0,
 	.kernel_rpl = 0,
@@ -311,16 +318,16 @@ struct pv_info pv_info = {
 #endif
 };
 
-struct pv_init_ops pv_init_ops = {
+struct pv_init_ops pv_init_ops __read_only = {
 	.patch = native_patch,
 };
 
-struct pv_time_ops pv_time_ops = {
+struct pv_time_ops pv_time_ops __read_only = {
 	.sched_clock = native_sched_clock,
 	.steal_clock = native_steal_clock,
 };
 
-__visible struct pv_irq_ops pv_irq_ops = {
+__visible struct pv_irq_ops pv_irq_ops __read_only = {
 	.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
 	.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
 	.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
@@ -332,7 +339,7 @@ __visible struct pv_irq_ops pv_irq_ops =
 #endif
 };
 
-__visible struct pv_cpu_ops pv_cpu_ops = {
+__visible struct pv_cpu_ops pv_cpu_ops __read_only = {
 	.cpuid = native_cpuid,
 	.get_debugreg = native_get_debugreg,
 	.set_debugreg = native_set_debugreg,
@@ -395,21 +402,26 @@ NOKPROBE_SYMBOL(native_get_debugreg);
 NOKPROBE_SYMBOL(native_set_debugreg);
 NOKPROBE_SYMBOL(native_load_idt);
 
-struct pv_apic_ops pv_apic_ops = {
+struct pv_apic_ops pv_apic_ops __read_only = {
 #ifdef CONFIG_X86_LOCAL_APIC
 	.startup_ipi_hook = paravirt_nop,
 #endif
 };
 
-#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
+#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_PAE
+/* 64-bit pagetable entries */
+#define PTE_IDENT	PV_CALLEE_SAVE(_paravirt_ident_64)
+#else
 /* 32-bit pagetable entries */
 #define PTE_IDENT	__PV_IS_CALLEE_SAVE(_paravirt_ident_32)
+#endif
 #else
 /* 64-bit pagetable entries */
 #define PTE_IDENT	__PV_IS_CALLEE_SAVE(_paravirt_ident_64)
 #endif
 
-struct pv_mmu_ops pv_mmu_ops = {
+struct pv_mmu_ops pv_mmu_ops __read_only = {
 
 	.read_cr2 = native_read_cr2,
 	.write_cr2 = native_write_cr2,
@@ -459,6 +471,7 @@ struct pv_mmu_ops pv_mmu_ops = {
 	.make_pud = PTE_IDENT,
 
 	.set_pgd = native_set_pgd,
+	.set_pgd_batched = native_set_pgd_batched,
 #endif
 #endif /* PAGETABLE_LEVELS >= 3 */
 
@@ -479,6 +492,12 @@ struct pv_mmu_ops pv_mmu_ops = {
 	},
 
 	.set_fixmap = native_set_fixmap,
+
+#ifdef CONFIG_PAX_KERNEXEC
+	.pax_open_kernel = native_pax_open_kernel,
+	.pax_close_kernel = native_pax_close_kernel,
+#endif
+
 };
 
 EXPORT_SYMBOL_GPL(pv_time_ops);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/paravirt_patch_64.c linux-4.0.9-pax/arch/x86/kernel/paravirt_patch_64.c
--- linux-4.0.9/arch/x86/kernel/paravirt_patch_64.c	2015-03-18 15:21:50.272349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/paravirt_patch_64.c	2015-04-15 12:13:52.926318621 +0200
@@ -9,7 +9,11 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq;
 DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
+
+#ifndef CONFIG_PAX_MEMORY_UDEREF
 DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
+#endif
+
 DEF_NATIVE(pv_cpu_ops, clts, "clts");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
@@ -57,7 +61,11 @@ unsigned native_patch(u8 type, u16 clobb
 		PATCH_SITE(pv_mmu_ops, read_cr3);
 		PATCH_SITE(pv_mmu_ops, write_cr3);
 		PATCH_SITE(pv_cpu_ops, clts);
+
+#ifndef CONFIG_PAX_MEMORY_UDEREF
 		PATCH_SITE(pv_mmu_ops, flush_tlb_single);
+#endif
+
 		PATCH_SITE(pv_cpu_ops, wbinvd);
 
 	patch_site:
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/paravirt-spinlocks.c linux-4.0.9-pax/arch/x86/kernel/paravirt-spinlocks.c
--- linux-4.0.9/arch/x86/kernel/paravirt-spinlocks.c	2015-03-18 15:21:50.272349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/paravirt-spinlocks.c	2015-04-15 12:13:52.926318621 +0200
@@ -8,7 +8,7 @@
 
 #include <asm/paravirt.h>
 
-struct pv_lock_ops pv_lock_ops = {
+struct pv_lock_ops pv_lock_ops __read_only = {
 #ifdef CONFIG_SMP
 	.lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
 	.unlock_kick = paravirt_nop,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/pci-calgary_64.c linux-4.0.9-pax/arch/x86/kernel/pci-calgary_64.c
--- linux-4.0.9/arch/x86/kernel/pci-calgary_64.c	2015-03-18 15:21:50.272349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/pci-calgary_64.c	2015-04-15 12:13:52.926318621 +0200
@@ -1347,7 +1347,7 @@ static void __init get_tce_space_from_ta
 			tce_space = be64_to_cpu(readq(target));
 			tce_space = tce_space & TAR_SW_BITS;
 
-			tce_space = tce_space & (~specified_table_size);
+			tce_space = tce_space & (~(unsigned long)specified_table_size);
 			info->tce_space = (u64 *)__va(tce_space);
 		}
 	}
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/pci-iommu_table.c linux-4.0.9-pax/arch/x86/kernel/pci-iommu_table.c
--- linux-4.0.9/arch/x86/kernel/pci-iommu_table.c	2015-03-18 15:21:50.272349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/pci-iommu_table.c	2015-04-15 12:13:52.926318621 +0200
@@ -2,7 +2,7 @@
 #include <asm/iommu_table.h>
 #include <linux/string.h>
 #include <linux/kallsyms.h>
-
+#include <linux/sched.h>
 
 #define DEBUG 1
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/pci-swiotlb.c linux-4.0.9-pax/arch/x86/kernel/pci-swiotlb.c
--- linux-4.0.9/arch/x86/kernel/pci-swiotlb.c	2015-03-18 15:21:50.272349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/pci-swiotlb.c	2015-04-15 12:13:52.926318621 +0200
@@ -33,7 +33,7 @@ void x86_swiotlb_free_coherent(struct de
 				      struct dma_attrs *attrs)
 {
 	if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
-		swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+		swiotlb_free_coherent(dev, size, vaddr, dma_addr, attrs);
 	else
 		dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/process_32.c linux-4.0.9-pax/arch/x86/kernel/process_32.c
--- linux-4.0.9/arch/x86/kernel/process_32.c	2015-04-13 11:21:01.854617461 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/process_32.c	2015-04-15 12:13:52.926318621 +0200
@@ -64,6 +64,7 @@ asmlinkage void ret_from_kernel_thread(v
 unsigned long thread_saved_pc(struct task_struct *tsk)
 {
 	return ((unsigned long *)tsk->thread.sp)[3];
+//XXX	return tsk->thread.eip;
 }
 
 void __show_regs(struct pt_regs *regs, int all)
@@ -73,19 +74,18 @@ void __show_regs(struct pt_regs *regs, i
 	unsigned long sp;
 	unsigned short ss, gs;
 
-	if (user_mode_vm(regs)) {
+	if (user_mode(regs)) {
 		sp = regs->sp;
 		ss = regs->ss & 0xffff;
-		gs = get_user_gs(regs);
 	} else {
 		sp = kernel_stack_pointer(regs);
 		savesegment(ss, ss);
-		savesegment(gs, gs);
 	}
+	gs = get_user_gs(regs);
 
 	printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
 			(u16)regs->cs, regs->ip, regs->flags,
-			smp_processor_id());
+			raw_smp_processor_id());
 	print_symbol("EIP is at %s\n", regs->ip);
 
 	printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
@@ -132,21 +132,22 @@ void release_thread(struct task_struct *
 int copy_thread(unsigned long clone_flags, unsigned long sp,
 	unsigned long arg, struct task_struct *p)
 {
-	struct pt_regs *childregs = task_pt_regs(p);
+	struct pt_regs *childregs = task_stack_page(p) + THREAD_SIZE - sizeof(struct pt_regs) - 8;
 	struct task_struct *tsk;
 	int err;
 
 	p->thread.sp = (unsigned long) childregs;
 	p->thread.sp0 = (unsigned long) (childregs+1);
+	p->tinfo.lowest_stack = (unsigned long)task_stack_page(p) + 2 * sizeof(unsigned long);
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 	if (unlikely(p->flags & PF_KTHREAD)) {
 		/* kernel thread */
 		memset(childregs, 0, sizeof(struct pt_regs));
 		p->thread.ip = (unsigned long) ret_from_kernel_thread;
-		task_user_gs(p) = __KERNEL_STACK_CANARY;
-		childregs->ds = __USER_DS;
-		childregs->es = __USER_DS;
+		savesegment(gs, childregs->gs);
+		childregs->ds = __KERNEL_DS;
+		childregs->es = __KERNEL_DS;
 		childregs->fs = __KERNEL_PERCPU;
 		childregs->bx = sp;	/* function */
 		childregs->bp = arg;
@@ -248,7 +249,7 @@ __switch_to(struct task_struct *prev_p,
 	struct thread_struct *prev = &prev_p->thread,
 				 *next = &next_p->thread;
 	int cpu = smp_processor_id();
-	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	struct tss_struct *tss = init_tss + cpu;
 	fpu_switch_t fpu;
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
@@ -272,6 +273,10 @@ __switch_to(struct task_struct *prev_p,
 	 */
 	lazy_save_gs(prev->gs);
 
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	__set_fs(task_thread_info(next_p)->addr_limit);
+#endif
+
 	/*
 	 * Load the per-thread Thread-Local Storage descriptor.
 	 */
@@ -310,9 +315,9 @@ __switch_to(struct task_struct *prev_p,
 	 */
 	arch_end_context_switch(next_p);
 
-	this_cpu_write(kernel_stack,
-		  (unsigned long)task_stack_page(next_p) +
-		  THREAD_SIZE - KERNEL_STACK_OFFSET);
+	this_cpu_write(current_task, next_p);
+	this_cpu_write(current_tinfo, &next_p->tinfo);
+	this_cpu_write(kernel_stack, next->sp0);
 
 	/*
 	 * Restore %gs if needed (which is common)
@@ -322,8 +327,6 @@ __switch_to(struct task_struct *prev_p,
 
 	switch_fpu_finish(next_p, fpu);
 
-	this_cpu_write(current_task, next_p);
-
 	return prev_p;
 }
 
@@ -353,4 +356,3 @@ unsigned long get_wchan(struct task_stru
 	} while (count++ < 16);
 	return 0;
 }
-
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/process_64.c linux-4.0.9-pax/arch/x86/kernel/process_64.c
--- linux-4.0.9/arch/x86/kernel/process_64.c	2015-04-13 11:21:01.854617461 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/process_64.c	2015-04-15 12:13:52.930318621 +0200
@@ -158,10 +158,11 @@ int copy_thread(unsigned long clone_flag
 	struct pt_regs *childregs;
 	struct task_struct *me = current;
 
-	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE - 16;
 	childregs = task_pt_regs(p);
 	p->thread.sp = (unsigned long) childregs;
 	p->thread.usersp = me->thread.usersp;
+	p->tinfo.lowest_stack = (unsigned long)task_stack_page(p) + 2 * sizeof(unsigned long);
 	set_tsk_thread_flag(p, TIF_FORK);
 	p->thread.io_bitmap_ptr = NULL;
 
@@ -171,6 +172,8 @@ int copy_thread(unsigned long clone_flag
 	p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
 	savesegment(es, p->thread.es);
 	savesegment(ds, p->thread.ds);
+	savesegment(ss, p->thread.ss);
+	BUG_ON(p->thread.ss == __UDEREF_KERNEL_DS);
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 	if (unlikely(p->flags & PF_KTHREAD)) {
@@ -277,7 +280,7 @@ __switch_to(struct task_struct *prev_p,
 	struct thread_struct *prev = &prev_p->thread;
 	struct thread_struct *next = &next_p->thread;
 	int cpu = smp_processor_id();
-	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	struct tss_struct *tss = init_tss + cpu;
 	unsigned fsindex, gsindex;
 	fpu_switch_t fpu;
 
@@ -331,6 +334,10 @@ __switch_to(struct task_struct *prev_p,
 	if (unlikely(next->ds | prev->ds))
 		loadsegment(ds, next->ds);
 
+	savesegment(ss, prev->ss);
+	if (unlikely(next->ss != prev->ss))
+		loadsegment(ss, next->ss);
+
 	/*
 	 * Switch FS and GS.
 	 *
@@ -404,6 +411,7 @@ __switch_to(struct task_struct *prev_p,
 	prev->usersp = this_cpu_read(old_rsp);
 	this_cpu_write(old_rsp, next->usersp);
 	this_cpu_write(current_task, next_p);
+	this_cpu_write(current_tinfo, &next_p->tinfo);
 
 	/*
 	 * If it were not for PREEMPT_ACTIVE we could guarantee that the
@@ -413,9 +421,7 @@ __switch_to(struct task_struct *prev_p,
 	task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
 	this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
 
-	this_cpu_write(kernel_stack,
-		  (unsigned long)task_stack_page(next_p) +
-		  THREAD_SIZE - KERNEL_STACK_OFFSET);
+	this_cpu_write(kernel_stack, next->sp0);
 
 	/*
 	 * Now maybe reload the debug registers and handle I/O bitmaps
@@ -485,12 +491,11 @@ unsigned long get_wchan(struct task_stru
 	if (!p || p == current || p->state == TASK_RUNNING)
 		return 0;
 	stack = (unsigned long)task_stack_page(p);
-	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
+	if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE-16-sizeof(u64))
 		return 0;
 	fp = *(u64 *)(p->thread.sp);
 	do {
-		if (fp < (unsigned long)stack ||
-		    fp >= (unsigned long)stack+THREAD_SIZE)
+		if (fp < stack || fp > stack+THREAD_SIZE-16-sizeof(u64))
 			return 0;
 		ip = *(u64 *)(fp+8);
 		if (!in_sched_functions(ip))
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/process.c linux-4.0.9-pax/arch/x86/kernel/process.c
--- linux-4.0.9/arch/x86/kernel/process.c	2015-05-07 02:10:37.868265495 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/process.c	2015-05-07 02:10:56.292266037 +0200
@@ -38,7 +38,8 @@
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
+struct tss_struct init_tss[NR_CPUS] __visible ____cacheline_internodealigned_in_smp = { [0 ... NR_CPUS-1] = INIT_TSS };
+EXPORT_SYMBOL(init_tss);
 
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -96,7 +97,7 @@ void arch_task_cache_init(void)
         task_xstate_cachep =
         	kmem_cache_create("task_xstate", xstate_size,
 				  __alignof__(union thread_xstate),
-				  SLAB_PANIC | SLAB_NOTRACK, NULL);
+				  SLAB_PANIC | SLAB_NOTRACK | SLAB_USERCOPY, NULL);
 	setup_xstate_comp();
 }
 
@@ -110,7 +111,7 @@ void exit_thread(void)
 	unsigned long *bp = t->io_bitmap_ptr;
 
 	if (bp) {
-		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
+		struct tss_struct *tss = init_tss + get_cpu();
 
 		t->io_bitmap_ptr = NULL;
 		clear_thread_flag(TIF_IO_BITMAP);
@@ -130,6 +131,9 @@ void flush_thread(void)
 {
 	struct task_struct *tsk = current;
 
+#if defined(CONFIG_X86_32) && !defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_PAX_MEMORY_UDEREF)
+	loadsegment(gs, 0);
+#endif
 	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
 	drop_init_fpu(tsk);
@@ -276,7 +280,7 @@ static void __exit_idle(void)
 void exit_idle(void)
 {
 	/* idle loop has pid 0 */
-	if (current->pid)
+	if (task_pid_nr(current))
 		return;
 	__exit_idle();
 }
@@ -329,7 +333,7 @@ bool xen_set_default_idle(void)
 	return ret;
 }
 #endif
-void stop_this_cpu(void *dummy)
+__noreturn void stop_this_cpu(void *dummy)
 {
 	local_irq_disable();
 	/*
@@ -508,16 +512,37 @@ static int __init idle_setup(char *str)
 }
 early_param("idle", idle_setup);
 
-unsigned long arch_align_stack(unsigned long sp)
+#ifdef CONFIG_PAX_RANDKSTACK
+void pax_randomize_kstack(struct pt_regs *regs)
 {
-	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-		sp -= get_random_int() % 8192;
-	return sp & ~0xf;
-}
+	struct thread_struct *thread = &current->thread;
+	unsigned long time;
 
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	unsigned long range_end = mm->brk + 0x02000000;
-	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
-}
+	if (!randomize_va_space)
+		return;
+
+	if (v8086_mode(regs))
+		return;
+
+	rdtscl(time);
 
+	/* P4 seems to return a 0 LSB, ignore it */
+#ifdef CONFIG_MPENTIUM4
+	time &= 0x3EUL;
+	time <<= 2;
+#elif defined(CONFIG_X86_64)
+	time &= 0xFUL;
+	time <<= 4;
+#else
+	time &= 0x1FUL;
+	time <<= 3;
+#endif
+
+	thread->sp0 ^= time;
+	load_sp0(init_tss + smp_processor_id(), thread);
+
+#ifdef CONFIG_X86_64
+	this_cpu_write(kernel_stack, thread->sp0);
+#endif
+}
+#endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/ptrace.c linux-4.0.9-pax/arch/x86/kernel/ptrace.c
--- linux-4.0.9/arch/x86/kernel/ptrace.c	2015-03-18 15:21:50.276349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/ptrace.c	2015-04-15 12:13:52.930318621 +0200
@@ -186,10 +186,10 @@ unsigned long kernel_stack_pointer(struc
 	unsigned long sp = (unsigned long)&regs->sp;
 	u32 *prev_esp;
 
-	if (context == (sp & ~(THREAD_SIZE - 1)))
+	if (context == ((sp + 8) & ~(THREAD_SIZE - 1)))
 		return sp;
 
-	prev_esp = (u32 *)(context);
+	prev_esp = *(u32 **)(context);
 	if (prev_esp)
 		return (unsigned long)prev_esp;
 
@@ -588,7 +588,7 @@ static void ptrace_triggered(struct perf
 static unsigned long ptrace_get_dr7(struct perf_event *bp[])
 {
 	int i;
-	int dr7 = 0;
+	unsigned long dr7 = 0;
 	struct arch_hw_breakpoint *info;
 
 	for (i = 0; i < HBP_NUM; i++) {
@@ -822,7 +822,7 @@ long arch_ptrace(struct task_struct *chi
 		 unsigned long addr, unsigned long data)
 {
 	int ret;
-	unsigned long __user *datap = (unsigned long __user *)data;
+	unsigned long __user *datap = (__force unsigned long __user *)data;
 
 	switch (request) {
 	/* read the word at location addr in the USER area. */
@@ -907,14 +907,14 @@ long arch_ptrace(struct task_struct *chi
 		if ((int) addr < 0)
 			return -EIO;
 		ret = do_get_thread_area(child, addr,
-					(struct user_desc __user *)data);
+					(__force struct user_desc __user *) data);
 		break;
 
 	case PTRACE_SET_THREAD_AREA:
 		if ((int) addr < 0)
 			return -EIO;
 		ret = do_set_thread_area(child, addr,
-					(struct user_desc __user *)data, 0);
+					(__force struct user_desc __user *) data, 0);
 		break;
 #endif
 
@@ -1292,7 +1292,7 @@ long compat_arch_ptrace(struct task_stru
 
 #ifdef CONFIG_X86_64
 
-static struct user_regset x86_64_regsets[] __read_mostly = {
+static user_regset_no_const x86_64_regsets[] __read_only = {
 	[REGSET_GENERAL] = {
 		.core_note_type = NT_PRSTATUS,
 		.n = sizeof(struct user_regs_struct) / sizeof(long),
@@ -1333,7 +1333,7 @@ static const struct user_regset_view use
 #endif	/* CONFIG_X86_64 */
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
-static struct user_regset x86_32_regsets[] __read_mostly = {
+static user_regset_no_const x86_32_regsets[] __read_only = {
 	[REGSET_GENERAL] = {
 		.core_note_type = NT_PRSTATUS,
 		.n = sizeof(struct user_regs_struct32) / sizeof(u32),
@@ -1386,7 +1386,7 @@ static const struct user_regset_view use
  */
 u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
-void update_regset_xstate_info(unsigned int size, u64 xstate_mask)
+void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask)
 {
 #ifdef CONFIG_X86_64
 	x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);
@@ -1421,7 +1421,7 @@ static void fill_sigtrap_info(struct tas
 	memset(info, 0, sizeof(*info));
 	info->si_signo = SIGTRAP;
 	info->si_code = si_code;
-	info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL;
+	info->si_addr = user_mode(regs) ? (__force void __user *)regs->ip : NULL;
 }
 
 void user_single_step_siginfo(struct task_struct *tsk,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/pvclock.c linux-4.0.9-pax/arch/x86/kernel/pvclock.c
--- linux-4.0.9/arch/x86/kernel/pvclock.c	2015-05-07 02:10:37.888265496 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/pvclock.c	2015-05-07 02:10:56.292266037 +0200
@@ -51,11 +51,11 @@ void pvclock_touch_watchdogs(void)
 	reset_hung_task_detector();
 }
 
-static atomic64_t last_value = ATOMIC64_INIT(0);
+static atomic64_unchecked_t last_value = ATOMIC64_INIT(0);
 
 void pvclock_resume(void)
 {
-	atomic64_set(&last_value, 0);
+	atomic64_set_unchecked(&last_value, 0);
 }
 
 u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
@@ -105,11 +105,11 @@ cycle_t pvclock_clocksource_read(struct
 	 * updating at the same time, and one of them could be slightly behind,
 	 * making the assumption that last_value always go forward fail to hold.
 	 */
-	last = atomic64_read(&last_value);
+	last = atomic64_read_unchecked(&last_value);
 	do {
 		if (ret < last)
 			return last;
-		last = atomic64_cmpxchg(&last_value, last, ret);
+		last = atomic64_cmpxchg_unchecked(&last_value, last, ret);
 	} while (unlikely(last != ret));
 
 	return ret;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/reboot.c linux-4.0.9-pax/arch/x86/kernel/reboot.c
--- linux-4.0.9/arch/x86/kernel/reboot.c	2015-04-13 11:21:01.854617461 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/reboot.c	2015-04-30 00:51:09.180291617 +0200
@@ -70,6 +70,11 @@ static int __init set_bios_reboot(const
 
 void __noreturn machine_real_restart(unsigned int type)
 {
+
+#if defined(CONFIG_X86_32) && (defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF))
+	struct desc_struct *gdt;
+#endif
+
 	local_irq_disable();
 
 	/*
@@ -97,7 +102,29 @@ void __noreturn machine_real_restart(uns
 
 	/* Jump to the identity-mapped low memory code */
 #ifdef CONFIG_X86_32
-	asm volatile("jmpl *%0" : :
+
+#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)
+	gdt = get_cpu_gdt_table(smp_processor_id());
+	pax_open_kernel();
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	gdt[GDT_ENTRY_KERNEL_DS].type = 3;
+	gdt[GDT_ENTRY_KERNEL_DS].limit = 0xf;
+	loadsegment(ds, __KERNEL_DS);
+	loadsegment(es, __KERNEL_DS);
+	loadsegment(ss, __KERNEL_DS);
+#endif
+#ifdef CONFIG_PAX_KERNEXEC
+	gdt[GDT_ENTRY_KERNEL_CS].base0 = 0;
+	gdt[GDT_ENTRY_KERNEL_CS].base1 = 0;
+	gdt[GDT_ENTRY_KERNEL_CS].base2 = 0;
+	gdt[GDT_ENTRY_KERNEL_CS].limit0 = 0xffff;
+	gdt[GDT_ENTRY_KERNEL_CS].limit = 0xf;
+	gdt[GDT_ENTRY_KERNEL_CS].g = 1;
+#endif
+	pax_close_kernel();
+#endif
+
+	asm volatile("ljmpl *%0" : :
 		     "rm" (real_mode_header->machine_real_restart_asm),
 		     "a" (type));
 #else
@@ -137,7 +164,7 @@ static int __init set_kbd_reboot(const s
 /*
  * This is a single dmi_table handling all reboot quirks.
  */
-static struct dmi_system_id __initdata reboot_dmi_table[] = {
+static const struct dmi_system_id __initconst reboot_dmi_table[] = {
 
 	/* Acer */
 	{	/* Handle reboot issue on Acer Aspire one */
@@ -511,7 +538,7 @@ void __attribute__((weak)) mach_reboot_f
  * This means that this function can never return, it can misbehave
  * by not rebooting properly and hanging.
  */
-static void native_machine_emergency_restart(void)
+static void __noreturn native_machine_emergency_restart(void)
 {
 	int i;
 	int attempt = 0;
@@ -631,13 +658,13 @@ void native_machine_shutdown(void)
 #endif
 }
 
-static void __machine_emergency_restart(int emergency)
+static void __noreturn __machine_emergency_restart(int emergency)
 {
 	reboot_emergency = emergency;
 	machine_ops.emergency_restart();
 }
 
-static void native_machine_restart(char *__unused)
+static void __noreturn native_machine_restart(char *__unused)
 {
 	pr_notice("machine restart\n");
 
@@ -646,7 +673,7 @@ static void native_machine_restart(char
 	__machine_emergency_restart(0);
 }
 
-static void native_machine_halt(void)
+static void __noreturn native_machine_halt(void)
 {
 	/* Stop other cpus and apics */
 	machine_shutdown();
@@ -656,7 +683,7 @@ static void native_machine_halt(void)
 	stop_this_cpu(NULL);
 }
 
-static void native_machine_power_off(void)
+static void __noreturn native_machine_power_off(void)
 {
 	if (pm_power_off) {
 		if (!reboot_force)
@@ -665,9 +692,10 @@ static void native_machine_power_off(voi
 	}
 	/* A fallback in case there is no PM info available */
 	tboot_shutdown(TB_SHUTDOWN_HALT);
+	unreachable();
 }
 
-struct machine_ops machine_ops = {
+struct machine_ops machine_ops __read_only = {
 	.power_off = native_machine_power_off,
 	.shutdown = native_machine_shutdown,
 	.emergency_restart = native_machine_emergency_restart,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/reboot_fixups_32.c linux-4.0.9-pax/arch/x86/kernel/reboot_fixups_32.c
--- linux-4.0.9/arch/x86/kernel/reboot_fixups_32.c	2015-03-18 15:21:50.276349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/reboot_fixups_32.c	2015-04-15 12:13:52.930318621 +0200
@@ -57,7 +57,7 @@ struct device_fixup {
 	unsigned int vendor;
 	unsigned int device;
 	void (*reboot_fixup)(struct pci_dev *);
-};
+} __do_const;
 
 /*
  * PCI ids solely used for fixups_table go here
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/relocate_kernel_64.S linux-4.0.9-pax/arch/x86/kernel/relocate_kernel_64.S
--- linux-4.0.9/arch/x86/kernel/relocate_kernel_64.S	2015-03-18 15:21:50.276349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/relocate_kernel_64.S	2015-04-15 12:13:52.930318621 +0200
@@ -96,8 +96,7 @@ relocate_kernel:
 
 	/* jump to identity mapped page */
 	addq	$(identity_mapped - relocate_kernel), %r8
-	pushq	%r8
-	ret
+	jmp	*%r8
 
 identity_mapped:
 	/* set return address to 0 if not preserving context */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/setup.c linux-4.0.9-pax/arch/x86/kernel/setup.c
--- linux-4.0.9/arch/x86/kernel/setup.c	2015-04-13 11:21:01.866617460 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/setup.c	2015-04-15 12:13:52.930318621 +0200
@@ -111,6 +111,7 @@
 #include <asm/mce.h>
 #include <asm/alternative.h>
 #include <asm/prom.h>
+#include <asm/boot.h>
 
 /*
  * max_low_pfn_mapped: highest direct mapped pfn under 4GB
@@ -206,10 +207,12 @@ EXPORT_SYMBOL(boot_cpu_data);
 #endif
 
 
-#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-__visible unsigned long mmu_cr4_features;
+#ifdef CONFIG_X86_64
+__visible unsigned long mmu_cr4_features __read_only = X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE;
+#elif defined(CONFIG_X86_PAE)
+__visible unsigned long mmu_cr4_features __read_only = X86_CR4_PAE;
 #else
-__visible unsigned long mmu_cr4_features = X86_CR4_PAE;
+__visible unsigned long mmu_cr4_features __read_only;
 #endif
 
 /* Boot loader ID and version as integers, for the benefit of proc_dointvec */
@@ -771,7 +774,7 @@ static void __init trim_bios_range(void)
 	 * area (640->1Mb) as ram even though it is not.
 	 * take them out.
 	 */
-	e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
+	e820_remove_range(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, E820_RAM, 1);
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 }
@@ -779,7 +782,7 @@ static void __init trim_bios_range(void)
 /* called before trim_bios_range() to spare extra sanitize */
 static void __init e820_add_kernel_range(void)
 {
-	u64 start = __pa_symbol(_text);
+	u64 start = __pa_symbol(ktla_ktva(_text));
 	u64 size = __pa_symbol(_end) - start;
 
 	/*
@@ -855,8 +858,12 @@ dump_kernel_offset(struct notifier_block
 
 void __init setup_arch(char **cmdline_p)
 {
+#ifdef CONFIG_X86_32
+	memblock_reserve(LOAD_PHYSICAL_ADDR, __pa_symbol(__bss_stop) - LOAD_PHYSICAL_ADDR);
+#else
 	memblock_reserve(__pa_symbol(_text),
 			 (unsigned long)__bss_stop - (unsigned long)_text);
+#endif
 
 	early_reserve_initrd();
 
@@ -954,16 +961,16 @@ void __init setup_arch(char **cmdline_p)
 
 	if (!boot_params.hdr.root_flags)
 		root_mountflags &= ~MS_RDONLY;
-	init_mm.start_code = (unsigned long) _text;
-	init_mm.end_code = (unsigned long) _etext;
+	init_mm.start_code = ktla_ktva((unsigned long) _text);
+	init_mm.end_code = ktla_ktva((unsigned long) _etext);
 	init_mm.end_data = (unsigned long) _edata;
 	init_mm.brk = _brk_end;
 
 	mpx_mm_init(&init_mm);
 
-	code_resource.start = __pa_symbol(_text);
-	code_resource.end = __pa_symbol(_etext)-1;
-	data_resource.start = __pa_symbol(_etext);
+	code_resource.start = __pa_symbol(ktla_ktva(_text));
+	code_resource.end = __pa_symbol(ktla_ktva(_etext))-1;
+	data_resource.start = __pa_symbol(_sdata);
 	data_resource.end = __pa_symbol(_edata)-1;
 	bss_resource.start = __pa_symbol(__bss_start);
 	bss_resource.end = __pa_symbol(__bss_stop)-1;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/setup_percpu.c linux-4.0.9-pax/arch/x86/kernel/setup_percpu.c
--- linux-4.0.9/arch/x86/kernel/setup_percpu.c	2015-03-18 15:21:50.276349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/setup_percpu.c	2015-04-15 12:13:52.930318621 +0200
@@ -21,19 +21,17 @@
 #include <asm/cpu.h>
 #include <asm/stackprotector.h>
 
-DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
+#ifdef CONFIG_SMP
+DEFINE_PER_CPU_READ_MOSTLY(unsigned int, cpu_number);
 EXPORT_PER_CPU_SYMBOL(cpu_number);
+#endif
 
-#ifdef CONFIG_X86_64
 #define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
-#else
-#define BOOT_PERCPU_OFFSET 0
-#endif
 
 DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
 EXPORT_PER_CPU_SYMBOL(this_cpu_off);
 
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
+unsigned long __per_cpu_offset[NR_CPUS] __read_only = {
 	[0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
 };
 EXPORT_SYMBOL(__per_cpu_offset);
@@ -66,7 +64,7 @@ static bool __init pcpu_need_numa(void)
 {
 #ifdef CONFIG_NEED_MULTIPLE_NODES
 	pg_data_t *last = NULL;
-	unsigned int cpu;
+	int cpu;
 
 	for_each_possible_cpu(cpu) {
 		int node = early_cpu_to_node(cpu);
@@ -155,10 +153,10 @@ static inline void setup_percpu_segment(
 {
 #ifdef CONFIG_X86_32
 	struct desc_struct gdt;
+	unsigned long base = per_cpu_offset(cpu);
 
-	pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
-			0x2 | DESCTYPE_S, 0x8);
-	gdt.s = 1;
+	pack_descriptor(&gdt, base, (VMALLOC_END - base - 1) >> PAGE_SHIFT,
+			0x83 | DESCTYPE_S, 0xC);
 	write_gdt_entry(get_cpu_gdt_table(cpu),
 			GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
 #endif
@@ -219,6 +217,11 @@ void __init setup_per_cpu_areas(void)
 	/* alrighty, percpu areas up and running */
 	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
 	for_each_possible_cpu(cpu) {
+#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_X86_32
+		unsigned long canary = per_cpu(stack_canary.canary, cpu);
+#endif
+#endif
 		per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
 		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
 		per_cpu(cpu_number, cpu) = cpu;
@@ -259,6 +262,12 @@ void __init setup_per_cpu_areas(void)
 		 */
 		set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
 #endif
+#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_X86_32
+		if (!cpu)
+			per_cpu(stack_canary.canary, cpu) = canary;
+#endif
+#endif
 		/*
 		 * Up to this point, the boot CPU has been using .init.data
 		 * area.  Reload any changed state for the boot CPU.
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/signal.c linux-4.0.9-pax/arch/x86/kernel/signal.c
--- linux-4.0.9/arch/x86/kernel/signal.c	2015-04-13 11:21:01.866617460 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/signal.c	2015-04-15 12:13:52.930318621 +0200
@@ -190,7 +190,7 @@ static unsigned long align_sigframe(unsi
 	 * Align the stack pointer according to the i386 ABI,
 	 * i.e. so that on function entry ((sp + 4) & 15) == 0.
 	 */
-	sp = ((sp + 4) & -16ul) - 4;
+	sp = ((sp - 12) & -16ul) - 4;
 #else /* !CONFIG_X86_32 */
 	sp = round_down(sp, 16) - 8;
 #endif
@@ -298,10 +298,9 @@ __setup_frame(int sig, struct ksignal *k
 	}
 
 	if (current->mm->context.vdso)
-		restorer = current->mm->context.vdso +
-			selected_vdso32->sym___kernel_sigreturn;
+		restorer = (void __force_user *)(current->mm->context.vdso + selected_vdso32->sym___kernel_sigreturn);
 	else
-		restorer = &frame->retcode;
+		restorer = (void __user *)&frame->retcode;
 	if (ksig->ka.sa.sa_flags & SA_RESTORER)
 		restorer = ksig->ka.sa.sa_restorer;
 
@@ -315,7 +314,7 @@ __setup_frame(int sig, struct ksignal *k
 	 * reasons and because gdb uses it as a signature to notice
 	 * signal handler stack frames.
 	 */
-	err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode);
+	err |= __put_user(*((u64 *)&retcode), (u64 __user *)frame->retcode);
 
 	if (err)
 		return -EFAULT;
@@ -362,8 +361,10 @@ static int __setup_rt_frame(int sig, str
 		save_altstack_ex(&frame->uc.uc_stack, regs->sp);
 
 		/* Set up to return from userspace.  */
-		restorer = current->mm->context.vdso +
-			selected_vdso32->sym___kernel_rt_sigreturn;
+		if (current->mm->context.vdso)
+			restorer = (void __force_user *)(current->mm->context.vdso + selected_vdso32->sym___kernel_rt_sigreturn);
+		else
+			restorer = (void __user *)&frame->retcode;
 		if (ksig->ka.sa.sa_flags & SA_RESTORER)
 			restorer = ksig->ka.sa.sa_restorer;
 		put_user_ex(restorer, &frame->pretcode);
@@ -375,7 +376,7 @@ static int __setup_rt_frame(int sig, str
 		 * reasons and because gdb uses it as a signature to notice
 		 * signal handler stack frames.
 		 */
-		put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
+		put_user_ex(*((u64 *)&rt_retcode), (u64 __user *)frame->retcode);
 	} put_user_catch(err);
 	
 	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/smpboot.c linux-4.0.9-pax/arch/x86/kernel/smpboot.c
--- linux-4.0.9/arch/x86/kernel/smpboot.c	2015-04-13 11:21:01.866617460 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/smpboot.c	2015-04-15 12:13:52.930318621 +0200
@@ -229,14 +229,17 @@ static void notrace start_secondary(void
 
 	enable_start_cpu0 = 0;
 
-#ifdef CONFIG_X86_32
+	/* otherwise gcc will move up smp_processor_id before the cpu_init */
+	barrier();
+
 	/* switch away from the initial page table */
+#ifdef CONFIG_PAX_PER_CPU_PGD
+	load_cr3(get_cpu_pgd(smp_processor_id(), kernel));
+#else
 	load_cr3(swapper_pg_dir);
-	__flush_tlb_all();
 #endif
+	__flush_tlb_all();
 
-	/* otherwise gcc will move up smp_processor_id before the cpu_init */
-	barrier();
 	/*
 	 * Check TSC synchronization with the BP:
 	 */
@@ -800,8 +803,9 @@ static int do_boot_cpu(int apicid, int c
 	alternatives_enable_smp();
 
 	idle->thread.sp = (unsigned long) (((struct pt_regs *)
-			  (THREAD_SIZE +  task_stack_page(idle))) - 1);
+			  (THREAD_SIZE - 16 + task_stack_page(idle))) - 1);
 	per_cpu(current_task, cpu) = idle;
+	per_cpu(current_tinfo, cpu) = &idle->tinfo;
 
 #ifdef CONFIG_X86_32
 	/* Stack for startup_32 can be just as for start_secondary onwards */
@@ -810,10 +814,10 @@ static int do_boot_cpu(int apicid, int c
 	clear_tsk_thread_flag(idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
 #endif
-	per_cpu(kernel_stack, cpu) =
-		(unsigned long)task_stack_page(idle) -
-		KERNEL_STACK_OFFSET + THREAD_SIZE;
+	per_cpu(kernel_stack, cpu) = (unsigned long)task_stack_page(idle) - 16 + THREAD_SIZE;
+	pax_open_kernel();
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+	pax_close_kernel();
 	initial_code = (unsigned long)start_secondary;
 	stack_start  = idle->thread.sp;
 
@@ -953,6 +957,15 @@ int native_cpu_up(unsigned int cpu, stru
 	/* the FPU context is blank, nobody can own it */
 	__cpu_disable_lazy_restore(cpu);
 
+#ifdef CONFIG_PAX_PER_CPU_PGD
+	clone_pgd_range(get_cpu_pgd(cpu, kernel) + KERNEL_PGD_BOUNDARY,
+			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+			KERNEL_PGD_PTRS);
+	clone_pgd_range(get_cpu_pgd(cpu, user) + KERNEL_PGD_BOUNDARY,
+			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+			KERNEL_PGD_PTRS);
+#endif
+
 	err = do_boot_cpu(apicid, cpu, tidle);
 	if (err) {
 		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/smp.c linux-4.0.9-pax/arch/x86/kernel/smp.c
--- linux-4.0.9/arch/x86/kernel/smp.c	2015-03-18 15:21:50.276349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/smp.c	2015-04-15 12:13:52.930318621 +0200
@@ -341,7 +341,7 @@ static int __init nonmi_ipi_setup(char *
 
 __setup("nonmi_ipi", nonmi_ipi_setup);
 
-struct smp_ops smp_ops = {
+struct smp_ops smp_ops __read_only = {
 	.smp_prepare_boot_cpu	= native_smp_prepare_boot_cpu,
 	.smp_prepare_cpus	= native_smp_prepare_cpus,
 	.smp_cpus_done		= native_smp_cpus_done,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/step.c linux-4.0.9-pax/arch/x86/kernel/step.c
--- linux-4.0.9/arch/x86/kernel/step.c	2015-03-18 15:21:50.276349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/step.c	2015-04-15 12:13:52.930318621 +0200
@@ -27,10 +27,10 @@ unsigned long convert_ip_to_linear(struc
 		struct desc_struct *desc;
 		unsigned long base;
 
-		seg &= ~7UL;
+		seg >>= 3;
 
 		mutex_lock(&child->mm->context.lock);
-		if (unlikely((seg >> 3) >= child->mm->context.size))
+		if (unlikely(seg >= child->mm->context.size))
 			addr = -1L; /* bogus selector, access would fault */
 		else {
 			desc = child->mm->context.ldt + seg;
@@ -42,7 +42,8 @@ unsigned long convert_ip_to_linear(struc
 			addr += base;
 		}
 		mutex_unlock(&child->mm->context.lock);
-	}
+	} else if (seg == __KERNEL_CS || seg == __KERNEXEC_KERNEL_CS)
+		addr = ktla_ktva(addr);
 
 	return addr;
 }
@@ -53,6 +54,9 @@ static int is_setting_trap_flag(struct t
 	unsigned char opcode[15];
 	unsigned long addr = convert_ip_to_linear(child, regs);
 
+	if (addr == -EINVAL)
+		return 0;
+
 	copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
 	for (i = 0; i < copied; i++) {
 		switch (opcode[i]) {
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/sys_i386_32.c linux-4.0.9-pax/arch/x86/kernel/sys_i386_32.c
--- linux-4.0.9/arch/x86/kernel/sys_i386_32.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/sys_i386_32.c	2015-04-15 12:13:52.930318621 +0200
@@ -0,0 +1,184 @@
+/*
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/i386
+ * platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <linux/ipc.h>
+#include <linux/elf.h>
+
+#include <linux/uaccess.h>
+#include <linux/unistd.h>
+
+#include <asm/syscalls.h>
+
+int i386_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
+{
+	unsigned long pax_task_size = TASK_SIZE;
+
+#ifdef CONFIG_PAX_SEGMEXEC
+	if (current->mm->pax_flags & MF_PAX_SEGMEXEC)
+		pax_task_size = SEGMEXEC_TASK_SIZE;
+#endif
+
+	if (flags & MAP_FIXED)
+		if (len > pax_task_size || addr > pax_task_size - len)
+			return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
+ */
+static unsigned long get_align_mask(void)
+{
+	if (va_align.flags < 0 || !(va_align.flags & ALIGN_VA_32))
+		return 0;
+
+	if (!(current->flags & PF_RANDOMIZE))
+		return 0;
+
+	return va_align.mask;
+}
+
+unsigned long
+arch_get_unmapped_area(struct file *filp, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long pax_task_size = TASK_SIZE;
+	struct vm_unmapped_area_info info;
+
+#ifdef CONFIG_PAX_SEGMEXEC
+	if (mm->pax_flags & MF_PAX_SEGMEXEC)
+		pax_task_size = SEGMEXEC_TASK_SIZE;
+#endif
+
+	pax_task_size -= PAGE_SIZE;
+
+	if (len > pax_task_size)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED)
+		return addr;
+
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
+	if (addr) {
+		addr = PAGE_ALIGN(addr);
+		if (pax_task_size - len >= addr) {
+			vma = find_vma(mm, addr);
+			if (check_heap_stack_gap(vma, addr, len))
+				return addr;
+		}
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.align_mask = filp ? get_align_mask() : 0;
+	info.align_offset = pgoff << PAGE_SHIFT;
+
+#ifdef CONFIG_PAX_PAGEEXEC
+	if (!(__supported_pte_mask & _PAGE_NX) && (mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE)) {
+		info.low_limit = 0x00110000UL;
+		info.high_limit = mm->start_code;
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (mm->pax_flags & MF_PAX_RANDMMAP)
+			info.low_limit += mm->delta_mmap & 0x03FFF000UL;
+#endif
+
+		if (info.low_limit < info.high_limit) {
+			addr = vm_unmapped_area(&info);
+			if (!IS_ERR_VALUE(addr))
+				return addr;
+		}
+	} else
+#endif
+
+	info.low_limit = mm->mmap_base;
+	info.high_limit = pax_task_size;
+
+	return vm_unmapped_area(&info);
+}
+
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+			  const unsigned long len, const unsigned long pgoff,
+			  const unsigned long flags)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	unsigned long addr = addr0, pax_task_size = TASK_SIZE;
+	struct vm_unmapped_area_info info;
+
+#ifdef CONFIG_PAX_SEGMEXEC
+	if (mm->pax_flags & MF_PAX_SEGMEXEC)
+		pax_task_size = SEGMEXEC_TASK_SIZE;
+#endif
+
+	pax_task_size -= PAGE_SIZE;
+
+	/* requested length too big for entire address space */
+	if (len > pax_task_size)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED)
+		return addr;
+
+#ifdef CONFIG_PAX_PAGEEXEC
+	if (!(__supported_pte_mask & _PAGE_NX) && (mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE))
+		goto bottomup;
+#endif
+
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
+	/* requesting a specific address */
+	if (addr) {
+		addr = PAGE_ALIGN(addr);
+		if (pax_task_size - len >= addr) {
+			vma = find_vma(mm, addr);
+			if (check_heap_stack_gap(vma, addr, len))
+				return addr;
+		}
+	}
+
+	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+	info.length = len;
+	info.low_limit = PAGE_SIZE;
+	info.high_limit = mm->mmap_base;
+	info.align_mask = filp ? get_align_mask() : 0;
+	info.align_offset = pgoff << PAGE_SHIFT;
+	addr = vm_unmapped_area(&info);
+	if (!(addr & ~PAGE_MASK))
+		return addr;
+	VM_BUG_ON(addr != -ENOMEM);
+
+bottomup:
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	return arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
+}
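
(A hedged reading of a recurring idiom in the new file above, with invented names: the brace-less "if (!(mm->pax_flags & MF_PAX_RANDMMAP))" under #ifdef CONFIG_PAX_RANDMMAP guards only the statement that follows it, i.e. the whole "if (addr) { ... }" hint check, so tasks with randomized mmap ignore the caller's address hint and always fall through to vm_unmapped_area(). A compilable toy model:)

	/* toy model of the idiom, not kernel code */
	unsigned long pick_addr_sketch(unsigned long hint, int task_randomized)
	{
	#ifdef CONFIG_PAX_RANDMMAP		/* same shape as the hunks above */
		if (!task_randomized)		/* guards only the next statement */
	#endif
		if (hint)
			return hint & ~0xfffUL;	/* stand-in for PAGE_ALIGN + gap check */

		return 0;			/* stand-in for vm_unmapped_area(&info) */
	}
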
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/sys_x86_64.c linux-4.0.9-pax/arch/x86/kernel/sys_x86_64.c
--- linux-4.0.9/arch/x86/kernel/sys_x86_64.c	2015-03-18 15:21:50.276349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/sys_x86_64.c	2015-04-15 12:13:52.930318621 +0200
@@ -81,8 +81,8 @@ out:
 	return error;
 }
 
-static void find_start_end(unsigned long flags, unsigned long *begin,
-			   unsigned long *end)
+static void find_start_end(struct mm_struct *mm, unsigned long flags,
+			   unsigned long *begin, unsigned long *end)
 {
 	if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) {
 		unsigned long new_begin;
@@ -101,7 +101,7 @@ static void find_start_end(unsigned long
 				*begin = new_begin;
 		}
 	} else {
-		*begin = current->mm->mmap_legacy_base;
+		*begin = mm->mmap_legacy_base;
 		*end = TASK_SIZE;
 	}
 }
@@ -118,16 +118,19 @@ arch_get_unmapped_area(struct file *filp
 	if (flags & MAP_FIXED)
 		return addr;
 
-	find_start_end(flags, &begin, &end);
+	find_start_end(mm, flags, &begin, &end);
 
 	if (len > end)
 		return -ENOMEM;
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
-		if (end - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		if (end - len >= addr && check_heap_stack_gap(vma, addr, len))
 			return addr;
 	}
 
@@ -161,12 +164,15 @@ arch_get_unmapped_area_topdown(struct fi
 	if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT))
 		goto bottomup;
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	/* requesting a specific address */
 	if (addr) {
 		addr = PAGE_ALIGN(addr);
 		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr &&
-				(!vma || addr + len <= vma->vm_start))
+		if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len))
 			return addr;
 	}
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/tboot.c linux-4.0.9-pax/arch/x86/kernel/tboot.c
--- linux-4.0.9/arch/x86/kernel/tboot.c	2015-03-18 15:21:50.280349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/tboot.c	2015-05-07 03:15:47.776380453 +0200
@@ -44,6 +44,7 @@
 #include <asm/setup.h>
 #include <asm/e820.h>
 #include <asm/io.h>
+#include <asm/tlbflush.h>
 
 #include "../realmode/rm/wakeup.h"
 
@@ -221,7 +222,7 @@ static int tboot_setup_sleep(void)
 
 void tboot_shutdown(u32 shutdown_type)
 {
-	void (*shutdown)(void);
+	void (* __noreturn shutdown)(void);
 
 	if (!tboot_enabled())
 		return;
@@ -242,8 +243,9 @@ void tboot_shutdown(u32 shutdown_type)
 	tboot->shutdown_type = shutdown_type;
 
 	switch_to_tboot_pt();
+	cr4_clear_bits(X86_CR4_PCIDE);
 
-	shutdown = (void(*)(void))(unsigned long)tboot->shutdown_entry;
+	shutdown = (void *)(unsigned long)tboot->shutdown_entry;
 	shutdown();
 
 	/* should not reach here */
@@ -310,7 +312,7 @@ static int tboot_extended_sleep(u8 sleep
 	return -ENODEV;
 }
 
-static atomic_t ap_wfs_count;
+static atomic_unchecked_t ap_wfs_count;
 
 static int tboot_wait_for_aps(int num_aps)
 {
@@ -334,9 +336,9 @@ static int tboot_cpu_callback(struct not
 {
 	switch (action) {
 	case CPU_DYING:
-		atomic_inc(&ap_wfs_count);
+		atomic_inc_unchecked(&ap_wfs_count);
 		if (num_online_cpus() == 1)
-			if (tboot_wait_for_aps(atomic_read(&ap_wfs_count)))
+			if (tboot_wait_for_aps(atomic_read_unchecked(&ap_wfs_count)))
 				return NOTIFY_BAD;
 		break;
 	}
@@ -422,7 +424,7 @@ static __init int tboot_late_init(void)
 
 	tboot_create_trampoline();
 
-	atomic_set(&ap_wfs_count, 0);
+	atomic_set_unchecked(&ap_wfs_count, 0);
 	register_hotcpu_notifier(&tboot_cpu_notifier);
 
 #ifdef CONFIG_DEBUG_FS
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/time.c linux-4.0.9-pax/arch/x86/kernel/time.c
--- linux-4.0.9/arch/x86/kernel/time.c	2015-03-18 15:21:50.280349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/time.c	2015-04-15 12:13:52.930318621 +0200
@@ -30,9 +30,9 @@ unsigned long profile_pc(struct pt_regs
 {
 	unsigned long pc = instruction_pointer(regs);
 
-	if (!user_mode_vm(regs) && in_lock_functions(pc)) {
+	if (!user_mode(regs) && in_lock_functions(pc)) {
 #ifdef CONFIG_FRAME_POINTER
-		return *(unsigned long *)(regs->bp + sizeof(long));
+		return ktla_ktva(*(unsigned long *)(regs->bp + sizeof(long)));
 #else
 		unsigned long *sp =
 			(unsigned long *)kernel_stack_pointer(regs);
@@ -41,11 +41,17 @@ unsigned long profile_pc(struct pt_regs
 		 * or above a saved flags. Eflags has bits 22-31 zero,
 		 * kernel addresses don't.
 		 */
+
+#ifdef CONFIG_PAX_KERNEXEC
+		return ktla_ktva(sp[0]);
+#else
 		if (sp[0] >> 22)
 			return sp[0];
 		if (sp[1] >> 22)
 			return sp[1];
 #endif
+
+#endif
 	}
 	return pc;
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/tls.c linux-4.0.9-pax/arch/x86/kernel/tls.c
--- linux-4.0.9/arch/x86/kernel/tls.c	2015-03-18 15:21:50.280349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/tls.c	2015-04-15 12:13:52.930318621 +0200
@@ -139,6 +139,11 @@ int do_set_thread_area(struct task_struc
 	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
 
+#ifdef CONFIG_PAX_SEGMEXEC
+	if ((p->mm->pax_flags & MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE))
+		return -EINVAL;
+#endif
+
 	set_tls_desc(p, idx, &info, 1);
 
 	return 0;
@@ -256,7 +261,7 @@ int regset_tls_set(struct task_struct *t
 
 	if (kbuf)
 		info = kbuf;
-	else if (__copy_from_user(infobuf, ubuf, count))
+	else if (count > sizeof infobuf || __copy_from_user(infobuf, ubuf, count))
 		return -EFAULT;
 	else
 		info = infobuf;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/tracepoint.c linux-4.0.9-pax/arch/x86/kernel/tracepoint.c
--- linux-4.0.9/arch/x86/kernel/tracepoint.c	2015-03-18 15:21:50.280349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/tracepoint.c	2015-04-15 12:13:52.930318621 +0200
@@ -9,11 +9,11 @@
 #include <linux/atomic.h>
 
 atomic_t trace_idt_ctr = ATOMIC_INIT(0);
-struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
+const struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
 				(unsigned long) trace_idt_table };
 
 /* No need to be aligned, but done to keep all IDTs defined the same way. */
-gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss;
+gate_desc trace_idt_table[NR_VECTORS] __page_aligned_rodata;
 
 static int trace_irq_vector_refcount;
 static DEFINE_MUTEX(irq_vector_mutex);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/traps.c linux-4.0.9-pax/arch/x86/kernel/traps.c
--- linux-4.0.9/arch/x86/kernel/traps.c	2015-04-13 11:21:01.866617460 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/traps.c	2015-05-12 23:32:52.845260031 +0200
@@ -68,7 +68,7 @@
 #include <asm/proto.h>
 
 /* No need to be aligned, but done to keep all IDTs defined the same way. */
-gate_desc debug_idt_table[NR_VECTORS] __page_aligned_bss;
+gate_desc debug_idt_table[NR_VECTORS] __page_aligned_rodata;
 #else
 #include <asm/processor-flags.h>
 #include <asm/setup.h>
@@ -77,7 +77,7 @@ asmlinkage int system_call(void);
 #endif
 
 /* Must be page-aligned because the real IDT is used in a fixmap. */
-gate_desc idt_table[NR_VECTORS] __page_aligned_bss;
+gate_desc idt_table[NR_VECTORS] __page_aligned_rodata;
 
 DECLARE_BITMAP(used_vectors, NR_VECTORS);
 EXPORT_SYMBOL_GPL(used_vectors);
@@ -112,7 +112,7 @@ enum ctx_state ist_enter(struct pt_regs
 {
 	enum ctx_state prev_state;
 
-	if (user_mode_vm(regs)) {
+	if (user_mode(regs)) {
 		/* Other than that, we're just an exception. */
 		prev_state = exception_enter();
 	} else {
@@ -146,7 +146,7 @@ void ist_exit(struct pt_regs *regs, enum
 	/* Must be before exception_exit. */
 	preempt_count_sub(HARDIRQ_OFFSET);
 
-	if (user_mode_vm(regs))
+	if (user_mode(regs))
 		return exception_exit(prev_state);
 	else
 		rcu_nmi_exit();
@@ -158,7 +158,7 @@ void ist_exit(struct pt_regs *regs, enum
  *
  * IST exception handlers normally cannot schedule.  As a special
  * exception, if the exception interrupted userspace code (i.e.
- * user_mode_vm(regs) would return true) and the exception was not
+ * user_mode(regs) would return true) and the exception was not
  * a double fault, it can be safe to schedule.  ist_begin_non_atomic()
  * begins a non-atomic section within an ist_enter()/ist_exit() region.
  * Callers are responsible for enabling interrupts themselves inside
@@ -167,7 +167,7 @@ void ist_exit(struct pt_regs *regs, enum
  */
 void ist_begin_non_atomic(struct pt_regs *regs)
 {
-	BUG_ON(!user_mode_vm(regs));
+	BUG_ON(!user_mode(regs));
 
 	/*
 	 * Sanity check: we need to be on the normal thread stack.  This
@@ -191,11 +191,11 @@ void ist_end_non_atomic(void)
 }
 
 static nokprobe_inline int
-do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
+do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str,
 		  struct pt_regs *regs,	long error_code)
 {
 #ifdef CONFIG_X86_32
-	if (regs->flags & X86_VM_MASK) {
+	if (v8086_mode(regs)) {
 		/*
 		 * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
 		 * On nmi (interrupt 2), do_trap should not be called.
@@ -208,12 +208,24 @@ do_trap_no_signal(struct task_struct *ts
 		return -1;
 	}
 #endif
-	if (!user_mode(regs)) {
+	if (!user_mode_novm(regs)) {
 		if (!fixup_exception(regs)) {
 			tsk->thread.error_code = error_code;
 			tsk->thread.trap_nr = trapnr;
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC)
+			if (trapnr == X86_TRAP_SS && ((regs->cs & 0xFFFF) == __KERNEL_CS || (regs->cs & 0xFFFF) == __KERNEXEC_KERNEL_CS))
+				str = "PAX: suspicious stack segment fault";
+#endif
+
 			die(str, regs, error_code);
 		}
+
+#ifdef CONFIG_PAX_REFCOUNT
+		if (trapnr == X86_TRAP_OF)
+			pax_report_refcount_overflow(regs);
+#endif
+
 		return 0;
 	}
 
@@ -252,7 +264,7 @@ static siginfo_t *fill_trap_info(struct
 }
 
 static void
-do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
+do_trap(int trapnr, int signr, const char *str, struct pt_regs *regs,
 	long error_code, siginfo_t *info)
 {
 	struct task_struct *tsk = current;
@@ -276,7 +288,7 @@ do_trap(int trapnr, int signr, char *str
 	if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
 	    printk_ratelimit()) {
 		pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
-			tsk->comm, tsk->pid, str,
+			tsk->comm, task_pid_nr(tsk), str,
 			regs->ip, regs->sp, error_code);
 		print_vma_addr(" in ", regs->ip);
 		pr_cont("\n");
@@ -384,7 +396,7 @@ dotraplinkage void do_bounds(struct pt_r
 		goto exit;
 	conditional_sti(regs);
 
-	if (!user_mode_vm(regs))
+	if (!user_mode(regs))
 		die("bounds", regs, error_code);
 
 	if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
@@ -463,7 +475,7 @@ do_general_protection(struct pt_regs *re
 	conditional_sti(regs);
 
 #ifdef CONFIG_X86_32
-	if (regs->flags & X86_VM_MASK) {
+	if (v8086_mode(regs)) {
 		local_irq_enable();
 		handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
 		goto exit;
@@ -471,18 +483,42 @@ do_general_protection(struct pt_regs *re
 #endif
 
 	tsk = current;
-	if (!user_mode(regs)) {
+	if (!user_mode_novm(regs)) {
 		if (fixup_exception(regs))
 			goto exit;
 
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_nr = X86_TRAP_GP;
 		if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
-			       X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
+			       X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) {
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC)
+		if ((regs->cs & 0xFFFF) == __KERNEL_CS || (regs->cs & 0xFFFF) == __KERNEXEC_KERNEL_CS)
+			die("PAX: suspicious general protection fault", regs, error_code);
+		else
+#endif
+
 			die("general protection fault", regs, error_code);
+		}
 		goto exit;
 	}
 
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC)
+	if (!(__supported_pte_mask & _PAGE_NX) && tsk->mm && (tsk->mm->pax_flags & MF_PAX_PAGEEXEC)) {
+		struct mm_struct *mm = tsk->mm;
+		unsigned long limit;
+
+		down_write(&mm->mmap_sem);
+		limit = mm->context.user_cs_limit;
+		if (limit < TASK_SIZE) {
+			track_exec_limit(mm, limit, TASK_SIZE, VM_EXEC);
+			up_write(&mm->mmap_sem);
+			return;
+		}
+		up_write(&mm->mmap_sem);
+	}
+#endif
+
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_nr = X86_TRAP_GP;
 
@@ -581,13 +617,16 @@ struct bad_iret_stack *fixup_bad_iret(st
 		container_of(task_pt_regs(current),
 			     struct bad_iret_stack, regs);
 
+	if ((current->thread.sp0 ^ (unsigned long)s) < THREAD_SIZE)
+		new_stack = s;
+
 	/* Copy the IRET target to the new stack. */
 	memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
 
 	/* Copy the remainder of the stack from the current stack. */
 	memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
 
-	BUG_ON(!user_mode_vm(&new_stack->regs));
+	BUG_ON(!user_mode(&new_stack->regs));
 	return new_stack;
 }
 NOKPROBE_SYMBOL(fixup_bad_iret);
@@ -637,7 +676,7 @@ dotraplinkage void do_debug(struct pt_re
 	 * then it's very likely the result of an icebp/int01 trap.
 	 * User wants a sigtrap for that.
 	 */
-	if (!dr6 && user_mode_vm(regs))
+	if (!dr6 && user_mode(regs))
 		user_icebp = 1;
 
 	/* Catch kmemcheck conditions first of all! */
@@ -673,7 +712,7 @@ dotraplinkage void do_debug(struct pt_re
 	/* It's safe to allow irq's after DR6 has been saved */
 	preempt_conditional_sti(regs);
 
-	if (regs->flags & X86_VM_MASK) {
+	if (v8086_mode(regs)) {
 		handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
 					X86_TRAP_DB);
 		preempt_conditional_cli(regs);
@@ -688,7 +727,7 @@ dotraplinkage void do_debug(struct pt_re
 	 * We already checked v86 mode above, so we can check for kernel mode
 	 * by just checking the CPL of CS.
 	 */
-	if ((dr6 & DR_STEP) && !user_mode(regs)) {
+	if ((dr6 & DR_STEP) && !user_mode_novm(regs)) {
 		tsk->thread.debugreg6 &= ~DR_STEP;
 		set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
 		regs->flags &= ~X86_EFLAGS_TF;
@@ -721,7 +760,7 @@ static void math_error(struct pt_regs *r
 		return;
 	conditional_sti(regs);
 
-	if (!user_mode_vm(regs))
+	if (!user_mode(regs))
 	{
 		if (!fixup_exception(regs)) {
 			task->thread.error_code = error_code;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/tsc.c linux-4.0.9-pax/arch/x86/kernel/tsc.c
--- linux-4.0.9/arch/x86/kernel/tsc.c	2015-03-18 15:21:50.280349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/tsc.c	2015-04-15 12:13:52.934318621 +0200
@@ -150,7 +150,7 @@ static void cyc2ns_write_end(int cpu, st
 	 */
 	smp_wmb();
 
-	ACCESS_ONCE(c2n->head) = data;
+	ACCESS_ONCE_RW(c2n->head) = data;
 }
 
 /*
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/uprobes.c linux-4.0.9-pax/arch/x86/kernel/uprobes.c
--- linux-4.0.9/arch/x86/kernel/uprobes.c	2015-04-13 11:21:01.866617460 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/uprobes.c	2015-04-15 12:13:52.934318621 +0200
@@ -912,7 +912,7 @@ int arch_uprobe_exception_notify(struct
 	int ret = NOTIFY_DONE;
 
 	/* We are only interested in userspace traps */
-	if (regs && !user_mode_vm(regs))
+	if (regs && !user_mode(regs))
 		return NOTIFY_DONE;
 
 	switch (val) {
@@ -986,7 +986,7 @@ arch_uretprobe_hijack_return_addr(unsign
 
 	if (nleft != rasize) {
 		pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, "
-			"%%ip=%#lx\n", current->pid, regs->sp, regs->ip);
+			"%%ip=%#lx\n", task_pid_nr(current), regs->sp, regs->ip);
 
 		force_sig_info(SIGSEGV, SEND_SIG_FORCED, current);
 	}
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/vm86_32.c linux-4.0.9-pax/arch/x86/kernel/vm86_32.c
--- linux-4.0.9/arch/x86/kernel/vm86_32.c	2015-03-18 15:21:50.280349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/vm86_32.c	2015-04-15 12:13:52.934318621 +0200
@@ -150,7 +150,7 @@ struct pt_regs *save_v86_state(struct ke
 		do_exit(SIGSEGV);
 	}
 
-	tss = &per_cpu(init_tss, get_cpu());
+	tss = init_tss + get_cpu();
 	current->thread.sp0 = current->thread.saved_sp0;
 	current->thread.sysenter_cs = __KERNEL_CS;
 	load_sp0(tss, &current->thread);
@@ -318,7 +318,7 @@ static void do_sys_vm86(struct kernel_vm
 	tsk->thread.saved_fs = info->regs32->fs;
 	tsk->thread.saved_gs = get_user_gs(info->regs32);
 
-	tss = &per_cpu(init_tss, get_cpu());
+	tss = init_tss + get_cpu();
 	tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
 	if (cpu_has_sep)
 		tsk->thread.sysenter_cs = 0;
@@ -525,7 +525,7 @@ static void do_int(struct kernel_vm86_re
 		goto cannot_handle;
 	if (i == 0x21 && is_revectored(AH(regs), &KVM86->int21_revectored))
 		goto cannot_handle;
-	intr_ptr = (unsigned long __user *) (i << 2);
+	intr_ptr = (__force unsigned long __user *) (i << 2);
 	if (get_user(segoffs, intr_ptr))
 		goto cannot_handle;
 	if ((segoffs >> 16) == BIOSSEG)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/vmlinux.lds.S linux-4.0.9-pax/arch/x86/kernel/vmlinux.lds.S
--- linux-4.0.9/arch/x86/kernel/vmlinux.lds.S	2015-03-18 15:21:50.280349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/vmlinux.lds.S	2015-04-15 12:13:52.934318621 +0200
@@ -26,6 +26,13 @@
 #include <asm/page_types.h>
 #include <asm/cache.h>
 #include <asm/boot.h>
+#include <asm/segment.h>
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC)
+#define __KERNEL_TEXT_OFFSET	(LOAD_OFFSET + ____LOAD_PHYSICAL_ADDR)
+#else
+#define __KERNEL_TEXT_OFFSET	0
+#endif
 
 #undef i386     /* in case the preprocessor is a 32bit one */
 
@@ -69,30 +76,43 @@ jiffies_64 = jiffies;
 
 PHDRS {
 	text PT_LOAD FLAGS(5);          /* R_E */
+#ifdef CONFIG_X86_32
+	module PT_LOAD FLAGS(5);        /* R_E */
+#endif
+#ifdef CONFIG_XEN
+	rodata PT_LOAD FLAGS(5);        /* R_E */
+#else
+	rodata PT_LOAD FLAGS(4);        /* R__ */
+#endif
 	data PT_LOAD FLAGS(6);          /* RW_ */
-#ifdef CONFIG_X86_64
+	init.begin PT_LOAD FLAGS(6);    /* RW_ */
 #ifdef CONFIG_SMP
 	percpu PT_LOAD FLAGS(6);        /* RW_ */
 #endif
+	text.init PT_LOAD FLAGS(5);     /* R_E */
+	text.exit PT_LOAD FLAGS(5);     /* R_E */
 	init PT_LOAD FLAGS(7);          /* RWE */
-#endif
 	note PT_NOTE FLAGS(0);          /* ___ */
 }
 
 SECTIONS
 {
 #ifdef CONFIG_X86_32
-        . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
-        phys_startup_32 = startup_32 - LOAD_OFFSET;
+	. = LOAD_OFFSET + ____LOAD_PHYSICAL_ADDR;
 #else
-        . = __START_KERNEL;
-        phys_startup_64 = startup_64 - LOAD_OFFSET;
+	. = __START_KERNEL;
 #endif
 
 	/* Text and read-only data */
-	.text :  AT(ADDR(.text) - LOAD_OFFSET) {
-		_text = .;
+	.text (. - __KERNEL_TEXT_OFFSET): AT(ADDR(.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) {
 		/* bootstrapping code */
+#ifdef CONFIG_X86_32
+		phys_startup_32 = startup_32 - LOAD_OFFSET + __KERNEL_TEXT_OFFSET;
+#else
+		phys_startup_64 = startup_64 - LOAD_OFFSET + __KERNEL_TEXT_OFFSET;
+#endif
+		__LOAD_PHYSICAL_ADDR = . - LOAD_OFFSET + __KERNEL_TEXT_OFFSET;
+		_text = .;
 		HEAD_TEXT
 		. = ALIGN(8);
 		_stext = .;
@@ -104,13 +124,47 @@ SECTIONS
 		IRQENTRY_TEXT
 		*(.fixup)
 		*(.gnu.warning)
-		/* End of text section */
-		_etext = .;
 	} :text = 0x9090
 
-	NOTES :text :note
+	. += __KERNEL_TEXT_OFFSET;
+
+#ifdef CONFIG_X86_32
+	. = ALIGN(PAGE_SIZE);
+	.module.text : AT(ADDR(.module.text) - LOAD_OFFSET) {
+
+#ifdef CONFIG_PAX_KERNEXEC
+		MODULES_EXEC_VADDR = .;
+		BYTE(0)
+		. += (CONFIG_PAX_KERNEXEC_MODULE_TEXT * 1024 * 1024);
+		. = ALIGN(HPAGE_SIZE) - 1;
+		MODULES_EXEC_END = .;
+#endif
 
-	EXCEPTION_TABLE(16) :text = 0x9090
+	} :module
+#endif
+
+	.text.end : AT(ADDR(.text.end) - LOAD_OFFSET) {
+		/* End of text section */
+		BYTE(0)
+		_etext = . - __KERNEL_TEXT_OFFSET;
+	}
+
+#ifdef CONFIG_X86_32
+	. = ALIGN(PAGE_SIZE);
+	.rodata.page_aligned : AT(ADDR(.rodata.page_aligned) - LOAD_OFFSET) {
+		. = ALIGN(PAGE_SIZE);
+		*(.empty_zero_page)
+		*(.initial_pg_fixmap)
+		*(.initial_pg_pmd)
+		*(.initial_page_table)
+		*(.swapper_pg_dir)
+	} :rodata
+#endif
+
+	. = ALIGN(PAGE_SIZE);
+	NOTES :rodata :note
+
+	EXCEPTION_TABLE(16) :rodata
 
 #if defined(CONFIG_DEBUG_RODATA)
 	/* .text should occupy whole number of pages */
@@ -122,16 +176,20 @@ SECTIONS
 
 	/* Data */
 	.data : AT(ADDR(.data) - LOAD_OFFSET) {
+
+#ifdef CONFIG_PAX_KERNEXEC
+		. = ALIGN(HPAGE_SIZE);
+#else
+		. = ALIGN(PAGE_SIZE);
+#endif
+
 		/* Start of data section */
 		_sdata = .;
 
 		/* init_task */
 		INIT_TASK_DATA(THREAD_SIZE)
 
-#ifdef CONFIG_X86_32
-		/* 32 bit has nosave before _edata */
 		NOSAVE_DATA
-#endif
 
 		PAGE_ALIGNED_DATA(PAGE_SIZE)
 
@@ -174,12 +232,19 @@ SECTIONS
        . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
 
 	/* Init code and data - will be freed after init */
-	. = ALIGN(PAGE_SIZE);
 	.init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) {
+		BYTE(0)
+
+#ifdef CONFIG_PAX_KERNEXEC
+		. = ALIGN(HPAGE_SIZE);
+#else
+		. = ALIGN(PAGE_SIZE);
+#endif
+
 		__init_begin = .; /* paired with __init_end */
-	}
+	} :init.begin
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
 	/*
 	 * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
 	 * output PHDR, so the next output section - .init.text - should
@@ -190,12 +255,27 @@ SECTIONS
 	       "per-CPU data too large - increase CONFIG_PHYSICAL_START")
 #endif
 
-	INIT_TEXT_SECTION(PAGE_SIZE)
-#ifdef CONFIG_X86_64
-	:init
-#endif
+	. = ALIGN(PAGE_SIZE);
+	init_begin = .;
+	.init.text (. - __KERNEL_TEXT_OFFSET): AT(init_begin - LOAD_OFFSET) {
+		VMLINUX_SYMBOL(_sinittext) = .;
+		INIT_TEXT
+		. = ALIGN(PAGE_SIZE);
+	} :text.init
+
+	/*
+	 * .exit.text is discard at runtime, not link time, to deal with
+	 *  references from .altinstructions and .eh_frame
+	 */
+	.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) {
+		EXIT_TEXT
+		VMLINUX_SYMBOL(_einittext) = .;
+		. = ALIGN(16);
+	} :text.exit
+	. = init_begin + SIZEOF(.init.text) + SIZEOF(.exit.text);
 
-	INIT_DATA_SECTION(16)
+	. = ALIGN(PAGE_SIZE);
+	INIT_DATA_SECTION(16) :init
 
 	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
 		__x86_cpu_dev_start = .;
@@ -266,19 +346,12 @@ SECTIONS
 	}
 
 	. = ALIGN(8);
-	/*
-	 * .exit.text is discard at runtime, not link time, to deal with
-	 *  references from .altinstructions and .eh_frame
-	 */
-	.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
-		EXIT_TEXT
-	}
 
 	.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
 		EXIT_DATA
 	}
 
-#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
+#ifndef CONFIG_SMP
 	PERCPU_SECTION(INTERNODE_CACHE_BYTES)
 #endif
 
@@ -297,16 +370,10 @@ SECTIONS
 	.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
 		__smp_locks = .;
 		*(.smp_locks)
-		. = ALIGN(PAGE_SIZE);
 		__smp_locks_end = .;
+		. = ALIGN(PAGE_SIZE);
 	}
 
-#ifdef CONFIG_X86_64
-	.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
-		NOSAVE_DATA
-	}
-#endif
-
 	/* BSS */
 	. = ALIGN(PAGE_SIZE);
 	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
@@ -322,6 +389,7 @@ SECTIONS
 		__brk_base = .;
 		. += 64 * 1024;		/* 64k alignment slop space */
 		*(.brk_reservation)	/* areas brk users have reserved */
+		. = ALIGN(HPAGE_SIZE);
 		__brk_limit = .;
 	}
 
@@ -348,13 +416,12 @@ SECTIONS
  * for the boot processor.
  */
 #define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load
-INIT_PER_CPU(gdt_page);
 INIT_PER_CPU(irq_stack_union);
 
 /*
  * Build-time check on the image size:
  */
-. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
+. = ASSERT((_end - _text - __KERNEL_TEXT_OFFSET <= KERNEL_IMAGE_SIZE),
 	   "kernel image bigger than KERNEL_IMAGE_SIZE");
 
 #ifdef CONFIG_SMP
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/vsyscall_64.c linux-4.0.9-pax/arch/x86/kernel/vsyscall_64.c
--- linux-4.0.9/arch/x86/kernel/vsyscall_64.c	2015-03-18 15:21:50.280349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/vsyscall_64.c	2015-04-15 12:13:52.934318621 +0200
@@ -38,15 +38,13 @@
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
 
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
+static enum { EMULATE, NONE } vsyscall_mode = EMULATE;
 
 static int __init vsyscall_setup(char *str)
 {
 	if (str) {
 		if (!strcmp("emulate", str))
 			vsyscall_mode = EMULATE;
-		else if (!strcmp("native", str))
-			vsyscall_mode = NATIVE;
 		else if (!strcmp("none", str))
 			vsyscall_mode = NONE;
 		else
@@ -264,8 +262,7 @@ do_ret:
 	return true;
 
 sigsegv:
-	force_sig(SIGSEGV, current);
-	return true;
+	do_group_exit(SIGKILL);
 }
 
 /*
@@ -283,8 +280,8 @@ static struct vm_operations_struct gate_
 static struct vm_area_struct gate_vma = {
 	.vm_start	= VSYSCALL_ADDR,
 	.vm_end		= VSYSCALL_ADDR + PAGE_SIZE,
-	.vm_page_prot	= PAGE_READONLY_EXEC,
-	.vm_flags	= VM_READ | VM_EXEC,
+	.vm_page_prot	= PAGE_READONLY,
+	.vm_flags	= VM_READ,
 	.vm_ops		= &gate_vma_ops,
 };
 
@@ -325,10 +322,7 @@ void __init map_vsyscall(void)
 	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
 	if (vsyscall_mode != NONE)
-		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
-			     vsyscall_mode == NATIVE
-			     ? PAGE_KERNEL_VSYSCALL
-			     : PAGE_KERNEL_VVAR);
+		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, PAGE_KERNEL_VVAR);
 
 	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
 		     (unsigned long)VSYSCALL_ADDR);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/x8664_ksyms_64.c linux-4.0.9-pax/arch/x86/kernel/x8664_ksyms_64.c
--- linux-4.0.9/arch/x86/kernel/x8664_ksyms_64.c	2015-04-13 11:21:01.878617460 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/x8664_ksyms_64.c	2015-04-15 12:13:52.934318621 +0200
@@ -34,8 +34,6 @@ EXPORT_SYMBOL(copy_user_generic_string);
 EXPORT_SYMBOL(copy_user_generic_unrolled);
 EXPORT_SYMBOL(copy_user_enhanced_fast_string);
 EXPORT_SYMBOL(__copy_user_nocache);
-EXPORT_SYMBOL(_copy_from_user);
-EXPORT_SYMBOL(_copy_to_user);
 
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(clear_page);
@@ -79,3 +77,7 @@ EXPORT_SYMBOL(___preempt_schedule);
 EXPORT_SYMBOL(___preempt_schedule_context);
 #endif
 #endif
+
+#ifdef CONFIG_PAX_PER_CPU_PGD
+EXPORT_SYMBOL(cpu_pgd);
+#endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/x86_init.c linux-4.0.9-pax/arch/x86/kernel/x86_init.c
--- linux-4.0.9/arch/x86/kernel/x86_init.c	2015-03-18 15:21:50.280349253 +0100
+++ linux-4.0.9-pax/arch/x86/kernel/x86_init.c	2015-04-15 12:13:52.934318621 +0200
@@ -93,7 +93,7 @@ struct x86_cpuinit_ops x86_cpuinit = {
 static void default_nmi_init(void) { };
 static int default_i8042_detect(void) { return 1; };
 
-struct x86_platform_ops x86_platform = {
+struct x86_platform_ops x86_platform __read_only = {
 	.calibrate_tsc			= native_calibrate_tsc,
 	.get_wallclock			= mach_get_cmos_time,
 	.set_wallclock			= mach_set_rtc_mmss,
@@ -109,7 +109,7 @@ struct x86_platform_ops x86_platform = {
 EXPORT_SYMBOL_GPL(x86_platform);
 
 #if defined(CONFIG_PCI_MSI)
-struct x86_msi_ops x86_msi = {
+struct x86_msi_ops x86_msi __read_only = {
 	.setup_msi_irqs		= native_setup_msi_irqs,
 	.compose_msi_msg	= native_compose_msi_msg,
 	.teardown_msi_irq	= native_teardown_msi_irq,
@@ -140,7 +140,7 @@ void arch_restore_msi_irqs(struct pci_de
 }
 #endif
 
-struct x86_io_apic_ops x86_io_apic_ops = {
+struct x86_io_apic_ops x86_io_apic_ops __read_only = {
 	.init			= native_io_apic_init_mappings,
 	.read			= native_io_apic_read,
 	.write			= native_io_apic_write,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kernel/xsave.c linux-4.0.9-pax/arch/x86/kernel/xsave.c
--- linux-4.0.9/arch/x86/kernel/xsave.c	2015-04-13 11:21:01.878617460 +0200
+++ linux-4.0.9-pax/arch/x86/kernel/xsave.c	2015-04-15 12:13:52.934318621 +0200
@@ -168,18 +168,18 @@ static inline int save_xstate_epilog(voi
 
 	/* Setup the bytes not touched by the [f]xsave and reserved for SW. */
 	sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
-	err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
+	err = __copy_to_user(x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
 
 	if (!use_xsave())
 		return err;
 
-	err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
+	err |= __put_user(FP_XSTATE_MAGIC2, (__u32 __user *)(buf + xstate_size));
 
 	/*
 	 * Read the xstate_bv which we copied (directly from the cpu or
 	 * from the state in task struct) to the user buffers.
 	 */
-	err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
+	err |= __get_user(xstate_bv, (__u32 __user *)&x->xsave_hdr.xstate_bv);
 
 	/*
 	 * For legacy compatible, we always set FP/SSE bits in the bit
@@ -194,7 +194,7 @@ static inline int save_xstate_epilog(voi
 	 */
 	xstate_bv |= XSTATE_FPSSE;
 
-	err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
+	err |= __put_user(xstate_bv, (__u32 __user *)&x->xsave_hdr.xstate_bv);
 
 	return err;
 }
@@ -203,6 +203,7 @@ static inline int save_user_xstate(struc
 {
 	int err;
 
+	buf = (struct xsave_struct __user *)____m(buf);
 	if (use_xsave())
 		err = xsave_user(buf);
 	else if (use_fxsr())
@@ -313,6 +314,7 @@ sanitize_restored_xstate(struct task_str
  */
 static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only)
 {
+	buf = (void __user *)____m(buf);
 	if (use_xsave()) {
 		if ((unsigned long)buf % 64 || fx_only) {
 			u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kvm/cpuid.c linux-4.0.9-pax/arch/x86/kvm/cpuid.c
--- linux-4.0.9/arch/x86/kvm/cpuid.c	2015-06-15 16:02:22.271183858 +0200
+++ linux-4.0.9-pax/arch/x86/kvm/cpuid.c	2015-06-15 16:02:33.019183834 +0200
@@ -186,15 +186,20 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm
 			      struct kvm_cpuid2 *cpuid,
 			      struct kvm_cpuid_entry2 __user *entries)
 {
-	int r;
+	int r, i;
 
 	r = -E2BIG;
 	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
 		goto out;
 	r = -EFAULT;
-	if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
-			   cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
+	if (!access_ok(VERIFY_READ, entries, cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
 		goto out;
+	for (i = 0; i < cpuid->nent; ++i) {
+		struct kvm_cpuid_entry2 cpuid_entry;
+		if (__copy_from_user(&cpuid_entry, entries + i, sizeof(cpuid_entry)))
+			goto out;
+		vcpu->arch.cpuid_entries[i] = cpuid_entry;
+	}
 	vcpu->arch.cpuid_nent = cpuid->nent;
 	kvm_apic_set_version(vcpu);
 	kvm_x86_ops->cpuid_update(vcpu);
@@ -207,15 +212,19 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm
 			      struct kvm_cpuid2 *cpuid,
 			      struct kvm_cpuid_entry2 __user *entries)
 {
-	int r;
+	int r, i;
 
 	r = -E2BIG;
 	if (cpuid->nent < vcpu->arch.cpuid_nent)
 		goto out;
 	r = -EFAULT;
-	if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
-			 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
+	if (!access_ok(VERIFY_WRITE, entries, vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
 		goto out;
+	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
+		struct kvm_cpuid_entry2 cpuid_entry = vcpu->arch.cpuid_entries[i];
+		if (__copy_to_user(entries + i, &cpuid_entry, sizeof(cpuid_entry)))
+			goto out;
+	}
 	return 0;
 
 out:
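
(Hedged aside: the two ioctl hunks above replace one bulk copy_from_user()/copy_to_user() of nent * sizeof(struct kvm_cpuid_entry2) with an access_ok() check over the whole range followed by per-entry __copy_*_user() through a stack-local entry, so every individual user-space copy has a small, compile-time-constant size. A minimal model of the read side, with an invented helper name and the same assumption as the hunk, namely that nent has already been bounded against the destination array:)

	static int copy_cpuid_entries_sketch(struct kvm_cpuid_entry2 *dst,
					     const struct kvm_cpuid_entry2 __user *src,
					     unsigned int nent)
	{
		unsigned int i;

		if (!access_ok(VERIFY_READ, src, nent * sizeof(*src)))
			return -EFAULT;
		for (i = 0; i < nent; i++) {
			struct kvm_cpuid_entry2 tmp;	/* bounded, constant-size copy */

			if (__copy_from_user(&tmp, src + i, sizeof(tmp)))
				return -EFAULT;
			dst[i] = tmp;
		}
		return 0;
	}
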
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kvm/emulate.c linux-4.0.9-pax/arch/x86/kvm/emulate.c
--- linux-4.0.9/arch/x86/kvm/emulate.c	2015-04-13 11:21:01.878617460 +0200
+++ linux-4.0.9-pax/arch/x86/kvm/emulate.c	2015-04-15 12:13:52.934318621 +0200
@@ -3572,7 +3572,7 @@ static int check_cr_write(struct x86_emu
 	int cr = ctxt->modrm_reg;
 	u64 efer = 0;
 
-	static u64 cr_reserved_bits[] = {
+	static const u64 cr_reserved_bits[] = {
 		0xffffffff00000000ULL,
 		0, 0, 0, /* CR3 checked later */
 		CR4_RESERVED_BITS,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kvm/lapic.c linux-4.0.9-pax/arch/x86/kvm/lapic.c
--- linux-4.0.9/arch/x86/kvm/lapic.c	2015-07-10 20:07:37.563036133 +0200
+++ linux-4.0.9-pax/arch/x86/kvm/lapic.c	2015-07-10 20:07:47.719035591 +0200
@@ -56,7 +56,7 @@
 #define APIC_BUS_CYCLE_NS 1
 
 /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
-#define apic_debug(fmt, arg...)
+#define apic_debug(fmt, arg...) do {} while (0)
 
 #define APIC_LVT_NUM			6
 /* 14 is the version for Xeon and Pentium 8.4.8*/
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kvm/paging_tmpl.h linux-4.0.9-pax/arch/x86/kvm/paging_tmpl.h
--- linux-4.0.9/arch/x86/kvm/paging_tmpl.h	2015-06-15 16:02:22.271183858 +0200
+++ linux-4.0.9-pax/arch/x86/kvm/paging_tmpl.h	2015-06-15 16:02:33.023183834 +0200
@@ -343,7 +343,7 @@ retry_walk:
 		if (unlikely(kvm_is_error_hva(host_addr)))
 			goto error;
 
-		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
+		ptep_user = (pt_element_t __force_user *)((void *)host_addr + offset);
 		if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte))))
 			goto error;
 		walker->ptep_user[walker->level - 1] = ptep_user;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kvm/svm.c linux-4.0.9-pax/arch/x86/kvm/svm.c
--- linux-4.0.9/arch/x86/kvm/svm.c	2015-07-10 20:07:37.567036133 +0200
+++ linux-4.0.9-pax/arch/x86/kvm/svm.c	2015-07-10 20:07:47.719035591 +0200
@@ -3570,7 +3570,11 @@ static void reload_tss(struct kvm_vcpu *
 	int cpu = raw_smp_processor_id();
 
 	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+
+	pax_open_kernel();
 	sd->tss_desc->type = 9; /* available 32/64-bit TSS */
+	pax_close_kernel();
+
 	load_TR_desc();
 }
 
@@ -3966,6 +3970,10 @@ static void svm_vcpu_run(struct kvm_vcpu
 #endif
 #endif
 
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_MEMORY_UDEREF)
+	__set_fs(current_thread_info()->addr_limit);
+#endif
+
 	reload_tss(vcpu);
 
 	local_irq_disable();
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kvm/vmx.c linux-4.0.9-pax/arch/x86/kvm/vmx.c
--- linux-4.0.9/arch/x86/kvm/vmx.c	2015-06-15 16:02:22.275183858 +0200
+++ linux-4.0.9-pax/arch/x86/kvm/vmx.c	2015-06-15 16:02:33.027183834 +0200
@@ -1440,12 +1440,12 @@ static void vmcs_write64(unsigned long f
 #endif
 }
 
-static void vmcs_clear_bits(unsigned long field, u32 mask)
+static void vmcs_clear_bits(unsigned long field, unsigned long mask)
 {
 	vmcs_writel(field, vmcs_readl(field) & ~mask);
 }
 
-static void vmcs_set_bits(unsigned long field, u32 mask)
+static void vmcs_set_bits(unsigned long field, unsigned long mask)
 {
 	vmcs_writel(field, vmcs_readl(field) | mask);
 }
@@ -1705,7 +1705,11 @@ static void reload_tss(void)
 	struct desc_struct *descs;
 
 	descs = (void *)gdt->address;
+
+	pax_open_kernel();
 	descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
+	pax_close_kernel();
+
 	load_TR_desc();
 }
 
@@ -1941,6 +1945,10 @@ static void vmx_vcpu_load(struct kvm_vcp
 		vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
 		vmcs_writel(HOST_GDTR_BASE, gdt->address);   /* 22.2.4 */
 
+#ifdef CONFIG_PAX_PER_CPU_PGD
+		vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
+#endif
+
 		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
 		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
 		vmx->loaded_vmcs->cpu = cpu;
@@ -2233,7 +2241,7 @@ static void setup_msrs(struct vcpu_vmx *
  * reads and returns guest's timestamp counter "register"
  * guest_tsc = host_tsc + tsc_offset    -- 21.3
  */
-static u64 guest_read_tsc(void)
+static u64 __intentional_overflow(-1) guest_read_tsc(void)
 {
 	u64 host_tsc, tsc_offset;
 
@@ -4466,7 +4474,10 @@ static void vmx_set_constant_host_state(
 	unsigned long cr4;
 
 	vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS);  /* 22.2.3 */
+
+#ifndef CONFIG_PAX_PER_CPU_PGD
 	vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
+#endif
 
 	/* Save the most likely value for this task's CR4 in the VMCS. */
 	cr4 = cr4_read_shadow();
@@ -4493,7 +4504,7 @@ static void vmx_set_constant_host_state(
 	vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
 	vmx->host_idt_base = dt.address;
 
-	vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
+	vmcs_writel(HOST_RIP, ktla_ktva(vmx_return)); /* 22.2.5 */
 
 	rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
 	vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
@@ -6104,11 +6115,17 @@ static __init int hardware_setup(void)
 	 * page upon invalidation.  No need to do anything if not
 	 * using the APIC_ACCESS_ADDR VMCS field.
 	 */
-	if (!flexpriority_enabled)
-		kvm_x86_ops->set_apic_access_page_addr = NULL;
+	if (!flexpriority_enabled) {
+		pax_open_kernel();
+		*(void **)&kvm_x86_ops->set_apic_access_page_addr = NULL;
+		pax_close_kernel();
+	}
 
-	if (!cpu_has_vmx_tpr_shadow())
-		kvm_x86_ops->update_cr8_intercept = NULL;
+	if (!cpu_has_vmx_tpr_shadow()) {
+		pax_open_kernel();
+		*(void **)&kvm_x86_ops->update_cr8_intercept = NULL;
+		pax_close_kernel();
+	}
 
 	if (enable_ept && !cpu_has_vmx_ept_2m_page())
 		kvm_disable_largepages();
@@ -6119,14 +6136,16 @@ static __init int hardware_setup(void)
 	if (!cpu_has_vmx_apicv())
 		enable_apicv = 0;
 
+	pax_open_kernel();
 	if (enable_apicv)
-		kvm_x86_ops->update_cr8_intercept = NULL;
+		*(void **)&kvm_x86_ops->update_cr8_intercept = NULL;
 	else {
-		kvm_x86_ops->hwapic_irr_update = NULL;
-		kvm_x86_ops->hwapic_isr_update = NULL;
-		kvm_x86_ops->deliver_posted_interrupt = NULL;
-		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
+		*(void **)&kvm_x86_ops->hwapic_irr_update = NULL;
+		*(void **)&kvm_x86_ops->hwapic_isr_update = NULL;
+		*(void **)&kvm_x86_ops->deliver_posted_interrupt = NULL;
+		*(void **)&kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
 	}
+	pax_close_kernel();
 
 	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
 	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
@@ -6179,10 +6198,12 @@ static __init int hardware_setup(void)
 		enable_pml = 0;
 
 	if (!enable_pml) {
-		kvm_x86_ops->slot_enable_log_dirty = NULL;
-		kvm_x86_ops->slot_disable_log_dirty = NULL;
-		kvm_x86_ops->flush_log_dirty = NULL;
-		kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
+		pax_open_kernel();
+		*(void **)&kvm_x86_ops->slot_enable_log_dirty = NULL;
+		*(void **)&kvm_x86_ops->slot_disable_log_dirty = NULL;
+		*(void **)&kvm_x86_ops->flush_log_dirty = NULL;
+		*(void **)&kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
+		pax_close_kernel();
 	}
 
 	return alloc_kvm_area();
@@ -8227,6 +8248,12 @@ static void __noclone vmx_vcpu_run(struc
 		"jmp 2f \n\t"
 		"1: " __ex(ASM_VMX_VMRESUME) "\n\t"
 		"2: "
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC)
+		"ljmp %[cs],$3f\n\t"
+		"3: "
+#endif
+
 		/* Save guest registers, load host registers, keep flags */
 		"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
 		"pop %0 \n\t"
@@ -8279,6 +8306,11 @@ static void __noclone vmx_vcpu_run(struc
 #endif
 		[cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
 		[wordsize]"i"(sizeof(ulong))
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC)
+		,[cs]"i"(__KERNEL_CS)
+#endif
+
 	      : "cc", "memory"
 #ifdef CONFIG_X86_64
 		, "rax", "rbx", "rdi", "rsi"
@@ -8292,7 +8324,7 @@ static void __noclone vmx_vcpu_run(struc
 	if (debugctlmsr)
 		update_debugctlmsr(debugctlmsr);
 
-#ifndef CONFIG_X86_64
+#ifdef CONFIG_X86_32
 	/*
 	 * The sysexit path does not restore ds/es, so we must set them to
 	 * a reasonable value ourselves.
@@ -8301,8 +8333,18 @@ static void __noclone vmx_vcpu_run(struc
 	 * may be executed in interrupt context, which saves and restore segments
 	 * around it, nullifying its effect.
 	 */
-	loadsegment(ds, __USER_DS);
-	loadsegment(es, __USER_DS);
+	loadsegment(ds, __KERNEL_DS);
+	loadsegment(es, __KERNEL_DS);
+	loadsegment(ss, __KERNEL_DS);
+
+#ifdef CONFIG_PAX_KERNEXEC
+	loadsegment(fs, __KERNEL_PERCPU);
+#endif
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	__set_fs(current_thread_info()->addr_limit);
+#endif
+
 #endif
 
 	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/kvm/x86.c linux-4.0.9-pax/arch/x86/kvm/x86.c
--- linux-4.0.9/arch/x86/kvm/x86.c	2015-06-15 16:02:22.279183858 +0200
+++ linux-4.0.9-pax/arch/x86/kvm/x86.c	2015-06-15 16:02:33.031183834 +0200
@@ -1895,8 +1895,8 @@ static int xen_hvm_config(struct kvm_vcp
 {
 	struct kvm *kvm = vcpu->kvm;
 	int lm = is_long_mode(vcpu);
-	u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
-		: (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
+	u8 __user *blob_addr = lm ? (u8 __user *)(long)kvm->arch.xen_hvm_config.blob_addr_64
+		: (u8 __user *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
 	u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
 		: kvm->arch.xen_hvm_config.blob_size_32;
 	u32 page_num = data & ~PAGE_MASK;
@@ -2833,6 +2833,8 @@ long kvm_arch_dev_ioctl(struct file *fil
 		if (n < msr_list.nmsrs)
 			goto out;
 		r = -EFAULT;
+		if (num_msrs_to_save > ARRAY_SIZE(msrs_to_save))
+			goto out;
 		if (copy_to_user(user_msr_list->indices, &msrs_to_save,
 				 num_msrs_to_save * sizeof(u32)))
 			goto out;
@@ -5737,7 +5739,7 @@ static struct notifier_block pvclock_gto
 };
 #endif
 
-int kvm_arch_init(void *opaque)
+int kvm_arch_init(const void *opaque)
 {
 	int r;
 	struct kvm_x86_ops *ops = opaque;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lguest/boot.c linux-4.0.9-pax/arch/x86/lguest/boot.c
--- linux-4.0.9/arch/x86/lguest/boot.c	2015-04-13 11:21:01.894617459 +0200
+++ linux-4.0.9-pax/arch/x86/lguest/boot.c	2015-04-15 12:13:52.938318621 +0200
@@ -1340,9 +1340,10 @@ static __init int early_put_chars(u32 vt
  * Rebooting also tells the Host we're finished, but the RESTART flag tells the
  * Launcher to reboot us.
  */
-static void lguest_restart(char *reason)
+static __noreturn void lguest_restart(char *reason)
 {
 	hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0, 0);
+	BUG();
 }
 
 /*G:050
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/atomic64_386_32.S linux-4.0.9-pax/arch/x86/lib/atomic64_386_32.S
--- linux-4.0.9/arch/x86/lib/atomic64_386_32.S	2015-03-18 15:21:50.288349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/atomic64_386_32.S	2015-04-15 12:13:52.938318621 +0200
@@ -48,6 +48,10 @@ BEGIN(read)
 	movl  (v), %eax
 	movl 4(v), %edx
 RET_ENDP
+BEGIN(read_unchecked)
+	movl  (v), %eax
+	movl 4(v), %edx
+RET_ENDP
 #undef v
 
 #define v %esi
@@ -55,6 +59,10 @@ BEGIN(set)
 	movl %ebx,  (v)
 	movl %ecx, 4(v)
 RET_ENDP
+BEGIN(set_unchecked)
+	movl %ebx,  (v)
+	movl %ecx, 4(v)
+RET_ENDP
 #undef v
 
 #define v  %esi
@@ -70,6 +78,20 @@ RET_ENDP
 BEGIN(add)
 	addl %eax,  (v)
 	adcl %edx, 4(v)
+
+#ifdef CONFIG_PAX_REFCOUNT
+	jno 0f
+	subl %eax,  (v)
+	sbbl %edx, 4(v)
+	int $4
+0:
+	_ASM_EXTABLE(0b, 0b)
+#endif
+
+RET_ENDP
+BEGIN(add_unchecked)
+	addl %eax,  (v)
+	adcl %edx, 4(v)
 RET_ENDP
 #undef v
 
@@ -77,6 +99,24 @@ RET_ENDP
 BEGIN(add_return)
 	addl  (v), %eax
 	adcl 4(v), %edx
+
+#ifdef CONFIG_PAX_REFCOUNT
+	into
+1234:
+	_ASM_EXTABLE(1234b, 2f)
+#endif
+
+	movl %eax,  (v)
+	movl %edx, 4(v)
+
+#ifdef CONFIG_PAX_REFCOUNT
+2:
+#endif
+
+RET_ENDP
+BEGIN(add_return_unchecked)
+	addl  (v), %eax
+	adcl 4(v), %edx
 	movl %eax,  (v)
 	movl %edx, 4(v)
 RET_ENDP
@@ -86,6 +126,20 @@ RET_ENDP
 BEGIN(sub)
 	subl %eax,  (v)
 	sbbl %edx, 4(v)
+
+#ifdef CONFIG_PAX_REFCOUNT
+	jno 0f
+	addl %eax,  (v)
+	adcl %edx, 4(v)
+	int $4
+0:
+	_ASM_EXTABLE(0b, 0b)
+#endif
+
+RET_ENDP
+BEGIN(sub_unchecked)
+	subl %eax,  (v)
+	sbbl %edx, 4(v)
 RET_ENDP
 #undef v
 
@@ -96,6 +150,27 @@ BEGIN(sub_return)
 	sbbl $0, %edx
 	addl  (v), %eax
 	adcl 4(v), %edx
+
+#ifdef CONFIG_PAX_REFCOUNT
+	into
+1234:
+	_ASM_EXTABLE(1234b, 2f)
+#endif
+
+	movl %eax,  (v)
+	movl %edx, 4(v)
+
+#ifdef CONFIG_PAX_REFCOUNT
+2:
+#endif
+
+RET_ENDP
+BEGIN(sub_return_unchecked)
+	negl %edx
+	negl %eax
+	sbbl $0, %edx
+	addl  (v), %eax
+	adcl 4(v), %edx
 	movl %eax,  (v)
 	movl %edx, 4(v)
 RET_ENDP
@@ -105,6 +180,20 @@ RET_ENDP
 BEGIN(inc)
 	addl $1,  (v)
 	adcl $0, 4(v)
+
+#ifdef CONFIG_PAX_REFCOUNT
+	jno 0f
+	subl $1,  (v)
+	sbbl $0, 4(v)
+	int $4
+0:
+	_ASM_EXTABLE(0b, 0b)
+#endif
+
+RET_ENDP
+BEGIN(inc_unchecked)
+	addl $1,  (v)
+	adcl $0, 4(v)
 RET_ENDP
 #undef v
 
@@ -114,6 +203,26 @@ BEGIN(inc_return)
 	movl 4(v), %edx
 	addl $1, %eax
 	adcl $0, %edx
+
+#ifdef CONFIG_PAX_REFCOUNT
+	into
+1234:
+	_ASM_EXTABLE(1234b, 2f)
+#endif
+
+	movl %eax,  (v)
+	movl %edx, 4(v)
+
+#ifdef CONFIG_PAX_REFCOUNT
+2:
+#endif
+
+RET_ENDP
+BEGIN(inc_return_unchecked)
+	movl  (v), %eax
+	movl 4(v), %edx
+	addl $1, %eax
+	adcl $0, %edx
 	movl %eax,  (v)
 	movl %edx, 4(v)
 RET_ENDP
@@ -123,6 +232,20 @@ RET_ENDP
 BEGIN(dec)
 	subl $1,  (v)
 	sbbl $0, 4(v)
+
+#ifdef CONFIG_PAX_REFCOUNT
+	jno 0f
+	addl $1,  (v)
+	adcl $0, 4(v)
+	int $4
+0:
+	_ASM_EXTABLE(0b, 0b)
+#endif
+
+RET_ENDP
+BEGIN(dec_unchecked)
+	subl $1,  (v)
+	sbbl $0, 4(v)
 RET_ENDP
 #undef v
 
@@ -132,6 +255,26 @@ BEGIN(dec_return)
 	movl 4(v), %edx
 	subl $1, %eax
 	sbbl $0, %edx
+
+#ifdef CONFIG_PAX_REFCOUNT
+	into
+1234:
+	_ASM_EXTABLE(1234b, 2f)
+#endif
+
+	movl %eax,  (v)
+	movl %edx, 4(v)
+
+#ifdef CONFIG_PAX_REFCOUNT
+2:
+#endif
+
+RET_ENDP
+BEGIN(dec_return_unchecked)
+	movl  (v), %eax
+	movl 4(v), %edx
+	subl $1, %eax
+	sbbl $0, %edx
 	movl %eax,  (v)
 	movl %edx, 4(v)
 RET_ENDP
@@ -143,6 +286,13 @@ BEGIN(add_unless)
 	adcl %edx, %edi
 	addl  (v), %eax
 	adcl 4(v), %edx
+
+#ifdef CONFIG_PAX_REFCOUNT
+	into
+1234:
+	_ASM_EXTABLE(1234b, 2f)
+#endif
+
 	cmpl %eax, %ecx
 	je 3f
 1:
@@ -168,6 +318,13 @@ BEGIN(inc_not_zero)
 1:
 	addl $1, %eax
 	adcl $0, %edx
+
+#ifdef CONFIG_PAX_REFCOUNT
+	into
+1234:
+	_ASM_EXTABLE(1234b, 2f)
+#endif
+
 	movl %eax,  (v)
 	movl %edx, 4(v)
 	movl $1, %eax
@@ -186,6 +343,13 @@ BEGIN(dec_if_positive)
 	movl 4(v), %edx
 	subl $1, %eax
 	sbbl $0, %edx
+
+#ifdef CONFIG_PAX_REFCOUNT
+	into
+1234:
+	_ASM_EXTABLE(1234b, 1f)
+#endif
+
 	js 1f
 	movl %eax,  (v)
 	movl %edx, 4(v)
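
(Hedged aside: conceptually, the CONFIG_PAX_REFCOUNT instrumentation added above performs the 64-bit operation and, on signed overflow, restores the old value and raises #OF via "into"/"int $4", which the traps.c hunk earlier in this patch routes to pax_report_refcount_overflow(); the *_unchecked entry points keep the original wrap-around behaviour. A user-space C sketch of the checked add, not the PaX code:)

	#include <stdbool.h>

	/* returns false instead of trapping; the real code leaves the counter
	 * at its old value and executes int $4 so the #OF handler reports it */
	static bool atomic64_add_checked_sketch(long long *v, long long i)
	{
		long long sum;

		if (__builtin_add_overflow(*v, i, &sum))
			return false;		/* signed overflow: value not updated */
		*v = sum;			/* the _unchecked variant skips the test */
		return true;
	}
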
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/atomic64_cx8_32.S linux-4.0.9-pax/arch/x86/lib/atomic64_cx8_32.S
--- linux-4.0.9/arch/x86/lib/atomic64_cx8_32.S	2015-03-18 15:21:50.288349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/atomic64_cx8_32.S	2015-04-15 12:13:52.938318621 +0200
@@ -35,10 +35,20 @@ ENTRY(atomic64_read_cx8)
 	CFI_STARTPROC
 
 	read64 %ecx
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_read_cx8)
 
+ENTRY(atomic64_read_unchecked_cx8)
+	CFI_STARTPROC
+
+	read64 %ecx
+	pax_force_retaddr
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_read_unchecked_cx8)
+
 ENTRY(atomic64_set_cx8)
 	CFI_STARTPROC
 
@@ -48,10 +58,25 @@ ENTRY(atomic64_set_cx8)
 	cmpxchg8b (%esi)
 	jne 1b
 
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_set_cx8)
 
+ENTRY(atomic64_set_unchecked_cx8)
+	CFI_STARTPROC
+
+1:
+/* we don't need LOCK_PREFIX since aligned 64-bit writes
+ * are atomic on 586 and newer */
+	cmpxchg8b (%esi)
+	jne 1b
+
+	pax_force_retaddr
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_set_unchecked_cx8)
+
 ENTRY(atomic64_xchg_cx8)
 	CFI_STARTPROC
 
@@ -60,12 +85,13 @@ ENTRY(atomic64_xchg_cx8)
 	cmpxchg8b (%esi)
 	jne 1b
 
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_xchg_cx8)
 
-.macro addsub_return func ins insc
-ENTRY(atomic64_\func\()_return_cx8)
+.macro addsub_return func ins insc unchecked=""
+ENTRY(atomic64_\func\()_return\unchecked\()_cx8)
 	CFI_STARTPROC
 	SAVE ebp
 	SAVE ebx
@@ -82,27 +108,44 @@ ENTRY(atomic64_\func\()_return_cx8)
 	movl %edx, %ecx
 	\ins\()l %esi, %ebx
 	\insc\()l %edi, %ecx
+
+.ifb \unchecked
+#ifdef CONFIG_PAX_REFCOUNT
+	into
+2:
+	_ASM_EXTABLE(2b, 3f)
+#endif
+.endif
+
 	LOCK_PREFIX
 	cmpxchg8b (%ebp)
 	jne 1b
-
-10:
 	movl %ebx, %eax
 	movl %ecx, %edx
+
+.ifb \unchecked
+#ifdef CONFIG_PAX_REFCOUNT
+3:
+#endif
+.endif
+
 	RESTORE edi
 	RESTORE esi
 	RESTORE ebx
 	RESTORE ebp
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
-ENDPROC(atomic64_\func\()_return_cx8)
+ENDPROC(atomic64_\func\()_return\unchecked\()_cx8)
 .endm
 
 addsub_return add add adc
 addsub_return sub sub sbb
+addsub_return add add adc _unchecked
+addsub_return sub sub sbb _unchecked
 
-.macro incdec_return func ins insc
-ENTRY(atomic64_\func\()_return_cx8)
+.macro incdec_return func ins insc unchecked=""
+ENTRY(atomic64_\func\()_return\unchecked\()_cx8)
 	CFI_STARTPROC
 	SAVE ebx
 
@@ -112,21 +155,39 @@ ENTRY(atomic64_\func\()_return_cx8)
 	movl %edx, %ecx
 	\ins\()l $1, %ebx
 	\insc\()l $0, %ecx
+
+.ifb \unchecked
+#ifdef CONFIG_PAX_REFCOUNT
+	into
+2:
+	_ASM_EXTABLE(2b, 3f)
+#endif
+.endif
+
 	LOCK_PREFIX
 	cmpxchg8b (%esi)
 	jne 1b
 
-10:
 	movl %ebx, %eax
 	movl %ecx, %edx
+
+.ifb \unchecked
+#ifdef CONFIG_PAX_REFCOUNT
+3:
+#endif
+.endif
+
 	RESTORE ebx
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
-ENDPROC(atomic64_\func\()_return_cx8)
+ENDPROC(atomic64_\func\()_return\unchecked\()_cx8)
 .endm
 
 incdec_return inc add adc
 incdec_return dec sub sbb
+incdec_return inc add adc _unchecked
+incdec_return dec sub sbb _unchecked
 
 ENTRY(atomic64_dec_if_positive_cx8)
 	CFI_STARTPROC
@@ -138,6 +199,13 @@ ENTRY(atomic64_dec_if_positive_cx8)
 	movl %edx, %ecx
 	subl $1, %ebx
 	sbb $0, %ecx
+
+#ifdef CONFIG_PAX_REFCOUNT
+	into
+1234:
+	_ASM_EXTABLE(1234b, 2f)
+#endif
+
 	js 2f
 	LOCK_PREFIX
 	cmpxchg8b (%esi)
@@ -147,6 +215,7 @@ ENTRY(atomic64_dec_if_positive_cx8)
 	movl %ebx, %eax
 	movl %ecx, %edx
 	RESTORE ebx
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_dec_if_positive_cx8)
@@ -171,6 +240,13 @@ ENTRY(atomic64_add_unless_cx8)
 	movl %edx, %ecx
 	addl %ebp, %ebx
 	adcl %edi, %ecx
+
+#ifdef CONFIG_PAX_REFCOUNT
+	into
+1234:
+	_ASM_EXTABLE(1234b, 3f)
+#endif
+
 	LOCK_PREFIX
 	cmpxchg8b (%esi)
 	jne 1b
@@ -181,6 +257,7 @@ ENTRY(atomic64_add_unless_cx8)
 	CFI_ADJUST_CFA_OFFSET -8
 	RESTORE ebx
 	RESTORE ebp
+	pax_force_retaddr
 	ret
 4:
 	cmpl %edx, 4(%esp)
@@ -203,6 +280,13 @@ ENTRY(atomic64_inc_not_zero_cx8)
 	xorl %ecx, %ecx
 	addl $1, %ebx
 	adcl %edx, %ecx
+
+#ifdef CONFIG_PAX_REFCOUNT
+	into
+1234:
+	_ASM_EXTABLE(1234b, 3f)
+#endif
+
 	LOCK_PREFIX
 	cmpxchg8b (%esi)
 	jne 1b
@@ -210,6 +294,7 @@ ENTRY(atomic64_inc_not_zero_cx8)
 	movl $1, %eax
 3:
 	RESTORE ebx
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 ENDPROC(atomic64_inc_not_zero_cx8)
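
The atomic64_*_cx8 hunks above add *_unchecked variants of the cmpxchg8b-based helpers and, in the checked variants, follow the add/adc (or sub/sbb) pair with an "into" instruction under CONFIG_PAX_REFCOUNT: if the signed result overflowed, the trap is routed through the exception table to the label after the cmpxchg, so the wrapped value is not committed. Below is a minimal userspace C sketch of the same checked-versus-unchecked distinction; it is an illustration only, since the patch relies on "into" and the kernel exception table rather than __builtin_add_overflow, and operates on atomic64_t rather than a bare int64_t.

/*
 * Userspace sketch of the checked vs. unchecked atomic64 add idea behind
 * CONFIG_PAX_REFCOUNT.  Illustration only, see the note above.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* checked add: refuses to let a signed 64-bit counter wrap */
static int64_t add_return_checked(int64_t *v, int64_t a)
{
	int64_t new;

	if (__builtin_add_overflow(*v, a, &new)) {
		/* the kernel would report the overflow and skip the store */
		fprintf(stderr, "refcount overflow detected\n");
		abort();
	}
	*v = new;
	return new;
}

/* unchecked add: wraps silently, like atomic64_add_return_unchecked_cx8 */
static int64_t add_return_unchecked(int64_t *v, int64_t a)
{
	*v = (int64_t)((uint64_t)*v + (uint64_t)a);	/* two's-complement wrap */
	return *v;
}

int main(void)
{
	int64_t counter = INT64_MAX - 1;

	printf("unchecked: %lld\n", (long long)add_return_unchecked(&counter, 2));

	counter = INT64_MAX - 1;
	printf("checked:   %lld\n", (long long)add_return_checked(&counter, 2));	/* aborts */
	return 0;
}
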
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/checksum_32.S linux-4.0.9-pax/arch/x86/lib/checksum_32.S
--- linux-4.0.9/arch/x86/lib/checksum_32.S	2015-03-18 15:21:50.288349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/checksum_32.S	2015-04-15 12:13:52.942318620 +0200
@@ -29,7 +29,8 @@
 #include <asm/dwarf2.h>
 #include <asm/errno.h>
 #include <asm/asm.h>
-				
+#include <asm/segment.h>
+
 /*
  * computes a partial checksum, e.g. for TCP/UDP fragments
  */
@@ -293,9 +294,24 @@ unsigned int csum_partial_copy_generic (
 
 #define ARGBASE 16		
 #define FP		12
-		
-ENTRY(csum_partial_copy_generic)
+
+ENTRY(csum_partial_copy_generic_to_user)
 	CFI_STARTPROC
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	pushl_cfi %gs
+	popl_cfi %es
+	jmp csum_partial_copy_generic
+#endif
+
+ENTRY(csum_partial_copy_generic_from_user)
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	pushl_cfi %gs
+	popl_cfi %ds
+#endif
+
+ENTRY(csum_partial_copy_generic)
 	subl  $4,%esp	
 	CFI_ADJUST_CFA_OFFSET 4
 	pushl_cfi %edi
@@ -317,7 +333,7 @@ ENTRY(csum_partial_copy_generic)
 	jmp 4f
 SRC(1:	movw (%esi), %bx	)
 	addl $2, %esi
-DST(	movw %bx, (%edi)	)
+DST(	movw %bx, %es:(%edi)	)
 	addl $2, %edi
 	addw %bx, %ax	
 	adcl $0, %eax
@@ -329,30 +345,30 @@ DST(	movw %bx, (%edi)	)
 SRC(1:	movl (%esi), %ebx	)
 SRC(	movl 4(%esi), %edx	)
 	adcl %ebx, %eax
-DST(	movl %ebx, (%edi)	)
+DST(	movl %ebx, %es:(%edi)	)
 	adcl %edx, %eax
-DST(	movl %edx, 4(%edi)	)
+DST(	movl %edx, %es:4(%edi)	)
 
 SRC(	movl 8(%esi), %ebx	)
 SRC(	movl 12(%esi), %edx	)
 	adcl %ebx, %eax
-DST(	movl %ebx, 8(%edi)	)
+DST(	movl %ebx, %es:8(%edi)	)
 	adcl %edx, %eax
-DST(	movl %edx, 12(%edi)	)
+DST(	movl %edx, %es:12(%edi)	)
 
 SRC(	movl 16(%esi), %ebx 	)
 SRC(	movl 20(%esi), %edx	)
 	adcl %ebx, %eax
-DST(	movl %ebx, 16(%edi)	)
+DST(	movl %ebx, %es:16(%edi)	)
 	adcl %edx, %eax
-DST(	movl %edx, 20(%edi)	)
+DST(	movl %edx, %es:20(%edi)	)
 
 SRC(	movl 24(%esi), %ebx	)
 SRC(	movl 28(%esi), %edx	)
 	adcl %ebx, %eax
-DST(	movl %ebx, 24(%edi)	)
+DST(	movl %ebx, %es:24(%edi)	)
 	adcl %edx, %eax
-DST(	movl %edx, 28(%edi)	)
+DST(	movl %edx, %es:28(%edi)	)
 
 	lea 32(%esi), %esi
 	lea 32(%edi), %edi
@@ -366,7 +382,7 @@ DST(	movl %edx, 28(%edi)	)
 	shrl $2, %edx			# This clears CF
 SRC(3:	movl (%esi), %ebx	)
 	adcl %ebx, %eax
-DST(	movl %ebx, (%edi)	)
+DST(	movl %ebx, %es:(%edi)	)
 	lea 4(%esi), %esi
 	lea 4(%edi), %edi
 	dec %edx
@@ -378,12 +394,12 @@ DST(	movl %ebx, (%edi)	)
 	jb 5f
 SRC(	movw (%esi), %cx	)
 	leal 2(%esi), %esi
-DST(	movw %cx, (%edi)	)
+DST(	movw %cx, %es:(%edi)	)
 	leal 2(%edi), %edi
 	je 6f
 	shll $16,%ecx
 SRC(5:	movb (%esi), %cl	)
-DST(	movb %cl, (%edi)	)
+DST(	movb %cl, %es:(%edi)	)
 6:	addl %ecx, %eax
 	adcl $0, %eax
 7:
@@ -394,7 +410,7 @@ DST(	movb %cl, (%edi)	)
 
 6001:
 	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
-	movl $-EFAULT, (%ebx)
+	movl $-EFAULT, %ss:(%ebx)
 
 	# zero the complete destination - computing the rest
 	# is too much work 
@@ -407,11 +423,15 @@ DST(	movb %cl, (%edi)	)
 
 6002:
 	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
-	movl $-EFAULT,(%ebx)
+	movl $-EFAULT,%ss:(%ebx)
 	jmp 5000b
 
 .previous
 
+	pushl_cfi %ss
+	popl_cfi %ds
+	pushl_cfi %ss
+	popl_cfi %es
 	popl_cfi %ebx
 	CFI_RESTORE ebx
 	popl_cfi %esi
@@ -421,26 +441,43 @@ DST(	movb %cl, (%edi)	)
 	popl_cfi %ecx			# equivalent to addl $4,%esp
 	ret	
 	CFI_ENDPROC
-ENDPROC(csum_partial_copy_generic)
+ENDPROC(csum_partial_copy_generic_to_user)
 
 #else
 
 /* Version for PentiumII/PPro */
 
 #define ROUND1(x) \
+	nop; nop; nop;				\
 	SRC(movl x(%esi), %ebx	)	;	\
 	addl %ebx, %eax			;	\
-	DST(movl %ebx, x(%edi)	)	; 
+	DST(movl %ebx, %es:x(%edi))	;
 
 #define ROUND(x) \
+	nop; nop; nop;				\
 	SRC(movl x(%esi), %ebx	)	;	\
 	adcl %ebx, %eax			;	\
-	DST(movl %ebx, x(%edi)	)	;
+	DST(movl %ebx, %es:x(%edi))	;
 
 #define ARGBASE 12
-		
-ENTRY(csum_partial_copy_generic)
+
+ENTRY(csum_partial_copy_generic_to_user)
 	CFI_STARTPROC
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	pushl_cfi %gs
+	popl_cfi %es
+	jmp csum_partial_copy_generic
+#endif
+
+ENTRY(csum_partial_copy_generic_from_user)
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	pushl_cfi %gs
+	popl_cfi %ds
+#endif
+
+ENTRY(csum_partial_copy_generic)
 	pushl_cfi %ebx
 	CFI_REL_OFFSET ebx, 0
 	pushl_cfi %edi
@@ -461,7 +498,7 @@ ENTRY(csum_partial_copy_generic)
 	subl %ebx, %edi  
 	lea  -1(%esi),%edx
 	andl $-32,%edx
-	lea 3f(%ebx,%ebx), %ebx
+	lea 3f(%ebx,%ebx,2), %ebx
 	testl %esi, %esi 
 	jmp *%ebx
 1:	addl $64,%esi
@@ -482,19 +519,19 @@ ENTRY(csum_partial_copy_generic)
 	jb 5f
 SRC(	movw (%esi), %dx         )
 	leal 2(%esi), %esi
-DST(	movw %dx, (%edi)         )
+DST(	movw %dx, %es:(%edi)     )
 	leal 2(%edi), %edi
 	je 6f
 	shll $16,%edx
 5:
 SRC(	movb (%esi), %dl         )
-DST(	movb %dl, (%edi)         )
+DST(	movb %dl, %es:(%edi)     )
 6:	addl %edx, %eax
 	adcl $0, %eax
 7:
 .section .fixup, "ax"
 6001:	movl	ARGBASE+20(%esp), %ebx	# src_err_ptr	
-	movl $-EFAULT, (%ebx)
+	movl $-EFAULT, %ss:(%ebx)
 	# zero the complete destination (computing the rest is too much work)
 	movl ARGBASE+8(%esp),%edi	# dst
 	movl ARGBASE+12(%esp),%ecx	# len
@@ -502,10 +539,17 @@ DST(	movb %dl, (%edi)         )
 	rep; stosb
 	jmp 7b
 6002:	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
-	movl $-EFAULT, (%ebx)
+	movl $-EFAULT, %ss:(%ebx)
 	jmp  7b			
 .previous				
 
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	pushl_cfi %ss
+	popl_cfi %ds
+	pushl_cfi %ss
+	popl_cfi %es
+#endif
+
 	popl_cfi %esi
 	CFI_RESTORE esi
 	popl_cfi %edi
@@ -514,7 +558,7 @@ DST(	movb %dl, (%edi)         )
 	CFI_RESTORE ebx
 	ret
 	CFI_ENDPROC
-ENDPROC(csum_partial_copy_generic)
+ENDPROC(csum_partial_copy_generic_to_user)
 				
 #undef ROUND
 #undef ROUND1		
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/clear_page_64.S linux-4.0.9-pax/arch/x86/lib/clear_page_64.S
--- linux-4.0.9/arch/x86/lib/clear_page_64.S	2015-03-18 15:21:50.288349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/clear_page_64.S	2015-04-15 12:13:52.942318620 +0200
@@ -11,6 +11,7 @@ ENTRY(clear_page_c)
 	movl $4096/8,%ecx
 	xorl %eax,%eax
 	rep stosq
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 ENDPROC(clear_page_c)
@@ -20,6 +21,7 @@ ENTRY(clear_page_c_e)
 	movl $4096,%ecx
 	xorl %eax,%eax
 	rep stosb
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 ENDPROC(clear_page_c_e)
@@ -43,6 +45,7 @@ ENTRY(clear_page)
 	leaq	64(%rdi),%rdi
 	jnz	.Lloop
 	nop
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 .Lclear_page_end:
@@ -58,7 +61,7 @@ ENDPROC(clear_page)
 
 #include <asm/cpufeature.h>
 
-	.section .altinstr_replacement,"ax"
+	.section .altinstr_replacement,"a"
 1:	.byte 0xeb					/* jmp <disp8> */
 	.byte (clear_page_c - clear_page) - (2f - 1b)	/* offset */
 2:	.byte 0xeb					/* jmp <disp8> */
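
Besides adding pax_force_retaddr before each ret, the clear_page_64.S hunk drops the executable flag from .altinstr_replacement ("ax" becomes "a"). The replacement bytes kept there are only copied over the original instructions by the alternatives framework at patch time and are never executed in place, so the section only needs to be allocatable; the same change recurs below for copy_page, memcpy, memmove and memset. A small C sketch of that copy-then-run-elsewhere idea follows, using a made-up section name and an ELF-only attribute guard.

/*
 * Sketch of why .altinstr_replacement can lose the "x" flag: the replacement
 * bytes are copied over the live instructions and never executed from their
 * own section.  The section name is made up; the attribute is ELF-specific.
 */
#include <stdio.h>
#include <string.h>

#ifdef __ELF__
#define REPL_SECTION __attribute__((section(".demo_altinstr_replacement")))
#else
#define REPL_SECTION
#endif

/* const data in a named section: emitted allocatable, not executable */
static const unsigned char replacement[] REPL_SECTION = { 0x90, 0x90, 0x90 };

int main(void)
{
	unsigned char live[3] = { 0x0f, 0x1f, 0x00 };	/* stand-in for patched code */

	/* "apply alternatives": overwrite the live bytes with the replacement */
	memcpy(live, replacement, sizeof(live));
	printf("%02x %02x %02x\n", live[0], live[1], live[2]);
	return 0;
}
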
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/cmpxchg16b_emu.S linux-4.0.9-pax/arch/x86/lib/cmpxchg16b_emu.S
--- linux-4.0.9/arch/x86/lib/cmpxchg16b_emu.S	2015-03-18 15:21:50.288349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/cmpxchg16b_emu.S	2015-04-15 12:13:52.942318620 +0200
@@ -8,6 +8,7 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/percpu.h>
+#include <asm/alternative-asm.h>
 
 .text
 
@@ -46,12 +47,14 @@ CFI_STARTPROC
 	CFI_REMEMBER_STATE
 	popfq_cfi
 	mov $1, %al
+	pax_force_retaddr
 	ret
 
 	CFI_RESTORE_STATE
 .Lnot_same:
 	popfq_cfi
 	xor %al,%al
+	pax_force_retaddr
 	ret
 
 CFI_ENDPROC
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/copy_page_64.S linux-4.0.9-pax/arch/x86/lib/copy_page_64.S
--- linux-4.0.9/arch/x86/lib/copy_page_64.S	2015-03-18 15:21:50.288349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/copy_page_64.S	2015-04-15 12:13:52.942318620 +0200
@@ -9,6 +9,7 @@ copy_page_rep:
 	CFI_STARTPROC
 	movl	$4096/8, %ecx
 	rep	movsq
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 ENDPROC(copy_page_rep)
@@ -24,8 +25,8 @@ ENTRY(copy_page)
 	CFI_ADJUST_CFA_OFFSET 2*8
 	movq	%rbx,	(%rsp)
 	CFI_REL_OFFSET rbx, 0
-	movq	%r12,	1*8(%rsp)
-	CFI_REL_OFFSET r12, 1*8
+	movq	%r13,	1*8(%rsp)
+	CFI_REL_OFFSET r13, 1*8
 
 	movl	$(4096/64)-5,	%ecx
 	.p2align 4
@@ -38,7 +39,7 @@ ENTRY(copy_page)
 	movq	0x8*4(%rsi), %r9
 	movq	0x8*5(%rsi), %r10
 	movq	0x8*6(%rsi), %r11
-	movq	0x8*7(%rsi), %r12
+	movq	0x8*7(%rsi), %r13
 
 	prefetcht0 5*64(%rsi)
 
@@ -49,7 +50,7 @@ ENTRY(copy_page)
 	movq	%r9,  0x8*4(%rdi)
 	movq	%r10, 0x8*5(%rdi)
 	movq	%r11, 0x8*6(%rdi)
-	movq	%r12, 0x8*7(%rdi)
+	movq	%r13, 0x8*7(%rdi)
 
 	leaq	64 (%rsi), %rsi
 	leaq	64 (%rdi), %rdi
@@ -68,7 +69,7 @@ ENTRY(copy_page)
 	movq	0x8*4(%rsi), %r9
 	movq	0x8*5(%rsi), %r10
 	movq	0x8*6(%rsi), %r11
-	movq	0x8*7(%rsi), %r12
+	movq	0x8*7(%rsi), %r13
 
 	movq	%rax, 0x8*0(%rdi)
 	movq	%rbx, 0x8*1(%rdi)
@@ -77,7 +78,7 @@ ENTRY(copy_page)
 	movq	%r9,  0x8*4(%rdi)
 	movq	%r10, 0x8*5(%rdi)
 	movq	%r11, 0x8*6(%rdi)
-	movq	%r12, 0x8*7(%rdi)
+	movq	%r13, 0x8*7(%rdi)
 
 	leaq	64(%rdi), %rdi
 	leaq	64(%rsi), %rsi
@@ -85,10 +86,11 @@ ENTRY(copy_page)
 
 	movq	(%rsp), %rbx
 	CFI_RESTORE rbx
-	movq	1*8(%rsp), %r12
-	CFI_RESTORE r12
+	movq	1*8(%rsp), %r13
+	CFI_RESTORE r13
 	addq	$2*8, %rsp
 	CFI_ADJUST_CFA_OFFSET -2*8
+	pax_force_retaddr
 	ret
 .Lcopy_page_end:
 	CFI_ENDPROC
@@ -99,7 +101,7 @@ ENDPROC(copy_page)
 
 #include <asm/cpufeature.h>
 
-	.section .altinstr_replacement,"ax"
+	.section .altinstr_replacement,"a"
 1:	.byte 0xeb					/* jmp <disp8> */
 	.byte (copy_page_rep - copy_page) - (2f - 1b)	/* offset */
 2:
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/copy_user_64.S linux-4.0.9-pax/arch/x86/lib/copy_user_64.S
--- linux-4.0.9/arch/x86/lib/copy_user_64.S	2015-03-18 15:21:50.288349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/copy_user_64.S	2015-04-15 12:13:52.942318620 +0200
@@ -18,31 +18,7 @@
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
-
-/*
- * By placing feature2 after feature1 in altinstructions section, we logically
- * implement:
- * If CPU has feature2, jmp to alt2 is used
- * else if CPU has feature1, jmp to alt1 is used
- * else jmp to orig is used.
- */
-	.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
-0:
-	.byte 0xe9	/* 32bit jump */
-	.long \orig-1f	/* by default jump to orig */
-1:
-	.section .altinstr_replacement,"ax"
-2:	.byte 0xe9			/* near jump with 32bit immediate */
-	.long \alt1-1b /* offset */   /* or alternatively to alt1 */
-3:	.byte 0xe9			/* near jump with 32bit immediate */
-	.long \alt2-1b /* offset */   /* or alternatively to alt2 */
-	.previous
-
-	.section .altinstructions,"a"
-	altinstruction_entry 0b,2b,\feature1,5,5
-	altinstruction_entry 0b,3b,\feature2,5,5
-	.previous
-	.endm
+#include <asm/pgtable.h>
 
 	.macro ALIGN_DESTINATION
 #ifdef FIX_ALIGNMENT
@@ -70,52 +46,6 @@
 #endif
 	.endm
 
-/* Standard copy_to_user with segment limit checking */
-ENTRY(_copy_to_user)
-	CFI_STARTPROC
-	GET_THREAD_INFO(%rax)
-	movq %rdi,%rcx
-	addq %rdx,%rcx
-	jc bad_to_user
-	cmpq TI_addr_limit(%rax),%rcx
-	ja bad_to_user
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
-		copy_user_generic_unrolled,copy_user_generic_string,	\
-		copy_user_enhanced_fast_string
-	CFI_ENDPROC
-ENDPROC(_copy_to_user)
-
-/* Standard copy_from_user with segment limit checking */
-ENTRY(_copy_from_user)
-	CFI_STARTPROC
-	GET_THREAD_INFO(%rax)
-	movq %rsi,%rcx
-	addq %rdx,%rcx
-	jc bad_from_user
-	cmpq TI_addr_limit(%rax),%rcx
-	ja bad_from_user
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
-		copy_user_generic_unrolled,copy_user_generic_string,	\
-		copy_user_enhanced_fast_string
-	CFI_ENDPROC
-ENDPROC(_copy_from_user)
-
-	.section .fixup,"ax"
-	/* must zero dest */
-ENTRY(bad_from_user)
-bad_from_user:
-	CFI_STARTPROC
-	movl %edx,%ecx
-	xorl %eax,%eax
-	rep
-	stosb
-bad_to_user:
-	movl %edx,%eax
-	ret
-	CFI_ENDPROC
-ENDPROC(bad_from_user)
-	.previous
-
 /*
  * copy_user_generic_unrolled - memory copy with exception handling.
  * This version is for CPUs like P4 that don't have efficient micro
@@ -131,6 +61,7 @@ ENDPROC(bad_from_user)
  */
 ENTRY(copy_user_generic_unrolled)
 	CFI_STARTPROC
+	ASM_PAX_OPEN_USERLAND
 	ASM_STAC
 	cmpl $8,%edx
 	jb 20f		/* less then 8 bytes, go to byte copy loop */
@@ -180,6 +111,8 @@ ENTRY(copy_user_generic_unrolled)
 	jnz 21b
 23:	xor %eax,%eax
 	ASM_CLAC
+	ASM_PAX_CLOSE_USERLAND
+	pax_force_retaddr
 	ret
 
 	.section .fixup,"ax"
@@ -235,6 +168,7 @@ ENDPROC(copy_user_generic_unrolled)
  */
 ENTRY(copy_user_generic_string)
 	CFI_STARTPROC
+	ASM_PAX_OPEN_USERLAND
 	ASM_STAC
 	cmpl $8,%edx
 	jb 2f		/* less than 8 bytes, go to byte copy loop */
@@ -249,6 +183,8 @@ ENTRY(copy_user_generic_string)
 	movsb
 	xorl %eax,%eax
 	ASM_CLAC
+	ASM_PAX_CLOSE_USERLAND
+	pax_force_retaddr
 	ret
 
 	.section .fixup,"ax"
@@ -276,12 +212,15 @@ ENDPROC(copy_user_generic_string)
  */
 ENTRY(copy_user_enhanced_fast_string)
 	CFI_STARTPROC
+	ASM_PAX_OPEN_USERLAND
 	ASM_STAC
 	movl %edx,%ecx
 1:	rep
 	movsb
 	xorl %eax,%eax
 	ASM_CLAC
+	ASM_PAX_CLOSE_USERLAND
+	pax_force_retaddr
 	ret
 
 	.section .fixup,"ax"
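
The copy_user_64.S hunks remove the ALTERNATIVE_JUMP macro together with the _copy_to_user/_copy_from_user entry stubs that performed the address-limit check, and bracket each copy variant with ASM_PAX_OPEN_USERLAND/ASM_PAX_CLOSE_USERLAND around the STAC/CLAC window. For reference, the check the deleted stubs performed (add the length to the start address, reject on carry, compare the end against the thread's addr_limit) looks like the C sketch below; the limit constant is illustrative, the real value comes from TI_addr_limit(%rax).

/*
 * Sketch of the range check done by the removed _copy_to_user stub.
 * FAKE_ADDR_LIMIT is illustrative only.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FAKE_ADDR_LIMIT	0x00007ffffffff000ULL

static bool range_ok(uint64_t addr, uint64_t len)
{
	uint64_t end;

	if (__builtin_add_overflow(addr, len, &end))	/* "jc bad_to_user" */
		return false;
	return end <= FAKE_ADDR_LIMIT;			/* "ja bad_to_user" rejects end > limit */
}

int main(void)
{
	printf("%d\n", range_ok(0x400000, 4096));	/* 1: fits below the limit */
	printf("%d\n", range_ok(UINT64_MAX - 8, 64));	/* 0: address + length wraps */
	printf("%d\n", range_ok(FAKE_ADDR_LIMIT, 16));	/* 0: runs past the limit */
	return 0;
}
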
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/copy_user_nocache_64.S linux-4.0.9-pax/arch/x86/lib/copy_user_nocache_64.S
--- linux-4.0.9/arch/x86/lib/copy_user_nocache_64.S	2015-03-18 15:21:50.288349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/copy_user_nocache_64.S	2015-04-15 12:13:52.942318620 +0200
@@ -8,6 +8,7 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/alternative-asm.h>
 
 #define FIX_ALIGNMENT 1
 
@@ -16,6 +17,7 @@
 #include <asm/thread_info.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
+#include <asm/pgtable.h>
 
 	.macro ALIGN_DESTINATION
 #ifdef FIX_ALIGNMENT
@@ -49,6 +51,16 @@
  */
 ENTRY(__copy_user_nocache)
 	CFI_STARTPROC
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	mov pax_user_shadow_base,%rcx
+	cmp %rcx,%rsi
+	jae 1f
+	add %rcx,%rsi
+1:
+#endif
+
+	ASM_PAX_OPEN_USERLAND
 	ASM_STAC
 	cmpl $8,%edx
 	jb 20f		/* less then 8 bytes, go to byte copy loop */
@@ -98,7 +110,9 @@ ENTRY(__copy_user_nocache)
 	jnz 21b
 23:	xorl %eax,%eax
 	ASM_CLAC
+	ASM_PAX_CLOSE_USERLAND
 	sfence
+	pax_force_retaddr
 	ret
 
 	.section .fixup,"ax"
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/csum-copy_64.S linux-4.0.9-pax/arch/x86/lib/csum-copy_64.S
--- linux-4.0.9/arch/x86/lib/csum-copy_64.S	2015-03-18 15:21:50.288349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/csum-copy_64.S	2015-04-15 12:13:52.942318620 +0200
@@ -9,6 +9,7 @@
 #include <asm/dwarf2.h>
 #include <asm/errno.h>
 #include <asm/asm.h>
+#include <asm/alternative-asm.h>
 
 /*
  * Checksum copy with exception handling.
@@ -56,8 +57,8 @@ ENTRY(csum_partial_copy_generic)
 	CFI_ADJUST_CFA_OFFSET 7*8
 	movq  %rbx, 2*8(%rsp)
 	CFI_REL_OFFSET rbx, 2*8
-	movq  %r12, 3*8(%rsp)
-	CFI_REL_OFFSET r12, 3*8
+	movq  %r15, 3*8(%rsp)
+	CFI_REL_OFFSET r15, 3*8
 	movq  %r14, 4*8(%rsp)
 	CFI_REL_OFFSET r14, 4*8
 	movq  %r13, 5*8(%rsp)
@@ -72,16 +73,16 @@ ENTRY(csum_partial_copy_generic)
 	movl  %edx, %ecx
 
 	xorl  %r9d, %r9d
-	movq  %rcx, %r12
+	movq  %rcx, %r15
 
-	shrq  $6, %r12
+	shrq  $6, %r15
 	jz	.Lhandle_tail       /* < 64 */
 
 	clc
 
 	/* main loop. clear in 64 byte blocks */
 	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
-	/* r11:	temp3, rdx: temp4, r12 loopcnt */
+	/* r11:	temp3, rdx: temp4, r15 loopcnt */
 	/* r10:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
 	.p2align 4
 .Lloop:
@@ -115,7 +116,7 @@ ENTRY(csum_partial_copy_generic)
 	adcq  %r14, %rax
 	adcq  %r13, %rax
 
-	decl %r12d
+	decl %r15d
 
 	dest
 	movq %rbx, (%rsi)
@@ -210,8 +211,8 @@ ENTRY(csum_partial_copy_generic)
 .Lende:
 	movq 2*8(%rsp), %rbx
 	CFI_RESTORE rbx
-	movq 3*8(%rsp), %r12
-	CFI_RESTORE r12
+	movq 3*8(%rsp), %r15
+	CFI_RESTORE r15
 	movq 4*8(%rsp), %r14
 	CFI_RESTORE r14
 	movq 5*8(%rsp), %r13
@@ -220,6 +221,7 @@ ENTRY(csum_partial_copy_generic)
 	CFI_RESTORE rbp
 	addq $7*8, %rsp
 	CFI_ADJUST_CFA_OFFSET -7*8
+	pax_force_retaddr
 	ret
 	CFI_RESTORE_STATE
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/csum-wrappers_64.c linux-4.0.9-pax/arch/x86/lib/csum-wrappers_64.c
--- linux-4.0.9/arch/x86/lib/csum-wrappers_64.c	2015-03-18 15:21:50.288349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/csum-wrappers_64.c	2015-04-15 12:13:52.942318620 +0200
@@ -52,10 +52,12 @@ csum_partial_copy_from_user(const void _
 			len -= 2;
 		}
 	}
+	pax_open_userland();
 	stac();
-	isum = csum_partial_copy_generic((__force const void *)src,
+	isum = csum_partial_copy_generic((const void __force_kernel *)____m(src),
 				dst, len, isum, errp, NULL);
 	clac();
+	pax_close_userland();
 	if (unlikely(*errp))
 		goto out_err;
 
@@ -109,10 +111,12 @@ csum_partial_copy_to_user(const void *sr
 	}
 
 	*errp = 0;
+	pax_open_userland();
 	stac();
-	ret = csum_partial_copy_generic(src, (void __force *)dst,
+	ret = csum_partial_copy_generic(src, (void __force_kernel *)____m(dst),
 					len, isum, NULL, errp);
 	clac();
+	pax_close_userland();
 	return ret;
 }
 EXPORT_SYMBOL(csum_partial_copy_to_user);
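
In csum-wrappers_64.c the user-copy paths gain pax_open_userland()/pax_close_userland() around the existing stac()/clac() pair, and the user pointer is passed through ____m() with a __force_kernel cast before reaching the assembly routine. The toy C model below illustrates the bracketing discipline only; the real helpers manipulate the UDEREF access window and are kernel-internal, the stubs here merely track a flag.

/*
 * Toy model of the pax_open_userland()/pax_close_userland() bracketing added
 * around csum_partial_copy_generic().
 */
#include <assert.h>
#include <stdio.h>
#include <string.h>

static int userland_open;			/* models the access-window state */

static void pax_open_userland(void)  { userland_open = 1; }
static void pax_close_userland(void) { userland_open = 0; }

/* stand-in for csum_partial_copy_generic(): copy and sum the bytes */
static unsigned int checksum_copy(void *dst, const void *src, size_t len)
{
	const unsigned char *s = src;
	unsigned int sum = 0;

	assert(userland_open);	/* user memory is only touchable inside the window */
	memcpy(dst, src, len);
	while (len--)
		sum += *s++;
	return sum;
}

int main(void)
{
	char src[] = "payload", dst[sizeof(src)];
	unsigned int sum;

	pax_open_userland();	/* mirrors the call added before stac() */
	sum = checksum_copy(dst, src, sizeof(src));
	pax_close_userland();	/* mirrors the call added after clac() */

	printf("sum=%u dst=%s\n", sum, dst);
	return 0;
}
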
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/getuser.S linux-4.0.9-pax/arch/x86/lib/getuser.S
--- linux-4.0.9/arch/x86/lib/getuser.S	2015-03-18 15:21:50.288349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/getuser.S	2015-04-15 12:13:52.942318620 +0200
@@ -33,17 +33,40 @@
 #include <asm/thread_info.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
+#include <asm/segment.h>
+#include <asm/pgtable.h>
+#include <asm/alternative-asm.h>
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_MEMORY_UDEREF)
+#define __copyuser_seg gs;
+#else
+#define __copyuser_seg
+#endif
 
 	.text
 ENTRY(__get_user_1)
 	CFI_STARTPROC
+
+#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF)
 	GET_THREAD_INFO(%_ASM_DX)
 	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+	mov pax_user_shadow_base,%_ASM_DX
+	cmp %_ASM_DX,%_ASM_AX
+	jae 1234f
+	add %_ASM_DX,%_ASM_AX
+1234:
+#endif
+
+#endif
+
 	ASM_STAC
-1:	movzbl (%_ASM_AX),%edx
+1:	__copyuser_seg movzbl (%_ASM_AX),%edx
 	xor %eax,%eax
 	ASM_CLAC
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 ENDPROC(__get_user_1)
@@ -51,14 +74,28 @@ ENDPROC(__get_user_1)
 ENTRY(__get_user_2)
 	CFI_STARTPROC
 	add $1,%_ASM_AX
+
+#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF)
 	jc bad_get_user
 	GET_THREAD_INFO(%_ASM_DX)
 	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+	mov pax_user_shadow_base,%_ASM_DX
+	cmp %_ASM_DX,%_ASM_AX
+	jae 1234f
+	add %_ASM_DX,%_ASM_AX
+1234:
+#endif
+
+#endif
+
 	ASM_STAC
-2:	movzwl -1(%_ASM_AX),%edx
+2:	__copyuser_seg movzwl -1(%_ASM_AX),%edx
 	xor %eax,%eax
 	ASM_CLAC
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 ENDPROC(__get_user_2)
@@ -66,14 +103,28 @@ ENDPROC(__get_user_2)
 ENTRY(__get_user_4)
 	CFI_STARTPROC
 	add $3,%_ASM_AX
+
+#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF)
 	jc bad_get_user
 	GET_THREAD_INFO(%_ASM_DX)
 	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+	mov pax_user_shadow_base,%_ASM_DX
+	cmp %_ASM_DX,%_ASM_AX
+	jae 1234f
+	add %_ASM_DX,%_ASM_AX
+1234:
+#endif
+
+#endif
+
 	ASM_STAC
-3:	movl -3(%_ASM_AX),%edx
+3:	__copyuser_seg movl -3(%_ASM_AX),%edx
 	xor %eax,%eax
 	ASM_CLAC
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 ENDPROC(__get_user_4)
@@ -86,10 +137,20 @@ ENTRY(__get_user_8)
 	GET_THREAD_INFO(%_ASM_DX)
 	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+	mov pax_user_shadow_base,%_ASM_DX
+	cmp %_ASM_DX,%_ASM_AX
+	jae 1234f
+	add %_ASM_DX,%_ASM_AX
+1234:
+#endif
+
 	ASM_STAC
 4:	movq -7(%_ASM_AX),%rdx
 	xor %eax,%eax
 	ASM_CLAC
+	pax_force_retaddr
 	ret
 #else
 	add $7,%_ASM_AX
@@ -98,10 +159,11 @@ ENTRY(__get_user_8)
 	cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user_8
 	ASM_STAC
-4:	movl -7(%_ASM_AX),%edx
-5:	movl -3(%_ASM_AX),%ecx
+4:	__copyuser_seg movl -7(%_ASM_AX),%edx
+5:	__copyuser_seg movl -3(%_ASM_AX),%ecx
 	xor %eax,%eax
 	ASM_CLAC
+	pax_force_retaddr
 	ret
 #endif
 	CFI_ENDPROC
@@ -113,6 +175,7 @@ bad_get_user:
 	xor %edx,%edx
 	mov $(-EFAULT),%_ASM_AX
 	ASM_CLAC
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 END(bad_get_user)
@@ -124,6 +187,7 @@ bad_get_user_8:
 	xor %ecx,%ecx
 	mov $(-EFAULT),%_ASM_AX
 	ASM_CLAC
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 END(bad_get_user_8)
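
Each __get_user_N entry above gains, under UDEREF on x86-64, a pax_user_shadow_base fixup: if the supplied address is still below the shadow base it is shifted up into the shadow mapping, otherwise it is used as is (the 32-bit UDEREF case instead prefixes the access with a gs segment override via __copyuser_seg). A userspace C sketch of that remapping test follows, with a made-up base constant standing in for the kernel's pax_user_shadow_base variable.

/*
 * Userspace sketch of the pax_user_shadow_base fixup visible in the
 * __get_user_* hunks.  FAKE_USER_SHADOW_BASE is illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

#define FAKE_USER_SHADOW_BASE	0x0000700000000000ULL

static uint64_t remap_user_address(uint64_t addr)
{
	if (addr >= FAKE_USER_SHADOW_BASE)	/* "jae 1234f" */
		return addr;
	return addr + FAKE_USER_SHADOW_BASE;	/* "add %_ASM_DX,%_ASM_AX" */
}

int main(void)
{
	printf("%#llx -> %#llx\n", 0x400000ULL,
	       (unsigned long long)remap_user_address(0x400000ULL));
	printf("%#llx -> %#llx\n",
	       (unsigned long long)(FAKE_USER_SHADOW_BASE + 0x1000),
	       (unsigned long long)remap_user_address(FAKE_USER_SHADOW_BASE + 0x1000));
	return 0;
}
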
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/insn.c linux-4.0.9-pax/arch/x86/lib/insn.c
--- linux-4.0.9/arch/x86/lib/insn.c	2015-05-07 02:10:38.020265499 +0200
+++ linux-4.0.9-pax/arch/x86/lib/insn.c	2015-05-07 02:11:34.160267150 +0200
@@ -20,8 +20,10 @@
 
 #ifdef __KERNEL__
 #include <linux/string.h>
+#include <asm/pgtable_types.h>
 #else
 #include <string.h>
+#define ktla_ktva(addr) addr
 #endif
 #include <asm/inat.h>
 #include <asm/insn.h>
@@ -60,9 +62,9 @@ void insn_init(struct insn *insn, const
 		buf_len = MAX_INSN_SIZE;
 
 	memset(insn, 0, sizeof(*insn));
-	insn->kaddr = kaddr;
-	insn->end_kaddr = kaddr + buf_len;
-	insn->next_byte = kaddr;
+	insn->kaddr = ktla_ktva(kaddr);
+	insn->end_kaddr = insn->kaddr + buf_len;
+	insn->next_byte = insn->kaddr;
 	insn->x86_64 = x86_64 ? 1 : 0;
 	insn->opnd_bytes = 4;
 	if (x86_64)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/iomap_copy_64.S linux-4.0.9-pax/arch/x86/lib/iomap_copy_64.S
--- linux-4.0.9/arch/x86/lib/iomap_copy_64.S	2015-03-18 15:21:50.292349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/iomap_copy_64.S	2015-04-15 12:13:52.942318620 +0200
@@ -17,6 +17,7 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/alternative-asm.h>
 
 /*
  * override generic version in lib/iomap_copy.c
@@ -25,6 +26,7 @@ ENTRY(__iowrite32_copy)
 	CFI_STARTPROC
 	movl %edx,%ecx
 	rep movsd
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 ENDPROC(__iowrite32_copy)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/memcpy_64.S linux-4.0.9-pax/arch/x86/lib/memcpy_64.S
--- linux-4.0.9/arch/x86/lib/memcpy_64.S	2015-04-13 11:21:01.894617459 +0200
+++ linux-4.0.9-pax/arch/x86/lib/memcpy_64.S	2015-04-15 12:13:52.942318620 +0200
@@ -24,7 +24,7 @@
  * This gets patched over the unrolled variant (below) via the
  * alternative instructions framework:
  */
-	.section .altinstr_replacement, "ax", @progbits
+	.section .altinstr_replacement, "a", @progbits
 .Lmemcpy_c:
 	movq %rdi, %rax
 	movq %rdx, %rcx
@@ -33,6 +33,7 @@
 	rep movsq
 	movl %edx, %ecx
 	rep movsb
+	pax_force_retaddr
 	ret
 .Lmemcpy_e:
 	.previous
@@ -44,11 +45,12 @@
  * This gets patched over the unrolled variant (below) via the
  * alternative instructions framework:
  */
-	.section .altinstr_replacement, "ax", @progbits
+	.section .altinstr_replacement, "a", @progbits
 .Lmemcpy_c_e:
 	movq %rdi, %rax
 	movq %rdx, %rcx
 	rep movsb
+	pax_force_retaddr
 	ret
 .Lmemcpy_e_e:
 	.previous
@@ -138,6 +140,7 @@ ENTRY(memcpy)
 	movq %r9,	1*8(%rdi)
 	movq %r10,	-2*8(%rdi, %rdx)
 	movq %r11,	-1*8(%rdi, %rdx)
+	pax_force_retaddr
 	retq
 	.p2align 4
 .Lless_16bytes:
@@ -150,6 +153,7 @@ ENTRY(memcpy)
 	movq -1*8(%rsi, %rdx),	%r9
 	movq %r8,	0*8(%rdi)
 	movq %r9,	-1*8(%rdi, %rdx)
+	pax_force_retaddr
 	retq
 	.p2align 4
 .Lless_8bytes:
@@ -163,6 +167,7 @@ ENTRY(memcpy)
 	movl -4(%rsi, %rdx), %r8d
 	movl %ecx, (%rdi)
 	movl %r8d, -4(%rdi, %rdx)
+	pax_force_retaddr
 	retq
 	.p2align 4
 .Lless_3bytes:
@@ -181,6 +186,7 @@ ENTRY(memcpy)
 	movb %cl, (%rdi)
 
 .Lend:
+	pax_force_retaddr
 	retq
 	CFI_ENDPROC
 ENDPROC(memcpy)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/memmove_64.S linux-4.0.9-pax/arch/x86/lib/memmove_64.S
--- linux-4.0.9/arch/x86/lib/memmove_64.S	2015-04-13 11:21:01.894617459 +0200
+++ linux-4.0.9-pax/arch/x86/lib/memmove_64.S	2015-04-15 12:13:52.942318620 +0200
@@ -205,14 +205,16 @@ ENTRY(__memmove)
 	movb (%rsi), %r11b
 	movb %r11b, (%rdi)
 13:
+	pax_force_retaddr
 	retq
 	CFI_ENDPROC
 
-	.section .altinstr_replacement,"ax"
+	.section .altinstr_replacement,"a"
 .Lmemmove_begin_forward_efs:
 	/* Forward moving data. */
 	movq %rdx, %rcx
 	rep movsb
+	pax_force_retaddr
 	retq
 .Lmemmove_end_forward_efs:
 	.previous
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/memset_64.S linux-4.0.9-pax/arch/x86/lib/memset_64.S
--- linux-4.0.9/arch/x86/lib/memset_64.S	2015-04-13 11:21:01.894617459 +0200
+++ linux-4.0.9-pax/arch/x86/lib/memset_64.S	2015-04-15 12:13:52.942318620 +0200
@@ -16,7 +16,7 @@
  * 
  * rax   original destination
  */	
-	.section .altinstr_replacement, "ax", @progbits
+	.section .altinstr_replacement, "a", @progbits
 .Lmemset_c:
 	movq %rdi,%r9
 	movq %rdx,%rcx
@@ -30,6 +30,7 @@
 	movl %edx,%ecx
 	rep stosb
 	movq %r9,%rax
+	pax_force_retaddr
 	ret
 .Lmemset_e:
 	.previous
@@ -45,13 +46,14 @@
  *
  * rax   original destination
  */
-	.section .altinstr_replacement, "ax", @progbits
+	.section .altinstr_replacement, "a", @progbits
 .Lmemset_c_e:
 	movq %rdi,%r9
 	movb %sil,%al
 	movq %rdx,%rcx
 	rep stosb
 	movq %r9,%rax
+	pax_force_retaddr
 	ret
 .Lmemset_e_e:
 	.previous
@@ -120,6 +122,7 @@ ENTRY(__memset)
 
 .Lende:
 	movq	%r10,%rax
+	pax_force_retaddr
 	ret
 
 	CFI_RESTORE_STATE
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/mmx_32.c linux-4.0.9-pax/arch/x86/lib/mmx_32.c
--- linux-4.0.9/arch/x86/lib/mmx_32.c	2015-03-18 15:21:50.292349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/mmx_32.c	2015-04-15 12:13:52.942318620 +0200
@@ -29,6 +29,7 @@ void *_mmx_memcpy(void *to, const void *
 {
 	void *p;
 	int i;
+	unsigned long cr0;
 
 	if (unlikely(in_interrupt()))
 		return __memcpy(to, from, len);
@@ -39,44 +40,72 @@ void *_mmx_memcpy(void *to, const void *
 	kernel_fpu_begin();
 
 	__asm__ __volatile__ (
-		"1: prefetch (%0)\n"		/* This set is 28 bytes */
-		"   prefetch 64(%0)\n"
-		"   prefetch 128(%0)\n"
-		"   prefetch 192(%0)\n"
-		"   prefetch 256(%0)\n"
+		"1: prefetch (%1)\n"		/* This set is 28 bytes */
+		"   prefetch 64(%1)\n"
+		"   prefetch 128(%1)\n"
+		"   prefetch 192(%1)\n"
+		"   prefetch 256(%1)\n"
 		"2:  \n"
 		".section .fixup, \"ax\"\n"
-		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
+		"3:  \n"
+
+#ifdef CONFIG_PAX_KERNEXEC
+		"   movl %%cr0, %0\n"
+		"   movl %0, %%eax\n"
+		"   andl $0xFFFEFFFF, %%eax\n"
+		"   movl %%eax, %%cr0\n"
+#endif
+
+		"   movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
+
+#ifdef CONFIG_PAX_KERNEXEC
+		"   movl %0, %%cr0\n"
+#endif
+
 		"   jmp 2b\n"
 		".previous\n"
 			_ASM_EXTABLE(1b, 3b)
-			: : "r" (from));
+			: "=&r" (cr0) : "r" (from) : "ax");
 
 	for ( ; i > 5; i--) {
 		__asm__ __volatile__ (
-		"1:  prefetch 320(%0)\n"
-		"2:  movq (%0), %%mm0\n"
-		"  movq 8(%0), %%mm1\n"
-		"  movq 16(%0), %%mm2\n"
-		"  movq 24(%0), %%mm3\n"
-		"  movq %%mm0, (%1)\n"
-		"  movq %%mm1, 8(%1)\n"
-		"  movq %%mm2, 16(%1)\n"
-		"  movq %%mm3, 24(%1)\n"
-		"  movq 32(%0), %%mm0\n"
-		"  movq 40(%0), %%mm1\n"
-		"  movq 48(%0), %%mm2\n"
-		"  movq 56(%0), %%mm3\n"
-		"  movq %%mm0, 32(%1)\n"
-		"  movq %%mm1, 40(%1)\n"
-		"  movq %%mm2, 48(%1)\n"
-		"  movq %%mm3, 56(%1)\n"
+		"1:  prefetch 320(%1)\n"
+		"2:  movq (%1), %%mm0\n"
+		"  movq 8(%1), %%mm1\n"
+		"  movq 16(%1), %%mm2\n"
+		"  movq 24(%1), %%mm3\n"
+		"  movq %%mm0, (%2)\n"
+		"  movq %%mm1, 8(%2)\n"
+		"  movq %%mm2, 16(%2)\n"
+		"  movq %%mm3, 24(%2)\n"
+		"  movq 32(%1), %%mm0\n"
+		"  movq 40(%1), %%mm1\n"
+		"  movq 48(%1), %%mm2\n"
+		"  movq 56(%1), %%mm3\n"
+		"  movq %%mm0, 32(%2)\n"
+		"  movq %%mm1, 40(%2)\n"
+		"  movq %%mm2, 48(%2)\n"
+		"  movq %%mm3, 56(%2)\n"
 		".section .fixup, \"ax\"\n"
-		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
+		"3:\n"
+
+#ifdef CONFIG_PAX_KERNEXEC
+		"   movl %%cr0, %0\n"
+		"   movl %0, %%eax\n"
+		"   andl $0xFFFEFFFF, %%eax\n"
+		"   movl %%eax, %%cr0\n"
+#endif
+
+		"   movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
+
+#ifdef CONFIG_PAX_KERNEXEC
+		"   movl %0, %%cr0\n"
+#endif
+
 		"   jmp 2b\n"
 		".previous\n"
 			_ASM_EXTABLE(1b, 3b)
-			: : "r" (from), "r" (to) : "memory");
+			: "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax");
 
 		from += 64;
 		to += 64;
@@ -158,6 +187,7 @@ static void fast_clear_page(void *page)
 static void fast_copy_page(void *to, void *from)
 {
 	int i;
+	unsigned long cr0;
 
 	kernel_fpu_begin();
 
@@ -166,42 +196,70 @@ static void fast_copy_page(void *to, voi
 	 * but that is for later. -AV
 	 */
 	__asm__ __volatile__(
-		"1: prefetch (%0)\n"
-		"   prefetch 64(%0)\n"
-		"   prefetch 128(%0)\n"
-		"   prefetch 192(%0)\n"
-		"   prefetch 256(%0)\n"
+		"1: prefetch (%1)\n"
+		"   prefetch 64(%1)\n"
+		"   prefetch 128(%1)\n"
+		"   prefetch 192(%1)\n"
+		"   prefetch 256(%1)\n"
 		"2:  \n"
 		".section .fixup, \"ax\"\n"
-		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
+		"3:  \n"
+
+#ifdef CONFIG_PAX_KERNEXEC
+		"   movl %%cr0, %0\n"
+		"   movl %0, %%eax\n"
+		"   andl $0xFFFEFFFF, %%eax\n"
+		"   movl %%eax, %%cr0\n"
+#endif
+
+		"   movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
+
+#ifdef CONFIG_PAX_KERNEXEC
+		"   movl %0, %%cr0\n"
+#endif
+
 		"   jmp 2b\n"
 		".previous\n"
-			_ASM_EXTABLE(1b, 3b) : : "r" (from));
+			_ASM_EXTABLE(1b, 3b) : "=&r" (cr0) : "r" (from) : "ax");
 
 	for (i = 0; i < (4096-320)/64; i++) {
 		__asm__ __volatile__ (
-		"1: prefetch 320(%0)\n"
-		"2: movq (%0), %%mm0\n"
-		"   movntq %%mm0, (%1)\n"
-		"   movq 8(%0), %%mm1\n"
-		"   movntq %%mm1, 8(%1)\n"
-		"   movq 16(%0), %%mm2\n"
-		"   movntq %%mm2, 16(%1)\n"
-		"   movq 24(%0), %%mm3\n"
-		"   movntq %%mm3, 24(%1)\n"
-		"   movq 32(%0), %%mm4\n"
-		"   movntq %%mm4, 32(%1)\n"
-		"   movq 40(%0), %%mm5\n"
-		"   movntq %%mm5, 40(%1)\n"
-		"   movq 48(%0), %%mm6\n"
-		"   movntq %%mm6, 48(%1)\n"
-		"   movq 56(%0), %%mm7\n"
-		"   movntq %%mm7, 56(%1)\n"
+		"1: prefetch 320(%1)\n"
+		"2: movq (%1), %%mm0\n"
+		"   movntq %%mm0, (%2)\n"
+		"   movq 8(%1), %%mm1\n"
+		"   movntq %%mm1, 8(%2)\n"
+		"   movq 16(%1), %%mm2\n"
+		"   movntq %%mm2, 16(%2)\n"
+		"   movq 24(%1), %%mm3\n"
+		"   movntq %%mm3, 24(%2)\n"
+		"   movq 32(%1), %%mm4\n"
+		"   movntq %%mm4, 32(%2)\n"
+		"   movq 40(%1), %%mm5\n"
+		"   movntq %%mm5, 40(%2)\n"
+		"   movq 48(%1), %%mm6\n"
+		"   movntq %%mm6, 48(%2)\n"
+		"   movq 56(%1), %%mm7\n"
+		"   movntq %%mm7, 56(%2)\n"
 		".section .fixup, \"ax\"\n"
-		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
+		"3:\n"
+
+#ifdef CONFIG_PAX_KERNEXEC
+		"   movl %%cr0, %0\n"
+		"   movl %0, %%eax\n"
+		"   andl $0xFFFEFFFF, %%eax\n"
+		"   movl %%eax, %%cr0\n"
+#endif
+
+		"   movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
+
+#ifdef CONFIG_PAX_KERNEXEC
+		"   movl %0, %%cr0\n"
+#endif
+
 		"   jmp 2b\n"
 		".previous\n"
-		_ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory");
+		_ASM_EXTABLE(1b, 3b) : "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax");
 
 		from += 64;
 		to += 64;
@@ -280,47 +338,76 @@ static void fast_clear_page(void *page)
 static void fast_copy_page(void *to, void *from)
 {
 	int i;
+	unsigned long cr0;
 
 	kernel_fpu_begin();
 
 	__asm__ __volatile__ (
-		"1: prefetch (%0)\n"
-		"   prefetch 64(%0)\n"
-		"   prefetch 128(%0)\n"
-		"   prefetch 192(%0)\n"
-		"   prefetch 256(%0)\n"
+		"1: prefetch (%1)\n"
+		"   prefetch 64(%1)\n"
+		"   prefetch 128(%1)\n"
+		"   prefetch 192(%1)\n"
+		"   prefetch 256(%1)\n"
 		"2:  \n"
 		".section .fixup, \"ax\"\n"
-		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
+		"3:  \n"
+
+#ifdef CONFIG_PAX_KERNEXEC
+		"   movl %%cr0, %0\n"
+		"   movl %0, %%eax\n"
+		"   andl $0xFFFEFFFF, %%eax\n"
+		"   movl %%eax, %%cr0\n"
+#endif
+
+		"   movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
+
+#ifdef CONFIG_PAX_KERNEXEC
+		"   movl %0, %%cr0\n"
+#endif
+
 		"   jmp 2b\n"
 		".previous\n"
-			_ASM_EXTABLE(1b, 3b) : : "r" (from));
+			_ASM_EXTABLE(1b, 3b) : "=&r" (cr0) : "r" (from) : "ax");
 
 	for (i = 0; i < 4096/64; i++) {
 		__asm__ __volatile__ (
-		"1: prefetch 320(%0)\n"
-		"2: movq (%0), %%mm0\n"
-		"   movq 8(%0), %%mm1\n"
-		"   movq 16(%0), %%mm2\n"
-		"   movq 24(%0), %%mm3\n"
-		"   movq %%mm0, (%1)\n"
-		"   movq %%mm1, 8(%1)\n"
-		"   movq %%mm2, 16(%1)\n"
-		"   movq %%mm3, 24(%1)\n"
-		"   movq 32(%0), %%mm0\n"
-		"   movq 40(%0), %%mm1\n"
-		"   movq 48(%0), %%mm2\n"
-		"   movq 56(%0), %%mm3\n"
-		"   movq %%mm0, 32(%1)\n"
-		"   movq %%mm1, 40(%1)\n"
-		"   movq %%mm2, 48(%1)\n"
-		"   movq %%mm3, 56(%1)\n"
+		"1: prefetch 320(%1)\n"
+		"2: movq (%1), %%mm0\n"
+		"   movq 8(%1), %%mm1\n"
+		"   movq 16(%1), %%mm2\n"
+		"   movq 24(%1), %%mm3\n"
+		"   movq %%mm0, (%2)\n"
+		"   movq %%mm1, 8(%2)\n"
+		"   movq %%mm2, 16(%2)\n"
+		"   movq %%mm3, 24(%2)\n"
+		"   movq 32(%1), %%mm0\n"
+		"   movq 40(%1), %%mm1\n"
+		"   movq 48(%1), %%mm2\n"
+		"   movq 56(%1), %%mm3\n"
+		"   movq %%mm0, 32(%2)\n"
+		"   movq %%mm1, 40(%2)\n"
+		"   movq %%mm2, 48(%2)\n"
+		"   movq %%mm3, 56(%2)\n"
 		".section .fixup, \"ax\"\n"
-		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
+		"3:\n"
+
+#ifdef CONFIG_PAX_KERNEXEC
+		"   movl %%cr0, %0\n"
+		"   movl %0, %%eax\n"
+		"   andl $0xFFFEFFFF, %%eax\n"
+		"   movl %%eax, %%cr0\n"
+#endif
+
+		"   movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
+
+#ifdef CONFIG_PAX_KERNEXEC
+		"   movl %0, %%cr0\n"
+#endif
+
 		"   jmp 2b\n"
 		".previous\n"
 			_ASM_EXTABLE(1b, 3b)
-			: : "r" (from), "r" (to) : "memory");
+			: "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax");
 
 		from += 64;
 		to += 64;
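
The mmx_32.c fixup paths patch a short jump (movw $0x1AEB or $0x05EB) over the faulting prefetch, i.e. they modify kernel text at run time. Under CONFIG_PAX_KERNEXEC that text is write-protected, so the hunks save %cr0, clear bit 16 with the 0xFFFEFFFF mask (bit 16 is CR0.WP), perform the store, and then restore the saved value; the asm accordingly gains a cr0 output operand and an eax clobber, which shifts the original %0/%1 operands to %1/%2. The trivial C illustration below only shows what that mask does to a CR0-shaped value, using plain arithmetic and no privileged access.

/*
 * What the 0xFFFEFFFF mask in the KERNEXEC fixup paths does: CR0 bit 16 is
 * WP (write protect); clearing it lets the kernel patch its own read-only
 * text, after which the saved CR0 value is written back.
 */
#include <stdint.h>
#include <stdio.h>

#define X86_CR0_WP	(UINT32_C(1) << 16)

int main(void)
{
	uint32_t cr0 = 0x8005003BU;	/* an illustrative CR0 value with WP set */
	uint32_t saved = cr0;

	cr0 &= 0xFFFEFFFFU;		/* same mask as "andl $0xFFFEFFFF, %%eax" */
	printf("WP after clear:   %u\n", !!(cr0 & X86_CR0_WP));

	cr0 = saved;			/* "movl %0, %%cr0": restore the old value */
	printf("WP after restore: %u\n", !!(cr0 & X86_CR0_WP));
	return 0;
}
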
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/msr-reg.S linux-4.0.9-pax/arch/x86/lib/msr-reg.S
--- linux-4.0.9/arch/x86/lib/msr-reg.S	2015-03-18 15:21:50.292349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/msr-reg.S	2015-04-15 12:13:52.942318620 +0200
@@ -3,6 +3,7 @@
 #include <asm/dwarf2.h>
 #include <asm/asm.h>
 #include <asm/msr.h>
+#include <asm/alternative-asm.h>
 
 #ifdef CONFIG_X86_64
 /*
@@ -37,6 +38,7 @@ ENTRY(\op\()_safe_regs)
 	movl    %edi, 28(%r10)
 	popq_cfi %rbp
 	popq_cfi %rbx
+	pax_force_retaddr
 	ret
 3:
 	CFI_RESTORE_STATE
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/putuser.S linux-4.0.9-pax/arch/x86/lib/putuser.S
--- linux-4.0.9/arch/x86/lib/putuser.S	2015-03-18 15:21:50.292349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/putuser.S	2015-04-15 12:13:52.942318620 +0200
@@ -16,7 +16,9 @@
 #include <asm/errno.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
-
+#include <asm/segment.h>
+#include <asm/pgtable.h>
+#include <asm/alternative-asm.h>
 
 /*
  * __put_user_X
@@ -30,57 +32,125 @@
  * as they get called from within inline assembly.
  */
 
-#define ENTER	CFI_STARTPROC ; \
-		GET_THREAD_INFO(%_ASM_BX)
-#define EXIT	ASM_CLAC ;	\
-		ret ;		\
+#define ENTER	CFI_STARTPROC
+#define EXIT	ASM_CLAC ;		\
+		pax_force_retaddr ;	\
+		ret ;			\
 		CFI_ENDPROC
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+#define _DEST %_ASM_CX,%_ASM_BX
+#else
+#define _DEST %_ASM_CX
+#endif
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_MEMORY_UDEREF)
+#define __copyuser_seg gs;
+#else
+#define __copyuser_seg
+#endif
+
 .text
 ENTRY(__put_user_1)
 	ENTER
+
+#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF)
+	GET_THREAD_INFO(%_ASM_BX)
 	cmp TI_addr_limit(%_ASM_BX),%_ASM_CX
 	jae bad_put_user
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+	mov pax_user_shadow_base,%_ASM_BX
+	cmp %_ASM_BX,%_ASM_CX
+	jb 1234f
+	xor %ebx,%ebx
+1234:
+#endif
+
+#endif
+
 	ASM_STAC
-1:	movb %al,(%_ASM_CX)
+1:	__copyuser_seg movb %al,(_DEST)
 	xor %eax,%eax
 	EXIT
 ENDPROC(__put_user_1)
 
 ENTRY(__put_user_2)
 	ENTER
+
+#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF)
+	GET_THREAD_INFO(%_ASM_BX)
 	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
 	sub $1,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+	mov pax_user_shadow_base,%_ASM_BX
+	cmp %_ASM_BX,%_ASM_CX
+	jb 1234f
+	xor %ebx,%ebx
+1234:
+#endif
+
+#endif
+
 	ASM_STAC
-2:	movw %ax,(%_ASM_CX)
+2:	__copyuser_seg movw %ax,(_DEST)
 	xor %eax,%eax
 	EXIT
 ENDPROC(__put_user_2)
 
 ENTRY(__put_user_4)
 	ENTER
+
+#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF)
+	GET_THREAD_INFO(%_ASM_BX)
 	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
 	sub $3,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+	mov pax_user_shadow_base,%_ASM_BX
+	cmp %_ASM_BX,%_ASM_CX
+	jb 1234f
+	xor %ebx,%ebx
+1234:
+#endif
+
+#endif
+
 	ASM_STAC
-3:	movl %eax,(%_ASM_CX)
+3:	__copyuser_seg movl %eax,(_DEST)
 	xor %eax,%eax
 	EXIT
 ENDPROC(__put_user_4)
 
 ENTRY(__put_user_8)
 	ENTER
+
+#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF)
+	GET_THREAD_INFO(%_ASM_BX)
 	mov TI_addr_limit(%_ASM_BX),%_ASM_BX
 	sub $7,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
 	jae bad_put_user
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+	mov pax_user_shadow_base,%_ASM_BX
+	cmp %_ASM_BX,%_ASM_CX
+	jb 1234f
+	xor %ebx,%ebx
+1234:
+#endif
+
+#endif
+
 	ASM_STAC
-4:	mov %_ASM_AX,(%_ASM_CX)
+4:	__copyuser_seg mov %_ASM_AX,(_DEST)
 #ifdef CONFIG_X86_32
-5:	movl %edx,4(%_ASM_CX)
+5:	__copyuser_seg movl %edx,4(_DEST)
 #endif
 	xor %eax,%eax
 	EXIT
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/rwsem.S linux-4.0.9-pax/arch/x86/lib/rwsem.S
--- linux-4.0.9/arch/x86/lib/rwsem.S	2015-03-18 15:21:50.292349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/rwsem.S	2015-04-15 12:13:52.942318620 +0200
@@ -94,6 +94,7 @@ ENTRY(call_rwsem_down_read_failed)
 	__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
 	CFI_RESTORE __ASM_REG(dx)
 	restore_common_regs
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 ENDPROC(call_rwsem_down_read_failed)
@@ -104,6 +105,7 @@ ENTRY(call_rwsem_down_write_failed)
 	movq %rax,%rdi
 	call rwsem_down_write_failed
 	restore_common_regs
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 ENDPROC(call_rwsem_down_write_failed)
@@ -117,7 +119,8 @@ ENTRY(call_rwsem_wake)
 	movq %rax,%rdi
 	call rwsem_wake
 	restore_common_regs
-1:	ret
+1:	pax_force_retaddr
+	ret
 	CFI_ENDPROC
 ENDPROC(call_rwsem_wake)
 
@@ -131,6 +134,7 @@ ENTRY(call_rwsem_downgrade_wake)
 	__ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
 	CFI_RESTORE __ASM_REG(dx)
 	restore_common_regs
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 ENDPROC(call_rwsem_downgrade_wake)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/thunk_64.S linux-4.0.9-pax/arch/x86/lib/thunk_64.S
--- linux-4.0.9/arch/x86/lib/thunk_64.S	2015-03-18 15:21:50.292349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/thunk_64.S	2015-04-15 12:13:52.942318620 +0200
@@ -9,6 +9,7 @@
 #include <asm/dwarf2.h>
 #include <asm/calling.h>
 #include <asm/asm.h>
+#include <asm/alternative-asm.h>
 
 	/* rdi:	arg1 ... normal C conventions. rax is saved/restored. */
 	.macro THUNK name, func, put_ret_addr_in_rdi=0
@@ -16,11 +17,11 @@
 \name:
 	CFI_STARTPROC
 
-	/* this one pushes 9 elems, the next one would be %rIP */
-	SAVE_ARGS
+	/* this one pushes 15+1 elems, the next one would be %rIP */
+	SAVE_ARGS 8
 
 	.if \put_ret_addr_in_rdi
-	movq_cfi_restore 9*8, rdi
+	movq_cfi_restore RIP, rdi
 	.endif
 
 	call \func
@@ -47,9 +48,10 @@
 
 	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
 	CFI_STARTPROC
-	SAVE_ARGS
+	SAVE_ARGS 8
 restore:
-	RESTORE_ARGS
+	RESTORE_ARGS 1,8
+	pax_force_retaddr
 	ret
 	CFI_ENDPROC
 	_ASM_NOKPROBE(restore)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/usercopy_32.c linux-4.0.9-pax/arch/x86/lib/usercopy_32.c
--- linux-4.0.9/arch/x86/lib/usercopy_32.c	2015-03-18 15:21:50.292349253 +0100
+++ linux-4.0.9-pax/arch/x86/lib/usercopy_32.c	2015-04-15 12:13:52.942318620 +0200
@@ -42,11 +42,13 @@ do {									\
 	int __d0;							\
 	might_fault();							\
 	__asm__ __volatile__(						\
+		__COPYUSER_SET_ES					\
 		ASM_STAC "\n"						\
 		"0:	rep; stosl\n"					\
 		"	movl %2,%0\n"					\
 		"1:	rep; stosb\n"					\
 		"2: " ASM_CLAC "\n"					\
+		__COPYUSER_RESTORE_ES					\
 		".section .fixup,\"ax\"\n"				\
 		"3:	lea 0(%2,%0,4),%0\n"				\
 		"	jmp 2b\n"					\
@@ -98,7 +100,7 @@ EXPORT_SYMBOL(__clear_user);
 
 #ifdef CONFIG_X86_INTEL_USERCOPY
 static unsigned long
-__copy_user_intel(void __user *to, const void *from, unsigned long size)
+__generic_copy_to_user_intel(void __user *to, const void *from, unsigned long size)
 {
 	int d0, d1;
 	__asm__ __volatile__(
@@ -110,36 +112,36 @@ __copy_user_intel(void __user *to, const
 		       "       .align 2,0x90\n"
 		       "3:     movl 0(%4), %%eax\n"
 		       "4:     movl 4(%4), %%edx\n"
-		       "5:     movl %%eax, 0(%3)\n"
-		       "6:     movl %%edx, 4(%3)\n"
+		       "5:     "__copyuser_seg" movl %%eax, 0(%3)\n"
+		       "6:     "__copyuser_seg" movl %%edx, 4(%3)\n"
 		       "7:     movl 8(%4), %%eax\n"
 		       "8:     movl 12(%4),%%edx\n"
-		       "9:     movl %%eax, 8(%3)\n"
-		       "10:    movl %%edx, 12(%3)\n"
+		       "9:     "__copyuser_seg" movl %%eax, 8(%3)\n"
+		       "10:    "__copyuser_seg" movl %%edx, 12(%3)\n"
 		       "11:    movl 16(%4), %%eax\n"
 		       "12:    movl 20(%4), %%edx\n"
-		       "13:    movl %%eax, 16(%3)\n"
-		       "14:    movl %%edx, 20(%3)\n"
+		       "13:    "__copyuser_seg" movl %%eax, 16(%3)\n"
+		       "14:    "__copyuser_seg" movl %%edx, 20(%3)\n"
 		       "15:    movl 24(%4), %%eax\n"
 		       "16:    movl 28(%4), %%edx\n"
-		       "17:    movl %%eax, 24(%3)\n"
-		       "18:    movl %%edx, 28(%3)\n"
+		       "17:    "__copyuser_seg" movl %%eax, 24(%3)\n"
+		       "18:    "__copyuser_seg" movl %%edx, 28(%3)\n"
 		       "19:    movl 32(%4), %%eax\n"
 		       "20:    movl 36(%4), %%edx\n"
-		       "21:    movl %%eax, 32(%3)\n"
-		       "22:    movl %%edx, 36(%3)\n"
+		       "21:    "__copyuser_seg" movl %%eax, 32(%3)\n"
+		       "22:    "__copyuser_seg" movl %%edx, 36(%3)\n"
 		       "23:    movl 40(%4), %%eax\n"
 		       "24:    movl 44(%4), %%edx\n"
-		       "25:    movl %%eax, 40(%3)\n"
-		       "26:    movl %%edx, 44(%3)\n"
+		       "25:    "__copyuser_seg" movl %%eax, 40(%3)\n"
+		       "26:    "__copyuser_seg" movl %%edx, 44(%3)\n"
 		       "27:    movl 48(%4), %%eax\n"
 		       "28:    movl 52(%4), %%edx\n"
-		       "29:    movl %%eax, 48(%3)\n"
-		       "30:    movl %%edx, 52(%3)\n"
+		       "29:    "__copyuser_seg" movl %%eax, 48(%3)\n"
+		       "30:    "__copyuser_seg" movl %%edx, 52(%3)\n"
 		       "31:    movl 56(%4), %%eax\n"
 		       "32:    movl 60(%4), %%edx\n"
-		       "33:    movl %%eax, 56(%3)\n"
-		       "34:    movl %%edx, 60(%3)\n"
+		       "33:    "__copyuser_seg" movl %%eax, 56(%3)\n"
+		       "34:    "__copyuser_seg" movl %%edx, 60(%3)\n"
 		       "       addl $-64, %0\n"
 		       "       addl $64, %4\n"
 		       "       addl $64, %3\n"
@@ -149,10 +151,116 @@ __copy_user_intel(void __user *to, const
 		       "       shrl  $2, %0\n"
 		       "       andl  $3, %%eax\n"
 		       "       cld\n"
+		       __COPYUSER_SET_ES
 		       "99:    rep; movsl\n"
 		       "36:    movl %%eax, %0\n"
 		       "37:    rep; movsb\n"
 		       "100:\n"
+		       __COPYUSER_RESTORE_ES
+		       ".section .fixup,\"ax\"\n"
+		       "101:   lea 0(%%eax,%0,4),%0\n"
+		       "       jmp 100b\n"
+		       ".previous\n"
+		       _ASM_EXTABLE(1b,100b)
+		       _ASM_EXTABLE(2b,100b)
+		       _ASM_EXTABLE(3b,100b)
+		       _ASM_EXTABLE(4b,100b)
+		       _ASM_EXTABLE(5b,100b)
+		       _ASM_EXTABLE(6b,100b)
+		       _ASM_EXTABLE(7b,100b)
+		       _ASM_EXTABLE(8b,100b)
+		       _ASM_EXTABLE(9b,100b)
+		       _ASM_EXTABLE(10b,100b)
+		       _ASM_EXTABLE(11b,100b)
+		       _ASM_EXTABLE(12b,100b)
+		       _ASM_EXTABLE(13b,100b)
+		       _ASM_EXTABLE(14b,100b)
+		       _ASM_EXTABLE(15b,100b)
+		       _ASM_EXTABLE(16b,100b)
+		       _ASM_EXTABLE(17b,100b)
+		       _ASM_EXTABLE(18b,100b)
+		       _ASM_EXTABLE(19b,100b)
+		       _ASM_EXTABLE(20b,100b)
+		       _ASM_EXTABLE(21b,100b)
+		       _ASM_EXTABLE(22b,100b)
+		       _ASM_EXTABLE(23b,100b)
+		       _ASM_EXTABLE(24b,100b)
+		       _ASM_EXTABLE(25b,100b)
+		       _ASM_EXTABLE(26b,100b)
+		       _ASM_EXTABLE(27b,100b)
+		       _ASM_EXTABLE(28b,100b)
+		       _ASM_EXTABLE(29b,100b)
+		       _ASM_EXTABLE(30b,100b)
+		       _ASM_EXTABLE(31b,100b)
+		       _ASM_EXTABLE(32b,100b)
+		       _ASM_EXTABLE(33b,100b)
+		       _ASM_EXTABLE(34b,100b)
+		       _ASM_EXTABLE(35b,100b)
+		       _ASM_EXTABLE(36b,100b)
+		       _ASM_EXTABLE(37b,100b)
+		       _ASM_EXTABLE(99b,101b)
+		       : "=&c"(size), "=&D" (d0), "=&S" (d1)
+		       :  "1"(to), "2"(from), "0"(size)
+		       : "eax", "edx", "memory");
+	return size;
+}
+
+static unsigned long
+__generic_copy_from_user_intel(void *to, const void __user *from, unsigned long size)
+{
+	int d0, d1;
+	__asm__ __volatile__(
+		       "       .align 2,0x90\n"
+		       "1:     "__copyuser_seg" movl 32(%4), %%eax\n"
+		       "       cmpl $67, %0\n"
+		       "       jbe 3f\n"
+		       "2:     "__copyuser_seg" movl 64(%4), %%eax\n"
+		       "       .align 2,0x90\n"
+		       "3:     "__copyuser_seg" movl 0(%4), %%eax\n"
+		       "4:     "__copyuser_seg" movl 4(%4), %%edx\n"
+		       "5:     movl %%eax, 0(%3)\n"
+		       "6:     movl %%edx, 4(%3)\n"
+		       "7:     "__copyuser_seg" movl 8(%4), %%eax\n"
+		       "8:     "__copyuser_seg" movl 12(%4),%%edx\n"
+		       "9:     movl %%eax, 8(%3)\n"
+		       "10:    movl %%edx, 12(%3)\n"
+		       "11:    "__copyuser_seg" movl 16(%4), %%eax\n"
+		       "12:    "__copyuser_seg" movl 20(%4), %%edx\n"
+		       "13:    movl %%eax, 16(%3)\n"
+		       "14:    movl %%edx, 20(%3)\n"
+		       "15:    "__copyuser_seg" movl 24(%4), %%eax\n"
+		       "16:    "__copyuser_seg" movl 28(%4), %%edx\n"
+		       "17:    movl %%eax, 24(%3)\n"
+		       "18:    movl %%edx, 28(%3)\n"
+		       "19:    "__copyuser_seg" movl 32(%4), %%eax\n"
+		       "20:    "__copyuser_seg" movl 36(%4), %%edx\n"
+		       "21:    movl %%eax, 32(%3)\n"
+		       "22:    movl %%edx, 36(%3)\n"
+		       "23:    "__copyuser_seg" movl 40(%4), %%eax\n"
+		       "24:    "__copyuser_seg" movl 44(%4), %%edx\n"
+		       "25:    movl %%eax, 40(%3)\n"
+		       "26:    movl %%edx, 44(%3)\n"
+		       "27:    "__copyuser_seg" movl 48(%4), %%eax\n"
+		       "28:    "__copyuser_seg" movl 52(%4), %%edx\n"
+		       "29:    movl %%eax, 48(%3)\n"
+		       "30:    movl %%edx, 52(%3)\n"
+		       "31:    "__copyuser_seg" movl 56(%4), %%eax\n"
+		       "32:    "__copyuser_seg" movl 60(%4), %%edx\n"
+		       "33:    movl %%eax, 56(%3)\n"
+		       "34:    movl %%edx, 60(%3)\n"
+		       "       addl $-64, %0\n"
+		       "       addl $64, %4\n"
+		       "       addl $64, %3\n"
+		       "       cmpl $63, %0\n"
+		       "       ja  1b\n"
+		       "35:    movl  %0, %%eax\n"
+		       "       shrl  $2, %0\n"
+		       "       andl  $3, %%eax\n"
+		       "       cld\n"
+		       "99:    rep; "__copyuser_seg" movsl\n"
+		       "36:    movl %%eax, %0\n"
+		       "37:    rep; "__copyuser_seg" movsb\n"
+		       "100:\n"
 		       ".section .fixup,\"ax\"\n"
 		       "101:   lea 0(%%eax,%0,4),%0\n"
 		       "       jmp 100b\n"
@@ -207,41 +315,41 @@ __copy_user_zeroing_intel(void *to, cons
 	int d0, d1;
 	__asm__ __volatile__(
 		       "        .align 2,0x90\n"
-		       "0:      movl 32(%4), %%eax\n"
+		       "0:      "__copyuser_seg" movl 32(%4), %%eax\n"
 		       "        cmpl $67, %0\n"
 		       "        jbe 2f\n"
-		       "1:      movl 64(%4), %%eax\n"
+		       "1:      "__copyuser_seg" movl 64(%4), %%eax\n"
 		       "        .align 2,0x90\n"
-		       "2:      movl 0(%4), %%eax\n"
-		       "21:     movl 4(%4), %%edx\n"
+		       "2:      "__copyuser_seg" movl 0(%4), %%eax\n"
+		       "21:     "__copyuser_seg" movl 4(%4), %%edx\n"
 		       "        movl %%eax, 0(%3)\n"
 		       "        movl %%edx, 4(%3)\n"
-		       "3:      movl 8(%4), %%eax\n"
-		       "31:     movl 12(%4),%%edx\n"
+		       "3:      "__copyuser_seg" movl 8(%4), %%eax\n"
+		       "31:     "__copyuser_seg" movl 12(%4),%%edx\n"
 		       "        movl %%eax, 8(%3)\n"
 		       "        movl %%edx, 12(%3)\n"
-		       "4:      movl 16(%4), %%eax\n"
-		       "41:     movl 20(%4), %%edx\n"
+		       "4:      "__copyuser_seg" movl 16(%4), %%eax\n"
+		       "41:     "__copyuser_seg" movl 20(%4), %%edx\n"
 		       "        movl %%eax, 16(%3)\n"
 		       "        movl %%edx, 20(%3)\n"
-		       "10:     movl 24(%4), %%eax\n"
-		       "51:     movl 28(%4), %%edx\n"
+		       "10:     "__copyuser_seg" movl 24(%4), %%eax\n"
+		       "51:     "__copyuser_seg" movl 28(%4), %%edx\n"
 		       "        movl %%eax, 24(%3)\n"
 		       "        movl %%edx, 28(%3)\n"
-		       "11:     movl 32(%4), %%eax\n"
-		       "61:     movl 36(%4), %%edx\n"
+		       "11:     "__copyuser_seg" movl 32(%4), %%eax\n"
+		       "61:     "__copyuser_seg" movl 36(%4), %%edx\n"
 		       "        movl %%eax, 32(%3)\n"
 		       "        movl %%edx, 36(%3)\n"
-		       "12:     movl 40(%4), %%eax\n"
-		       "71:     movl 44(%4), %%edx\n"
+		       "12:     "__copyuser_seg" movl 40(%4), %%eax\n"
+		       "71:     "__copyuser_seg" movl 44(%4), %%edx\n"
 		       "        movl %%eax, 40(%3)\n"
 		       "        movl %%edx, 44(%3)\n"
-		       "13:     movl 48(%4), %%eax\n"
-		       "81:     movl 52(%4), %%edx\n"
+		       "13:     "__copyuser_seg" movl 48(%4), %%eax\n"
+		       "81:     "__copyuser_seg" movl 52(%4), %%edx\n"
 		       "        movl %%eax, 48(%3)\n"
 		       "        movl %%edx, 52(%3)\n"
-		       "14:     movl 56(%4), %%eax\n"
-		       "91:     movl 60(%4), %%edx\n"
+		       "14:     "__copyuser_seg" movl 56(%4), %%eax\n"
+		       "91:     "__copyuser_seg" movl 60(%4), %%edx\n"
 		       "        movl %%eax, 56(%3)\n"
 		       "        movl %%edx, 60(%3)\n"
 		       "        addl $-64, %0\n"
@@ -253,9 +361,9 @@ __copy_user_zeroing_intel(void *to, cons
 		       "        shrl  $2, %0\n"
 		       "        andl $3, %%eax\n"
 		       "        cld\n"
-		       "6:      rep; movsl\n"
+		       "6:      rep; "__copyuser_seg" movsl\n"
 		       "        movl %%eax,%0\n"
-		       "7:      rep; movsb\n"
+		       "7:      rep; "__copyuser_seg" movsb\n"
 		       "8:\n"
 		       ".section .fixup,\"ax\"\n"
 		       "9:      lea 0(%%eax,%0,4),%0\n"
@@ -305,41 +413,41 @@ static unsigned long __copy_user_zeroing
 
 	__asm__ __volatile__(
 	       "        .align 2,0x90\n"
-	       "0:      movl 32(%4), %%eax\n"
+	       "0:      "__copyuser_seg" movl 32(%4), %%eax\n"
 	       "        cmpl $67, %0\n"
 	       "        jbe 2f\n"
-	       "1:      movl 64(%4), %%eax\n"
+	       "1:      "__copyuser_seg" movl 64(%4), %%eax\n"
 	       "        .align 2,0x90\n"
-	       "2:      movl 0(%4), %%eax\n"
-	       "21:     movl 4(%4), %%edx\n"
+	       "2:      "__copyuser_seg" movl 0(%4), %%eax\n"
+	       "21:     "__copyuser_seg" movl 4(%4), %%edx\n"
 	       "        movnti %%eax, 0(%3)\n"
 	       "        movnti %%edx, 4(%3)\n"
-	       "3:      movl 8(%4), %%eax\n"
-	       "31:     movl 12(%4),%%edx\n"
+	       "3:      "__copyuser_seg" movl 8(%4), %%eax\n"
+	       "31:     "__copyuser_seg" movl 12(%4),%%edx\n"
 	       "        movnti %%eax, 8(%3)\n"
 	       "        movnti %%edx, 12(%3)\n"
-	       "4:      movl 16(%4), %%eax\n"
-	       "41:     movl 20(%4), %%edx\n"
+	       "4:      "__copyuser_seg" movl 16(%4), %%eax\n"
+	       "41:     "__copyuser_seg" movl 20(%4), %%edx\n"
 	       "        movnti %%eax, 16(%3)\n"
 	       "        movnti %%edx, 20(%3)\n"
-	       "10:     movl 24(%4), %%eax\n"
-	       "51:     movl 28(%4), %%edx\n"
+	       "10:     "__copyuser_seg" movl 24(%4), %%eax\n"
+	       "51:     "__copyuser_seg" movl 28(%4), %%edx\n"
 	       "        movnti %%eax, 24(%3)\n"
 	       "        movnti %%edx, 28(%3)\n"
-	       "11:     movl 32(%4), %%eax\n"
-	       "61:     movl 36(%4), %%edx\n"
+	       "11:     "__copyuser_seg" movl 32(%4), %%eax\n"
+	       "61:     "__copyuser_seg" movl 36(%4), %%edx\n"
 	       "        movnti %%eax, 32(%3)\n"
 	       "        movnti %%edx, 36(%3)\n"
-	       "12:     movl 40(%4), %%eax\n"
-	       "71:     movl 44(%4), %%edx\n"
+	       "12:     "__copyuser_seg" movl 40(%4), %%eax\n"
+	       "71:     "__copyuser_seg" movl 44(%4), %%edx\n"
 	       "        movnti %%eax, 40(%3)\n"
 	       "        movnti %%edx, 44(%3)\n"
-	       "13:     movl 48(%4), %%eax\n"
-	       "81:     movl 52(%4), %%edx\n"
+	       "13:     "__copyuser_seg" movl 48(%4), %%eax\n"
+	       "81:     "__copyuser_seg" movl 52(%4), %%edx\n"
 	       "        movnti %%eax, 48(%3)\n"
 	       "        movnti %%edx, 52(%3)\n"
-	       "14:     movl 56(%4), %%eax\n"
-	       "91:     movl 60(%4), %%edx\n"
+	       "14:     "__copyuser_seg" movl 56(%4), %%eax\n"
+	       "91:     "__copyuser_seg" movl 60(%4), %%edx\n"
 	       "        movnti %%eax, 56(%3)\n"
 	       "        movnti %%edx, 60(%3)\n"
 	       "        addl $-64, %0\n"
@@ -352,9 +460,9 @@ static unsigned long __copy_user_zeroing
 	       "        shrl  $2, %0\n"
 	       "        andl $3, %%eax\n"
 	       "        cld\n"
-	       "6:      rep; movsl\n"
+	       "6:      rep; "__copyuser_seg" movsl\n"
 	       "        movl %%eax,%0\n"
-	       "7:      rep; movsb\n"
+	       "7:      rep; "__copyuser_seg" movsb\n"
 	       "8:\n"
 	       ".section .fixup,\"ax\"\n"
 	       "9:      lea 0(%%eax,%0,4),%0\n"
@@ -399,41 +507,41 @@ static unsigned long __copy_user_intel_n
 
 	__asm__ __volatile__(
 	       "        .align 2,0x90\n"
-	       "0:      movl 32(%4), %%eax\n"
+	       "0:      "__copyuser_seg" movl 32(%4), %%eax\n"
 	       "        cmpl $67, %0\n"
 	       "        jbe 2f\n"
-	       "1:      movl 64(%4), %%eax\n"
+	       "1:      "__copyuser_seg" movl 64(%4), %%eax\n"
 	       "        .align 2,0x90\n"
-	       "2:      movl 0(%4), %%eax\n"
-	       "21:     movl 4(%4), %%edx\n"
+	       "2:      "__copyuser_seg" movl 0(%4), %%eax\n"
+	       "21:     "__copyuser_seg" movl 4(%4), %%edx\n"
 	       "        movnti %%eax, 0(%3)\n"
 	       "        movnti %%edx, 4(%3)\n"
-	       "3:      movl 8(%4), %%eax\n"
-	       "31:     movl 12(%4),%%edx\n"
+	       "3:      "__copyuser_seg" movl 8(%4), %%eax\n"
+	       "31:     "__copyuser_seg" movl 12(%4),%%edx\n"
 	       "        movnti %%eax, 8(%3)\n"
 	       "        movnti %%edx, 12(%3)\n"
-	       "4:      movl 16(%4), %%eax\n"
-	       "41:     movl 20(%4), %%edx\n"
+	       "4:      "__copyuser_seg" movl 16(%4), %%eax\n"
+	       "41:     "__copyuser_seg" movl 20(%4), %%edx\n"
 	       "        movnti %%eax, 16(%3)\n"
 	       "        movnti %%edx, 20(%3)\n"
-	       "10:     movl 24(%4), %%eax\n"
-	       "51:     movl 28(%4), %%edx\n"
+	       "10:     "__copyuser_seg" movl 24(%4), %%eax\n"
+	       "51:     "__copyuser_seg" movl 28(%4), %%edx\n"
 	       "        movnti %%eax, 24(%3)\n"
 	       "        movnti %%edx, 28(%3)\n"
-	       "11:     movl 32(%4), %%eax\n"
-	       "61:     movl 36(%4), %%edx\n"
+	       "11:     "__copyuser_seg" movl 32(%4), %%eax\n"
+	       "61:     "__copyuser_seg" movl 36(%4), %%edx\n"
 	       "        movnti %%eax, 32(%3)\n"
 	       "        movnti %%edx, 36(%3)\n"
-	       "12:     movl 40(%4), %%eax\n"
-	       "71:     movl 44(%4), %%edx\n"
+	       "12:     "__copyuser_seg" movl 40(%4), %%eax\n"
+	       "71:     "__copyuser_seg" movl 44(%4), %%edx\n"
 	       "        movnti %%eax, 40(%3)\n"
 	       "        movnti %%edx, 44(%3)\n"
-	       "13:     movl 48(%4), %%eax\n"
-	       "81:     movl 52(%4), %%edx\n"
+	       "13:     "__copyuser_seg" movl 48(%4), %%eax\n"
+	       "81:     "__copyuser_seg" movl 52(%4), %%edx\n"
 	       "        movnti %%eax, 48(%3)\n"
 	       "        movnti %%edx, 52(%3)\n"
-	       "14:     movl 56(%4), %%eax\n"
-	       "91:     movl 60(%4), %%edx\n"
+	       "14:     "__copyuser_seg" movl 56(%4), %%eax\n"
+	       "91:     "__copyuser_seg" movl 60(%4), %%edx\n"
 	       "        movnti %%eax, 56(%3)\n"
 	       "        movnti %%edx, 60(%3)\n"
 	       "        addl $-64, %0\n"
@@ -446,9 +554,9 @@ static unsigned long __copy_user_intel_n
 	       "        shrl  $2, %0\n"
 	       "        andl $3, %%eax\n"
 	       "        cld\n"
-	       "6:      rep; movsl\n"
+	       "6:      rep; "__copyuser_seg" movsl\n"
 	       "        movl %%eax,%0\n"
-	       "7:      rep; movsb\n"
+	       "7:      rep; "__copyuser_seg" movsb\n"
 	       "8:\n"
 	       ".section .fixup,\"ax\"\n"
 	       "9:      lea 0(%%eax,%0,4),%0\n"
@@ -488,32 +596,36 @@ static unsigned long __copy_user_intel_n
  */
 unsigned long __copy_user_zeroing_intel(void *to, const void __user *from,
 					unsigned long size);
-unsigned long __copy_user_intel(void __user *to, const void *from,
+unsigned long __generic_copy_to_user_intel(void __user *to, const void *from,
+					unsigned long size);
+unsigned long __generic_copy_from_user_intel(void *to, const void __user *from,
 					unsigned long size);
 unsigned long __copy_user_zeroing_intel_nocache(void *to,
 				const void __user *from, unsigned long size);
 #endif /* CONFIG_X86_INTEL_USERCOPY */
 
 /* Generic arbitrary sized copy.  */
-#define __copy_user(to, from, size)					\
+#define __copy_user(to, from, size, prefix, set, restore)		\
 do {									\
 	int __d0, __d1, __d2;						\
 	__asm__ __volatile__(						\
+		set							\
 		"	cmp  $7,%0\n"					\
 		"	jbe  1f\n"					\
 		"	movl %1,%0\n"					\
 		"	negl %0\n"					\
 		"	andl $7,%0\n"					\
 		"	subl %0,%3\n"					\
-		"4:	rep; movsb\n"					\
+		"4:	rep; "prefix"movsb\n"				\
 		"	movl %3,%0\n"					\
 		"	shrl $2,%0\n"					\
 		"	andl $3,%3\n"					\
 		"	.align 2,0x90\n"				\
-		"0:	rep; movsl\n"					\
+		"0:	rep; "prefix"movsl\n"				\
 		"	movl %3,%0\n"					\
-		"1:	rep; movsb\n"					\
+		"1:	rep; "prefix"movsb\n"				\
 		"2:\n"							\
+		restore							\
 		".section .fixup,\"ax\"\n"				\
 		"5:	addl %3,%0\n"					\
 		"	jmp 2b\n"					\
@@ -538,14 +650,14 @@ do {									\
 		"	negl %0\n"					\
 		"	andl $7,%0\n"					\
 		"	subl %0,%3\n"					\
-		"4:	rep; movsb\n"					\
+		"4:	rep; "__copyuser_seg"movsb\n"			\
 		"	movl %3,%0\n"					\
 		"	shrl $2,%0\n"					\
 		"	andl $3,%3\n"					\
 		"	.align 2,0x90\n"				\
-		"0:	rep; movsl\n"					\
+		"0:	rep; "__copyuser_seg"movsl\n"			\
 		"	movl %3,%0\n"					\
-		"1:	rep; movsb\n"					\
+		"1:	rep; "__copyuser_seg"movsb\n"			\
 		"2:\n"							\
 		".section .fixup,\"ax\"\n"				\
 		"5:	addl %3,%0\n"					\
@@ -572,9 +684,9 @@ unsigned long __copy_to_user_ll(void __u
 {
 	stac();
 	if (movsl_is_ok(to, from, n))
-		__copy_user(to, from, n);
+		__copy_user(to, from, n, "", __COPYUSER_SET_ES, __COPYUSER_RESTORE_ES);
 	else
-		n = __copy_user_intel(to, from, n);
+		n = __generic_copy_to_user_intel(to, from, n);
 	clac();
 	return n;
 }
@@ -598,10 +710,9 @@ unsigned long __copy_from_user_ll_nozero
 {
 	stac();
 	if (movsl_is_ok(to, from, n))
-		__copy_user(to, from, n);
+		__copy_user(to, from, n, __copyuser_seg, "", "");
 	else
-		n = __copy_user_intel((void __user *)to,
-				      (const void *)from, n);
+		n = __generic_copy_from_user_intel(to, from, n);
 	clac();
 	return n;
 }
@@ -632,58 +743,38 @@ unsigned long __copy_from_user_ll_nocach
 	if (n > 64 && cpu_has_xmm2)
 		n = __copy_user_intel_nocache(to, from, n);
 	else
-		__copy_user(to, from, n);
+		__copy_user(to, from, n, __copyuser_seg, "", "");
 #else
-	__copy_user(to, from, n);
+	__copy_user(to, from, n, __copyuser_seg, "", "");
 #endif
 	clac();
 	return n;
 }
 EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
 
-/**
- * copy_to_user: - Copy a block of data into user space.
- * @to:   Destination address, in user space.
- * @from: Source address, in kernel space.
- * @n:    Number of bytes to copy.
- *
- * Context: User context only.  This function may sleep.
- *
- * Copy data from kernel space to user space.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- */
-unsigned long _copy_to_user(void __user *to, const void *from, unsigned n)
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+void __set_fs(mm_segment_t x)
 {
-	if (access_ok(VERIFY_WRITE, to, n))
-		n = __copy_to_user(to, from, n);
-	return n;
+	switch (x.seg) {
+	case 0:
+		loadsegment(gs, 0);
+		break;
+	case TASK_SIZE_MAX:
+		loadsegment(gs, __USER_DS);
+		break;
+	case -1UL:
+		loadsegment(gs, __KERNEL_DS);
+		break;
+	default:
+		BUG();
+	}
 }
-EXPORT_SYMBOL(_copy_to_user);
+EXPORT_SYMBOL(__set_fs);
 
-/**
- * copy_from_user: - Copy a block of data from user space.
- * @to:   Destination address, in kernel space.
- * @from: Source address, in user space.
- * @n:    Number of bytes to copy.
- *
- * Context: User context only.  This function may sleep.
- *
- * Copy data from user space to kernel space.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- *
- * If some data could not be copied, this function will pad the copied
- * data to the requested size using zero bytes.
- */
-unsigned long _copy_from_user(void *to, const void __user *from, unsigned n)
+void set_fs(mm_segment_t x)
 {
-	if (access_ok(VERIFY_READ, from, n))
-		n = __copy_from_user(to, from, n);
-	else
-		memset(to, 0, n);
-	return n;
+	current_thread_info()->addr_limit = x;
+	__set_fs(x);
 }
-EXPORT_SYMBOL(_copy_from_user);
+EXPORT_SYMBOL(set_fs);
+#endif
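- note on the helpers used above: the reworked __copy_user() now takes an instruction prefix plus %es set/restore snippets, and the plain copy paths paste __copyuser_seg in front of the string moves. Their real definitions live in the uaccess headers elsewhere in this patch; the sketch below only illustrates their assumed shape under UDEREF and is not quoted from the patch:

	#ifdef CONFIG_PAX_MEMORY_UDEREF
	/* userland is reached through a %gs-based segment, so user accesses get a
	 * segment override; string copies to user temporarily point %es at it */
	# define __copyuser_seg		"gs;"
	# define __COPYUSER_SET_ES	"pushl %%gs; popl %%es\n"
	# define __COPYUSER_RESTORE_ES	"pushl %%ss; popl %%es\n"
	#else
	# define __copyuser_seg
	# define __COPYUSER_SET_ES
	# define __COPYUSER_RESTORE_ES
	#endif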
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/lib/usercopy_64.c linux-4.0.9-pax/arch/x86/lib/usercopy_64.c
--- linux-4.0.9/arch/x86/lib/usercopy_64.c	2015-05-07 02:10:38.032265500 +0200
+++ linux-4.0.9-pax/arch/x86/lib/usercopy_64.c	2015-05-12 16:13:03.183826739 +0200
@@ -18,6 +18,7 @@ unsigned long __clear_user(void __user *
 	might_fault();
 	/* no memory constraint because it doesn't change any memory gcc knows
 	   about */
+	pax_open_userland();
 	stac();
 	asm volatile(
 		"	testq  %[size8],%[size8]\n"
@@ -39,9 +40,10 @@ unsigned long __clear_user(void __user *
 		_ASM_EXTABLE(0b,3b)
 		_ASM_EXTABLE(1b,2b)
 		: [size8] "=&c"(size), [dst] "=&D" (__d0)
-		: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
+		: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(____m(addr)),
 		  [zero] "r" (0UL), [eight] "r" (8UL));
 	clac();
+	pax_close_userland();
 	return size;
 }
 EXPORT_SYMBOL(__clear_user);
@@ -54,12 +56,11 @@ unsigned long clear_user(void __user *to
 }
 EXPORT_SYMBOL(clear_user);
 
-unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len)
+unsigned long copy_in_user(void __user *to, const void __user *from, unsigned long len)
 {
-	if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) { 
-		return copy_user_generic((__force void *)to, (__force void *)from, len);
-	} 
-	return len;		
+	if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len))
+		return copy_user_generic((void __force_kernel *)____m(to), (void __force_kernel *)____m(from), len);
+	return len;
 }
 EXPORT_SYMBOL(copy_in_user);
 
@@ -69,8 +70,10 @@ EXPORT_SYMBOL(copy_in_user);
  * it is not necessary to optimize tail handling.
  */
 __visible unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len)
+copy_user_handle_tail(char __user *to, char __user *from, unsigned long len)
 {
+	clac();
+	pax_close_userland();
 	for (; len; --len, to++) {
 		char c;
 
@@ -79,10 +82,9 @@ copy_user_handle_tail(char *to, char *fr
 		if (__put_user_nocheck(c, to, sizeof(char)))
 			break;
 	}
-	clac();
 
 	/* If the destination is a kernel buffer, we always clear the end */
-	if (!__addr_ok(to))
+	if (!__addr_ok(to) && (unsigned long)to >= TASK_SIZE_MAX + pax_user_shadow_base)
 		memset(to, 0, len);
 	return len;
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/Makefile linux-4.0.9-pax/arch/x86/Makefile
--- linux-4.0.9/arch/x86/Makefile	2015-04-13 11:21:01.618617474 +0200
+++ linux-4.0.9-pax/arch/x86/Makefile	2015-04-15 12:13:52.942318620 +0200
@@ -65,9 +65,6 @@ ifeq ($(CONFIG_X86_32),y)
         # CPU-specific tuning. Anything which can be shared with UML should go here.
         include $(srctree)/arch/x86/Makefile_32.cpu
         KBUILD_CFLAGS += $(cflags-y)
-
-        # temporary until string.h is fixed
-        KBUILD_CFLAGS += -ffreestanding
 else
         BITS := 64
         UTS_MACHINE := x86_64
@@ -107,6 +104,9 @@ else
         KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args)
 endif
 
+# temporary until string.h is fixed
+KBUILD_CFLAGS += -ffreestanding
+
 # Make sure compiler does not have buggy stack-protector support.
 ifdef CONFIG_CC_STACKPROTECTOR
 	cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
@@ -181,6 +181,7 @@ archheaders:
 	$(Q)$(MAKE) $(build)=arch/x86/syscalls all
 
 archprepare:
+	$(if $(LDFLAGS_BUILD_ID),,$(error $(OLD_LD)))
 ifeq ($(CONFIG_KEXEC_FILE),y)
 	$(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c
 endif
@@ -264,3 +265,9 @@ define archhelp
   echo  '                  FDARGS="..."  arguments for the booted kernel'
   echo  '                  FDINITRD=file initrd for the booted kernel'
 endef
+
+define OLD_LD
+
+*** ${VERSION}.${PATCHLEVEL} PaX kernels no longer build correctly with old versions of binutils.
+*** Please upgrade your binutils to 2.18 or newer
+endef
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/extable.c linux-4.0.9-pax/arch/x86/mm/extable.c
--- linux-4.0.9/arch/x86/mm/extable.c	2015-03-18 15:21:50.292349253 +0100
+++ linux-4.0.9-pax/arch/x86/mm/extable.c	2015-04-15 12:13:52.946318620 +0200
@@ -6,12 +6,24 @@
 static inline unsigned long
 ex_insn_addr(const struct exception_table_entry *x)
 {
-	return (unsigned long)&x->insn + x->insn;
+	unsigned long reloc = 0;
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC)
+	reloc = ____LOAD_PHYSICAL_ADDR - LOAD_PHYSICAL_ADDR;
+#endif
+
+	return (unsigned long)&x->insn + x->insn + reloc;
 }
 static inline unsigned long
 ex_fixup_addr(const struct exception_table_entry *x)
 {
-	return (unsigned long)&x->fixup + x->fixup;
+	unsigned long reloc = 0;
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC)
+	reloc = ____LOAD_PHYSICAL_ADDR - LOAD_PHYSICAL_ADDR;
+#endif
+
+	return (unsigned long)&x->fixup + x->fixup + reloc;
 }
 
 int fixup_exception(struct pt_regs *regs)
@@ -20,7 +32,7 @@ int fixup_exception(struct pt_regs *regs
 	unsigned long new_ip;
 
 #ifdef CONFIG_PNPBIOS
-	if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
+	if (unlikely(!v8086_mode(regs) && SEGMENT_IS_PNP_CODE(regs->cs))) {
 		extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
 		extern u32 pnp_bios_is_utter_crap;
 		pnp_bios_is_utter_crap = 1;
@@ -145,6 +157,13 @@ void sort_extable(struct exception_table
 		i += 4;
 		p->fixup -= i;
 		i += 4;
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC)
+		BUILD_BUG_ON(!IS_ENABLED(CONFIG_BUILDTIME_EXTABLE_SORT));
+		p->insn -= ____LOAD_PHYSICAL_ADDR - LOAD_PHYSICAL_ADDR;
+		p->fixup -= ____LOAD_PHYSICAL_ADDR - LOAD_PHYSICAL_ADDR;
+#endif
+
 	}
 }
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/fault.c linux-4.0.9-pax/arch/x86/mm/fault.c
--- linux-4.0.9/arch/x86/mm/fault.c	2015-04-13 11:21:01.894617459 +0200
+++ linux-4.0.9-pax/arch/x86/mm/fault.c	2015-06-28 00:12:23.514445227 +0200
@@ -13,12 +13,19 @@
 #include <linux/hugetlb.h>		/* hstate_index_to_shift	*/
 #include <linux/prefetch.h>		/* prefetchw			*/
 #include <linux/context_tracking.h>	/* exception_enter(), ...	*/
+#include <linux/unistd.h>
+#include <linux/compiler.h>
 
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
 #include <asm/pgalloc.h>		/* pgd_*(), ...			*/
 #include <asm/kmemcheck.h>		/* kmemcheck_*(), ...		*/
 #include <asm/fixmap.h>			/* VSYSCALL_ADDR		*/
 #include <asm/vsyscall.h>		/* emulate_vsyscall		*/
+#include <asm/tlbflush.h>
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+#include <asm/stacktrace.h>
+#endif
 
 #define CREATE_TRACE_POINTS
 #include <asm/trace/exceptions.h>
@@ -59,7 +66,7 @@ static nokprobe_inline int kprobes_fault
 	int ret = 0;
 
 	/* kprobe_running() needs smp_processor_id() */
-	if (kprobes_built_in() && !user_mode_vm(regs)) {
+	if (kprobes_built_in() && !user_mode(regs)) {
 		preempt_disable();
 		if (kprobe_running() && kprobe_fault_handler(regs, 14))
 			ret = 1;
@@ -120,7 +127,10 @@ check_prefetch_opcode(struct pt_regs *re
 		return !instr_lo || (instr_lo>>1) == 1;
 	case 0x00:
 		/* Prefetch instruction is 0x0F0D or 0x0F18 */
-		if (probe_kernel_address(instr, opcode))
+		if (user_mode(regs)) {
+			if (__copy_from_user_inatomic(&opcode, (unsigned char __force_user *)(instr), 1))
+				return 0;
+		} else if (probe_kernel_address(instr, opcode))
 			return 0;
 
 		*prefetch = (instr_lo == 0xF) &&
@@ -154,7 +164,10 @@ is_prefetch(struct pt_regs *regs, unsign
 	while (instr < max_instr) {
 		unsigned char opcode;
 
-		if (probe_kernel_address(instr, opcode))
+		if (user_mode(regs)) {
+			if (__copy_from_user_inatomic(&opcode, (unsigned char __force_user *)(instr), 1))
+				break;
+		} else if (probe_kernel_address(instr, opcode))
 			break;
 
 		instr++;
@@ -185,6 +198,34 @@ force_sig_info_fault(int si_signo, int s
 	force_sig_info(si_signo, &info, tsk);
 }
 
+#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)
+static bool pax_is_fetch_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
+#endif
+
+#ifdef CONFIG_PAX_EMUTRAMP
+static int pax_handle_fetch_fault(struct pt_regs *regs);
+#endif
+
+#ifdef CONFIG_PAX_PAGEEXEC
+static inline pmd_t * pax_get_pmd(struct mm_struct *mm, unsigned long address)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		return NULL;
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return NULL;
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd))
+		return NULL;
+	return pmd;
+}
+#endif
+
 DEFINE_SPINLOCK(pgd_lock);
 LIST_HEAD(pgd_list);
 
@@ -235,10 +276,27 @@ void vmalloc_sync_all(void)
 	for (address = VMALLOC_START & PMD_MASK;
 	     address >= TASK_SIZE && address < FIXADDR_TOP;
 	     address += PMD_SIZE) {
+
+#ifdef CONFIG_PAX_PER_CPU_PGD
+		unsigned long cpu;
+#else
 		struct page *page;
+#endif
 
 		spin_lock(&pgd_lock);
+
+#ifdef CONFIG_PAX_PER_CPU_PGD
+		for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
+			pgd_t *pgd = get_cpu_pgd(cpu, user);
+			pmd_t *ret;
+
+			ret = vmalloc_sync_one(pgd, address);
+			if (!ret)
+				break;
+			pgd = get_cpu_pgd(cpu, kernel);
+#else
 		list_for_each_entry(page, &pgd_list, lru) {
+			pgd_t *pgd;
 			spinlock_t *pgt_lock;
 			pmd_t *ret;
 
@@ -246,8 +304,14 @@ void vmalloc_sync_all(void)
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 
 			spin_lock(pgt_lock);
-			ret = vmalloc_sync_one(page_address(page), address);
+			pgd = page_address(page);
+#endif
+
+			ret = vmalloc_sync_one(pgd, address);
+
+#ifndef CONFIG_PAX_PER_CPU_PGD
 			spin_unlock(pgt_lock);
+#endif
 
 			if (!ret)
 				break;
@@ -281,6 +345,12 @@ static noinline int vmalloc_fault(unsign
 	 * an interrupt in the middle of a task switch..
 	 */
 	pgd_paddr = read_cr3();
+
+#ifdef CONFIG_PAX_PER_CPU_PGD
+	BUG_ON(__pa(get_cpu_pgd(smp_processor_id(), kernel)) != (pgd_paddr & __PHYSICAL_MASK));
+	vmalloc_sync_one(__va(pgd_paddr + PAGE_SIZE), address);
+#endif
+
 	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
 	if (!pmd_k)
 		return -1;
@@ -377,11 +447,25 @@ static noinline int vmalloc_fault(unsign
 	 * happen within a race in page table update. In the later
 	 * case just flush:
 	 */
-	pgd = pgd_offset(current->active_mm, address);
+
 	pgd_ref = pgd_offset_k(address);
 	if (pgd_none(*pgd_ref))
 		return -1;
 
+#ifdef CONFIG_PAX_PER_CPU_PGD
+	BUG_ON(__pa(get_cpu_pgd(smp_processor_id(), kernel)) != (read_cr3() & __PHYSICAL_MASK));
+	pgd = pgd_offset_cpu(smp_processor_id(), user, address);
+	if (pgd_none(*pgd)) {
+		set_pgd(pgd, *pgd_ref);
+		arch_flush_lazy_mmu_mode();
+	} else {
+		BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+	}
+	pgd = pgd_offset_cpu(smp_processor_id(), kernel, address);
+#else
+	pgd = pgd_offset(current->active_mm, address);
+#endif
+
 	if (pgd_none(*pgd)) {
 		set_pgd(pgd, *pgd_ref);
 		arch_flush_lazy_mmu_mode();
@@ -548,7 +632,7 @@ static int is_errata93(struct pt_regs *r
 static int is_errata100(struct pt_regs *regs, unsigned long address)
 {
 #ifdef CONFIG_X86_64
-	if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32))
+	if ((regs->cs == __USER32_CS || (regs->cs & SEGMENT_LDT)) && (address >> 32))
 		return 1;
 #endif
 	return 0;
@@ -575,9 +659,9 @@ static int is_f00f_bug(struct pt_regs *r
 }
 
 static const char nx_warning[] = KERN_CRIT
-"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n";
+"kernel tried to execute NX-protected page - exploit attempt? (uid: %d, task: %s, pid: %d)\n";
 static const char smep_warning[] = KERN_CRIT
-"unable to execute userspace code (SMEP?) (uid: %d)\n";
+"unable to execute userspace code (SMEP?) (uid: %d, task: %s, pid: %d)\n";
 
 static void
 show_fault_oops(struct pt_regs *regs, unsigned long error_code,
@@ -586,7 +670,7 @@ show_fault_oops(struct pt_regs *regs, un
 	if (!oops_may_print())
 		return;
 
-	if (error_code & PF_INSTR) {
+	if ((__supported_pte_mask & _PAGE_NX) && (error_code & PF_INSTR)) {
 		unsigned int level;
 		pgd_t *pgd;
 		pte_t *pte;
@@ -597,13 +681,19 @@ show_fault_oops(struct pt_regs *regs, un
 		pte = lookup_address_in_pgd(pgd, address, &level);
 
 		if (pte && pte_present(*pte) && !pte_exec(*pte))
-			printk(nx_warning, from_kuid(&init_user_ns, current_uid()));
+			printk(nx_warning, from_kuid_munged(&init_user_ns, current_uid()), current->comm, task_pid_nr(current));
 		if (pte && pte_present(*pte) && pte_exec(*pte) &&
 				(pgd_flags(*pgd) & _PAGE_USER) &&
 				(__read_cr4() & X86_CR4_SMEP))
-			printk(smep_warning, from_kuid(&init_user_ns, current_uid()));
+			printk(smep_warning, from_kuid(&init_user_ns, current_uid()), current->comm, task_pid_nr(current));
 	}
 
+#ifdef CONFIG_PAX_KERNEXEC
+	if (init_mm.start_code <= address && address < init_mm.end_code)
+		printk(KERN_EMERG "PAX: %s:%d, uid/euid: %u/%u, attempted to modify kernel code\n", current->comm, task_pid_nr(current),
+				from_kuid_munged(&init_user_ns, current_uid()), from_kuid_munged(&init_user_ns, current_euid()));
+#endif
+
 	printk(KERN_ALERT "BUG: unable to handle kernel ");
 	if (address < PAGE_SIZE)
 		printk(KERN_CONT "NULL pointer dereference");
@@ -782,6 +872,22 @@ __bad_area_nosemaphore(struct pt_regs *r
 				return;
 		}
 #endif
+
+#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)
+		if (pax_is_fetch_fault(regs, error_code, address)) {
+
+#ifdef CONFIG_PAX_EMUTRAMP
+			switch (pax_handle_fetch_fault(regs)) {
+			case 2:
+				return;
+			}
+#endif
+
+			pax_report_fault(regs, (void *)regs->ip, (void *)regs->sp);
+			do_group_exit(SIGKILL);
+		}
+#endif
+
 		/* Kernel addresses are always protection faults: */
 		if (address >= TASK_SIZE)
 			error_code |= PF_PROT;
@@ -864,7 +970,7 @@ do_sigbus(struct pt_regs *regs, unsigned
 	if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
 		printk(KERN_ERR
 	"MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
-			tsk->comm, tsk->pid, address);
+			tsk->comm, task_pid_nr(tsk), address);
 		code = BUS_MCEERR_AR;
 	}
 #endif
@@ -916,6 +1022,99 @@ static int spurious_fault_check(unsigned
 	return 1;
 }
 
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC)
+static int pax_handle_pageexec_fault(struct pt_regs *regs, struct mm_struct *mm, unsigned long address, unsigned long error_code)
+{
+	pte_t *pte;
+	pmd_t *pmd;
+	spinlock_t *ptl;
+	unsigned char pte_mask;
+
+	if ((__supported_pte_mask & _PAGE_NX) || (error_code & (PF_PROT|PF_USER)) != (PF_PROT|PF_USER) || v8086_mode(regs) ||
+	    !(mm->pax_flags & MF_PAX_PAGEEXEC))
+		return 0;
+
+	/* PaX: it's our fault, let's handle it if we can */
+
+	/* PaX: take a look at read faults before acquiring any locks */
+	if (unlikely(!(error_code & PF_WRITE) && (regs->ip == address))) {
+		/* instruction fetch attempt from a protected page in user mode */
+		up_read(&mm->mmap_sem);
+
+#ifdef CONFIG_PAX_EMUTRAMP
+		switch (pax_handle_fetch_fault(regs)) {
+		case 2:
+			return 1;
+		}
+#endif
+
+		pax_report_fault(regs, (void *)regs->ip, (void *)regs->sp);
+		do_group_exit(SIGKILL);
+	}
+
+	pmd = pax_get_pmd(mm, address);
+	if (unlikely(!pmd))
+		return 0;
+
+	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
+	if (unlikely(!(pte_val(*pte) & _PAGE_PRESENT) || pte_user(*pte))) {
+		pte_unmap_unlock(pte, ptl);
+		return 0;
+	}
+
+	if (unlikely((error_code & PF_WRITE) && !pte_write(*pte))) {
+		/* write attempt to a protected page in user mode */
+		pte_unmap_unlock(pte, ptl);
+		return 0;
+	}
+
+#ifdef CONFIG_SMP
+	if (likely(address > get_limit(regs->cs) && cpumask_test_cpu(smp_processor_id(), &mm->context.cpu_user_cs_mask)))
+#else
+	if (likely(address > get_limit(regs->cs)))
+#endif
+	{
+		set_pte(pte, pte_mkread(*pte));
+		__flush_tlb_one(address);
+		pte_unmap_unlock(pte, ptl);
+		up_read(&mm->mmap_sem);
+		return 1;
+	}
+
+	pte_mask = _PAGE_ACCESSED | _PAGE_USER | ((error_code & PF_WRITE) << (_PAGE_BIT_DIRTY-1));
+
+	/*
+	 * PaX: fill DTLB with user rights and retry
+	 */
+	__asm__ __volatile__ (
+		"orb %2,(%1)\n"
+#if defined(CONFIG_M586) || defined(CONFIG_M586TSC)
+/*
+ * PaX: let this uncommented 'invlpg' remind us of the behaviour of Intel's
+ * (and AMD's) TLBs. namely, they do not cache PTEs that would raise *any*
+ * page fault when examined during a TLB load attempt. this is true not only
+ * for PTEs holding a non-present entry but also for present entries that will
+ * raise a page fault (such as those set up by PaX, or the copy-on-write
+ * mechanism). in effect it means that we do *not* need to flush the TLBs
+ * for our target pages since their PTEs are simply not in the TLBs at all.
+
+ * the best thing in omitting it is that we gain around 15-20% speed in the
+ * fast path of the page fault handler and can get rid of tracing since we
+ * can no longer flush unintended entries.
+ */
+		"invlpg (%0)\n"
+#endif
+		__copyuser_seg"testb $0,(%0)\n"
+		"xorb %3,(%1)\n"
+		:
+		: "r" (address), "r" (pte), "q" (pte_mask), "i" (_PAGE_USER)
+		: "memory", "cc");
+	pte_unmap_unlock(pte, ptl);
+	up_read(&mm->mmap_sem);
+	return 1;
+}
+#endif
+
 /*
  * Handle a spurious fault caused by a stale TLB entry.
  *
@@ -1001,6 +1200,9 @@ int show_unhandled_signals = 1;
 static inline int
 access_error(unsigned long error_code, struct vm_area_struct *vma)
 {
+	if ((__supported_pte_mask & _PAGE_NX) && (error_code & PF_INSTR) && !(vma->vm_flags & VM_EXEC))
+		return 1;
+
 	if (error_code & PF_WRITE) {
 		/* write, present and write, not present: */
 		if (unlikely(!(vma->vm_flags & VM_WRITE)))
@@ -1035,7 +1237,7 @@ static inline bool smap_violation(int er
 	if (error_code & PF_USER)
 		return false;
 
-	if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC))
+	if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
 		return false;
 
 	return true;
@@ -1063,6 +1265,22 @@ __do_page_fault(struct pt_regs *regs, un
 	tsk = current;
 	mm = tsk->mm;
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+	if (!user_mode(regs) && address < 2 * pax_user_shadow_base) {
+		if (!search_exception_tables(regs->ip)) {
+			printk(KERN_EMERG "PAX: please report this to pageexec@freemail.hu\n");
+			bad_area_nosemaphore(regs, error_code, address);
+			return;
+		}
+		if (address < pax_user_shadow_base) {
+			printk(KERN_EMERG "PAX: please report this to pageexec@freemail.hu\n");
+			printk(KERN_EMERG "PAX: faulting IP: %pS\n", (void *)regs->ip);
+			show_trace_log_lvl(NULL, NULL, (void *)regs->sp, regs->bp, KERN_EMERG);
+		} else
+			address -= pax_user_shadow_base;
+	}
+#endif
+
 	/*
 	 * Detect and handle instructions that would cause a page fault for
 	 * both a tracked kernel page and a userspace page.
@@ -1140,7 +1358,7 @@ __do_page_fault(struct pt_regs *regs, un
 	 * User-mode registers count as a user access even for any
 	 * potential system fault or CPU buglet:
 	 */
-	if (user_mode_vm(regs)) {
+	if (user_mode(regs)) {
 		local_irq_enable();
 		error_code |= PF_USER;
 		flags |= FAULT_FLAG_USER;
@@ -1187,6 +1405,11 @@ retry:
 		might_sleep();
 	}
 
+#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC)
+	if (pax_handle_pageexec_fault(regs, mm, address, error_code))
+		return;
+#endif
+
 	vma = find_vma(mm, address);
 	if (unlikely(!vma)) {
 		bad_area(regs, error_code, address);
@@ -1198,18 +1421,24 @@ retry:
 		bad_area(regs, error_code, address);
 		return;
 	}
-	if (error_code & PF_USER) {
-		/*
-		 * Accessing the stack below %sp is always a bug.
-		 * The large cushion allows instructions like enter
-		 * and pusha to work. ("enter $65535, $31" pushes
-		 * 32 pointers and then decrements %sp by 65535.)
-		 */
-		if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
-			bad_area(regs, error_code, address);
-			return;
-		}
+	/*
+	 * Accessing the stack below %sp is always a bug.
+	 * The large cushion allows instructions like enter
+	 * and pusha to work. ("enter $65535, $31" pushes
+	 * 32 pointers and then decrements %sp by 65535.)
+	 */
+	if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < task_pt_regs(tsk)->sp)) {
+		bad_area(regs, error_code, address);
+		return;
 	}
+
+#ifdef CONFIG_PAX_SEGMEXEC
+	if (unlikely((mm->pax_flags & MF_PAX_SEGMEXEC) && vma->vm_end - SEGMEXEC_TASK_SIZE - 1 < address - SEGMEXEC_TASK_SIZE - 1)) {
+		bad_area(regs, error_code, address);
+		return;
+	}
+#endif
+
 	if (unlikely(expand_stack(vma, address))) {
 		bad_area(regs, error_code, address);
 		return;
@@ -1329,3 +1558,292 @@ trace_do_page_fault(struct pt_regs *regs
 }
 NOKPROBE_SYMBOL(trace_do_page_fault);
 #endif /* CONFIG_TRACING */
+
+#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)
+static bool pax_is_fetch_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long ip = regs->ip;
+
+	if (v8086_mode(regs))
+		ip = ((regs->cs & 0xffff) << 4) + (ip & 0xffff);
+
+#ifdef CONFIG_PAX_PAGEEXEC
+	if (mm->pax_flags & MF_PAX_PAGEEXEC) {
+		if ((__supported_pte_mask & _PAGE_NX) && (error_code & PF_INSTR))
+			return true;
+		if (!(error_code & (PF_PROT | PF_WRITE)) && ip == address)
+			return true;
+		return false;
+	}
+#endif
+
+#ifdef CONFIG_PAX_SEGMEXEC
+	if (mm->pax_flags & MF_PAX_SEGMEXEC) {
+		if (!(error_code & (PF_PROT | PF_WRITE)) && (ip + SEGMEXEC_TASK_SIZE == address))
+			return true;
+		return false;
+	}
+#endif
+
+	return false;
+}
+#endif
+
+#ifdef CONFIG_PAX_EMUTRAMP
+static int pax_handle_fetch_fault_32(struct pt_regs *regs)
+{
+	int err;
+
+	do { /* PaX: libffi trampoline emulation */
+		unsigned char mov, jmp;
+		unsigned int addr1, addr2;
+
+#ifdef CONFIG_X86_64
+		if ((regs->ip + 9) >> 32)
+			break;
+#endif
+
+		err = get_user(mov, (unsigned char __user *)regs->ip);
+		err |= get_user(addr1, (unsigned int __user *)(regs->ip + 1));
+		err |= get_user(jmp, (unsigned char __user *)(regs->ip + 5));
+		err |= get_user(addr2, (unsigned int __user *)(regs->ip + 6));
+
+		if (err)
+			break;
+
+		if (mov == 0xB8 && jmp == 0xE9) {
+			regs->ax = addr1;
+			regs->ip = (unsigned int)(regs->ip + addr2 + 10);
+			return 2;
+		}
+	} while (0);
+
+	do { /* PaX: gcc trampoline emulation #1 */
+		unsigned char mov1, mov2;
+		unsigned short jmp;
+		unsigned int addr1, addr2;
+
+#ifdef CONFIG_X86_64
+		if ((regs->ip + 11) >> 32)
+			break;
+#endif
+
+		err = get_user(mov1, (unsigned char __user *)regs->ip);
+		err |= get_user(addr1, (unsigned int __user *)(regs->ip + 1));
+		err |= get_user(mov2, (unsigned char __user *)(regs->ip + 5));
+		err |= get_user(addr2, (unsigned int __user *)(regs->ip + 6));
+		err |= get_user(jmp, (unsigned short __user *)(regs->ip + 10));
+
+		if (err)
+			break;
+
+		if (mov1 == 0xB9 && mov2 == 0xB8 && jmp == 0xE0FF) {
+			regs->cx = addr1;
+			regs->ax = addr2;
+			regs->ip = addr2;
+			return 2;
+		}
+	} while (0);
+
+	do { /* PaX: gcc trampoline emulation #2 */
+		unsigned char mov, jmp;
+		unsigned int addr1, addr2;
+
+#ifdef CONFIG_X86_64
+		if ((regs->ip + 9) >> 32)
+			break;
+#endif
+
+		err = get_user(mov, (unsigned char __user *)regs->ip);
+		err |= get_user(addr1, (unsigned int __user *)(regs->ip + 1));
+		err |= get_user(jmp, (unsigned char __user *)(regs->ip + 5));
+		err |= get_user(addr2, (unsigned int __user *)(regs->ip + 6));
+
+		if (err)
+			break;
+
+		if (mov == 0xB9 && jmp == 0xE9) {
+			regs->cx = addr1;
+			regs->ip = (unsigned int)(regs->ip + addr2 + 10);
+			return 2;
+		}
+	} while (0);
+
+	return 1; /* PaX in action */
+}
+
+#ifdef CONFIG_X86_64
+static int pax_handle_fetch_fault_64(struct pt_regs *regs)
+{
+	int err;
+
+	do { /* PaX: libffi trampoline emulation */
+		unsigned short mov1, mov2, jmp1;
+		unsigned char stcclc, jmp2;
+		unsigned long addr1, addr2;
+
+		err = get_user(mov1, (unsigned short __user *)regs->ip);
+		err |= get_user(addr1, (unsigned long __user *)(regs->ip + 2));
+		err |= get_user(mov2, (unsigned short __user *)(regs->ip + 10));
+		err |= get_user(addr2, (unsigned long __user *)(regs->ip + 12));
+		err |= get_user(stcclc, (unsigned char __user *)(regs->ip + 20));
+		err |= get_user(jmp1, (unsigned short __user *)(regs->ip + 21));
+		err |= get_user(jmp2, (unsigned char __user *)(regs->ip + 23));
+
+		if (err)
+			break;
+
+		if (mov1 == 0xBB49 && mov2 == 0xBA49 && (stcclc == 0xF8 || stcclc == 0xF9) && jmp1 == 0xFF49 && jmp2 == 0xE3) {
+			regs->r11 = addr1;
+			regs->r10 = addr2;
+			if (stcclc == 0xF8)
+				regs->flags &= ~X86_EFLAGS_CF;
+			else
+				regs->flags |= X86_EFLAGS_CF;
+			regs->ip = addr1;
+			return 2;
+		}
+	} while (0);
+
+	do { /* PaX: gcc trampoline emulation #1 */
+		unsigned short mov1, mov2, jmp1;
+		unsigned char jmp2;
+		unsigned int addr1;
+		unsigned long addr2;
+
+		err = get_user(mov1, (unsigned short __user *)regs->ip);
+		err |= get_user(addr1, (unsigned int __user *)(regs->ip + 2));
+		err |= get_user(mov2, (unsigned short __user *)(regs->ip + 6));
+		err |= get_user(addr2, (unsigned long __user *)(regs->ip + 8));
+		err |= get_user(jmp1, (unsigned short __user *)(regs->ip + 16));
+		err |= get_user(jmp2, (unsigned char __user *)(regs->ip + 18));
+
+		if (err)
+			break;
+
+		if (mov1 == 0xBB41 && mov2 == 0xBA49 && jmp1 == 0xFF49 && jmp2 == 0xE3) {
+			regs->r11 = addr1;
+			regs->r10 = addr2;
+			regs->ip = addr1;
+			return 2;
+		}
+	} while (0);
+
+	do { /* PaX: gcc trampoline emulation #2 */
+		unsigned short mov1, mov2, jmp1;
+		unsigned char jmp2;
+		unsigned long addr1, addr2;
+
+		err = get_user(mov1, (unsigned short __user *)regs->ip);
+		err |= get_user(addr1, (unsigned long __user *)(regs->ip + 2));
+		err |= get_user(mov2, (unsigned short __user *)(regs->ip + 10));
+		err |= get_user(addr2, (unsigned long __user *)(regs->ip + 12));
+		err |= get_user(jmp1, (unsigned short __user *)(regs->ip + 20));
+		err |= get_user(jmp2, (unsigned char __user *)(regs->ip + 22));
+
+		if (err)
+			break;
+
+		if (mov1 == 0xBB49 && mov2 == 0xBA49 && jmp1 == 0xFF49 && jmp2 == 0xE3) {
+			regs->r11 = addr1;
+			regs->r10 = addr2;
+			regs->ip = addr1;
+			return 2;
+		}
+	} while (0);
+
+	return 1; /* PaX in action */
+}
+#endif
+
+/*
+ * PaX: decide what to do with offenders (regs->ip = fault address)
+ *
+ * returns 1 when task should be killed
+ *         2 when gcc trampoline was detected
+ */
+static int pax_handle_fetch_fault(struct pt_regs *regs)
+{
+	if (v8086_mode(regs))
+		return 1;
+
+	if (!(current->mm->pax_flags & MF_PAX_EMUTRAMP))
+		return 1;
+
+#ifdef CONFIG_X86_32
+	return pax_handle_fetch_fault_32(regs);
+#else
+	if (regs->cs == __USER32_CS || (regs->cs & SEGMENT_LDT))
+		return pax_handle_fetch_fault_32(regs);
+	else
+		return pax_handle_fetch_fault_64(regs);
+#endif
+}
+#endif
+
+#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)
+void pax_report_insns(struct pt_regs *regs, void *pc, void *sp)
+{
+	long i;
+
+	printk(KERN_ERR "PAX: bytes at PC: ");
+	for (i = 0; i < 20; i++) {
+		unsigned char c;
+		if (get_user(c, (unsigned char __force_user *)pc+i))
+			printk(KERN_CONT "?? ");
+		else
+			printk(KERN_CONT "%02x ", c);
+	}
+	printk("\n");
+
+	printk(KERN_ERR "PAX: bytes at SP-%lu: ", (unsigned long)sizeof(long));
+	for (i = -1; i < 80 / (long)sizeof(long); i++) {
+		unsigned long c;
+		if (get_user(c, (unsigned long __force_user *)sp+i)) {
+#ifdef CONFIG_X86_32
+			printk(KERN_CONT "???????? ");
+#else
+			if ((regs->cs == __USER32_CS || (regs->cs & SEGMENT_LDT)))
+				printk(KERN_CONT "???????? ???????? ");
+			else
+				printk(KERN_CONT "???????????????? ");
+#endif
+		} else {
+#ifdef CONFIG_X86_64
+			if ((regs->cs == __USER32_CS || (regs->cs & SEGMENT_LDT))) {
+				printk(KERN_CONT "%08x ", (unsigned int)c);
+				printk(KERN_CONT "%08x ", (unsigned int)(c >> 32));
+			} else
+#endif
+				printk(KERN_CONT "%0*lx ", 2 * (int)sizeof(long), c);
+		}
+	}
+	printk("\n");
+}
+#endif
+
+/**
+ * probe_kernel_write(): safely attempt to write to a location
+ * @dst: address to write to
+ * @src: pointer to the data that shall be written
+ * @size: size of the data chunk
+ *
+ * Safely write to address @dst from the buffer at @src.  If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+long notrace probe_kernel_write(void *dst, const void *src, size_t size)
+{
+	long ret;
+	mm_segment_t old_fs = get_fs();
+
+	set_fs(KERNEL_DS);
+	pagefault_disable();
+	pax_open_kernel();
+	ret = __copy_to_user_inatomic((void __force_user *)dst, src, size);
+	pax_close_kernel();
+	pagefault_enable();
+	set_fs(old_fs);
+
+	return ret ? -EFAULT : 0;
+}
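- note: probe_kernel_write() above is the PaX variant that brackets the copy with pax_open_kernel()/pax_close_kernel(), so KERNEXEC-protected text/rodata can still be patched from legitimate kernel paths while faults are turned into -EFAULT. A minimal, purely illustrative caller (hypothetical helper, not part of the patch):

	/* hypothetical helper, for illustration only: patch a single byte that
	 * may live in a read-only mapping; returns 0 on success or -EFAULT if
	 * the destination faults */
	static long example_poke_byte(void *dst, unsigned char val)
	{
		return probe_kernel_write(dst, &val, sizeof(val));
	}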
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/gup.c linux-4.0.9-pax/arch/x86/mm/gup.c
--- linux-4.0.9/arch/x86/mm/gup.c	2015-04-13 11:21:01.894617459 +0200
+++ linux-4.0.9-pax/arch/x86/mm/gup.c	2015-04-15 12:13:52.946318620 +0200
@@ -268,7 +268,7 @@ int __get_user_pages_fast(unsigned long
 	addr = start;
 	len = (unsigned long) nr_pages << PAGE_SHIFT;
 	end = start + len;
-	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+	if (unlikely(!access_ok_noprefault(write ? VERIFY_WRITE : VERIFY_READ,
 					(void __user *)start, len)))
 		return 0;
 
@@ -344,6 +344,10 @@ int get_user_pages_fast(unsigned long st
 		goto slow_irqon;
 #endif
 
+	if (unlikely(!access_ok_noprefault(write ? VERIFY_WRITE : VERIFY_READ,
+					(void __user *)start, len)))
+		return 0;
+
 	/*
 	 * XXX: batch / limit 'nr', to avoid large irq off latency
 	 * needs some instrumenting to determine the common sizes used by
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/highmem_32.c linux-4.0.9-pax/arch/x86/mm/highmem_32.c
--- linux-4.0.9/arch/x86/mm/highmem_32.c	2015-03-18 15:21:50.296349253 +0100
+++ linux-4.0.9-pax/arch/x86/mm/highmem_32.c	2015-04-15 12:13:52.946318620 +0200
@@ -45,7 +45,11 @@ void *kmap_atomic_prot(struct page *page
 	idx = type + KM_TYPE_NR*smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	BUG_ON(!pte_none(*(kmap_pte-idx)));
+
+	pax_open_kernel();
 	set_pte(kmap_pte-idx, mk_pte(page, prot));
+	pax_close_kernel();
+
 	arch_flush_lazy_mmu_mode();
 
 	return (void *)vaddr;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/hugetlbpage.c linux-4.0.9-pax/arch/x86/mm/hugetlbpage.c
--- linux-4.0.9/arch/x86/mm/hugetlbpage.c	2015-04-13 11:21:01.894617459 +0200
+++ linux-4.0.9-pax/arch/x86/mm/hugetlbpage.c	2015-04-15 12:13:52.946318620 +0200
@@ -114,6 +114,12 @@ static unsigned long hugetlb_get_unmappe
 		VM_BUG_ON(addr != -ENOMEM);
 		info.flags = 0;
 		info.low_limit = TASK_UNMAPPED_BASE;
+
+#ifdef CONFIG_PAX_RANDMMAP
+		if (current->mm->pax_flags & MF_PAX_RANDMMAP)
+			info.low_limit += current->mm->delta_mmap;
+#endif
+
 		info.high_limit = TASK_SIZE;
 		addr = vm_unmapped_area(&info);
 	}
@@ -128,10 +134,19 @@ hugetlb_get_unmapped_area(struct file *f
 	struct hstate *h = hstate_file(file);
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
+	unsigned long pax_task_size = TASK_SIZE;
 
 	if (len & ~huge_page_mask(h))
 		return -EINVAL;
-	if (len > TASK_SIZE)
+
+#ifdef CONFIG_PAX_SEGMEXEC
+	if (mm->pax_flags & MF_PAX_SEGMEXEC)
+		pax_task_size = SEGMEXEC_TASK_SIZE;
+#endif
+
+	pax_task_size -= PAGE_SIZE;
+
+	if (len > pax_task_size)
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED) {
@@ -140,11 +155,14 @@ hugetlb_get_unmapped_area(struct file *f
 		return addr;
 	}
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (!(mm->pax_flags & MF_PAX_RANDMMAP))
+#endif
+
 	if (addr) {
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vma->vm_start))
+		if (pax_task_size - len >= addr && check_heap_stack_gap(vma, addr, len))
 			return addr;
 	}
 	if (mm->get_unmapped_area == arch_get_unmapped_area)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/init_32.c linux-4.0.9-pax/arch/x86/mm/init_32.c
--- linux-4.0.9/arch/x86/mm/init_32.c	2015-03-18 15:21:50.296349253 +0100
+++ linux-4.0.9-pax/arch/x86/mm/init_32.c	2015-04-15 12:13:52.946318620 +0200
@@ -62,33 +62,6 @@ static noinline int do_test_wp_bit(void)
 bool __read_mostly __vmalloc_start_set = false;
 
 /*
- * Creates a middle page table and puts a pointer to it in the
- * given global directory entry. This only returns the gd entry
- * in non-PAE compilation mode, since the middle layer is folded.
- */
-static pmd_t * __init one_md_table_init(pgd_t *pgd)
-{
-	pud_t *pud;
-	pmd_t *pmd_table;
-
-#ifdef CONFIG_X86_PAE
-	if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
-		pmd_table = (pmd_t *)alloc_low_page();
-		paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
-		set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
-		pud = pud_offset(pgd, 0);
-		BUG_ON(pmd_table != pmd_offset(pud, 0));
-
-		return pmd_table;
-	}
-#endif
-	pud = pud_offset(pgd, 0);
-	pmd_table = pmd_offset(pud, 0);
-
-	return pmd_table;
-}
-
-/*
  * Create a page table and place a pointer to it in a middle page
  * directory entry:
  */
@@ -98,13 +71,28 @@ static pte_t * __init one_page_table_ini
 		pte_t *page_table = (pte_t *)alloc_low_page();
 
 		paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
+#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)
+		set_pmd(pmd, __pmd(__pa(page_table) | _KERNPG_TABLE));
+#else
 		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
+#endif
 		BUG_ON(page_table != pte_offset_kernel(pmd, 0));
 	}
 
 	return pte_offset_kernel(pmd, 0);
 }
 
+static pmd_t * __init one_md_table_init(pgd_t *pgd)
+{
+	pud_t *pud;
+	pmd_t *pmd_table;
+
+	pud = pud_offset(pgd, 0);
+	pmd_table = pmd_offset(pud, 0);
+
+	return pmd_table;
+}
+
 pmd_t * __init populate_extra_pmd(unsigned long vaddr)
 {
 	int pgd_idx = pgd_index(vaddr);
@@ -208,6 +196,7 @@ page_table_range_init(unsigned long star
 	int pgd_idx, pmd_idx;
 	unsigned long vaddr;
 	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte = NULL;
 	unsigned long count = page_table_range_init_count(start, end);
@@ -222,8 +211,13 @@ page_table_range_init(unsigned long star
 	pgd = pgd_base + pgd_idx;
 
 	for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
-		pmd = one_md_table_init(pgd);
-		pmd = pmd + pmd_index(vaddr);
+		pud = pud_offset(pgd, vaddr);
+		pmd = pmd_offset(pud, vaddr);
+
+#ifdef CONFIG_X86_PAE
+		paravirt_alloc_pmd(&init_mm, __pa(pmd) >> PAGE_SHIFT);
+#endif
+
 		for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
 							pmd++, pmd_idx++) {
 			pte = page_table_kmap_check(one_page_table_init(pmd),
@@ -235,11 +229,20 @@ page_table_range_init(unsigned long star
 	}
 }
 
-static inline int is_kernel_text(unsigned long addr)
+static inline int is_kernel_text(unsigned long start, unsigned long end)
 {
-	if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
-		return 1;
-	return 0;
+	if ((start > ktla_ktva((unsigned long)_etext) ||
+	     end <= ktla_ktva((unsigned long)_stext)) &&
+	    (start > ktla_ktva((unsigned long)_einittext) ||
+	     end <= ktla_ktva((unsigned long)_sinittext)) &&
+
+#ifdef CONFIG_ACPI_SLEEP
+	    (start > (unsigned long)__va(acpi_wakeup_address) + 0x4000 || end <= (unsigned long)__va(acpi_wakeup_address)) &&
+#endif
+
+	    (start > (unsigned long)__va(0xfffff) || end <= (unsigned long)__va(0xc0000)))
+		return 0;
+	return 1;
 }
 
 /*
@@ -256,9 +259,10 @@ kernel_physical_mapping_init(unsigned lo
 	unsigned long last_map_addr = end;
 	unsigned long start_pfn, end_pfn;
 	pgd_t *pgd_base = swapper_pg_dir;
-	int pgd_idx, pmd_idx, pte_ofs;
+	unsigned int pgd_idx, pmd_idx, pte_ofs;
 	unsigned long pfn;
 	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned pages_2m, pages_4k;
@@ -291,8 +295,13 @@ repeat:
 	pfn = start_pfn;
 	pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
 	pgd = pgd_base + pgd_idx;
-	for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
-		pmd = one_md_table_init(pgd);
+	for (; pgd_idx < PTRS_PER_PGD && pfn < max_low_pfn; pgd++, pgd_idx++) {
+		pud = pud_offset(pgd, 0);
+		pmd = pmd_offset(pud, 0);
+
+#ifdef CONFIG_X86_PAE
+		paravirt_alloc_pmd(&init_mm, __pa(pmd) >> PAGE_SHIFT);
+#endif
 
 		if (pfn >= end_pfn)
 			continue;
@@ -304,14 +313,13 @@ repeat:
 #endif
 		for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn;
 		     pmd++, pmd_idx++) {
-			unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;
+			unsigned long address = pfn * PAGE_SIZE + PAGE_OFFSET;
 
 			/*
 			 * Map with big pages if possible, otherwise
 			 * create normal page tables:
 			 */
 			if (use_pse) {
-				unsigned int addr2;
 				pgprot_t prot = PAGE_KERNEL_LARGE;
 				/*
 				 * first pass will use the same initial
@@ -322,11 +330,7 @@ repeat:
 						 _PAGE_PSE);
 
 				pfn &= PMD_MASK >> PAGE_SHIFT;
-				addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
-					PAGE_OFFSET + PAGE_SIZE-1;
-
-				if (is_kernel_text(addr) ||
-				    is_kernel_text(addr2))
+				if (is_kernel_text(address, address + PMD_SIZE))
 					prot = PAGE_KERNEL_LARGE_EXEC;
 
 				pages_2m++;
@@ -343,7 +347,7 @@ repeat:
 			pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
 			pte += pte_ofs;
 			for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
-			     pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
+			     pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
 				pgprot_t prot = PAGE_KERNEL;
 				/*
 				 * first pass will use the same initial
@@ -351,7 +355,7 @@ repeat:
 				 */
 				pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);
 
-				if (is_kernel_text(addr))
+				if (is_kernel_text(address, address + PAGE_SIZE))
 					prot = PAGE_KERNEL_EXEC;
 
 				pages_4k++;
@@ -474,7 +478,7 @@ void __init native_pagetable_init(void)
 
 		pud = pud_offset(pgd, va);
 		pmd = pmd_offset(pud, va);
-		if (!pmd_present(*pmd))
+		if (!pmd_present(*pmd)) // PAX TODO || pmd_large(*pmd))
 			break;
 
 		/* should not be large page here */
@@ -532,12 +536,10 @@ void __init early_ioremap_page_table_ran
 
 static void __init pagetable_init(void)
 {
-	pgd_t *pgd_base = swapper_pg_dir;
-
-	permanent_kmaps_init(pgd_base);
+	permanent_kmaps_init(swapper_pg_dir);
 }
 
-pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
+pteval_t __supported_pte_mask __read_only = ~(_PAGE_NX | _PAGE_GLOBAL);
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
 /* user-defined highmem size */
@@ -787,10 +789,10 @@ void __init mem_init(void)
 		((unsigned long)&__init_end -
 		 (unsigned long)&__init_begin) >> 10,
 
-		(unsigned long)&_etext, (unsigned long)&_edata,
-		((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
+		(unsigned long)&_sdata, (unsigned long)&_edata,
+		((unsigned long)&_edata - (unsigned long)&_sdata) >> 10,
 
-		(unsigned long)&_text, (unsigned long)&_etext,
+		ktla_ktva((unsigned long)&_text), ktla_ktva((unsigned long)&_etext),
 		((unsigned long)&_etext - (unsigned long)&_text) >> 10);
 
 	/*
@@ -884,6 +886,7 @@ void set_kernel_text_rw(void)
 	if (!kernel_set_to_readonly)
 		return;
 
+	start = ktla_ktva(start);
 	pr_debug("Set kernel text: %lx - %lx for read write\n",
 		 start, start+size);
 
@@ -898,6 +901,7 @@ void set_kernel_text_ro(void)
 	if (!kernel_set_to_readonly)
 		return;
 
+	start = ktla_ktva(start);
 	pr_debug("Set kernel text: %lx - %lx for read only\n",
 		 start, start+size);
 
@@ -926,6 +930,7 @@ void mark_rodata_ro(void)
 	unsigned long start = PFN_ALIGN(_text);
 	unsigned long size = PFN_ALIGN(_etext) - start;
 
+	start = ktla_ktva(start);
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 	printk(KERN_INFO "Write protecting the kernel text: %luk\n",
 		size >> 10);
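- note: the init_32.c hunks above lean on ktla_ktva()/ktva_ktla() to convert between the kernel-text mapping and its linear alias under KERNEXEC; their definitions are not part of this file. The shape below is an assumption shown for orientation only (they collapse to the identity when KERNEXEC is off), not a quote from the patch:

	/* assumed shape, for illustration; defined in the pgtable headers */
	#ifdef CONFIG_PAX_KERNEXEC
	# define ktla_ktva(addr)	((addr) + LOAD_PHYSICAL_ADDR + PAGE_OFFSET)
	# define ktva_ktla(addr)	((addr) - LOAD_PHYSICAL_ADDR - PAGE_OFFSET)
	#else
	# define ktla_ktva(addr)	(addr)
	# define ktva_ktla(addr)	(addr)
	#endif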
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/init_64.c linux-4.0.9-pax/arch/x86/mm/init_64.c
--- linux-4.0.9/arch/x86/mm/init_64.c	2015-03-18 15:21:50.296349253 +0100
+++ linux-4.0.9-pax/arch/x86/mm/init_64.c	2015-05-02 16:36:08.703018061 +0200
@@ -150,7 +150,7 @@ early_param("gbpages", parse_direct_gbpa
  * around without checking the pgd every time.
  */
 
-pteval_t __supported_pte_mask __read_mostly = ~0;
+pteval_t __supported_pte_mask __read_only = ~_PAGE_NX;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
 int force_personality32;
@@ -183,7 +183,12 @@ void sync_global_pgds(unsigned long star
 
 	for (address = start; address <= end; address += PGDIR_SIZE) {
 		const pgd_t *pgd_ref = pgd_offset_k(address);
+
+#ifdef CONFIG_PAX_PER_CPU_PGD
+		unsigned long cpu;
+#else
 		struct page *page;
+#endif
 
 		/*
 		 * When it is called after memory hot remove, pgd_none()
@@ -194,6 +199,25 @@ void sync_global_pgds(unsigned long star
 			continue;
 
 		spin_lock(&pgd_lock);
+
+#ifdef CONFIG_PAX_PER_CPU_PGD
+		for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
+			pgd_t *pgd = pgd_offset_cpu(cpu, user, address);
+
+			if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
+				BUG_ON(pgd_page_vaddr(*pgd)
+				       != pgd_page_vaddr(*pgd_ref));
+
+			if (removed) {
+				if (pgd_none(*pgd_ref) && !pgd_none(*pgd))
+					pgd_clear(pgd);
+			} else {
+				if (pgd_none(*pgd))
+					set_pgd(pgd, *pgd_ref);
+			}
+
+			pgd = pgd_offset_cpu(cpu, kernel, address);
+#else
 		list_for_each_entry(page, &pgd_list, lru) {
 			pgd_t *pgd;
 			spinlock_t *pgt_lock;
@@ -202,6 +226,7 @@ void sync_global_pgds(unsigned long star
 			/* the pgt_lock only for Xen */
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 			spin_lock(pgt_lock);
+#endif
 
 			if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
 				BUG_ON(pgd_page_vaddr(*pgd)
@@ -215,7 +240,10 @@ void sync_global_pgds(unsigned long star
 					set_pgd(pgd, *pgd_ref);
 			}
 
+#ifndef CONFIG_PAX_PER_CPU_PGD
 			spin_unlock(pgt_lock);
+#endif
+
 		}
 		spin_unlock(&pgd_lock);
 	}
@@ -248,7 +276,7 @@ static pud_t *fill_pud(pgd_t *pgd, unsig
 {
 	if (pgd_none(*pgd)) {
 		pud_t *pud = (pud_t *)spp_getpage();
-		pgd_populate(&init_mm, pgd, pud);
+		pgd_populate_kernel(&init_mm, pgd, pud);
 		if (pud != pud_offset(pgd, 0))
 			printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
 			       pud, pud_offset(pgd, 0));
@@ -260,7 +288,7 @@ static pmd_t *fill_pmd(pud_t *pud, unsig
 {
 	if (pud_none(*pud)) {
 		pmd_t *pmd = (pmd_t *) spp_getpage();
-		pud_populate(&init_mm, pud, pmd);
+		pud_populate_kernel(&init_mm, pud, pmd);
 		if (pmd != pmd_offset(pud, 0))
 			printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
 			       pmd, pmd_offset(pud, 0));
@@ -289,7 +317,9 @@ void set_pte_vaddr_pud(pud_t *pud_page,
 	pmd = fill_pmd(pud, vaddr);
 	pte = fill_pte(pmd, vaddr);
 
+	pax_open_kernel();
 	set_pte(pte, new_pte);
+	pax_close_kernel();
 
 	/*
 	 * It's enough to flush this one mapping.
@@ -351,14 +381,12 @@ static void __init __init_extra_mapping(
 		pgd = pgd_offset_k((unsigned long)__va(phys));
 		if (pgd_none(*pgd)) {
 			pud = (pud_t *) spp_getpage();
-			set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
-						_PAGE_USER));
+			set_pgd(pgd, __pgd(__pa(pud) | _PAGE_TABLE));
 		}
 		pud = pud_offset(pgd, (unsigned long)__va(phys));
 		if (pud_none(*pud)) {
 			pmd = (pmd_t *) spp_getpage();
-			set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
-						_PAGE_USER));
+			set_pud(pud, __pud(__pa(pmd) | _PAGE_TABLE));
 		}
 		pmd = pmd_offset(pud, phys);
 		BUG_ON(!pmd_none(*pmd));
@@ -599,7 +627,7 @@ phys_pud_init(pud_t *pud_page, unsigned
 					      prot);
 
 		spin_lock(&init_mm.page_table_lock);
-		pud_populate(&init_mm, pud, pmd);
+		pud_populate_kernel(&init_mm, pud, pmd);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 	__flush_tlb_all();
@@ -640,7 +668,7 @@ kernel_physical_mapping_init(unsigned lo
 						 page_size_mask);
 
 		spin_lock(&init_mm.page_table_lock);
-		pgd_populate(&init_mm, pgd, pud);
+		pgd_populate_kernel(&init_mm, pgd, pud);
 		spin_unlock(&init_mm.page_table_lock);
 		pgd_changed = true;
 	}
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/init.c linux-4.0.9-pax/arch/x86/mm/init.c
--- linux-4.0.9/arch/x86/mm/init.c	2015-04-13 11:21:01.894617459 +0200
+++ linux-4.0.9-pax/arch/x86/mm/init.c	2015-04-15 12:13:52.946318620 +0200
@@ -17,6 +17,7 @@
 #include <asm/proto.h>
 #include <asm/dma.h>		/* for MAX_DMA_PFN */
 #include <asm/microcode.h>
+#include <asm/desc.h>
 
 /*
  * We need to define the tracepoints somewhere, and tlb.c
@@ -620,7 +621,18 @@ void __init init_mem_mapping(void)
 	early_ioremap_page_table_range_init();
 #endif
 
+#ifdef CONFIG_PAX_PER_CPU_PGD
+	clone_pgd_range(get_cpu_pgd(0, kernel) + KERNEL_PGD_BOUNDARY,
+			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+			KERNEL_PGD_PTRS);
+	clone_pgd_range(get_cpu_pgd(0, user) + KERNEL_PGD_BOUNDARY,
+			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+			KERNEL_PGD_PTRS);
+	load_cr3(get_cpu_pgd(0, kernel));
+#else
 	load_cr3(swapper_pg_dir);
+#endif
+
 	__flush_tlb_all();
 
 	early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
@@ -638,7 +650,13 @@ void __init init_mem_mapping(void)
  */
 int devmem_is_allowed(unsigned long pagenr)
 {
-	if (pagenr < 256)
+	if (!pagenr)
+		return 1;
+#ifdef CONFIG_VM86
+	if (pagenr < (ISA_START_ADDRESS >> PAGE_SHIFT))
+		return 1;
+#endif
+	if ((ISA_START_ADDRESS >> PAGE_SHIFT) <= pagenr && pagenr < (ISA_END_ADDRESS >> PAGE_SHIFT))
 		return 1;
 	if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
 		return 0;
@@ -687,6 +705,87 @@ void free_init_pages(char *what, unsigne
 
 void free_initmem(void)
 {
+
+#ifdef CONFIG_PAX_KERNEXEC
+#ifdef CONFIG_X86_32
+	/* PaX: limit KERNEL_CS to actual size */
+	unsigned long addr, limit;
+	struct desc_struct d;
+	int cpu;
+
+	limit = paravirt_enabled() ? ktva_ktla(0xffffffff) : (unsigned long)&_etext;
+	limit = (limit - 1UL) >> PAGE_SHIFT;
+
+	memset(__LOAD_PHYSICAL_ADDR + PAGE_OFFSET, POISON_FREE_INITMEM, PAGE_SIZE);
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
+		pack_descriptor(&d, get_desc_base(&get_cpu_gdt_table(cpu)[GDT_ENTRY_KERNEL_CS]), limit, 0x9B, 0xC);
+		write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_KERNEL_CS, &d, DESCTYPE_S);
+		write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_KERNEXEC_KERNEL_CS, &d, DESCTYPE_S);
+	}
+
+	/* PaX: make KERNEL_CS read-only */
+	addr = PFN_ALIGN(ktla_ktva((unsigned long)&_text));
+	if (!paravirt_enabled())
+		set_memory_ro(addr, (PFN_ALIGN(_sdata) - addr) >> PAGE_SHIFT);
+/*
+		for (addr = ktla_ktva((unsigned long)&_text); addr < (unsigned long)&_sdata; addr += PMD_SIZE) {
+			pgd = pgd_offset_k(addr);
+			pud = pud_offset(pgd, addr);
+			pmd = pmd_offset(pud, addr);
+			set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW));
+		}
+*/
+#ifdef CONFIG_X86_PAE
+	set_memory_nx(PFN_ALIGN(__init_begin), (PFN_ALIGN(__init_end) - PFN_ALIGN(__init_begin)) >> PAGE_SHIFT);
+/*
+	for (addr = (unsigned long)&__init_begin; addr < (unsigned long)&__init_end; addr += PMD_SIZE) {
+		pgd = pgd_offset_k(addr);
+		pud = pud_offset(pgd, addr);
+		pmd = pmd_offset(pud, addr);
+		set_pmd(pmd, __pmd(pmd_val(*pmd) | (_PAGE_NX & __supported_pte_mask)));
+	}
+*/
+#endif
+
+#ifdef CONFIG_MODULES
+	set_memory_4k((unsigned long)MODULES_EXEC_VADDR, (MODULES_EXEC_END - MODULES_EXEC_VADDR) >> PAGE_SHIFT);
+#endif
+
+#else
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long addr, end;
+
+	/* PaX: make kernel code/rodata read-only, rest non-executable */
+	for (addr = __START_KERNEL_map; addr < __START_KERNEL_map + KERNEL_IMAGE_SIZE; addr += PMD_SIZE) {
+		pgd = pgd_offset_k(addr);
+		pud = pud_offset(pgd, addr);
+		pmd = pmd_offset(pud, addr);
+		if (!pmd_present(*pmd))
+			continue;
+		if ((unsigned long)_text <= addr && addr < (unsigned long)_sdata)
+			set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW));
+		else
+			set_pmd(pmd, __pmd(pmd_val(*pmd) | (_PAGE_NX & __supported_pte_mask)));
+	}
+
+	addr = (unsigned long)__va(__pa(__START_KERNEL_map));
+	end = addr + KERNEL_IMAGE_SIZE;
+	for (; addr < end; addr += PMD_SIZE) {
+		pgd = pgd_offset_k(addr);
+		pud = pud_offset(pgd, addr);
+		pmd = pmd_offset(pud, addr);
+		if (!pmd_present(*pmd))
+			continue;
+		if ((unsigned long)__va(__pa(_text)) <= addr && addr < (unsigned long)__va(__pa(_sdata)))
+			set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW));
+	}
+#endif
+
+	flush_tlb_all();
+#endif
+
 	free_init_pages("unused kernel",
 			(unsigned long)(&__init_begin),
 			(unsigned long)(&__init_end));
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/iomap_32.c linux-4.0.9-pax/arch/x86/mm/iomap_32.c
--- linux-4.0.9/arch/x86/mm/iomap_32.c	2015-03-18 15:21:50.296349253 +0100
+++ linux-4.0.9-pax/arch/x86/mm/iomap_32.c	2015-04-15 12:13:52.946318620 +0200
@@ -64,7 +64,11 @@ void *kmap_atomic_prot_pfn(unsigned long
 	type = kmap_atomic_idx_push();
 	idx = type + KM_TYPE_NR * smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+
+	pax_open_kernel();
 	set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
+	pax_close_kernel();
+
 	arch_flush_lazy_mmu_mode();
 
 	return (void *)vaddr;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/ioremap.c linux-4.0.9-pax/arch/x86/mm/ioremap.c
--- linux-4.0.9/arch/x86/mm/ioremap.c	2015-03-18 15:21:50.296349253 +0100
+++ linux-4.0.9-pax/arch/x86/mm/ioremap.c	2015-04-15 12:13:52.946318620 +0200
@@ -56,8 +56,8 @@ static int __ioremap_check_ram(unsigned
 	unsigned long i;
 
 	for (i = 0; i < nr_pages; ++i)
-		if (pfn_valid(start_pfn + i) &&
-		    !PageReserved(pfn_to_page(start_pfn + i)))
+		if (pfn_valid(start_pfn + i) && (start_pfn + i >= 0x100 ||
+		    !PageReserved(pfn_to_page(start_pfn + i))))
 			return 1;
 
 	WARN_ONCE(1, "ioremap on RAM pfn 0x%lx\n", start_pfn);
@@ -283,7 +283,7 @@ EXPORT_SYMBOL(ioremap_prot);
  *
  * Caller must ensure there is only one unmapping for the same pointer.
  */
-void iounmap(volatile void __iomem *addr)
+void iounmap(const volatile void __iomem *addr)
 {
 	struct vm_struct *p, *o;
 
@@ -332,30 +332,29 @@ EXPORT_SYMBOL(iounmap);
  */
 void *xlate_dev_mem_ptr(phys_addr_t phys)
 {
-	void *addr;
-	unsigned long start = phys & PAGE_MASK;
-
 	/* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
-	if (page_is_ram(start >> PAGE_SHIFT))
+	if (page_is_ram(phys >> PAGE_SHIFT))
+#ifdef CONFIG_HIGHMEM
+	if ((phys >> PAGE_SHIFT) < max_low_pfn)
+#endif
 		return __va(phys);
 
-	addr = (void __force *)ioremap_cache(start, PAGE_SIZE);
-	if (addr)
-		addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
-
-	return addr;
+	return (void __force *)ioremap_cache(phys, PAGE_SIZE);
 }
 
 void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
 {
 	if (page_is_ram(phys >> PAGE_SHIFT))
+#ifdef CONFIG_HIGHMEM
+	if ((phys >> PAGE_SHIFT) < max_low_pfn)
+#endif
 		return;
 
 	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
 	return;
 }
 
-static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
+static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __read_only __aligned(PAGE_SIZE);
 
 static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
 {
@@ -391,8 +390,7 @@ void __init early_ioremap_init(void)
 	early_ioremap_setup();
 
 	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
-	memset(bm_pte, 0, sizeof(bm_pte));
-	pmd_populate_kernel(&init_mm, pmd, bm_pte);
+	pmd_populate_user(&init_mm, pmd, bm_pte);
 
 	/*
 	 * The boot-ioremap range spans multiple pmds, for which
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/kmemcheck/kmemcheck.c linux-4.0.9-pax/arch/x86/mm/kmemcheck/kmemcheck.c
--- linux-4.0.9/arch/x86/mm/kmemcheck/kmemcheck.c	2015-03-18 15:21:50.296349253 +0100
+++ linux-4.0.9-pax/arch/x86/mm/kmemcheck/kmemcheck.c	2015-04-15 12:13:52.946318620 +0200
@@ -628,9 +628,9 @@ bool kmemcheck_fault(struct pt_regs *reg
 	 * memory (e.g. tracked pages)? For now, we need this to avoid
 	 * invoking kmemcheck for PnP BIOS calls.
 	 */
-	if (regs->flags & X86_VM_MASK)
+	if (v8086_mode(regs))
 		return false;
-	if (regs->cs != __KERNEL_CS)
+	if (regs->cs != __KERNEL_CS && regs->cs != __KERNEXEC_KERNEL_CS)
 		return false;
 
 	pte = kmemcheck_pte_lookup(address);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/Makefile linux-4.0.9-pax/arch/x86/mm/Makefile
--- linux-4.0.9/arch/x86/mm/Makefile	2015-04-13 11:21:01.894617459 +0200
+++ linux-4.0.9-pax/arch/x86/mm/Makefile	2015-04-15 12:13:52.946318620 +0200
@@ -35,3 +35,7 @@ obj-$(CONFIG_NUMA_EMU)		+= numa_emulatio
 obj-$(CONFIG_MEMTEST)		+= memtest.o
 
 obj-$(CONFIG_X86_INTEL_MPX)	+= mpx.o
+
+quote:="
+obj-$(CONFIG_X86_64)		+= uderef_64.o
+CFLAGS_uderef_64.o		:= $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/mmap.c linux-4.0.9-pax/arch/x86/mm/mmap.c
--- linux-4.0.9/arch/x86/mm/mmap.c	2015-04-13 11:21:01.894617459 +0200
+++ linux-4.0.9-pax/arch/x86/mm/mmap.c	2015-04-15 12:13:52.946318620 +0200
@@ -52,7 +52,7 @@ static unsigned long stack_maxrandom_siz
  * Leave an at least ~128 MB hole with possible stack randomization.
  */
 #define MIN_GAP (128*1024*1024UL + stack_maxrandom_size())
-#define MAX_GAP (TASK_SIZE/6*5)
+#define MAX_GAP (pax_task_size/6*5)
 
 static int mmap_is_legacy(void)
 {
@@ -82,27 +82,40 @@ static unsigned long mmap_rnd(void)
 	return rnd << PAGE_SHIFT;
 }
 
-static unsigned long mmap_base(void)
+static unsigned long mmap_base(struct mm_struct *mm)
 {
 	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long pax_task_size = TASK_SIZE;
+
+#ifdef CONFIG_PAX_SEGMEXEC
+	if (mm->pax_flags & MF_PAX_SEGMEXEC)
+		pax_task_size = SEGMEXEC_TASK_SIZE;
+#endif
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;
 	else if (gap > MAX_GAP)
 		gap = MAX_GAP;
 
-	return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd());
+	return PAGE_ALIGN(pax_task_size - gap - mmap_rnd());
 }
 
 /*
  * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
  * does, but not when emulating X86_32
  */
-static unsigned long mmap_legacy_base(void)
+static unsigned long mmap_legacy_base(struct mm_struct *mm)
 {
-	if (mmap_is_ia32())
+	if (mmap_is_ia32()) {
+
+#ifdef CONFIG_PAX_SEGMEXEC
+		if (mm->pax_flags & MF_PAX_SEGMEXEC)
+			return SEGMEXEC_TASK_UNMAPPED_BASE;
+		else
+#endif
+
 		return TASK_UNMAPPED_BASE;
-	else
+	} else
 		return TASK_UNMAPPED_BASE + mmap_rnd();
 }
 
@@ -112,8 +125,15 @@ static unsigned long mmap_legacy_base(vo
  */
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
-	mm->mmap_legacy_base = mmap_legacy_base();
-	mm->mmap_base = mmap_base();
+	mm->mmap_legacy_base = mmap_legacy_base(mm);
+	mm->mmap_base = mmap_base(mm);
+
+#ifdef CONFIG_PAX_RANDMMAP
+	if (mm->pax_flags & MF_PAX_RANDMMAP) {
+		mm->mmap_legacy_base += mm->delta_mmap;
+		mm->mmap_base -= mm->delta_mmap + mm->delta_stack;
+	}
+#endif
 
 	if (mmap_is_legacy()) {
 		mm->mmap_base = mm->mmap_legacy_base;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/mmio-mod.c linux-4.0.9-pax/arch/x86/mm/mmio-mod.c
--- linux-4.0.9/arch/x86/mm/mmio-mod.c	2015-03-18 15:21:50.296349253 +0100
+++ linux-4.0.9-pax/arch/x86/mm/mmio-mod.c	2015-04-15 12:13:52.946318620 +0200
@@ -194,7 +194,7 @@ static void pre(struct kmmio_probe *p, s
 		break;
 	default:
 		{
-			unsigned char *ip = (unsigned char *)instptr;
+			unsigned char *ip = (unsigned char *)ktla_ktva(instptr);
 			my_trace->opcode = MMIO_UNKNOWN_OP;
 			my_trace->width = 0;
 			my_trace->value = (*ip) << 16 | *(ip + 1) << 8 |
@@ -234,7 +234,7 @@ static void post(struct kmmio_probe *p,
 static void ioremap_trace_core(resource_size_t offset, unsigned long size,
 							void __iomem *addr)
 {
-	static atomic_t next_id;
+	static atomic_unchecked_t next_id;
 	struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL);
 	/* These are page-unaligned. */
 	struct mmiotrace_map map = {
@@ -258,7 +258,7 @@ static void ioremap_trace_core(resource_
 			.private = trace
 		},
 		.phys = offset,
-		.id = atomic_inc_return(&next_id)
+		.id = atomic_inc_return_unchecked(&next_id)
 	};
 	map.map_id = trace->id;
 
@@ -290,7 +290,7 @@ void mmiotrace_ioremap(resource_size_t o
 	ioremap_trace_core(offset, size, addr);
 }
 
-static void iounmap_trace_core(volatile void __iomem *addr)
+static void iounmap_trace_core(const volatile void __iomem *addr)
 {
 	struct mmiotrace_map map = {
 		.phys = 0,
@@ -328,7 +328,7 @@ not_enabled:
 	}
 }
 
-void mmiotrace_iounmap(volatile void __iomem *addr)
+void mmiotrace_iounmap(const volatile void __iomem *addr)
 {
 	might_sleep();
 	if (is_enabled()) /* recheck and proper locking in *_core() */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/numa.c linux-4.0.9-pax/arch/x86/mm/numa.c
--- linux-4.0.9/arch/x86/mm/numa.c	2015-04-13 11:21:01.894617459 +0200
+++ linux-4.0.9-pax/arch/x86/mm/numa.c	2015-04-15 12:13:52.946318620 +0200
@@ -499,7 +499,7 @@ static void __init numa_clear_kernel_nod
 	}
 }
 
-static int __init numa_register_memblks(struct numa_meminfo *mi)
+static int __init __intentional_overflow(-1) numa_register_memblks(struct numa_meminfo *mi)
 {
 	unsigned long uninitialized_var(pfn_align);
 	int i, nid;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/pageattr.c linux-4.0.9-pax/arch/x86/mm/pageattr.c
--- linux-4.0.9/arch/x86/mm/pageattr.c	2015-03-18 15:21:50.296349253 +0100
+++ linux-4.0.9-pax/arch/x86/mm/pageattr.c	2015-04-15 12:13:52.946318620 +0200
@@ -262,7 +262,7 @@ static inline pgprot_t static_protection
 	 */
 #ifdef CONFIG_PCI_BIOS
 	if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
-		pgprot_val(forbidden) |= _PAGE_NX;
+		pgprot_val(forbidden) |= _PAGE_NX & __supported_pte_mask;
 #endif
 
 	/*
@@ -270,9 +270,10 @@ static inline pgprot_t static_protection
 	 * Does not cover __inittext since that is gone later on. On
 	 * 64bit we do not enforce !NX on the low mapping
 	 */
-	if (within(address, (unsigned long)_text, (unsigned long)_etext))
-		pgprot_val(forbidden) |= _PAGE_NX;
+	if (within(address, ktla_ktva((unsigned long)_text), ktla_ktva((unsigned long)_etext)))
+		pgprot_val(forbidden) |= _PAGE_NX & __supported_pte_mask;
 
+#ifdef CONFIG_DEBUG_RODATA
 	/*
 	 * The .rodata section needs to be read-only. Using the pfn
 	 * catches all aliases.
@@ -280,6 +281,7 @@ static inline pgprot_t static_protection
 	if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
 		   __pa_symbol(__end_rodata) >> PAGE_SHIFT))
 		pgprot_val(forbidden) |= _PAGE_RW;
+#endif
 
 #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
 	/*
@@ -318,6 +320,13 @@ static inline pgprot_t static_protection
 	}
 #endif
 
+#ifdef CONFIG_PAX_KERNEXEC
+	if (within(pfn, __pa(ktla_ktva((unsigned long)&_text)), __pa((unsigned long)&_sdata))) {
+		pgprot_val(forbidden) |= _PAGE_RW;
+		pgprot_val(forbidden) |= _PAGE_NX & __supported_pte_mask;
+	}
+#endif
+
 	prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
 
 	return prot;
@@ -440,23 +449,37 @@ EXPORT_SYMBOL_GPL(slow_virt_to_phys);
 static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 {
 	/* change init_mm */
+	pax_open_kernel();
 	set_pte_atomic(kpte, pte);
+
 #ifdef CONFIG_X86_32
 	if (!SHARED_KERNEL_PMD) {
+
+#ifdef CONFIG_PAX_PER_CPU_PGD
+		unsigned long cpu;
+#else
 		struct page *page;
+#endif
 
+#ifdef CONFIG_PAX_PER_CPU_PGD
+		for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
+			pgd_t *pgd = get_cpu_pgd(cpu, kernel);
+#else
 		list_for_each_entry(page, &pgd_list, lru) {
-			pgd_t *pgd;
+			pgd_t *pgd = (pgd_t *)page_address(page);
+#endif
+
 			pud_t *pud;
 			pmd_t *pmd;
 
-			pgd = (pgd_t *)page_address(page) + pgd_index(address);
+			pgd += pgd_index(address);
 			pud = pud_offset(pgd, address);
 			pmd = pmd_offset(pud, address);
 			set_pte_atomic((pte_t *)pmd, pte);
 		}
 	}
 #endif
+	pax_close_kernel();
 }
 
 static int
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/pat.c linux-4.0.9-pax/arch/x86/mm/pat.c
--- linux-4.0.9/arch/x86/mm/pat.c	2015-03-18 15:21:50.296349253 +0100
+++ linux-4.0.9-pax/arch/x86/mm/pat.c	2015-04-15 12:13:52.946318620 +0200
@@ -89,7 +89,7 @@ static inline enum page_cache_mode get_p
 	unsigned long pg_flags = pg->flags & _PGMT_MASK;
 
 	if (pg_flags == _PGMT_DEFAULT)
-		return -1;
+		return _PAGE_CACHE_MODE_NUM;
 	else if (pg_flags == _PGMT_WC)
 		return _PAGE_CACHE_MODE_WC;
 	else if (pg_flags == _PGMT_UC_MINUS)
@@ -346,7 +346,7 @@ static int reserve_ram_pages_type(u64 st
 
 		page = pfn_to_page(pfn);
 		type = get_page_memtype(page);
-		if (type != -1) {
+		if (type != _PAGE_CACHE_MODE_NUM) {
 			pr_info("reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%x, req 0x%x\n",
 				start, end - 1, type, req_type);
 			if (new_type)
@@ -498,7 +498,7 @@ int free_memtype(u64 start, u64 end)
 
 	if (!entry) {
 		printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
-		       current->comm, current->pid, start, end - 1);
+			current->comm, task_pid_nr(current), start, end - 1);
 		return -EINVAL;
 	}
 
@@ -532,10 +532,10 @@ static enum page_cache_mode lookup_memty
 		page = pfn_to_page(paddr >> PAGE_SHIFT);
 		rettype = get_page_memtype(page);
 		/*
-		 * -1 from get_page_memtype() implies RAM page is in its
+		 * _PAGE_CACHE_MODE_NUM from get_page_memtype() implies RAM page is in its
 		 * default state and not reserved, and hence of type WB
 		 */
-		if (rettype == -1)
+		if (rettype == _PAGE_CACHE_MODE_NUM)
 			rettype = _PAGE_CACHE_MODE_WB;
 
 		return rettype;
@@ -628,8 +628,8 @@ static inline int range_is_allowed(unsig
 
 	while (cursor < to) {
 		if (!devmem_is_allowed(pfn)) {
-			printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n",
-				current->comm, from, to - 1);
+			printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx] (%#010Lx)\n",
+				current->comm, from, to - 1, cursor);
 			return 0;
 		}
 		cursor += PAGE_SIZE;
@@ -700,7 +700,7 @@ int kernel_map_sync_memtype(u64 base, un
 	if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
 		printk(KERN_INFO "%s:%d ioremap_change_attr failed %s "
 			"for [mem %#010Lx-%#010Lx]\n",
-			current->comm, current->pid,
+			current->comm, task_pid_nr(current),
 			cattr_name(pcm),
 			base, (unsigned long long)(base + size-1));
 		return -EINVAL;
@@ -735,7 +735,7 @@ static int reserve_pfn_range(u64 paddr,
 		pcm = lookup_memtype(paddr);
 		if (want_pcm != pcm) {
 			printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
-				current->comm, current->pid,
+				current->comm, task_pid_nr(current),
 				cattr_name(want_pcm),
 				(unsigned long long)paddr,
 				(unsigned long long)(paddr + size - 1),
@@ -757,7 +757,7 @@ static int reserve_pfn_range(u64 paddr,
 			free_memtype(paddr, paddr + size);
 			printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
 				" for [mem %#010Lx-%#010Lx], got %s\n",
-				current->comm, current->pid,
+				current->comm, task_pid_nr(current),
 				cattr_name(want_pcm),
 				(unsigned long long)paddr,
 				(unsigned long long)(paddr + size - 1),
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/pat_rbtree.c linux-4.0.9-pax/arch/x86/mm/pat_rbtree.c
--- linux-4.0.9/arch/x86/mm/pat_rbtree.c	2015-03-18 15:21:50.300349253 +0100
+++ linux-4.0.9-pax/arch/x86/mm/pat_rbtree.c	2015-04-15 12:13:52.946318620 +0200
@@ -161,7 +161,7 @@ success:
 
 failure:
 	printk(KERN_INFO "%s:%d conflicting memory types "
-		"%Lx-%Lx %s<->%s\n", current->comm, current->pid, start,
+		"%Lx-%Lx %s<->%s\n", current->comm, task_pid_nr(current), start,
 		end, cattr_name(found_type), cattr_name(match->type));
 	return -EBUSY;
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/pf_in.c linux-4.0.9-pax/arch/x86/mm/pf_in.c
--- linux-4.0.9/arch/x86/mm/pf_in.c	2015-03-18 15:21:50.300349253 +0100
+++ linux-4.0.9-pax/arch/x86/mm/pf_in.c	2015-04-15 12:13:52.950318620 +0200
@@ -148,7 +148,7 @@ enum reason_type get_ins_type(unsigned l
 	int i;
 	enum reason_type rv = OTHERS;
 
-	p = (unsigned char *)ins_addr;
+	p = (unsigned char *)ktla_ktva(ins_addr);
 	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 
@@ -168,7 +168,7 @@ static unsigned int get_ins_reg_width(un
 	struct prefix_bits prf;
 	int i;
 
-	p = (unsigned char *)ins_addr;
+	p = (unsigned char *)ktla_ktva(ins_addr);
 	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 
@@ -191,7 +191,7 @@ unsigned int get_ins_mem_width(unsigned
 	struct prefix_bits prf;
 	int i;
 
-	p = (unsigned char *)ins_addr;
+	p = (unsigned char *)ktla_ktva(ins_addr);
 	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 
@@ -415,7 +415,7 @@ unsigned long get_ins_reg_val(unsigned l
 	struct prefix_bits prf;
 	int i;
 
-	p = (unsigned char *)ins_addr;
+	p = (unsigned char *)ktla_ktva(ins_addr);
 	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 	for (i = 0; i < ARRAY_SIZE(reg_rop); i++)
@@ -470,7 +470,7 @@ unsigned long get_ins_imm_val(unsigned l
 	struct prefix_bits prf;
 	int i;
 
-	p = (unsigned char *)ins_addr;
+	p = (unsigned char *)ktla_ktva(ins_addr);
 	p += skip_prefix(p, &prf);
 	p += get_opcode(p, &opcode);
 	for (i = 0; i < ARRAY_SIZE(imm_wop); i++)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/pgtable_32.c linux-4.0.9-pax/arch/x86/mm/pgtable_32.c
--- linux-4.0.9/arch/x86/mm/pgtable_32.c	2015-03-18 15:21:50.300349253 +0100
+++ linux-4.0.9-pax/arch/x86/mm/pgtable_32.c	2015-04-15 12:13:52.950318620 +0200
@@ -47,10 +47,13 @@ void set_pte_vaddr(unsigned long vaddr,
 		return;
 	}
 	pte = pte_offset_kernel(pmd, vaddr);
+
+	pax_open_kernel();
 	if (pte_val(pteval))
 		set_pte_at(&init_mm, vaddr, pte, pteval);
 	else
 		pte_clear(&init_mm, vaddr, pte);
+	pax_close_kernel();
 
 	/*
 	 * It's enough to flush this one mapping.
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/pgtable.c linux-4.0.9-pax/arch/x86/mm/pgtable.c
--- linux-4.0.9/arch/x86/mm/pgtable.c	2015-04-13 11:21:01.894617459 +0200
+++ linux-4.0.9-pax/arch/x86/mm/pgtable.c	2015-05-12 17:44:05.132123392 +0200
@@ -97,10 +97,75 @@ static inline void pgd_list_del(pgd_t *p
 	list_del(&page->lru);
 }
 
-#define UNSHARED_PTRS_PER_PGD				\
-	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+pgdval_t clone_pgd_mask __read_only = ~_PAGE_PRESENT;
 
+void __shadow_user_pgds(pgd_t *dst, const pgd_t *src)
+{
+	unsigned int count = USER_PGD_PTRS;
+
+	if (!pax_user_shadow_base)
+		return;
+
+	while (count--)
+		*dst++ = __pgd((pgd_val(*src++) | (_PAGE_NX & __supported_pte_mask)) & ~_PAGE_USER);
+}
+#endif
+
+#ifdef CONFIG_PAX_PER_CPU_PGD
+void __clone_user_pgds(pgd_t *dst, const pgd_t *src)
+{
+	unsigned int count = USER_PGD_PTRS;
+
+	while (count--) {
+		pgd_t pgd;
+
+#ifdef CONFIG_X86_64
+		pgd = __pgd(pgd_val(*src++) | _PAGE_USER);
+#else
+		pgd = *src++;
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF)
+		pgd = __pgd(pgd_val(pgd) & clone_pgd_mask);
+#endif
+
+		*dst++ = pgd;
+	}
+
+}
+#endif
 
+#ifdef CONFIG_X86_64
+#define pxd_t				pud_t
+#define pyd_t				pgd_t
+#define paravirt_release_pxd(pfn)	paravirt_release_pud(pfn)
+#define pgtable_pxd_page_ctor(page)	true
+#define pgtable_pxd_page_dtor(page)	do {} while (0)
+#define pxd_free(mm, pud)		pud_free((mm), (pud))
+#define pyd_populate(mm, pgd, pud)	pgd_populate((mm), (pgd), (pud))
+#define pyd_offset(mm, address)		pgd_offset((mm), (address))
+#define PYD_SIZE			PGDIR_SIZE
+#define mm_inc_nr_pxds(mm)		do {} while (0)
+#define mm_dec_nr_pxds(mm)		do {} while (0)
+#else
+#define pxd_t				pmd_t
+#define pyd_t				pud_t
+#define paravirt_release_pxd(pfn)	paravirt_release_pmd(pfn)
+#define pgtable_pxd_page_ctor(page)	pgtable_pmd_page_ctor(page)
+#define pgtable_pxd_page_dtor(page)	pgtable_pmd_page_dtor(page)
+#define pxd_free(mm, pud)		pmd_free((mm), (pud))
+#define pyd_populate(mm, pgd, pud)	pud_populate((mm), (pgd), (pud))
+#define pyd_offset(mm, address)		pud_offset((mm), (address))
+#define PYD_SIZE			PUD_SIZE
+#define mm_inc_nr_pxds(mm)		mm_inc_nr_pmds(mm)
+#define mm_dec_nr_pxds(mm)		mm_dec_nr_pmds(mm)
+#endif
+
+#ifdef CONFIG_PAX_PER_CPU_PGD
+static inline void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) {}
+static inline void pgd_dtor(pgd_t *pgd) {}
+#else
 static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
 {
 	BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
@@ -141,6 +206,7 @@ static void pgd_dtor(pgd_t *pgd)
 	pgd_list_del(pgd);
 	spin_unlock(&pgd_lock);
 }
+#endif
 
 /*
  * List of all pgd's needed for non-PAE so it can invalidate entries
@@ -153,7 +219,7 @@ static void pgd_dtor(pgd_t *pgd)
  * -- nyc
  */
 
-#ifdef CONFIG_X86_PAE
+#if defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE)
 /*
  * In PAE mode, we need to do a cr3 reload (=tlb flush) when
  * updating the top-level pagetable entries to guarantee the
@@ -165,7 +231,7 @@ static void pgd_dtor(pgd_t *pgd)
  * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
  * and initialize the kernel pmds here.
  */
-#define PREALLOCATED_PMDS	UNSHARED_PTRS_PER_PGD
+#define PREALLOCATED_PXDS	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
 
 void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
 {
@@ -183,46 +249,48 @@ void pud_populate(struct mm_struct *mm,
 	 */
 	flush_tlb_mm(mm);
 }
+#elif defined(CONFIG_X86_64) && defined(CONFIG_PAX_PER_CPU_PGD)
+#define PREALLOCATED_PXDS	USER_PGD_PTRS
 #else  /* !CONFIG_X86_PAE */
 
 /* No need to prepopulate any pagetable entries in non-PAE modes. */
-#define PREALLOCATED_PMDS	0
+#define PREALLOCATED_PXDS	0
 
 #endif	/* CONFIG_X86_PAE */
 
-static void free_pmds(struct mm_struct *mm, pmd_t *pmds[])
+static void free_pxds(struct mm_struct *mm, pxd_t *pxds[])
 {
 	int i;
 
-	for(i = 0; i < PREALLOCATED_PMDS; i++)
-		if (pmds[i]) {
-			pgtable_pmd_page_dtor(virt_to_page(pmds[i]));
-			free_page((unsigned long)pmds[i]);
-			mm_dec_nr_pmds(mm);
+	for(i = 0; i < PREALLOCATED_PXDS; i++)
+		if (pxds[i]) {
+			pgtable_pxd_page_dtor(virt_to_page(pxds[i]));
+			free_page((unsigned long)pxds[i]);
+			mm_dec_nr_pxds(mm);
 		}
 }
 
-static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
+static int preallocate_pxds(struct mm_struct *mm, pxd_t *pxds[])
 {
 	int i;
 	bool failed = false;
 
-	for(i = 0; i < PREALLOCATED_PMDS; i++) {
-		pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP);
-		if (!pmd)
+	for(i = 0; i < PREALLOCATED_PXDS; i++) {
+		pxd_t *pxd = (pxd_t *)__get_free_page(PGALLOC_GFP);
+		if (!pxd)
 			failed = true;
-		if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
-			free_page((unsigned long)pmd);
-			pmd = NULL;
+		if (pxd && !pgtable_pxd_page_ctor(virt_to_page(pxd))) {
+			free_page((unsigned long)pxd);
+			pxd = NULL;
 			failed = true;
 		}
-		if (pmd)
-			mm_inc_nr_pmds(mm);
-		pmds[i] = pmd;
+		if (pxd)
+			mm_inc_nr_pxds(mm);
+		pxds[i] = pxd;
 	}
 
 	if (failed) {
-		free_pmds(mm, pmds);
+		free_pxds(mm, pxds);
 		return -ENOMEM;
 	}
 
@@ -235,50 +303,54 @@ static int preallocate_pmds(struct mm_st
  * preallocate which never got a corresponding vma will need to be
  * freed manually.
  */
-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
+static void pgd_mop_up_pxds(struct mm_struct *mm, pgd_t *pgdp)
 {
 	int i;
 
-	for(i = 0; i < PREALLOCATED_PMDS; i++) {
+	for(i = 0; i < PREALLOCATED_PXDS; i++) {
 		pgd_t pgd = pgdp[i];
 
 		if (pgd_val(pgd) != 0) {
-			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
+			pxd_t *pxd = (pxd_t *)pgd_page_vaddr(pgd);
 
-			pgdp[i] = native_make_pgd(0);
+			set_pgd(pgdp + i, native_make_pgd(0));
 
-			paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
-			pmd_free(mm, pmd);
-			mm_dec_nr_pmds(mm);
+			paravirt_release_pxd(pgd_val(pgd) >> PAGE_SHIFT);
+			pxd_free(mm, pxd);
+			mm_dec_nr_pxds(mm);
 		}
 	}
 }
 
-static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
+static void pgd_prepopulate_pxd(struct mm_struct *mm, pgd_t *pgd, pxd_t *pxds[])
 {
-	pud_t *pud;
+	pyd_t *pyd;
 	int i;
 
-	if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
+	if (PREALLOCATED_PXDS == 0) /* Work around gcc-3.4.x bug */
 		return;
 
-	pud = pud_offset(pgd, 0);
+#ifdef CONFIG_X86_64
+	pyd = pyd_offset(mm, 0L);
+#else
+	pyd = pyd_offset(pgd, 0L);
+#endif
 
-	for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
-		pmd_t *pmd = pmds[i];
+	for (i = 0; i < PREALLOCATED_PXDS; i++, pyd++) {
+		pxd_t *pxd = pxds[i];
 
 		if (i >= KERNEL_PGD_BOUNDARY)
-			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
-			       sizeof(pmd_t) * PTRS_PER_PMD);
+			memcpy(pxd, (pxd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
+			       sizeof(pxd_t) * PTRS_PER_PMD);
 
-		pud_populate(mm, pud, pmd);
+		pyd_populate(mm, pyd, pxd);
 	}
 }
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd;
-	pmd_t *pmds[PREALLOCATED_PMDS];
+	pxd_t *pxds[PREALLOCATED_PXDS];
 
 	pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
 
@@ -287,11 +359,11 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 
 	mm->pgd = pgd;
 
-	if (preallocate_pmds(mm, pmds) != 0)
+	if (preallocate_pxds(mm, pxds) != 0)
 		goto out_free_pgd;
 
 	if (paravirt_pgd_alloc(mm) != 0)
-		goto out_free_pmds;
+		goto out_free_pxds;
 
 	/*
 	 * Make sure that pre-populating the pmds is atomic with
@@ -301,14 +373,14 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	spin_lock(&pgd_lock);
 
 	pgd_ctor(mm, pgd);
-	pgd_prepopulate_pmd(mm, pgd, pmds);
+	pgd_prepopulate_pxd(mm, pgd, pxds);
 
 	spin_unlock(&pgd_lock);
 
 	return pgd;
 
-out_free_pmds:
-	free_pmds(mm, pmds);
+out_free_pxds:
+	free_pxds(mm, pxds);
 out_free_pgd:
 	free_page((unsigned long)pgd);
 out:
@@ -317,7 +389,7 @@ out:
 
 void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-	pgd_mop_up_pmds(mm, pgd);
+	pgd_mop_up_pxds(mm, pgd);
 	pgd_dtor(pgd);
 	paravirt_pgd_free(mm, pgd);
 	free_page((unsigned long)pgd);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/physaddr.c linux-4.0.9-pax/arch/x86/mm/physaddr.c
--- linux-4.0.9/arch/x86/mm/physaddr.c	2015-03-18 15:21:50.300349253 +0100
+++ linux-4.0.9-pax/arch/x86/mm/physaddr.c	2015-04-15 12:13:52.950318620 +0200
@@ -10,7 +10,7 @@
 #ifdef CONFIG_X86_64
 
 #ifdef CONFIG_DEBUG_VIRTUAL
-unsigned long __phys_addr(unsigned long x)
+unsigned long __intentional_overflow(-1) __phys_addr(unsigned long x)
 {
 	unsigned long y = x - __START_KERNEL_map;
 
@@ -67,7 +67,7 @@ EXPORT_SYMBOL(__virt_addr_valid);
 #else
 
 #ifdef CONFIG_DEBUG_VIRTUAL
-unsigned long __phys_addr(unsigned long x)
+unsigned long __intentional_overflow(-1) __phys_addr(unsigned long x)
 {
 	unsigned long phys_addr = x - PAGE_OFFSET;
 	/* VMALLOC_* aren't constants  */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/setup_nx.c linux-4.0.9-pax/arch/x86/mm/setup_nx.c
--- linux-4.0.9/arch/x86/mm/setup_nx.c	2015-03-18 15:21:50.300349253 +0100
+++ linux-4.0.9-pax/arch/x86/mm/setup_nx.c	2015-04-15 12:13:52.950318620 +0200
@@ -5,8 +5,10 @@
 #include <asm/pgtable.h>
 #include <asm/proto.h>
 
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 static int disable_nx;
 
+#ifndef CONFIG_PAX_PAGEEXEC
 /*
  * noexec = on|off
  *
@@ -28,12 +30,17 @@ static int __init noexec_setup(char *str
 	return 0;
 }
 early_param("noexec", noexec_setup);
+#endif
+
+#endif
 
 void x86_configure_nx(void)
 {
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 	if (cpu_has_nx && !disable_nx)
 		__supported_pte_mask |= _PAGE_NX;
 	else
+#endif
 		__supported_pte_mask &= ~_PAGE_NX;
 }
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/tlb.c linux-4.0.9-pax/arch/x86/mm/tlb.c
--- linux-4.0.9/arch/x86/mm/tlb.c	2015-04-13 11:21:01.894617459 +0200
+++ linux-4.0.9-pax/arch/x86/mm/tlb.c	2015-04-15 12:13:52.950318620 +0200
@@ -45,7 +45,11 @@ void leave_mm(int cpu)
 		BUG();
 	if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
 		cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
+
+#ifndef CONFIG_PAX_PER_CPU_PGD
 		load_cr3(swapper_pg_dir);
+#endif
+
 		/*
 		 * This gets called in the idle path where RCU
 		 * functions differently.  Tracing normally
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/mm/uderef_64.c linux-4.0.9-pax/arch/x86/mm/uderef_64.c
--- linux-4.0.9/arch/x86/mm/uderef_64.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-4.0.9-pax/arch/x86/mm/uderef_64.c	2015-06-29 01:01:39.142428102 +0200
@@ -0,0 +1,37 @@
+#include <linux/mm.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_PAX_MEMORY_UDEREF
+/* PaX: due to the special call convention these functions must
+ * - remain leaf functions under all configurations,
+ * - never be called directly, only dereferenced from the wrappers.
+ */
+void __used __pax_open_userland(void)
+{
+	unsigned int cpu;
+
+	if (unlikely(!segment_eq(get_fs(), USER_DS)))
+		return;
+
+	cpu = raw_get_cpu();
+	BUG_ON((read_cr3() & ~PAGE_MASK) != PCID_KERNEL);
+	write_cr3(__pa_nodebug(get_cpu_pgd(cpu, user)) | PCID_USER | PCID_NOFLUSH);
+	raw_put_cpu_no_resched();
+}
+EXPORT_SYMBOL(__pax_open_userland);
+
+void __used __pax_close_userland(void)
+{
+	unsigned int cpu;
+
+	if (unlikely(!segment_eq(get_fs(), USER_DS)))
+		return;
+
+	cpu = raw_get_cpu();
+	BUG_ON((read_cr3() & ~PAGE_MASK) != PCID_USER);
+	write_cr3(__pa_nodebug(get_cpu_pgd(cpu, kernel)) | PCID_KERNEL | PCID_NOFLUSH);
+	raw_put_cpu_no_resched();
+}
+EXPORT_SYMBOL(__pax_close_userland);
+#endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/net/bpf_jit_comp.c linux-4.0.9-pax/arch/x86/net/bpf_jit_comp.c
--- linux-4.0.9/arch/x86/net/bpf_jit_comp.c	2015-06-26 10:29:22.458538574 +0200
+++ linux-4.0.9-pax/arch/x86/net/bpf_jit_comp.c	2015-06-26 10:29:32.598538551 +0200
@@ -174,7 +174,9 @@ static u8 add_2reg(u8 byte, u32 dst_reg,
 static void jit_fill_hole(void *area, unsigned int size)
 {
 	/* fill whole space with int3 instructions */
+	pax_open_kernel();
 	memset(area, 0xcc, size);
+	pax_close_kernel();
 }
 
 struct jit_context {
@@ -924,7 +926,9 @@ common_load:
 				pr_err("bpf_jit_compile fatal error\n");
 				return -EFAULT;
 			}
+			pax_open_kernel();
 			memcpy(image + proglen, temp, ilen);
+			pax_close_kernel();
 		}
 		proglen += ilen;
 		addrs[i] = proglen;
@@ -1001,7 +1005,6 @@ void bpf_int_jit_compile(struct bpf_prog
 
 	if (image) {
 		bpf_flush_icache(header, image + proglen);
-		set_memory_ro((unsigned long)header, header->pages);
 		prog->bpf_func = (void *)image;
 		prog->jited = true;
 	}
@@ -1014,12 +1017,8 @@ void bpf_jit_free(struct bpf_prog *fp)
 	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
 	struct bpf_binary_header *header = (void *)addr;
 
-	if (!fp->jited)
-		goto free_filter;
+	if (fp->jited)
+		bpf_jit_binary_free(header);
 
-	set_memory_rw(addr, header->pages);
-	bpf_jit_binary_free(header);
-
-free_filter:
 	bpf_prog_unlock_free(fp);
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/net/bpf_jit.S linux-4.0.9-pax/arch/x86/net/bpf_jit.S
--- linux-4.0.9/arch/x86/net/bpf_jit.S	2015-03-18 15:21:50.300349253 +0100
+++ linux-4.0.9-pax/arch/x86/net/bpf_jit.S	2015-04-15 12:13:52.950318620 +0200
@@ -9,6 +9,7 @@
  */
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/alternative-asm.h>
 
 /*
  * Calling convention :
@@ -38,6 +39,7 @@ sk_load_word_positive_offset:
 	jle	bpf_slow_path_word
 	mov     (SKBDATA,%rsi),%eax
 	bswap   %eax  			/* ntohl() */
+	pax_force_retaddr
 	ret
 
 sk_load_half:
@@ -55,6 +57,7 @@ sk_load_half_positive_offset:
 	jle	bpf_slow_path_half
 	movzwl	(SKBDATA,%rsi),%eax
 	rol	$8,%ax			# ntohs()
+	pax_force_retaddr
 	ret
 
 sk_load_byte:
@@ -69,6 +72,7 @@ sk_load_byte_positive_offset:
 	cmp	%esi,%r9d   /* if (offset >= hlen) goto bpf_slow_path_byte */
 	jle	bpf_slow_path_byte
 	movzbl	(SKBDATA,%rsi),%eax
+	pax_force_retaddr
 	ret
 
 /* rsi contains offset and can be scratched */
@@ -90,6 +94,7 @@ bpf_slow_path_word:
 	js	bpf_error
 	mov	- MAX_BPF_STACK + 32(%rbp),%eax
 	bswap	%eax
+	pax_force_retaddr
 	ret
 
 bpf_slow_path_half:
@@ -98,12 +103,14 @@ bpf_slow_path_half:
 	mov	- MAX_BPF_STACK + 32(%rbp),%ax
 	rol	$8,%ax
 	movzwl	%ax,%eax
+	pax_force_retaddr
 	ret
 
 bpf_slow_path_byte:
 	bpf_slow_path_common(1)
 	js	bpf_error
 	movzbl	- MAX_BPF_STACK + 32(%rbp),%eax
+	pax_force_retaddr
 	ret
 
 #define sk_negative_common(SIZE)				\
@@ -126,6 +133,7 @@ sk_load_word_negative_offset:
 	sk_negative_common(4)
 	mov	(%rax), %eax
 	bswap	%eax
+	pax_force_retaddr
 	ret
 
 bpf_slow_path_half_neg:
@@ -137,6 +145,7 @@ sk_load_half_negative_offset:
 	mov	(%rax),%ax
 	rol	$8,%ax
 	movzwl	%ax,%eax
+	pax_force_retaddr
 	ret
 
 bpf_slow_path_byte_neg:
@@ -146,6 +155,7 @@ sk_load_byte_negative_offset:
 	.globl	sk_load_byte_negative_offset
 	sk_negative_common(1)
 	movzbl	(%rax), %eax
+	pax_force_retaddr
 	ret
 
 bpf_error:
@@ -156,4 +166,5 @@ bpf_error:
 	mov	- MAX_BPF_STACK + 16(%rbp),%r14
 	mov	- MAX_BPF_STACK + 24(%rbp),%r15
 	leaveq
+	pax_force_retaddr
 	ret
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/oprofile/backtrace.c linux-4.0.9-pax/arch/x86/oprofile/backtrace.c
--- linux-4.0.9/arch/x86/oprofile/backtrace.c	2015-03-18 15:21:50.300349253 +0100
+++ linux-4.0.9-pax/arch/x86/oprofile/backtrace.c	2015-04-15 12:13:52.950318620 +0200
@@ -46,11 +46,11 @@ dump_user_backtrace_32(struct stack_fram
 	struct stack_frame_ia32 *fp;
 	unsigned long bytes;
 
-	bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
+	bytes = copy_from_user_nmi(bufhead, (const char __force_user *)head, sizeof(bufhead));
 	if (bytes != 0)
 		return NULL;
 
-	fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
+	fp = (struct stack_frame_ia32 __force_kernel *) compat_ptr(bufhead[0].next_frame);
 
 	oprofile_add_trace(bufhead[0].return_address);
 
@@ -92,7 +92,7 @@ static struct stack_frame *dump_user_bac
 	struct stack_frame bufhead[2];
 	unsigned long bytes;
 
-	bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
+	bytes = copy_from_user_nmi(bufhead, (const char __force_user *)head, sizeof(bufhead));
 	if (bytes != 0)
 		return NULL;
 
@@ -111,7 +111,7 @@ x86_backtrace(struct pt_regs * const reg
 {
 	struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
 
-	if (!user_mode_vm(regs)) {
+	if (!user_mode(regs)) {
 		unsigned long stack = kernel_stack_pointer(regs);
 		if (depth)
 			dump_trace(NULL, regs, (unsigned long *)stack, 0,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/oprofile/nmi_int.c linux-4.0.9-pax/arch/x86/oprofile/nmi_int.c
--- linux-4.0.9/arch/x86/oprofile/nmi_int.c	2015-03-18 15:21:50.300349253 +0100
+++ linux-4.0.9-pax/arch/x86/oprofile/nmi_int.c	2015-04-15 12:13:52.950318620 +0200
@@ -23,6 +23,7 @@
 #include <asm/nmi.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
+#include <asm/pgtable.h>
 
 #include "op_counter.h"
 #include "op_x86_model.h"
@@ -785,8 +786,11 @@ int __init op_nmi_init(struct oprofile_o
 	if (ret)
 		return ret;
 
-	if (!model->num_virt_counters)
-		model->num_virt_counters = model->num_counters;
+	if (!model->num_virt_counters) {
+		pax_open_kernel();
+		*(unsigned int *)&model->num_virt_counters = model->num_counters;
+		pax_close_kernel();
+	}
 
 	mux_init(ops);
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/oprofile/op_model_amd.c linux-4.0.9-pax/arch/x86/oprofile/op_model_amd.c
--- linux-4.0.9/arch/x86/oprofile/op_model_amd.c	2015-03-18 15:21:50.300349253 +0100
+++ linux-4.0.9-pax/arch/x86/oprofile/op_model_amd.c	2015-04-15 12:13:52.950318620 +0200
@@ -519,9 +519,11 @@ static int op_amd_init(struct oprofile_o
 		num_counters = AMD64_NUM_COUNTERS;
 	}
 
-	op_amd_spec.num_counters = num_counters;
-	op_amd_spec.num_controls = num_counters;
-	op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);
+	pax_open_kernel();
+	*(unsigned int *)&op_amd_spec.num_counters = num_counters;
+	*(unsigned int *)&op_amd_spec.num_controls = num_counters;
+	*(unsigned int *)&op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);
+	pax_close_kernel();
 
 	return 0;
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/oprofile/op_model_ppro.c linux-4.0.9-pax/arch/x86/oprofile/op_model_ppro.c
--- linux-4.0.9/arch/x86/oprofile/op_model_ppro.c	2015-03-18 15:21:50.300349253 +0100
+++ linux-4.0.9-pax/arch/x86/oprofile/op_model_ppro.c	2015-04-15 12:13:52.950318620 +0200
@@ -19,6 +19,7 @@
 #include <asm/msr.h>
 #include <asm/apic.h>
 #include <asm/nmi.h>
+#include <asm/pgtable.h>
 
 #include "op_x86_model.h"
 #include "op_counter.h"
@@ -221,8 +222,10 @@ static void arch_perfmon_setup_counters(
 
 	num_counters = min((int)eax.split.num_counters, OP_MAX_COUNTER);
 
-	op_arch_perfmon_spec.num_counters = num_counters;
-	op_arch_perfmon_spec.num_controls = num_counters;
+	pax_open_kernel();
+	*(unsigned int *)&op_arch_perfmon_spec.num_counters = num_counters;
+	*(unsigned int *)&op_arch_perfmon_spec.num_controls = num_counters;
+	pax_close_kernel();
 }
 
 static int arch_perfmon_init(struct oprofile_operations *ignore)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/oprofile/op_x86_model.h linux-4.0.9-pax/arch/x86/oprofile/op_x86_model.h
--- linux-4.0.9/arch/x86/oprofile/op_x86_model.h	2015-03-18 15:21:50.300349253 +0100
+++ linux-4.0.9-pax/arch/x86/oprofile/op_x86_model.h	2015-04-15 12:13:52.950318620 +0200
@@ -52,7 +52,7 @@ struct op_x86_model_spec {
 	void		(*switch_ctrl)(struct op_x86_model_spec const *model,
 				       struct op_msrs const * const msrs);
 #endif
-};
+} __do_const;
 
 struct op_counter_config;
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/pci/intel_mid_pci.c linux-4.0.9-pax/arch/x86/pci/intel_mid_pci.c
--- linux-4.0.9/arch/x86/pci/intel_mid_pci.c	2015-04-13 11:21:01.950617456 +0200
+++ linux-4.0.9-pax/arch/x86/pci/intel_mid_pci.c	2015-04-15 12:13:52.950318620 +0200
@@ -258,7 +258,7 @@ int __init intel_mid_pci_init(void)
 	pci_mmcfg_late_init();
 	pcibios_enable_irq = intel_mid_pci_irq_enable;
 	pcibios_disable_irq = intel_mid_pci_irq_disable;
-	pci_root_ops = intel_mid_pci_ops;
+	memcpy((void *)&pci_root_ops, &intel_mid_pci_ops, sizeof pci_root_ops);
 	pci_soc_mode = 1;
 	/* Continue with standard init */
 	return 1;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/pci/irq.c linux-4.0.9-pax/arch/x86/pci/irq.c
--- linux-4.0.9/arch/x86/pci/irq.c	2015-03-18 15:21:50.300349253 +0100
+++ linux-4.0.9-pax/arch/x86/pci/irq.c	2015-04-15 12:13:52.950318620 +0200
@@ -51,7 +51,7 @@ struct irq_router {
 struct irq_router_handler {
 	u16 vendor;
 	int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device);
-};
+} __do_const;
 
 int (*pcibios_enable_irq)(struct pci_dev *dev) = pirq_enable_irq;
 void (*pcibios_disable_irq)(struct pci_dev *dev) = pirq_disable_irq;
@@ -791,7 +791,7 @@ static __init int pico_router_probe(stru
 	return 0;
 }
 
-static __initdata struct irq_router_handler pirq_routers[] = {
+static __initconst const struct irq_router_handler pirq_routers[] = {
 	{ PCI_VENDOR_ID_INTEL, intel_router_probe },
 	{ PCI_VENDOR_ID_AL, ali_router_probe },
 	{ PCI_VENDOR_ID_ITE, ite_router_probe },
@@ -818,7 +818,7 @@ static struct pci_dev *pirq_router_dev;
 static void __init pirq_find_router(struct irq_router *r)
 {
 	struct irq_routing_table *rt = pirq_table;
-	struct irq_router_handler *h;
+	const struct irq_router_handler *h;
 
 #ifdef CONFIG_PCI_BIOS
 	if (!rt->signature) {
@@ -1091,7 +1091,7 @@ static int __init fix_acer_tm360_irqrout
 	return 0;
 }
 
-static struct dmi_system_id __initdata pciirq_dmi_table[] = {
+static const struct dmi_system_id __initconst pciirq_dmi_table[] = {
 	{
 		.callback = fix_broken_hp_bios_irq9,
 		.ident = "HP Pavilion N5400 Series Laptop",
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/pci/pcbios.c linux-4.0.9-pax/arch/x86/pci/pcbios.c
--- linux-4.0.9/arch/x86/pci/pcbios.c	2015-03-18 15:21:50.300349253 +0100
+++ linux-4.0.9-pax/arch/x86/pci/pcbios.c	2015-04-15 12:13:52.950318620 +0200
@@ -79,7 +79,7 @@ union bios32 {
 static struct {
 	unsigned long address;
 	unsigned short segment;
-} bios32_indirect __initdata = { 0, __KERNEL_CS };
+} bios32_indirect __initconst = { 0, __PCIBIOS_CS };
 
 /*
  * Returns the entry point for the given service, NULL on error
@@ -92,37 +92,80 @@ static unsigned long __init bios32_servi
 	unsigned long length;		/* %ecx */
 	unsigned long entry;		/* %edx */
 	unsigned long flags;
+	struct desc_struct d, *gdt;
 
 	local_irq_save(flags);
-	__asm__("lcall *(%%edi); cld"
+
+	gdt = get_cpu_gdt_table(smp_processor_id());
+
+	pack_descriptor(&d, 0UL, 0xFFFFFUL, 0x9B, 0xC);
+	write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_CS, &d, DESCTYPE_S);
+	pack_descriptor(&d, 0UL, 0xFFFFFUL, 0x93, 0xC);
+	write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_DS, &d, DESCTYPE_S);
+
+	__asm__("movw %w7, %%ds; lcall *(%%edi); push %%ss; pop %%ds; cld"
 		: "=a" (return_code),
 		  "=b" (address),
 		  "=c" (length),
 		  "=d" (entry)
 		: "0" (service),
 		  "1" (0),
-		  "D" (&bios32_indirect));
+		  "D" (&bios32_indirect),
+		  "r"(__PCIBIOS_DS)
+		: "memory");
+
+	pax_open_kernel();
+	gdt[GDT_ENTRY_PCIBIOS_CS].a = 0;
+	gdt[GDT_ENTRY_PCIBIOS_CS].b = 0;
+	gdt[GDT_ENTRY_PCIBIOS_DS].a = 0;
+	gdt[GDT_ENTRY_PCIBIOS_DS].b = 0;
+	pax_close_kernel();
+
 	local_irq_restore(flags);
 
 	switch (return_code) {
-		case 0:
-			return address + entry;
-		case 0x80:	/* Not present */
-			printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service);
-			return 0;
-		default: /* Shouldn't happen */
-			printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n",
-				service, return_code);
+	case 0: {
+		int cpu;
+		unsigned char flags;
+
+		printk(KERN_INFO "bios32_service: base:%08lx length:%08lx entry:%08lx\n", address, length, entry);
+		if (address >= 0xFFFF0 || length > 0x100000 - address || length <= entry) {
+			printk(KERN_WARNING "bios32_service: not valid\n");
 			return 0;
+		}
+		address = address + PAGE_OFFSET;
+		length += 16UL; /* some BIOSs underreport this... */
+		flags = 4;
+		if (length >= 64*1024*1024) {
+			length >>= PAGE_SHIFT;
+			flags |= 8;
+		}
+
+		for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
+			gdt = get_cpu_gdt_table(cpu);
+			pack_descriptor(&d, address, length, 0x9b, flags);
+			write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_CS, &d, DESCTYPE_S);
+			pack_descriptor(&d, address, length, 0x93, flags);
+			write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_DS, &d, DESCTYPE_S);
+		}
+		return entry;
+	}
+	case 0x80:	/* Not present */
+		printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service);
+		return 0;
+	default: /* Shouldn't happen */
+		printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n",
+			service, return_code);
+		return 0;
 	}
 }
 
 static struct {
 	unsigned long address;
 	unsigned short segment;
-} pci_indirect = { 0, __KERNEL_CS };
+} pci_indirect __read_only = { 0, __PCIBIOS_CS };
 
-static int pci_bios_present;
+static int pci_bios_present __read_only;
 
 static int __init check_pcibios(void)
 {
@@ -131,11 +174,13 @@ static int __init check_pcibios(void)
 	unsigned long flags, pcibios_entry;
 
 	if ((pcibios_entry = bios32_service(PCI_SERVICE))) {
-		pci_indirect.address = pcibios_entry + PAGE_OFFSET;
+		pci_indirect.address = pcibios_entry;
 
 		local_irq_save(flags);
-		__asm__(
-			"lcall *(%%edi); cld\n\t"
+		__asm__("movw %w6, %%ds\n\t"
+			"lcall *%%ss:(%%edi); cld\n\t"
+			"push %%ss\n\t"
+			"pop %%ds\n\t"
 			"jc 1f\n\t"
 			"xor %%ah, %%ah\n"
 			"1:"
@@ -144,7 +189,8 @@ static int __init check_pcibios(void)
 			  "=b" (ebx),
 			  "=c" (ecx)
 			: "1" (PCIBIOS_PCI_BIOS_PRESENT),
-			  "D" (&pci_indirect)
+			  "D" (&pci_indirect),
+			  "r" (__PCIBIOS_DS)
 			: "memory");
 		local_irq_restore(flags);
 
@@ -189,7 +235,10 @@ static int pci_bios_read(unsigned int se
 
 	switch (len) {
 	case 1:
-		__asm__("lcall *(%%esi); cld\n\t"
+		__asm__("movw %w6, %%ds\n\t"
+			"lcall *%%ss:(%%esi); cld\n\t"
+			"push %%ss\n\t"
+			"pop %%ds\n\t"
 			"jc 1f\n\t"
 			"xor %%ah, %%ah\n"
 			"1:"
@@ -198,7 +247,8 @@ static int pci_bios_read(unsigned int se
 			: "1" (PCIBIOS_READ_CONFIG_BYTE),
 			  "b" (bx),
 			  "D" ((long)reg),
-			  "S" (&pci_indirect));
+			  "S" (&pci_indirect),
+			  "r" (__PCIBIOS_DS));
 		/*
 		 * Zero-extend the result beyond 8 bits, do not trust the
 		 * BIOS having done it:
@@ -206,7 +256,10 @@ static int pci_bios_read(unsigned int se
 		*value &= 0xff;
 		break;
 	case 2:
-		__asm__("lcall *(%%esi); cld\n\t"
+		__asm__("movw %w6, %%ds\n\t"
+			"lcall *%%ss:(%%esi); cld\n\t"
+			"push %%ss\n\t"
+			"pop %%ds\n\t"
 			"jc 1f\n\t"
 			"xor %%ah, %%ah\n"
 			"1:"
@@ -215,7 +268,8 @@ static int pci_bios_read(unsigned int se
 			: "1" (PCIBIOS_READ_CONFIG_WORD),
 			  "b" (bx),
 			  "D" ((long)reg),
-			  "S" (&pci_indirect));
+			  "S" (&pci_indirect),
+			  "r" (__PCIBIOS_DS));
 		/*
 		 * Zero-extend the result beyond 16 bits, do not trust the
 		 * BIOS having done it:
@@ -223,7 +277,10 @@ static int pci_bios_read(unsigned int se
 		*value &= 0xffff;
 		break;
 	case 4:
-		__asm__("lcall *(%%esi); cld\n\t"
+		__asm__("movw %w6, %%ds\n\t"
+			"lcall *%%ss:(%%esi); cld\n\t"
+			"push %%ss\n\t"
+			"pop %%ds\n\t"
 			"jc 1f\n\t"
 			"xor %%ah, %%ah\n"
 			"1:"
@@ -232,7 +289,8 @@ static int pci_bios_read(unsigned int se
 			: "1" (PCIBIOS_READ_CONFIG_DWORD),
 			  "b" (bx),
 			  "D" ((long)reg),
-			  "S" (&pci_indirect));
+			  "S" (&pci_indirect),
+			  "r" (__PCIBIOS_DS));
 		break;
 	}
 
@@ -256,7 +314,10 @@ static int pci_bios_write(unsigned int s
 
 	switch (len) {
 	case 1:
-		__asm__("lcall *(%%esi); cld\n\t"
+		__asm__("movw %w6, %%ds\n\t"
+			"lcall *%%ss:(%%esi); cld\n\t"
+			"push %%ss\n\t"
+			"pop %%ds\n\t"
 			"jc 1f\n\t"
 			"xor %%ah, %%ah\n"
 			"1:"
@@ -265,10 +326,14 @@ static int pci_bios_write(unsigned int s
 			  "c" (value),
 			  "b" (bx),
 			  "D" ((long)reg),
-			  "S" (&pci_indirect));
+			  "S" (&pci_indirect),
+			  "r" (__PCIBIOS_DS));
 		break;
 	case 2:
-		__asm__("lcall *(%%esi); cld\n\t"
+		__asm__("movw %w6, %%ds\n\t"
+			"lcall *%%ss:(%%esi); cld\n\t"
+			"push %%ss\n\t"
+			"pop %%ds\n\t"
 			"jc 1f\n\t"
 			"xor %%ah, %%ah\n"
 			"1:"
@@ -277,10 +342,14 @@ static int pci_bios_write(unsigned int s
 			  "c" (value),
 			  "b" (bx),
 			  "D" ((long)reg),
-			  "S" (&pci_indirect));
+			  "S" (&pci_indirect),
+			  "r" (__PCIBIOS_DS));
 		break;
 	case 4:
-		__asm__("lcall *(%%esi); cld\n\t"
+		__asm__("movw %w6, %%ds\n\t"
+			"lcall *%%ss:(%%esi); cld\n\t"
+			"push %%ss\n\t"
+			"pop %%ds\n\t"
 			"jc 1f\n\t"
 			"xor %%ah, %%ah\n"
 			"1:"
@@ -289,7 +358,8 @@ static int pci_bios_write(unsigned int s
 			  "c" (value),
 			  "b" (bx),
 			  "D" ((long)reg),
-			  "S" (&pci_indirect));
+			  "S" (&pci_indirect),
+			  "r" (__PCIBIOS_DS));
 		break;
 	}
 
@@ -394,10 +464,13 @@ struct irq_routing_table * pcibios_get_i
 
 	DBG("PCI: Fetching IRQ routing table... ");
 	__asm__("push %%es\n\t"
+		"movw %w8, %%ds\n\t"
 		"push %%ds\n\t"
 		"pop  %%es\n\t"
-		"lcall *(%%esi); cld\n\t"
+		"lcall *%%ss:(%%esi); cld\n\t"
 		"pop %%es\n\t"
+		"push %%ss\n\t"
+		"pop %%ds\n"
 		"jc 1f\n\t"
 		"xor %%ah, %%ah\n"
 		"1:"
@@ -408,7 +481,8 @@ struct irq_routing_table * pcibios_get_i
 		  "1" (0),
 		  "D" ((long) &opt),
 		  "S" (&pci_indirect),
-		  "m" (opt)
+		  "m" (opt),
+		  "r" (__PCIBIOS_DS)
 		: "memory");
 	DBG("OK  ret=%d, size=%d, map=%x\n", ret, opt.size, map);
 	if (ret & 0xff00)
@@ -432,7 +506,10 @@ int pcibios_set_irq_routing(struct pci_d
 {
 	int ret;
 
-	__asm__("lcall *(%%esi); cld\n\t"
+	__asm__("movw %w5, %%ds\n\t"
+		"lcall *%%ss:(%%esi); cld\n\t"
+		"push %%ss\n\t"
+		"pop %%ds\n"
 		"jc 1f\n\t"
 		"xor %%ah, %%ah\n"
 		"1:"
@@ -440,7 +517,8 @@ int pcibios_set_irq_routing(struct pci_d
 		: "0" (PCIBIOS_SET_PCI_HW_INT),
 		  "b" ((dev->bus->number << 8) | dev->devfn),
 		  "c" ((irq << 8) | (pin + 10)),
-		  "S" (&pci_indirect));
+		  "S" (&pci_indirect),
+		  "r" (__PCIBIOS_DS));
 	return !(ret & 0xff00);
 }
 EXPORT_SYMBOL(pcibios_set_irq_routing);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/platform/efi/efi_32.c linux-4.0.9-pax/arch/x86/platform/efi/efi_32.c
--- linux-4.0.9/arch/x86/platform/efi/efi_32.c	2015-03-18 15:21:50.304349253 +0100
+++ linux-4.0.9-pax/arch/x86/platform/efi/efi_32.c	2015-04-15 12:13:52.950318620 +0200
@@ -61,11 +61,22 @@ void __init efi_call_phys_prolog(void)
 {
 	struct desc_ptr gdt_descr;
 
+#ifdef CONFIG_PAX_KERNEXEC
+	struct desc_struct d;
+#endif
+
 	local_irq_save(efi_rt_eflags);
 
 	load_cr3(initial_page_table);
 	__flush_tlb_all();
 
+#ifdef CONFIG_PAX_KERNEXEC
+	pack_descriptor(&d, 0, 0xFFFFF, 0x9B, 0xC);
+	write_gdt_entry(get_cpu_gdt_table(0), GDT_ENTRY_KERNEXEC_EFI_CS, &d, DESCTYPE_S);
+	pack_descriptor(&d, 0, 0xFFFFF, 0x93, 0xC);
+	write_gdt_entry(get_cpu_gdt_table(0), GDT_ENTRY_KERNEXEC_EFI_DS, &d, DESCTYPE_S);
+#endif
+
 	gdt_descr.address = __pa(get_cpu_gdt_table(0));
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
@@ -75,11 +86,24 @@ void __init efi_call_phys_epilog(void)
 {
 	struct desc_ptr gdt_descr;
 
+#ifdef CONFIG_PAX_KERNEXEC
+	struct desc_struct d;
+
+	memset(&d, 0, sizeof d);
+	write_gdt_entry(get_cpu_gdt_table(0), GDT_ENTRY_KERNEXEC_EFI_CS, &d, DESCTYPE_S);
+	write_gdt_entry(get_cpu_gdt_table(0), GDT_ENTRY_KERNEXEC_EFI_DS, &d, DESCTYPE_S);
+#endif
+
 	gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 
+#ifdef CONFIG_PAX_PER_CPU_PGD
+	load_cr3(get_cpu_pgd(smp_processor_id(), kernel));
+#else
 	load_cr3(swapper_pg_dir);
+#endif
+
 	__flush_tlb_all();
 
 	local_irq_restore(efi_rt_eflags);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/platform/efi/efi_64.c linux-4.0.9-pax/arch/x86/platform/efi/efi_64.c
--- linux-4.0.9/arch/x86/platform/efi/efi_64.c	2015-03-18 15:21:50.304349253 +0100
+++ linux-4.0.9-pax/arch/x86/platform/efi/efi_64.c	2015-04-15 12:13:52.950318620 +0200
@@ -98,6 +98,11 @@ void __init efi_call_phys_prolog(void)
 		vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
 		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
 	}
+
+#ifdef CONFIG_PAX_PER_CPU_PGD
+	load_cr3(swapper_pg_dir);
+#endif
+
 	__flush_tlb_all();
 }
 
@@ -115,6 +120,11 @@ void __init efi_call_phys_epilog(void)
 	for (pgd = 0; pgd < n_pgds; pgd++)
 		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
 	kfree(save_pgd);
+
+#ifdef CONFIG_PAX_PER_CPU_PGD
+	load_cr3(get_cpu_pgd(smp_processor_id(), kernel));
+#endif
+
 	__flush_tlb_all();
 	local_irq_restore(efi_flags);
 	early_code_mapping_set_exec(0);
@@ -145,8 +155,23 @@ int __init efi_setup_page_tables(unsigne
 	unsigned npages;
 	pgd_t *pgd;
 
-	if (efi_enabled(EFI_OLD_MEMMAP))
+	if (efi_enabled(EFI_OLD_MEMMAP)) {
+		/* PaX: We need to disable the NX bit in the PGD, otherwise we won't be
+		 * able to execute the EFI services.
+		 */
+		if (__supported_pte_mask & _PAGE_NX) {
+			unsigned long addr = (unsigned long) __va(0);
+			pgd_t pe = __pgd(pgd_val(*pgd_offset_k(addr)) &  ~_PAGE_NX);
+
+			pr_info("PAX: Disabling NX protection for low memory map. Try booting without \"efi=old_map\"\n");
+#ifdef CONFIG_PAX_PER_CPU_PGD
+			set_pgd(pgd_offset_cpu(0, kernel, addr), pe);
+#endif
+			set_pgd(pgd_offset_k(addr), pe);
+		}
+
 		return 0;
+	}
 
 	efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd;
 	pgd = __va(efi_scratch.efi_pgt);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/platform/efi/efi_stub_32.S linux-4.0.9-pax/arch/x86/platform/efi/efi_stub_32.S
--- linux-4.0.9/arch/x86/platform/efi/efi_stub_32.S	2015-03-18 15:21:50.304349253 +0100
+++ linux-4.0.9-pax/arch/x86/platform/efi/efi_stub_32.S	2015-04-15 12:13:52.950318620 +0200
@@ -6,7 +6,9 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/init.h>
 #include <asm/page_types.h>
+#include <asm/segment.h>
 
 /*
  * efi_call_phys(void *, ...) is a function with variable parameters.
@@ -20,7 +22,7 @@
  * service functions will comply with gcc calling convention, too.
  */
 
-.text
+__INIT
 ENTRY(efi_call_phys)
 	/*
 	 * 0. The function can only be called in Linux kernel. So CS has been
@@ -36,10 +38,24 @@ ENTRY(efi_call_phys)
 	 * The mapping of lower virtual memory has been created in prolog and
 	 * epilog.
 	 */
-	movl	$1f, %edx
-	subl	$__PAGE_OFFSET, %edx
-	jmp	*%edx
+#ifdef CONFIG_PAX_KERNEXEC
+	movl	$(__KERNEXEC_EFI_DS), %edx
+	mov	%edx, %ds
+	mov	%edx, %es
+	mov	%edx, %ss
+	addl	$2f,(1f)
+	ljmp	*(1f)
+
+__INITDATA
+1:	.long __LOAD_PHYSICAL_ADDR, __KERNEXEC_EFI_CS
+.previous
+
+2:
+	subl	$2b,(1b)
+#else
+	jmp	1f-__PAGE_OFFSET
 1:
+#endif
 
 	/*
 	 * 2. Now on the top of stack is the return
@@ -47,14 +63,8 @@ ENTRY(efi_call_phys)
 	 * parameter 2, ..., param n. To make things easy, we save the return
 	 * address of efi_call_phys in a global variable.
 	 */
-	popl	%edx
-	movl	%edx, saved_return_addr
-	/* get the function pointer into ECX*/
-	popl	%ecx
-	movl	%ecx, efi_rt_function_ptr
-	movl	$2f, %edx
-	subl	$__PAGE_OFFSET, %edx
-	pushl	%edx
+	popl	(saved_return_addr)
+	popl	(efi_rt_function_ptr)
 
 	/*
 	 * 3. Clear PG bit in %CR0.
@@ -73,9 +83,8 @@ ENTRY(efi_call_phys)
 	/*
 	 * 5. Call the physical function.
 	 */
-	jmp	*%ecx
+	call	*(efi_rt_function_ptr-__PAGE_OFFSET)
 
-2:
 	/*
 	 * 6. After EFI runtime service returns, control will return to
 	 * following instruction. We'd better readjust stack pointer first.
@@ -88,35 +97,36 @@ ENTRY(efi_call_phys)
 	movl	%cr0, %edx
 	orl	$0x80000000, %edx
 	movl	%edx, %cr0
-	jmp	1f
-1:
+
 	/*
 	 * 8. Now restore the virtual mode from flat mode by
 	 * adding EIP with PAGE_OFFSET.
 	 */
-	movl	$1f, %edx
-	jmp	*%edx
+#ifdef CONFIG_PAX_KERNEXEC
+	movl	$(__KERNEL_DS), %edx
+	mov	%edx, %ds
+	mov	%edx, %es
+	mov	%edx, %ss
+	ljmp	$(__KERNEL_CS),$1f
+#else
+	jmp	1f+__PAGE_OFFSET
+#endif
 1:
 
 	/*
 	 * 9. Balance the stack. And because EAX contain the return value,
 	 * we'd better not clobber it.
 	 */
-	leal	efi_rt_function_ptr, %edx
-	movl	(%edx), %ecx
-	pushl	%ecx
+	pushl	(efi_rt_function_ptr)
 
 	/*
-	 * 10. Push the saved return address onto the stack and return.
+	 * 10. Return to the saved return address.
 	 */
-	leal	saved_return_addr, %edx
-	movl	(%edx), %ecx
-	pushl	%ecx
-	ret
+	jmpl	*(saved_return_addr)
 ENDPROC(efi_call_phys)
 .previous
 
-.data
+__INITDATA
 saved_return_addr:
 	.long 0
 efi_rt_function_ptr:
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/platform/efi/efi_stub_64.S linux-4.0.9-pax/arch/x86/platform/efi/efi_stub_64.S
--- linux-4.0.9/arch/x86/platform/efi/efi_stub_64.S	2015-04-13 11:21:03.262617386 +0200
+++ linux-4.0.9-pax/arch/x86/platform/efi/efi_stub_64.S	2015-04-15 12:13:52.950318620 +0200
@@ -11,6 +11,7 @@
 #include <asm/msr.h>
 #include <asm/processor-flags.h>
 #include <asm/page_types.h>
+#include <asm/alternative-asm.h>
 
 #define SAVE_XMM			\
 	mov %rsp, %rax;			\
@@ -88,6 +89,7 @@ ENTRY(efi_call)
 	RESTORE_PGT
 	addq $48, %rsp
 	RESTORE_XMM
+	pax_force_retaddr 0, 1
 	ret
 ENDPROC(efi_call)
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/platform/intel-mid/intel-mid.c linux-4.0.9-pax/arch/x86/platform/intel-mid/intel-mid.c
--- linux-4.0.9/arch/x86/platform/intel-mid/intel-mid.c	2015-04-13 11:21:04.170617337 +0200
+++ linux-4.0.9-pax/arch/x86/platform/intel-mid/intel-mid.c	2015-04-15 12:13:52.950318620 +0200
@@ -63,7 +63,7 @@ enum intel_mid_timer_options intel_mid_t
 /* intel_mid_ops to store sub arch ops */
 struct intel_mid_ops *intel_mid_ops;
 /* getter function for sub arch ops*/
-static void *(*get_intel_mid_ops[])(void) = INTEL_MID_OPS_INIT;
+static const void *(*get_intel_mid_ops[])(void) = INTEL_MID_OPS_INIT;
 enum intel_mid_cpu_type __intel_mid_cpu_chip;
 EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip);
 
@@ -71,9 +71,10 @@ static void intel_mid_power_off(void)
 {
 };
 
-static void intel_mid_reboot(void)
+static void __noreturn intel_mid_reboot(void)
 {
 	intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
+	BUG();
 }
 
 static unsigned long __init intel_mid_calibrate_tsc(void)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/platform/intel-mid/intel_mid_weak_decls.h linux-4.0.9-pax/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
--- linux-4.0.9/arch/x86/platform/intel-mid/intel_mid_weak_decls.h	2015-03-18 15:21:50.304349253 +0100
+++ linux-4.0.9-pax/arch/x86/platform/intel-mid/intel_mid_weak_decls.h	2015-04-15 12:13:52.950318620 +0200
@@ -13,6 +13,6 @@
 /* For every CPU addition a new get_<cpuname>_ops interface needs
  * to be added.
  */
-extern void *get_penwell_ops(void);
-extern void *get_cloverview_ops(void);
-extern void *get_tangier_ops(void);
+extern const void *get_penwell_ops(void);
+extern const void *get_cloverview_ops(void);
+extern const void *get_tangier_ops(void);
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/platform/intel-mid/mfld.c linux-4.0.9-pax/arch/x86/platform/intel-mid/mfld.c
--- linux-4.0.9/arch/x86/platform/intel-mid/mfld.c	2015-03-18 15:21:50.304349253 +0100
+++ linux-4.0.9-pax/arch/x86/platform/intel-mid/mfld.c	2015-04-15 12:13:52.950318620 +0200
@@ -64,12 +64,12 @@ static void __init penwell_arch_setup(vo
 	pm_power_off = mfld_power_off;
 }
 
-void *get_penwell_ops(void)
+const void *get_penwell_ops(void)
 {
 	return &penwell_ops;
 }
 
-void *get_cloverview_ops(void)
+const void *get_cloverview_ops(void)
 {
 	return &penwell_ops;
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/platform/intel-mid/mrfl.c linux-4.0.9-pax/arch/x86/platform/intel-mid/mrfl.c
--- linux-4.0.9/arch/x86/platform/intel-mid/mrfl.c	2015-03-18 15:21:50.304349253 +0100
+++ linux-4.0.9-pax/arch/x86/platform/intel-mid/mrfl.c	2015-04-15 12:13:52.950318620 +0200
@@ -97,7 +97,7 @@ static struct intel_mid_ops tangier_ops
 	.arch_setup = tangier_arch_setup,
 };
 
-void *get_tangier_ops(void)
+const void *get_tangier_ops(void)
 {
 	return &tangier_ops;
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/platform/intel-quark/imr_selftest.c linux-4.0.9-pax/arch/x86/platform/intel-quark/imr_selftest.c
--- linux-4.0.9/arch/x86/platform/intel-quark/imr_selftest.c	2015-04-13 11:21:04.218617335 +0200
+++ linux-4.0.9-pax/arch/x86/platform/intel-quark/imr_selftest.c	2015-04-15 12:13:52.954318620 +0200
@@ -54,7 +54,7 @@ static void __init imr_self_test_result(
  */
 static void __init imr_self_test(void)
 {
-	phys_addr_t base  = virt_to_phys(&_text);
+	phys_addr_t base  = virt_to_phys(ktla_ktva(_text));
 	size_t size = virt_to_phys(&__end_rodata) - base;
 	const char *fmt_over = "overlapped IMR @ (0x%08lx - 0x%08lx)\n";
 	int ret;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/platform/olpc/olpc_dt.c linux-4.0.9-pax/arch/x86/platform/olpc/olpc_dt.c
--- linux-4.0.9/arch/x86/platform/olpc/olpc_dt.c	2015-03-18 15:21:50.304349253 +0100
+++ linux-4.0.9-pax/arch/x86/platform/olpc/olpc_dt.c	2015-04-15 12:13:52.954318620 +0200
@@ -156,7 +156,7 @@ void * __init prom_early_alloc(unsigned
 	return res;
 }
 
-static struct of_pdt_ops prom_olpc_ops __initdata = {
+static struct of_pdt_ops prom_olpc_ops __initconst = {
 	.nextprop = olpc_dt_nextprop,
 	.getproplen = olpc_dt_getproplen,
 	.getproperty = olpc_dt_getproperty,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/power/cpu.c linux-4.0.9-pax/arch/x86/power/cpu.c
--- linux-4.0.9/arch/x86/power/cpu.c	2015-04-13 11:21:04.234617334 +0200
+++ linux-4.0.9-pax/arch/x86/power/cpu.c	2015-04-15 12:13:52.954318620 +0200
@@ -134,11 +134,8 @@ static void do_fpu_end(void)
 static void fix_processor_context(void)
 {
 	int cpu = smp_processor_id();
-	struct tss_struct *t = &per_cpu(init_tss, cpu);
-#ifdef CONFIG_X86_64
-	struct desc_struct *desc = get_cpu_gdt_table(cpu);
-	tss_desc tss;
-#endif
+	struct tss_struct *t = init_tss + cpu;
+
 	set_tss_desc(cpu, t);	/*
 				 * This just modifies memory; should not be
 				 * necessary. But... This is necessary, because
@@ -147,10 +144,6 @@ static void fix_processor_context(void)
 				 */
 
 #ifdef CONFIG_X86_64
-	memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
-	tss.type = 0x9; /* The available 64-bit TSS (see AMD vol 2, pg 91 */
-	write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS);
-
 	syscall_init();				/* This sets MSR_*STAR and related */
 #endif
 	load_TR_desc();				/* This does ltr */
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/realmode/init.c linux-4.0.9-pax/arch/x86/realmode/init.c
--- linux-4.0.9/arch/x86/realmode/init.c	2015-04-13 11:21:04.270617332 +0200
+++ linux-4.0.9-pax/arch/x86/realmode/init.c	2015-04-15 12:13:52.954318620 +0200
@@ -68,7 +68,13 @@ void __init setup_real_mode(void)
 		__va(real_mode_header->trampoline_header);
 
 #ifdef CONFIG_X86_32
-	trampoline_header->start = __pa_symbol(startup_32_smp);
+	trampoline_header->start = __pa_symbol(ktla_ktva(startup_32_smp));
+
+#ifdef CONFIG_PAX_KERNEXEC
+	trampoline_header->start -= LOAD_PHYSICAL_ADDR;
+#endif
+
+	trampoline_header->boot_cs = __BOOT_CS;
 	trampoline_header->gdt_limit = __BOOT_DS + 7;
 	trampoline_header->gdt_base = __pa_symbol(boot_gdt);
 #else
@@ -84,7 +90,7 @@ void __init setup_real_mode(void)
 	*trampoline_cr4_features = __read_cr4();
 
 	trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
-	trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
+	trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd & ~_PAGE_NX;
 	trampoline_pgd[511] = init_level4_pgt[511].pgd;
 #endif
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/realmode/rm/header.S linux-4.0.9-pax/arch/x86/realmode/rm/header.S
--- linux-4.0.9/arch/x86/realmode/rm/header.S	2015-03-18 15:21:50.304349253 +0100
+++ linux-4.0.9-pax/arch/x86/realmode/rm/header.S	2015-04-15 12:13:52.954318620 +0200
@@ -30,7 +30,9 @@ GLOBAL(real_mode_header)
 #endif
 	/* APM/BIOS reboot */
 	.long	pa_machine_real_restart_asm
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_32
+	.long	__KERNEL_CS
+#else
 	.long	__KERNEL32_CS
 #endif
 END(real_mode_header)
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/realmode/rm/Makefile linux-4.0.9-pax/arch/x86/realmode/rm/Makefile
--- linux-4.0.9/arch/x86/realmode/rm/Makefile	2015-04-13 11:21:04.270617332 +0200
+++ linux-4.0.9-pax/arch/x86/realmode/rm/Makefile	2015-04-15 12:13:52.954318620 +0200
@@ -68,5 +68,8 @@ $(obj)/realmode.relocs: $(obj)/realmode.
 
 KBUILD_CFLAGS	:= $(LINUXINCLUDE) $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \
 		   -I$(srctree)/arch/x86/boot
+ifdef CONSTIFY_PLUGIN
+KBUILD_CFLAGS	+= -fplugin-arg-constify_plugin-no-constify
+endif
 KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__
 GCOV_PROFILE := n
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/realmode/rm/reboot.S linux-4.0.9-pax/arch/x86/realmode/rm/reboot.S
--- linux-4.0.9/arch/x86/realmode/rm/reboot.S	2014-03-31 12:47:57.311131318 +0200
+++ linux-4.0.9-pax/arch/x86/realmode/rm/reboot.S	2015-05-07 03:04:54.132361235 +0200
@@ -27,6 +27,10 @@ ENTRY(machine_real_restart_asm)
 	lgdtl	pa_tr_gdt
 
 	/* Disable paging to drop us out of long mode */
+	movl	%cr4, %eax
+	andl	$~X86_CR4_PCIDE, %eax
+	movl	%eax, %cr4
+
 	movl	%cr0, %eax
 	andl	$~X86_CR0_PG, %eax
 	movl	%eax, %cr0
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/realmode/rm/trampoline_32.S linux-4.0.9-pax/arch/x86/realmode/rm/trampoline_32.S
--- linux-4.0.9/arch/x86/realmode/rm/trampoline_32.S	2015-03-18 15:21:50.304349253 +0100
+++ linux-4.0.9-pax/arch/x86/realmode/rm/trampoline_32.S	2015-04-15 12:13:52.954318620 +0200
@@ -24,6 +24,12 @@
 #include <asm/page_types.h>
 #include "realmode.h"
 
+#ifdef CONFIG_PAX_KERNEXEC
+#define ta(X) (X)
+#else
+#define ta(X) (pa_ ## X)
+#endif
+
 	.text
 	.code16
 
@@ -38,8 +44,6 @@ ENTRY(trampoline_start)
 
 	cli			# We should be safe anyway
 
-	movl	tr_start, %eax	# where we need to go
-
 	movl	$0xA5A5A5A5, trampoline_status
 				# write marker for master knows we're running
 
@@ -55,7 +59,7 @@ ENTRY(trampoline_start)
 	movw	$1, %dx			# protected mode (PE) bit
 	lmsw	%dx			# into protected mode
 
-	ljmpl	$__BOOT_CS, $pa_startup_32
+	ljmpl	*(trampoline_header)
 
 	.section ".text32","ax"
 	.code32
@@ -66,7 +70,7 @@ ENTRY(startup_32)			# note: also used fr
 	.balign 8
 GLOBAL(trampoline_header)
 	tr_start:		.space	4
-	tr_gdt_pad:		.space	2
+	tr_boot_cs:		.space	2
 	tr_gdt:			.space	6
 END(trampoline_header)
 	
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/realmode/rm/trampoline_64.S linux-4.0.9-pax/arch/x86/realmode/rm/trampoline_64.S
--- linux-4.0.9/arch/x86/realmode/rm/trampoline_64.S	2015-03-18 15:21:50.304349253 +0100
+++ linux-4.0.9-pax/arch/x86/realmode/rm/trampoline_64.S	2015-04-15 12:13:52.954318620 +0200
@@ -93,6 +93,7 @@ ENTRY(startup_32)
 	movl	%edx, %gs
 
 	movl	pa_tr_cr4, %eax
+	andl	$~X86_CR4_PCIDE, %eax
 	movl	%eax, %cr4		# Enable PAE mode
 
 	# Setup trampoline 4 level pagetables
@@ -106,7 +107,7 @@ ENTRY(startup_32)
 	wrmsr
 
 	# Enable paging and in turn activate Long Mode
-	movl	$(X86_CR0_PG | X86_CR0_WP | X86_CR0_PE), %eax
+	movl	$(X86_CR0_PG | X86_CR0_PE), %eax
 	movl	%eax, %cr0
 
 	/*
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/realmode/rm/wakeup_asm.S linux-4.0.9-pax/arch/x86/realmode/rm/wakeup_asm.S
--- linux-4.0.9/arch/x86/realmode/rm/wakeup_asm.S	2015-03-18 15:21:50.304349253 +0100
+++ linux-4.0.9-pax/arch/x86/realmode/rm/wakeup_asm.S	2015-04-15 12:13:52.954318620 +0200
@@ -126,11 +126,10 @@ ENTRY(wakeup_start)
 	lgdtl	pmode_gdt
 
 	/* This really couldn't... */
-	movl	pmode_entry, %eax
 	movl	pmode_cr0, %ecx
 	movl	%ecx, %cr0
-	ljmpl	$__KERNEL_CS, $pa_startup_32
-	/* -> jmp *%eax in trampoline_32.S */
+
+	ljmpl	*pmode_entry
 #else
 	jmp	trampoline_start
 #endif
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/tools/Makefile linux-4.0.9-pax/arch/x86/tools/Makefile
--- linux-4.0.9/arch/x86/tools/Makefile	2015-03-18 15:21:50.304349253 +0100
+++ linux-4.0.9-pax/arch/x86/tools/Makefile	2015-04-15 12:13:52.954318620 +0200
@@ -37,7 +37,7 @@ $(obj)/test_get_len.o: $(srctree)/arch/x
 
 $(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
 
-HOST_EXTRACFLAGS += -I$(srctree)/tools/include
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include -ggdb
 hostprogs-y	+= relocs
 relocs-objs     := relocs_32.o relocs_64.o relocs_common.o
 PHONY += relocs
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/tools/relocs.c linux-4.0.9-pax/arch/x86/tools/relocs.c
--- linux-4.0.9/arch/x86/tools/relocs.c	2015-03-18 15:21:50.304349253 +0100
+++ linux-4.0.9-pax/arch/x86/tools/relocs.c	2015-04-15 12:13:52.954318620 +0200
@@ -1,5 +1,7 @@
 /* This is included from relocs_32/64.c */
 
+#include "../../../include/generated/autoconf.h"
+
 #define ElfW(type)		_ElfW(ELF_BITS, type)
 #define _ElfW(bits, type)	__ElfW(bits, type)
 #define __ElfW(bits, type)	Elf##bits##_##type
@@ -11,6 +13,7 @@
 #define Elf_Sym			ElfW(Sym)
 
 static Elf_Ehdr ehdr;
+static Elf_Phdr *phdr;
 
 struct relocs {
 	uint32_t	*offset;
@@ -386,9 +389,39 @@ static void read_ehdr(FILE *fp)
 	}
 }
 
+static void read_phdrs(FILE *fp)
+{
+	unsigned int i;
+
+	phdr = calloc(ehdr.e_phnum, sizeof(Elf_Phdr));
+	if (!phdr) {
+		die("Unable to allocate %d program headers\n",
+		    ehdr.e_phnum);
+	}
+	if (fseek(fp, ehdr.e_phoff, SEEK_SET) < 0) {
+		die("Seek to %d failed: %s\n",
+			ehdr.e_phoff, strerror(errno));
+	}
+	if (fread(phdr, sizeof(*phdr), ehdr.e_phnum, fp) != ehdr.e_phnum) {
+		die("Cannot read ELF program headers: %s\n",
+			strerror(errno));
+	}
+	for (i = 0; i < ehdr.e_phnum; i++) {
+		phdr[i].p_type      = elf_word_to_cpu(phdr[i].p_type);
+		phdr[i].p_offset    = elf_off_to_cpu(phdr[i].p_offset);
+		phdr[i].p_vaddr     = elf_addr_to_cpu(phdr[i].p_vaddr);
+		phdr[i].p_paddr     = elf_addr_to_cpu(phdr[i].p_paddr);
+		phdr[i].p_filesz    = elf_word_to_cpu(phdr[i].p_filesz);
+		phdr[i].p_memsz     = elf_word_to_cpu(phdr[i].p_memsz);
+		phdr[i].p_flags     = elf_word_to_cpu(phdr[i].p_flags);
+		phdr[i].p_align     = elf_word_to_cpu(phdr[i].p_align);
+	}
+
+}
+
 static void read_shdrs(FILE *fp)
 {
-	int i;
+	unsigned int i;
 	Elf_Shdr shdr;
 
 	secs = calloc(ehdr.e_shnum, sizeof(struct section));
@@ -423,7 +456,7 @@ static void read_shdrs(FILE *fp)
 
 static void read_strtabs(FILE *fp)
 {
-	int i;
+	unsigned int i;
 	for (i = 0; i < ehdr.e_shnum; i++) {
 		struct section *sec = &secs[i];
 		if (sec->shdr.sh_type != SHT_STRTAB) {
@@ -448,7 +481,7 @@ static void read_strtabs(FILE *fp)
 
 static void read_symtabs(FILE *fp)
 {
-	int i,j;
+	unsigned int i,j;
 	for (i = 0; i < ehdr.e_shnum; i++) {
 		struct section *sec = &secs[i];
 		if (sec->shdr.sh_type != SHT_SYMTAB) {
@@ -479,9 +512,11 @@ static void read_symtabs(FILE *fp)
 }
 
 
-static void read_relocs(FILE *fp)
+static void read_relocs(FILE *fp, int use_real_mode)
 {
-	int i,j;
+	unsigned int i,j;
+	uint32_t base;
+
 	for (i = 0; i < ehdr.e_shnum; i++) {
 		struct section *sec = &secs[i];
 		if (sec->shdr.sh_type != SHT_REL_TYPE) {
@@ -501,9 +536,22 @@ static void read_relocs(FILE *fp)
 			die("Cannot read symbol table: %s\n",
 				strerror(errno));
 		}
+		base = 0;
+
+#ifdef CONFIG_X86_32
+		for (j = 0; !use_real_mode && j < ehdr.e_phnum; j++) {
+			if (phdr[j].p_type != PT_LOAD)
+				continue;
+			if (secs[sec->shdr.sh_info].shdr.sh_offset < phdr[j].p_offset || secs[sec->shdr.sh_info].shdr.sh_offset >= phdr[j].p_offset + phdr[j].p_filesz)
+				continue;
+			base = CONFIG_PAGE_OFFSET + phdr[j].p_paddr - phdr[j].p_vaddr;
+			break;
+		}
+#endif
+
 		for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) {
 			Elf_Rel *rel = &sec->reltab[j];
-			rel->r_offset = elf_addr_to_cpu(rel->r_offset);
+			rel->r_offset = elf_addr_to_cpu(rel->r_offset) + base;
 			rel->r_info   = elf_xword_to_cpu(rel->r_info);
 #if (SHT_REL_TYPE == SHT_RELA)
 			rel->r_addend = elf_xword_to_cpu(rel->r_addend);
@@ -515,7 +563,7 @@ static void read_relocs(FILE *fp)
 
 static void print_absolute_symbols(void)
 {
-	int i;
+	unsigned int i;
 	const char *format;
 
 	if (ELF_BITS == 64)
@@ -528,7 +576,7 @@ static void print_absolute_symbols(void)
 	for (i = 0; i < ehdr.e_shnum; i++) {
 		struct section *sec = &secs[i];
 		char *sym_strtab;
-		int j;
+		unsigned int j;
 
 		if (sec->shdr.sh_type != SHT_SYMTAB) {
 			continue;
@@ -555,7 +603,7 @@ static void print_absolute_symbols(void)
 
 static void print_absolute_relocs(void)
 {
-	int i, printed = 0;
+	unsigned int i, printed = 0;
 	const char *format;
 
 	if (ELF_BITS == 64)
@@ -568,7 +616,7 @@ static void print_absolute_relocs(void)
 		struct section *sec_applies, *sec_symtab;
 		char *sym_strtab;
 		Elf_Sym *sh_symtab;
-		int j;
+		unsigned int j;
 		if (sec->shdr.sh_type != SHT_REL_TYPE) {
 			continue;
 		}
@@ -645,13 +693,13 @@ static void add_reloc(struct relocs *r,
 static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel,
 			Elf_Sym *sym, const char *symname))
 {
-	int i;
+	unsigned int i;
 	/* Walk through the relocations */
 	for (i = 0; i < ehdr.e_shnum; i++) {
 		char *sym_strtab;
 		Elf_Sym *sh_symtab;
 		struct section *sec_applies, *sec_symtab;
-		int j;
+		unsigned int j;
 		struct section *sec = &secs[i];
 
 		if (sec->shdr.sh_type != SHT_REL_TYPE) {
@@ -830,6 +878,23 @@ static int do_reloc32(struct section *se
 {
 	unsigned r_type = ELF32_R_TYPE(rel->r_info);
 	int shn_abs = (sym->st_shndx == SHN_ABS) && !is_reloc(S_REL, symname);
+	char *sym_strtab = sec->link->link->strtab;
+
+	/* Don't relocate actual per-cpu variables; they are absolute indices, not addresses */
+	if (!strcmp(sec_name(sym->st_shndx), ".data..percpu") && strcmp(sym_name(sym_strtab, sym), "__per_cpu_load"))
+		return 0;
+
+#ifdef CONFIG_PAX_KERNEXEC
+	/* Don't relocate actual code; it is relocated implicitly by the base address of KERNEL_CS */
+	if (!strcmp(sec_name(sym->st_shndx), ".text.end") && !strcmp(sym_name(sym_strtab, sym), "_etext"))
+		return 0;
+	if (!strcmp(sec_name(sym->st_shndx), ".init.text"))
+		return 0;
+	if (!strcmp(sec_name(sym->st_shndx), ".exit.text"))
+		return 0;
+	if (!strcmp(sec_name(sym->st_shndx), ".text") && strcmp(sym_name(sym_strtab, sym), "__LOAD_PHYSICAL_ADDR"))
+		return 0;
+#endif
 
 	switch (r_type) {
 	case R_386_NONE:
@@ -968,7 +1033,7 @@ static int write32_as_text(uint32_t v, F
 
 static void emit_relocs(int as_text, int use_real_mode)
 {
-	int i;
+	unsigned int i;
 	int (*write_reloc)(uint32_t, FILE *) = write32;
 	int (*do_reloc)(struct section *sec, Elf_Rel *rel, Elf_Sym *sym,
 			const char *symname);
@@ -1078,10 +1143,11 @@ void process(FILE *fp, int use_real_mode
 {
 	regex_init(use_real_mode);
 	read_ehdr(fp);
+	read_phdrs(fp);
 	read_shdrs(fp);
 	read_strtabs(fp);
 	read_symtabs(fp);
-	read_relocs(fp);
+	read_relocs(fp, use_real_mode);
 	if (ELF_BITS == 64)
 		percpu_init();
 	if (show_absolute_syms) {
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/um/mem_32.c linux-4.0.9-pax/arch/x86/um/mem_32.c
--- linux-4.0.9/arch/x86/um/mem_32.c	2015-03-18 15:21:50.304349253 +0100
+++ linux-4.0.9-pax/arch/x86/um/mem_32.c	2015-04-15 12:13:52.954318620 +0200
@@ -21,7 +21,7 @@ static int __init gate_vma_init(void)
 	gate_vma.vm_start = FIXADDR_USER_START;
 	gate_vma.vm_end = FIXADDR_USER_END;
 	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
-	gate_vma.vm_page_prot = __P101;
+	gate_vma.vm_page_prot = vm_get_page_prot(gate_vma.vm_flags);
 
 	return 0;
 }
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/um/tls_32.c linux-4.0.9-pax/arch/x86/um/tls_32.c
--- linux-4.0.9/arch/x86/um/tls_32.c	2015-03-18 15:21:50.304349253 +0100
+++ linux-4.0.9-pax/arch/x86/um/tls_32.c	2015-04-15 12:13:52.954318620 +0200
@@ -260,7 +260,7 @@ out:
 	if (unlikely(task == current &&
 		     !t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].flushed)) {
 		printk(KERN_ERR "get_tls_entry: task with pid %d got here "
-				"without flushed TLS.", current->pid);
+				"without flushed TLS.", task_pid_nr(current));
 	}
 
 	return 0;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/vdso/Makefile linux-4.0.9-pax/arch/x86/vdso/Makefile
--- linux-4.0.9/arch/x86/vdso/Makefile	2015-06-26 10:29:22.458538574 +0200
+++ linux-4.0.9-pax/arch/x86/vdso/Makefile	2015-06-26 10:29:32.598538551 +0200
@@ -175,7 +175,7 @@ quiet_cmd_vdso = VDSO    $@
 		       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
 		 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
 
-VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
+VDSO_LDFLAGS = -fPIC -shared -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
 	$(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS)
 GCOV_PROFILE := n
 
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/vdso/vdso2c.h linux-4.0.9-pax/arch/x86/vdso/vdso2c.h
--- linux-4.0.9/arch/x86/vdso/vdso2c.h	2015-03-18 15:21:50.304349253 +0100
+++ linux-4.0.9-pax/arch/x86/vdso/vdso2c.h	2015-04-15 12:13:52.954318620 +0200
@@ -12,7 +12,7 @@ static void BITSFUNC(go)(void *raw_addr,
 	unsigned long load_size = -1;  /* Work around bogus warning */
 	unsigned long mapping_size;
 	ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
-	int i;
+	unsigned int i;
 	unsigned long j;
 	ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
 		*alt_sec = NULL;
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/vdso/vdso32-setup.c linux-4.0.9-pax/arch/x86/vdso/vdso32-setup.c
--- linux-4.0.9/arch/x86/vdso/vdso32-setup.c	2015-03-18 15:21:50.308349253 +0100
+++ linux-4.0.9-pax/arch/x86/vdso/vdso32-setup.c	2015-04-15 12:13:52.954318620 +0200
@@ -14,6 +14,7 @@
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
 #include <asm/vdso.h>
+#include <asm/mman.h>
 
 #ifdef CONFIG_COMPAT_VDSO
 #define VDSO_DEFAULT	0
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/vdso/vma.c linux-4.0.9-pax/arch/x86/vdso/vma.c
--- linux-4.0.9/arch/x86/vdso/vma.c	2015-03-18 15:21:50.308349253 +0100
+++ linux-4.0.9-pax/arch/x86/vdso/vma.c	2015-04-15 12:13:52.954318620 +0200
@@ -19,10 +19,7 @@
 #include <asm/page.h>
 #include <asm/hpet.h>
 #include <asm/desc.h>
-
-#if defined(CONFIG_X86_64)
-unsigned int __read_mostly vdso64_enabled = 1;
-#endif
+#include <asm/mman.h>
 
 void __init init_vdso_image(const struct vdso_image *image)
 {
@@ -101,6 +98,11 @@ static int map_vdso(const struct vdso_im
 		.pages = no_pages,
 	};
 
+#ifdef CONFIG_PAX_RANDMMAP
+	if (mm->pax_flags & MF_PAX_RANDMMAP)
+		calculate_addr = false;
+#endif
+
 	if (calculate_addr) {
 		addr = vdso_addr(current->mm->start_stack,
 				 image->size - image->sym_vvar_start);
@@ -111,14 +113,14 @@ static int map_vdso(const struct vdso_im
 	down_write(&mm->mmap_sem);
 
 	addr = get_unmapped_area(NULL, addr,
-				 image->size - image->sym_vvar_start, 0, 0);
+				 image->size - image->sym_vvar_start, 0, MAP_EXECUTABLE);
 	if (IS_ERR_VALUE(addr)) {
 		ret = addr;
 		goto up_fail;
 	}
 
 	text_start = addr - image->sym_vvar_start;
-	current->mm->context.vdso = (void __user *)text_start;
+	mm->context.vdso = text_start;
 
 	/*
 	 * MAYWRITE to allow gdb to COW and set breakpoints
@@ -163,15 +165,12 @@ static int map_vdso(const struct vdso_im
 			hpet_address >> PAGE_SHIFT,
 			PAGE_SIZE,
 			pgprot_noncached(PAGE_READONLY));
-
-		if (ret)
-			goto up_fail;
 	}
 #endif
 
 up_fail:
 	if (ret)
-		current->mm->context.vdso = NULL;
+		current->mm->context.vdso = 0;
 
 	up_write(&mm->mmap_sem);
 	return ret;
@@ -191,8 +190,8 @@ static int load_vdso32(void)
 
 	if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
 		current_thread_info()->sysenter_return =
-			current->mm->context.vdso +
-			selected_vdso32->sym_VDSO32_SYSENTER_RETURN;
+			(void __force_user *)(current->mm->context.vdso +
+			selected_vdso32->sym_VDSO32_SYSENTER_RETURN);
 
 	return 0;
 }
@@ -201,9 +200,6 @@ static int load_vdso32(void)
 #ifdef CONFIG_X86_64
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
-	if (!vdso64_enabled)
-		return 0;
-
 	return map_vdso(&vdso_image_64, true);
 }
 
@@ -212,12 +208,8 @@ int compat_arch_setup_additional_pages(s
 				       int uses_interp)
 {
 #ifdef CONFIG_X86_X32_ABI
-	if (test_thread_flag(TIF_X32)) {
-		if (!vdso64_enabled)
-			return 0;
-
+	if (test_thread_flag(TIF_X32))
 		return map_vdso(&vdso_image_x32, true);
-	}
 #endif
 
 	return load_vdso32();
@@ -231,15 +223,6 @@ int arch_setup_additional_pages(struct l
 #endif
 
 #ifdef CONFIG_X86_64
-static __init int vdso_setup(char *s)
-{
-	vdso64_enabled = simple_strtoul(s, NULL, 0);
-	return 0;
-}
-__setup("vdso=", vdso_setup);
-#endif
-
-#ifdef CONFIG_X86_64
 static void vgetcpu_cpu_init(void *arg)
 {
 	int cpu = smp_processor_id();
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/xen/enlighten.c linux-4.0.9-pax/arch/x86/xen/enlighten.c
--- linux-4.0.9/arch/x86/xen/enlighten.c	2015-04-13 11:21:04.278617332 +0200
+++ linux-4.0.9-pax/arch/x86/xen/enlighten.c	2015-04-15 12:13:52.954318620 +0200
@@ -125,8 +125,6 @@ EXPORT_SYMBOL_GPL(xen_start_info);
 
 struct shared_info xen_dummy_shared_info;
 
-void *xen_initial_gdt;
-
 RESERVE_BRK(shared_info_page_brk, PAGE_SIZE);
 __read_mostly int xen_have_vector_callback;
 EXPORT_SYMBOL_GPL(xen_have_vector_callback);
@@ -544,8 +542,7 @@ static void xen_load_gdt(const struct de
 {
 	unsigned long va = dtr->address;
 	unsigned int size = dtr->size + 1;
-	unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
-	unsigned long frames[pages];
+	unsigned long frames[65536 / PAGE_SIZE];
 	int f;
 
 	/*
@@ -593,8 +590,7 @@ static void __init xen_load_gdt_boot(con
 {
 	unsigned long va = dtr->address;
 	unsigned int size = dtr->size + 1;
-	unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
-	unsigned long frames[pages];
+	unsigned long frames[(GDT_SIZE + PAGE_SIZE - 1) / PAGE_SIZE];
 	int f;
 
 	/*
@@ -602,7 +598,7 @@ static void __init xen_load_gdt_boot(con
 	 * 8-byte entries, or 16 4k pages..
 	 */
 
-	BUG_ON(size > 65536);
+	BUG_ON(size > GDT_SIZE);
 	BUG_ON(va & ~PAGE_MASK);
 
 	for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
@@ -991,7 +987,7 @@ static u32 xen_safe_apic_wait_icr_idle(v
         return 0;
 }
 
-static void set_xen_basic_apic_ops(void)
+static void __init set_xen_basic_apic_ops(void)
 {
 	apic->read = xen_apic_read;
 	apic->write = xen_apic_write;
@@ -1308,30 +1304,30 @@ static const struct pv_apic_ops xen_apic
 #endif
 };
 
-static void xen_reboot(int reason)
+static __noreturn void xen_reboot(int reason)
 {
 	struct sched_shutdown r = { .reason = reason };
 
-	if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
-		BUG();
+	HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
+	BUG();
 }
 
-static void xen_restart(char *msg)
+static __noreturn void xen_restart(char *msg)
 {
 	xen_reboot(SHUTDOWN_reboot);
 }
 
-static void xen_emergency_restart(void)
+static __noreturn void xen_emergency_restart(void)
 {
 	xen_reboot(SHUTDOWN_reboot);
 }
 
-static void xen_machine_halt(void)
+static __noreturn void xen_machine_halt(void)
 {
 	xen_reboot(SHUTDOWN_poweroff);
 }
 
-static void xen_machine_power_off(void)
+static __noreturn void xen_machine_power_off(void)
 {
 	if (pm_power_off)
 		pm_power_off();
@@ -1484,8 +1480,11 @@ static void __ref xen_setup_gdt(int cpu)
 	pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
 	pv_cpu_ops.load_gdt = xen_load_gdt_boot;
 
-	setup_stack_canary_segment(0);
-	switch_to_new_gdt(0);
+	setup_stack_canary_segment(cpu);
+#ifdef CONFIG_X86_64
+	load_percpu_segment(cpu);
+#endif
+	switch_to_new_gdt(cpu);
 
 	pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry;
 	pv_cpu_ops.load_gdt = xen_load_gdt;
@@ -1600,7 +1599,17 @@ asmlinkage __visible void __init xen_sta
 	__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
 
 	/* Work out if we support NX */
-	x86_configure_nx();
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+	if ((cpuid_eax(0x80000000) & 0xffff0000) == 0x80000000 &&
+	    (cpuid_edx(0x80000001) & (1U << (X86_FEATURE_NX & 31)))) {
+		unsigned l, h;
+
+		__supported_pte_mask |= _PAGE_NX;
+		rdmsr(MSR_EFER, l, h);
+		l |= EFER_NX;
+		wrmsr(MSR_EFER, l, h);
+	}
+#endif
 
 	/* Get mfn list */
 	xen_build_dynamic_phys_to_machine();
@@ -1628,13 +1637,6 @@ asmlinkage __visible void __init xen_sta
 
 	machine_ops = xen_machine_ops;
 
-	/*
-	 * The only reliable way to retain the initial address of the
-	 * percpu gdt_page is to remember it here, so we can go and
-	 * mark it RW later, when the initial percpu area is freed.
-	 */
-	xen_initial_gdt = &per_cpu(gdt_page, 0);
-
 	xen_smp_init();
 
 #ifdef CONFIG_ACPI_NUMA
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/xen/mmu.c linux-4.0.9-pax/arch/x86/xen/mmu.c
--- linux-4.0.9/arch/x86/xen/mmu.c	2015-04-13 11:21:04.278617332 +0200
+++ linux-4.0.9-pax/arch/x86/xen/mmu.c	2015-05-17 22:29:57.469418079 +0200
@@ -379,7 +379,7 @@ static pteval_t pte_mfn_to_pfn(pteval_t
 	return val;
 }
 
-static pteval_t pte_pfn_to_mfn(pteval_t val)
+static pteval_t __intentional_overflow(-1) pte_pfn_to_mfn(pteval_t val)
 {
 	if (val & _PAGE_PRESENT) {
 		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
@@ -1835,7 +1835,11 @@ void __init xen_setup_kernel_pagetable(p
 		 * L3_k[511] -> level2_fixmap_pgt */
 		convert_pfn_mfn(level3_kernel_pgt);
 
+		convert_pfn_mfn(level3_vmalloc_start_pgt);
+		convert_pfn_mfn(level3_vmalloc_end_pgt);
+		convert_pfn_mfn(level3_vmemmap_pgt);
 		/* L3_k[511][506] -> level1_fixmap_pgt */
+		/* L3_k[511][507] -> level1_vsyscall_pgt */
 		convert_pfn_mfn(level2_fixmap_pgt);
 	}
 	/* We get [511][511] and have Xen's version of level2_kernel_pgt */
@@ -1860,11 +1864,18 @@ void __init xen_setup_kernel_pagetable(p
 		set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
 		set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
 		set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+		set_page_prot(level3_vmalloc_start_pgt, PAGE_KERNEL_RO);
+		set_page_prot(level3_vmalloc_end_pgt, PAGE_KERNEL_RO);
+		set_page_prot(level3_vmemmap_pgt, PAGE_KERNEL_RO);
 		set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
 		set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
+		set_page_prot(level2_vmemmap_pgt, PAGE_KERNEL_RO);
 		set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
 		set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
-		set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
+		set_page_prot(level1_fixmap_pgt[0], PAGE_KERNEL_RO);
+		set_page_prot(level1_fixmap_pgt[1], PAGE_KERNEL_RO);
+		set_page_prot(level1_fixmap_pgt[2], PAGE_KERNEL_RO);
+		set_page_prot(level1_vsyscall_pgt, PAGE_KERNEL_RO);
 
 		/* Pin down new L4 */
 		pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
@@ -2048,6 +2059,7 @@ static void __init xen_post_allocator_in
 	pv_mmu_ops.set_pud = xen_set_pud;
 #if PAGETABLE_LEVELS == 4
 	pv_mmu_ops.set_pgd = xen_set_pgd;
+	pv_mmu_ops.set_pgd_batched = xen_set_pgd;
 #endif
 
 	/* This will work as long as patching hasn't happened yet
@@ -2126,6 +2138,7 @@ static const struct pv_mmu_ops xen_mmu_o
 	.pud_val = PV_CALLEE_SAVE(xen_pud_val),
 	.make_pud = PV_CALLEE_SAVE(xen_make_pud),
 	.set_pgd = xen_set_pgd_hyper,
+	.set_pgd_batched = xen_set_pgd_hyper,
 
 	.alloc_pud = xen_alloc_pmd_init,
 	.release_pud = xen_release_pmd_init,
diff -NurpX linux-4.0.9-pax/Documentation/dontdiff linux-4.0.9/arch/x86/xen/smp.c linux-4.0.9-pax/arch/x86/xen/smp.c
--- linux-4.0.9/arch/x86/xen/smp.c	2015-04-13 11:21:04.286617331 +0200
+++ linux-4.0.9-pax/arch/x86/xen/smp.c	2015-04-15 12:13:52.954318620 +0200
@@ -288,17 +288,13 @@ static void __init xen_smp_prepare_boot_
 
 	if (xen_pv_domain()) {
 		if (!xen_feature(XENFEAT_writable_page_tables))
-			/* We've switched to the "real" per-cpu gdt, so make
-			 * sure the old memory can be recycled. */
-			make_lowmem_page_readwrite(xen_initial_gdt);
-
 #ifdef CONFIG_X86_32
 		/*
 		 * Xen starts us with XEN_FLAT_RING1_DS, but linux code
 		 * expects __USER_DS
 		 */
-		loadsegment(ds, __USER_DS);
-		loadsegment(es, __USER_DS);
+		loadsegment(ds, __KERNEL_DS);
+		loadsegment(es, __KERNEL_DS);
 #endif
 
 		xen_filter_cpu_maps();
@@ -379,7 +375,7 @@ cpu_initialize_context(unsigned int cpu,
 #ifdef CONFIG_X86_32
 	/* Note: PVH is not yet supported on x86_32. */
 	ctxt->user_regs.fs = __KERNEL_PERCPU;
-	ctxt->user_regs.gs