diff -urNp linux-2.6.11/Makefile linux-2.6.11/Makefile --- linux-2.6.11/Makefile 2005-03-02 02:38:13.000000000 -0500 +++ linux-2.6.11/Makefile 2005-03-07 08:59:37.000000000 -0500 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 11 -EXTRAVERSION = +EXTRAVERSION = -grsec NAME=Woozy Numbat # *DOCUMENTATION* @@ -561,7 +561,7 @@ export MODLIB ifeq ($(KBUILD_EXTMOD),) -core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ +core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ grsecurity/ vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ diff -urNp linux-2.6.11/arch/alpha/kernel/osf_sys.c linux-2.6.11/arch/alpha/kernel/osf_sys.c --- linux-2.6.11/arch/alpha/kernel/osf_sys.c 2005-03-02 02:37:30.000000000 -0500 +++ linux-2.6.11/arch/alpha/kernel/osf_sys.c 2005-03-07 08:59:37.000000000 -0500 @@ -179,6 +179,11 @@ osf_mmap(unsigned long addr, unsigned lo struct file *file = NULL; unsigned long ret = -EBADF; +#ifdef CONFIG_PAX_RANDEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + #if 0 if (flags & (_MAP_HASSEMAPHORE | _MAP_INHERIT | _MAP_UNALIGNED)) printk("%s: unimplemented OSF mmap flags %04lx\n", @@ -1288,6 +1293,10 @@ arch_get_unmapped_area(struct file *filp merely specific addresses, but regions of memory -- perhaps this feature should be incorporated into all ports? */ +#ifdef CONFIG_PAX_RANDMMAP + if (!(current->mm->flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit); if (addr != (unsigned long) -ENOMEM) @@ -1295,8 +1304,16 @@ arch_get_unmapped_area(struct file *filp } /* Next, try allocating at TASK_UNMAPPED_BASE. 
*/ - addr = arch_get_unmapped_area_1 (PAGE_ALIGN(TASK_UNMAPPED_BASE), - len, limit); + + addr = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (current->mm->flags & MF_PAX_RANDMMAP) + addr += current->mm->delta_mmap; +#endif + + addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit); + if (addr != (unsigned long) -ENOMEM) return addr; diff -urNp linux-2.6.11/arch/alpha/kernel/ptrace.c linux-2.6.11/arch/alpha/kernel/ptrace.c --- linux-2.6.11/arch/alpha/kernel/ptrace.c 2005-03-02 02:38:25.000000000 -0500 +++ linux-2.6.11/arch/alpha/kernel/ptrace.c 2005-03-07 08:59:37.000000000 -0500 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -289,6 +290,9 @@ do_sys_ptrace(long request, long pid, lo if (!child) goto out_notsk; + if (gr_handle_ptrace(child, request)) + goto out; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out; diff -urNp linux-2.6.11/arch/alpha/mm/fault.c linux-2.6.11/arch/alpha/mm/fault.c --- linux-2.6.11/arch/alpha/mm/fault.c 2005-03-02 02:37:52.000000000 -0500 +++ linux-2.6.11/arch/alpha/mm/fault.c 2005-03-07 08:59:37.000000000 -0500 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,142 @@ __load_new_mm_context(struct mm_struct * __reload_thread(pcb); } +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (regs->pc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + * 4 when legitimate ET_EXEC was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; +#endif + +#ifdef CONFIG_PAX_RANDEXEC + if (current->mm->flags & MF_PAX_RANDEXEC) { + if (regs->pc >= current->mm->start_code && + regs->pc < current->mm->end_code) + { + if (regs->r26 == regs->pc) + return 1; + + regs->pc += current->mm->delta_exec; + return 4; + } + } +#endif + +#ifdef CONFIG_PAX_EMUPLT + do { /* PaX: patched 
PLT emulation #1 */ + unsigned int ldah, ldq, jmp; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(ldq, (unsigned int *)(regs->pc+4)); + err |= get_user(jmp, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U) == 0x277B0000U && + (ldq & 0xFFFF0000U) == 0xA77B0000U && + jmp == 0x6BFB0000U) + { + unsigned long r27, addr; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = ldq | 0xFFFFFFFFFFFF0000UL; + + addr = regs->r27 + ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); + err = get_user(r27, (unsigned long*)addr); + if (err) + break; + + regs->r27 = r27; + regs->pc = r27; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #2 */ + unsigned int ldah, lda, br; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(lda, (unsigned int *)(regs->pc+4)); + err |= get_user(br, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U)== 0x277B0000U && + (lda & 0xFFFF0000U) == 0xA77B0000U && + (br & 0xFFE00000U) == 0xC3E00000U) + { + unsigned long addr = br | 0xFFFFFFFFFFE00000UL; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = lda | 0xFFFFFFFFFFFF0000UL; + + regs->r27 += ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); + regs->pc += 12 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation */ + unsigned int br; + + err = get_user(br, (unsigned int *)regs->pc); + + if (!err && (br & 0xFFE00000U) == 0xC3800000U) { + unsigned int br2, ldq, nop, jmp; + unsigned long addr = br | 0xFFFFFFFFFFE00000UL, resolver; + + addr = regs->pc + 4 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); + err = get_user(br2, (unsigned int *)addr); + err |= get_user(ldq, (unsigned int *)(addr+4)); + err |= get_user(nop, (unsigned int *)(addr+8)); + err |= get_user(jmp, (unsigned int *)(addr+12)); + err |= 
get_user(resolver, (unsigned long *)(addr+16)); + + if (err) + break; + + if (br2 == 0xC3600000U && + ldq == 0xA77B000CU && + nop == 0x47FF041FU && + jmp == 0x6B7B0000U) + { + regs->r28 = regs->pc+4; + regs->r27 = addr+16; + regs->pc = resolver; + return 3; + } + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) { + printk("."); + break; + } + printk("%08x ", c); + } + printk("\n"); +} +#endif /* * This routine handles page faults. It determines the address, @@ -125,7 +262,7 @@ do_page_fault(unsigned long address, uns goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; - if (expand_stack(vma, address)) + if (expand_stack(current, vma, address)) goto bad_area; /* Ok, we have a good vm_area for this memory access, so @@ -133,8 +270,34 @@ do_page_fault(unsigned long address, uns good_area: si_code = SEGV_ACCERR; if (cause < 0) { - if (!(vma->vm_flags & VM_EXEC)) + if (!(vma->vm_flags & VM_EXEC)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(mm->flags & MF_PAX_PAGEEXEC) || address != regs->pc) + goto bad_area; + + up_read(&mm->mmap_sem); + switch(pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + return; +#endif + +#ifdef CONFIG_PAX_RANDEXEC + case 4: + return; +#endif + + } + pax_report_fault(regs, (void*)regs->pc, (void*)rdusp()); + do_exit(SIGKILL); +#else goto bad_area; +#endif + + } } else if (!cause) { /* Allow reads even for write-only mappings */ if (!(vma->vm_flags & (VM_READ | VM_WRITE))) diff -urNp linux-2.6.11/arch/arm/mm/fault.c linux-2.6.11/arch/arm/mm/fault.c --- linux-2.6.11/arch/arm/mm/fault.c 2005-03-02 02:38:38.000000000 -0500 +++ linux-2.6.11/arch/arm/mm/fault.c 2005-03-07 08:59:37.000000000 -0500 @@ -208,7 +208,7 @@ survive: goto survive; check_stack: - if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) + if 
(vma->vm_flags & VM_GROWSDOWN && !expand_stack(tsk, vma, addr)) goto good_area; out: return fault; diff -urNp linux-2.6.11/arch/arm/mm/mmap.c linux-2.6.11/arch/arm/mm/mmap.c --- linux-2.6.11/arch/arm/mm/mmap.c 2005-03-02 02:38:10.000000000 -0500 +++ linux-2.6.11/arch/arm/mm/mmap.c 2005-03-07 08:59:37.000000000 -0500 @@ -62,6 +62,10 @@ arch_get_unmapped_area(struct file *filp if (len > TASK_SIZE) return -ENOMEM; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { if (do_align) addr = COLOUR_ALIGN(addr, pgoff); @@ -88,8 +92,8 @@ full_search: * Start a new search - just in case we missed * some holes. */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; goto full_search; } return -ENOMEM; diff -urNp linux-2.6.11/arch/arm26/mm/fault.c linux-2.6.11/arch/arm26/mm/fault.c --- linux-2.6.11/arch/arm26/mm/fault.c 2005-03-02 02:37:52.000000000 -0500 +++ linux-2.6.11/arch/arm26/mm/fault.c 2005-03-07 08:59:37.000000000 -0500 @@ -197,7 +197,7 @@ survive: goto survive; check_stack: - if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) + if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(tsk, vma, addr)) goto good_area; out: return fault; diff -urNp linux-2.6.11/arch/cris/mm/fault.c linux-2.6.11/arch/cris/mm/fault.c --- linux-2.6.11/arch/cris/mm/fault.c 2005-03-02 02:38:25.000000000 -0500 +++ linux-2.6.11/arch/cris/mm/fault.c 2005-03-07 08:59:37.000000000 -0500 @@ -207,7 +207,7 @@ do_page_fault(unsigned long address, str if (address + PAGE_SIZE < rdusp()) goto bad_area; } - if (expand_stack(vma, address)) + if (expand_stack(tsk, vma, address)) goto bad_area; /* diff -urNp linux-2.6.11/arch/i386/Kconfig linux-2.6.11/arch/i386/Kconfig --- linux-2.6.11/arch/i386/Kconfig 2005-03-02 02:37:49.000000000 -0500 +++ linux-2.6.11/arch/i386/Kconfig 2005-03-07 08:59:37.000000000 -0500 @@ -409,7 +409,7 @@ config X86_POPAD_OK config 
X86_ALIGNMENT_16 bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK8 || MK7 || MK6 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 default y config X86_GOOD_APIC diff -urNp linux-2.6.11/arch/i386/kernel/apm.c linux-2.6.11/arch/i386/kernel/apm.c --- linux-2.6.11/arch/i386/kernel/apm.c 2005-03-02 02:37:47.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/apm.c 2005-03-07 08:59:37.000000000 -0500 @@ -598,19 +598,39 @@ static u8 apm_bios_call(u32 func, u32 eb int cpu; struct desc_struct save_desc_40; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr3; +#endif + cpus = apm_save_cpus(); - cpu = get_cpu(); - save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8]; - per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc; +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(flags, cr3); +#endif + + save_desc_40 = cpu_gdt_table[cpu][0x40 / 8]; + cpu_gdt_table[cpu][0x40 / 8] = bad_bios_desc; + +#ifndef CONFIG_PAX_KERNEXEC local_save_flags(flags); APM_DO_CLI; +#endif + APM_DO_SAVE_SEGS; apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); APM_DO_RESTORE_SEGS; + +#ifndef CONFIG_PAX_KERNEXEC local_irq_restore(flags); - per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = save_desc_40; +#endif + + cpu_gdt_table[cpu][0x40 / 8] = save_desc_40; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(flags, cr3); +#endif + put_cpu(); apm_restore_cpus(cpus); @@ -640,20 +660,39 @@ static u8 apm_bios_call_simple(u32 func, int cpu; struct desc_struct save_desc_40; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr3; +#endif cpus = apm_save_cpus(); - cpu = get_cpu(); - save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8]; - per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc; +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(flags, cr3); +#endif + + save_desc_40 = cpu_gdt_table[cpu][0x40 / 8]; + 
cpu_gdt_table[cpu][0x40 / 8] = bad_bios_desc; + +#ifndef CONFIG_PAX_KERNEXEC local_save_flags(flags); APM_DO_CLI; +#endif + APM_DO_SAVE_SEGS; error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); APM_DO_RESTORE_SEGS; + +#ifndef CONFIG_PAX_KERNEXEC local_irq_restore(flags); - __get_cpu_var(cpu_gdt_table)[0x40 / 8] = save_desc_40; +#endif + + cpu_gdt_table[cpu][0x40 / 8] = save_desc_40; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(flags, cr3); +#endif + put_cpu(); apm_restore_cpus(cpus); return error; @@ -2294,35 +2333,35 @@ static int __init apm_init(void) apm_bios_entry.segment = APM_CS; for (i = 0; i < NR_CPUS; i++) { - set_base(per_cpu(cpu_gdt_table, i)[APM_CS >> 3], + set_base(cpu_gdt_table[i][APM_CS >> 3], __va((unsigned long)apm_info.bios.cseg << 4)); - set_base(per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], + set_base(cpu_gdt_table[i][APM_CS_16 >> 3], __va((unsigned long)apm_info.bios.cseg_16 << 4)); - set_base(per_cpu(cpu_gdt_table, i)[APM_DS >> 3], + set_base(cpu_gdt_table[i][APM_DS >> 3], __va((unsigned long)apm_info.bios.dseg << 4)); #ifndef APM_RELAX_SEGMENTS if (apm_info.bios.version == 0x100) { #endif /* For ASUS motherboard, Award BIOS rev 110 (and others?) */ - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], 64 * 1024 - 1); + _set_limit((char *)&cpu_gdt_table[i][APM_CS >> 3], 64 * 1024 - 1); /* For some unknown machine. */ - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], 64 * 1024 - 1); + _set_limit((char *)&cpu_gdt_table[i][APM_CS_16 >> 3], 64 * 1024 - 1); /* For the DEC Hinote Ultra CT475 (and others?) 
*/ - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_DS >> 3], 64 * 1024 - 1); + _set_limit((char *)&cpu_gdt_table[i][APM_DS >> 3], 64 * 1024 - 1); #ifndef APM_RELAX_SEGMENTS } else { - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], + _set_limit((char *)&cpu_gdt_table[i][APM_CS >> 3], (apm_info.bios.cseg_len - 1) & 0xffff); - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], + _set_limit((char *)&cpu_gdt_table[i][APM_CS_16 >> 3], (apm_info.bios.cseg_16_len - 1) & 0xffff); - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_DS >> 3], + _set_limit((char *)&cpu_gdt_table[i][APM_DS >> 3], (apm_info.bios.dseg_len - 1) & 0xffff); /* workaround for broken BIOSes */ if (apm_info.bios.cseg_len <= apm_info.bios.offset) - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], 64 * 1024 -1); + _set_limit((char *)&cpu_gdt_table[i][APM_CS >> 3], 64 * 1024 -1); if (apm_info.bios.dseg_len <= 0x40) { /* 0x40 * 4kB == 64kB */ /* for the BIOS that assumes granularity = 1 */ - per_cpu(cpu_gdt_table, i)[APM_DS >> 3].b |= 0x800000; + cpu_gdt_table[i][APM_DS >> 3].b |= 0x800000; printk(KERN_NOTICE "apm: we set the granularity of dseg.\n"); } } diff -urNp linux-2.6.11/arch/i386/kernel/cpu/common.c linux-2.6.11/arch/i386/kernel/cpu/common.c --- linux-2.6.11/arch/i386/kernel/cpu/common.c 2005-03-02 02:37:47.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/cpu/common.c 2005-03-07 08:59:37.000000000 -0500 @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -18,8 +17,7 @@ #include "cpu.h" -DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]); -EXPORT_PER_CPU_SYMBOL(cpu_gdt_table); +EXPORT_SYMBOL_GPL(cpu_gdt_table); static int cachesize_override __initdata = -1; static int disable_x86_fxsr __initdata = 0; @@ -369,6 +367,10 @@ void __init identify_cpu(struct cpuinfo_ if (this_cpu->c_init) this_cpu->c_init(c); +#if defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_NOVSYSCALL) + 
clear_bit(X86_FEATURE_SEP, c->x86_capability); +#endif + /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); @@ -555,7 +557,7 @@ void __init early_cpu_init(void) void __init cpu_init (void) { int cpu = smp_processor_id(); - struct tss_struct * t = &per_cpu(init_tss, cpu); + struct tss_struct * t = init_tss + cpu; struct thread_struct *thread = ¤t->thread; if (cpu_test_and_set(cpu, cpu_initialized)) { @@ -577,17 +579,16 @@ void __init cpu_init (void) * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ - memcpy(&per_cpu(cpu_gdt_table, cpu), cpu_gdt_table, - GDT_SIZE); - cpu_gdt_descr[cpu].size = GDT_SIZE - 1; - cpu_gdt_descr[cpu].address = - (unsigned long)&per_cpu(cpu_gdt_table, cpu); + if (cpu) { + memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE); + cpu_gdt_descr[cpu].size = GDT_SIZE - 1; + cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu]; + } /* * Set up the per-thread TLS descriptor cache: */ - memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu), - GDT_ENTRY_TLS_ENTRIES * 8); + memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8); __asm__ __volatile__("lgdt %0" : : "m" (cpu_gdt_descr[cpu])); __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); @@ -609,7 +610,7 @@ void __init cpu_init (void) load_esp0(t, thread); set_tss_desc(cpu,t); load_TR_desc(); - load_LDT(&init_mm.context); + _load_LDT(&init_mm.context); /* Set up doublefault TSS pointer in the GDT */ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); diff -urNp linux-2.6.11/arch/i386/kernel/entry.S linux-2.6.11/arch/i386/kernel/entry.S --- linux-2.6.11/arch/i386/kernel/entry.S 2005-03-02 02:37:51.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/entry.S 2005-03-07 08:59:37.000000000 -0500 @@ -229,6 +229,15 @@ sysenter_past_esp: movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work + +#ifdef CONFIG_PAX_RANDKSTACK + pushl %eax + call pax_randomize_kstack + popl %eax +#endif + + 
xorl %ebp,%ebp /* prevent info leak */ + /* if something modifies registers it must also disable sysexit */ movl EIP(%esp), %edx movl OLDESP(%esp), %ecx @@ -257,6 +266,11 @@ syscall_exit: movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx # current->work jne syscall_exit_work + +#ifdef CONFIG_PAX_RANDKSTACK + call pax_randomize_kstack +#endif + restore_all: RESTORE_ALL @@ -571,7 +585,7 @@ ENTRY(spurious_interrupt_bug) pushl $do_spurious_interrupt_bug jmp error_code -.data +.section .rodata,"a",@progbits ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ .long sys_exit diff -urNp linux-2.6.11/arch/i386/kernel/head.S linux-2.6.11/arch/i386/kernel/head.S --- linux-2.6.11/arch/i386/kernel/head.S 2005-03-02 02:37:48.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/head.S 2005-03-07 08:59:37.000000000 -0500 @@ -48,6 +48,12 @@ /* + * Real beginning of normal "text" segment + */ +ENTRY(stext) +ENTRY(_stext) + +/* * 32-bit kernel entrypoint; only used by the boot CPU. On entry, * %esi points to the real-mode code as a 32-bit pointer. * CS and DS must be 4 GB flat segments, but we don't depend on @@ -78,6 +84,19 @@ ENTRY(startup_32) shrl $2,%ecx rep ; stosl +#ifdef CONFIG_PAX_KERNEXEC + movl $ __KERNEL_TEXT_OFFSET,%eax + movw %ax,(cpu_gdt_table - __PAGE_OFFSET + __KERNEL_CS + 2) + rorl $16,%eax + movb %al,(cpu_gdt_table - __PAGE_OFFSET + __KERNEL_CS + 4) + movb %ah,(cpu_gdt_table - __PAGE_OFFSET + __KERNEL_CS + 7) + + movb %al,(boot_gdt_table - __PAGE_OFFSET + __BOOT_CS + 4) + movb %ah,(boot_gdt_table - __PAGE_OFFSET + __BOOT_CS + 7) + rorl $16,%eax + movw %ax,(boot_gdt_table - __PAGE_OFFSET + __BOOT_CS + 2) +#endif + /* * Initialize page tables. This creates a PDE and a set of page * tables, which are located immediately beyond _end. The variable @@ -88,24 +107,42 @@ ENTRY(startup_32) * Warning: don't use %esi or the stack in this code. However, %esp * can be used as a GPR if you really need it... 
*/ -page_pde_offset = (__PAGE_OFFSET >> 20); - +#ifdef CONFIG_X86_PAE +page_pde_offset = ((__PAGE_OFFSET >> 21) * (4096 / PTRS_PER_PTE)); +#else +page_pde_offset = ((__PAGE_OFFSET >> 22) * (4096 / PTRS_PER_PTE)); +#endif movl $(pg0 - __PAGE_OFFSET), %edi +#ifdef CONFIG_X86_PAE + movl $(swapper_pm_dir - __PAGE_OFFSET), %edx +#else movl $(swapper_pg_dir - __PAGE_OFFSET), %edx - movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ +#endif + movl $0x063, %eax /* 0x063 = DIRTY+ACCESSED+PRESENT+RW */ 10: - leal 0x007(%edi),%ecx /* Create PDE entry */ + leal 0x063(%edi),%ecx /* Create PDE entry */ movl %ecx,(%edx) /* Store identity PDE entry */ movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ +#ifdef CONFIG_X86_PAE + movl $0,4(%edx) + movl $0,page_pde_offset+4(%edx) + addl $8,%edx + movl $512, %ecx +#else addl $4,%edx movl $1024, %ecx +#endif 11: stosl +#ifdef CONFIG_X86_PAE + movl $0,(%edi) + addl $4,%edi +#endif addl $0x1000,%eax loop 11b /* End condition: we must map up to and including INIT_MAP_BEYOND_END */ - /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */ - leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp + /* bytes beyond the end of our own page tables; the +0x063 is the attribute bits */ + leal (INIT_MAP_BEYOND_END+0x063)(%edi),%ebp cmpl %ebp,%eax jb 10b movl %edi,(init_pg_tables_end - __PAGE_OFFSET) @@ -128,6 +165,11 @@ ENTRY(startup_32_smp) movl %eax,%fs movl %eax,%gs + /* This is a secondary processor (AP) */ + xorl %ebx,%ebx + incl %ebx +#endif /* CONFIG_SMP */ + /* * New page tables may be in 4Mbyte page mode and may * be using the global pages. @@ -143,26 +185,27 @@ ENTRY(startup_32_smp) * not yet offset PAGE_OFFSET.. */ #define cr4_bits mmu_cr4_features-__PAGE_OFFSET +3: movl cr4_bits,%edx andl %edx,%edx - jz 6f + jz 5f movl %cr4,%eax # Turn on paging options (PSE,PAE,..) 
orl %edx,%eax movl %eax,%cr4 - btl $5, %eax # check if PAE is enabled - jnc 6f +#ifdef CONFIG_X86_PAE + movl %ebx,%edi /* Check if extended functions are implemented */ movl $0x80000000, %eax cpuid cmpl $0x80000000, %eax - jbe 6f + jbe 4f mov $0x80000001, %eax cpuid /* Execute Disable bit supported? */ btl $20, %edx - jnc 6f + jnc 4f /* Setup EFER (Extended Feature Enable Register) */ movl $0xc0000080, %ecx @@ -172,13 +215,10 @@ ENTRY(startup_32_smp) /* Make changes effective */ wrmsr -6: - /* This is a secondary processor (AP) */ - xorl %ebx,%ebx - incl %ebx - -3: -#endif /* CONFIG_SMP */ +4: + movl %edi,%ebx +#endif +5: /* * Enable paging @@ -203,9 +243,7 @@ ENTRY(startup_32_smp) #ifdef CONFIG_SMP andl %ebx,%ebx - jz 1f /* Initial CPU cleans BSS */ - jmp checkCPUtype -1: + jnz checkCPUtype /* Initial CPU cleans BSS */ #endif /* CONFIG_SMP */ /* @@ -402,32 +440,74 @@ ignore_int: popl %eax iret -/* - * Real beginning of normal "text" segment - */ -ENTRY(stext) -ENTRY(_stext) - -/* - * BSS section - */ -.section ".bss.page_aligned","w" +.section .data.swapper_pg_dir,"a",@progbits ENTRY(swapper_pg_dir) +#ifdef CONFIG_X86_PAE + .long swapper_pm_dir-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*8-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*16-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*24-__PAGE_OFFSET+1 + .long 0 +#else + .fill 1024,4,0 +#endif + +#ifdef CONFIG_PAX_KERNEXEC +ENTRY(kernexec_pg_dir) +#ifdef CONFIG_X86_PAE + .long kernexec_pm_dir-__PAGE_OFFSET+1 + .long 0 + .long kernexec_pm_dir+512*8-__PAGE_OFFSET+1 + .long 0 + .long kernexec_pm_dir+512*16-__PAGE_OFFSET+1 + .long 0 + .long kernexec_pm_dir+512*24-__PAGE_OFFSET+1 + .long 0 +#else .fill 1024,4,0 +#endif +#endif + +#ifdef CONFIG_X86_PAE +.section .data.swapper_pm_dir,"a",@progbits +ENTRY(swapper_pm_dir) + .fill 512,8,0 + .fill 512,8,0 + .fill 512,8,0 + .fill 512,8,0 + +#ifdef CONFIG_PAX_KERNEXEC +ENTRY(kernexec_pm_dir) + .fill 512,8,0 + .fill 512,8,0 + .fill 512,8,0 + .fill 512,8,0 
+#endif +#endif + +.section .rodata.empty_zero_page,"a",@progbits ENTRY(empty_zero_page) .fill 4096,1,0 /* - * This starts the data section. - */ -.data + * The IDT has to be page-aligned to simplify the Pentium + * F0 0F bug workaround.. We have a special link segment + * for this. + */ +.section .rodata.idt,"a",@progbits +ENTRY(idt_table) + .fill 256,8,0 + +.section .rodata,"a",@progbits +ready: .byte 0 ENTRY(stack_start) .long init_thread_union+THREAD_SIZE .long __BOOT_DS -ready: .byte 0 - int_msg: .asciz "Unknown interrupt or fault at EIP %p %p %p\n" @@ -469,8 +549,8 @@ cpu_gdt_descr: .align L1_CACHE_BYTES ENTRY(boot_gdt_table) .fill GDT_ENTRY_BOOT_CS,8,0 - .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ + .quad 0x00cf9b000000ffff /* kernel 4GB code at 0x00000000 */ + .quad 0x00cf93000000ffff /* kernel 4GB data at 0x00000000 */ /* * The Global Descriptor Table contains 28 quadwords, per-CPU. @@ -489,28 +569,27 @@ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* 0x4b reserved */ .quad 0x0000000000000000 /* 0x53 reserved */ .quad 0x0000000000000000 /* 0x5b reserved */ - - .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ - .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */ - .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */ + .quad 0x00cf9b000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ + .quad 0x00cf93000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ + .quad 0x00cffb000000ffff /* 0x73 user 4GB code at 0x00000000 */ + .quad 0x00cff3000000ffff /* 0x7b user 4GB data at 0x00000000 */ .quad 0x0000000000000000 /* 0x80 TSS descriptor */ .quad 0x0000000000000000 /* 0x88 LDT descriptor */ /* Segments used for calling PnP BIOS */ - .quad 0x00c09a0000000000 /* 0x90 32-bit code */ - .quad 0x00809a0000000000 /* 0x98 16-bit code */ - .quad 0x0080920000000000 /* 0xa0 16-bit data */ - 
.quad 0x0080920000000000 /* 0xa8 16-bit data */ - .quad 0x0080920000000000 /* 0xb0 16-bit data */ + .quad 0x00c09b0000000000 /* 0x90 32-bit code */ + .quad 0x00809b0000000000 /* 0x98 16-bit code */ + .quad 0x0080930000000000 /* 0xa0 16-bit data */ + .quad 0x0080930000000000 /* 0xa8 16-bit data */ + .quad 0x0080930000000000 /* 0xb0 16-bit data */ /* * The APM segments have byte granularity and their bases * and limits are set at run time. */ - .quad 0x00409a0000000000 /* 0xb8 APM CS code */ - .quad 0x00009a0000000000 /* 0xc0 APM CS 16 code (16 bit) */ - .quad 0x0040920000000000 /* 0xc8 APM DS data */ + .quad 0x00409b0000000000 /* 0xb8 APM CS code */ + .quad 0x00009b0000000000 /* 0xc0 APM CS 16 code (16 bit) */ + .quad 0x0040930000000000 /* 0xc8 APM DS data */ .quad 0x0000000000000000 /* 0xd0 - unused */ .quad 0x0000000000000000 /* 0xd8 - unused */ @@ -519,3 +598,6 @@ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* 0xf0 - unused */ .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ +#ifdef CONFIG_SMP + .fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */ +#endif diff -urNp linux-2.6.11/arch/i386/kernel/init_task.c linux-2.6.11/arch/i386/kernel/init_task.c --- linux-2.6.11/arch/i386/kernel/init_task.c 2005-03-02 02:38:32.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/init_task.c 2005-03-07 08:59:37.000000000 -0500 @@ -42,5 +42,4 @@ EXPORT_SYMBOL(init_task); * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. */ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS; - +struct tss_struct init_tss[NR_CPUS] ____cacheline_maxaligned_in_smp = { [0 ... 
NR_CPUS-1] = INIT_TSS }; diff -urNp linux-2.6.11/arch/i386/kernel/ioport.c linux-2.6.11/arch/i386/kernel/ioport.c --- linux-2.6.11/arch/i386/kernel/ioport.c 2005-03-02 02:38:08.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/ioport.c 2005-03-07 08:59:37.000000000 -0500 @@ -15,6 +15,7 @@ #include #include #include +#include /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) @@ -63,9 +64,16 @@ asmlinkage long sys_ioperm(unsigned long if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; +#ifdef CONFIG_GRKERNSEC_IO + if (turn_on) { + gr_handle_ioperm(); +#else if (turn_on && !capable(CAP_SYS_RAWIO)) +#endif return -EPERM; - +#ifdef CONFIG_GRKERNSEC_IO + } +#endif /* * If it's the first ioperm() call in this thread's lifetime, set the * IO bitmap up. ioperm() is much less timing critical than clone(), @@ -87,7 +95,7 @@ asmlinkage long sys_ioperm(unsigned long * because the ->io_bitmap_max value must match the bitmap * contents: */ - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); @@ -137,8 +145,13 @@ asmlinkage long sys_iopl(unsigned long u return -EINVAL; /* Trying to gain more privileges? */ if (level > old) { +#ifdef CONFIG_GRKERNSEC_IO + gr_handle_iopl(); + return -EPERM; +#else if (!capable(CAP_SYS_RAWIO)) return -EPERM; +#endif } regs->eflags = (regs->eflags &~ 0x3000UL) | (level << 12); /* Make sure we return the long way (not sysenter) */ diff -urNp linux-2.6.11/arch/i386/kernel/irq.c linux-2.6.11/arch/i386/kernel/irq.c --- linux-2.6.11/arch/i386/kernel/irq.c 2005-03-02 02:37:48.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/irq.c 2005-03-07 08:59:37.000000000 -0500 @@ -113,10 +113,10 @@ fastcall unsigned int do_IRQ(struct pt_r * gcc's 3.0 and earlier don't handle that correctly. 
*/ static char softirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__aligned__(THREAD_SIZE))); + __attribute__((__aligned__(THREAD_SIZE), __section__(".bss.page_aligned"))); static char hardirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__aligned__(THREAD_SIZE))); + __attribute__((__aligned__(THREAD_SIZE), __section__(".bss.page_aligned"))); /* * allocate per-cpu stacks for hardirq and for softirq processing diff -urNp linux-2.6.11/arch/i386/kernel/ldt.c linux-2.6.11/arch/i386/kernel/ldt.c --- linux-2.6.11/arch/i386/kernel/ldt.c 2005-03-02 02:38:13.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/ldt.c 2005-03-07 08:59:37.000000000 -0500 @@ -102,6 +102,19 @@ int init_new_context(struct task_struct retval = copy_ldt(&mm->context, &old_mm->context); up(&old_mm->context.sem); } + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + if (!mm->context.user_cs_limit) { + mm->context.user_cs_base = 0UL; + mm->context.user_cs_limit = ~0UL; + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) + cpus_clear(mm->context.cpu_user_cs_mask); +#endif + + } +#endif + return retval; } @@ -159,7 +172,7 @@ static int read_default_ldt(void __user { int err; unsigned long size; - void *address; + const void *address; err = 0; address = &default_ldt[0]; @@ -216,6 +229,13 @@ static int write_ldt(void __user * ptr, } } +#ifdef CONFIG_PAX_SEGMEXEC + if ((mm->flags & MF_PAX_SEGMEXEC) && (ldt_info.contents & 2)) { + error = -EINVAL; + goto out_unlock; + } +#endif + entry_1 = LDT_entry_a(&ldt_info); entry_2 = LDT_entry_b(&ldt_info); if (oldmode) diff -urNp linux-2.6.11/arch/i386/kernel/process.c linux-2.6.11/arch/i386/kernel/process.c --- linux-2.6.11/arch/i386/kernel/process.c 2005-03-02 02:37:30.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/process.c 2005-03-07 08:59:37.000000000 -0500 @@ -325,7 +325,7 @@ void exit_thread(void) /* The process may have allocated an io port bitmap... nuke it. 
*/ if (unlikely(NULL != t->io_bitmap_ptr)) { int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; @@ -345,6 +345,9 @@ void flush_thread(void) { struct task_struct *tsk = current; + __asm__("movl %0,%%fs\n" + "movl %0,%%gs\n" + : : "r" (0) : "memory"); memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* @@ -387,7 +390,7 @@ int copy_thread(int nr, unsigned long cl struct task_struct *tsk; int err; - childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; + childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info - sizeof(unsigned long))) - 1; *childregs = *regs; childregs->eax = 0; childregs->esp = esp; @@ -492,9 +495,8 @@ void dump_thread(struct pt_regs * regs, int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) { struct pt_regs ptregs; - - ptregs = *(struct pt_regs *) - ((unsigned long)tsk->thread_info+THREAD_SIZE - sizeof(ptregs)); + + ptregs = *(struct pt_regs *)(tsk->thread.esp0 - sizeof(ptregs)); ptregs.xcs &= 0xffff; ptregs.xds &= 0xffff; ptregs.xes &= 0xffff; @@ -576,12 +578,20 @@ struct task_struct fastcall * __switch_t struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long flags, cr3; +#endif /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ __unlazy_fpu(prev_p); +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(flags, cr3); +#endif + /* * Reload esp0, LDT and the page table pointer: */ @@ -592,6 +602,10 @@ struct task_struct fastcall * __switch_t */ load_TLS(next, cpu); +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(flags, cr3); +#endif + /* * Save away %fs and %gs. 
No need to save %es and %ds, as * those are always kernel segments while inside the kernel. @@ -740,6 +754,10 @@ asmlinkage int sys_set_thread_area(struc struct desc_struct *desc; int cpu, idx; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long flags, cr3; +#endif + if (copy_from_user(&info, u_info, sizeof(info))) return -EFAULT; idx = info.entry_number; @@ -773,8 +791,17 @@ asmlinkage int sys_set_thread_area(struc desc->a = LDT_entry_a(&info); desc->b = LDT_entry_b(&info); } + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(flags, cr3); +#endif + load_TLS(t, cpu); +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(flags, cr3); +#endif + put_cpu(); return 0; @@ -828,3 +855,29 @@ asmlinkage int sys_get_thread_area(struc return 0; } +#ifdef CONFIG_PAX_RANDKSTACK +asmlinkage void pax_randomize_kstack(void) +{ + struct tss_struct *tss = init_tss + smp_processor_id(); + unsigned long time; + +#ifdef CONFIG_PAX_SOFTMODE + if (!pax_aslr) + return; +#endif + + rdtscl(time); + + /* P4 seems to return a 0 LSB, ignore it */ +#ifdef CONFIG_MPENTIUM4 + time &= 0x3EUL; + time <<= 1; +#else + time &= 0x1FUL; + time <<= 2; +#endif + + tss->esp0 ^= time; + current->thread.esp0 = tss->esp0; +} +#endif diff -urNp linux-2.6.11/arch/i386/kernel/ptrace.c linux-2.6.11/arch/i386/kernel/ptrace.c --- linux-2.6.11/arch/i386/kernel/ptrace.c 2005-03-02 02:37:51.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/ptrace.c 2005-03-07 08:59:37.000000000 -0500 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -384,6 +385,9 @@ asmlinkage int sys_ptrace(long request, if (pid == 1) /* you may not mess with init */ goto out_tsk; + if (gr_handle_ptrace(child, request)) + goto out_tsk; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out_tsk; @@ -462,6 +466,17 @@ asmlinkage int sys_ptrace(long request, if(addr == (long) &dummy->u_debugreg[5]) break; if(addr < (long) &dummy->u_debugreg[4] && ((unsigned long) data) >= TASK_SIZE-3) break; + +#ifdef CONFIG_GRKERNSEC + if(addr >= 
(long) &dummy->u_debugreg[0] && + addr <= (long) &dummy->u_debugreg[3]){ + long reg = (addr - (long) &dummy->u_debugreg[0]) >> 2; + long type = (child->thread.debugreg[7] >> (DR_CONTROL_SHIFT + 4*reg)) & 3; + long align = (child->thread.debugreg[7] >> (DR_CONTROL_SHIFT + 2 + 4*reg)) & 3; + if((type & 1) && (data & align)) + break; + } +#endif /* Sanity-check data. Take one half-byte at once with * check = (val >> (16 + 4*i)) & 0xf. It contains the diff -urNp linux-2.6.11/arch/i386/kernel/reboot.c linux-2.6.11/arch/i386/kernel/reboot.c --- linux-2.6.11/arch/i386/kernel/reboot.c 2005-03-02 02:37:52.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/reboot.c 2005-03-07 08:59:37.000000000 -0500 @@ -152,18 +152,18 @@ core_initcall(reboot_init); doesn't work with at least one type of 486 motherboard. It is easy to stop this code working; hence the copious comments. */ -static unsigned long long +static const unsigned long long real_mode_gdt_entries [3] = { 0x0000000000000000ULL, /* Null descriptor */ - 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ - 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ + 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ + 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ }; static struct { unsigned short size __attribute__ ((packed)); - unsigned long long * base __attribute__ ((packed)); + const unsigned long long * base __attribute__ ((packed)); } real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries }, real_mode_idt = { 0x3ff, NULL }, diff -urNp linux-2.6.11/arch/i386/kernel/setup.c linux-2.6.11/arch/i386/kernel/setup.c --- linux-2.6.11/arch/i386/kernel/setup.c 2005-03-02 02:38:08.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/setup.c 2005-03-07 08:59:37.000000000 -0500 @@ -73,7 +73,11 @@ struct cpuinfo_x86 new_cpu_data __initda /* common cpu data for all cpus */ struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; +#ifdef 
CONFIG_X86_PAE +unsigned long mmu_cr4_features = X86_CR4_PAE; +#else unsigned long mmu_cr4_features; +#endif EXPORT_SYMBOL_GPL(mmu_cr4_features); #ifdef CONFIG_ACPI_INTERPRETER @@ -1411,7 +1415,7 @@ void __init setup_arch(char **cmdline_p) code_resource.start = virt_to_phys(_text); code_resource.end = virt_to_phys(_etext)-1; - data_resource.start = virt_to_phys(_etext); + data_resource.start = virt_to_phys(_data); data_resource.end = virt_to_phys(_edata)-1; parse_cmdline_early(cmdline_p); @@ -1481,6 +1485,15 @@ void __init setup_arch(char **cmdline_p) #endif } +#ifdef CONFIG_PAX_SOFTMODE +static int __init setup_pax_softmode(char *str) +{ + get_option (&str, &pax_softmode); + return 1; +} +__setup("pax_softmode=", setup_pax_softmode); +#endif + #include "setup_arch_post.h" /* * Local Variables: diff -urNp linux-2.6.11/arch/i386/kernel/signal.c linux-2.6.11/arch/i386/kernel/signal.c --- linux-2.6.11/arch/i386/kernel/signal.c 2005-03-02 02:38:08.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/signal.c 2005-03-07 08:59:37.000000000 -0500 @@ -380,7 +380,17 @@ static void setup_frame(int sig, struct goto give_sigsegv; } +#ifdef CONFIG_PAX_NOVSYSCALL + restorer = frame->retcode; +#else restorer = &__kernel_sigreturn; + +#ifdef CONFIG_PAX_SEGMEXEC + if (current->mm->flags & MF_PAX_SEGMEXEC) + restorer -= SEGMEXEC_TASK_SIZE; +#endif +#endif + if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; @@ -475,7 +485,18 @@ static void setup_rt_frame(int sig, stru goto give_sigsegv; /* Set up to return from userspace. 
*/ + +#ifdef CONFIG_PAX_NOVSYSCALL + restorer = frame->retcode; +#else restorer = &__kernel_rt_sigreturn; + +#ifdef CONFIG_PAX_SEGMEXEC + if (current->mm->flags & MF_PAX_SEGMEXEC) + restorer -= SEGMEXEC_TASK_SIZE; +#endif +#endif + if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; err |= __put_user(restorer, &frame->pretcode); diff -urNp linux-2.6.11/arch/i386/kernel/sys_i386.c linux-2.6.11/arch/i386/kernel/sys_i386.c --- linux-2.6.11/arch/i386/kernel/sys_i386.c 2005-03-02 02:38:32.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/sys_i386.c 2005-03-07 08:59:37.000000000 -0500 @@ -49,6 +49,11 @@ static inline long do_mmap2( int error = -EBADF; struct file * file = NULL; +#if defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_RANDEXEC) + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { file = fget(fd); @@ -106,6 +111,182 @@ out: return err; } +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start_addr, start_mmap, task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif + + if (len > task_size) + return -ENOMEM; + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->flags & MF_PAX_RANDMMAP) || !filp) +#endif + + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + start_addr = addr = mm->free_area_cache; + start_mmap = PAGE_ALIGN(task_size/3); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->flags & MF_PAX_RANDMMAP) + start_mmap += mm->delta_mmap; +#endif + + if (!(flags & MAP_EXECUTABLE) && start_addr < start_mmap) + start_addr = addr = start_mmap; + else if ((flags & MAP_EXECUTABLE) && start_addr >= start_mmap) + start_addr = addr = 
mm->mmap_base; + +full_search: + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (task_size - len < addr) { + /* + * Start a new search - just in case we missed + * some holes. + */ + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; + goto full_search; + } + return -ENOMEM; + } + if (!vma || (addr + len <= vma->vm_start && (addr + len <= mm->start_brk || start_mmap <= addr))) { + /* + * Remember the place where we stopped the search: + */ + mm->free_area_cache = addr + len; + return addr; + } + if (addr < start_mmap && addr + len > mm->start_brk) { + addr = start_mmap; + goto full_search; + } else + addr = vma->vm_end; + } +} + +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long base = mm->mmap_base, addr = addr0, task_size = TASK_SIZE; + int first_time = 1; + + if (flags & MAP_EXECUTABLE) { + mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + + mm->free_area_cache = mm->mmap_base; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + mm->free_area_cache = base; + mm->mmap_base = base; + return addr; + } + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif + + /* requested length too big for entire address space */ + if (len > task_size) + return -ENOMEM; + + /* dont allow allocations above current base */ + if (mm->free_area_cache > base) + mm->free_area_cache = base; + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->flags & MF_PAX_RANDMMAP) || !filp) +#endif + + /* requesting a specific address */ + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len 
<= vma->vm_start)) + return addr; + } + +try_again: + /* make sure it can fit in the remaining address space */ + if (mm->free_area_cache < len) + goto fail; + + /* either no address requested or cant fit in requested address hole */ + addr = (mm->free_area_cache - len) & PAGE_MASK; + do { + /* + * Lookup failure means no vma is above this address, + * i.e. return with success: + */ + if (!(vma = find_vma(mm, addr))) + return addr; + + /* + * new region fits between prev_vma->vm_end and + * vma->vm_start, use it: + */ + if (addr+len <= vma->vm_start) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + else + /* pull free_area_cache down to the first hole */ + if (mm->free_area_cache == vma->vm_end) + mm->free_area_cache = vma->vm_start; + + /* try just below the current vma->vm_start */ + addr = vma->vm_start-len; + } while (len <= vma->vm_start); + +fail: + /* + * if hint left us with no space for the requested + * mapping then try again: + */ + if (first_time) { + mm->free_area_cache = base; + first_time = 0; + goto try_again; + } + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. 
+ */ + mm->free_area_cache = TASK_UNMAPPED_BASE; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->free_area_cache = base; + + return addr; +} struct sel_arg_struct { unsigned long n; diff -urNp linux-2.6.11/arch/i386/kernel/sysenter.c linux-2.6.11/arch/i386/kernel/sysenter.c --- linux-2.6.11/arch/i386/kernel/sysenter.c 2005-03-02 02:38:33.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/sysenter.c 2005-03-07 08:59:37.000000000 -0500 @@ -24,7 +24,7 @@ extern asmlinkage void sysenter_entry(vo void enable_sep_cpu(void *info) { int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; tss->ss1 = __KERNEL_CS; tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; @@ -41,6 +41,7 @@ void enable_sep_cpu(void *info) extern const char vsyscall_int80_start, vsyscall_int80_end; extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; +#ifndef CONFIG_PAX_NOVSYSCALL static int __init sysenter_setup(void) { void *page = (void *)get_zeroed_page(GFP_ATOMIC); @@ -63,3 +64,4 @@ static int __init sysenter_setup(void) } __initcall(sysenter_setup); +#endif diff -urNp linux-2.6.11/arch/i386/kernel/traps.c linux-2.6.11/arch/i386/kernel/traps.c --- linux-2.6.11/arch/i386/kernel/traps.c 2005-03-02 02:37:49.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/traps.c 2005-03-07 08:59:37.000000000 -0500 @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef CONFIG_EISA #include @@ -58,18 +59,13 @@ asmlinkage int system_call(void); -struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, +const struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } }; /* Do we ignore FPU interrupts ? */ char ignore_fpu_irq = 0; -/* - * The IDT has to be page-aligned to simplify the Pentium - * F0 0F bug workaround.. We have a special link segment - * for this. 
- */ -struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; +extern struct desc_struct idt_table[256]; asmlinkage void divide_error(void); asmlinkage void debug(void); @@ -115,6 +111,7 @@ static inline unsigned long print_contex unsigned long *stack, unsigned long ebp) { unsigned long addr; + int i = kstack_depth_to_print; #ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { @@ -123,6 +120,7 @@ static inline unsigned long print_contex print_symbol("%s", addr); printk("\n"); ebp = *(unsigned long *)ebp; + --i; } #else while (valid_stack_ptr(tinfo, stack)) { @@ -131,6 +129,7 @@ static inline unsigned long print_contex printk(" [<%08lx>]", addr); print_symbol(" %s", addr); printk("\n"); + --i; } } #endif @@ -240,7 +239,7 @@ void show_registers(struct pt_regs *regs printk("Code: "); - eip = (u8 *)regs->eip - 43; + eip = (u8 *)regs->eip - 43 + __KERNEL_TEXT_OFFSET; for (i = 0; i < 64; i++, eip++) { unsigned char c; @@ -268,7 +267,7 @@ static void handle_BUG(struct pt_regs *r if (regs->xcs & 3) goto no_bug; /* Not in kernel */ - eip = regs->eip; + eip = regs->eip + __KERNEL_TEXT_OFFSET; if (eip < PAGE_OFFSET) goto no_bug; @@ -456,7 +455,7 @@ DO_ERROR_INFO(17, SIGBUS, "alignment che fastcall void do_general_protection(struct pt_regs * regs, long error_code) { int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = &init_tss[cpu]; struct thread_struct *thread = ¤t->thread; /* @@ -491,6 +490,24 @@ fastcall void do_general_protection(stru if (!(regs->xcs & 3)) goto gp_in_kernel; +#ifdef CONFIG_PAX_PAGEEXEC + if (current->mm) { + struct mm_struct *mm = current->mm; + unsigned long limit; + + if (mm->flags & MF_PAX_PAGEEXEC) { + down_write(&mm->mmap_sem); + limit = mm->context.user_cs_limit; + if (limit < TASK_SIZE) { + track_exec_limit(mm, limit, TASK_SIZE, PROT_EXEC); + up_write(&mm->mmap_sem); + return; + } + up_write(&mm->mmap_sem); + } + } +#endif + 
current->thread.error_code = error_code; current->thread.trap_no = 13; force_sig(SIGSEGV, current); @@ -506,6 +523,13 @@ gp_in_kernel: if (notify_die(DIE_GPF, "general protection fault", regs, error_code, 13, SIGSEGV) == NOTIFY_STOP) return; + +#ifdef CONFIG_PAX_KERNEXEC + if ((regs->xcs & 0xFFFF) == __KERNEL_CS) + die("PAX: suspicious general protection fault", regs, error_code); + else +#endif + die("general protection fault", regs, error_code); } } @@ -932,6 +956,8 @@ asmlinkage void math_emulate(long arg) #ifdef CONFIG_X86_F00F_BUG void __init trap_init_f00f_bug(void) { + +#ifndef CONFIG_PAX_KERNEXEC __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); /* @@ -940,6 +966,8 @@ void __init trap_init_f00f_bug(void) */ idt_descr.address = fix_to_virt(FIX_F00F_IDT); __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); +#endif + } #endif diff -urNp linux-2.6.11/arch/i386/kernel/vm86.c linux-2.6.11/arch/i386/kernel/vm86.c --- linux-2.6.11/arch/i386/kernel/vm86.c 2005-03-02 02:37:48.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/vm86.c 2005-03-07 08:59:37.000000000 -0500 @@ -121,7 +121,7 @@ struct pt_regs * fastcall save_v86_state do_exit(SIGSEGV); } - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); current->thread.esp0 = current->thread.saved_esp0; current->thread.sysenter_cs = __KERNEL_CS; load_esp0(tss, ¤t->thread); @@ -312,7 +312,7 @@ static void do_sys_vm86(struct kernel_vm asm volatile("movl %%fs,%0":"=m" (tsk->thread.saved_fs)); asm volatile("movl %%gs,%0":"=m" (tsk->thread.saved_gs)); - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; if (cpu_has_sep) tsk->thread.sysenter_cs = 0; diff -urNp linux-2.6.11/arch/i386/kernel/vmlinux.lds.S linux-2.6.11/arch/i386/kernel/vmlinux.lds.S --- linux-2.6.11/arch/i386/kernel/vmlinux.lds.S 2005-03-02 02:38:37.000000000 -0500 +++ linux-2.6.11/arch/i386/kernel/vmlinux.lds.S 2005-03-07 08:59:37.000000000 -0500 @@ -2,9 +2,12 @@ 
* Written by Martin Mares ; */ +#include + #include #include #include +#include OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) @@ -13,56 +16,15 @@ jiffies = jiffies_64; SECTIONS { . = __PAGE_OFFSET + 0x100000; - /* read-only */ - _text = .; /* Text and read-only data */ - .text : { - *(.text) - SCHED_TEXT - LOCK_TEXT - *(.fixup) - *(.gnu.warning) - } = 0x9090 - - _etext = .; /* End of text section */ - - . = ALIGN(16); /* Exception table */ - __start___ex_table = .; - __ex_table : { *(__ex_table) } - __stop___ex_table = .; - - RODATA - - /* writeable */ - .data : { /* Data */ - *(.data) - CONSTRUCTORS + .text.startup : { + BYTE(0xEA) /* jmp far */ + LONG(startup_32 + __KERNEL_TEXT_OFFSET - __PAGE_OFFSET) + SHORT(__BOOT_CS) } - . = ALIGN(4096); - __nosave_begin = .; - .data_nosave : { *(.data.nosave) } - . = ALIGN(4096); - __nosave_end = .; - - . = ALIGN(4096); - .data.page_aligned : { *(.data.idt) } - - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - - _edata = .; /* End of data section */ - - . = ALIGN(THREAD_SIZE); /* init_task */ - .data.init_task : { *(.data.init_task) } - /* will be freed after init */ . = ALIGN(4096); /* Init code and data */ __init_begin = .; - .init.text : { - _sinittext = .; - *(.init.text) - _einittext = .; - } .init.data : { *(.init.data) } . = ALIGN(16); __setup_start = .; @@ -88,9 +50,13 @@ SECTIONS .altinstructions : { *(.altinstructions) } __alt_instructions_end = .; .altinstr_replacement : { *(.altinstr_replacement) } + +#ifndef CONFIG_PAX_KERNEXEC /* .exit.text is discard at runtime, not link time, to deal with references from .altinstructions and .eh_frame */ .exit.text : { *(.exit.text) } +#endif + .exit.data : { *(.exit.data) } . = ALIGN(4096); __initramfs_start = .; @@ -100,15 +66,107 @@ SECTIONS __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_end = .; + + /* read-only */ + +#ifdef CONFIG_PAX_KERNEXEC + __init_text_start = .; + .init.text (. 
- __KERNEL_TEXT_OFFSET) : AT (__init_text_start) { + _sinittext = .; + *(.init.text) + _einittext = .; + *(.exit.text) + . = ALIGN(4*1024*1024) - 1; + BYTE(0) + } + . = ALIGN(4096); + __init_end = . + __KERNEL_TEXT_OFFSET; + /* freed after init ends here */ + +/* + * PaX: this must be kept in synch with the KERNEL_CS base + * in the GDTs in arch/i386/kernel/head.S + */ + _text = .; /* Text and read-only data */ + .text : AT (. + __KERNEL_TEXT_OFFSET) { +#else + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } . = ALIGN(4096); __init_end = .; /* freed after init ends here */ - + + _text = .; /* Text and read-only data */ + .text : { +#endif + + *(.text) + SCHED_TEXT + LOCK_TEXT + *(.fixup) + *(.gnu.warning) + } = 0x9090 + + _etext = .; /* End of text section */ + . += __KERNEL_TEXT_OFFSET; + . = ALIGN(4096); /* Exception table */ + __start___ex_table = .; + __ex_table : { *(__ex_table) } + __stop___ex_table = .; + + . = ALIGN(4096); + .rodata.page_aligned : { + *(.rodata.empty_zero_page) + *(.rodata.idt) + } + + RODATA + +#ifdef CONFIG_PAX_KERNEXEC + . = ALIGN(4*1024*1024); +#else + . = ALIGN(32); +#endif + + /* writeable */ + _data = .; + .data : { /* Data */ + *(.data) + CONSTRUCTORS + } + + . = ALIGN(4096); + __nosave_begin = .; + .data_nosave : { *(.data.nosave) } + . = ALIGN(4096); + __nosave_end = .; + + . = ALIGN(32); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + + . = ALIGN(THREAD_SIZE); /* init_task */ + .data.init_task : { *(.data.init_task) } + + . = ALIGN(4096); + .data.page_aligned : { + +#ifdef CONFIG_X86_PAE + *(.data.swapper_pm_dir) +#endif + + *(.data.swapper_pg_dir) + } + + _edata = .; /* End of data section */ + __bss_start = .; /* BSS */ .bss : { *(.bss.page_aligned) *(.bss) - } + } . 
= ALIGN(4); __bss_stop = .; diff -urNp linux-2.6.11/arch/i386/mm/fault.c linux-2.6.11/arch/i386/mm/fault.c --- linux-2.6.11/arch/i386/mm/fault.c 2005-03-02 02:37:30.000000000 -0500 +++ linux-2.6.11/arch/i386/mm/fault.c 2005-03-07 08:59:37.000000000 -0500 @@ -21,6 +21,9 @@ #include /* For unblank_screen() */ #include #include +#include +#include +#include #include #include @@ -81,7 +84,7 @@ static inline unsigned long get_segment_ /* Unlikely, but must come before segment checks. */ if (unlikely((regs->eflags & VM_MASK) != 0)) - return eip + (seg << 4); + return (eip & 0xFFFF) + (seg << 4); /* By far the most common cases. */ if (likely(seg == __USER_CS || seg == __KERNEL_CS)) @@ -107,7 +110,7 @@ static inline unsigned long get_segment_ desc = (void *)desc + (seg & ~7); } else { /* Must disable preemption while reading the GDT. */ - desc = (u32 *)&per_cpu(cpu_gdt_table, get_cpu()); + desc = (u32 *)&cpu_gdt_table[get_cpu()]; desc = (void *)desc + (seg & ~7); } @@ -201,6 +204,31 @@ static inline int is_prefetch(struct pt_ fastcall void do_invalid_op(struct pt_regs *, unsigned long); +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_EMUTRAMP) || defined(CONFIG_PAX_RANDEXEC) +static int pax_handle_fetch_fault(struct pt_regs *regs); +#endif + +#ifdef CONFIG_PAX_PAGEEXEC +/* PaX: called with the page_table_lock spinlock held */ +static inline pte_t * pax_get_pte(struct mm_struct *mm, unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, address); + if (!pgd || !pgd_present(*pgd)) + return 0; + pud = pud_offset(pgd, address); + if (!pud || !pud_present(*pud)) + return 0; + pmd = pmd_offset(pud, address); + if (!pmd || !pmd_present(*pmd)) + return 0; + return pte_offset_map(pmd, address); +} +#endif + /* * This routine handles page faults. 
It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -217,10 +245,14 @@ fastcall void do_page_fault(struct pt_re struct mm_struct *mm; struct vm_area_struct * vma; unsigned long address; - unsigned long page; int write; siginfo_t info; +#ifdef CONFIG_PAX_PAGEEXEC + pte_t *pte; + unsigned char pte_mask1, pte_mask2; +#endif + /* get the address */ __asm__("movl %%cr2,%0":"=r" (address)); @@ -232,6 +264,7 @@ fastcall void do_page_fault(struct pt_re local_irq_enable(); tsk = current; + mm = tsk->mm; info.si_code = SEGV_MAPERR; @@ -258,14 +291,99 @@ fastcall void do_page_fault(struct pt_re goto bad_area_nosemaphore; } - mm = tsk->mm; - /* * If we're in an interrupt, have no user context or are running in an * atomic region then we must not take the fault.. */ if (in_atomic() || !mm) - goto bad_area_nosemaphore; + goto bad_area_nopax; + +#ifdef CONFIG_PAX_PAGEEXEC + if (unlikely((error_code & 5) != 5 || + (regs->eflags & X86_EFLAGS_VM) || + !(mm->flags & MF_PAX_PAGEEXEC))) + goto not_pax_fault; + + /* PaX: it's our fault, let's handle it if we can */ + + /* PaX: take a look at read faults before acquiring any locks */ + if (unlikely(!(error_code & 2) && (regs->eip == address))) { + /* instruction fetch attempt from a protected page in user mode */ + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_RANDEXEC + case 3: + return; +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + case 2: + return; +#endif + + } + pax_report_fault(regs, (void*)regs->eip, (void*)regs->esp); + do_exit(SIGKILL); + } + + spin_lock(&mm->page_table_lock); + pte = pax_get_pte(mm, address); + if (unlikely(!pte || !(pte_val(*pte) & _PAGE_PRESENT) || pte_exec(*pte))) { + pte_unmap(pte); + spin_unlock(&mm->page_table_lock); + goto not_pax_fault; + } + + if (unlikely((error_code & 2) && !pte_write(*pte))) { + /* write attempt to a protected page in user mode */ + pte_unmap(pte); + spin_unlock(&mm->page_table_lock); + goto not_pax_fault; + } + + pte_mask1 = 
_PAGE_ACCESSED | _PAGE_USER | ((error_code & 2) << (_PAGE_BIT_DIRTY-1)); + +#ifdef CONFIG_SMP + if (likely(cpu_isset(smp_processor_id(), mm->context.cpu_user_cs_mask)) && address >= get_limit(regs->xcs)) + pte_mask2 = 0; + else + pte_mask2 = _PAGE_USER; +#else + pte_mask2 = (address >= get_limit(regs->xcs)) ? 0 : _PAGE_USER; +#endif + + /* + * PaX: fill DTLB with user rights and retry + */ + __asm__ __volatile__ ( + "orb %2,%1\n" +#if defined(CONFIG_M586) || defined(CONFIG_M586TSC) +/* + * PaX: let this uncommented 'invlpg' remind us on the behaviour of Intel's + * (and AMD's) TLBs. namely, they do not cache PTEs that would raise *any* + * page fault when examined during a TLB load attempt. this is true not only + * for PTEs holding a non-present entry but also present entries that will + * raise a page fault (such as those set up by PaX, or the copy-on-write + * mechanism). in effect it means that we do *not* need to flush the TLBs + * for our target pages since their PTEs are simply not in the TLBs at all. + + * the best thing in omitting it is that we gain around 15-20% speed in the + * fast path of the page fault handler and can get rid of tracing since we + * can no longer flush unintended entries. + */ + "invlpg %0\n" +#endif + "testb $0,%0\n" + "xorb %3,%1\n" + : + : "m" (*(char*)address), "m" (*(char*)pte), "q" (pte_mask1), "q" (pte_mask2) + : "memory", "cc"); + pte_unmap(pte); + spin_unlock(&mm->page_table_lock); + return; + +not_pax_fault: +#endif /* When running in the kernel we expect faults to occur only to * addresses in user space. 
All other faults represent errors in the @@ -285,7 +403,7 @@ fastcall void do_page_fault(struct pt_re if (!down_read_trylock(&mm->mmap_sem)) { if ((error_code & 4) == 0 && !search_exception_tables(regs->eip)) - goto bad_area_nosemaphore; + goto bad_area_nopax; down_read(&mm->mmap_sem); } @@ -306,7 +424,7 @@ fastcall void do_page_fault(struct pt_re if (address + 32 < regs->esp) goto bad_area; } - if (expand_stack(vma, address)) + if (expand_stack(tsk, vma, address)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so @@ -374,6 +492,45 @@ bad_area: up_read(&mm->mmap_sem); bad_area_nosemaphore: + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + if (mm && (error_code & 4) && !(regs->eflags & X86_EFLAGS_VM)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->flags & MF_PAX_PAGEEXEC) && !(error_code & 3) && (regs->eip == address)) { + pax_report_fault(regs, (void*)regs->eip, (void*)regs->esp); + do_exit(SIGKILL); + } +#endif + +#ifdef CONFIG_PAX_SEGMEXEC + if ((mm->flags & MF_PAX_SEGMEXEC) && !(error_code & 3) && (regs->eip + SEGMEXEC_TASK_SIZE == address)) { + +#if defined(CONFIG_PAX_EMUTRAMP) || defined(CONFIG_PAX_RANDEXEC) + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_RANDEXEC + case 3: + return; +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + case 2: + return; +#endif + + } +#endif + + pax_report_fault(regs, (void*)regs->eip, (void*)regs->esp); + do_exit(SIGKILL); + } +#endif + + } +#endif + +bad_area_nopax: /* User mode accesses just cause a SIGSEGV */ if (error_code & 4) { /* @@ -441,28 +598,52 @@ no_context: #endif if (address < PAGE_SIZE) printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); + +#ifdef CONFIG_PAX_KERNEXEC + else if (init_mm.start_code + __KERNEL_TEXT_OFFSET <= address && + address < init_mm.end_code + __KERNEL_TEXT_OFFSET) + if (tsk->curr_ip) + printk(KERN_ERR "PAX: From %u.%u.%u.%u: %s:%d, uid/euid: %u/%u, attempted to modify kernel code", + NIPQUAD(tsk->curr_ip), tsk->comm, tsk->pid, 
tsk->uid, tsk->euid); + else + printk(KERN_ERR "PAX: %s:%d, uid/euid: %u/%u, attempted to modify kernel code", + tsk->comm, tsk->pid, tsk->uid, tsk->euid); +#endif + else printk(KERN_ALERT "Unable to handle kernel paging request"); printk(" at virtual address %08lx\n",address); printk(KERN_ALERT " printing eip:\n"); printk("%08lx\n", regs->eip); - asm("movl %%cr3,%0":"=r" (page)); - page = ((unsigned long *) __va(page))[address >> 22]; - printk(KERN_ALERT "*pde = %08lx\n", page); - /* - * We must not directly access the pte in the highpte - * case, the page table might be allocated in highmem. - * And lets rather not kmap-atomic the pte, just in case - * it's allocated already. - */ + { + unsigned long index = pgd_index(address); + unsigned long pgd_paddr; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + asm("movl %%cr3,%0":"=r" (pgd_paddr)); + pgd = index + (pgd_t *)__va(pgd_paddr); + printk(KERN_ALERT "*pgd = %*llx\n", sizeof(*pgd), (unsigned long long)pgd_val(*pgd)); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, address); + pmd = pmd_offset(pud, address); + printk(KERN_ALERT "*pmd = %*llx\n", sizeof(*pmd), (unsigned long long)pmd_val(*pmd)); + /* + * We must not directly access the pte in the highpte + * case, the page table might be allocated in highmem. + * And lets rather not kmap-atomic the pte, just in case + * it's allocated already. + */ #ifndef CONFIG_HIGHPTE - if (page & 1) { - page &= PAGE_MASK; - address &= 0x003ff000; - page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; - printk(KERN_ALERT "*pte = %08lx\n", page); - } + if (pmd_present(*pmd) && !pmd_large(*pmd)) { + pte = pte_offset_kernel(pmd, address); + printk(KERN_ALERT "*pte = %*llx\n", sizeof(*pte), (unsigned long long)pte_val(*pte)); + } #endif + } + } die("Oops", regs, error_code); bust_spinlocks(0); do_exit(SIGKILL); @@ -513,7 +694,7 @@ vmalloc_fault: * Do _not_ use "tsk" here. We might be inside * an interrupt in the middle of a task switch.. 
*/ - int index = pgd_index(address); + unsigned long index = pgd_index(address); unsigned long pgd_paddr; pgd_t *pgd, *pgd_k; pud_t *pud, *pud_k; @@ -550,3 +731,255 @@ vmalloc_fault: return; } } + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_EMUTRAMP) || defined(CONFIG_PAX_RANDEXEC) +/* + * PaX: decide what to do with offenders (regs->eip = fault address) + * + * returns 1 when task should be killed + * 2 when gcc trampoline was detected + * 3 when legitimate ET_EXEC was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUTRAMP + static const unsigned char trans[8] = {6, 1, 2, 0, 13, 5, 3, 4}; +#endif + +#if defined(CONFIG_PAX_RANDEXEC) || defined(CONFIG_PAX_EMUTRAMP) + int err; +#endif + + if (regs->eflags & X86_EFLAGS_VM) + return 1; + +#ifdef CONFIG_PAX_RANDEXEC + if (current->mm->flags & MF_PAX_RANDEXEC) { + unsigned long esp_4; + + if (regs->eip >= current->mm->start_code && + regs->eip < current->mm->end_code) + { + err = get_user(esp_4, (unsigned long*)(regs->esp-4UL)); + if (err || esp_4 == regs->eip) + return 1; + + regs->eip += current->mm->delta_exec; + return 3; + } + } +#endif + + if (!(current->mm->flags & MF_PAX_EMUTRAMP)) + return 1; + +#ifdef CONFIG_PAX_EMUTRAMP + do { /* PaX: gcc trampoline emulation #1 */ + unsigned char mov1, mov2; + unsigned short jmp; + unsigned long addr1, addr2, ret; + unsigned short call; + + err = get_user(mov1, (unsigned char *)regs->eip); + err |= get_user(addr1, (unsigned long *)(regs->eip + 1)); + err |= get_user(mov2, (unsigned char *)(regs->eip + 5)); + err |= get_user(addr2, (unsigned long *)(regs->eip + 6)); + err |= get_user(jmp, (unsigned short *)(regs->eip + 10)); + err |= get_user(ret, (unsigned long *)regs->esp); + + if (err) + break; + + err = get_user(call, (unsigned short *)(ret-2)); + if (err) + break; + + if ((mov1 & 0xF8) == 0xB8 && + (mov2 & 0xF8) == 0xB8 && + (mov1 & 0x07) != (mov2 & 0x07) && + (jmp & 0xF8FF) == 0xE0FF && + (mov2 & 0x07) == 
((jmp>>8) & 0x07) && + (call & 0xF8FF) == 0xD0FF && + regs->eip == ((unsigned long*)regs)[trans[(call>>8) & 0x07]]) + { + ((unsigned long *)regs)[trans[mov1 & 0x07]] = addr1; + ((unsigned long *)regs)[trans[mov2 & 0x07]] = addr2; + regs->eip = addr2; + return 2; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #2 */ + unsigned char mov, jmp; + unsigned long addr1, addr2, ret; + unsigned short call; + + err = get_user(mov, (unsigned char *)regs->eip); + err |= get_user(addr1, (unsigned long *)(regs->eip + 1)); + err |= get_user(jmp, (unsigned char *)(regs->eip + 5)); + err |= get_user(addr2, (unsigned long *)(regs->eip + 6)); + err |= get_user(ret, (unsigned long *)regs->esp); + + if (err) + break; + + err = get_user(call, (unsigned short *)(ret-2)); + if (err) + break; + + if ((mov & 0xF8) == 0xB8 && + jmp == 0xE9 && + (call & 0xF8FF) == 0xD0FF && + regs->eip == ((unsigned long*)regs)[trans[(call>>8) & 0x07]]) + { + ((unsigned long *)regs)[trans[mov & 0x07]] = addr1; + regs->eip += addr2 + 10; + return 2; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #3 */ + unsigned char mov, jmp; + char offset; + unsigned long addr1, addr2, ret; + unsigned short call; + + err = get_user(mov, (unsigned char *)regs->eip); + err |= get_user(addr1, (unsigned long *)(regs->eip + 1)); + err |= get_user(jmp, (unsigned char *)(regs->eip + 5)); + err |= get_user(addr2, (unsigned long *)(regs->eip + 6)); + err |= get_user(ret, (unsigned long *)regs->esp); + + if (err) + break; + + err = get_user(call, (unsigned short *)(ret-3)); + err |= get_user(offset, (char *)(ret-1)); + if (err) + break; + + if ((mov & 0xF8) == 0xB8 && + jmp == 0xE9 && + call == 0x55FF) + { + unsigned long addr; + + err = get_user(addr, (unsigned long*)(regs->ebp + (unsigned long)(long)offset)); + if (err || regs->eip != addr) + break; + + ((unsigned long *)regs)[trans[mov & 0x07]] = addr1; + regs->eip += addr2 + 10; + return 2; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #4 
*/ + unsigned char mov, jmp, sib; + char offset; + unsigned long addr1, addr2, ret; + unsigned short call; + + err = get_user(mov, (unsigned char *)regs->eip); + err |= get_user(addr1, (unsigned long *)(regs->eip + 1)); + err |= get_user(jmp, (unsigned char *)(regs->eip + 5)); + err |= get_user(addr2, (unsigned long *)(regs->eip + 6)); + err |= get_user(ret, (unsigned long *)regs->esp); + + if (err) + break; + + err = get_user(call, (unsigned short *)(ret-4)); + err |= get_user(sib, (unsigned char *)(ret-2)); + err |= get_user(offset, (char *)(ret-1)); + if (err) + break; + + if ((mov & 0xF8) == 0xB8 && + jmp == 0xE9 && + call == 0x54FF && + sib == 0x24) + { + unsigned long addr; + + err = get_user(addr, (unsigned long*)(regs->esp + 4 + (unsigned long)(long)offset)); + if (err || regs->eip != addr) + break; + + ((unsigned long *)regs)[trans[mov & 0x07]] = addr1; + regs->eip += addr2 + 10; + return 2; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #5 */ + unsigned char mov, jmp, sib; + unsigned long addr1, addr2, ret, offset; + unsigned short call; + + err = get_user(mov, (unsigned char *)regs->eip); + err |= get_user(addr1, (unsigned long *)(regs->eip + 1)); + err |= get_user(jmp, (unsigned char *)(regs->eip + 5)); + err |= get_user(addr2, (unsigned long *)(regs->eip + 6)); + err |= get_user(ret, (unsigned long *)regs->esp); + + if (err) + break; + + err = get_user(call, (unsigned short *)(ret-7)); + err |= get_user(sib, (unsigned char *)(ret-5)); + err |= get_user(offset, (unsigned long *)(ret-4)); + if (err) + break; + + if ((mov & 0xF8) == 0xB8 && + jmp == 0xE9 && + call == 0x94FF && + sib == 0x24) + { + unsigned long addr; + + err = get_user(addr, (unsigned long*)(regs->esp + 4 + offset)); + if (err || regs->eip != addr) + break; + + ((unsigned long *)regs)[trans[mov & 0x07]] = addr1; + regs->eip += addr2 + 10; + return 2; + } + } while (0); +#endif + + return 1; /* PaX in action */ +} +#endif + +#if defined(CONFIG_PAX_PAGEEXEC) || 
defined(CONFIG_PAX_SEGMEXEC) +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 20; i++) { + unsigned char c; + if (get_user(c, (unsigned char*)pc+i)) { + printk("."); + break; + } + printk("%02x ", c); + } + printk("\n"); + + printk(KERN_ERR "PAX: bytes at SP: "); + for (i = 0; i < 20; i++) { + unsigned long c; + if (get_user(c, (unsigned long*)sp+i)) { + printk("."); + break; + } + printk("%08lx ", c); + } + printk("\n"); +} +#endif diff -urNp linux-2.6.11/arch/i386/mm/init.c linux-2.6.11/arch/i386/mm/init.c --- linux-2.6.11/arch/i386/mm/init.c 2005-03-02 02:38:17.000000000 -0500 +++ linux-2.6.11/arch/i386/mm/init.c 2005-03-07 08:59:37.000000000 -0500 @@ -39,6 +39,7 @@ #include #include #include +#include unsigned int __VMALLOC_RESERVE = 128 << 20; @@ -48,30 +49,6 @@ unsigned long highstart_pfn, highend_pfn static int noinline do_test_wp_bit(void); /* - * Creates a middle page table and puts a pointer to it in the - * given global directory entry. This only returns the gd entry - * in non-PAE compilation mode, since the middle layer is folded. - */ -static pmd_t * __init one_md_table_init(pgd_t *pgd) -{ - pud_t *pud; - pmd_t *pmd_table; - -#ifdef CONFIG_X86_PAE - pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - pud = pud_offset(pgd, 0); - if (pmd_table != pmd_offset(pud, 0)) - BUG(); -#else - pud = pud_offset(pgd, 0); - pmd_table = pmd_offset(pud, 0); -#endif - - return pmd_table; -} - -/* * Create a page table and place a pointer to it in a middle page * directory entry. 
*/ @@ -114,8 +91,6 @@ static void __init page_table_range_init pgd = pgd_base + pgd_idx; for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { - if (pgd_none(*pgd)) - one_md_table_init(pgd); pud = pud_offset(pgd, vaddr); pmd = pmd_offset(pud, vaddr); for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { @@ -144,6 +119,7 @@ static void __init kernel_physical_mappi { unsigned long pfn; pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pte_t *pte; int pgd_idx, pmd_idx, pte_ofs; @@ -153,7 +129,8 @@ static void __init kernel_physical_mappi pfn = 0; for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { - pmd = one_md_table_init(pgd); + pud = pud_offset(pgd, 0); + pmd = pmd_offset(pud, 0); if (pfn >= max_low_pfn) continue; for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) { @@ -312,13 +289,6 @@ static void __init pagetable_init (void) unsigned long vaddr; pgd_t *pgd_base = swapper_pg_dir; -#ifdef CONFIG_X86_PAE - int i; - /* Init entries of the first-level page table to the zero page */ - for (i = 0; i < PTRS_PER_PGD; i++) - set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); -#endif - /* Enable PSE if available */ if (cpu_has_pse) { set_in_cr4(X86_CR4_PSE); @@ -342,17 +312,6 @@ static void __init pagetable_init (void) page_table_range_init(vaddr, 0, pgd_base); permanent_kmaps_init(pgd_base); - -#ifdef CONFIG_X86_PAE - /* - * Add low memory identity-mappings - SMP needs it when - * starting up on an AP from real-mode. In the non-PAE - * case we already have these mappings through head.S. - * All user-space mappings are explicitly cleared after - * SMP startup. - */ - pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; -#endif } #if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND) @@ -386,11 +345,7 @@ void zap_low_mappings (void) * us, because pgd_clear() is a no-op on i386. 
*/ for (i = 0; i < USER_PTRS_PER_PGD; i++) -#ifdef CONFIG_X86_PAE - set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); -#else set_pgd(swapper_pg_dir+i, __pgd(0)); -#endif flush_tlb_all(); } @@ -508,15 +463,17 @@ void __init paging_init(void) load_cr3(swapper_pg_dir); + __flush_tlb_all(); + +#ifdef CONFIG_PAX_KERNEXEC + #ifdef CONFIG_X86_PAE - /* - * We will bail out later - printk doesn't work right now so - * the user would just see a hanging kernel. - */ - if (cpu_has_pae) - set_in_cr4(X86_CR4_PAE); + memcpy(kernexec_pm_dir, swapper_pm_dir, sizeof(kernexec_pm_dir)); +#else + memcpy(kernexec_pg_dir, swapper_pg_dir, sizeof(kernexec_pg_dir)); +#endif + #endif - __flush_tlb_all(); kmap_init(); zone_sizes_init(); @@ -611,7 +568,7 @@ void __init mem_init(void) set_highmem_pages_init(bad_ppro); codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; + datasize = (unsigned long) &_edata - (unsigned long) &_data; initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); @@ -628,10 +585,6 @@ void __init mem_init(void) (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) ); -#ifdef CONFIG_X86_PAE - if (!cpu_has_pae) - panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); -#endif if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); @@ -702,6 +655,46 @@ void free_initmem(void) { unsigned long addr; +#ifdef CONFIG_PAX_KERNEXEC + /* PaX: limit KERNEL_CS to actual size */ + { + unsigned long limit; + int cpu; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + limit = (unsigned long)&_etext >> PAGE_SHIFT; + for (cpu = 0; cpu < NR_CPUS; cpu++) { + cpu_gdt_table[cpu][GDT_ENTRY_KERNEL_CS].a = (cpu_gdt_table[cpu][GDT_ENTRY_KERNEL_CS].a & 0xFFFF0000UL) | (limit & 0x0FFFFUL); + cpu_gdt_table[cpu][GDT_ENTRY_KERNEL_CS].b = (cpu_gdt_table[cpu][GDT_ENTRY_KERNEL_CS].b & 0xFFF0FFFFUL) | (limit & 0xF0000UL); + } + + /* PaX: make KERNEL_CS 
read-only */ + for (addr = __KERNEL_TEXT_OFFSET; addr < (unsigned long)&_data; addr += PMD_SIZE) { + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_GLOBAL)); + } + +#ifdef CONFIG_X86_PAE + memcpy(kernexec_pm_dir, swapper_pm_dir, sizeof(kernexec_pm_dir)); +#else + memcpy(kernexec_pg_dir, swapper_pg_dir, sizeof(kernexec_pg_dir)); +#endif + + for (addr = __KERNEL_TEXT_OFFSET; addr < (unsigned long)&_data; addr += PMD_SIZE) { + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); + } + flush_tlb_all(); + } +#endif + + memset(__init_begin, 0, (unsigned long)&__init_end - (unsigned long)&__init_begin); addr = (unsigned long)(&__init_begin); for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); diff -urNp linux-2.6.11/arch/i386/mm/mmap.c linux-2.6.11/arch/i386/mm/mmap.c --- linux-2.6.11/arch/i386/mm/mmap.c 2005-03-02 02:38:08.000000000 -0500 +++ linux-2.6.11/arch/i386/mm/mmap.c 2005-03-07 08:59:37.000000000 -0500 @@ -38,13 +38,19 @@ static inline unsigned long mmap_base(struct mm_struct *mm) { unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + unsigned long task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif if (gap < MIN_GAP) gap = MIN_GAP; else if (gap > MAX_GAP) gap = MAX_GAP; - return TASK_SIZE - (gap & PAGE_MASK); + return task_size - (gap & PAGE_MASK); } /* @@ -61,10 +67,22 @@ void arch_pick_mmap_layout(struct mm_str (current->personality & ADDR_COMPAT_LAYOUT) || current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) { mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else 
{ mm->mmap_base = mmap_base(mm); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff -urNp linux-2.6.11/arch/i386/pci/pcbios.c linux-2.6.11/arch/i386/pci/pcbios.c --- linux-2.6.11/arch/i386/pci/pcbios.c 2005-03-02 02:38:34.000000000 -0500 +++ linux-2.6.11/arch/i386/pci/pcbios.c 2005-03-07 08:59:37.000000000 -0500 @@ -6,7 +6,7 @@ #include #include "pci.h" #include "pci-functions.h" - +#include /* BIOS32 signature: "_32_" */ #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) diff -urNp linux-2.6.11/arch/i386/power/cpu.c linux-2.6.11/arch/i386/power/cpu.c --- linux-2.6.11/arch/i386/power/cpu.c 2005-03-02 02:38:18.000000000 -0500 +++ linux-2.6.11/arch/i386/power/cpu.c 2005-03-07 08:59:37.000000000 -0500 @@ -83,10 +83,9 @@ do_fpu_end(void) static void fix_processor_context(void) { int cpu = smp_processor_id(); - struct tss_struct * t = &per_cpu(init_tss, cpu); + struct tss_struct * t = init_tss + cpu; set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */ - per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TSS].b &= 0xfffffdff; load_TR_desc(); /* This does ltr */ load_LDT(&current->active_mm->context); /* This does lldt */ diff -urNp linux-2.6.11/arch/ia64/ia32/binfmt_elf32.c linux-2.6.11/arch/ia64/ia32/binfmt_elf32.c --- linux-2.6.11/arch/ia64/ia32/binfmt_elf32.c 2005-03-02 02:38:38.000000000 -0500 +++ linux-2.6.11/arch/ia64/ia32/binfmt_elf32.c 2005-03-07 08:59:37.000000000 -0500 @@ -43,6 +43,17 @@ static void elf32_set_personality (void) #define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack)) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) ((tsk)->personality == PER_LINUX32 ?
0x08048000UL : 0x4000000000000000UL) + +#define PAX_DELTA_MMAP_LSB(tsk) IA32_PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) ((tsk)->personality == PER_LINUX32 ? 16 : 43 - IA32_PAGE_SHIFT) +#define PAX_DELTA_EXEC_LSB(tsk) IA32_PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) ((tsk)->personality == PER_LINUX32 ? 16 : 43 - IA32_PAGE_SHIFT) +#define PAX_DELTA_STACK_LSB(tsk) IA32_PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) ((tsk)->personality == PER_LINUX32 ? 16 : 43 - IA32_PAGE_SHIFT) +#endif + /* Ugly but avoids duplication */ #include "../../../fs/binfmt_elf.c" diff -urNp linux-2.6.11/arch/ia64/ia32/ia32priv.h linux-2.6.11/arch/ia64/ia32/ia32priv.h --- linux-2.6.11/arch/ia64/ia32/ia32priv.h 2005-03-02 02:38:32.000000000 -0500 +++ linux-2.6.11/arch/ia64/ia32/ia32priv.h 2005-03-07 08:59:37.000000000 -0500 @@ -326,10 +326,17 @@ struct old_linux32_dirent { #define ELF_ARCH EM_386 #define IA32_PAGE_OFFSET 0xc0000000 -#define IA32_STACK_TOP IA32_PAGE_OFFSET #define IA32_GATE_OFFSET IA32_PAGE_OFFSET #define IA32_GATE_END IA32_PAGE_OFFSET + PAGE_SIZE +#ifdef CONFIG_PAX_RANDUSTACK +#define __IA32_DELTA_STACK (current->mm->delta_stack) +#else +#define __IA32_DELTA_STACK 0UL +#endif + +#define IA32_STACK_TOP (IA32_PAGE_OFFSET - __IA32_DELTA_STACK) + /* * The system segments (GDT, TSS, LDT) have to be mapped below 4GB so the IA-32 engine can * access them. 
diff -urNp linux-2.6.11/arch/ia64/ia32/sys_ia32.c linux-2.6.11/arch/ia64/ia32/sys_ia32.c --- linux-2.6.11/arch/ia64/ia32/sys_ia32.c 2005-03-02 02:38:12.000000000 -0500 +++ linux-2.6.11/arch/ia64/ia32/sys_ia32.c 2005-03-07 08:59:37.000000000 -0500 @@ -939,6 +939,11 @@ sys32_mmap (struct mmap_arg_struct __use flags = a.flags; +#ifdef CONFIG_PAX_RANDEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { file = fget(a.fd); @@ -960,6 +965,11 @@ sys32_mmap2 (unsigned int addr, unsigned struct file *file = NULL; unsigned long retval; +#ifdef CONFIG_PAX_RANDEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { file = fget(fd); diff -urNp linux-2.6.11/arch/ia64/kernel/ptrace.c linux-2.6.11/arch/ia64/kernel/ptrace.c --- linux-2.6.11/arch/ia64/kernel/ptrace.c 2005-03-02 02:38:33.000000000 -0500 +++ linux-2.6.11/arch/ia64/kernel/ptrace.c 2005-03-07 08:59:37.000000000 -0500 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -1424,6 +1425,9 @@ sys_ptrace (long request, pid_t pid, uns if (pid == 1) /* no messing around with init! 
*/ goto out_tsk; + if (gr_handle_ptrace(child, request)) + goto out_tsk; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out_tsk; diff -urNp linux-2.6.11/arch/ia64/kernel/sys_ia64.c linux-2.6.11/arch/ia64/kernel/sys_ia64.c --- linux-2.6.11/arch/ia64/kernel/sys_ia64.c 2005-03-02 02:38:10.000000000 -0500 +++ linux-2.6.11/arch/ia64/kernel/sys_ia64.c 2005-03-07 08:59:37.000000000 -0500 @@ -27,7 +27,7 @@ arch_get_unmapped_area (struct file *fil unsigned long pgoff, unsigned long flags) { long map_shared = (flags & MAP_SHARED); - unsigned long start_addr, align_mask = PAGE_SIZE - 1; + unsigned long start_addr, align_mask = PAGE_SIZE - 1, task_unmapped_base = TASK_UNMAPPED_BASE; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -38,6 +38,15 @@ arch_get_unmapped_area (struct file *fil if (REGION_NUMBER(addr) == REGION_HPAGE) addr = 0; #endif + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->flags & MF_PAX_RANDMMAP) + task_unmapped_base += mm->delta_mmap; + if ((mm->flags & MF_PAX_RANDMMAP) && addr && filp) + addr = mm->free_area_cache; + else +#endif + if (!addr) addr = mm->free_area_cache; @@ -56,9 +65,9 @@ arch_get_unmapped_area (struct file *fil for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). */ if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) { - if (start_addr != TASK_UNMAPPED_BASE) { + if (start_addr != task_unmapped_base) { /* Start a new search --- just in case we missed some holes. 
*/ - addr = TASK_UNMAPPED_BASE; + addr = task_unmapped_base; goto full_search; } return -ENOMEM; @@ -184,6 +193,11 @@ do_mmap2 (unsigned long addr, unsigned l unsigned long roff; struct file *file = NULL; +#ifdef CONFIG_PAX_RANDEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { file = fget(fd); diff -urNp linux-2.6.11/arch/ia64/mm/fault.c linux-2.6.11/arch/ia64/mm/fault.c --- linux-2.6.11/arch/ia64/mm/fault.c 2005-03-02 02:38:32.000000000 -0500 +++ linux-2.6.11/arch/ia64/mm/fault.c 2005-03-07 08:59:37.000000000 -0500 @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,10 @@ expand_backing_store (struct vm_area_str if (address - vma->vm_start > current->signal->rlim[RLIMIT_STACK].rlim_cur || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_AS].rlim_cur)) return -ENOMEM; + if ((vma->vm_flags & VM_LOCKED) && + ((vma->vm_mm->locked_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur && + !capable(CAP_IPC_LOCK)) + return -ENOMEM; vma->vm_end += PAGE_SIZE; vma->vm_mm->total_vm += grow; if (vma->vm_flags & VM_LOCKED) @@ -75,6 +80,54 @@ mapped_kernel_page_is_present (unsigned return pte_present(pte); } +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (regs->cr_iip = fault address) + * + * returns 1 when task should be killed + * 2 when legitimate ET_EXEC was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_RANDEXEC + int err; + + if (current->mm->flags & MF_PAX_RANDEXEC) { + if (regs->cr_iip >= current->mm->start_code && + regs->cr_iip < current->mm->end_code) + { +#if 0 + /* PaX: this needs fixing */ + if (regs->b0 == regs->cr_iip) + return 1; +#endif + regs->cr_iip += current->mm->delta_exec; + return 2; + } + } +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at 
PC: "); + for (i = 0; i < 8; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) { + printk("."); + break; + } + printk("%08x ", c); + } + printk("\n"); +} +#endif + void ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs) { @@ -130,9 +183,31 @@ ia64_do_page_fault (unsigned long addres | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT) | (((isr >> IA64_ISR_R_BIT) & 1UL) << VM_READ_BIT)); - if ((vma->vm_flags & mask) != mask) + if ((vma->vm_flags & mask) != mask) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(vma->vm_flags & VM_EXEC) && (mask & VM_EXEC)) { + if (!(mm->flags & MF_PAX_PAGEEXEC) || address != regs->cr_iip) + goto bad_area; + + up_read(&mm->mmap_sem); + switch(pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_RANDEXEC + case 2: + return; +#endif + + } + pax_report_fault(regs, (void*)regs->cr_iip, (void*)regs->r12); + do_exit(SIGKILL); + } +#endif + goto bad_area; + } + survive: /* * If for any reason at all we couldn't handle the fault, make @@ -169,7 +244,7 @@ ia64_do_page_fault (unsigned long addres if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start) || REGION_OFFSET(address) >= RGN_MAP_LIMIT) goto bad_area; - if (expand_stack(vma, address)) + if (expand_stack(current, vma, address)) goto bad_area; } else { vma = prev_vma; diff -urNp linux-2.6.11/arch/m32r/mm/fault.c linux-2.6.11/arch/m32r/mm/fault.c --- linux-2.6.11/arch/m32r/mm/fault.c 2005-03-02 02:37:52.000000000 -0500 +++ linux-2.6.11/arch/m32r/mm/fault.c 2005-03-07 08:59:37.000000000 -0500 @@ -186,7 +186,7 @@ asmlinkage void do_page_fault(struct pt_ goto bad_area; } #endif - if (expand_stack(vma, address)) + if (expand_stack(tsk, vma, address)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so diff -urNp linux-2.6.11/arch/m68k/mm/fault.c linux-2.6.11/arch/m68k/mm/fault.c --- linux-2.6.11/arch/m68k/mm/fault.c 2005-03-02 02:38:08.000000000 -0500 +++ linux-2.6.11/arch/m68k/mm/fault.c 2005-03-07 08:59:37.000000000 -0500 @@ 
-121,7 +121,7 @@ int do_page_fault(struct pt_regs *regs, if (address + 256 < rdusp()) goto map_err; } - if (expand_stack(vma, address)) + if (expand_stack(current, vma, address)) goto map_err; /* diff -urNp linux-2.6.11/arch/mips/kernel/binfmt_elfn32.c linux-2.6.11/arch/mips/kernel/binfmt_elfn32.c --- linux-2.6.11/arch/mips/kernel/binfmt_elfn32.c 2005-03-02 02:38:10.000000000 -0500 +++ linux-2.6.11/arch/mips/kernel/binfmt_elfn32.c 2005-03-07 08:59:37.000000000 -0500 @@ -50,6 +50,17 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_N #undef ELF_ET_DYN_BASE #define ELF_ET_DYN_BASE (TASK32_SIZE / 3 * 2) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 0x00400000UL : 0x00400000UL) + +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#endif + #include #include #include diff -urNp linux-2.6.11/arch/mips/kernel/binfmt_elfo32.c linux-2.6.11/arch/mips/kernel/binfmt_elfo32.c --- linux-2.6.11/arch/mips/kernel/binfmt_elfo32.c 2005-03-02 02:37:55.000000000 -0500 +++ linux-2.6.11/arch/mips/kernel/binfmt_elfo32.c 2005-03-07 08:59:37.000000000 -0500 @@ -52,6 +52,17 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_N #undef ELF_ET_DYN_BASE #define ELF_ET_DYN_BASE (TASK32_SIZE / 3 * 2) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 0x00400000UL : 0x00400000UL) + +#define PAX_DELTA_MMAP_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_MMAP_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 
27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_EXEC_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_EXEC_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_STACK_LSB(tsk) PAGE_SHIFT +#define PAX_DELTA_STACK_LEN(tsk) (((tsk)->thread.mflags & MF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#endif + #include #include #include diff -urNp linux-2.6.11/arch/mips/kernel/syscall.c linux-2.6.11/arch/mips/kernel/syscall.c --- linux-2.6.11/arch/mips/kernel/syscall.c 2005-03-02 02:38:18.000000000 -0500 +++ linux-2.6.11/arch/mips/kernel/syscall.c 2005-03-07 08:59:37.000000000 -0500 @@ -84,6 +84,11 @@ unsigned long arch_get_unmapped_area(str do_color_align = 0; if (filp || (flags & MAP_SHARED)) do_color_align = 1; + +#ifdef CONFIG_PAX_RANDMMAP + if (!(current->mm->flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); @@ -94,6 +99,13 @@ unsigned long arch_get_unmapped_area(str (!vmm || addr + len <= vmm->vm_start)) return addr; } + +#ifdef CONFIG_PAX_RANDMMAP + if ((current->mm->flags & MF_PAX_RANDMMAP) && (!addr || filp)) + addr = TASK_UNMAPPED_BASE + current->mm->delta_mmap; + else +#endif + addr = TASK_UNMAPPED_BASE; if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); diff -urNp linux-2.6.11/arch/mips/mm/fault.c linux-2.6.11/arch/mips/mm/fault.c --- linux-2.6.11/arch/mips/mm/fault.c 2005-03-02 02:37:51.000000000 -0500 +++ linux-2.6.11/arch/mips/mm/fault.c 2005-03-07 08:59:37.000000000 -0500 @@ -26,6 +26,24 @@ #include #include +#ifdef CONFIG_PAX_PAGEEXEC +void pax_report_insns(void *pc, void *sp) /* sp is not read here; the parameter matches the other architectures' definitions */ +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) { + printk("."); + break; + } + printk("%08x ", c); + } + printk("\n"); +} +#endif + /* * This routine handles page faults.
It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -75,7 +93,7 @@ asmlinkage void do_page_fault(struct pt_ goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; - if (expand_stack(vma, address)) + if (expand_stack(tsk, vma, address)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so diff -urNp linux-2.6.11/arch/parisc/kernel/ptrace.c linux-2.6.11/arch/parisc/kernel/ptrace.c --- linux-2.6.11/arch/parisc/kernel/ptrace.c 2005-03-02 02:37:48.000000000 -0500 +++ linux-2.6.11/arch/parisc/kernel/ptrace.c 2005-03-07 08:59:37.000000000 -0500 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -114,6 +115,9 @@ long sys_ptrace(long request, pid_t pid, if (pid == 1) /* no messing around with init! */ goto out_tsk; + if (gr_handle_ptrace(child, request)) + goto out_tsk; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out_tsk; diff -urNp linux-2.6.11/arch/parisc/kernel/sys_parisc.c linux-2.6.11/arch/parisc/kernel/sys_parisc.c --- linux-2.6.11/arch/parisc/kernel/sys_parisc.c 2005-03-02 02:38:25.000000000 -0500 +++ linux-2.6.11/arch/parisc/kernel/sys_parisc.c 2005-03-07 08:59:37.000000000 -0500 @@ -104,6 +104,13 @@ unsigned long arch_get_unmapped_area(str { if (len > TASK_SIZE) return -ENOMEM; + +#ifdef CONFIG_PAX_RANDMMAP + if ((current->mm->flags & MF_PAX_RANDMMAP) && (!addr || filp)) + addr = TASK_UNMAPPED_BASE + current->mm->delta_mmap; + else +#endif + if (!addr) addr = TASK_UNMAPPED_BASE; @@ -123,6 +130,12 @@ static unsigned long do_mmap2(unsigned l { struct file * file = NULL; unsigned long error = -EBADF; + +#ifdef CONFIG_PAX_RANDEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + if (!(flags & MAP_ANONYMOUS)) { file = fget(fd); if (!file) diff -urNp linux-2.6.11/arch/parisc/kernel/traps.c linux-2.6.11/arch/parisc/kernel/traps.c --- linux-2.6.11/arch/parisc/kernel/traps.c 2005-03-02 02:38:33.000000000 -0500 +++ 
linux-2.6.11/arch/parisc/kernel/traps.c 2005-03-07 08:59:37.000000000 -0500 @@ -680,9 +680,7 @@ void handle_interruption(int code, struc down_read(&current->mm->mmap_sem); vma = find_vma(current->mm,regs->iaoq[0]); - if (vma && (regs->iaoq[0] >= vma->vm_start) - && (vma->vm_flags & VM_EXEC)) { - + if (vma && (regs->iaoq[0] >= vma->vm_start)) { fault_address = regs->iaoq[0]; fault_space = regs->iasq[0]; diff -urNp linux-2.6.11/arch/parisc/mm/fault.c linux-2.6.11/arch/parisc/mm/fault.c --- linux-2.6.11/arch/parisc/mm/fault.c 2005-03-02 02:38:25.000000000 -0500 +++ linux-2.6.11/arch/parisc/mm/fault.c 2005-03-07 08:59:37.000000000 -0500 @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include @@ -57,7 +59,7 @@ DEFINE_PER_CPU(struct exception_data, ex static unsigned long parisc_acctyp(unsigned long code, unsigned int inst) { - if (code == 6 || code == 16) + if (code == 6 || code == 7 || code == 16) return VM_EXEC; switch (inst & 0xf0000000) { @@ -143,6 +145,139 @@ parisc_acctyp(unsigned long code, unsign } #endif +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (instruction_pointer(regs) = fault address) + * + * returns 1 when task should be killed + * 2 when rt_sigreturn trampoline was detected + * 3 when unpatched PLT trampoline was detected + * 4 when legitimate ET_EXEC was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#if defined(CONFIG_PAX_EMUPLT) || defined(CONFIG_PAX_EMUTRAMP) + int err; +#endif + +#ifdef CONFIG_PAX_RANDEXEC + if (current->mm->flags & MF_PAX_RANDEXEC) { + if (instruction_pointer(regs) >= current->mm->start_code && + instruction_pointer(regs) < current->mm->end_code) + { +#if 0 + /* PaX: this needs fixing */ + if ((regs->gr[2] & ~3UL) == instruction_pointer(regs)) + return 1; +#endif + regs->iaoq[0] += current->mm->delta_exec; + if ((regs->iaoq[1] & ~3UL) >= current->mm->start_code && + (regs->iaoq[1] & ~3UL) < current->mm->end_code) + regs->iaoq[1] +=
current->mm->delta_exec; + return 4; + } + } +#endif + +#ifdef CONFIG_PAX_EMUPLT + do { /* PaX: unpatched PLT emulation */ + unsigned int bl, depwi; + + err = get_user(bl, (unsigned int*)instruction_pointer(regs)); + err |= get_user(depwi, (unsigned int*)(instruction_pointer(regs)+4)); + + if (err) + break; + + if (bl == 0xEA9F1FDDU && depwi == 0xD6801C1EU) { + unsigned int ldw, bv, ldw2, addr = instruction_pointer(regs)-12; + + err = get_user(ldw, (unsigned int*)addr); + err |= get_user(bv, (unsigned int*)(addr+4)); + err |= get_user(ldw2, (unsigned int*)(addr+8)); + + if (err) + break; + + if (ldw == 0x0E801096U && + bv == 0xEAC0C000U && + ldw2 == 0x0E881095U) + { + unsigned int resolver, map; + + err = get_user(resolver, (unsigned int*)(instruction_pointer(regs)+8)); + err |= get_user(map, (unsigned int*)(instruction_pointer(regs)+12)); + if (err) + break; + + regs->gr[20] = instruction_pointer(regs)+8; + regs->gr[21] = map; + regs->gr[22] = resolver; + regs->iaoq[0] = resolver | 3UL; + regs->iaoq[1] = regs->iaoq[0] + 4; + return 3; + } + } + } while (0); +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + +#ifndef CONFIG_PAX_EMUSIGRT + if (!(current->mm->flags & MF_PAX_EMUTRAMP)) + return 1; +#endif + + do { /* PaX: rt_sigreturn emulation */ + unsigned int ldi1, ldi2, bel, nop; + + err = get_user(ldi1, (unsigned int *)instruction_pointer(regs)); + err |= get_user(ldi2, (unsigned int *)(instruction_pointer(regs)+4)); + err |= get_user(bel, (unsigned int *)(instruction_pointer(regs)+8)); + err |= get_user(nop, (unsigned int *)(instruction_pointer(regs)+12)); + + if (err) + break; + + if ((ldi1 == 0x34190000U || ldi1 == 0x34190002U) && + ldi2 == 0x3414015AU && + bel == 0xE4008200U && + nop == 0x08000240U) + { + regs->gr[25] = (ldi1 & 2) >> 1; + regs->gr[20] = __NR_rt_sigreturn; + regs->gr[31] = regs->iaoq[1] + 16; + regs->sr[0] = regs->iasq[1]; + regs->iaoq[0] = 0x100UL; + regs->iaoq[1] = regs->iaoq[0] + 4; + regs->iasq[0] = regs->sr[2]; + regs->iasq[1] = regs->sr[2]; + 
return 2; + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) { + printk("."); + break; + } + printk("%08x ", c); + } + printk("\n"); +} +#endif + void do_page_fault(struct pt_regs *regs, unsigned long code, unsigned long address) { @@ -168,8 +303,38 @@ good_area: acc_type = parisc_acctyp(code,regs->iir); - if ((vma->vm_flags & acc_type) != acc_type) + if ((vma->vm_flags & acc_type) != acc_type) { + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->flags & MF_PAX_PAGEEXEC) && (acc_type & VM_EXEC) && + (address & ~3UL) == instruction_pointer(regs)) + { + up_read(&mm->mmap_sem); + switch(pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_RANDEXEC + case 4: + return; +#endif + +#ifdef CONFIG_PAX_EMUPLT + case 3: + return; +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + case 2: + return; +#endif + + } + pax_report_fault(regs, (void*)instruction_pointer(regs), (void*)regs->gr[30]); + do_exit(SIGKILL); + } +#endif + goto bad_area; + } /* * If for any reason at all we couldn't handle the fault, make @@ -199,7 +364,7 @@ good_area: check_expansion: vma = prev_vma; - if (vma && (expand_stack(vma, address) == 0)) + if (vma && (expand_stack(tsk, vma, address) == 0)) goto good_area; /* diff -urNp linux-2.6.11/arch/ppc/kernel/ptrace.c linux-2.6.11/arch/ppc/kernel/ptrace.c --- linux-2.6.11/arch/ppc/kernel/ptrace.c 2005-03-02 02:37:51.000000000 -0500 +++ linux-2.6.11/arch/ppc/kernel/ptrace.c 2005-03-07 08:59:37.000000000 -0500 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -267,6 +268,9 @@ int sys_ptrace(long request, long pid, l if (pid == 1) /* you may not mess with init */ goto out_tsk; + if (gr_handle_ptrace(child, request)) + goto out_tsk; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out_tsk; diff -urNp linux-2.6.11/arch/ppc/kernel/syscalls.c 
linux-2.6.11/arch/ppc/kernel/syscalls.c --- linux-2.6.11/arch/ppc/kernel/syscalls.c 2005-03-02 02:38:26.000000000 -0500 +++ linux-2.6.11/arch/ppc/kernel/syscalls.c 2005-03-07 08:59:37.000000000 -0500 @@ -165,6 +165,11 @@ do_mmap2(unsigned long addr, size_t len, struct file * file = NULL; int ret = -EBADF; +#ifdef CONFIG_PAX_RANDEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { if (!(file = fget(fd))) diff -urNp linux-2.6.11/arch/ppc/mm/fault.c linux-2.6.11/arch/ppc/mm/fault.c --- linux-2.6.11/arch/ppc/mm/fault.c 2005-03-02 02:37:52.000000000 -0500 +++ linux-2.6.11/arch/ppc/mm/fault.c 2005-03-07 08:59:37.000000000 -0500 @@ -28,6 +28,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include @@ -51,6 +56,363 @@ unsigned long pte_misses; /* updated by unsigned long pte_errors; /* updated by do_page_fault() */ unsigned int probingmem; +#ifdef CONFIG_PAX_EMUSIGRT +void pax_syscall_close(struct vm_area_struct * vma) +{ + vma->vm_mm->call_syscall = 0UL; +} + +static struct page* pax_syscall_nopage(struct vm_area_struct *vma, unsigned long address, int *type) +{ + struct page* page; + unsigned int *kaddr; + + page = alloc_page(GFP_HIGHUSER); + if (!page) + return NOPAGE_OOM; + + kaddr = kmap(page); + memset(kaddr, 0, PAGE_SIZE); + kaddr[0] = 0x44000002U; /* sc */ + __flush_dcache_icache(kaddr); + kunmap(page); + if (type) + *type = VM_FAULT_MAJOR; + return page; +} + +static struct vm_operations_struct pax_vm_ops = { + close: pax_syscall_close, + nopage: pax_syscall_nopage, +}; + +static void pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) +{ + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = current->mm; + vma->vm_start = addr; + vma->vm_end = addr + PAGE_SIZE; + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; + vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f]; + vma->vm_ops = &pax_vm_ops; + 
insert_vm_struct(current->mm, vma); + ++current->mm->total_vm; +} +#endif + +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (regs->nip = fault address) + * + * returns 1 when task should be killed + * 2 when patched GOT trampoline was detected + * 3 when patched PLT trampoline was detected + * 4 when unpatched PLT trampoline was detected + * 5 when legitimate ET_EXEC was detected + * 6 when sigreturn trampoline was detected + * 7 when rt_sigreturn trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#if defined(CONFIG_PAX_EMUPLT) || defined(CONFIG_PAX_EMUSIGRT) + int err; +#endif + +#ifdef CONFIG_PAX_RANDEXEC + if (current->mm->flags & MF_PAX_RANDEXEC) { + if (regs->nip >= current->mm->start_code && + regs->nip < current->mm->end_code) + { + if (regs->link == regs->nip) + return 1; + + regs->nip += current->mm->delta_exec; + return 5; + } + } +#endif + +#ifdef CONFIG_PAX_EMUPLT + do { /* PaX: patched GOT emulation */ + unsigned int blrl; + + err = get_user(blrl, (unsigned int*)regs->nip); + + if (!err && blrl == 0x4E800021U) { + unsigned long temp = regs->nip; + + regs->nip = regs->link & 0xFFFFFFFCUL; + regs->link = temp + 4UL; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #1 */ + unsigned int b; + + err = get_user(b, (unsigned int *)regs->nip); + + if (!err && (b & 0xFC000003U) == 0x48000000U) { + regs->nip += (((b | 0xFC000000UL) ^ 0x02000000UL) + 0x02000000UL); + return 3; + } + } while (0); + + do { /* PaX: unpatched PLT emulation #1 */ + unsigned int li, b; + + err = get_user(li, (unsigned int *)regs->nip); + err |= get_user(b, (unsigned int *)(regs->nip+4)); + + if (!err && (li & 0xFFFF0000U) == 0x39600000U && (b & 0xFC000003U) == 0x48000000U) { + unsigned int rlwinm, add, li2, addis2, mtctr, li3, addis3, bctr; + unsigned long addr = b | 0xFC000000UL; + + addr = regs->nip + 4 + ((addr ^ 0x02000000UL) + 0x02000000UL); + err = get_user(rlwinm, (unsigned int*)addr); + err 
|= get_user(add, (unsigned int*)(addr+4)); + err |= get_user(li2, (unsigned int*)(addr+8)); + err |= get_user(addis2, (unsigned int*)(addr+12)); + err |= get_user(mtctr, (unsigned int*)(addr+16)); + err |= get_user(li3, (unsigned int*)(addr+20)); + err |= get_user(addis3, (unsigned int*)(addr+24)); + err |= get_user(bctr, (unsigned int*)(addr+28)); + + if (err) + break; + + if (rlwinm == 0x556C083CU && + add == 0x7D6C5A14U && + (li2 & 0xFFFF0000U) == 0x39800000U && + (addis2 & 0xFFFF0000U) == 0x3D8C0000U && + mtctr == 0x7D8903A6U && + (li3 & 0xFFFF0000U) == 0x39800000U && + (addis3 & 0xFFFF0000U) == 0x3D8C0000U && + bctr == 0x4E800420U) + { + regs->gpr[PT_R11] = 3 * (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->gpr[PT_R12] = (((li3 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->gpr[PT_R12] += (addis3 & 0xFFFFU) << 16; + regs->ctr = (((li2 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->ctr += (addis2 & 0xFFFFU) << 16; + regs->nip = regs->ctr; + return 4; + } + } + } while (0); + +#if 0 + do { /* PaX: unpatched PLT emulation #2 */ + unsigned int lis, lwzu, b, bctr; + + err = get_user(lis, (unsigned int *)regs->nip); + err |= get_user(lwzu, (unsigned int *)(regs->nip+4)); + err |= get_user(b, (unsigned int *)(regs->nip+8)); + err |= get_user(bctr, (unsigned int *)(regs->nip+12)); + + if (err) + break; + + if ((lis & 0xFFFF0000U) == 0x39600000U && + (lwzu & 0xU) == 0xU && + (b & 0xFC000003U) == 0x48000000U && + bctr == 0x4E800420U) + { + unsigned int addis, addi, rlwinm, add, li2, addis2, mtctr, li3, addis3, bctr; + unsigned long addr = b | 0xFC000000UL; + + addr = regs->nip + 12 + ((addr ^ 0x02000000UL) + 0x02000000UL); + err = get_user(addis, (unsigned int*)addr); + err |= get_user(addi, (unsigned int*)(addr+4)); + err |= get_user(rlwinm, (unsigned int*)(addr+8)); + err |= get_user(add, (unsigned int*)(addr+12)); + err |= get_user(li2, (unsigned int*)(addr+16)); + err |= get_user(addis2, (unsigned int*)(addr+20)); + err |= 
get_user(mtctr, (unsigned int*)(addr+24)); + err |= get_user(li3, (unsigned int*)(addr+28)); + err |= get_user(addis3, (unsigned int*)(addr+32)); + err |= get_user(bctr, (unsigned int*)(addr+36)); + + if (err) + break; + + if ((addis & 0xFFFF0000U) == 0x3D6B0000U && + (addi & 0xFFFF0000U) == 0x396B0000U && + rlwinm == 0x556C083CU && + add == 0x7D6C5A14U && + (li2 & 0xFFFF0000U) == 0x39800000U && + (addis2 & 0xFFFF0000U) == 0x3D8C0000U && + mtctr == 0x7D8903A6U && + (li3 & 0xFFFF0000U) == 0x39800000U && + (addis3 & 0xFFFF0000U) == 0x3D8C0000U && + bctr == 0x4E800420U) + { + regs->gpr[PT_R11] = + regs->gpr[PT_R11] = 3 * (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->gpr[PT_R12] = (((li3 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->gpr[PT_R12] += (addis3 & 0xFFFFU) << 16; + regs->ctr = (((li2 | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + regs->ctr += (addis2 & 0xFFFFU) << 16; + regs->nip = regs->ctr; + return 4; + } + } + } while (0); +#endif + + do { /* PaX: unpatched PLT emulation #3 */ + unsigned int li, b; + + err = get_user(li, (unsigned int *)regs->nip); + err |= get_user(b, (unsigned int *)(regs->nip+4)); + + if (!err && (li & 0xFFFF0000U) == 0x39600000U && (b & 0xFC000003U) == 0x48000000U) { + unsigned int addis, lwz, mtctr, bctr; + unsigned long addr = b | 0xFC000000UL; + + addr = regs->nip + 4 + ((addr ^ 0x02000000UL) + 0x02000000UL); + err = get_user(addis, (unsigned int*)addr); + err |= get_user(lwz, (unsigned int*)(addr+4)); + err |= get_user(mtctr, (unsigned int*)(addr+8)); + err |= get_user(bctr, (unsigned int*)(addr+12)); + + if (err) + break; + + if ((addis & 0xFFFF0000U) == 0x3D6B0000U && + (lwz & 0xFFFF0000U) == 0x816B0000U && + mtctr == 0x7D6903A6U && + bctr == 0x4E800420U) + { + unsigned int r11; + + addr = (addis << 16) + (((li | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + addr += (((lwz | 0xFFFF0000UL) ^ 0x00008000UL) + 0x00008000UL); + + err = get_user(r11, (unsigned int*)addr); + if (err) + break; + + 
regs->gpr[PT_R11] = r11; + regs->ctr = r11; + regs->nip = r11; + return 4; + } + } + } while (0); +#endif + +#ifdef CONFIG_PAX_EMUSIGRT + do { /* PaX: sigreturn emulation */ + unsigned int li, sc; + + err = get_user(li, (unsigned int *)regs->nip); + err |= get_user(sc, (unsigned int *)(regs->nip+4)); + + if (!err && li == 0x38000000U + __NR_sigreturn && sc == 0x44000002U) { + struct vm_area_struct *vma; + unsigned long call_syscall; + + down_read(&current->mm->mmap_sem); + call_syscall = current->mm->call_syscall; + up_read(&current->mm->mmap_sem); + if (likely(call_syscall)) + goto emulate; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + + down_write(&current->mm->mmap_sem); + if (current->mm->call_syscall) { + call_syscall = current->mm->call_syscall; + up_write(&current->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + goto emulate; + } + + call_syscall = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_syscall & ~PAGE_MASK)) { + up_write(&current->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + pax_insert_vma(vma, call_syscall); + current->mm->call_syscall = call_syscall; + up_write(&current->mm->mmap_sem); + +emulate: + regs->gpr[PT_R0] = __NR_sigreturn; + regs->nip = call_syscall; + return 6; + } + } while (0); + + do { /* PaX: rt_sigreturn emulation */ + unsigned int li, sc; + + err = get_user(li, (unsigned int *)regs->nip); + err |= get_user(sc, (unsigned int *)(regs->nip+4)); + + if (!err && li == 0x38000000U + __NR_rt_sigreturn && sc == 0x44000002U) { + struct vm_area_struct *vma; + unsigned int call_syscall; + + down_read(&current->mm->mmap_sem); + call_syscall = current->mm->call_syscall; + up_read(&current->mm->mmap_sem); + if (likely(call_syscall)) + goto rt_emulate; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + + down_write(&current->mm->mmap_sem); + if (current->mm->call_syscall) { + call_syscall = current->mm->call_syscall; + up_write(&current->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep,
vma); + goto rt_emulate; + } + + call_syscall = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_syscall & ~PAGE_MASK)) { + up_write(&current->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + pax_insert_vma(vma, call_syscall); + current->mm->call_syscall = call_syscall; + up_write(&current->mm->mmap_sem); + +rt_emulate: + regs->gpr[PT_R0] = __NR_rt_sigreturn; + regs->nip = call_syscall; + return 7; + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) { + printk("."); + break; + } + printk("%08x ", c); + } + printk("\n"); +} +#endif + /* * Check whether the instruction at regs->nip is a store using * an update addressing form which will update r1. @@ -111,7 +473,7 @@ int do_page_fault(struct pt_regs *regs, * indicate errors in DSISR but can validly be set in SRR1. */ if (TRAP(regs) == 0x400) - error_code &= 0x48200000; + error_code &= 0x58200000; else is_write = error_code & 0x02000000; #endif /* CONFIG_4xx || CONFIG_BOOKE */ @@ -175,7 +537,7 @@ int do_page_fault(struct pt_regs *regs, && (!user_mode(regs) || !store_updates_sp(regs))) goto bad_area; } - if (expand_stack(vma, address)) + if (expand_stack(current, vma, address)) goto bad_area; good_area: @@ -205,15 +567,14 @@ good_area: } else if (TRAP(regs) == 0x400) { pte_t *ptep; -#if 0 +#if 1 /* It would be nice to actually enforce the VM execute permission on CPUs which can do so, but far too much stuff in userspace doesn't get the permissions right, so we let any page be executed for now. */ if (! (vma->vm_flags & VM_EXEC)) goto bad_area; -#endif - +#else /* Since 4xx/Book-E supports per-page execute permission, * we lazily flush dcache to icache.
*/ ptep = NULL; @@ -233,6 +594,7 @@ good_area: if (ptep != NULL) pte_unmap(ptep); #endif +#endif /* a read */ } else { /* protection fault */ @@ -278,6 +640,38 @@ bad_area: /* User mode accesses cause a SIGSEGV */ if (user_mode(regs)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (mm->flags & MF_PAX_PAGEEXEC) { + if ((TRAP(regs) == 0x400) && (regs->nip == address)) { + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + case 4: + return 0; +#endif + +#ifdef CONFIG_PAX_RANDEXEC + case 5: + return 0; +#endif + +#ifdef CONFIG_PAX_EMUSIGRT + case 6: + case 7: + return 0; +#endif + + } + + pax_report_fault(regs, (void*)regs->nip, (void*)regs->gpr[1]); + do_exit(SIGKILL); + } + } +#endif + info.si_signo = SIGSEGV; info.si_errno = 0; info.si_code = code; diff -urNp linux-2.6.11/arch/ppc64/kernel/syscalls.c linux-2.6.11/arch/ppc64/kernel/syscalls.c --- linux-2.6.11/arch/ppc64/kernel/syscalls.c 2005-03-02 02:38:13.000000000 -0500 +++ linux-2.6.11/arch/ppc64/kernel/syscalls.c 2005-03-07 08:59:37.000000000 -0500 @@ -183,6 +183,11 @@ unsigned long sys_mmap(unsigned long add struct file * file = NULL; unsigned long ret = -EBADF; +#ifdef CONFIG_PAX_RANDEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + if (!(flags & MAP_ANONYMOUS)) { if (!(file = fget(fd))) goto out; diff -urNp linux-2.6.11/arch/ppc64/mm/fault.c linux-2.6.11/arch/ppc64/mm/fault.c --- linux-2.6.11/arch/ppc64/mm/fault.c 2005-03-02 02:38:17.000000000 -0500 +++ linux-2.6.11/arch/ppc64/mm/fault.c 2005-03-07 08:59:37.000000000 -0500 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -76,6 +77,54 @@ static int store_updates_sp(struct pt_re return 0; } +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (regs->nip = fault address) + * + * returns 1 when task should be killed + * 2 when legitimate ET_EXEC was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#if defined(CONFIG_PAX_EMUPLT) || 
defined(CONFIG_PAX_EMUSIGRT) + int err; +#endif + +#ifdef CONFIG_PAX_RANDEXEC + if (current->mm->flags & MF_PAX_RANDEXEC) { + if (regs->nip >= current->mm->start_code && + regs->nip < current->mm->end_code) + { + if (regs->link == regs->nip) + return 1; + + regs->nip += current->mm->delta_exec; + return 2; + } + } +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int*)pc+i)) { + printk("."); + break; + } + printk("%08x ", c); + } + printk("\n"); +} +#endif + /* * The error_code parameter is * - DSISR for a non-SLB data access fault, @@ -193,7 +242,7 @@ int do_page_fault(struct pt_regs *regs, goto bad_area; } - if (expand_stack(vma, address)) + if (expand_stack(current, vma, address)) goto bad_area; good_area: @@ -243,6 +292,25 @@ bad_area: bad_area_nosemaphore: /* User mode accesses cause a SIGSEGV */ if (user_mode(regs)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (mm->flags & MF_PAX_PAGEEXEC) { + if ((regs->trap == 0x400) && (regs->nip == address)) { + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_RANDEXEC + case 2: + return; +#endif + + } + + pax_report_fault(regs, (void*)regs->nip, (void*)regs->gpr[1]); + do_exit(SIGKILL); + } + } +#endif + info.si_signo = SIGSEGV; info.si_errno = 0; info.si_code = code; diff -urNp linux-2.6.11/arch/s390/mm/fault.c linux-2.6.11/arch/s390/mm/fault.c --- linux-2.6.11/arch/s390/mm/fault.c 2005-03-02 02:38:38.000000000 -0500 +++ linux-2.6.11/arch/s390/mm/fault.c 2005-03-07 08:59:37.000000000 -0500 @@ -225,7 +225,7 @@ do_exception(struct pt_regs *regs, unsig goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; - if (expand_stack(vma, address)) + if (expand_stack(tsk, vma, address)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so diff -urNp linux-2.6.11/arch/sh/mm/fault.c linux-2.6.11/arch/sh/mm/fault.c --- 
linux-2.6.11/arch/sh/mm/fault.c 2005-03-02 02:37:52.000000000 -0500 +++ linux-2.6.11/arch/sh/mm/fault.c 2005-03-07 08:59:37.000000000 -0500 @@ -69,7 +69,7 @@ asmlinkage void do_page_fault(struct pt_ goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; - if (expand_stack(vma, address)) + if (expand_stack(tsk, vma, address)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so diff -urNp linux-2.6.11/arch/sh64/mm/fault.c linux-2.6.11/arch/sh64/mm/fault.c --- linux-2.6.11/arch/sh64/mm/fault.c 2005-03-02 02:38:08.000000000 -0500 +++ linux-2.6.11/arch/sh64/mm/fault.c 2005-03-07 08:59:37.000000000 -0500 @@ -188,7 +188,7 @@ asmlinkage void do_page_fault(struct pt_ #endif goto bad_area; } - if (expand_stack(vma, address)) { + if (expand_stack(tsk, vma, address)) { #ifdef DEBUG_FAULT print_task(tsk); printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n", diff -urNp linux-2.6.11/arch/sparc/Makefile linux-2.6.11/arch/sparc/Makefile --- linux-2.6.11/arch/sparc/Makefile 2005-03-02 02:37:49.000000000 -0500 +++ linux-2.6.11/arch/sparc/Makefile 2005-03-07 08:59:37.000000000 -0500 @@ -34,7 +34,7 @@ libs-y += arch/sparc/prom/ arch/sparc/li # Renaming is done to avoid confusing pattern matching rules in 2.5.45 (multy-) INIT_Y := $(patsubst %/, %/built-in.o, $(init-y)) CORE_Y := $(core-y) -CORE_Y += kernel/ mm/ fs/ ipc/ security/ crypto/ +CORE_Y += kernel/ mm/ fs/ ipc/ security/ crypto/ grsecurity/ CORE_Y := $(patsubst %/, %/built-in.o, $(CORE_Y)) DRIVERS_Y := $(patsubst %/, %/built-in.o, $(drivers-y)) NET_Y := $(patsubst %/, %/built-in.o, $(net-y)) diff -urNp linux-2.6.11/arch/sparc/kernel/ptrace.c linux-2.6.11/arch/sparc/kernel/ptrace.c --- linux-2.6.11/arch/sparc/kernel/ptrace.c 2005-03-02 02:38:33.000000000 -0500 +++ linux-2.6.11/arch/sparc/kernel/ptrace.c 2005-03-07 08:59:37.000000000 -0500 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -322,6 +323,11 @@ asmlinkage void do_ptrace(struct 
pt_regs goto out; } + if (gr_handle_ptrace(child, request)) { + pt_error_return(regs, EPERM); + goto out_tsk; + } + if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { if (ptrace_attach(child)) { diff -urNp linux-2.6.11/arch/sparc/kernel/sys_sparc.c linux-2.6.11/arch/sparc/kernel/sys_sparc.c --- linux-2.6.11/arch/sparc/kernel/sys_sparc.c 2005-03-02 02:38:00.000000000 -0500 +++ linux-2.6.11/arch/sparc/kernel/sys_sparc.c 2005-03-07 08:59:37.000000000 -0500 @@ -55,6 +55,13 @@ unsigned long arch_get_unmapped_area(str return -ENOMEM; if (ARCH_SUN4C_SUN4 && len > 0x20000000) return -ENOMEM; + +#ifdef CONFIG_PAX_RANDMMAP + if ((current->mm->flags & MF_PAX_RANDMMAP) && (!addr || filp)) + addr = TASK_UNMAPPED_BASE + current->mm->delta_mmap; + else +#endif + if (!addr) addr = TASK_UNMAPPED_BASE; @@ -227,6 +234,11 @@ static unsigned long do_mmap2(unsigned l struct file * file = NULL; unsigned long retval = -EBADF; +#ifdef CONFIG_PAX_RANDEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + if (!(flags & MAP_ANONYMOUS)) { file = fget(fd); if (!file) diff -urNp linux-2.6.11/arch/sparc/kernel/sys_sunos.c linux-2.6.11/arch/sparc/kernel/sys_sunos.c --- linux-2.6.11/arch/sparc/kernel/sys_sunos.c 2005-03-02 02:38:07.000000000 -0500 +++ linux-2.6.11/arch/sparc/kernel/sys_sunos.c 2005-03-07 08:59:37.000000000 -0500 @@ -71,6 +71,11 @@ asmlinkage unsigned long sunos_mmap(unsi struct file * file = NULL; unsigned long retval, ret_type; +#ifdef CONFIG_PAX_RANDEXEC + if (flags & MAP_MIRROR) + return -EINVAL; +#endif + if (flags & MAP_NORESERVE) { static int cnt; if (cnt++ < 10) diff -urNp linux-2.6.11/arch/sparc/mm/fault.c linux-2.6.11/arch/sparc/mm/fault.c --- linux-2.6.11/arch/sparc/mm/fault.c 2005-03-02 02:38:37.000000000 -0500 +++ linux-2.6.11/arch/sparc/mm/fault.c 2005-03-07 08:59:37.000000000 -0500 @@ -21,6 +21,10 @@ #include #include #include +#include +#include +#include +#include #include 
#include @@ -220,6 +224,269 @@ static unsigned long compute_si_addr(str return safe_compute_effective_address(regs, insn); } +#ifdef CONFIG_PAX_PAGEEXEC +void pax_emuplt_close(struct vm_area_struct * vma) +{ + vma->vm_mm->call_dl_resolve = 0UL; +} + +static struct page* pax_emuplt_nopage(struct vm_area_struct *vma, unsigned long address, int *type) +{ + struct page* page; + unsigned int *kaddr; + + page = alloc_page(GFP_HIGHUSER); + if (!page) + return NOPAGE_OOM; + + kaddr = kmap(page); + memset(kaddr, 0, PAGE_SIZE); + kaddr[0] = 0x9DE3BFA8U; /* save */ + flush_dcache_page(page); + kunmap(page); + if (type) + *type = VM_FAULT_MAJOR; + + return page; +} + +static struct vm_operations_struct pax_vm_ops = { + close: pax_emuplt_close, + nopage: pax_emuplt_nopage, +}; + +static void pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) +{ + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = current->mm; + vma->vm_start = addr; + vma->vm_end = addr + PAGE_SIZE; + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; + vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f]; + vma->vm_ops = &pax_vm_ops; + insert_vm_struct(current->mm, vma); + ++current->mm->total_vm; +} + +/* + * PaX: decide what to do with offenders (regs->pc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + * 4 when legitimate ET_EXEC was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; +#endif + +#ifdef CONFIG_PAX_RANDEXEC + if (current->mm->flags & MF_PAX_RANDEXEC) { + if (regs->pc >= current->mm->start_code && + regs->pc < current->mm->end_code) + { + if (regs->u_regs[UREG_RETPC] + 8UL == regs->pc) + return 1; + + regs->pc += current->mm->delta_exec; + if (regs->npc >= current->mm->start_code && + regs->npc < current->mm->end_code) + regs->npc += current->mm->delta_exec; + return 4; + } + if (regs->pc >= 
current->mm->start_code + current->mm->delta_exec && + regs->pc < current->mm->end_code + current->mm->delta_exec) + { + regs->pc -= current->mm->delta_exec; + if (regs->npc >= current->mm->start_code + current->mm->delta_exec && + regs->npc < current->mm->end_code + current->mm->delta_exec) + regs->npc -= current->mm->delta_exec; + } + } +#endif + +#ifdef CONFIG_PAX_EMUPLT + do { /* PaX: patched PLT emulation #1 */ + unsigned int sethi1, sethi2, jmpl; + + err = get_user(sethi1, (unsigned int*)regs->pc); + err |= get_user(sethi2, (unsigned int*)(regs->pc+4)); + err |= get_user(jmpl, (unsigned int*)(regs->pc+8)); + + if (err) + break; + + if ((sethi1 & 0xFFC00000U) == 0x03000000U && + (sethi2 & 0xFFC00000U) == 0x03000000U && + (jmpl & 0xFFFFE000U) == 0x81C06000U) + { + unsigned int addr; + + regs->u_regs[UREG_G1] = (sethi2 & 0x003FFFFFU) << 10; + addr = regs->u_regs[UREG_G1]; + addr += (((jmpl | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U); + regs->pc = addr; + regs->npc = addr+4; + return 2; + } + } while (0); + + { /* PaX: patched PLT emulation #2 */ + unsigned int ba; + + err = get_user(ba, (unsigned int*)regs->pc); + + if (!err && (ba & 0xFFC00000U) == 0x30800000U) { + unsigned int addr; + + addr = regs->pc + ((((ba | 0xFFC00000U) ^ 0x00200000U) + 0x00200000U) << 2); + regs->pc = addr; + regs->npc = addr+4; + return 2; + } + } + + do { /* PaX: patched PLT emulation #3 */ + unsigned int sethi, jmpl, nop; + + err = get_user(sethi, (unsigned int*)regs->pc); + err |= get_user(jmpl, (unsigned int*)(regs->pc+4)); + err |= get_user(nop, (unsigned int*)(regs->pc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + (jmpl & 0xFFFFE000U) == 0x81C06000U && + nop == 0x01000000U) + { + unsigned int addr; + + addr = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] = addr; + addr += (((jmpl | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U); + regs->pc = addr; + regs->npc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation step 
1 */ + unsigned int sethi, ba, nop; + + err = get_user(sethi, (unsigned int*)regs->pc); + err |= get_user(ba, (unsigned int*)(regs->pc+4)); + err |= get_user(nop, (unsigned int*)(regs->pc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30680000U) && + nop == 0x01000000U) + { + unsigned int addr, save, call; + + if ((ba & 0xFFC00000U) == 0x30800000U) + addr = regs->pc + 4 + ((((ba | 0xFFC00000U) ^ 0x00200000U) + 0x00200000U) << 2); + else + addr = regs->pc + 4 + ((((ba | 0xFFF80000U) ^ 0x00040000U) + 0x00040000U) << 2); + + err = get_user(save, (unsigned int*)addr); + err |= get_user(call, (unsigned int*)(addr+4)); + err |= get_user(nop, (unsigned int*)(addr+8)); + if (err) + break; + + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + struct vm_area_struct *vma; + unsigned long call_dl_resolve; + + down_read(¤t->mm->mmap_sem); + call_dl_resolve = current->mm->call_dl_resolve; + up_read(¤t->mm->mmap_sem); + if (likely(call_dl_resolve)) + goto emulate; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + + down_write(¤t->mm->mmap_sem); + if (current->mm->call_dl_resolve) { + call_dl_resolve = current->mm->call_dl_resolve; + up_write(¤t->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + goto emulate; + } + + call_dl_resolve = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_dl_resolve & ~PAGE_MASK)) { + up_write(¤t->mm->mmap_sem); + if (vma) kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + pax_insert_vma(vma, call_dl_resolve); + current->mm->call_dl_r