diff -u -p -L linux.2342.current/CREDITS.orig linux.2342.current/CREDITS --- linux.2342.current/CREDITS +++ linux.2342.current/CREDITS Sun Feb 6 23:02:30 2000 @@ -171,6 +171,17 @@ S: ul. Koscielna 12a S: 62-300 Wrzesnia S: Poland +N: Fred Barnes +E: frmb2@ukc.ac.uk +W: http://teddy.xylene.com/ +D: Various parport/ppdev hacks/fixes +D: Some sysmp() stuff +S: Computing Laboratory +S: University of Kent at Canterbury +S: Canterbury +S: KENT +S: CT2 1NZ + N: Paul Barton-Davis E: pbd@op.net D: Driver for WaveFront soundcards (Turtle Beach Maui, Tropez, Tropez+) @@ -915,6 +926,12 @@ E: jauderho@carumba.com W: http://www.carumba.com/ D: bug toaster (A1 sauce makes all the difference) D: Random linux hacker + +N: Tim Hockin +E: thockin@isunix.it.ilstu.edu +W: http://isunix.it.ilstu.edu +D: Pset / sysmp support +S: (in flux) N: Dirk Hohndel E: hohndel@suse.de diff -u -p -L linux.2342.current/Documentation/Configure.help.orig linux.2342.current/Documentation/Configure.help --- linux.2342.current/Documentation/Configure.help +++ linux.2342.current/Documentation/Configure.help Sun Feb 6 23:05:38 2000 @@ -125,6 +125,36 @@ CONFIG_X86_UP_IOAPIC on IO-APIC-less systems with no slowdown at all. SMP kernels include IO-APIC support unconditionally. +Verbose processor set messages +CONFIG_SMP_PSET_VERBOSE + Pset enables the kernel to assign processes to run on a single CPU + or set of CPUs (a set of processors => pset). The process can then + request to be put on a specific processor set via the sysmp() + system call. This option will give you slightly more verbose status + and error messages about SMP processor set support at boot time. + + For REALLY verbose messages, also set CONFIG_SMP_PSET_DEBUG to 1 in + include/linux/pset.h. If you are already getting more messages than + you care to see, perhaps CONFIG_SMP_PSET_DEBUG is already 1, in + which case you can set it to 0 to quiet it down. 
+ + See also: Documentation/pset.txt and the home web site at + http://isunix.it.ilstu.edu/~thockin/pset/ + + If you don't know what to do here, say N. + +Bigphysarea support +CONFIG_BIGPHYSAREA + The bigphysarea patch allows a large amount of contiguous physical + memory to be reserved at boot time. This is generally useful to + video grabbers, high-speed A/D converters and scatter/gather NICs. + + See also: Documentation/bigphysarea.txt, and the home web site at + http://www.uni-paderborn.de/fachbereich/AG/heiss/linux/bigphysarea.html + + To reserve memory, specify bigphysarea=<number of pages> in the kernel boot + parameters. The usage can be monitored through /proc/bigphysarea. + Kernel math emulation CONFIG_MATH_EMULATION Linux can emulate a math coprocessor (used for floating point diff -u -p -L linux.2342.current/Documentation/bigphysarea.txt.orig linux.2342.current/Documentation/bigphysarea.txt --- linux.2342.current/Documentation/bigphysarea.txt +++ linux.2342.current/Documentation/bigphysarea.txt Sun Feb 6 23:00:00 2000 @@ -0,0 +1,38 @@ +Big physical area patch +======================= + +This is a patch against Linux 2.3.36 for the `bigphysarea' memory +allocation routines. This code allows you to reserve a large portion +of contiguous physical memory at boot time which can be +allocated/deallocated by kernel drivers. + +This sort of hack is necessary for devices such as RAM-less video +framegrabbers which need a big chunk of contiguous physical RAM, +larger than whatever get_free_pages or kmalloc can provide. Also, this +memory is safe to remap_page_range() into user space, e.g., for +mmap(). + +To use: + Use the boot option + bigphysarea=<number of pages> + e.g. by adding a line + append="bigphysarea=1024" + to your /etc/lilo.conf to specify the number of pages to + reserve. If you don't use this option then no pages will + be reserved. Usage can be monitored through the proc + filesystem, just type 'cat /proc/bigphysarea' + +This code is based on code from M. 
Welsh (mdw@cs.cornell.edu). + +Questions, comments, bug reports? Mail butenuth@uni-paderborn.de or +look at + +http://www.uni-paderborn.de/fachbereich/AG/heiss/linux/bigphysarea.html + +for a new version. + +Happy hacking! +Roger Butenuth + +2.3.36 hack by Fred Barnes (fred@xylene.com) + diff -u -p -L linux.2342.current/Documentation/pset.txt.orig linux.2342.current/Documentation/pset.txt --- linux.2342.current/Documentation/pset.txt +++ linux.2342.current/Documentation/pset.txt Sun Feb 6 23:00:00 2000 @@ -0,0 +1,24 @@ +Watch this space for more pset documentation. +http://isunix.it.ilstu.edu/~thockin/pset/ +------------------------------------------------------------------------ +In order to do anything useful with this, you should get the pset-utils +package from http://isunix.it.ilstu.edu/~thockin/pset/. The most current +version is pset-utils-0.61 + +About SMP processor set support (experimental) +----------------------------------------------- +This enables the kernel to assign every process to a single CPU or +group of CPUs (a set of processors => pset). The process can then +request to be put on a specific processor set via the sysmp() +system call. Library wrappers for this system call are (or will be) +available for maximum portability between OSes. This will also +activate a new magic SysRq key (if you have that option on) - 'C' +to reset all CPUs and processes. + +This will enlarge your kernel by about 5 KB. 
+ +See also: Documentation/Configure.help (CONFIG_SMP_PSET_VERBOSE) and the home web site at + http://isunix.it.ilstu.edu/~thockin/pset/ + +http://isunix.it.ilstu.edu/~thockin/pset/ +thockin@isunix.it.ilstu.edu diff -u -p -L linux.2342.current/MAINTAINERS.orig linux.2342.current/MAINTAINERS --- linux.2342.current/MAINTAINERS +++ linux.2342.current/MAINTAINERS Sun Feb 6 23:00:00 2000 @@ -796,6 +796,12 @@ M: atong@uiuc.edu L: linux-fbdev@vcuser.vc.union.edu S: Maintained +PSET SUPPORT +P: Tim Hockin +M: thockin@isunix.it.ilstu.edu +W: http://isunix.it.ilstu.edu/~thockin/pset +S: Maintained + RAYLINK/WEBGEAR 802.11 WIRELESS LAN DRIVER P: Corey Thomas M: corey@world.std.com diff -u -p -L linux.2342.current/arch/i386/config.in.orig linux.2342.current/arch/i386/config.in --- linux.2342.current/arch/i386/config.in +++ linux.2342.current/arch/i386/config.in Sun Feb 6 23:23:15 2000 @@ -70,7 +70,10 @@ if [ "$CONFIG_SMP" != "y" ]; then define_bool CONFIG_X86_IO_APIC y define_bool CONFIG_X86_LOCAL_APIC y fi +else + bool ' Verbose processor set messages' CONFIG_SMP_PSET_VERBOSE fi +bool 'Bigphysarea support' CONFIG_BIGPHYSAREA endmenu mainmenu_option next_comment diff -u -p -L linux.2342.current/arch/i386/kernel/entry.S.orig linux.2342.current/arch/i386/kernel/entry.S --- linux.2342.current/arch/i386/kernel/entry.S +++ linux.2342.current/arch/i386/kernel/entry.S Sun Feb 6 23:13:14 2000 @@ -618,6 +618,7 @@ ENTRY(sys_call_table) .long SYMBOL_NAME(sys_setfsuid) /* 215 */ .long SYMBOL_NAME(sys_setfsgid) .long SYMBOL_NAME(sys_pivot_root) + .long SYMBOL_NAME(sys_sysmp) /* @@ -626,6 +627,6 @@ ENTRY(sys_call_table) * entries. Don't panic if you notice that this hasn't * been shrunk every time we add a new system call. 
*/ - .rept NR_syscalls-217 + .rept NR_syscalls-218 .long SYMBOL_NAME(sys_ni_syscall) .endr diff -u -p -L linux.2342.current/drivers/char/sysrq.c.orig linux.2342.current/drivers/char/sysrq.c --- linux.2342.current/drivers/char/sysrq.c +++ linux.2342.current/drivers/char/sysrq.c Sun Feb 6 23:00:00 2000 @@ -24,6 +24,10 @@ #include +#ifdef __SMP__ +#include +#endif + extern void wakeup_bdflush(int); extern void reset_vc(unsigned int); extern int console_loglevel; @@ -129,6 +133,12 @@ void handle_sysrq(int key, struct pt_reg send_sig_all(SIGKILL, 1); orig_log_level = 8; break; +#ifdef __SMP__ /* same guard as the include and the "Cpus/psets" help text added to this file */ + case 'c': /* SysRq-C: re-enable all CPUs and reset the processor sets */ + printk("Enable CPUs and Reset Psets\n"); + pset_reset_psets(); + break; +#endif default: /* Unknown: help */ if (kbd) printk("unRaw "); @@ -139,6 +149,9 @@ void handle_sysrq(int key, struct pt_reg printk("Boot "); if (sysrq_power_off) printk("Off "); +#ifdef __SMP__ + printk("Cpus/psets "); +#endif printk("Sync Unmount showPc showTasks showMem loglevel0-8 tErm kIll killalL\n"); /* Don't use 'A' as it's handled specially on the Sparc */ } diff -u -p -L linux.2342.current/fs/proc/base.c.orig linux.2342.current/fs/proc/base.c --- linux.2342.current/fs/proc/base.c +++ linux.2342.current/fs/proc/base.c Sun Feb 6 23:15:09 2000 @@ -11,6 +11,8 @@ * go into icache. We cache the reference to task_struct upon lookup too. * Eventually it should become a filesystem in its own. We don't use the * rest of procfs anymore. + * + * Pset stuff (C) 2000 Tim Hockin / Fred Barnes */ #include @@ -23,6 +25,10 @@ #include #include +#ifdef __SMP__ +#include +#endif + /* * For hysterical raisins we keep the same inumbers as in the old procfs. 
* Feel free to change the macro below - just keep the range distinct from @@ -498,6 +504,7 @@ enum pid_directory_inos { PROC_PID_STAT, PROC_PID_STATM, PROC_PID_MAPS, + PROC_PID_PSET, PROC_PID_CPU, PROC_PID_FD_DIR = 0x8000, /* 0x8000-0xffff */ }; @@ -512,6 +519,7 @@ static struct pid_entry base_stuff[] = { E(PROC_PID_STATM, "statm", S_IFREG|S_IRUGO), #ifdef __SMP__ E(PROC_PID_CPU, "cpu", S_IFREG|S_IRUGO), + E(PROC_PID_PSET, "pset", S_IFREG|S_IRUGO), #endif E(PROC_PID_MAPS, "maps", S_IFREG|S_IRUGO), E(PROC_PID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), @@ -863,6 +871,10 @@ static struct dentry *proc_base_lookup(s case PROC_PID_CPU: inode->i_op = &proc_info_inode_operations; inode->u.proc_i.op.proc_read = proc_pid_cpu; + break; + case PROC_PID_PSET: + inode->i_op = &proc_info_inode_operations; + inode->u.proc_i.op.proc_read = pset_get_proc_pid_pset; break; #endif case PROC_PID_MEM: diff -u -p -L linux.2342.current/fs/proc/proc_misc.c.orig linux.2342.current/fs/proc/proc_misc.c --- linux.2342.current/fs/proc/proc_misc.c +++ linux.2342.current/fs/proc/proc_misc.c Sun Feb 6 23:00:00 2000 @@ -9,6 +9,8 @@ * there. I took this into a separate file and switched the thing to generic * proc_file_inode_operations, leaving in array.c only per-process stuff. * Inumbers allocation made dynamic (via create_proc_entry()). AV, May 1999. 
+ * + * Pset additions (C) 2000 Tim Hockin / Fred Barnes */ #include @@ -34,6 +36,10 @@ #include #include +#ifdef __SMP__ +#include +#endif + #define LOAD_INT(x) ((x) >> FSHIFT) #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) @@ -65,6 +71,9 @@ extern int get_swaparea_info (char *); #ifdef CONFIG_SGI_DS1286 extern int get_ds1286_status(char *); #endif +#ifdef CONFIG_BIGPHYSAREA +extern int get_bigphysarea_info (char *); +#endif static int loadavg_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) @@ -215,6 +224,46 @@ static int cpuinfo_read_proc(char *page, return len; } +#ifdef __SMP__ +static int pset_psets_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = pset_get_proc_psets(page); + if (len <= off+count) *eof = 1; + *start = page + off; + len -= off; + if (len>count) len = count; + if (len<0) len = 0; + return len; +} + +static int pset_cpustats_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = pset_get_proc_cpustats(page); + if (len <= off+count) *eof = 1; + *start = page + off; + len -= off; + if (len>count) len = count; + if (len<0) len = 0; + return len; +} +#endif + +#ifdef CONFIG_BIGPHYSAREA +static int bigphysarea_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = get_bigphysarea_info(page); + if (len <= off+count) *eof = 1; + *start = page + off; + len -= off; + if (len>count) len = count; + if (len<0) len = 0; + return len; +} +#endif + #ifdef CONFIG_PROC_HARDWARE static int hardware_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) @@ -628,6 +677,13 @@ void proc_misc_init(void) {"meminfo", meminfo_read_proc}, {"version", version_read_proc}, {"cpuinfo", cpuinfo_read_proc}, +#ifdef __SMP__ + {"psets", pset_psets_read_proc}, + {"cpustats", pset_cpustats_read_proc}, +#endif +#ifdef CONFIG_BIGPHYSAREA + {"bigphysarea", bigphysarea_read_proc}, +#endif 
#ifdef CONFIG_PROC_HARDWARE {"hardware", hardware_read_proc}, #endif diff -u -p -L linux.2342.current/include/asm-i386/unistd.h.orig linux.2342.current/include/asm-i386/unistd.h --- linux.2342.current/include/asm-i386/unistd.h +++ linux.2342.current/include/asm-i386/unistd.h Sun Feb 6 23:16:33 2000 @@ -222,6 +222,7 @@ #define __NR_setfsuid32 215 #define __NR_setfsgid32 216 #define __NR_pivot_root 217 +#define __NR_sysmp 218 /* user-visible error numbers are in the range -1 - -124: see */ diff -u -p -L linux.2342.current/include/linux/bigphysarea.h.orig linux.2342.current/include/linux/bigphysarea.h --- linux.2342.current/include/linux/bigphysarea.h +++ linux.2342.current/include/linux/bigphysarea.h Sun Feb 6 23:00:00 2000 @@ -0,0 +1,32 @@ +/* linux/mm/bigphysarea.h, M. Welsh (mdw@cs.cornell.edu) + * Copyright (c) 1996 by Matt Welsh. + * Extended by Roger Butenuth (butenuth@uni-paderborn.de), October 1997 + * + * This is a set of routines which allow you to reserve a large (?) + * amount of physical memory at boot-time, which can be allocated/deallocated + * by drivers. This memory is intended to be used for devices such as + * video framegrabbers which need a lot of physical RAM (above the amount + * allocated by kmalloc). This is by no means efficient or recommended; + * to be used only in extreme circumstances. 
+ * + */ + +#ifndef __LINUX_BIGPHYSAREA_H +#define __LINUX_BIGPHYSAREA_H + +#include + +extern caddr_t bigphysarea; + +/* original interface */ +extern void bigphysarea_setup(int pages); +extern void bigphysarea_init(void); +extern caddr_t bigphysarea_alloc(int size); +extern void bigphysarea_free(caddr_t addr, int size); + +/* new interface */ +extern caddr_t bigphysarea_alloc_pages(int count, int align, int priority); +extern void bigphysarea_free_pages(caddr_t base); + +#endif /* __LINUX_BIGPHYSAREA_H */ + diff -u -p -L linux.2342.current/include/linux/capability.h.orig linux.2342.current/include/linux/capability.h --- linux.2342.current/include/linux/capability.h +++ linux.2342.current/include/linux/capability.h Sun Feb 6 23:00:00 2000 @@ -227,6 +227,7 @@ typedef __u32 kernel_cap_t; /* Allow enabling/disabling tagged queuing on SCSI controllers and sending arbitrary SCSI commands */ /* Allow setting encryption key on loopback filesystem */ +/* Allow manipulation of psets */ #define CAP_SYS_ADMIN 21 @@ -239,6 +240,7 @@ typedef __u32 kernel_cap_t; /* Allow use of FIFO and round-robin (realtime) scheduling on own processes and setting the scheduling algorithm used by another process. */ +/* Allow sysmp(MP_MUSTRUN_PID) and sysmp(MP_RUNANYWHERE_PID) commands */ #define CAP_SYS_NICE 23 diff -u -p -L linux.2342.current/include/linux/pset.h.orig linux.2342.current/include/linux/pset.h --- linux.2342.current/include/linux/pset.h +++ linux.2342.current/include/linux/pset.h Sun Feb 6 23:00:00 2000 @@ -0,0 +1,180 @@ +#ifndef __LINUX_PSET_H +#define __LINUX_PSET_H + +/* + * Generic processor set support + * Tim Hockin 1998-1999 + * based on work by Stuart Herbert (S.Herbert@sheffield.ac.uk) + */ + +#include +#include +#include +#include + +#define CONFIG_SMP_PSET_DEBUG 1 + +#if defined(CONFIG_SMP_PSET_VERBOSE) && CONFIG_SMP_PSET_DEBUG +#define PRINTD(fmt, args...) printk("Pset: " fmt, ##args) /* fmt must be a string literal */ +#else +#define PRINTD(fmt, args...) 
+#endif + +#define PSET_VERSION "0.65" +#define PSET_MASTER_PSET 0xFFFF + +/* pset flags */ +#define PSET_FL_MASTER 0x00000001 /* master CPU set */ +#define PSET_FL_CPU 0x00000002 /* single CPU set */ +#define PSET_FL_SYS 0x00000004 /* system CPU set */ +#define PSET_FL_NONPREEMPTIVE 0x00000008 /* non-preemptive CPU set */ +#define PSET_FL_USER 0x00000010 /* user-created CPU set */ + +/* non-enabled CPU prio boosts */ +#define PSET_CPU_RESTRICTED_BOOST 15 +#define PSET_CPU_ISOLATED_BOOST 25 +#define PSET_CPU_NONPREEMPTIVE_BOOST 50 + +/* macros */ +#define pset_is_master_pset(p) (p->flags & PSET_FL_MASTER) +#define pset_is_cpu_pset(p) (p->flags & PSET_FL_CPU) +#define pset_is_sys_pset(p) (p->flags & PSET_FL_SYS) +#define pset_is_nonpreemptive_pset(p) (p->flags & PSET_FL_NONPREEMPTIVE) +#define pset_is_user_pset(p) (p->flags & PSET_FL_USER) +#define pset_is_ro_pset(p) \ + (pset_is_master_pset(p) || pset_is_cpu_pset(p) || pset_is_sys_pset(p)) +#define pset_is_valid_cpumask(m) ((m & pset_cpumask_all) == m) +#define pset_preen_cpumask(m) (m & pset_cpumask_all) +#define pset_cpu_enabled_in_pset(c,p) (p->cpumask & (cpumask_t)1<dis_cpumask & (cpumask_t)1<cpumask == ((cpumask_t)1<flags ^= (pset_cpustats[c]->flags & f)) +#define pset_set_cpu_flag(c,f) (pset_cpustats[c]->flags |= f) + +struct pset_struct { + int id; + cpumask_t cpumask; /* active CPUs in this set */ + cpumask_t dis_cpumask; /* inactive CPUs in this set */ + unsigned int flags; /* flags for this set */ + atomic_t refcount; /* count of processes with this set */ + struct pset_struct *next; + struct pset_struct *prev; +}; + +struct cpustat_struct { + int id; + int flags; /* flags for this cpu */ + atomic_t refcount; /* count of sets with this cpu */ +}; + +/* externally visible functions */ +extern void pset_init(void) __init; +extern struct pset_struct *pset_allocate_pset(int, unsigned int); +extern void pset_deallocate_pset(struct pset_struct *); +extern void pset_add_pset_to_list(struct pset_struct *); 
+extern void pset_remove_pset_from_list(struct pset_struct *); +extern void pset_add_task_to_pset(struct task_struct *, struct pset_struct *); +extern void pset_remove_task_from_pset(struct task_struct *); +extern void pset_add_cpus_to_pset(cpumask_t, struct pset_struct *); +extern void pset_remove_cpus_from_pset(cpumask_t, struct pset_struct *); +extern void pset_disable_cpus_in_pset(cpumask_t, struct pset_struct *); +extern void pset_enable_cpus_in_pset(cpumask_t, struct pset_struct *); +extern void pset_reset_psets(void); +extern int pset_get_proc_psets(char *); +extern int pset_get_proc_pid_pset(struct task_struct *, char *); +extern int pset_get_proc_cpustats(char *); + +extern int sys_sysmp(int, int, int, int, unsigned long); + +/* extern vars */ +extern struct pset_struct *pset_master; +extern struct pset_struct *pset_list; +extern cpumask_t pset_cpumask_all; +extern struct cpustat_struct *pset_cpustats[NR_CPUS]; +extern rwlock_t pset_lock; +extern rwlock_t pset_cpustat_lock; + +/* inline functions */ +/* ************************************************************ + * pset_not_implemented + * a requested function was not yet implemented + * + * return -ENOSYS + * ***********************************************************/ +extern inline int pset_not_implemented(void) +{ + PRINTD("pset_not_implemented()\n"); + + return -ENOSYS; +} + +/* ************************************************************ + * pset_find_pset + * find a pset by its id + * + * return a pointer to the requested pset_struct + * return NULL if the requested pset does not exist + * ***********************************************************/ +extern inline struct pset_struct *pset_find_pset(int id) +{ + struct pset_struct *pset = pset_list; + + PRINTD("pset_find_pset(%d)\n", id); + + if (id == PSET_MASTER_PSET) { + return pset_master; + } + if (id < 0) { + return NULL; + } + + /* the list should be ordered */ + while (pset && pset->id <= id) { + if (pset->id == id) { + return pset; + } 
else { + pset = pset->next; + } + } + + return NULL; +} + +/* ************************************************************ + * pset_find_new_pset_id + * find an available pset id + * + * return a positive integer, the pset id, on success + * return -1 when the list is full + * ***********************************************************/ +extern inline int pset_find_new_pset_id(void) +{ + struct pset_struct *pset; + int i; + + PRINTD("pset_find_new_pset_id()\n"); + + i = pset_list->id; + pset = pset_list->next; + + /* + * find the first open spot in the list. we don't look for the + * end, because pset_master is the terminating entry + */ + while (pset) { + /* is there a gap between prev and curr */ + if (pset->id - i > 1) + return i+1; + + i = pset->id; + pset = pset->next; + } + + return -1; +} + +#endif /* ifndef __LINUX_PSET_H */ diff -u -p -L linux.2342.current/include/linux/sched.h.orig linux.2342.current/include/linux/sched.h --- linux.2342.current/include/linux/sched.h +++ linux.2342.current/include/linux/sched.h Sun Feb 6 23:23:38 2000 @@ -283,6 +283,8 @@ struct task_struct { struct list_head run_list; struct task_struct *next_task, *prev_task; int last_processor; + struct pset_struct *pset; + int pset_passedover; /* task state */ struct linux_binfmt *binfmt; @@ -399,6 +401,7 @@ struct task_struct { /* run_list */ LIST_HEAD_INIT(init_task.run_list), \ /* next_task */ &init_task,&init_task, \ /* last_proc */ 0, \ +/* pset */ NULL, 0, \ /* binfmt */ NULL, \ /* ec,brk... */ 0,0,0,0,0,0, \ /* pid etc.. 
*/ 0,0,0,0,0, \ diff -u -p -L linux.2342.current/include/linux/sysmp.h.orig linux.2342.current/include/linux/sysmp.h --- linux.2342.current/include/linux/sysmp.h +++ linux.2342.current/include/linux/sysmp.h Sun Feb 6 23:00:00 2000 @@ -0,0 +1,84 @@ +#ifndef __LINUX_SYSMP_H +#define __LINUX_SYSMP_H + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +/* + * Support for sysmp() system call + * Tim Hockin 1998-1999 + * based on work by Stuart Herbert (S.Herbert@sheffield.ac.uk) + */ + +/* type for CPU bitmasks */ +typedef unsigned long cpumask_t; + +/* this is for SGI compatibility */ +typedef cpumask_t sbv_t; + +/* pda_stat structure (for MP_STAT) */ +struct pda_stat { + int p_cpuid; /* processor ID */ + int p_flags; /* various flags */ + int p_count; /* count of sets with this cpu */ +}; + +/* values for p_flags */ +#define CPUF_ENABLED 0x00000001 /* processor allowed to sched procs */ +#define CPUF_RESTRICTED 0x00000002 /* processor is restricted */ +#define CPUF_ISOLATED 0x00000004 /* processor is isolated */ +#define CPUF_NONPREEMPTIVE 0x00000008 /* processor is non-preemptive */ +#define CPUF_DISABLED 0x00000010 /* processor is disabled */ + +/* for SGI compatibility */ +#define PDAF_ENABLED CPUF_ENABLED +#define PDAF_ISOLATED CPUF_ISOLATED +#define PDAF_NONPREEMPTIVE CPUF_NONPREEMPTIVE +#define PDAF_MASTER 0 +#define PDAF_CLOCK 0 +#define PDAF_FASTCLOCK 0 +#define PDAF_BROADCAST_OFF 0 + +/* sysmp commands */ +#define MP_PGSIZE 1 /* system pagesize */ +#define MP_SCHED 2 /* schedctl - not yet implemented */ +#define MP_NPROCS 3 /* return # processors */ +#define MP_NAPROCS 4 /* return # active processors */ +#define MP_CURPROC 5 /* return the current processor */ +#define MP_STAT 6 /* return a processor status */ +#define MP_RESTRICT 7 /* restrict cpu to mustrun processes */ +#define MP_EMPOWER 8 /* allow cpu to run any process */ +#define MP_ISOLATE 9 /* restrict cpu to cpu pset */ +#define MP_UNISOLATE 10 /* allow cpu to run processes */ +#define 
MP_DISABLE 11 /* restrict CPU from executing */ +#define MP_ENABLE 12 /* allow CPU to continue running */ +#define MP_NONPREEMPTIVE 13 /* restrict a CPU to related procs */ +#define MP_PREEMPTIVE 14 /* enable normal scheduling */ +#define MP_CLOCK 15 /* ** not yet implemented */ +#define MP_FASTCLOCK 16 /* ** not yet implemented */ +#define MP_MUSTRUN 17 /* force current pid to a pset */ +#define MP_MUSTRUN_PID 18 /* force a pid to a pset */ +#define MP_GETMUSTRUN 19 /* return pset num of current proc */ +#define MP_GETMUSTRUN_PID 20 /* return pset num of a process */ +#define MP_RUNANYWHERE 21 /* run current pid on any cpu */ +#define MP_RUNANYWHERE_PID 22 /* run a pid on any cpu */ +#define MP_KERNADDR 23 /* ** not yet implemented */ +#define MP_SASZ 24 /* ** not yet implemented */ +#define MP_SAGET 25 /* ** not yet implemented */ +#define MP_SAGET1 26 /* ** not yet implemented */ +#define MP_PSET 27 /* pset subcommand */ + +/* MP_PSET sub-commands */ +#define MPPS_CREATE 1 /* create a new pset */ +#define MPPS_DELETE 2 /* delete an existing pset */ +#define MPPS_ADD 3 /* add processors to a pset */ +#define MPPS_REMOVE 4 /* remove processors from a set */ + +/* related values (parenthesized so they expand safely inside larger expressions) */ +#define PSET_ID_NEW (-1) /* find a new pset id for us */ +#define PSET_ALL_CPUS ((cpumask_t)ULONG_MAX) /* every CPU */ + +#endif /* ifndef __LINUX_SYSMP_H */ diff -u -p -L linux.2342.current/init/main.c.orig linux.2342.current/init/main.c --- linux.2342.current/init/main.c +++ linux.2342.current/init/main.c Sun Feb 6 23:00:00 2000 @@ -7,6 +7,8 @@ * Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96 * Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96 * Simplified starting of init: Michael A. 
Griffith + * Added pset support - Tim Hockin Feb '99 + * Added bigphysarea support - Roger Butenuth Jan 2000 */ #define __KERNEL_SYSCALLS__ @@ -65,6 +67,14 @@ extern void nubus_init(void); #include #endif +#ifdef __SMP__ +#include +#endif + +#ifdef CONFIG_BIGPHYSAREA +#include +#endif + /* * Versions of gcc older than that listed below may actually compile * and link okay, but the end product can have subtle run time bugs. @@ -174,6 +184,19 @@ static int __init profile_setup(char *st __setup("profile=", profile_setup); +#ifdef CONFIG_BIGPHYSAREA + +static int __init own_bigphysarea_setup(char *str) +{ + int par; + if (get_option(&str,&par)) { + bigphysarea_setup(par); + } + return 1; +} + +__setup("bigphysarea=", own_bigphysarea_setup); +#endif static struct dev_name_struct { const char *name; @@ -448,6 +471,8 @@ static void __init smp_init(void) smp_boot_cpus(); smp_threads_ready=1; smp_commence(); + /* Setup pset stuff */ + pset_init(); } #endif @@ -506,6 +531,10 @@ asmlinkage void __init start_kernel(void } #endif #endif /* 0000 */ +#ifdef CONFIG_BIGPHYSAREA + /* Using the bootmem interface for getting the pages */ + bigphysarea_init(); +#endif mem_init(); kmem_cache_sizes_init(); #ifdef CONFIG_PROC_FS diff -u -p -L linux.2342.current/kernel/Makefile.orig linux.2342.current/kernel/Makefile --- linux.2342.current/kernel/Makefile +++ linux.2342.current/kernel/Makefile Sun Feb 6 23:00:00 2000 @@ -13,7 +13,7 @@ O_TARGET := kernel.o O_OBJS = sched.o dma.o fork.o exec_domain.o panic.o printk.o sys.o \ module.o exit.o itimer.o info.o time.o softirq.o resource.o \ - sysctl.o acct.o capability.o ptrace.o timer.o + sysctl.o acct.o capability.o ptrace.o timer.o pset.o sysmp.o OX_OBJS += signal.o diff -u -p -L linux.2342.current/kernel/exit.c.orig linux.2342.current/kernel/exit.c --- linux.2342.current/kernel/exit.c +++ linux.2342.current/kernel/exit.c Sun Feb 6 23:00:00 2000 @@ -12,6 +12,9 @@ #ifdef CONFIG_BSD_PROCESS_ACCT #include #endif +#ifdef __SMP__ +#include +#endif 
#include #include @@ -41,6 +44,9 @@ static void release(struct task_struct * free_uid(p); unhash_process(p); +#ifdef __SMP__ + pset_remove_task_from_pset(p); +#endif release_thread(p); current->cmin_flt += p->min_flt + p->cmin_flt; current->cmaj_flt += p->maj_flt + p->cmaj_flt; diff -u -p -L linux.2342.current/kernel/fork.c.orig linux.2342.current/kernel/fork.c --- linux.2342.current/kernel/fork.c +++ linux.2342.current/kernel/fork.c Sun Feb 6 23:00:00 2000 @@ -18,6 +18,10 @@ #include #include +#ifdef __SMP__ +#include +#endif + #include #include #include @@ -675,6 +679,16 @@ int do_fork(unsigned long clone_flags, u /* ?? should we just memset this ?? */ for(i = 0; i < smp_num_cpus; i++) p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0; + if (p->pset) { + atomic_inc(&(p->pset->refcount)); + } else { + /* this fails on bootup, so CYA */ + if (pset_master) { + write_lock_irq(&pset_lock); + pset_add_task_to_pset(p, pset_master); + write_unlock_irq(&pset_lock); + } + } spin_lock_init(&p->sigmask_lock); } #endif diff -u -p -L linux.2342.current/kernel/ksyms.c.orig linux.2342.current/kernel/ksyms.c --- linux.2342.current/kernel/ksyms.c +++ linux.2342.current/kernel/ksyms.c Sun Feb 6 23:22:00 2000 @@ -44,6 +44,10 @@ #include #include +#if defined(CONFIG_BIGPHYSAREA) +#include +#endif + #if defined(CONFIG_PROC_FS) #include #endif @@ -122,6 +126,19 @@ EXPORT_SYMBOL(init_mm); EXPORT_SYMBOL(kmap_high); EXPORT_SYMBOL(kunmap_high); EXPORT_SYMBOL(highmem_start_page); +#endif + +#ifdef CONFIG_BIGPHYSAREA +/* Symbols used by the SCI driver */ +EXPORT_SYMBOL(page_cache_size); +EXPORT_SYMBOL(zap_page_range); +EXPORT_SYMBOL(insert_vm_struct); + +/* Large physical area management */ +EXPORT_SYMBOL(bigphysarea_alloc); +EXPORT_SYMBOL(bigphysarea_free); +EXPORT_SYMBOL(bigphysarea_alloc_pages); +EXPORT_SYMBOL(bigphysarea_free_pages); #endif /* filesystem internal functions */ diff -u -p -L linux.2342.current/kernel/pset.c.orig linux.2342.current/kernel/pset.c --- 
linux.2342.current/kernel/pset.c +++ linux.2342.current/kernel/pset.c Sun Feb 6 23:28:15 2000 @@ -0,0 +1,644 @@ +/* + * Generic processor set support + * Tim Hockin 1998-1999 + * based on work by Stuart Herbert + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +/* private functions */ +void pset_dump_all_psets(void); +void pset_dump_pset(struct pset_struct *); +struct cpustat_struct *pset_allocate_cpustat(int); +void pset_dump_all_cpustats(void); +void pset_dump_cpustat(struct cpustat_struct *); + +/* globals */ +struct pset_struct *pset_master = NULL; +struct pset_struct *pset_list = NULL; +cpumask_t pset_cpumask_all = 0; +struct cpustat_struct *pset_cpustats[NR_CPUS]; +rwlock_t pset_lock = RW_LOCK_UNLOCKED; +rwlock_t pset_cpustat_lock = RW_LOCK_UNLOCKED; + +/* ************************************************************ + * pset_init + * set up the master and CPU psets + * NOTE: we don't need locks here - this is run exactly once. + * + * does not return a value + * ***********************************************************/ +void __init pset_init(void) +{ + int i; + + PRINTD("pset_init()\n"); + + printk("Pset: processor set support version %s\n", PSET_VERSION); + + /* create the master set */ + pset_master = pset_allocate_pset(PSET_MASTER_PSET, PSET_FL_MASTER); + if (!pset_master) { + panic("Unable to allocate memory for master pset!"); + } + pset_add_pset_to_list(pset_master); + + for (i = 0; i < smp_num_cpus; i++) { + struct pset_struct *new_pset; + int cpu; + + cpu = cpu_logical_map(i); + + /* create a new set for each CPU */ + new_pset = pset_allocate_pset(cpu, PSET_FL_CPU); + if (!new_pset) { + panic("Unable to create CPU set #%d!", cpu); + } + pset_add_pset_to_list(new_pset); + + /* allocate space for this CPU's stats */ + if (!pset_allocate_cpustat(cpu)) { + panic("Unable to create CPU stat #%d!", cpu); + } + + /* add this CPU to the right sets */ + pset_add_cpus_to_pset((cpumask_t)(1<cpumask | 
pset_master->dis_cpumask; + + printk("Pset: Created %d processor sets\n", i+1); +#ifdef CONFIG_SMP_PSET_VERBOSE + pset_dump_all_psets(); + pset_dump_all_cpustats(); +#endif +} + +/* ************************************************************ + * pset_reset_psets + * set all tasks to pset_master, enable all CPUs + * this is most likely called by the magic SysRq key + * + * does not return a value + * ***********************************************************/ +void pset_reset_psets(void) +{ + struct task_struct *p; + struct pset_struct *pset = pset_list; + int cpu; + int i; + + PRINTD("pset_reset_psets()\n"); + + /* assign all tasks to pset_master */ + read_lock(&tasklist_lock); + for_each_task(p) + pset_add_task_to_pset(p, pset_master); + read_unlock(&tasklist_lock); + + write_lock(&pset_lock); + write_lock(&pset_cpustat_lock); + + /* enable all CPUs - forcefully*/ + for (i = 0; i < smp_num_cpus ; i++) { + cpu = cpu_logical_map(i); + pset_unset_cpu_flag(cpu, 0xFFFF); + pset_set_cpu_flag(cpu, CPUF_ENABLED); + } + + /* enable all CPUs in all psets - again, forcefully*/ + while (pset) { + pset_enable_cpus_in_pset(pset_cpumask_all, pset); + pset = pset->next; + } + + write_unlock(&pset_cpustat_lock); + write_unlock(&pset_lock); + + printk("Pset: All tasks set to master pset, all CPUs enabled\n"); +} + + +/* ************************************************************ + * pset_dump_all_psets + * display a dump for every pset + * + * does not return a value + * ***********************************************************/ +void pset_dump_all_psets(void) +{ + struct pset_struct *pset = pset_list; + + PRINTD("pset_dump_all_psets()\n"); + + while (pset) { + pset_dump_pset(pset); + pset = pset->next; + } +} + +/* ************************************************************ + * pset_dump_pset + * display info about a pset + * + * does not return a value + * ***********************************************************/ +void pset_dump_pset(struct pset_struct *pset) +{ + int 
count = 0; + cpumask_t tmp; + + PRINTD("pset_dump_pset(pset id %d)\n", pset->id); + + tmp = pset->cpumask | pset->dis_cpumask; + while (tmp) { + if (tmp & 1) { + count++; + } + tmp >>= 1; + } + + printk("Pset: pset id: %5d, #CPUs: %5d, cpumask: 0x%08lX, flags: ", + pset->id, count, (pset->cpumask|pset->dis_cpumask)); + if (pset_is_master_pset(pset)) { + printk("master"); + } else if (pset_is_cpu_pset(pset)) { + printk("cpu"); + } else if (pset_is_sys_pset(pset)) { + printk("sys"); + } else if (pset_is_nonpreemptive_pset(pset)) { + printk("non-preemptive"); + } else if (pset_is_user_pset(pset)) { + printk("user"); + } + + printk("\n"); +} + +/* ************************************************************ + * pset_dump_all_cpustats + * display a dump for every cpustat + * + * does not return a value + * ***********************************************************/ +void pset_dump_all_cpustats(void) +{ + int i; + + PRINTD("pset_dump_all_cpustats()\n"); + + for (i = 0; i < NR_CPUS; i++) { + if (pset_cpustats[i]) { + pset_dump_cpustat(pset_cpustats[i]); + } + } +} + +/* ************************************************************ + * pset_dump_cpustat + * display a dump for a cpustat + * + * does not return a value + * ***********************************************************/ +void pset_dump_cpustat(struct cpustat_struct *cpustat) +{ + PRINTD("pset_dump_cpustat(cpustat id %d)\n", cpustat->id); + + printk("Pset: cpustat id: %5d, refcount: %5d, flags: ", + cpustat->id, atomic_read(&(cpustat->refcount))); + + if (cpustat->flags & CPUF_ENABLED) { + printk("enabled "); + } + if (cpustat->flags & CPUF_RESTRICTED) { + printk("restricted "); + } + if (cpustat->flags & CPUF_ISOLATED) { + printk("isolated "); + } + if (cpustat->flags & CPUF_DISABLED) { + printk("disabled "); + } + if (cpustat->flags & CPUF_NONPREEMPTIVE) { + printk("non-preemptive "); + } + printk("\n"); +} + +/* ************************************************************ + * pset_allocate_pset + * kmalloc 
some memory and set some fields + * + * return pointer to new pset_struct on success + * return NULL if no memory available + * ***********************************************************/ +struct pset_struct *pset_allocate_pset(int id, unsigned int flags) +{ + struct pset_struct *new_pset = NULL; + + PRINTD("pset_allocate_pset(%d, %u)\n", id, flags); + + new_pset = (struct pset_struct *)kmalloc(sizeof(struct pset_struct), + GFP_KERNEL); + + if (new_pset) { + new_pset->id = id; + new_pset->cpumask = (cpumask_t)0; + new_pset->dis_cpumask = (cpumask_t)0; + new_pset->flags = flags; + atomic_set(&(new_pset->refcount), 0); + new_pset->next = NULL; + new_pset->prev = NULL; + } + + return new_pset; +} + +/* ************************************************************ + * pset_deallocate_pset + * free a pset + * + * does not return a value + * ***********************************************************/ +void pset_deallocate_pset(struct pset_struct *pset) +{ + PRINTD("pset_deallocate_pset(pset id %d)\n", pset->id); + + pset_remove_cpus_from_pset(pset->cpumask|pset->dis_cpumask, pset); + pset_remove_pset_from_list(pset); + + kfree(pset); +} + +/* ************************************************************ + * pset_allocate_cpustat + * kmalloc some memory and set some fields + * + * return pointer to new cpustat_struct on success + * return NULL if no memory available + * ***********************************************************/ +struct cpustat_struct *pset_allocate_cpustat(int cpu) +{ + struct cpustat_struct *new_stat = NULL; + + PRINTD("pset_allocate_cpustat(%d)\n", cpu); + + new_stat = (struct cpustat_struct *)kmalloc(sizeof + (struct cpustat_struct), GFP_KERNEL); + + if (new_stat) { + new_stat->id = cpu; + new_stat->flags = CPUF_ENABLED; + atomic_set(&(new_stat->refcount), 0); + } + + pset_cpustats[cpu] = new_stat; + + return new_stat; +} + +/* ************************************************************ + * pset_add_pset_to_list + * Add a pset_struct to the ordered 
pset_list + * + * does not return a value + * ***********************************************************/ +void pset_add_pset_to_list(struct pset_struct *pset) +{ + struct pset_struct **tmp; + + PRINTD("pset_add_pset_to_list(pset id %d)\n", pset->id); + + tmp = &pset_list; + while (*tmp) { + if ((*tmp)->id < pset->id) { + tmp = &((*tmp)->next); + } else { + break; + } + } + + /* pset_list is implicitly handled */ + pset->next = *tmp; + if (*tmp) { + pset->prev = (*tmp)->prev; + (*tmp)->prev = pset; + } + *tmp = pset; +} + +/* ************************************************************ + * pset_remove_pset_from_list + * Remove a pset_struct from the ordered pset_list + * + * does not return a value + * ***********************************************************/ +void pset_remove_pset_from_list(struct pset_struct *pset) +{ + PRINTD("pset_remove_pset_from_list(pset id %d)\n", pset->id); + + /* explicitly handle this */ + if (pset_list == pset) { + pset_list = pset->next; + } + + if (pset->next) { + pset->next->prev = pset->prev; + } + if (pset->prev) { + pset->prev->next = pset->next; + } +} + +/* ************************************************************ + * pset_add_cpus_to_pset + * add a cpumask_t mask of cpus to a pset_struct + * + * does not return a value + * ***********************************************************/ +void pset_add_cpus_to_pset(cpumask_t cpus, struct pset_struct *pset) +{ + cpumask_t tmp; + int i = 0; + + PRINTD("pset_add_cpus_to_pset(%lu, pset id %d)\n", cpus, pset->id); + + /* + * only add cpus we don't have - this check is so we don't add one to + * cpumask, while it already is in dis_cpumask + */ + cpus ^= ((pset->cpumask|pset->dis_cpumask) & cpus); + pset->cpumask |= cpus; + + tmp = cpus; + while (tmp) { + if (cpus & (cpumask_t)1<refcount)); + /* make sure to obey the cpu restriction level */ + if (pset_cpustats[i]->flags != CPUF_ENABLED) + pset_disable_cpus_in_pset((cpumask_t)1<>= 1; + i++; + } +} + +/* 
************************************************************ + * pset_remove_cpus_from_pset + * remove a cpumask_t mask of cpus from a pset_struct + * + * does not return a value + * ***********************************************************/ +void pset_remove_cpus_from_pset(cpumask_t cpus, struct pset_struct *pset) +{ + cpumask_t tmp; + int i = 0; + + PRINTD("pset_remove_cpus_from_pset(%lu, pset id %d)\n",cpus,pset->id); + + tmp = cpus & (pset->cpumask|pset->dis_cpumask); + + /* remove the cpus from the active and disable masks */ + pset->cpumask &= ~cpus; + pset->dis_cpumask &= ~cpus; + + while (tmp) { + if (cpus & (cpumask_t)1<refcount)); + } + tmp >>= 1; + i++; + } +} + +/* ************************************************************ + * pset_add_task_to_pset + * associate a pset with a task + * + * does not return a value + * ***********************************************************/ +void pset_add_task_to_pset(struct task_struct *p, struct pset_struct *pset) +{ + PRINTD("pset_add_task_to_pset(task id %d, pset id %d)\n", + p->pid ,pset->id); + + if (p->pset) { + pset_remove_task_from_pset(p); + } + + p->pset = pset; + atomic_inc(&(pset->refcount)); +} + +/* ************************************************************ + * pset_remove_task_from_pset + * disassociate a task from any pset + * this is in the exit() path - try to keep it small + * + * does not return a value + * ***********************************************************/ +void pset_remove_task_from_pset(struct task_struct *p) +{ + /* this is an obnoxious message - this happens a LOT */ + /* PRINTD("pset_remove_task_from_pset(task id %d)\n",p->pid); */ + + if (!p->pset) + return; + + if (atomic_read(&(p->pset->refcount))) { + atomic_dec(&(p->pset->refcount)); + } else { + printk("Pset: Tried to decrement zero refcount for pset %d, + pid %d\n", p->pset->id, p->pid); + } + +#ifdef FIGURED_OUT_NONPREEMPTIVE + /* if this is a stolen set, and the last task, clean it up */ + /* FIXME: I don't like this 
here... */ + if (pset_is_nonpreemptive_pset(p->pset) + && !atomic_read(&(p->pset->refcount))) { + pset_remove_cpus_from_pset( + p->pset->cpumask|p->pset->dis_cpumask, p->pset); + kfree(p->pset); + } +#endif + + p->pset = NULL; +} + +/* ************************************************************ + * pset_disable_cpus_in_pset + * move a set of cpus from the cpumask to dis_cpumask field + * + * does not return a value + * ***********************************************************/ +void pset_disable_cpus_in_pset(cpumask_t cpus, struct pset_struct *pset) +{ + PRINTD("pset_disable_cpus_in_pset(%lu, pset id %d)\n", cpus, + pset->id); + + /* only deal with CPUs that are active in this set */ + cpus &= pset->cpumask; + + pset->cpumask &= ~cpus; + pset->dis_cpumask |= cpus; +} + +/* ************************************************************ + * pset_enable_cpus_in_pset + * move a set of cpus from the dis_cpumask to cpumask field + * + * does not return a value + * ***********************************************************/ +void pset_enable_cpus_in_pset(cpumask_t cpus, struct pset_struct *pset) +{ + PRINTD("pset_enable_cpus_in_pset(%lu, pset id %d)\n", cpus, + pset->id); + + /* only deal with CPUs that are disabled in this set */ + cpus &= pset->dis_cpumask; + + pset->dis_cpumask &= ~cpus; + pset->cpumask |= cpus; +} + +/* ************************************************************ + * pset_get_proc_psets + * fill buf with the output for /proc/psets + * + * returns the length of the buffer + * ***********************************************************/ +int pset_get_proc_psets(char *buf) +{ + int len; + struct pset_struct *pset; + + PRINTD("pset_get_proc_psets(buf)\n"); + + len=sprintf(buf, " Pset NumCPUs CPUs (active) (disabled) Refs Flags\n"); + + read_lock(&pset_lock); + for (pset = pset_list; pset; pset = pset->next) { + int count = 0; + cpumask_t tmp = pset->cpumask | pset->dis_cpumask; + + /* avoid overflowing buffer */ + if (len > EXEC_PAGESIZE - 100) { + 
break; + } + + while (tmp) { + if (tmp & 1) { + count++; + } + tmp >>= 1; + } + + len += sprintf(buf+len, "%5d %5d 0x%08lX 0x%08lx %5d ", + pset->id, count, pset->cpumask, pset->dis_cpumask, + atomic_read(&(pset->refcount))); + + if (pset_is_master_pset(pset)) { + len += sprintf(buf+len, "master\n"); + } else if (pset_is_cpu_pset(pset)) { + len += sprintf(buf+len, "cpu\n"); + } else if (pset_is_sys_pset(pset)) { + len += sprintf(buf+len, "sys\n"); + } else if (pset_is_nonpreemptive_pset(pset)) { + len += sprintf(buf+len, "non-preemptive\n"); + } else if (pset_is_user_pset(pset)) { + len += sprintf(buf+len, "user\n"); + } + } + read_unlock(&pset_lock); + + return len; +} + +/* ************************************************************ + * pset_get_proc_pid_pset + * fill buf with the output for /proc/PID/pset + * + * returns the length of the buffer + * Modified by Fred -- now takes task_struct*, instead of PID + * ***********************************************************/ +int pset_get_proc_pid_pset(struct task_struct *p, char *buf) +{ + int len; + + /* Do I still need tasklist_lock ? -- F. */ + read_lock(&tasklist_lock); + PRINTD("pset_get_proc_pid_pset(pid %d, buf)\n", p ? 
p->pid : -1); + if (!p || !p->mm) { + read_unlock(&tasklist_lock); + return 0; + } + + if (p->pset == NULL) { + len = sprintf(buf, "None\n"); + } else { + len = sprintf(buf, "%d\n", p->pset->id); + } + read_unlock(&tasklist_lock); + + return len; +} + +/* ************************************************************ + * pset_get_proc_cpustats + * fill the buffer with info for /proc/cpustats + * + * return the length of the buffer + * ***********************************************************/ +int pset_get_proc_cpustats(char *buf) +{ + int len; + int i; + + PRINTD("pset_get_proc_cpustats(buf)\n"); + + len=sprintf(buf, " CPU Refs Flags\n"); + + read_lock(&pset_cpustat_lock); + for (i = 0; i < NR_CPUS; i++) { + if (!pset_cpustats[i]) { + continue; + } + + len += sprintf(buf+len, "%5d %5d ", + pset_cpustats[i]->id, + atomic_read(&(pset_cpustats[i]->refcount))); + + if (pset_cpustats[i]->flags & CPUF_ENABLED) { + len += sprintf(buf+len, "enabled "); + } + if (pset_cpustats[i]->flags & CPUF_RESTRICTED) { + len += sprintf(buf+len, "restricted "); + } + if (pset_cpustats[i]->flags & CPUF_ISOLATED) { + len += sprintf(buf+len, "isolated "); + } + if (pset_cpustats[i]->flags & CPUF_DISABLED) { + len += sprintf(buf+len, "disabled "); + } + if (pset_cpustats[i]->flags & CPUF_NONPREEMPTIVE) { + len += sprintf(buf+len, "non-preemptive "); + } + len += sprintf(buf+len, "\n"); + } + read_unlock(&pset_cpustat_lock); + + return len; +} + + diff -u -p -L linux.2342.current/kernel/sched.c.orig linux.2342.current/kernel/sched.c --- linux.2342.current/kernel/sched.c +++ linux.2342.current/kernel/sched.c Sun Feb 6 23:00:00 2000 @@ -9,7 +9,9 @@ * make semaphores SMP safe * 1998-11-19 Implemented schedule_timeout() and related stuff * by Andrea Arcangeli - * 1998-12-28 Implemented better SMP scheduling by Ingo Molnar + * 1998-12-28 Implemented better SMP scheduling by Ingo Molnar + * 1999-04-03 Added pset handling - Tim Hockin + * 2000-01-06 Modified pset handling - Fred Barnes */ /* @@ 
-24,6 +26,9 @@ #include #include #include +#ifdef __SMP__ +#include +#endif #include #include @@ -84,6 +89,7 @@ struct kernel_stat kstat = { 0 }; #define idle_task(cpu) (init_tasks[cpu_number_map(cpu)]) #define can_schedule(p) (!(p)->has_cpu) +#define pset_can_schedule(p,cpu) (!(p->pset) || ((p->pset->cpumask)&(1<processor == this_cpu) weight += PROC_CHANGE_PENALTY; -#endif + + /* let's not deal with psets on the idle task */ + if (!p->pid) + goto pset_out; + + /* give p a good advantage if it has been passed over */ + weight += ((p->pset_passedover)*(PROC_CHANGE_PENALTY/2)); + + /* give an advantage if this cpu is in a non-enabled state */ + if (pset_cpustats[this_cpu]->flags != CPUF_ENABLED) + goto pset_cpu_not_enabled; +pset_out: +#endif /* __SMP__ */ /* .. and a slight advantage to the current MM */ if (p->mm == this_mm) @@ -147,6 +167,30 @@ static inline int goodness(struct task_s out: return weight; + +#ifdef __SMP__ +pset_cant_sched: + /* + * penalise p if it can't be run on this_cpu + * this has the effect of making us recalc counters + * more frequently if this_cpu is in a restricted state + */ + weight -= ((p->pset_passedover) * PROC_CHANGE_PENALTY); + goto pset_out; +pset_cpu_not_enabled: + switch (pset_cpustats[this_cpu]->flags) { + case CPUF_RESTRICTED: + weight += PSET_CPU_RESTRICTED_BOOST; + break; + case CPUF_ISOLATED: + weight += PSET_CPU_ISOLATED_BOOST; + break; + case CPUF_ISOLATED|CPUF_NONPREEMPTIVE: + weight += PSET_CPU_NONPREEMPTIVE_BOOST; + break; + } + goto pset_out; +#endif } /* @@ -445,6 +489,7 @@ asmlinkage void schedule(void) struct task_struct *prev, *next, *p; struct list_head *tmp; int this_cpu, c; + unsigned long ss_flags; if (!current->active_mm) BUG(); if (tq_scheduler) @@ -470,7 +515,7 @@ handle_bh_back: */ sched_data = & aligned_data[this_cpu].schedule_data; - spin_lock_irq(&runqueue_lock); + spin_lock_irqsave(&runqueue_lock, ss_flags); /* move an exhausted RR process to be last.. 
*/ if (prev->policy == SCHED_RR) @@ -514,6 +559,16 @@ still_running_back: tmp = tmp->next; } +#ifdef __SMP__ + /* see if next is actually able to run on this_cpu */ + if (!pset_can_schedule(next, this_cpu)) + goto pset_cant_sched; + + /* we can happily schedule, passedover gets reset */ + next->pset_passedover = 0; +pset_cant_sched_back: +#endif + /* Do we need to re-calculate counters? */ if (!c) goto recalculate; @@ -639,6 +694,27 @@ scheduling_in_interrupt: printk("Scheduling in interrupt\n"); *(int *)0 = 0; return; + +#ifdef __SMP__ +pset_cant_sched: + /* increment this - next _SHOULD_ be running */ + next->pset_passedover++; + + /* see if we can run next now, just on another cpu */ + /* this also serves as a delay for other CPUs to grab this lock */ + reschedule_idle(next, ss_flags); + /* can't schedule -- get lock again */ + spin_lock_irqsave(&runqueue_lock, ss_flags); + + if (prev->pid) { + /* break a process context by running idle */ + next = idle_task(this_cpu); + goto pset_cant_sched_back; + } + + /* try again */ + goto repeat_schedule; +#endif } static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, const int sync) diff -u -p -L linux.2342.current/kernel/sysmp.c.orig linux.2342.current/kernel/sysmp.c --- linux.2342.current/kernel/sysmp.c +++ linux.2342.current/kernel/sysmp.c Sun Feb 6 23:28:42 2000 @@ -0,0 +1,1048 @@ +/* + * Support for sysmp() system call + * Tim Hockin 1998-1999 + * based on work by Stuart Herbert (S.Herbert@sheffield.ac.uk) + */ + +#include + +#include +#include +#include +#include +#include +#include + + +/* private functions */ +int sysmp_pgsize(void); +int sysmp_nprocs(void); +int sysmp_naprocs(void); +int sysmp_curproc(void); +int sysmp_stat(struct pda_stat *); +int sysmp_restrict(int); +int sysmp_empower(int); +int sysmp_isolate(int); +int sysmp_unisolate(int); +int sysmp_disable(int); +int sysmp_enable(int); +int sysmp_nonpreemptive(int); +int sysmp_preemptive(int); +int sysmp_mustrun(int); +int 
sysmp_mustrun_pid(int, pid_t); +int sysmp_getmustrun(void); +int sysmp_getmustrun_pid(pid_t); +int sysmp_runanywhere(void); +int sysmp_runanywhere_pid(pid_t); +int sysmp_pset_create(int, cpumask_t); +int sysmp_pset_delete(int); +int sysmp_pset_add(int, cpumask_t); +int sysmp_pset_remove(int, cpumask_t); + +/* ************************************************************ + * sys_sysmp + * The system call between us and the outside world + * + * return a positive value or zero on success (depends on cmd) + * return a negative value on error, indicating the error + * ***********************************************************/ +asmlinkage int sys_sysmp(int cmd, int i1, int i2, int i3, unsigned long ul) +{ + int result = 0; + + PRINTD("sys_sysmp(%d, %d, %d, %d, %lu)\n", cmd, i1, + i2, i3, ul); + + /* All functions are stubbed here, but not all are implemented */ + switch (cmd) { + case MP_PGSIZE: + result = sysmp_pgsize(); + break; + + case MP_SCHED: + result = pset_not_implemented(); + break; + + case MP_NPROCS: + result = sysmp_nprocs(); + break; + + case MP_NAPROCS: + result = sysmp_naprocs(); + break; + + case MP_CURPROC: + result = sysmp_curproc(); + break; + + case MP_STAT: + read_lock(&pset_cpustat_lock); + result = sysmp_stat((struct pda_stat *)ul); + read_unlock(&pset_cpustat_lock); + break; + + case MP_RESTRICT: + write_lock(&pset_lock); + write_lock(&pset_cpustat_lock); + result = sysmp_restrict(i1); + write_unlock(&pset_cpustat_lock); + write_unlock(&pset_lock); + break; + + case MP_EMPOWER: + write_lock(&pset_lock); + write_lock(&pset_cpustat_lock); + result = sysmp_empower(i1); + write_unlock(&pset_cpustat_lock); + write_unlock(&pset_lock); + break; + + case MP_ISOLATE: + write_lock(&pset_lock); + write_lock(&pset_cpustat_lock); + result = sysmp_isolate(i1); + write_unlock(&pset_cpustat_lock); + write_unlock(&pset_lock); + break; + + case MP_UNISOLATE: + write_lock(&pset_lock); + write_lock(&pset_cpustat_lock); + result = sysmp_unisolate(i1); + 
write_unlock(&pset_cpustat_lock); + write_unlock(&pset_lock); + break; + + case MP_DISABLE: + write_lock(&pset_lock); + write_lock(&pset_cpustat_lock); + result = sysmp_disable(i1); + write_unlock(&pset_cpustat_lock); + write_unlock(&pset_lock); + break; + + case MP_ENABLE: + write_lock(&pset_lock); + write_lock(&pset_cpustat_lock); + result = sysmp_enable(i1); + write_unlock(&pset_cpustat_lock); + write_unlock(&pset_lock); + break; + + case MP_NONPREEMPTIVE: + /* FIXME: do we need tasklist_lock for current ? */ + /* FIXME: this doesn't quite work as expected. + * we end up running idle some of the time on the + * non-preemptive CPU. Maybe this should be removed + * completely, or else we need per-CPU run-queues. + */ + write_lock(&pset_lock); + write_lock(&pset_cpustat_lock); + result = sysmp_nonpreemptive(i1); + write_unlock(&pset_cpustat_lock); + write_unlock(&pset_lock); + break; + + case MP_PREEMPTIVE: + /* FIXME: do we need tasklist_lock for current ? */ + /* FIXME: should exit() call this if the last process + * derived from the caller of NONPREEMPTIVE does not? + */ + write_lock(&pset_lock); + write_lock(&pset_cpustat_lock); + result = sysmp_preemptive(i1); + write_unlock(&pset_cpustat_lock); + write_unlock(&pset_lock); + break; + + case MP_CLOCK: + result = pset_not_implemented(); + break; + + case MP_FASTCLOCK: + result = pset_not_implemented(); + break; + + case MP_MUSTRUN: + /* FIXME: do we need tasklist_lock for current ? */ + write_lock(&pset_lock); + result = sysmp_mustrun(i1); + write_unlock(&pset_lock); + break; + + case MP_MUSTRUN_PID: + write_lock(&pset_lock); + /* FIXME: is read_lock sufficient here? */ + read_lock(&tasklist_lock); + result = sysmp_mustrun_pid(i1, (pid_t)i2); + read_unlock(&tasklist_lock); + write_unlock(&pset_lock); + break; + + case MP_GETMUSTRUN: + /* FIXME: do we need tasklist_lock for current ? 
*/ + write_lock(&pset_lock); + result = sysmp_getmustrun(); + write_unlock(&pset_lock); + break; + + case MP_GETMUSTRUN_PID: + write_lock(&pset_lock); + /* FIXME: is read_lock sufficient here? */ + read_lock(&tasklist_lock); + result = sysmp_getmustrun_pid((pid_t)i1); + read_unlock(&tasklist_lock); + write_unlock(&pset_lock); + break; + + case MP_RUNANYWHERE: + /* FIXME: do we need tasklist_lock for current ? */ + write_lock(&pset_lock); + result = sysmp_runanywhere(); + write_unlock(&pset_lock); + break; + + case MP_RUNANYWHERE_PID: + write_lock(&pset_lock); + /* FIXME: is read_lock sufficient here? */ + read_lock(&tasklist_lock); + result = sysmp_runanywhere_pid((pid_t)i1); + read_unlock(&tasklist_lock); + write_unlock(&pset_lock); + break; + + case MP_KERNADDR: + result = pset_not_implemented(); + break; + + case MP_SASZ: + result = pset_not_implemented(); + break; + + case MP_SAGET: + /* make sure to note the order of arguments... + i1 is int, i2 is int, ul is (char *). + they should go to the handler as i1, ul, i2. + */ + result = pset_not_implemented(); + break; + + case MP_SAGET1: + /* make sure to note the order of arguments... + i1 is int, i2 is int, ul is (char*), i3 is int. + they should go to the handler as + i1, ul, i2, i3. 
+ */ + result = pset_not_implemented(); + break; + + case MP_PSET: + switch (i1) { + case MPPS_CREATE: + write_lock(&pset_lock); + write_lock(&pset_cpustat_lock); + result = sysmp_pset_create((int)i2, (cpumask_t)ul); + write_unlock(&pset_cpustat_lock); + write_unlock(&pset_lock); + break; + + case MPPS_DELETE: + write_lock(&pset_lock); + write_lock(&pset_cpustat_lock); + result = sysmp_pset_delete((int)i2); + write_unlock(&pset_cpustat_lock); + write_unlock(&pset_lock); + break; + + case MPPS_ADD: + write_lock(&pset_lock); + write_lock(&pset_cpustat_lock); + result = sysmp_pset_add(i2, (cpumask_t)ul); + write_unlock(&pset_cpustat_lock); + write_unlock(&pset_lock); + break; + + case MPPS_REMOVE: + write_lock(&pset_lock); + write_lock(&pset_cpustat_lock); + result = sysmp_pset_remove(i2, (cpumask_t)ul); + write_unlock(&pset_cpustat_lock); + write_unlock(&pset_lock); + break; + + default: + result = -EINVAL; + } + break; + default: + result = -EINVAL; + } + + PRINTD("sys_sysmp() returning %d\n", result); + + return result; +} + +/* *************************************************************** + * sysmp_pgsize + * get the system pagesize + * + * return the value of EXEC_PAGESIZE + * does not fail + * **************************************************************/ +int sysmp_pgsize(void) +{ + PRINTD("sysmp_pgsize()\n"); + + return EXEC_PAGESIZE; +} + +/* *************************************************************** + * sysmp_nprocs + * get the number of processors in the system + * + * return the count of cpus + * does not fail + * **************************************************************/ +int sysmp_nprocs(void) +{ + PRINTD("sysmp_nprocs()\n"); + + return smp_num_cpus; +} + +/* *************************************************************** + * sysmp_naprocs + * get the number of active processors in the master set + * + * return the count of active cpus + * does not fail + * **************************************************************/ +int 
sysmp_naprocs(void) +{ + cpumask_t tmp; + int count = 0; + + PRINTD("sysmp_naprocs()\n"); + + tmp = pset_master->cpumask; + while (tmp) { + if (tmp & 1) { + count++; + } + tmp >>= 1; + } + + return count; +} + +/* *************************************************************** + * sysmp_curproc + * get the number of the current processor + * + * returns the current CPU number + * does not fail + * **************************************************************/ +int sysmp_curproc(void) +{ + PRINTD("sysmp_curproc()\n"); + + return current->processor; +} + +/* ************************************************************ + * sysmp_stat + * fill a buffer with pda_stat structures, from pset_cpustats + * + * returns 0 on success + * returns -1 if buffer is invalid + * ***********************************************************/ +int sysmp_stat(struct pda_stat *ptr) +{ + struct pda_stat tmp; + int i; + + PRINTD("pset_mp_stat(%p)\n", ptr); + + if (verify_area(VERIFY_WRITE, ptr, + sizeof(struct pda_stat)*smp_num_cpus)) { + return -1; + } + + for (i = 0; i < smp_num_cpus; i++) { + struct cpustat_struct *cs; + + cs = pset_cpustats[cpu_logical_map(i)]; + tmp.p_cpuid = cs->id; + tmp.p_count = atomic_read(&(cs->refcount)); + tmp.p_flags = cs->flags; + + copy_to_user(&ptr[i], &tmp, sizeof(struct pda_stat)); + } + + return 0; +} + +/* ************************************************************ + * sysmp_restrict + * restrict a processor to running only MUSTRUN processes + * i.e. 
- disable it in the master pset + * + * returns 0 on success + * returns -EPERM if the caller is not super-user + * returns -EINVAL if the requested CPU does not exist + * returns -EBUSY if the requested CPU is the last unrestricted CPU + * returns -EBUSY if the CPU is not ENABLED + * ***********************************************************/ +int sysmp_restrict(int cpu) +{ + PRINTD("sysmp_restrict(%d)\n", cpu); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!pset_is_valid_cpumask((cpumask_t)1<flags != CPUF_ENABLED) + return -EBUSY; + + /* set CPU flags */ + pset_unset_cpu_flag(cpu, CPUF_ENABLED); + pset_set_cpu_flag(cpu, CPUF_RESTRICTED); + + /* set cpumasks */ + pset_disable_cpus_in_pset((cpumask_t)1<processor == cpu) { + current->need_resched = 1; + } + + return 0; +} + +/* ************************************************************ + * sysmp_empower + * enable a processor to run any process + * i.e. - enable it in the master pset + * + * returns 0 on success + * returns 0 if the CPU is not restricted + * returns -EPERM if the caller is not super-user + * returns -EINVAL if the requested CPU does not exist + * ***********************************************************/ +int sysmp_empower(int cpu) +{ + PRINTD("sysmp_empower(%d)\n", cpu); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!pset_is_valid_cpumask((cpumask_t)1<flags == CPUF_RESTRICTED) { + /* set CPU flags */ + pset_unset_cpu_flag(cpu, CPUF_RESTRICTED); + pset_set_cpu_flag(cpu, CPUF_ENABLED); + + /* set the cpumask */ + pset_enable_cpus_in_pset((cpumask_t)1<flags != CPUF_ENABLED) + return -EBUSY; + + /* set CPU flags */ + pset_unset_cpu_flag(cpu, CPUF_ENABLED); + pset_set_cpu_flag(cpu, CPUF_ISOLATED); + + /* set cpumasks - turn it off in all sets but its own */ + pset = pset_list; + while (pset) { + if (pset_cpu_enabled_in_pset(cpu, pset) && pset->id != cpu) { + pset_disable_cpus_in_pset((cpumask_t)1<next; + } + + /* did we just isolate the current processor ? 
*/ + if (current->processor == cpu) { + current->need_resched = 1; + } + + return 0; +} + +/* ************************************************************ + * sysmp_unisolate + * enable a processor to run any process + * i.e. - enable it in any set that has it, maybe the master set + * + * returns 0 on success + * returns 0 if the CPU is not isolated + * returns -EPERM if the caller is not super-user + * returns -EINVAL if the requested CPU does not exist + * ***********************************************************/ +int sysmp_unisolate(int cpu) +{ + struct pset_struct *pset; + + PRINTD("sysmp_unisolate(%d)\n", cpu); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!pset_is_valid_cpumask((cpumask_t)1<flags == CPUF_ISOLATED) { + /* set CPU flags */ + pset_unset_cpu_flag(cpu, CPUF_ISOLATED); + pset_set_cpu_flag(cpu, CPUF_ENABLED); + + /* set cpumasks */ + pset = pset_list; + while (pset) { + if (pset_cpu_disabled_in_pset(cpu, pset)) { + pset_enable_cpus_in_pset((cpumask_t)1<next; + } + } + return 0; +} + +/* ************************************************************ + * sysmp_disable + * disable a procesor from running any processes (take it offline) + * i.e. 
- disable it in all sets + * + * returns 0 on success + * returns -EPERM if the caller is not super-user + * returns -EINVAL if the requested CPU does not exist + * returns -EBUSY if the requested CPU is the last unrestricted CPU + * returns -EBUSY if the CPU is not ENABLED + * ***********************************************************/ +int sysmp_disable(int cpu) +{ + struct pset_struct *pset; + + PRINTD("sysmp_disable(%d)\n", cpu); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!pset_is_valid_cpumask((cpumask_t)1<flags != CPUF_ENABLED) + return -EBUSY; + + /* set CPU flags */ + pset_unset_cpu_flag(cpu, CPUF_ENABLED); + pset_set_cpu_flag(cpu, CPUF_DISABLED); + + /* set cpumasks - turn it off in all sets */ + pset = pset_list; + while (pset) { + if (pset_cpu_enabled_in_pset(cpu, pset)) { + pset_disable_cpus_in_pset((cpumask_t)1<next; + } + + /* did we just disable the current processor ? */ + if (current->processor == cpu) { + current->need_resched = 1; + } + + return 0; +} + +/* ************************************************************ + * sysmp_enable + * enable a processor to run processes + * i.e. 
- enable it in any set that has it + * + * returns 0 on success + * returns 0 if the CPU is not disabled + * returns -EPERM if the caller is not super-user + * returns -EINVAL if the requested CPU does not exist + * ***********************************************************/ +int sysmp_enable(int cpu) +{ + struct pset_struct *pset; + + PRINTD("sysmp_enable(%d)\n", cpu); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!pset_is_valid_cpumask((cpumask_t)1<flags == CPUF_DISABLED) { + /* set CPU flags */ + pset_unset_cpu_flag(cpu, CPUF_DISABLED); + pset_set_cpu_flag(cpu, CPUF_ENABLED); + + /* set cpumasks */ + pset = pset_list; + while (pset) { + if (pset_cpu_disabled_in_pset(cpu, pset)) { + pset_enable_cpus_in_pset((cpumask_t)1<next; + } + } + + return 0; +} + +/* ************************************************************ + * sysmp_nonpreemptive + * restrict a CPU to running the current and child processes + * + * returns 0 on success + * returns -EPERM if user is not root + * returns -EINVAL if the requested CPU does not exist + * returns -EBUSY if the requested CPU is the last unrestricted CPU + * returns -EBUSY if the CPU is not isolated + * returns -ENOMEM if no memory is available for a new set + * ***********************************************************/ +int sysmp_nonpreemptive(int cpu) +{ +#ifdef FIGURED_OUT_NONPREEMPTIVE + struct pset_struct *pset, + *new; + + PRINTD("sysmp_nonpreemptive(%d)\n", cpu); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!pset_is_valid_cpumask((cpumask_t)1<flags != CPUF_ISOLATED) + return -EBUSY; + + /* we create a set, but do not add it to the pset_list */ + /* do this here, so we can return if no mem is available */ + new = pset_allocate_pset(-1, PSET_FL_NONPREEMPTIVE); + if (!new) + return -ENOMEM; + + /* set CPU flags */ + pset_unset_cpu_flag(cpu, CPUF_ENABLED); + pset_set_cpu_flag(cpu, CPUF_NONPREEMPTIVE); + + /* set cpumasks - we are isolated, so turn it off in its own set */ + pset = pset_list; + while 
(pset) { + if (pset->id == cpu) { + pset_disable_cpus_in_pset((cpumask_t)1<next; + } + + /* set up the new set */ + pset = current->pset; + new->id = (pset) ? (0-(pset->id)) : (0-PSET_MASTER_PSET); + + pset_add_cpus_to_pset((cpumask_t)1<pset */ + pset_remove_task_from_pset(current); + pset_add_task_to_pset(current, new); +#endif + + return 0; +} + +/* ************************************************************ + * sysmp_preemptive + * enable a CPU to run general processes + * + * returns 0 on success + * returns 0 if the CPU is not non-preemptive + * returns -EPERM if user is not root + * returns -EINVAL if the requested CPU does not exist + * ***********************************************************/ +int sysmp_preemptive(int cpu) +{ +#ifdef FIGURED_OUT_NONPREEMPTIVE + struct pset_struct *pset; + + PRINTD("sysmp_preemptive(%d)\n", cpu); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!pset_is_valid_cpumask((cpumask_t)1<flags == CPUF_NONPREEMPTIVE) { + /* set CPU flags - we should be isolated now */ + pset_unset_cpu_flag(cpu, CPUF_NONPREEMPTIVE); + + /* set cpumasks - we should still be isolated */ + pset = pset_list; + while (pset) { + if (pset->id == cpu) { + pset_enable_cpus_in_pset((cpumask_t)1<next; + } + + /* if this process was bound to the stolen CPU, restore its state */ + if (pset_is_nonpreemptive_pset(current->pset) && + pset_cpu_is_in_pset(cpu, current->pset)) { + pset = pset_find_pset(0-current->pset->id); + + /* this will handle freeing it if it is unused */ + pset_remove_task_from_pset(current); + pset_add_task_to_pset(current, pset ? pset : pset_master); + } + } + + /* + * just as a note: if a different process calls this than called + * nonpreemptive, the original process will still be bound to this + * pset. That process will eventually be removed from the pset, + * via a call here, a mustrun, or exit -- see the code in + * pset_remove_task_from_pset for details. 
+ */ +#endif + + return 0; +} + +/* ************************************************************ + * sysmp_mustrun + * restrict the current process to a pset + * + * pset_id is looked up with pset_find_pset(); when it is found the + * current task is removed from its present pset and added to that one + * + * returns 0 on success + * returns -EINVAL if the requested pset does not exist + * ***********************************************************/ +int sysmp_mustrun(int pset_id) +{ + struct pset_struct *pset; + + PRINTD("sysmp_mustrun(%d)\n", pset_id); + + pset = pset_find_pset(pset_id); + if (pset) { + pset_remove_task_from_pset(current); + pset_add_task_to_pset(current, pset); + + /* are we currently on some other cpu, not in this set ? then flag ourselves for rescheduling */ + if (!pset_cpu_is_in_pset(current->processor, pset)) { + current->need_resched = 1; + } + + return 0; + } + + return -EINVAL; +} + +/* ************************************************************ + * sysmp_mustrun_pid + * restrict a process to a pset + * + * the caller may modify the target if its uid or euid equals the + * target's uid or suid, or if it has CAP_SYS_NICE; this is checked + * before the pset id is looked up + * + * NOTE(review): the task returned by find_task_by_pid() is used without + * any visible locking or reference - confirm that callers of this + * function keep the task from going away + * + * returns 0 on success + * returns -ESRCH if the target task does not exist + * returns -EPERM if the target process is not modifiable by this user + * returns -EINVAL if the requested pset does not exist + * ***********************************************************/ +int sysmp_mustrun_pid(int pset_id, pid_t pid) +{ + struct pset_struct *pset; + struct task_struct *p; + + PRINTD("sysmp_mustrun_pid(%d, %d)\n", pset_id, pid); + + p = find_task_by_pid(pid); + if (!p) { + return -ESRCH; + } + + /* be sure we can modify this process */ + if ((current->euid ^ p->suid) && (current->euid ^ p->uid) && + (current->uid ^ p->suid) && (current->uid ^ p->uid) && + (!capable(CAP_SYS_NICE))) { + return -EPERM; + } + + pset = pset_find_pset(pset_id); + if (pset) { + pset_remove_task_from_pset(p); + pset_add_task_to_pset(p, pset); + + /* is p currently on some other cpu, not in this set ? 
*/ + if (!pset_cpu_is_in_pset(p->processor, pset)) { + p->need_resched = 1; + } + + return 0; + } + + return -EINVAL; +} + +/* ************************************************************ + * sysmp_getmustrun + * return the pset id for the current process + * + * returns the id of the pset the current process is bound to on success + * returns -EINVAL if the requested process is unrestricted, i.e. it has + * no pset or its pset id is PSET_MASTER_PSET + * ***********************************************************/ +int sysmp_getmustrun(void) +{ + PRINTD("sysmp_getmustrun()\n"); + + if (!(current->pset)) { + return -EINVAL; + } else if (current->pset->id == PSET_MASTER_PSET) { + return -EINVAL; + } else { + return current->pset->id; + } +} + +/* ************************************************************ + * sysmp_getmustrun_pid + * return the pset id for the specified process + * + * NOTE(review): no permission check is made here, and the task returned + * by find_task_by_pid() is used without any visible locking - confirm + * both are intended + * + * returns the id of the pset the target process is bound to on success + * returns -ESRCH if the target task does not exist + * returns -EINVAL if the requested process is unrestricted, i.e. it has + * no pset or its pset id is PSET_MASTER_PSET + * ***********************************************************/ +int sysmp_getmustrun_pid(pid_t pid) +{ + struct task_struct *p; + + PRINTD("sysmp_getmustrun_pid(%d)\n", pid); + + p = find_task_by_pid(pid); + if (!p) { + return -ESRCH; + } + + if (!(p->pset)) { + return -EINVAL; + } else if (p->pset->id == PSET_MASTER_PSET) { + return -EINVAL; + } else { + return p->pset->id; + } +} + +/* ************************************************************ + * sysmp_runanywhere + * allow the current process to run on any active CPU + * i.e. - add it to the master_pset + * + * returns 0 on success + * ***********************************************************/ +int sysmp_runanywhere(void) +{ + PRINTD("sysmp_runanywhere()\n"); + + /* leave the present pset first, then join pset_master */ + pset_remove_task_from_pset(current); + pset_add_task_to_pset(current, pset_master); + + return 0; +} + +/* ************************************************************ + * sysmp_runanywhere_pid + * allow a process to run on any active CPU + * i.e. 
- add it to the master_pset + * + * returns 0 on success + * returns -ESRCH if the target task does not exist + * returns -EPERM if the target process is not modifiable by this user + * ***********************************************************/ +int sysmp_runanywhere_pid(pid_t pid) +{ + struct task_struct *p; + + PRINTD("sysmp_runanywhere_pid(%d)\n", pid); + + p = find_task_by_pid(pid); + if (!p) { + return -ESRCH; + } + + /* be sure we can modify this process: the caller's uid or euid must equal the target's uid or suid, unless the caller has CAP_SYS_NICE */ + if ((current->euid ^ p->suid) && (current->euid ^ p->uid) && + (current->uid ^ p->suid) && (current->uid ^ p->uid) && + (!capable(CAP_SYS_NICE))) { + return -EPERM; + } + + pset_remove_task_from_pset(p); + pset_add_task_to_pset(p, pset_master); + + return 0; +} + +/* ************************************************************ + * sysmp_pset_create + * create a new pset + * + * pset_id may be PSET_ID_NEW to have an id picked by + * pset_find_new_pset_id(); cpus is filtered through + * pset_preen_cpumask() before being added to the new pset + * + * the -EEXIST lookup is done on pset_id as passed in, before + * PSET_ID_NEW is replaced by a real id + * + * returns the id of the created pset (never negative) on success + * returns -EPERM if the caller lacks CAP_SYS_ADMIN + * returns -EEXIST if the specified pset already exists + * returns -EBUSY if PSET_ID_NEW was requested, but none is free + * returns -EINVAL if an invalid (negative) pset id was requested + * returns -ENOMEM if the pset struct can not be allocated + * ***********************************************************/ +int sysmp_pset_create(int pset_id, cpumask_t cpus) +{ + struct pset_struct *pset; + + PRINTD("sysmp_pset_create(%d, %lu)\n", pset_id, cpus); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (pset_find_pset(pset_id)) + return -EEXIST; + if (pset_id == PSET_ID_NEW) { + pset_id = pset_find_new_pset_id(); + if (pset_id < 0) + return -EBUSY; + } + if (pset_id < 0) + return -EINVAL; + + pset = pset_allocate_pset(pset_id, PSET_FL_USER); + if (!pset) + return -ENOMEM; + + /* the pset is linked into the list before its CPUs are added */ + pset_add_pset_to_list(pset); + pset_add_cpus_to_pset(pset_preen_cpumask(cpus), pset); + + return pset_id; +} + +/* ************************************************************ + * sysmp_pset_delete + * delete a pset from the system + * + * returns 0 on success + 
* returns -EPERM if the user is not root + * returns -EINVAL if the requested pset does not exist + * returns -EBUSY if the requested pset is in use + * returns -EBUSY if the requested pset is not modifiable + * ***********************************************************/ +int sysmp_pset_delete(int pset_id) +{ + struct pset_struct *pset; + + PRINTD("sysmp_pset_delete(%d)\n", pset_id); + + /* "root" in the header comment means CAP_SYS_ADMIN here */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + pset = pset_find_pset(pset_id); + if (!pset) + return -EINVAL; + /* refuse while the refcount is non-zero or the pset is read-only */ + if (atomic_read(&(pset->refcount)) || pset_is_ro_pset(pset)) + return -EBUSY; + + pset_deallocate_pset(pset); + + return 0; +} + + +/* ************************************************************ + * sysmp_pset_add + * add a cpumask to a pset + * + * cpus is filtered through pset_preen_cpumask() before it is added + * + * returns 0 on success + * returns -EPERM if the caller lacks CAP_SYS_ADMIN + * returns -EINVAL if the requested pset does not exist + * returns -EBUSY if the requested pset is not modifiable + * (pset_is_ro_pset() is true for it) + * ***********************************************************/ +int sysmp_pset_add(int pset_id, cpumask_t cpus) +{ + struct pset_struct *pset; + + PRINTD("sysmp_pset_add(%d, %lu)\n", pset_id, cpus); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + pset = pset_find_pset(pset_id); + if (!pset) + return -EINVAL; + if (pset_is_ro_pset(pset)) + return -EBUSY; + + pset_add_cpus_to_pset(pset_preen_cpumask(cpus), pset); + + return 0; +} + +/* ************************************************************ + * sysmp_pset_remove + * remove a cpumask from a pset + * + * cpus is filtered through pset_preen_cpumask() before it is removed + * + * returns 0 on success + * returns -EPERM if the caller lacks CAP_SYS_ADMIN + * returns -EINVAL if the requested pset does not exist + * returns -EBUSY if the requested pset is not modifiable + * (pset_is_ro_pset() is true for it) + * ***********************************************************/ +int sysmp_pset_remove(int pset_id, cpumask_t cpus) +{ + struct pset_struct *pset; + + PRINTD("sysmp_pset_remove(%d, %lu)\n", pset_id, cpus); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + pset = pset_find_pset(pset_id); + if (!pset) + return 
-EINVAL; + if (pset_is_ro_pset(pset)) + return -EBUSY; + + pset_remove_cpus_from_pset(pset_preen_cpumask(cpus), pset); + + /* we may be on a CPU that is no longer in the set */ + if (pset == current->pset) { + current->need_resched = 1; + } + + return 0; +} diff -u -p -L linux.2342.current/mm/Makefile.orig linux.2342.current/mm/Makefile --- linux.2342.current/mm/Makefile +++ linux.2342.current/mm/Makefile Sun Feb 6 23:00:00 2000 @@ -16,4 +16,8 @@ ifeq ($(CONFIG_HIGHMEM),y) O_OBJS += highmem.o endif +ifeq ($(CONFIG_BIGPHYSAREA),y) +O_OBJS += bigphysarea.o +endif + include $(TOPDIR)/Rules.make diff -u -p -L linux.2342.current/mm/bigphysarea.c.orig linux.2342.current/mm/bigphysarea.c --- linux.2342.current/mm/bigphysarea.c +++ linux.2342.current/mm/bigphysarea.c Sun Feb 6 23:00:00 2000 @@ -0,0 +1,344 @@ +/* linux/mm/bigphysarea.c, M. Welsh (mdw@cs.cornell.edu) + * Copyright (c) 1996 by Matt Welsh. + * Extended by Roger Butenuth (butenuth@uni-paderborn.de), October 1997 + * Modified for 2.3.x kernels by Fred Barnes (fred@xylene.com), January 2000 + * + * This is a set of routines which allow you to reserve a large (?) + * amount of physical memory at boot-time, which can be allocated/deallocated + * by drivers. This memory is intended to be used for devices such as + * video framegrabbers which need a lot of physical RAM (above the amount + * allocated by kmalloc). This is by no means efficient or recommended; + * to be used only in extreme circumstances. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+/*
+ * NOTE(review): the header names of the #include lines below are missing
+ * from this copy of the patch; restore them from the original before use.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* One contiguous byte range of the area; used for both lists below. */
+typedef struct range_struct {
+	struct range_struct *next;
+	caddr_t base;			/* base of allocated block */
+	size_t size;			/* size in bytes */
+} range_t;
+
+/* Number of pages requested through bigphysarea_setup(); 0 means none. */
+static int bigphysarea_pages = 0;
+/*
+ * 0: nothing initialized
+ * 1: area reserved by bigphysarea_init(), free list not built yet
+ * 2: free list initialized
+ */
+static int init_level = 0;
+
+/*
+ * The free list and the used list are protected by one lock.
+ */
+static spinlock_t big_lock = SPIN_LOCK_UNLOCKED;
+static range_t *free_list = NULL;
+static range_t *used_list = NULL;
+
+/* Base address of the reserved area, or 0 if nothing was reserved. */
+caddr_t bigphysarea = 0;
+
+/*
+ * Record how many pages bigphysarea_init() should reserve.  A negative
+ * request is treated as "no pages" rather than being turned into a huge
+ * size later on.
+ */
+void bigphysarea_setup(int pages)
+{
+	bigphysarea_pages = (pages > 0) ? pages : 0;
+}
+
+
+/*
+ * Reserve the requested pages from the boot memory allocator and, on
+ * success, move init_level to 1.  Prints one status line in every case.
+ */
+void bigphysarea_init(void)
+{
+	if (bigphysarea_pages == 0) {
+		printk("bigphysarea: No pages requested.\n");
+		return;
+	}
+
+	/* alloc_bootmem_pages() takes a size in bytes, not a page count. */
+	bigphysarea = (caddr_t)alloc_bootmem_pages(bigphysarea_pages * PAGE_SIZE);
+	if (bigphysarea == (caddr_t)0) {
+		printk("bigphysarea: Unable to allocate %d pages.\n", bigphysarea_pages);
+		/* nothing was reserved, so do not keep advertising a size */
+		bigphysarea_pages = 0;
+		return;
+	}
+
+	init_level = 1;
+
+	printk("bigphysarea: Allocated %d pages at 0x%08lx.\n",
+	       bigphysarea_pages, (unsigned long)bigphysarea);
+}
+
+/*
+ * When we have pages but don't have a freelist, put all pages in
+ * one free list entry. Return 0 on success, 1 on error. 
+ */
+static int init2(int priority)
+{
+	range_t *first;
+	int res;
+
+	/* Nothing to build: either no area was reserved or the list exists. */
+	if (init_level != 1)
+		return (init_level == 2) ? 0 : 1;
+
+	/* kmalloc() may sleep for some priorities, so call it unlocked. */
+	first = kmalloc(sizeof(range_t), priority);
+	if (first == NULL)
+		return 1;
+	first->next = NULL;
+	first->base = bigphysarea;
+	first->size = bigphysarea_pages * PAGE_SIZE;
+
+	spin_lock(&big_lock); /* >>>>>>>>>> */
+	if (init_level == 1) {
+		free_list = first;
+		first = NULL;
+		init_level = 2;
+	}
+	res = (init_level == 2) ? 0 : 1;
+	spin_unlock(&big_lock); /* <<<<<<<<<< */
+
+	/* Somebody else built the free list while we were allocating. */
+	if (first != NULL)
+		kfree(first);
+	return res;
+}
+
+
+/*
+ * Allocate `count' pages from the big physical area. Pages are aligned to
+ * a multiple of `align' pages (0 means page alignment). `priority' has the
+ * same meaning as in kmalloc, it is needed for management information.
+ * Returns the base address of the block, or 0 if `count' is not positive,
+ * `align' is negative, no management memory is available or no free range
+ * is large enough.
+ * This function may not be called from an interrupt, this is the reason
+ * we can use lock functions without disabling all interrupts.
+ */
+caddr_t bigphysarea_alloc_pages(int count, int align, int priority)
+{
+	range_t *range = NULL, **range_ptr, *used;
+	range_t *new_range;	/* spare: the used block when a range is split */
+	range_t *align_range;	/* spare: the free gap in front of the block */
+	unsigned long bytes, align_bytes, gap = 0;
+	caddr_t aligned_base = 0;
+	caddr_t res = 0;
+
+	if (count <= 0 || align < 0)
+		return 0;
+	if (init_level < 2)
+		if (init2(priority))
+			return 0;
+
+	bytes = (unsigned long)count * PAGE_SIZE;
+	if (align == 0)
+		align_bytes = PAGE_SIZE;
+	else
+		align_bytes = (unsigned long)align * PAGE_SIZE;
+
+	/*
+	 * Up to two list elements may be needed.  kmalloc() may sleep for
+	 * some priorities, so get both before taking the lock and give back
+	 * whatever is left over afterwards.
+	 */
+	new_range = kmalloc(sizeof(range_t), priority);
+	align_range = kmalloc(sizeof(range_t), priority);
+	if (new_range == NULL || align_range == NULL)
+		goto free_spares;
+
+	spin_lock(&big_lock); /* >>>>>>>>>> */
+	/*
+	 * Search a free block which is large enough, even with alignment.
+	 * The address is rounded up as an unsigned value: kernel addresses
+	 * may have the top bit set, and signed division rounds those the
+	 * wrong way.  The fit test is written so that it cannot wrap.
+	 */
+	range_ptr = &free_list;
+	while (*range_ptr != NULL) {
+		range = *range_ptr;
+		aligned_base = (caddr_t)
+			((((unsigned long)range->base + align_bytes - 1) /
+			  align_bytes) * align_bytes);
+		gap = (unsigned long)aligned_base - (unsigned long)range->base;
+		if (aligned_base >= range->base &&
+		    gap <= range->size &&
+		    bytes <= range->size - gap)
+			break;
+		range_ptr = &range->next;
+	}
+	if (*range_ptr == NULL)
+		goto unlock;
+	range = *range_ptr;
+	/*
+	 * When we have to align, the pages needed for alignment can
+	 * be put back to the free pool.
+	 */
+	if (gap != 0) {
+		align_range->base = range->base;
+		align_range->size = gap;
+		range->base = aligned_base;
+		range->size -= gap;
+		align_range->next = range;
+		*range_ptr = align_range;
+		range_ptr = &align_range->next;
+		align_range = NULL;		/* now owned by the free list */
+	}
+	if (bytes < range->size) {
+		/*
+		 * Range is larger than needed, create a new list element for
+		 * the used list and shrink the element in the free list.
+		 */
+		new_range->base = range->base;
+		new_range->size = bytes;
+		range->base = new_range->base + new_range->size;
+		range->size = range->size - new_range->size;
+		used = new_range;
+		new_range = NULL;		/* now owned by the used list */
+	} else {
+		/*
+		 * Range fits perfectly, remove it from free list.
+		 */
+		*range_ptr = range->next;
+		used = range;
+	}
+	/*
+	 * Insert block into used list
+	 */
+	used->next = used_list;
+	used_list = used;
+
+	res = used->base;
+ unlock:
+	spin_unlock(&big_lock); /* <<<<<<<<<< */
+ free_spares:
+	if (new_range != NULL)
+		kfree(new_range);
+	if (align_range != NULL)
+		kfree(align_range);
+	return res;
+}
+
+
+/*
+ * Free pages allocated with `bigphysarea_alloc_pages'. `base' must be an
+ * address returned by `bigphysarea_alloc_pages'; any other value is
+ * reported with printk and otherwise ignored.
+ * This function may not be called from an interrupt, for this reason we
+ * can use a lock without interrupt disabling.
+ */
+void bigphysarea_free_pages(caddr_t base)
+{
+	range_t *prev, *next, *range, **range_ptr;
+
+	spin_lock(&big_lock); /* >>>>>>>>>> */
+	/*
+	 * Search the block in the used list.
+	 */
+	for (range_ptr = &used_list;
+	     *range_ptr != NULL;
+	     range_ptr = &(*range_ptr)->next)
+		if ((*range_ptr)->base == base)
+			break;
+	if (*range_ptr == NULL) {
+		/* print the whole pointer, as bigphysarea_init() does */
+		printk("bigphysarea_free_pages(0x%08lx), not allocated!\n",
+		       (unsigned long)base);
+		goto ret_label;
+	}
+	range = *range_ptr;
+	/*
+	 * Remove range from the used list:
+	 */
+	*range_ptr = (*range_ptr)->next;
+	/*
+	 * The free-list is sorted by address, search insertion point
+	 * and insert block in free list.
+	 */
+	for (range_ptr = &free_list, prev = NULL;
+	     *range_ptr != NULL;
+	     prev = *range_ptr, range_ptr = &(*range_ptr)->next)
+		if ((*range_ptr)->base >= base)
+			break;
+	range->next = *range_ptr;
+	*range_ptr = range;
+	/*
+	 * Concatenate free range with neighbors, if possible.
+	 * Try for upper neighbor (next in list) first, then
+	 * for lower neighbor (predecessor in list).
+	 */
+	if (range->next != NULL &&
+	    range->base + range->size == range->next->base) {
+		next = range->next;
+		range->size += range->next->size;
+		range->next = next->next;
+		kfree(next);
+	}
+	if (prev != NULL &&
+	    prev->base + prev->size == range->base) {
+		/* prev->next is `range' here: fold it into prev */
+		prev->size += prev->next->size;
+		prev->next = range->next;
+		kfree(range);
+	}
+ ret_label:
+	spin_unlock(&big_lock); /* <<<<<<<<<< */
+}
+
+/*
+ * Byte-sized front end: round `size' up to whole pages and allocate them
+ * page-aligned with GFP_KERNEL.  Returns 0 for a non-positive size.
+ */
+caddr_t bigphysarea_alloc(int size)
+{
+	int pages;
+
+	if (size <= 0)
+		return 0;
+	pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+
+	return bigphysarea_alloc_pages(pages, 1, GFP_KERNEL);
+}
+
+/*
+ * Counterpart of bigphysarea_alloc(); `size' is accepted but not used.
+ */
+void bigphysarea_free(caddr_t addr, int size)
+{
+	(void)size;
+	bigphysarea_free_pages(addr);
+}
+
+/*
+ * Write a usage report for the area into `buf' and return its length in
+ * characters.  Block counts and sizes are gathered under big_lock; the
+ * text is formatted after the lock is dropped.
+ * NOTE(review): any column padding inside the format strings below looks
+ * collapsed in this copy of the patch - confirm against the original.
+ */
+int get_bigphysarea_info(char *buf)
+{
+	char *p = buf;
+	range_t *ptr;
+	int free_count, used_count;
+	/* byte totals: kept as unsigned long so large areas cannot overflow */
+	unsigned long free_total, free_max;
+	unsigned long used_total, used_max;
+
+	/* best effort: the report is still valid if the list cannot be built */
+	if (init_level == 1)
+		init2(GFP_KERNEL);
+
+	spin_lock(&big_lock); /* >>>>>>>>>> */
+	free_count = 0;
+	free_total = 0;
+	free_max = 0;
+	for (ptr = free_list; ptr != NULL; ptr = ptr->next) {
+		free_count++;
+		free_total += ptr->size;
+		if (ptr->size > free_max)
+			free_max = ptr->size;
+	}
+
+	used_count = 0;
+	used_total = 0;
+	used_max = 0;
+	for (ptr = used_list; ptr != NULL; ptr = ptr->next) {
+		used_count++;
+		used_total += ptr->size;
+		if (ptr->size > used_max)
+			used_max = ptr->size;
+	}
+	spin_unlock(&big_lock); /* <<<<<<<<<< */
+
+	if (bigphysarea_pages == 0) {
+		p += sprintf(p, "No big physical area allocated!\n");
+		return p - buf;
+	}
+
+	p += sprintf(p, "Big physical area, size %ld kB\n",
+		     bigphysarea_pages * PAGE_SIZE / 1024);
+	p += sprintf(p, " free list: used list:\n");
+	p += sprintf(p, "number of blocks: %8d %8d\n",
+		     free_count, used_count);
+	p += sprintf(p, "size of largest block: %8lu kB %8lu kB\n",
+		     free_max / 1024, used_max / 1024);
+	p += sprintf(p, "total: %8lu kB %8lu kB\n",
+		     free_total / 1024, used_total /1024);
+
+	return p - buf;
+}