From 1d2444bb25f0b4efa6e2f7e397aba0c6fd7262d8 Mon Sep 17 00:00:00 2001 From: Planet-Lab Support Date: Mon, 8 Aug 2005 21:12:14 +0000 Subject: [PATCH] This commit was manufactured by cvs2svn to create tag 'after-kexec-2-6-10-patch'. --- configs/kernel-2.6.10-i686-planetlab.config | 264 +++++- fs/exec.c | 2 +- fs/ioctl.c | 13 - fs/posix_acl.c | 4 - include/linux/ckrm_mem.h | 98 +- include/linux/ckrm_mem_inline.h | 347 ++++--- include/linux/ckrm_tsk.h | 18 +- include/linux/ext2_fs.h | 4 +- include/linux/ext3_fs.h | 4 +- include/linux/mm.h | 2 +- include/linux/page-flags.h | 11 +- include/linux/sched.h | 107 ++- include/linux/vserver/inode.h | 7 - init/Kconfig | 13 +- kernel/ckrm/Makefile | 2 +- kernel/ckrm/ckrm_cpu_class.c | 11 - kernel/ckrm/ckrm_cpu_monitor.c | 13 +- kernel/ckrm/ckrm_mem.c | 981 -------------------- kernel/ckrm/ckrm_numtasks.c | 404 +++++--- kernel/ckrm/ckrm_numtasks_stub.c | 10 +- kernel/exit.c | 2 +- kernel/fork.c | 6 +- kernel/vserver/inode.c | 32 - mm/page_alloc.c | 15 +- mm/swap.c | 6 +- mm/vmscan.c | 318 ++----- scripts/kernel-2.6-planetlab.spec | 2 +- 27 files changed, 1012 insertions(+), 1684 deletions(-) delete mode 100644 kernel/ckrm/ckrm_mem.c diff --git a/configs/kernel-2.6.10-i686-planetlab.config b/configs/kernel-2.6.10-i686-planetlab.config index bd63671f1..8e46fe312 100644 --- a/configs/kernel-2.6.10-i686-planetlab.config +++ b/configs/kernel-2.6.10-i686-planetlab.config @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.10-1.14_FC2.1.planetlab.2005.04.14 -# Sat May 7 01:45:01 2005 +# Linux kernel version: 2.6.10-1.14_FC2.1.planetlab.2005.03.31 +# Thu Mar 31 11:50:25 2005 # CONFIG_X86=y CONFIG_MMU=y @@ -33,10 +33,9 @@ CONFIG_CKRM=y CONFIG_RCFS_FS=y CONFIG_CKRM_TYPE_TASKCLASS=y CONFIG_CKRM_RES_NULL=m -# CONFIG_CKRM_RES_MEM is not set +CONFIG_CKRM_RES_MEM=y # CONFIG_CKRM_TYPE_SOCKETCLASS is not set CONFIG_CKRM_RES_NUMTASKS=y -# CONFIG_CKRM_RES_NUMTASKS_FORKRATE is not set 
CONFIG_CKRM_CPU_SCHEDULE=y # CONFIG_CKRM_RES_BLKIO is not set CONFIG_CKRM_CPU_SCHEDULE_AT_BOOT=y @@ -702,7 +701,7 @@ CONFIG_MD_RAID5=m CONFIG_MD_RAID6=m CONFIG_MD_MULTIPATH=m CONFIG_MD_FAULTY=m -CONFIG_BLK_DEV_DM=y +CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m @@ -791,7 +790,7 @@ CONFIG_INET_IPCOMP=m CONFIG_INET_TUNNEL=m # CONFIG_ACCEPT_QUEUES is not set CONFIG_IP_TCPDIAG=m -# CONFIG_IP_TCPDIAG_IPV6 is not set +CONFIG_IP_TCPDIAG_IPV6=y # # IP: Virtual Server Configuration @@ -827,7 +826,13 @@ CONFIG_IP_VS_NQ=m # CONFIG_IP_VS_FTP=m CONFIG_ICMP_IPOD=y -# CONFIG_IPV6 is not set +CONFIG_IPV6=m +CONFIG_IPV6_PRIVACY=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_INET6_TUNNEL=m +CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set CONFIG_BRIDGE_NETFILTER=y @@ -904,6 +909,31 @@ CONFIG_IP_NF_ARP_MANGLE=m # CONFIG_IP_NF_COMPAT_IPFWADM is not set # CONFIG_IP_NF_CT_PROTO_GRE is not set +# +# IPv6: Netfilter Configuration +# +# CONFIG_IP6_NF_QUEUE is not set +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_LIMIT=m +CONFIG_IP6_NF_MATCH_MAC=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_MULTIPORT=m +CONFIG_IP6_NF_MATCH_OWNER=m +CONFIG_IP6_NF_MATCH_MARK=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_AHESP=m +CONFIG_IP6_NF_MATCH_LENGTH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_MATCH_PHYSDEV=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_TARGET_MARK=m +CONFIG_IP6_NF_RAW=m + # # Bridge: Netfilter Configuration # @@ -949,7 +979,7 @@ CONFIG_ATM_BR2684=m CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m # CONFIG_DECNET is not set -CONFIG_LLC=m +CONFIG_LLC=y # CONFIG_LLC2 is not set CONFIG_IPX=m # CONFIG_IPX_INTERN is not set @@ -1008,9 +1038,98 @@ CONFIG_NETPOLL=y CONFIG_NETPOLL_TRAP=y CONFIG_NET_POLL_CONTROLLER=y # CONFIG_HAMRADIO is not set -# CONFIG_IRDA is not set -# CONFIG_BT 
is not set -# CONFIG_TUX is not set +CONFIG_IRDA=m + +# +# IrDA protocols +# +CONFIG_IRLAN=m +CONFIG_IRNET=m +CONFIG_IRCOMM=m +# CONFIG_IRDA_ULTRA is not set + +# +# IrDA options +# +CONFIG_IRDA_CACHE_LAST_LSAP=y +CONFIG_IRDA_FAST_RR=y +# CONFIG_IRDA_DEBUG is not set + +# +# Infrared-port device drivers +# + +# +# SIR device drivers +# +CONFIG_IRTTY_SIR=m + +# +# Dongle support +# +CONFIG_DONGLE=y +CONFIG_ESI_DONGLE=m +CONFIG_ACTISYS_DONGLE=m +CONFIG_TEKRAM_DONGLE=m +CONFIG_LITELINK_DONGLE=m +CONFIG_MA600_DONGLE=m +CONFIG_GIRBIL_DONGLE=m +CONFIG_MCP2120_DONGLE=m +CONFIG_OLD_BELKIN_DONGLE=m +CONFIG_ACT200L_DONGLE=m + +# +# Old SIR device drivers +# +CONFIG_IRPORT_SIR=m + +# +# Old Serial dongle support +# +# CONFIG_DONGLE_OLD is not set + +# +# FIR device drivers +# +CONFIG_USB_IRDA=m +CONFIG_SIGMATEL_FIR=m +CONFIG_TOSHIBA_FIR=m +CONFIG_VLSI_FIR=m +CONFIG_BT=m +CONFIG_BT_L2CAP=m +CONFIG_BT_SCO=m +CONFIG_BT_RFCOMM=m +CONFIG_BT_RFCOMM_TTY=y +CONFIG_BT_BNEP=m +CONFIG_BT_BNEP_MC_FILTER=y +CONFIG_BT_BNEP_PROTO_FILTER=y +CONFIG_BT_CMTP=m +CONFIG_BT_HIDP=m + +# +# Bluetooth device drivers +# +CONFIG_BT_HCIUSB=m +CONFIG_BT_HCIUSB_SCO=y +CONFIG_BT_HCIUART=m +CONFIG_BT_HCIUART_H4=y +CONFIG_BT_HCIUART_BCSP=y +CONFIG_BT_HCIUART_BCSP_TXCRC=y +CONFIG_BT_HCIBCM203X=m +CONFIG_BT_HCIBFUSB=m +CONFIG_BT_HCIDTL1=m +CONFIG_BT_HCIBT3C=m +CONFIG_BT_HCIBLUECARD=m +CONFIG_BT_HCIBTUART=m +CONFIG_BT_HCIVHCI=m +CONFIG_TUX=m + +# +# TUX options +# +CONFIG_TUX_EXTCGI=y +CONFIG_TUX_EXTENDED_LOG=y +# CONFIG_TUX_DEBUG is not set CONFIG_NETDEVICES=y CONFIG_DUMMY=m CONFIG_BONDING=m @@ -1108,7 +1227,13 @@ CONFIG_S2IO_NAPI=y # # Token Ring devices # -# CONFIG_TR is not set +CONFIG_TR=y +CONFIG_IBMOL=m +CONFIG_IBMLS=m +CONFIG_3C359=m +CONFIG_TMS380TR=m +CONFIG_TMSPCI=m +CONFIG_ABYSS=m # # Wireless LAN (non-hamradio) @@ -1174,6 +1299,7 @@ CONFIG_PCMCIA_NMCLAN=m CONFIG_PCMCIA_SMC91C92=m CONFIG_PCMCIA_XIRC2PS=m CONFIG_PCMCIA_AXNET=m +CONFIG_PCMCIA_IBMTR=m # # Wan interfaces @@ -1210,9 +1336,20 @@ 
CONFIG_FDDI=y # CONFIG_DEFXX is not set CONFIG_SKFP=m # CONFIG_HIPPI is not set -# CONFIG_PLIP is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set +CONFIG_PLIP=m +CONFIG_PPP=m +CONFIG_PPP_MULTILINK=y +CONFIG_PPP_FILTER=y +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +CONFIG_PPP_DEFLATE=m +# CONFIG_PPP_BSDCOMP is not set +CONFIG_PPPOE=m +CONFIG_PPPOATM=m +CONFIG_SLIP=m +CONFIG_SLIP_COMPRESSED=y +CONFIG_SLIP_SMART=y +# CONFIG_SLIP_MODE_SLIP6 is not set CONFIG_NET_FC=y # CONFIG_SHAPER is not set CONFIG_NETCONSOLE=m @@ -1886,7 +2023,95 @@ CONFIG_LOGO_LINUX_CLUT224=y # # Sound # -# CONFIG_SOUND is not set +CONFIG_SOUND=m + +# +# Advanced Linux Sound Architecture +# +CONFIG_SND=m +CONFIG_SND_TIMER=m +CONFIG_SND_PCM=m +CONFIG_SND_HWDEP=m +CONFIG_SND_RAWMIDI=m +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_OSSEMUL=y +CONFIG_SND_MIXER_OSS=m +CONFIG_SND_PCM_OSS=m +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_RTCTIMER=m +# CONFIG_SND_VERBOSE_PRINTK is not set +# CONFIG_SND_DEBUG is not set + +# +# Generic devices +# +CONFIG_SND_MPU401_UART=m +CONFIG_SND_OPL3_LIB=m +CONFIG_SND_VX_LIB=m +CONFIG_SND_DUMMY=m +CONFIG_SND_VIRMIDI=m +CONFIG_SND_MTPAV=m +# CONFIG_SND_SERIAL_U16550 is not set +CONFIG_SND_MPU401=m + +# +# PCI devices +# +CONFIG_SND_AC97_CODEC=m +CONFIG_SND_ALI5451=m +CONFIG_SND_ATIIXP=m +CONFIG_SND_ATIIXP_MODEM=m +CONFIG_SND_AU8810=m +CONFIG_SND_AU8820=m +CONFIG_SND_AU8830=m +CONFIG_SND_AZT3328=m +CONFIG_SND_BT87X=m +# CONFIG_SND_BT87X_OVERCLOCK is not set +CONFIG_SND_CS46XX=m +CONFIG_SND_CS46XX_NEW_DSP=y +CONFIG_SND_CS4281=m +CONFIG_SND_EMU10K1=m +CONFIG_SND_KORG1212=m +CONFIG_SND_MIXART=m +CONFIG_SND_NM256=m +CONFIG_SND_RME32=m +CONFIG_SND_RME96=m +CONFIG_SND_RME9652=m +CONFIG_SND_HDSP=m +CONFIG_SND_TRIDENT=m +CONFIG_SND_YMFPCI=m +CONFIG_SND_ALS4000=m +CONFIG_SND_CMIPCI=m +CONFIG_SND_ENS1370=m +CONFIG_SND_ENS1371=m +CONFIG_SND_ES1938=m +CONFIG_SND_ES1968=m +CONFIG_SND_MAESTRO3=m +CONFIG_SND_FM801=m +CONFIG_SND_FM801_TEA575X=m +CONFIG_SND_ICE1712=m 
+CONFIG_SND_ICE1724=m +CONFIG_SND_INTEL8X0=m +CONFIG_SND_INTEL8X0M=m +CONFIG_SND_SONICVIBES=m +CONFIG_SND_VIA82XX=m +CONFIG_SND_VX222=m + +# +# USB devices +# +CONFIG_SND_USB_AUDIO=m +CONFIG_SND_USB_USX2Y=m + +# +# PCMCIA devices +# + +# +# Open Sound System +# +# CONFIG_SOUND_PRIME is not set # # USB support @@ -1918,7 +2143,12 @@ CONFIG_USB_SL811_HCD=m # # USB Device Class drivers # -# CONFIG_USB_BLUETOOTH_TTY is not set +# CONFIG_USB_AUDIO is not set + +# +# USB Bluetooth TTY can only be used with disabled Bluetooth subsystem +# +CONFIG_USB_MIDI=m CONFIG_USB_ACM=m CONFIG_USB_PRINTER=m diff --git a/fs/exec.c b/fs/exec.c index 95ae49ba1..5f7f09222 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -564,7 +564,7 @@ static int exec_mmap(struct mm_struct *mm) activate_mm(active_mm, mm); task_unlock(tsk); arch_pick_mmap_layout(mm); - ckrm_task_change_mm(tsk, old_mm, mm); + ckrm_task_mm_change(tsk, old_mm, mm); if (old_mm) { if (active_mm != old_mm) BUG(); mmput(old_mm); diff --git a/fs/ioctl.c b/fs/ioctl.c index 6af7a74c8..19e902dc3 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -174,19 +174,6 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) error = vx_proc_ioctl(filp->f_dentry->d_inode, filp, cmd, arg); break; #endif - case FIOC_SETIATTR: - case FIOC_GETIATTR: - /* - * Verify that this filp is a file object, - * not (say) a socket. 
- */ - error = -ENOTTY; - if (S_ISREG(filp->f_dentry->d_inode->i_mode) || - S_ISDIR(filp->f_dentry->d_inode->i_mode)) - error = vc_iattr_ioctl(filp->f_dentry, - cmd, arg); - break; - default: error = -ENOTTY; if (S_ISREG(filp->f_dentry->d_inode->i_mode)) diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 9c676901a..97fbb8619 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -215,10 +215,6 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want) const struct posix_acl_entry *pa, *pe, *mask_obj; int found = 0; - /* Prevent vservers from escaping chroot() barriers */ - if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) - return -EACCES; - FOREACH_ACL_ENTRY(pa, acl, pe) { switch(pa->e_tag) { case ACL_USER_OBJ: diff --git a/include/linux/ckrm_mem.h b/include/linux/ckrm_mem.h index 3712aefb9..1e4c70fc1 100644 --- a/include/linux/ckrm_mem.h +++ b/include/linux/ckrm_mem.h @@ -29,8 +29,8 @@ struct ckrm_zone { struct list_head active_list; struct list_head inactive_list; - unsigned long nr_active; // # of pages in the active list - unsigned long nr_inactive; // # of pages in the inactive list + unsigned long nr_active; + unsigned long nr_inactive; unsigned long active_over; unsigned long inactive_over; @@ -38,68 +38,72 @@ struct ckrm_zone { unsigned long shrink_inactive; long shrink_weight; unsigned long shrink_flag; - - struct list_head victim_list; // list of ckrm_zones chosen for shrinking + struct list_head victim_list; /* list of ckrm_zones chosen for + * shrinking. These are over their + * 'guarantee' + */ struct zone *zone; struct ckrm_mem_res *memcls; }; struct ckrm_mem_res { unsigned long flags; - struct ckrm_core_class *core; // the core i am part of... - struct ckrm_core_class *parent; // parent of the core i am part of.... 
- struct ckrm_shares shares; - struct list_head mcls_list; // list of all 1-level classes - struct list_head shrink_list; // list of classes need to be shrunk - struct kref nr_users; // # of references to this class/data structure - atomic_t pg_total; // # of pages used by this class - int pg_guar; // # of pages this class is guaranteed - int pg_limit; // max # of pages this class can get - int pg_borrowed; // # of pages this class borrowed from its parent - int pg_lent; // # of pages this class lent to its children - int pg_unused; // # of pages left to this class (after giving the - // guarantees to children. need to borrow from parent if - // more than this is needed. - int impl_guar; // implicit guarantee for class with don't care guar - int nr_dontcare; // # of children with don't care guarantee + struct ckrm_core_class *core; /* the core i am part of... */ + struct ckrm_core_class *parent; /* parent of the core i am part of */ + struct ckrm_shares shares; + struct list_head mcls_list; /* list of all 1-level classes */ + struct kref nr_users; /* ref count */ + atomic_t pg_total; /* # of pages used by this class */ + int pg_guar; /* absolute # of guarantee */ + int pg_limit; /* absolute # of limit */ + int pg_borrowed; /* # of pages borrowed from parent */ + int pg_lent; /* # of pages lent to children */ + int pg_unused; /* # of pages left to this class + * (after giving the guarantees to + * children. need to borrow from + * parent if more than this is needed. 
+ */ + int hier; /* hiearchy level, root = 0 */ + int impl_guar; /* for classes with don't care guar */ + int nr_dontcare; /* # of dont care children */ + struct ckrm_zone ckrm_zone[MAX_NR_ZONES]; + + struct list_head shrink_list; /* list of classes that are near + * limit and need to be shrunk + */ int shrink_count; unsigned long last_shrink; - int over_limit_failures; - int shrink_pages; // # of pages to free in this class - int hier; // hiearchy, root = 0 }; +#define CLS_SHRINK_BIT (1) + +#define CLS_AT_LIMIT (1) + extern atomic_t ckrm_mem_real_count; -extern unsigned int ckrm_tot_lru_pages; -extern int ckrm_nr_mem_classes; -extern struct list_head ckrm_shrink_list; -extern struct list_head ckrm_memclass_list; -extern spinlock_t ckrm_mem_lock; extern struct ckrm_res_ctlr mem_rcbs; extern struct ckrm_mem_res *ckrm_mem_root_class; +extern struct list_head ckrm_memclass_list; +extern struct list_head ckrm_shrink_list; +extern spinlock_t ckrm_mem_lock; +extern int ckrm_nr_mem_classes; +extern unsigned int ckrm_tot_lru_pages; +extern int ckrm_mem_shrink_count; +extern int ckrm_mem_shrink_to; +extern int ckrm_mem_shrink_interval ; -#define page_ckrmzone(page) ((page)->ckrm_zone) - -#define CLS_SHRINK_BIT (1) - -// used in flags. 
set when a class is more than 90% of its maxlimit -#define MEM_AT_LIMIT 1 - -extern void ckrm_init_mm_to_task(struct mm_struct *, struct task_struct *); -extern void ckrm_mem_evaluate_mm(struct mm_struct *, struct ckrm_mem_res *); -extern void ckrm_at_limit(struct ckrm_mem_res *); -extern int ckrm_memclass_valid(struct ckrm_mem_res *); -extern int ckrm_mem_get_shrink_to(void); -extern void check_memclass(struct ckrm_mem_res *, char *); +extern void ckrm_mem_migrate_mm(struct mm_struct *, struct ckrm_mem_res *); +extern void ckrm_mem_migrate_all_pages(struct ckrm_mem_res *, + struct ckrm_mem_res *); extern void memclass_release(struct kref *); - +extern void shrink_get_victims(struct zone *, unsigned long , + unsigned long, struct list_head *); +extern void ckrm_shrink_atlimit(struct ckrm_mem_res *); #else -#define ckrm_init_mm_to_current(a) do {} while (0) -#define ckrm_mem_evaluate_mm(a) do {} while (0) -#define ckrm_init_mm_to_task(a,b) do {} while (0) +#define ckrm_mem_migrate_mm(a, b) do {} while (0) +#define ckrm_mem_migrate_all_pages(a, b) do {} while (0) -#endif // CONFIG_CKRM_RES_MEM +#endif /* CONFIG_CKRM_RES_MEM */ -#endif //_LINUX_CKRM_MEM_H +#endif /* _LINUX_CKRM_MEM_H */ diff --git a/include/linux/ckrm_mem_inline.h b/include/linux/ckrm_mem_inline.h index 1166956b7..fe752277b 100644 --- a/include/linux/ckrm_mem_inline.h +++ b/include/linux/ckrm_mem_inline.h @@ -26,8 +26,7 @@ #ifdef CONFIG_CKRM_RES_MEM -#define INACTIVE 0 -#define ACTIVE 1 +#define ckrm_shrink_list_empty() list_empty(&ckrm_shrink_list) static inline struct ckrm_mem_res * ckrm_get_mem_class(struct task_struct *tsk) @@ -36,8 +35,6 @@ ckrm_get_mem_class(struct task_struct *tsk) struct ckrm_mem_res); } -#define ckrm_shrink_list_empty() list_empty(&ckrm_shrink_list) - static inline void ckrm_set_shrink(struct ckrm_zone *cz) { @@ -56,6 +53,18 @@ ckrm_clear_shrink(struct ckrm_zone *cz) clear_bit(CLS_SHRINK_BIT, &cz->shrink_flag); } +static inline void +set_page_ckrmzone( struct page *page, 
struct ckrm_zone *cz) +{ + page->ckrm_zone = cz; +} + +static inline struct ckrm_zone * +page_ckrmzone(struct page *page) +{ + return page->ckrm_zone; +} + /* * Currently, a shared page that is shared by multiple classes is charged * to a class with max available guarantee. Simply replace this function @@ -67,7 +76,7 @@ ckrm_mem_share_compare(struct ckrm_mem_res *a, struct ckrm_mem_res *b) if (a == NULL) return -(b != NULL); if (b == NULL) - return 0; + return 1; if (a->pg_guar == b->pg_guar) return 0; if (a->pg_guar == CKRM_SHARE_DONTCARE) @@ -81,29 +90,30 @@ static inline void incr_use_count(struct ckrm_mem_res *cls, int borrow) { extern int ckrm_mem_shrink_at; - if (unlikely(!cls)) + struct ckrm_mem_res *parcls = ckrm_get_res_class(cls->parent, + mem_rcbs.resid, struct ckrm_mem_res); + + if (!cls) return; - BUG_ON(!ckrm_memclass_valid(cls)); - atomic_inc(&cls->pg_total); + atomic_inc(&cls->pg_total); if (borrow) cls->pg_lent++; - if ((cls->pg_guar == CKRM_SHARE_DONTCARE) || - (atomic_read(&cls->pg_total) > cls->pg_unused)) { - struct ckrm_mem_res *parcls = ckrm_get_res_class(cls->parent, + + parcls = ckrm_get_res_class(cls->parent, mem_rcbs.resid, struct ckrm_mem_res); - if (parcls) { - incr_use_count(parcls, 1); - cls->pg_borrowed++; - } - } else { + if (parcls && ((cls->pg_guar == CKRM_SHARE_DONTCARE) || + (atomic_read(&cls->pg_total) > cls->pg_unused))) { + incr_use_count(parcls, 1); + cls->pg_borrowed++; + } else atomic_inc(&ckrm_mem_real_count); - } - if (unlikely((cls->pg_limit != CKRM_SHARE_DONTCARE) && + + if ((cls->pg_limit != CKRM_SHARE_DONTCARE) && (atomic_read(&cls->pg_total) >= ((ckrm_mem_shrink_at * cls->pg_limit) / 100)) && - ((cls->flags & MEM_AT_LIMIT) != MEM_AT_LIMIT))) { - ckrm_at_limit(cls); + ((cls->flags & CLS_AT_LIMIT) != CLS_AT_LIMIT)) { + ckrm_shrink_atlimit(cls); } return; } @@ -111,9 +121,8 @@ incr_use_count(struct ckrm_mem_res *cls, int borrow) static inline void decr_use_count(struct ckrm_mem_res *cls, int borrowed) { - if 
(unlikely(!cls)) + if (!cls) return; - BUG_ON(!ckrm_memclass_valid(cls)); atomic_dec(&cls->pg_total); if (borrowed) cls->pg_lent--; @@ -132,64 +141,50 @@ decr_use_count(struct ckrm_mem_res *cls, int borrowed) static inline void ckrm_set_page_class(struct page *page, struct ckrm_mem_res *cls) { - if (unlikely(cls == NULL)) { - cls = ckrm_mem_root_class; - } - if (likely(cls != NULL)) { - struct ckrm_zone *czone = &cls->ckrm_zone[page_zonenum(page)]; - if (unlikely(page->ckrm_zone)) { - kref_put(&cls->nr_users, memclass_release); - } - page->ckrm_zone = czone; - kref_get(&cls->nr_users); - } else { - page->ckrm_zone = NULL; - } -} + struct ckrm_zone *new_czone, *old_czone; -static inline void -ckrm_set_pages_class(struct page *pages, int numpages, struct ckrm_mem_res *cls) -{ - int i; - for (i = 0; i < numpages; pages++, i++) { - ckrm_set_page_class(pages, cls); - } -} - -static inline void -ckrm_clear_page_class(struct page *page) -{ - if (likely(page->ckrm_zone != NULL)) { - if (CkrmAccount(page)) { - decr_use_count(page->ckrm_zone->memcls, 0); - ClearCkrmAccount(page); + if (!cls) { + if (!ckrm_mem_root_class) { + set_page_ckrmzone(page, NULL); + return; } - kref_put(&page->ckrm_zone->memcls->nr_users, memclass_release); - page->ckrm_zone = NULL; + cls = ckrm_mem_root_class; } + new_czone = &cls->ckrm_zone[page_zonenum(page)]; + old_czone = page_ckrmzone(page); + + if (old_czone) + kref_put(&old_czone->memcls->nr_users, memclass_release); + + set_page_ckrmzone(page, new_czone); + kref_get(&cls->nr_users); + incr_use_count(cls, 0); + SetPageCkrmAccount(page); } static inline void ckrm_change_page_class(struct page *page, struct ckrm_mem_res *newcls) { - struct ckrm_zone *old_czone = page->ckrm_zone, *new_czone; + struct ckrm_zone *old_czone = page_ckrmzone(page), *new_czone; struct ckrm_mem_res *oldcls; - if (unlikely(!old_czone || !newcls)) { - BUG_ON(CkrmAccount(page)); - return; + if (!newcls) { + if (!ckrm_mem_root_class) + return; + newcls = 
ckrm_mem_root_class; } - BUG_ON(!CkrmAccount(page)); oldcls = old_czone->memcls; - if (oldcls == NULL || (oldcls == newcls)) + if (oldcls == newcls) return; - kref_put(&oldcls->nr_users, memclass_release); - decr_use_count(oldcls, 0); - - page->ckrm_zone = new_czone = &newcls->ckrm_zone[page_zonenum(page)]; + if (oldcls) { + kref_put(&oldcls->nr_users, memclass_release); + decr_use_count(oldcls, 0); + } + new_czone = &newcls->ckrm_zone[page_zonenum(page)]; + set_page_ckrmzone(page, new_czone); kref_get(&newcls->nr_users); incr_use_count(newcls, 0); @@ -205,34 +200,45 @@ ckrm_change_page_class(struct page *page, struct ckrm_mem_res *newcls) } } +static inline void +ckrm_clear_page_class(struct page *page) +{ + struct ckrm_zone *czone = page_ckrmzone(page); + if (czone != NULL) { + if (PageCkrmAccount(page)) { + decr_use_count(czone->memcls, 0); + ClearPageCkrmAccount(page); + } + kref_put(&czone->memcls->nr_users, memclass_release); + set_page_ckrmzone(page, NULL); + } +} + static inline void ckrm_mem_inc_active(struct page *page) { - struct ckrm_mem_res *cls = ckrm_get_mem_class(current) ?: ckrm_mem_root_class; + struct ckrm_mem_res *cls = ckrm_get_mem_class(current) + ?: ckrm_mem_root_class; + struct ckrm_zone *czone; if (cls == NULL) return; - BUG_ON(CkrmAccount(page)); - BUG_ON(page->ckrm_zone != NULL); ckrm_set_page_class(page, cls); - incr_use_count(cls, 0); - SetCkrmAccount(page); - BUG_ON(page->ckrm_zone == NULL); - page->ckrm_zone->nr_active++; - list_add(&page->lru, &page->ckrm_zone->active_list); + czone = page_ckrmzone(page); + czone->nr_active++; + list_add(&page->lru, &czone->active_list); } static inline void ckrm_mem_dec_active(struct page *page) { - if (page->ckrm_zone == NULL) + struct ckrm_zone *czone = page_ckrmzone(page); + if (czone == NULL) return; - BUG_ON(page->ckrm_zone->memcls == NULL); - BUG_ON(!CkrmAccount(page)); list_del(&page->lru); - page->ckrm_zone->nr_active--; + czone->nr_active--; ckrm_clear_page_class(page); } @@ -240,39 +246,59 
@@ ckrm_mem_dec_active(struct page *page) static inline void ckrm_mem_inc_inactive(struct page *page) { - struct ckrm_mem_res *cls = ckrm_get_mem_class(current) ?: ckrm_mem_root_class; + struct ckrm_mem_res *cls = ckrm_get_mem_class(current) + ?: ckrm_mem_root_class; + struct ckrm_zone *czone; if (cls == NULL) return; - BUG_ON(CkrmAccount(page)); - BUG_ON(page->ckrm_zone != NULL); ckrm_set_page_class(page, cls); - incr_use_count(cls, 0); - SetCkrmAccount(page); - BUG_ON(page->ckrm_zone == NULL); - page->ckrm_zone->nr_inactive++; - list_add(&page->lru, &page->ckrm_zone->inactive_list); + czone = page_ckrmzone(page); + czone->nr_inactive++; + list_add(&page->lru, &czone->inactive_list); } static inline void ckrm_mem_dec_inactive(struct page *page) { - if (page->ckrm_zone == NULL) + struct ckrm_zone *czone = page_ckrmzone(page); + if (czone == NULL) return; - BUG_ON(page->ckrm_zone->memcls == NULL); - BUG_ON(!CkrmAccount(page)); - page->ckrm_zone->nr_inactive--; + czone->nr_inactive--; list_del(&page->lru); ckrm_clear_page_class(page); } +static inline void +ckrm_zone_add_active(struct ckrm_zone *czone, int cnt) +{ + czone->nr_active += cnt; +} + +static inline void +ckrm_zone_add_inactive(struct ckrm_zone *czone, int cnt) +{ + czone->nr_inactive += cnt; +} + +static inline void +ckrm_zone_sub_active(struct ckrm_zone *czone, int cnt) +{ + czone->nr_active -= cnt; +} + +static inline void +ckrm_zone_sub_inactive(struct ckrm_zone *czone, int cnt) +{ + czone->nr_inactive -= cnt; +} + static inline int ckrm_class_limit_ok(struct ckrm_mem_res *cls) { int ret; - extern int ckrm_mem_fail_over; if ((mem_rcbs.resid == -1) || !cls) { return 1; @@ -281,19 +307,25 @@ ckrm_class_limit_ok(struct ckrm_mem_res *cls) struct ckrm_mem_res *parcls = ckrm_get_res_class(cls->parent, mem_rcbs.resid, struct ckrm_mem_res); ret = (parcls ? 
ckrm_class_limit_ok(parcls) : 0); - } else { - ret = (atomic_read(&cls->pg_total) <= - ((ckrm_mem_fail_over * cls->pg_limit) / 100)); - } + } else + ret = (atomic_read(&cls->pg_total) <= cls->pg_limit); + + /* If we are failing, just nudge the back end */ + if (ret == 0) + ckrm_shrink_atlimit(cls); - if (ret == 0) { - // if we are failing... just nudge the back end - ckrm_at_limit(cls); - } return ret; } -// task/mm initializations/cleanup +static inline void +ckrm_page_init(struct page *page) +{ + page->flags &= ~(1 << PG_ckrm_account); + set_page_ckrmzone(page, NULL); +} + + +/* task/mm initializations/cleanup */ static inline void ckrm_task_mm_init(struct task_struct *tsk) @@ -302,26 +334,42 @@ ckrm_task_mm_init(struct task_struct *tsk) } static inline void -ckrm_task_change_mm(struct task_struct *tsk, struct mm_struct *oldmm, struct mm_struct *newmm) +ckrm_task_mm_set(struct mm_struct * mm, struct task_struct *task) +{ + spin_lock(&mm->peertask_lock); + if (!list_empty(&task->mm_peers)) { + printk(KERN_ERR "MEM_RC: Task list NOT empty!! 
emptying...\n"); + list_del_init(&task->mm_peers); + } + list_add_tail(&task->mm_peers, &mm->tasklist); + spin_unlock(&mm->peertask_lock); + if (mm->memclass != ckrm_get_mem_class(task)) + ckrm_mem_migrate_mm(mm, NULL); + return; +} + +static inline void +ckrm_task_mm_change(struct task_struct *tsk, + struct mm_struct *oldmm, struct mm_struct *newmm) { if (oldmm) { spin_lock(&oldmm->peertask_lock); list_del(&tsk->mm_peers); - ckrm_mem_evaluate_mm(oldmm, NULL); + ckrm_mem_migrate_mm(oldmm, NULL); spin_unlock(&oldmm->peertask_lock); } spin_lock(&newmm->peertask_lock); list_add_tail(&tsk->mm_peers, &newmm->tasklist); - ckrm_mem_evaluate_mm(newmm, NULL); + ckrm_mem_migrate_mm(newmm, NULL); spin_unlock(&newmm->peertask_lock); } static inline void -ckrm_task_clear_mm(struct task_struct *tsk, struct mm_struct *mm) +ckrm_task_mm_clear(struct task_struct *tsk, struct mm_struct *mm) { spin_lock(&mm->peertask_lock); list_del_init(&tsk->mm_peers); - ckrm_mem_evaluate_mm(mm, NULL); + ckrm_mem_migrate_mm(mm, NULL); spin_unlock(&mm->peertask_lock); } @@ -348,56 +396,65 @@ ckrm_mm_clearclass(struct mm_struct *mm) } } -static inline void -ckrm_zone_inc_active(struct ckrm_zone *czone, int cnt) +static inline void ckrm_init_lists(struct zone *zone) {} + +static inline void ckrm_add_tail_inactive(struct page *page) { - czone->nr_active += cnt; + struct ckrm_zone *ckrm_zone = page_ckrmzone(page); + list_add_tail(&page->lru, &ckrm_zone->inactive_list); } -static inline void -ckrm_zone_inc_inactive(struct ckrm_zone *czone, int cnt) +#else + +#define ckrm_shrink_list_empty() (1) + +static inline void * +ckrm_get_memclass(struct task_struct *tsk) { - czone->nr_inactive += cnt; + return NULL; } -static inline void -ckrm_zone_dec_active(struct ckrm_zone *czone, int cnt) +static inline void ckrm_clear_page_class(struct page *p) {} + +static inline void ckrm_mem_inc_active(struct page *p) {} +static inline void ckrm_mem_dec_active(struct page *p) {} +static inline void 
ckrm_mem_inc_inactive(struct page *p) {} +static inline void ckrm_mem_dec_inactive(struct page *p) {} + +#define ckrm_zone_add_active(a, b) do {} while (0) +#define ckrm_zone_add_inactive(a, b) do {} while (0) +#define ckrm_zone_sub_active(a, b) do {} while (0) +#define ckrm_zone_sub_inactive(a, b) do {} while (0) + +#define ckrm_class_limit_ok(a) (1) + +static inline void ckrm_page_init(struct page *p) {} +static inline void ckrm_task_mm_init(struct task_struct *tsk) {} +static inline void ckrm_task_mm_set(struct mm_struct * mm, + struct task_struct *task) {} +static inline void ckrm_task_mm_change(struct task_struct *tsk, + struct mm_struct *oldmm, struct mm_struct *newmm) {} +static inline void ckrm_task_mm_clear(struct task_struct *tsk, + struct mm_struct *mm) {} + +static inline void ckrm_mm_init(struct mm_struct *mm) {} + +/* using #define instead of static inline as the prototype requires * + * data structures that is available only with the controller enabled */ +#define ckrm_mm_setclass(a, b) do {} while(0) + +static inline void ckrm_mm_clearclass(struct mm_struct *mm) {} + +static inline void ckrm_init_lists(struct zone *zone) { - czone->nr_active -= cnt; + INIT_LIST_HEAD(&zone->active_list); + INIT_LIST_HEAD(&zone->inactive_list); } -static inline void -ckrm_zone_dec_inactive(struct ckrm_zone *czone, int cnt) +static inline void ckrm_add_tail_inactive(struct page *page) { - czone->nr_inactive -= cnt; + struct zone *zone = page_zone(page); + list_add_tail(&page->lru, &zone->inactive_list); } - -#else // !CONFIG_CKRM_RES_MEM - -#define ckrm_set_page_class(a,b) do{}while(0) -#define ckrm_set_pages_class(a,b,c) do{}while(0) -#define ckrm_clear_page_class(a) do{}while(0) -#define ckrm_clear_pages_class(a,b) do{}while(0) -#define ckrm_change_page_class(a,b) do{}while(0) -#define ckrm_change_pages_class(a,b,c) do{}while(0) -#define ckrm_mem_inc_active(a) do{}while(0) -#define ckrm_mem_dec_active(a) do{}while(0) -#define ckrm_mem_inc_inactive(a) do{}while(0) 
-#define ckrm_mem_dec_inactive(a) do{}while(0) -#define ckrm_shrink_list_empty() (1) -#define ckrm_kick_page(a,b) (0) -#define ckrm_class_limit_ok(a) (1) -#define ckrm_task_mm_init(a) do{}while(0) -#define ckrm_task_clear_mm(a, b) do{}while(0) -#define ckrm_task_change_mm(a, b, c) do{}while(0) -#define ckrm_mm_init(a) do{}while(0) -#define ckrm_mm_setclass(a, b) do{}while(0) -#define ckrm_mm_clearclass(a) do{}while(0) -#define ckrm_zone_inc_active(a, b) do{}while(0) -#define ckrm_zone_inc_inactive(a, b) do{}while(0) -#define ckrm_zone_dec_active(a, b) do{}while(0) -#define ckrm_zone_dec_inactive(a, b) do{}while(0) - -#endif // CONFIG_CKRM_RES_MEM - -#endif // _LINUX_CKRM_MEM_INLINE_H_ +#endif +#endif /* _LINUX_CKRM_MEM_INLINE_H_ */ diff --git a/include/linux/ckrm_tsk.h b/include/linux/ckrm_tsk.h index f61453901..9ef07a2c5 100644 --- a/include/linux/ckrm_tsk.h +++ b/include/linux/ckrm_tsk.h @@ -13,23 +13,29 @@ * */ +/* Changes + * + * 31 Mar 2004 + * Created. + */ + #ifndef _LINUX_CKRM_TSK_H #define _LINUX_CKRM_TSK_H #ifdef CONFIG_CKRM_TYPE_TASKCLASS #include -typedef int (*get_ref_t) (struct ckrm_core_class *, int); -typedef void (*put_ref_t) (struct ckrm_core_class *); +typedef int (*get_ref_t) (void *, int); +typedef void (*put_ref_t) (void *); -extern int numtasks_get_ref(struct ckrm_core_class *, int); -extern void numtasks_put_ref(struct ckrm_core_class *); +extern int numtasks_get_ref(void *, int); +extern void numtasks_put_ref(void *); extern void ckrm_numtasks_register(get_ref_t, put_ref_t); #else /* CONFIG_CKRM_TYPE_TASKCLASS */ -#define numtasks_get_ref(core_class, ref) (1) -#define numtasks_put_ref(core_class) do {} while (0) +#define numtasks_get_ref(a, b) (1) +#define numtasks_put_ref(a) do {} while(0) #endif /* CONFIG_CKRM_TYPE_TASKCLASS */ #endif /* _LINUX_CKRM_RES_H */ diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index 12788c896..a9858024b 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -197,8 +197,8 @@ struct 
ext2_group_desc #define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ #ifdef CONFIG_VSERVER_LEGACY -#define EXT2_FL_USER_VISIBLE 0x0C03DFFF /* User visible flags */ -#define EXT2_FL_USER_MODIFIABLE 0x0C0380FF /* User modifiable flags */ +#define EXT2_FL_USER_VISIBLE 0x0803DFFF /* User visible flags */ +#define EXT2_FL_USER_MODIFIABLE 0x080380FF /* User modifiable flags */ #else #define EXT2_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define EXT2_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 818516b81..f2d1cd9fa 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -190,8 +190,8 @@ struct ext3_group_desc #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ #ifdef CONFIG_VSERVER_LEGACY -#define EXT3_FL_USER_VISIBLE 0x0C03DFFF /* User visible flags */ -#define EXT3_FL_USER_MODIFIABLE 0x0C0380FF /* User modifiable flags */ +#define EXT3_FL_USER_VISIBLE 0x0803DFFF /* User visible flags */ +#define EXT3_FL_USER_MODIFIABLE 0x080380FF /* User modifiable flags */ #else #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ diff --git a/include/linux/mm.h b/include/linux/mm.h index d025bcbc6..447e46994 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -240,7 +240,7 @@ struct page { #endif /* WANT_PAGE_VIRTUAL */ #ifdef CONFIG_CKRM_RES_MEM struct ckrm_zone *ckrm_zone; -#endif // CONFIG_CKRM_RES_MEM +#endif }; /* diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index c99f570b7..282141e43 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -75,10 +75,7 @@ #define PG_mappedtodisk 17 /* Has blocks allocated on-disk */ #define PG_reclaim 18 /* To be reclaimed asap */ -#ifdef CONFIG_CKRM_RES_MEM -#define PG_ckrm_account 19 /* This page is accounted by CKRM */ -#endif - +#define PG_ckrm_account 20 /* CKRM accounting */ /* * 
Global page accounting. One instance per CPU. Only unsigned longs are @@ -303,9 +300,9 @@ extern unsigned long __read_page_state(unsigned offset); #endif #ifdef CONFIG_CKRM_RES_MEM -#define CkrmAccount(page) test_bit(PG_ckrm_account, &(page)->flags) -#define SetCkrmAccount(page) set_bit(PG_ckrm_account, &(page)->flags) -#define ClearCkrmAccount(page) clear_bit(PG_ckrm_account, &(page)->flags) +#define PageCkrmAccount(page) test_bit(PG_ckrm_account, &(page)->flags) +#define SetPageCkrmAccount(page) set_bit(PG_ckrm_account, &(page)->flags) +#define ClearPageCkrmAccount(page) clear_bit(PG_ckrm_account, &(page)->flags) #endif struct page; /* forward declaration */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 9cb07d16b..74719a938 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -31,6 +31,7 @@ #include #include #include +#include struct exec_domain; extern int exec_shield; @@ -267,8 +268,8 @@ struct mm_struct { struct kioctx default_kioctx; #ifdef CONFIG_CKRM_RES_MEM struct ckrm_mem_res *memclass; - struct list_head tasklist; /* list of all tasks sharing this address space */ - spinlock_t peertask_lock; /* protect above tasklist */ + struct list_head tasklist; /* tasks sharing this address space */ + spinlock_t peertask_lock; /* protect tasklist above */ #endif }; @@ -718,25 +719,25 @@ struct task_struct { struct mempolicy *mempolicy; short il_next; /* could be shared with used_math */ #endif - #ifdef CONFIG_CKRM - spinlock_t ckrm_tsklock; + spinlock_t ckrm_tsklock; void *ce_data; #ifdef CONFIG_CKRM_TYPE_TASKCLASS - // .. 
Hubertus should change to CONFIG_CKRM_TYPE_TASKCLASS struct ckrm_task_class *taskclass; - struct list_head taskclass_link; + struct list_head taskclass_link; #ifdef CONFIG_CKRM_CPU_SCHEDULE struct ckrm_cpu_class *cpu_class; - //track cpu demand of this task + /* track cpu demand of this task */ struct ckrm_cpu_demand_stat demand_stat; -#endif //CONFIG_CKRM_CPU_SCHEDULE -#endif // CONFIG_CKRM_TYPE_TASKCLASS +#endif /* CONFIG_CKRM_CPU_SCHEDULE */ +#endif /* CONFIG_CKRM_TYPE_TASKCLASS */ #ifdef CONFIG_CKRM_RES_MEM - struct list_head mm_peers; // list of tasks using same mm_struct -#endif // CONFIG_CKRM_RES_MEM -#endif // CONFIG_CKRM - struct task_delay_info delays; + struct list_head mm_peers; /* list of tasks using same mm_struct */ +#endif +#endif /* CONFIG_CKRM */ +#ifdef CONFIG_DELAY_ACCT + struct task_delay_info delays; +#endif }; static inline pid_t process_group(struct task_struct *tsk) @@ -1303,6 +1304,86 @@ extern void normalize_rt_tasks(void); #endif +/* API for registering delay info */ +#ifdef CONFIG_DELAY_ACCT + +#define test_delay_flag(tsk,flg) ((tsk)->flags & (flg)) +#define set_delay_flag(tsk,flg) ((tsk)->flags |= (flg)) +#define clear_delay_flag(tsk,flg) ((tsk)->flags &= ~(flg)) + +#define def_delay_var(var) unsigned long long var +#define get_delay(tsk,field) ((tsk)->delays.field) + +#define start_delay(var) ((var) = sched_clock()) +#define start_delay_set(var,flg) (set_delay_flag(current,flg),(var) = sched_clock()) + +#define inc_delay(tsk,field) (((tsk)->delays.field)++) + +/* because of hardware timer drifts in SMPs and tasks continuing on a different cpu + * than where the start_ts was taken there is a possibility that + * end_ts < start_ts by some usecs. In this case we ignore the diff + * and add nothing to the total.
+ */ +#ifdef CONFIG_SMP +#define test_ts_integrity(start_ts,end_ts) (likely((end_ts) > (start_ts))) +#else +#define test_ts_integrity(start_ts,end_ts) (1) +#endif + +#define add_delay_ts(tsk,field,start_ts,end_ts) \ + do { if (test_ts_integrity(start_ts,end_ts)) (tsk)->delays.field += ((end_ts)-(start_ts)); } while (0) + +#define add_delay_clear(tsk,field,start_ts,flg) \ + do { \ + unsigned long long now = sched_clock();\ + add_delay_ts(tsk,field,start_ts,now); \ + clear_delay_flag(tsk,flg); \ + } while (0) + +static inline void add_io_delay(unsigned long long dstart) +{ + struct task_struct * tsk = current; + unsigned long long now = sched_clock(); + unsigned long long val; + + if (test_ts_integrity(dstart,now)) + val = now - dstart; + else + val = 0; + if (test_delay_flag(tsk,PF_MEMIO)) { + tsk->delays.mem_iowait_total += val; + tsk->delays.num_memwaits++; + } else { + tsk->delays.iowait_total += val; + tsk->delays.num_iowaits++; + } + clear_delay_flag(tsk,PF_IOWAIT); +} + +inline static void init_delays(struct task_struct *tsk) +{ + memset((void*)&tsk->delays,0,sizeof(tsk->delays)); +} + +#else + +#define test_delay_flag(tsk,flg) (0) +#define set_delay_flag(tsk,flg) do { } while (0) +#define clear_delay_flag(tsk,flg) do { } while (0) + +#define def_delay_var(var) +#define get_delay(tsk,field) (0) + +#define start_delay(var) do { } while (0) +#define start_delay_set(var,flg) do { } while (0) + +#define inc_delay(tsk,field) do { } while (0) +#define add_delay_ts(tsk,field,start_ts,now) do { } while (0) +#define add_delay_clear(tsk,field,start_ts,flg) do { } while (0) +#define add_io_delay(dstart) do { } while (0) +#define init_delays(tsk) do { } while (0) +#endif + #endif /* __KERNEL__ */ #endif diff --git a/include/linux/vserver/inode.h b/include/linux/vserver/inode.h index d9587f219..a1054e831 100644 --- a/include/linux/vserver/inode.h +++ b/include/linux/vserver/inode.h @@ -57,10 +57,6 @@ extern int vc_set_iattr_v0(uint32_t, void __user *); extern int 
vc_get_iattr(uint32_t, void __user *); extern int vc_set_iattr(uint32_t, void __user *); -extern int vc_iattr_ioctl(struct dentry *de, - unsigned int cmd, - unsigned long arg); - #endif /* __KERNEL__ */ /* inode ioctls */ @@ -68,9 +64,6 @@ extern int vc_iattr_ioctl(struct dentry *de, #define FIOC_GETXFLG _IOR('x', 5, long) #define FIOC_SETXFLG _IOW('x', 6, long) -#define FIOC_GETIATTR _IOR('x', 7, long) -#define FIOC_SETIATTR _IOR('x', 8, long) - #else /* _VX_INODE_H */ #warning duplicate inclusion #endif /* _VX_INODE_H */ diff --git a/init/Kconfig b/init/Kconfig index 509119525..b425cfb2c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -202,22 +202,11 @@ config CKRM_RES_NUMTASKS depends on CKRM_TYPE_TASKCLASS default m help - Provides a Resource Controller for CKRM that allows limiting number of + Provides a Resource Controller for CKRM that allows limiting the number of tasks a task class can have. Say N if unsure, Y to use the feature. -config CKRM_RES_NUMTASKS_FORKRATE - tristate "Number of Tasks Resource Manager for Fork Rate" - depends on CKRM_RES_NUMTASKS - default y - help - Provides a Resource Controller for CKRM that allows limiting the rate - of tasks a task class can fork per hour. - - Say N if unsure, Y to use the feature.
- - config CKRM_CPU_SCHEDULE bool "CKRM CPU scheduler" depends on CKRM_TYPE_TASKCLASS diff --git a/kernel/ckrm/Makefile b/kernel/ckrm/Makefile index 0c3c98036..7ee24fb07 100644 --- a/kernel/ckrm/Makefile +++ b/kernel/ckrm/Makefile @@ -11,5 +11,5 @@ obj-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += ckrm_sockc.o obj-$(CONFIG_CKRM_RES_NUMTASKS) += ckrm_numtasks.o obj-$(CONFIG_CKRM_RES_LISTENAQ) += ckrm_listenaq.o obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_cpu_class.o ckrm_cpu_monitor.o -obj-$(CONFIG_CKRM_RES_MEM) += ckrm_mem.o +obj-$(CONFIG_CKRM_RES_MEM) += ckrm_memcore.o ckrm_memctlr.o obj-$(CONFIG_CKRM_RES_NULL) += ckrm_null_class.o diff --git a/kernel/ckrm/ckrm_cpu_class.c b/kernel/ckrm/ckrm_cpu_class.c index 929c22d97..301ccbb89 100644 --- a/kernel/ckrm/ckrm_cpu_class.c +++ b/kernel/ckrm/ckrm_cpu_class.c @@ -145,8 +145,6 @@ static void ckrm_free_cpu_class(void *my_res) struct ckrm_cpu_class *cls = my_res, *parres, *childres; ckrm_core_class_t *child = NULL; int maxlimit; - ckrm_lrq_t* queue; - int i; if (!cls) return; @@ -154,15 +152,6 @@ static void ckrm_free_cpu_class(void *my_res) /*the default class can't be freed*/ if (cls == get_default_cpu_class()) return; -#if 1 -#warning "ACB: Remove freed class from any classqueues [PL #4233]" - for (i = 0 ; i < NR_CPUS ; i++) { - queue = &cls->local_queues[i]; - if (cls_in_classqueue(&queue->classqueue_linkobj)) - classqueue_dequeue(queue->classqueue, - &queue->classqueue_linkobj); - } -#endif // Assuming there will be no children when this function is called parres = ckrm_get_cpu_class(cls->parent); diff --git a/kernel/ckrm/ckrm_cpu_monitor.c b/kernel/ckrm/ckrm_cpu_monitor.c index 5f59b375e..23f48ec02 100644 --- a/kernel/ckrm/ckrm_cpu_monitor.c +++ b/kernel/ckrm/ckrm_cpu_monitor.c @@ -841,9 +841,8 @@ static void adjust_lrq_weight(struct ckrm_cpu_class *clsptr, int cpu_online) total_pressure += lrq->lrq_load; } -#define FIX_SHARES -#ifdef FIX_SHARES -#warning "ACB: fix share initialization problem [PL #4227]" +#if 1 +#warning "ACB 
taking out suspicious early return" #else if (! total_pressure) return; @@ -860,10 +859,6 @@ static void adjust_lrq_weight(struct ckrm_cpu_class *clsptr, int cpu_online) /*give idle class a high share to boost interactiveness */ lw = cpu_class_weight(clsptr); else { -#ifdef FIX_SHARES - if (! total_pressure) - return; -#endif lw = lrq->lrq_load * class_weight; do_div(lw,total_pressure); if (!lw) @@ -965,11 +960,9 @@ static int thread_exit = 0; static int ckrm_cpu_monitord(void *nothing) { daemonize("ckrm_cpu_ctrld"); - current->flags |= PF_NOFREEZE; - for (;;) { /*sleep for sometime before next try*/ - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(CPU_MONITOR_INTERVAL); ckrm_cpu_monitor(1); if (thread_exit) { diff --git a/kernel/ckrm/ckrm_mem.c b/kernel/ckrm/ckrm_mem.c deleted file mode 100644 index 736b579c7..000000000 --- a/kernel/ckrm/ckrm_mem.c +++ /dev/null @@ -1,981 +0,0 @@ -/* ckrm_mem.c - Memory Resource Manager for CKRM - * - * Copyright (C) Chandra Seetharaman, IBM Corp. 2004 - * - * Provides a Memory Resource controller for CKRM - * - * Latest version, more details at http://ckrm.sf.net - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define MEM_NAME "mem" - -#define CKRM_MEM_MAX_HIERARCHY 2 // allows only upto 2 levels - 0, 1 & 2 - -/* all 1-level memory_share_class are chained together */ -LIST_HEAD(ckrm_memclass_list); -LIST_HEAD(ckrm_shrink_list); -spinlock_t ckrm_mem_lock; // protects both lists above -unsigned int ckrm_tot_lru_pages; // total # of pages in the system - // currently doesn't handle memory add/remove -struct ckrm_mem_res *ckrm_mem_root_class; -atomic_t ckrm_mem_real_count = ATOMIC_INIT(0); -static void ckrm_mem_evaluate_all_pages(struct ckrm_mem_res *); -int ckrm_nr_mem_classes = 0; - -EXPORT_SYMBOL_GPL(ckrm_memclass_list); -EXPORT_SYMBOL_GPL(ckrm_shrink_list); -EXPORT_SYMBOL_GPL(ckrm_mem_lock); -EXPORT_SYMBOL_GPL(ckrm_tot_lru_pages); -EXPORT_SYMBOL_GPL(ckrm_mem_root_class); -EXPORT_SYMBOL_GPL(ckrm_mem_real_count); -EXPORT_SYMBOL_GPL(ckrm_nr_mem_classes); - -/* Initialize rescls values - * May be called on each rcfs unmount or as part of error recovery - * to make share values sane. - * Does not traverse hierarchy reinitializing children. 
- */ - -void -memclass_release(struct kref *kref) -{ - struct ckrm_mem_res *cls = container_of(kref, struct ckrm_mem_res, nr_users); - BUG_ON(ckrm_memclass_valid(cls)); - kfree(cls); -} -EXPORT_SYMBOL_GPL(memclass_release); - -static void -set_ckrm_tot_pages(void) -{ - struct zone *zone; - int tot_lru_pages = 0; - - for_each_zone(zone) { - tot_lru_pages += zone->nr_active; - tot_lru_pages += zone->nr_inactive; - tot_lru_pages += zone->free_pages; - } - ckrm_tot_lru_pages = tot_lru_pages; -} - -static void -mem_res_initcls_one(struct ckrm_mem_res *res) -{ - int zindex = 0; - struct zone *zone; - - memset(res, 0, sizeof(struct ckrm_mem_res)); - - res->shares.my_guarantee = CKRM_SHARE_DONTCARE; - res->shares.my_limit = CKRM_SHARE_DONTCARE; - res->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; - res->shares.max_limit = CKRM_SHARE_DFLT_MAX_LIMIT; - res->shares.unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; - res->shares.cur_max_limit = 0; - - res->pg_guar = CKRM_SHARE_DONTCARE; - res->pg_limit = CKRM_SHARE_DONTCARE; - - INIT_LIST_HEAD(&res->shrink_list); - INIT_LIST_HEAD(&res->mcls_list); - - for_each_zone(zone) { - INIT_LIST_HEAD(&res->ckrm_zone[zindex].active_list); - INIT_LIST_HEAD(&res->ckrm_zone[zindex].inactive_list); - INIT_LIST_HEAD(&res->ckrm_zone[zindex].victim_list); - res->ckrm_zone[zindex].nr_active = 0; - res->ckrm_zone[zindex].nr_inactive = 0; - res->ckrm_zone[zindex].zone = zone; - res->ckrm_zone[zindex].memcls = res; - zindex++; - } - - res->pg_unused = 0; - res->nr_dontcare = 1; // for default class - kref_init(&res->nr_users); -} - -static void -set_impl_guar_children(struct ckrm_mem_res *parres) -{ - ckrm_core_class_t *child = NULL; - struct ckrm_mem_res *cres; - int nr_dontcare = 1; // for defaultclass - int guar, impl_guar; - int resid = mem_rcbs.resid; - - ckrm_lock_hier(parres->core); - while ((child = ckrm_get_next_child(parres->core, child)) != NULL) { - cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res); - // treat 
NULL cres as don't care as that child is just being - // created. - // FIXME: need a better way to handle this case. - if (!cres || cres->pg_guar == CKRM_SHARE_DONTCARE) { - nr_dontcare++; - } - } - - parres->nr_dontcare = nr_dontcare; - guar = (parres->pg_guar == CKRM_SHARE_DONTCARE) ? - parres->impl_guar : parres->pg_unused; - impl_guar = guar / parres->nr_dontcare; - - while ((child = ckrm_get_next_child(parres->core, child)) != NULL) { - cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res); - if (cres && cres->pg_guar == CKRM_SHARE_DONTCARE) { - cres->impl_guar = impl_guar; - set_impl_guar_children(cres); - } - } - ckrm_unlock_hier(parres->core); - -} - -void -check_memclass(struct ckrm_mem_res *res, char *str) -{ - int i, act = 0, inact = 0; - struct zone *zone; - struct ckrm_zone *ckrm_zone; - struct list_head *pos; - struct page *page; - -#if 0 - printk("Check<%s> %s: total=%d\n", - str, res->core->name, atomic_read(&res->pg_total)); -#endif - for (i = 0; i < MAX_NR_ZONES; i++) { - act = 0; inact = 0; - ckrm_zone = &res->ckrm_zone[i]; - zone = ckrm_zone->zone; - spin_lock_irq(&zone->lru_lock); - pos = ckrm_zone->inactive_list.next; - while (pos != &ckrm_zone->inactive_list) { - page = list_entry(pos, struct page, lru); - pos = pos->next; - inact++; - } - pos = ckrm_zone->active_list.next; - while (pos != &ckrm_zone->active_list) { - page = list_entry(pos, struct page, lru); - pos = pos->next; - act++; - } - spin_unlock_irq(&zone->lru_lock); -#if 0 - printk("Check<%s>(zone=%d): act %ld, inae %ld lact %d lina %d\n", - str, i, ckrm_zone->nr_active, ckrm_zone->nr_inactive, - act, inact); -#endif - } -} -EXPORT_SYMBOL_GPL(check_memclass); - -static void * -mem_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent) -{ - struct ckrm_mem_res *res, *pres; - - if (mem_rcbs.resid == -1) { - return NULL; - } - - pres = ckrm_get_res_class(parent, mem_rcbs.resid, struct ckrm_mem_res); - if (pres && (pres->hier == CKRM_MEM_MAX_HIERARCHY)) { - 
printk(KERN_ERR "MEM_RC: only allows hieararchy of %d\n", - CKRM_MEM_MAX_HIERARCHY); - return NULL; - } - - if (unlikely((parent == NULL) && (ckrm_mem_root_class != NULL))) { - printk(KERN_ERR "MEM_RC: Only one root class is allowed\n"); - return NULL; - } - - if (unlikely((parent != NULL) && (ckrm_mem_root_class == NULL))) { - printk(KERN_ERR "MEM_RC: child class with no root class!!"); - return NULL; - } - - res = kmalloc(sizeof(struct ckrm_mem_res), GFP_ATOMIC); - - if (res) { - mem_res_initcls_one(res); - res->core = core; - res->parent = parent; - spin_lock_irq(&ckrm_mem_lock); - list_add(&res->mcls_list, &ckrm_memclass_list); - spin_unlock_irq(&ckrm_mem_lock); - if (parent == NULL) { - // I am part of the root class. So, set the max to - // number of pages available - res->pg_guar = ckrm_tot_lru_pages; - res->pg_unused = ckrm_tot_lru_pages; - res->pg_limit = ckrm_tot_lru_pages; - res->hier = 0; - ckrm_mem_root_class = res; - } else { - int guar; - res->hier = pres->hier + 1; - set_impl_guar_children(pres); - guar = (pres->pg_guar == CKRM_SHARE_DONTCARE) ? - pres->impl_guar : pres->pg_unused; - res->impl_guar = guar / pres->nr_dontcare; - } - ckrm_nr_mem_classes++; - } - else - printk(KERN_ERR "MEM_RC: alloc: GFP_ATOMIC failed\n"); - return res; -} - -/* - * It is the caller's responsibility to make sure that the parent only - * has chilren that are to be accounted. i.e if a new child is added - * this function should be called after it has been added, and if a - * child is deleted this should be called after the child is removed. 
- */ -static void -child_maxlimit_changed_local(struct ckrm_mem_res *parres) -{ - int maxlimit = 0; - struct ckrm_mem_res *childres; - ckrm_core_class_t *child = NULL; - - // run thru parent's children and get the new max_limit of the parent - ckrm_lock_hier(parres->core); - while ((child = ckrm_get_next_child(parres->core, child)) != NULL) { - childres = ckrm_get_res_class(child, mem_rcbs.resid, - struct ckrm_mem_res); - if (maxlimit < childres->shares.my_limit) { - maxlimit = childres->shares.my_limit; - } - } - ckrm_unlock_hier(parres->core); - parres->shares.cur_max_limit = maxlimit; -} - -/* - * Recalculate the guarantee and limit in # of pages... and propagate the - * same to children. - * Caller is responsible for protecting res and for the integrity of parres - */ -static void -recalc_and_propagate(struct ckrm_mem_res * res, struct ckrm_mem_res * parres) -{ - ckrm_core_class_t *child = NULL; - struct ckrm_mem_res *cres; - int resid = mem_rcbs.resid; - struct ckrm_shares *self = &res->shares; - - if (parres) { - struct ckrm_shares *par = &parres->shares; - - // calculate pg_guar and pg_limit - // - if (parres->pg_guar == CKRM_SHARE_DONTCARE || - self->my_guarantee == CKRM_SHARE_DONTCARE) { - res->pg_guar = CKRM_SHARE_DONTCARE; - } else if (par->total_guarantee) { - u64 temp = (u64) self->my_guarantee * parres->pg_guar; - do_div(temp, par->total_guarantee); - res->pg_guar = (int) temp; - res->impl_guar = CKRM_SHARE_DONTCARE; - } else { - res->pg_guar = 0; - res->impl_guar = CKRM_SHARE_DONTCARE; - } - - if (parres->pg_limit == CKRM_SHARE_DONTCARE || - self->my_limit == CKRM_SHARE_DONTCARE) { - res->pg_limit = CKRM_SHARE_DONTCARE; - } else if (par->max_limit) { - u64 temp = (u64) self->my_limit * parres->pg_limit; - do_div(temp, par->max_limit); - res->pg_limit = (int) temp; - } else { - res->pg_limit = 0; - } - } - - // Calculate unused units - if (res->pg_guar == CKRM_SHARE_DONTCARE) { - res->pg_unused = CKRM_SHARE_DONTCARE; - } else if 
(self->total_guarantee) { - u64 temp = (u64) self->unused_guarantee * res->pg_guar; - do_div(temp, self->total_guarantee); - res->pg_unused = (int) temp; - } else { - res->pg_unused = 0; - } - - // propagate to children - ckrm_lock_hier(res->core); - while ((child = ckrm_get_next_child(res->core, child)) != NULL) { - cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res); - recalc_and_propagate(cres, res); - } - ckrm_unlock_hier(res->core); - return; -} - -static void -mem_res_free(void *my_res) -{ - struct ckrm_mem_res *res = my_res; - struct ckrm_mem_res *pres; - - if (!res) - return; - - ckrm_mem_evaluate_all_pages(res); - - pres = ckrm_get_res_class(res->parent, mem_rcbs.resid, - struct ckrm_mem_res); - - if (pres) { - child_guarantee_changed(&pres->shares, - res->shares.my_guarantee, 0); - child_maxlimit_changed_local(pres); - recalc_and_propagate(pres, NULL); - set_impl_guar_children(pres); - } - - res->shares.my_guarantee = 0; - res->shares.my_limit = 0; - res->pg_guar = 0; - res->pg_limit = 0; - res->pg_unused = 0; - - spin_lock_irq(&ckrm_mem_lock); - list_del_init(&res->mcls_list); - spin_unlock_irq(&ckrm_mem_lock); - - res->core = NULL; - res->parent = NULL; - kref_put(&res->nr_users, memclass_release); - ckrm_nr_mem_classes--; - return; -} - -static int -mem_set_share_values(void *my_res, struct ckrm_shares *shares) -{ - struct ckrm_mem_res *res = my_res; - struct ckrm_mem_res *parres; - int rc; - - if (!res) - return -EINVAL; - - parres = ckrm_get_res_class(res->parent, mem_rcbs.resid, - struct ckrm_mem_res); - - rc = set_shares(shares, &res->shares, parres ? 
&parres->shares : NULL); - - if ((rc == 0) && (parres != NULL)) { - child_maxlimit_changed_local(parres); - recalc_and_propagate(parres, NULL); - set_impl_guar_children(parres); - } - - return rc; -} - -static int -mem_get_share_values(void *my_res, struct ckrm_shares *shares) -{ - struct ckrm_mem_res *res = my_res; - - if (!res) - return -EINVAL; - *shares = res->shares; - return 0; -} - -static int -mem_get_stats(void *my_res, struct seq_file *sfile) -{ - struct ckrm_mem_res *res = my_res; - struct zone *zone; - int active = 0, inactive = 0, fr = 0; - - if (!res) - return -EINVAL; - - seq_printf(sfile, "--------- Memory Resource stats start ---------\n"); - if (res == ckrm_mem_root_class) { - int i = 0; - for_each_zone(zone) { - active += zone->nr_active; - inactive += zone->nr_inactive; - fr += zone->free_pages; - i++; - } - seq_printf(sfile,"System: tot_pages=%d,active=%d,inactive=%d" - ",free=%d\n", ckrm_tot_lru_pages, - active, inactive, fr); - } - seq_printf(sfile, "Number of pages used(including pages lent to" - " children): %d\n", atomic_read(&res->pg_total)); - seq_printf(sfile, "Number of pages guaranteed: %d\n", - res->pg_guar); - seq_printf(sfile, "Maximum limit of pages: %d\n", - res->pg_limit); - seq_printf(sfile, "Total number of pages available" - "(after serving guarantees to children): %d\n", - res->pg_unused); - seq_printf(sfile, "Number of pages lent to children: %d\n", - res->pg_lent); - seq_printf(sfile, "Number of pages borrowed from the parent: %d\n", - res->pg_borrowed); - seq_printf(sfile, "---------- Memory Resource stats end ----------\n"); - - return 0; -} - -static void -mem_change_resclass(void *tsk, void *old, void *new) -{ - struct mm_struct *mm; - struct task_struct *task = tsk, *t1; - struct ckrm_mem_res *prev_mmcls; - - if (!task->mm || (new == old) || (old == (void *) -1)) - return; - - mm = task->active_mm; - spin_lock(&mm->peertask_lock); - prev_mmcls = mm->memclass; - - if (new == NULL) { - list_del_init(&task->mm_peers); - 
} else { - int found = 0; - list_for_each_entry(t1, &mm->tasklist, mm_peers) { - if (t1 == task) { - found++; - break; - } - } - if (!found) { - list_del_init(&task->mm_peers); - list_add_tail(&task->mm_peers, &mm->tasklist); - } - } - - spin_unlock(&mm->peertask_lock); - ckrm_mem_evaluate_mm(mm, (struct ckrm_mem_res *) new); - return; -} - -#define MEM_FAIL_OVER "fail_over" -#define MEM_SHRINK_AT "shrink_at" -#define MEM_SHRINK_TO "shrink_to" -#define MEM_SHRINK_COUNT "num_shrinks" -#define MEM_SHRINK_INTERVAL "shrink_interval" - -int ckrm_mem_fail_over = 110; -int ckrm_mem_shrink_at = 90; -static int ckrm_mem_shrink_to = 80; -static int ckrm_mem_shrink_count = 10; -static int ckrm_mem_shrink_interval = 10; - -EXPORT_SYMBOL_GPL(ckrm_mem_fail_over); -EXPORT_SYMBOL_GPL(ckrm_mem_shrink_at); - -static int -mem_show_config(void *my_res, struct seq_file *sfile) -{ - struct ckrm_mem_res *res = my_res; - - if (!res) - return -EINVAL; - - seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d\n", - MEM_NAME, - MEM_FAIL_OVER, ckrm_mem_fail_over, - MEM_SHRINK_AT, ckrm_mem_shrink_at, - MEM_SHRINK_TO, ckrm_mem_shrink_to, - MEM_SHRINK_COUNT, ckrm_mem_shrink_count, - MEM_SHRINK_INTERVAL, ckrm_mem_shrink_interval); - - return 0; -} - -// config file is available only at the root level, -// so assuming my_res to be the system level class -enum memclass_token { - mem_fail_over, - mem_shrink_at, - mem_shrink_to, - mem_shrink_count, - mem_shrink_interval, - mem_err -}; - -static match_table_t mem_tokens = { - {mem_fail_over, MEM_FAIL_OVER "=%d"}, - {mem_shrink_at, MEM_SHRINK_AT "=%d"}, - {mem_shrink_to, MEM_SHRINK_TO "=%d"}, - {mem_shrink_count, MEM_SHRINK_COUNT "=%d"}, - {mem_shrink_interval, MEM_SHRINK_INTERVAL "=%d"}, - {mem_err, NULL}, -}; - -static int -mem_set_config(void *my_res, const char *cfgstr) -{ - char *p; - struct ckrm_mem_res *res = my_res; - int err = 0, val; - - if (!res) - return -EINVAL; - - while ((p = strsep((char**)&cfgstr, ",")) != NULL) { - substring_t 
args[MAX_OPT_ARGS]; - int token; - if (!*p) - continue; - - token = match_token(p, mem_tokens, args); - switch (token) { - case mem_fail_over: - if (match_int(args, &val) || (val <= 0)) { - err = -EINVAL; - } else { - ckrm_mem_fail_over = val; - } - break; - case mem_shrink_at: - if (match_int(args, &val) || (val <= 0)) { - err = -EINVAL; - } else { - ckrm_mem_shrink_at = val; - } - break; - case mem_shrink_to: - if (match_int(args, &val) || (val < 0) || (val > 100)) { - err = -EINVAL; - } else { - ckrm_mem_shrink_to = val; - } - break; - case mem_shrink_count: - if (match_int(args, &val) || (val <= 0)) { - err = -EINVAL; - } else { - ckrm_mem_shrink_count = val; - } - break; - case mem_shrink_interval: - if (match_int(args, &val) || (val <= 0)) { - err = -EINVAL; - } else { - ckrm_mem_shrink_interval = val; - } - break; - default: - err = -EINVAL; - } - } - return err; -} - -static int -mem_reset_stats(void *my_res) -{ - struct ckrm_mem_res *res = my_res; - printk(KERN_INFO "MEM_RC: reset stats called for class %s\n", - res->core->name); - return 0; -} - -struct ckrm_res_ctlr mem_rcbs = { - .res_name = MEM_NAME, - .res_hdepth = CKRM_MEM_MAX_HIERARCHY, - .resid = -1, - .res_alloc = mem_res_alloc, - .res_free = mem_res_free, - .set_share_values = mem_set_share_values, - .get_share_values = mem_get_share_values, - .get_stats = mem_get_stats, - .change_resclass = mem_change_resclass, - .show_config = mem_show_config, - .set_config = mem_set_config, - .reset_stats = mem_reset_stats, -}; - -EXPORT_SYMBOL_GPL(mem_rcbs); - -int __init -init_ckrm_mem_res(void) -{ - struct ckrm_classtype *clstype; - int resid = mem_rcbs.resid; - - set_ckrm_tot_pages(); - spin_lock_init(&ckrm_mem_lock); - clstype = ckrm_find_classtype_by_name("taskclass"); - if (clstype == NULL) { - printk(KERN_INFO " Unknown ckrm classtype"); - return -ENOENT; - } - - if (resid == -1) { - resid = ckrm_register_res_ctlr(clstype, &mem_rcbs); - if (resid != -1) { - mem_rcbs.classtype = clstype; - } - } - 
return ((resid < 0) ? resid : 0); -} - -void __exit -exit_ckrm_mem_res(void) -{ - ckrm_unregister_res_ctlr(&mem_rcbs); - mem_rcbs.resid = -1; -} - -module_init(init_ckrm_mem_res) -module_exit(exit_ckrm_mem_res) - -int -ckrm_mem_get_shrink_to(void) -{ - return ckrm_mem_shrink_to; -} - -void -ckrm_at_limit(struct ckrm_mem_res *cls) -{ - struct zone *zone; - unsigned long now = jiffies; - - if (!cls || (cls->pg_limit == CKRM_SHARE_DONTCARE) || - ((cls->flags & MEM_AT_LIMIT) == MEM_AT_LIMIT)) { - return; - } - if ((cls->last_shrink > now) /* jiffies wrapped around */ || - (cls->last_shrink + (ckrm_mem_shrink_interval * HZ)) < now) { - cls->last_shrink = now; - cls->shrink_count = 0; - } - cls->shrink_count++; - if (cls->shrink_count > ckrm_mem_shrink_count) { - return; - } - spin_lock_irq(&ckrm_mem_lock); - list_add(&cls->shrink_list, &ckrm_shrink_list); - spin_unlock_irq(&ckrm_mem_lock); - cls->flags |= MEM_AT_LIMIT; - for_each_zone(zone) { - wakeup_kswapd(zone); - break; // only once is enough - } -} - -static int -ckrm_mem_evaluate_page_anon(struct page* page) -{ - struct ckrm_mem_res* pgcls = page_ckrmzone(page)->memcls; - struct ckrm_mem_res* maxshareclass = NULL; - struct anon_vma *anon_vma = (struct anon_vma *) page->mapping; - struct vm_area_struct *vma; - struct mm_struct* mm; - int ret = 0; - - spin_lock(&anon_vma->lock); - BUG_ON(list_empty(&anon_vma->head)); - list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { - mm = vma->vm_mm; - if (!maxshareclass || ckrm_mem_share_compare(maxshareclass, - mm->memclass) < 0) { - maxshareclass = mm->memclass; - } - } - spin_unlock(&anon_vma->lock); - - if (!maxshareclass) { - maxshareclass = ckrm_mem_root_class; - } - if (pgcls != maxshareclass) { - ckrm_change_page_class(page, maxshareclass); - ret = 1; - } - return ret; -} - -static int -ckrm_mem_evaluate_page_file(struct page* page) -{ - struct ckrm_mem_res* pgcls = page_ckrmzone(page)->memcls; - struct ckrm_mem_res* maxshareclass = NULL; - struct address_space 
*mapping = page->mapping; - struct vm_area_struct *vma = NULL; - pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); - struct prio_tree_iter iter; - struct mm_struct* mm; - int ret = 0; - - if (!mapping) - return 0; - - if (!spin_trylock(&mapping->i_mmap_lock)) - return 0; - - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, - pgoff, pgoff) { - mm = vma->vm_mm; - if (!maxshareclass || ckrm_mem_share_compare(maxshareclass, - mm->memclass)<0) - maxshareclass = mm->memclass; - } - spin_unlock(&mapping->i_mmap_lock); - - if (!maxshareclass) { - maxshareclass = ckrm_mem_root_class; - } - if (pgcls != maxshareclass) { - ckrm_change_page_class(page, maxshareclass); - ret = 1; - } - return ret; -} - -static int -ckrm_mem_evaluate_page(struct page* page) -{ - int ret = 0; - BUG_ON(page->ckrm_zone == NULL); - if (page->mapping) { - if (PageAnon(page)) - ret = ckrm_mem_evaluate_page_anon(page); - else - ret = ckrm_mem_evaluate_page_file(page); - } - return ret; -} - -static void -ckrm_mem_evaluate_all_pages(struct ckrm_mem_res* res) -{ - struct page *page; - struct ckrm_zone *ckrm_zone; - struct zone *zone; - struct list_head *pos, *next; - int i; - - check_memclass(res, "bef_eval_all_pgs"); - for (i = 0; i < MAX_NR_ZONES; i++) { - ckrm_zone = &res->ckrm_zone[i]; - zone = ckrm_zone->zone; - spin_lock_irq(&zone->lru_lock); - pos = ckrm_zone->inactive_list.next; - while (pos != &ckrm_zone->inactive_list) { - next = pos->next; - page = list_entry(pos, struct page, lru); - if (!ckrm_mem_evaluate_page(page)) - ckrm_change_page_class(page, - ckrm_mem_root_class); - pos = next; - } - pos = ckrm_zone->active_list.next; - while (pos != &ckrm_zone->active_list) { - next = pos->next; - page = list_entry(pos, struct page, lru); - if (!ckrm_mem_evaluate_page(page)) - ckrm_change_page_class(page, - ckrm_mem_root_class); - pos = next; - } - spin_unlock_irq(&zone->lru_lock); - } - check_memclass(res, "aft_eval_all_pgs"); - return; -} - -static inline int 
-class_migrate_pmd(struct mm_struct* mm, struct vm_area_struct* vma, - pmd_t* pmdir, unsigned long address, unsigned long end) -{ - pte_t *pte; - unsigned long pmd_end; - - if (pmd_none(*pmdir)) - return 0; - BUG_ON(pmd_bad(*pmdir)); - - pmd_end = (address+PMD_SIZE)&PMD_MASK; - if (end>pmd_end) - end = pmd_end; - - do { - pte = pte_offset_map(pmdir,address); - if (pte_present(*pte)) { - struct page *page = pte_page(*pte); - BUG_ON(mm->memclass == NULL); - if (page->mapping && page->ckrm_zone) { - struct zone *zone = page->ckrm_zone->zone; - spin_lock_irq(&zone->lru_lock); - ckrm_change_page_class(page, mm->memclass); - spin_unlock_irq(&zone->lru_lock); - } - } - address += PAGE_SIZE; - pte_unmap(pte); - pte++; - } while(address && (addresspgd_end)) - end = pgd_end; - - do { - class_migrate_pmd(mm,vma,pmd,address,end); - address = (address+PMD_SIZE)&PMD_MASK; - pmd++; - } while (address && (addressvm_start; - end = vma->vm_end; - - pgdir = pgd_offset(vma->vm_mm, address); - do { - class_migrate_pgd(mm,vma,pgdir,address,end); - address = (address + PGDIR_SIZE) & PGDIR_MASK; - pgdir++; - } while(address && (addresspeertask_lock hold */ -void -ckrm_mem_evaluate_mm(struct mm_struct* mm, struct ckrm_mem_res *def) -{ - struct task_struct *task; - struct ckrm_mem_res *maxshareclass = def; - struct vm_area_struct *vma; - - if (list_empty(&mm->tasklist)) { - /* We leave the mm->memclass untouched since we believe that one - * mm with no task associated will be deleted soon or attach - * with another task later. 
- */ - return; - } - - list_for_each_entry(task, &mm->tasklist, mm_peers) { - struct ckrm_mem_res* cls = ckrm_get_mem_class(task); - if (!cls) - continue; - if (!maxshareclass || - ckrm_mem_share_compare(maxshareclass,cls)<0 ) - maxshareclass = cls; - } - - if (maxshareclass && (mm->memclass != maxshareclass)) { - if (mm->memclass) { - kref_put(&mm->memclass->nr_users, memclass_release); - } - mm->memclass = maxshareclass; - kref_get(&maxshareclass->nr_users); - - /* Go through all VMA to migrate pages */ - down_read(&mm->mmap_sem); - vma = mm->mmap; - while(vma) { - class_migrate_vma(mm, vma); - vma = vma->vm_next; - } - up_read(&mm->mmap_sem); - } - return; -} - -void -ckrm_init_mm_to_task(struct mm_struct * mm, struct task_struct *task) -{ - spin_lock(&mm->peertask_lock); - if (!list_empty(&task->mm_peers)) { - printk(KERN_ERR "MEM_RC: Task list NOT empty!! emptying...\n"); - list_del_init(&task->mm_peers); - } - list_add_tail(&task->mm_peers, &mm->tasklist); - spin_unlock(&mm->peertask_lock); - if (mm->memclass != ckrm_get_mem_class(task)) - ckrm_mem_evaluate_mm(mm, NULL); - return; -} - -int -ckrm_memclass_valid(struct ckrm_mem_res *cls) -{ - struct ckrm_mem_res *tmp; - unsigned long flags; - - if (!cls || list_empty(&cls->mcls_list)) { - return 0; - } - spin_lock_irqsave(&ckrm_mem_lock, flags); - list_for_each_entry(tmp, &ckrm_memclass_list, mcls_list) { - if (tmp == cls) { - spin_unlock(&ckrm_mem_lock); - return 1; - } - } - spin_unlock_irqrestore(&ckrm_mem_lock, flags); - return 0; -} - -MODULE_LICENSE("GPL"); diff --git a/kernel/ckrm/ckrm_numtasks.c b/kernel/ckrm/ckrm_numtasks.c index c0583055d..21d8f9b17 100644 --- a/kernel/ckrm/ckrm_numtasks.c +++ b/kernel/ckrm/ckrm_numtasks.c @@ -11,8 +11,14 @@ * */ +/* Changes + * + * 31 Mar 2004: Created + * + */ + /* - * CKRM Resource controller for tracking number of tasks in a class. 
+ * Code Description: TBD */ #include @@ -22,43 +28,59 @@ #include #include #include +#include #include #include #include -#define TOTAL_NUM_TASKS (131072) /* 128 K */ +#define DEF_TOTAL_NUM_TASKS (131072) // 128 K +#define DEF_FORKRATE (1000000) // 1 million tasks +#define DEF_FORKRATE_INTERVAL (3600) // per hour #define NUMTASKS_DEBUG #define NUMTASKS_NAME "numtasks" - -struct ckrm_numtasks { - struct ckrm_core_class *core; /* the core i am part of... */ - struct ckrm_core_class *parent; /* parent of the core above. */ +#define SYS_TOTAL_TASKS "sys_total_tasks" +#define FORKRATE "forkrate" +#define FORKRATE_INTERVAL "forkrate_interval" + +static int total_numtasks = DEF_TOTAL_NUM_TASKS; +static int total_cnt_alloc = 0; +static int forkrate = DEF_FORKRATE; +static int forkrate_interval = DEF_FORKRATE_INTERVAL; +static ckrm_core_class_t *root_core; + +typedef struct ckrm_numtasks { + struct ckrm_core_class *core; // the core i am part of... + struct ckrm_core_class *parent; // parent of the core above. struct ckrm_shares shares; - spinlock_t cnt_lock; /* always grab parent's lock before child's */ - int cnt_guarantee; /* num_tasks guarantee in local units */ - int cnt_unused; /* has to borrow if more than this is needed */ - int cnt_limit; /* no tasks over this limit. */ - atomic_t cnt_cur_alloc; /* current alloc from self */ - atomic_t cnt_borrowed; /* borrowed from the parent */ - - int over_guarantee; /* turn on/off when cur_alloc goes */ - /* over/under guarantee */ - - /* internally maintained statictics to compare with max numbers */ - int limit_failures; /* # failures as request was over the limit */ - int borrow_sucesses; /* # successful borrows */ - int borrow_failures; /* # borrow failures */ - - /* Maximum the specific statictics has reached. 
*/ + spinlock_t cnt_lock; // always grab parent's lock before child's + int cnt_guarantee; // num_tasks guarantee in local units + int cnt_unused; // has to borrow if more than this is needed + int cnt_limit; // no tasks over this limit. + atomic_t cnt_cur_alloc; // current alloc from self + atomic_t cnt_borrowed; // borrowed from the parent + + int over_guarantee; // turn on/off when cur_alloc goes + // over/under guarantee + + // internally maintained statictics to compare with max numbers + int limit_failures; // # failures as request was over the limit + int borrow_sucesses; // # successful borrows + int borrow_failures; // # borrow failures + + // Maximum the specific statictics has reached. int max_limit_failures; int max_borrow_sucesses; int max_borrow_failures; - /* Total number of specific statistics */ + // Total number of specific statistics int tot_limit_failures; int tot_borrow_sucesses; int tot_borrow_failures; -}; + + // fork rate fields + int forks_in_period; + unsigned long period_start; +} ckrm_numtasks_t; struct ckrm_res_ctlr numtasks_rcbs; @@ -67,7 +89,7 @@ struct ckrm_res_ctlr numtasks_rcbs; * to make share values sane. * Does not traverse hierarchy reinitializing children. 
*/ -static void numtasks_res_initcls_one(struct ckrm_numtasks * res) +static void numtasks_res_initcls_one(ckrm_numtasks_t * res) { res->shares.my_guarantee = CKRM_SHARE_DONTCARE; res->shares.my_limit = CKRM_SHARE_DONTCARE; @@ -94,23 +116,58 @@ static void numtasks_res_initcls_one(struct ckrm_numtasks * res) res->tot_borrow_sucesses = 0; res->tot_borrow_failures = 0; + res->forks_in_period = 0; + res->period_start = jiffies; + atomic_set(&res->cnt_cur_alloc, 0); atomic_set(&res->cnt_borrowed, 0); return; } -static int numtasks_get_ref_local(struct ckrm_core_class *core, int force) +#if 0 +static void numtasks_res_initcls(void *my_res) { - int rc, resid = numtasks_rcbs.resid; - struct ckrm_numtasks *res; + ckrm_numtasks_t *res = my_res; + + /* Write a version which propagates values all the way down + and replace rcbs callback with that version */ + +} +#endif + +static int numtasks_get_ref_local(void *arg, int force) +{ + int rc, resid = numtasks_rcbs.resid, borrowed = 0; + unsigned long now = jiffies, chg_at; + ckrm_numtasks_t *res; + ckrm_core_class_t *core = arg; if ((resid < 0) || (core == NULL)) return 1; - res = ckrm_get_res_class(core, resid, struct ckrm_numtasks); + res = ckrm_get_res_class(core, resid, ckrm_numtasks_t); if (res == NULL) return 1; + // force is not associated with fork. So, if force is specified + // we don't have to bother about forkrate. 
+ if (!force) { + // Take care of wraparound situation + chg_at = res->period_start + forkrate_interval * HZ; + if (chg_at < res->period_start) { + chg_at += forkrate_interval * HZ; + now += forkrate_interval * HZ; + } + if (chg_at <= now) { + res->period_start = now; + res->forks_in_period = 0; + } + + if (res->forks_in_period >= forkrate) { + return 0; + } + } + atomic_inc(&res->cnt_cur_alloc); rc = 1; @@ -129,76 +186,91 @@ static int numtasks_get_ref_local(struct ckrm_core_class *core, int force) res->borrow_sucesses++; res->tot_borrow_sucesses++; res->over_guarantee = 1; + borrowed++; } else { res->borrow_failures++; res->tot_borrow_failures++; } - } else + } else { rc = force; + } } else if (res->over_guarantee) { res->over_guarantee = 0; - if (res->max_limit_failures < res->limit_failures) + if (res->max_limit_failures < res->limit_failures) { res->max_limit_failures = res->limit_failures; - if (res->max_borrow_sucesses < res->borrow_sucesses) + } + if (res->max_borrow_sucesses < res->borrow_sucesses) { res->max_borrow_sucesses = res->borrow_sucesses; - if (res->max_borrow_failures < res->borrow_failures) + } + if (res->max_borrow_failures < res->borrow_failures) { res->max_borrow_failures = res->borrow_failures; + } res->limit_failures = 0; res->borrow_sucesses = 0; res->borrow_failures = 0; } - if (!rc) + if (!rc) { atomic_dec(&res->cnt_cur_alloc); + } else if (!borrowed) { + total_cnt_alloc++; + if (!force) { // force is not associated with a real fork. 
+ res->forks_in_period++; + } + } return rc; } -static void numtasks_put_ref_local(struct ckrm_core_class *core) +static void numtasks_put_ref_local(void *arg) { int resid = numtasks_rcbs.resid; - struct ckrm_numtasks *res; + ckrm_numtasks_t *res; + ckrm_core_class_t *core = arg; - if ((resid == -1) || (core == NULL)) + if ((resid == -1) || (core == NULL)) { return; + } - res = ckrm_get_res_class(core, resid, struct ckrm_numtasks); + res = ckrm_get_res_class(core, resid, ckrm_numtasks_t); if (res == NULL) return; - - if (atomic_read(&res->cnt_cur_alloc)==0) + if (unlikely(atomic_read(&res->cnt_cur_alloc) == 0)) { + printk(KERN_WARNING "numtasks_put_ref: Trying to decrement " + "counter below 0\n"); return; - + } atomic_dec(&res->cnt_cur_alloc); - if (atomic_read(&res->cnt_borrowed) > 0) { atomic_dec(&res->cnt_borrowed); numtasks_put_ref_local(res->parent); + } else { + total_cnt_alloc--; } + return; } static void *numtasks_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent) { - struct ckrm_numtasks *res; + ckrm_numtasks_t *res; - res = kmalloc(sizeof(struct ckrm_numtasks), GFP_ATOMIC); + res = kmalloc(sizeof(ckrm_numtasks_t), GFP_ATOMIC); if (res) { - memset(res, 0, sizeof(struct ckrm_numtasks)); + memset(res, 0, sizeof(ckrm_numtasks_t)); res->core = core; res->parent = parent; numtasks_res_initcls_one(res); res->cnt_lock = SPIN_LOCK_UNLOCKED; if (parent == NULL) { - /* - * I am part of root class. So set the max tasks - * to available default. - */ - res->cnt_guarantee = TOTAL_NUM_TASKS; - res->cnt_unused = TOTAL_NUM_TASKS; - res->cnt_limit = TOTAL_NUM_TASKS; + // I am part of root class. So set the max tasks + // to available default + res->cnt_guarantee = total_numtasks; + res->cnt_unused = total_numtasks; + res->cnt_limit = total_numtasks; + root_core = core; // store the root core. 
} try_module_get(THIS_MODULE); } else { @@ -214,36 +286,47 @@ static void *numtasks_res_alloc(struct ckrm_core_class *core, */ static void numtasks_res_free(void *my_res) { - struct ckrm_numtasks *res = my_res, *parres, *childres; - struct ckrm_core_class *child = NULL; + ckrm_numtasks_t *res = my_res, *parres, *childres; + ckrm_core_class_t *child = NULL; int i, borrowed, maxlimit, resid = numtasks_rcbs.resid; if (!res) return; - /* Assuming there will be no children when this function is called */ + // Assuming there will be no children when this function is called - parres = ckrm_get_res_class(res->parent, resid, struct ckrm_numtasks); + parres = ckrm_get_res_class(res->parent, resid, ckrm_numtasks_t); - if ((borrowed = atomic_read(&res->cnt_borrowed)) > 0) - for (i = 0; i < borrowed; i++) - numtasks_put_ref_local(parres->core); - - /* return child's limit/guarantee to parent node */ + if (unlikely(atomic_read(&res->cnt_cur_alloc) < 0)) { + printk(KERN_WARNING "numtasks_res: counter below 0\n"); + } + if (unlikely(atomic_read(&res->cnt_cur_alloc) > 0 || + atomic_read(&res->cnt_borrowed) > 0)) { + printk(KERN_WARNING "numtasks_res_free: resource still " + "alloc'd %p\n", res); + if ((borrowed = atomic_read(&res->cnt_borrowed)) > 0) { + for (i = 0; i < borrowed; i++) { + numtasks_put_ref_local(parres->core); + } + } + } + // return child's limit/guarantee to parent node spin_lock(&parres->cnt_lock); child_guarantee_changed(&parres->shares, res->shares.my_guarantee, 0); - /* run thru parent's children and get the new max_limit of the parent */ + // run thru parent's children and get the new max_limit of the parent ckrm_lock_hier(parres->core); maxlimit = 0; while ((child = ckrm_get_next_child(parres->core, child)) != NULL) { - childres = ckrm_get_res_class(child, resid, struct ckrm_numtasks); - if (maxlimit < childres->shares.my_limit) + childres = ckrm_get_res_class(child, resid, ckrm_numtasks_t); + if (maxlimit < childres->shares.my_limit) { maxlimit = 
childres->shares.my_limit; + } } ckrm_unlock_hier(parres->core); - if (parres->shares.cur_max_limit < maxlimit) + if (parres->shares.cur_max_limit < maxlimit) { parres->shares.cur_max_limit = maxlimit; + } spin_unlock(&parres->cnt_lock); kfree(res); @@ -251,63 +334,67 @@ static void numtasks_res_free(void *my_res) return; } + /* * Recalculate the guarantee and limit in real units... and propagate the * same to children. * Caller is responsible for protecting res and for the integrity of parres */ static void -recalc_and_propagate(struct ckrm_numtasks * res, struct ckrm_numtasks * parres) +recalc_and_propagate(ckrm_numtasks_t * res, ckrm_numtasks_t * parres) { - struct ckrm_core_class *child = NULL; - struct ckrm_numtasks *childres; + ckrm_core_class_t *child = NULL; + ckrm_numtasks_t *childres; int resid = numtasks_rcbs.resid; if (parres) { struct ckrm_shares *par = &parres->shares; struct ckrm_shares *self = &res->shares; - /* calculate cnt_guarantee and cnt_limit */ - if ((parres->cnt_guarantee == CKRM_SHARE_DONTCARE) || - (self->my_guarantee == CKRM_SHARE_DONTCARE)) + // calculate cnt_guarantee and cnt_limit + // + if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) { res->cnt_guarantee = CKRM_SHARE_DONTCARE; - else if (par->total_guarantee) { + } else if (par->total_guarantee) { u64 temp = (u64) self->my_guarantee * parres->cnt_guarantee; do_div(temp, par->total_guarantee); res->cnt_guarantee = (int) temp; - } else + } else { res->cnt_guarantee = 0; + } - if ((parres->cnt_limit == CKRM_SHARE_DONTCARE) || - (self->my_limit == CKRM_SHARE_DONTCARE)) + if (parres->cnt_limit == CKRM_SHARE_DONTCARE) { res->cnt_limit = CKRM_SHARE_DONTCARE; - else if (par->max_limit) { + } else if (par->max_limit) { u64 temp = (u64) self->my_limit * parres->cnt_limit; do_div(temp, par->max_limit); res->cnt_limit = (int) temp; - } else + } else { res->cnt_limit = 0; + } - /* Calculate unused units */ - if ((res->cnt_guarantee == CKRM_SHARE_DONTCARE) || - (self->my_guarantee == 
CKRM_SHARE_DONTCARE)) + // Calculate unused units + if (res->cnt_guarantee == CKRM_SHARE_DONTCARE) { res->cnt_unused = CKRM_SHARE_DONTCARE; - else if (self->total_guarantee) { + } else if (self->total_guarantee) { u64 temp = (u64) self->unused_guarantee * res->cnt_guarantee; do_div(temp, self->total_guarantee); res->cnt_unused = (int) temp; - } else + } else { res->cnt_unused = 0; + } } - - /* propagate to children */ + // propagate to children ckrm_lock_hier(res->core); while ((child = ckrm_get_next_child(res->core, child)) != NULL) { - childres = ckrm_get_res_class(child, resid, struct ckrm_numtasks); - - spin_lock(&childres->cnt_lock); - recalc_and_propagate(childres, res); - spin_unlock(&childres->cnt_lock); + childres = ckrm_get_res_class(child, resid, ckrm_numtasks_t); + if (childres) { + spin_lock(&childres->cnt_lock); + recalc_and_propagate(childres, res); + spin_unlock(&childres->cnt_lock); + } else { + printk(KERN_ERR "%s: numtasks resclass missing\n",__FUNCTION__); + } } ckrm_unlock_hier(res->core); return; @@ -315,7 +402,7 @@ recalc_and_propagate(struct ckrm_numtasks * res, struct ckrm_numtasks * parres) static int numtasks_set_share_values(void *my_res, struct ckrm_shares *new) { - struct ckrm_numtasks *parres, *res = my_res; + ckrm_numtasks_t *parres, *res = my_res; struct ckrm_shares *cur = &res->shares, *par; int rc = -EINVAL, resid = numtasks_rcbs.resid; @@ -324,7 +411,7 @@ static int numtasks_set_share_values(void *my_res, struct ckrm_shares *new) if (res->parent) { parres = - ckrm_get_res_class(res->parent, resid, struct ckrm_numtasks); + ckrm_get_res_class(res->parent, resid, ckrm_numtasks_t); spin_lock(&parres->cnt_lock); spin_lock(&res->cnt_lock); par = &parres->shares; @@ -337,26 +424,28 @@ static int numtasks_set_share_values(void *my_res, struct ckrm_shares *new) rc = set_shares(new, cur, par); if ((rc == 0) && parres) { - /* Calculate parent's unused units */ - if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) + // Calculate parent's 
unused units + if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) { parres->cnt_unused = CKRM_SHARE_DONTCARE; - else if (par->total_guarantee) { + } else if (par->total_guarantee) { u64 temp = (u64) par->unused_guarantee * parres->cnt_guarantee; do_div(temp, par->total_guarantee); parres->cnt_unused = (int) temp; - } else + } else { parres->cnt_unused = 0; + } recalc_and_propagate(res, parres); } spin_unlock(&res->cnt_lock); - if (res->parent) + if (res->parent) { spin_unlock(&parres->cnt_lock); + } return rc; } static int numtasks_get_share_values(void *my_res, struct ckrm_shares *shares) { - struct ckrm_numtasks *res = my_res; + ckrm_numtasks_t *res = my_res; if (!res) return -EINVAL; @@ -366,12 +455,12 @@ static int numtasks_get_share_values(void *my_res, struct ckrm_shares *shares) static int numtasks_get_stats(void *my_res, struct seq_file *sfile) { - struct ckrm_numtasks *res = my_res; + ckrm_numtasks_t *res = my_res; if (!res) return -EINVAL; - seq_printf(sfile, "---------Number of tasks stats start---------\n"); + seq_printf(sfile, "Number of tasks resource:\n"); seq_printf(sfile, "Total Over limit failures: %d\n", res->tot_limit_failures); seq_printf(sfile, "Total Over guarantee sucesses: %d\n", @@ -385,7 +474,6 @@ static int numtasks_get_stats(void *my_res, struct seq_file *sfile) res->max_borrow_sucesses); seq_printf(sfile, "Maximum Over guarantee failures: %d\n", res->max_borrow_failures); - seq_printf(sfile, "---------Number of tasks stats end---------\n"); #ifdef NUMTASKS_DEBUG seq_printf(sfile, "cur_alloc %d; borrowed %d; cnt_guar %d; cnt_limit %d " @@ -402,29 +490,114 @@ static int numtasks_get_stats(void *my_res, struct seq_file *sfile) static int numtasks_show_config(void *my_res, struct seq_file *sfile) { - struct ckrm_numtasks *res = my_res; + ckrm_numtasks_t *res = my_res; if (!res) return -EINVAL; - seq_printf(sfile, "res=%s,parameter=somevalue\n", NUMTASKS_NAME); + seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d\n", NUMTASKS_NAME, + 
SYS_TOTAL_TASKS, total_numtasks, + FORKRATE, forkrate, + FORKRATE_INTERVAL, forkrate_interval); return 0; } +enum numtasks_token_t { + numtasks_token_total, + numtasks_token_forkrate, + numtasks_token_interval, + numtasks_token_err +}; + +static match_table_t numtasks_tokens = { + {numtasks_token_total, SYS_TOTAL_TASKS "=%d"}, + {numtasks_token_forkrate, FORKRATE "=%d"}, + {numtasks_token_interval, FORKRATE_INTERVAL "=%d"}, + {numtasks_token_err, NULL}, +}; + +static void reset_forkrates(ckrm_core_class_t *parent, unsigned long now) +{ + ckrm_numtasks_t *parres; + ckrm_core_class_t *child = NULL; + + parres = ckrm_get_res_class(parent, numtasks_rcbs.resid, + ckrm_numtasks_t); + if (!parres) { + return; + } + parres->forks_in_period = 0; + parres->period_start = now; + + ckrm_lock_hier(parent); + while ((child = ckrm_get_next_child(parent, child)) != NULL) { + reset_forkrates(child, now); + } + ckrm_unlock_hier(parent); +} + static int numtasks_set_config(void *my_res, const char *cfgstr) { - struct ckrm_numtasks *res = my_res; + char *p; + ckrm_numtasks_t *res = my_res; + int new_total, fr = 0, itvl = 0, err = 0; if (!res) return -EINVAL; - printk("numtasks config='%s'\n", cfgstr); - return 0; + + while ((p = strsep((char**)&cfgstr, ",")) != NULL) { + substring_t args[MAX_OPT_ARGS]; + int token; + if (!*p) + continue; + + token = match_token(p, numtasks_tokens, args); + switch (token) { + case numtasks_token_total: + if (match_int(args, &new_total) || + (new_total < total_cnt_alloc)) { + err = -EINVAL; + } else { + total_numtasks = new_total; + + // res is the default class, as config is present only + // in that directory + spin_lock(&res->cnt_lock); + res->cnt_guarantee = total_numtasks; + res->cnt_unused = total_numtasks; + res->cnt_limit = total_numtasks; + recalc_and_propagate(res, NULL); + spin_unlock(&res->cnt_lock); + } + break; + case numtasks_token_forkrate: + if (match_int(args, &fr) || (fr <= 0)) { + err = -EINVAL; + } else { + forkrate = fr; + } + 
break; + case numtasks_token_interval: + if (match_int(args, &itvl) || (itvl <= 0)) { + err = -EINVAL; + } else { + forkrate_interval = itvl; + } + break; + default: + err = -EINVAL; + } + } + if ((fr > 0) || (itvl > 0)) { + reset_forkrates(root_core, jiffies); + } + return err; } static void numtasks_change_resclass(void *task, void *old, void *new) { - struct ckrm_numtasks *oldres = old; - struct ckrm_numtasks *newres = new; + ckrm_numtasks_t *oldres = old; + ckrm_numtasks_t *newres = new; if (oldres != (void *)-1) { struct task_struct *tsk = task; @@ -433,13 +606,13 @@ static void numtasks_change_resclass(void *task, void *old, void *new) &(tsk->parent->taskclass->core); oldres = ckrm_get_res_class(old_core, numtasks_rcbs.resid, - struct ckrm_numtasks); + ckrm_numtasks_t); } - if (oldres) - numtasks_put_ref_local(oldres->core); + numtasks_put_ref_local(oldres->core); } - if (newres) + if (newres) { (void)numtasks_get_ref_local(newres->core, 1); + } } struct ckrm_res_ctlr numtasks_rcbs = { @@ -469,7 +642,7 @@ int __init init_ckrm_numtasks_res(void) if (resid == -1) { resid = ckrm_register_res_ctlr(clstype, &numtasks_rcbs); - printk("........init_ckrm_numtasks_res -> %d\n", resid); + printk(KERN_DEBUG "........init_ckrm_numtasks_res -> %d\n", resid); if (resid != -1) { ckrm_numtasks_register(numtasks_get_ref_local, numtasks_put_ref_local); @@ -481,13 +654,14 @@ int __init init_ckrm_numtasks_res(void) void __exit exit_ckrm_numtasks_res(void) { - if (numtasks_rcbs.resid != -1) + if (numtasks_rcbs.resid != -1) { ckrm_numtasks_register(NULL, NULL); + } ckrm_unregister_res_ctlr(&numtasks_rcbs); numtasks_rcbs.resid = -1; } module_init(init_ckrm_numtasks_res) -module_exit(exit_ckrm_numtasks_res) + module_exit(exit_ckrm_numtasks_res) -MODULE_LICENSE("GPL"); + MODULE_LICENSE("GPL"); diff --git a/kernel/ckrm/ckrm_numtasks_stub.c b/kernel/ckrm/ckrm_numtasks_stub.c index d9f15c98b..179e6b5d6 100644 --- a/kernel/ckrm/ckrm_numtasks_stub.c +++ b/kernel/ckrm/ckrm_numtasks_stub.c 
@@ -11,6 +11,12 @@ * */ +/* Changes + * + * 16 May 2004: Created + * + */ + #include #include #include @@ -28,7 +34,7 @@ void ckrm_numtasks_register(get_ref_t gr, put_ref_t pr) spin_unlock(&stub_lock); } -int numtasks_get_ref(struct ckrm_core_class *arg, int force) +int numtasks_get_ref(void *arg, int force) { int ret = 1; spin_lock(&stub_lock); @@ -39,7 +45,7 @@ int numtasks_get_ref(struct ckrm_core_class *arg, int force) return ret; } -void numtasks_put_ref(struct ckrm_core_class *arg) +void numtasks_put_ref(void *arg) { spin_lock(&stub_lock); if (real_put_ref) { diff --git a/kernel/exit.c b/kernel/exit.c index 8ca3c1711..0d55d3842 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -514,7 +514,7 @@ static inline void __exit_mm(struct task_struct * tsk) task_lock(tsk); tsk->mm = NULL; up_read(&mm->mmap_sem); - ckrm_task_clear_mm(tsk, mm); + ckrm_task_mm_clear(tsk, mm); enter_lazy_tlb(mm, current); task_unlock(tsk); mmput(mm); diff --git a/kernel/fork.c b/kernel/fork.c index 1902e9d2e..20e10311f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -309,7 +310,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm) mm->ioctx_list = NULL; mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm); mm->free_area_cache = TASK_UNMAPPED_BASE; - ckrm_mm_init(mm); + ckrm_mm_init(mm); if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; @@ -489,7 +490,8 @@ good_mm: ckrm_mm_setclass(mm, oldmm->memclass); tsk->mm = mm; tsk->active_mm = mm; - ckrm_init_mm_to_task(mm, tsk); + ckrm_mm_setclass(mm, oldmm->memclass); + ckrm_task_mm_set(mm, tsk); return 0; free_pt: diff --git a/kernel/vserver/inode.c b/kernel/vserver/inode.c index ca16e0cd4..8fdd30c62 100644 --- a/kernel/vserver/inode.c +++ b/kernel/vserver/inode.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -189,37 +188,6 @@ int vc_set_iattr(uint32_t id, void __user *data) return ret; } -int 
vc_iattr_ioctl(struct dentry *de, unsigned int cmd, unsigned long arg) -{ - void __user *data = (void __user *)arg; - struct vcmd_ctx_iattr_v1 vc_data; - int ret; - - /* - * I don't think we need any dget/dput pairs in here as long as - * this function is always called from sys_ioctl i.e., de is - * a field of a struct file that is guaranteed not to be freed. - */ - if (cmd == FIOC_SETIATTR) { - if (!capable(CAP_SYS_ADMIN) || !capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; - if (copy_from_user (&vc_data, data, sizeof(vc_data))) - return -EFAULT; - ret = __vc_set_iattr(de, - &vc_data.xid, &vc_data.flags, &vc_data.mask); - } - else { - if (!vx_check(0, VX_ADMIN)) - return -ENOSYS; - ret = __vc_get_iattr(de->d_inode, - &vc_data.xid, &vc_data.flags, &vc_data.mask); - } - - if (!ret && copy_to_user (data, &vc_data, sizeof(vc_data))) - ret = -EFAULT; - return ret; -} - #ifdef CONFIG_VSERVER_LEGACY diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8c206e407..2aedd4d9c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -35,6 +35,7 @@ #include #include #include +#include #include @@ -275,7 +276,7 @@ free_pages_bulk(struct zone *zone, int count, /* have to delete it as __free_pages_bulk list manipulates */ list_del(&page->lru); __free_pages_bulk(page, base, zone, area, order); - ckrm_clear_page_class(page); + ckrm_clear_page_class(page); ret++; } spin_unlock_irqrestore(&zone->lock, flags); @@ -371,9 +372,7 @@ static void prep_new_page(struct page *page, int order) #endif 1 << PG_checked | 1 << PG_mappedtodisk); page->private = 0; -#ifdef CONFIG_CKRM_RES_MEM - page->ckrm_zone = NULL; -#endif + ckrm_page_init(page); set_page_refs(page, order); } @@ -636,9 +635,8 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, */ can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait; - if (!ckrm_class_limit_ok((ckrm_get_mem_class(current)))) { + if (!in_interrupt() && !ckrm_class_limit_ok(ckrm_get_mem_class(p))) return NULL; - } zones = zonelist->zones; /* the 
list of zones suitable for gfp_mask */ @@ -1573,10 +1571,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat, } printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", zone_names[j], realsize, batch); -#ifndef CONFIG_CKRM_RES_MEM - INIT_LIST_HEAD(&zone->active_list); - INIT_LIST_HEAD(&zone->inactive_list); -#endif + ckrm_init_lists(zone); zone->nr_scan_active = 0; zone->nr_scan_inactive = 0; zone->nr_active = 0; diff --git a/mm/swap.c b/mm/swap.c index a7eb64921..015dc5e81 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -92,11 +92,7 @@ int rotate_reclaimable_page(struct page *page) spin_lock_irqsave(&zone->lru_lock, flags); if (PageLRU(page) && !PageActive(page)) { list_del(&page->lru); -#ifdef CONFIG_CKRM_RES_MEM - list_add_tail(&page->lru, &ckrm_zone->inactive_list); -#else - list_add_tail(&page->lru, &zone->inactive_list); -#endif + ckrm_add_tail_inactive(page); inc_page_state(pgrotated); } if (!test_clear_page_writeback(page)) diff --git a/mm/vmscan.c b/mm/vmscan.c index 6f7fba513..8fc4a3d5d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -589,7 +590,7 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc) nr_taken++; } zone->nr_inactive -= nr_taken; - ckrm_zone_dec_inactive(ckrm_zone, nr_taken); + ckrm_zone_sub_inactive(ckrm_zone, nr_taken); spin_unlock_irq(&zone->lru_lock); if (nr_taken == 0) @@ -616,11 +617,11 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc) BUG(); list_del(&page->lru); if (PageActive(page)) { - ckrm_zone_inc_active(ckrm_zone, 1); + ckrm_zone_add_active(ckrm_zone, 1); zone->nr_active++; list_add(&page->lru, active_list); } else { - ckrm_zone_inc_inactive(ckrm_zone, 1); + ckrm_zone_add_inactive(ckrm_zone, 1); zone->nr_inactive++; list_add(&page->lru, inactive_list); } @@ -709,7 +710,7 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) } zone->pages_scanned += pgscanned; zone->nr_active -= pgmoved; - 
ckrm_zone_dec_active(ckrm_zone, pgmoved); + ckrm_zone_sub_active(ckrm_zone, pgmoved); spin_unlock_irq(&zone->lru_lock); /* @@ -770,8 +771,8 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) list_move(&page->lru, inactive_list); pgmoved++; if (!pagevec_add(&pvec, page)) { - ckrm_zone_inc_inactive(ckrm_zone, pgmoved); zone->nr_inactive += pgmoved; + ckrm_zone_add_inactive(ckrm_zone, pgmoved); spin_unlock_irq(&zone->lru_lock); pgdeactivate += pgmoved; pgmoved = 0; @@ -781,8 +782,8 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) spin_lock_irq(&zone->lru_lock); } } - ckrm_zone_inc_inactive(ckrm_zone, pgmoved); zone->nr_inactive += pgmoved; + ckrm_zone_add_inactive(ckrm_zone, pgmoved); pgdeactivate += pgmoved; if (buffer_heads_over_limit) { spin_unlock_irq(&zone->lru_lock); @@ -800,16 +801,16 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) list_move(&page->lru, active_list); pgmoved++; if (!pagevec_add(&pvec, page)) { - ckrm_zone_inc_active(ckrm_zone, pgmoved); zone->nr_active += pgmoved; + ckrm_zone_add_active(ckrm_zone, pgmoved); pgmoved = 0; spin_unlock_irq(&zone->lru_lock); __pagevec_release(&pvec); spin_lock_irq(&zone->lru_lock); } } - ckrm_zone_inc_active(ckrm_zone, pgmoved); zone->nr_active += pgmoved; + ckrm_zone_add_active(ckrm_zone, pgmoved); spin_unlock_irq(&zone->lru_lock); pagevec_release(&pvec); @@ -818,45 +819,6 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) } #ifdef CONFIG_CKRM_RES_MEM -static int -shrink_weight(struct ckrm_zone *czone) -{ - u64 temp; - struct zone *zone = czone->zone; - struct ckrm_mem_res *cls = czone->memcls; - int zone_usage, zone_guar, zone_total, guar, ret, cnt; - - zone_usage = czone->nr_active + czone->nr_inactive; - czone->active_over = czone->inactive_over = 0; - - if (zone_usage < SWAP_CLUSTER_MAX * 4) - return 0; - - if (cls->pg_guar == CKRM_SHARE_DONTCARE) { - // no guarantee for this class. 
use implicit guarantee - guar = cls->impl_guar / cls->nr_dontcare; - } else { - guar = cls->pg_unused / cls->nr_dontcare; - } - zone_total = zone->nr_active + zone->nr_inactive + zone->free_pages; - temp = (u64) guar * zone_total; - do_div(temp, ckrm_tot_lru_pages); - zone_guar = (int) temp; - - ret = ((zone_usage - zone_guar) > SWAP_CLUSTER_MAX) ? - (zone_usage - zone_guar) : 0; - if (ret) { - cnt = czone->nr_active - (2 * zone_guar / 3); - if (cnt > 0) - czone->active_over = cnt; - cnt = czone->active_over + czone->nr_inactive - - zone_guar / 3; - if (cnt > 0) - czone->inactive_over = cnt; - } - return ret; -} - static void shrink_ckrmzone(struct ckrm_zone *czone, struct scan_control *sc) { @@ -878,121 +840,96 @@ shrink_ckrmzone(struct ckrm_zone *czone, struct scan_control *sc) break; } } - - throttle_vm_writeout(); } } -/* insert an entry to the list and sort decendently*/ +/* FIXME: This function needs to be given more thought. */ static void -list_add_sort(struct list_head *entry, struct list_head *head) +ckrm_shrink_class(struct ckrm_mem_res *cls) { - struct ckrm_zone *czone, *new = - list_entry(entry, struct ckrm_zone, victim_list); - struct list_head* pos = head->next; - - while (pos != head) { - czone = list_entry(pos, struct ckrm_zone, victim_list); - if (new->shrink_weight > czone->shrink_weight) { - __list_add(entry, pos->prev, pos); - return; - } - pos = pos->next; - } - list_add_tail(entry, head); - return; -} + struct scan_control sc; + struct zone *zone; + int zindex = 0, cnt, act_credit = 0, inact_credit = 0; -static void -shrink_choose_victims(struct list_head *victims, - unsigned long nr_active, unsigned long nr_inactive) -{ - unsigned long nr; - struct ckrm_zone* czone; - struct list_head *pos, *next; - - pos = victims->next; - while ((pos != victims) && (nr_active || nr_inactive)) { - czone = list_entry(pos, struct ckrm_zone, victim_list); - - if (nr_active && czone->active_over) { - nr = min(nr_active, czone->active_over); - 
czone->shrink_active += nr; - czone->active_over -= nr; - nr_active -= nr; + sc.nr_mapped = read_page_state(nr_mapped); + sc.nr_scanned = 0; + sc.nr_reclaimed = 0; + sc.priority = 0; // always very high priority + + for_each_zone(zone) { + int zone_total, zone_limit, active_limit, + inactive_limit, clszone_limit; + struct ckrm_zone *czone; + u64 temp; + + czone = &cls->ckrm_zone[zindex]; + if (ckrm_test_set_shrink(czone)) + continue; + + zone->temp_priority = zone->prev_priority; + zone->prev_priority = sc.priority; + + zone_total = zone->nr_active + zone->nr_inactive + + zone->free_pages; + + temp = (u64) cls->pg_limit * zone_total; + do_div(temp, ckrm_tot_lru_pages); + zone_limit = (int) temp; + clszone_limit = (ckrm_mem_shrink_to * zone_limit) / 100; + active_limit = (2 * clszone_limit) / 3; // 2/3rd in active list + inactive_limit = clszone_limit / 3; // 1/3rd in inactive list + + czone->shrink_active = 0; + cnt = czone->nr_active + act_credit - active_limit; + if (cnt > 0) { + czone->shrink_active = (unsigned long) cnt; + act_credit = 0; + } else { + act_credit += cnt; } - if (nr_inactive && czone->inactive_over) { - nr = min(nr_inactive, czone->inactive_over); - czone->shrink_inactive += nr; - czone->inactive_over -= nr; - nr_inactive -= nr; + czone->shrink_inactive = 0; + cnt = czone->shrink_active + inact_credit + + (czone->nr_inactive - inactive_limit); + if (cnt > 0) { + czone->shrink_inactive = (unsigned long) cnt; + inact_credit = 0; + } else { + inact_credit += cnt; } - pos = pos->next; - } - pos = victims->next; - while (pos != victims) { - czone = list_entry(pos, struct ckrm_zone, victim_list); - next = pos->next; - if (czone->shrink_active == 0 && czone->shrink_inactive == 0) { - list_del_init(pos); - ckrm_clear_shrink(czone); + + if (czone->shrink_active || czone->shrink_inactive) { + sc.nr_to_reclaim = czone->shrink_inactive; + shrink_ckrmzone(czone, &sc); } - pos = next; - } - return; + zone->prev_priority = zone->temp_priority; + zindex++; + 
ckrm_clear_shrink(czone); + } } static void -shrink_get_victims(struct zone *zone, unsigned long nr_active, - unsigned long nr_inactive, struct list_head *victims) +ckrm_shrink_classes(void) { - struct list_head *pos; struct ckrm_mem_res *cls; - struct ckrm_zone *czone; - int zoneindex = zone_idx(zone); - - if (ckrm_nr_mem_classes <= 1) { - if (ckrm_mem_root_class) { - czone = ckrm_mem_root_class->ckrm_zone + zoneindex; - if (!ckrm_test_set_shrink(czone)) { - list_add(&czone->victim_list, victims); - czone->shrink_active = nr_active; - czone->shrink_inactive = nr_inactive; - } - } - return; - } - spin_lock_irq(&ckrm_mem_lock); - list_for_each_entry(cls, &ckrm_memclass_list, mcls_list) { - czone = cls->ckrm_zone + zoneindex; - if (ckrm_test_set_shrink(czone)) - continue; - czone->shrink_active = 0; - czone->shrink_inactive = 0; - czone->shrink_weight = shrink_weight(czone); - if (czone->shrink_weight) { - list_add_sort(&czone->victim_list, victims); - } else { - ckrm_clear_shrink(czone); - } - } - pos = victims->next; - while (pos != victims) { - czone = list_entry(pos, struct ckrm_zone, victim_list); - pos = pos->next; - } - shrink_choose_victims(victims, nr_active, nr_inactive); - spin_unlock_irq(&ckrm_mem_lock); - pos = victims->next; - while (pos != victims) { - czone = list_entry(pos, struct ckrm_zone, victim_list); - pos = pos->next; + spin_lock(&ckrm_mem_lock); + while (!ckrm_shrink_list_empty()) { + cls = list_entry(ckrm_shrink_list.next, struct ckrm_mem_res, + shrink_list); + list_del(&cls->shrink_list); + cls->flags &= ~CLS_AT_LIMIT; + spin_unlock(&ckrm_mem_lock); + ckrm_shrink_class(cls); + spin_lock(&ckrm_mem_lock); } + spin_unlock(&ckrm_mem_lock); } -#endif /* CONFIG_CKRM_RES_MEM */ + +#else +#define ckrm_shrink_classes() do { } while(0) +#endif /* * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. 
@@ -1037,9 +974,9 @@ shrink_zone(struct zone *zone, struct scan_control *sc) czone = list_entry(pos, struct ckrm_zone, victim_list); next = pos->next; list_del_init(pos); - ckrm_clear_shrink(czone); sc->nr_to_reclaim = czone->shrink_inactive; shrink_ckrmzone(czone, sc); + ckrm_clear_shrink(czone); pos = next; } } @@ -1064,97 +1001,6 @@ shrink_zone(struct zone *zone, struct scan_control *sc) #endif } -#ifdef CONFIG_CKRM_RES_MEM -// This function needs to be given more thought. -// Shrink the class to be at shrink_to%" of its limit -static void -ckrm_shrink_class(struct ckrm_mem_res *cls) -{ - struct scan_control sc; - struct zone *zone; - int zindex = 0, cnt, act_credit = 0, inact_credit = 0; - int shrink_to = ckrm_mem_get_shrink_to(); - - sc.nr_mapped = read_page_state(nr_mapped); - sc.nr_scanned = 0; - sc.nr_reclaimed = 0; - sc.priority = 0; // always very high priority - - check_memclass(cls, "bef_shnk_cls"); - for_each_zone(zone) { - int zone_total, zone_limit, active_limit, - inactive_limit, clszone_limit; - struct ckrm_zone *czone; - u64 temp; - - czone = &cls->ckrm_zone[zindex]; - if (ckrm_test_set_shrink(czone)) - continue; - - zone->temp_priority = zone->prev_priority; - zone->prev_priority = sc.priority; - - zone_total = zone->nr_active + zone->nr_inactive - + zone->free_pages; - - temp = (u64) cls->pg_limit * zone_total; - do_div(temp, ckrm_tot_lru_pages); - zone_limit = (int) temp; - clszone_limit = (shrink_to * zone_limit) / 100; - active_limit = (2 * clszone_limit) / 3; // 2/3rd in active list - inactive_limit = clszone_limit / 3; // 1/3rd in inactive list - - czone->shrink_active = 0; - cnt = czone->nr_active + act_credit - active_limit; - if (cnt > 0) { - czone->shrink_active = (unsigned long) cnt; - } else { - act_credit += cnt; - } - - czone->shrink_inactive = 0; - cnt = czone->shrink_active + inact_credit + - (czone->nr_inactive - inactive_limit); - if (cnt > 0) { - czone->shrink_inactive = (unsigned long) cnt; - } else { - inact_credit += cnt; - 
} - - - if (czone->shrink_active || czone->shrink_inactive) { - sc.nr_to_reclaim = czone->shrink_inactive; - shrink_ckrmzone(czone, &sc); - } - zone->prev_priority = zone->temp_priority; - zindex++; - ckrm_clear_shrink(czone); - } - check_memclass(cls, "aft_shnk_cls"); -} - -static void -ckrm_shrink_classes(void) -{ - struct ckrm_mem_res *cls; - - spin_lock_irq(&ckrm_mem_lock); - while (!ckrm_shrink_list_empty()) { - cls = list_entry(ckrm_shrink_list.next, struct ckrm_mem_res, - shrink_list); - list_del(&cls->shrink_list); - cls->flags &= ~MEM_AT_LIMIT; - spin_unlock_irq(&ckrm_mem_lock); - ckrm_shrink_class(cls); - spin_lock_irq(&ckrm_mem_lock); - } - spin_unlock_irq(&ckrm_mem_lock); -} - -#else -#define ckrm_shrink_classes() do { } while(0) -#endif - /* * This is the direct reclaim path, for page-allocating processes. We only * try to reclaim pages from zones which will satisfy the caller's allocation @@ -1492,7 +1338,7 @@ static int kswapd(void *p) if (!ckrm_shrink_list_empty()) ckrm_shrink_classes(); - else + else balance_pgdat(pgdat, 0); } return 0; diff --git a/scripts/kernel-2.6-planetlab.spec b/scripts/kernel-2.6-planetlab.spec index e516b27d7..ca2935d13 100644 --- a/scripts/kernel-2.6-planetlab.spec +++ b/scripts/kernel-2.6-planetlab.spec @@ -22,7 +22,7 @@ Summary: The Linux kernel (the core of the Linux operating system) %define kversion 2.6.%{sublevel} %define rpmversion 2.6.%{sublevel} %define rhbsys %([ -r /etc/beehive-root ] && echo || echo .`whoami`) -%define release 1.14_FC2.2.planetlab%{?date:.%{date}} +%define release 1.14_FC2.1.planetlab%{?date:.%{date}} %define signmodules 0 %define KVERREL %{PACKAGE_VERSION}-%{PACKAGE_RELEASE} -- 2.47.0