This commit was manufactured by cvs2svn to create tag after-kexec-2-6-10-patch
author Planet-Lab Support <support@planet-lab.org>
Mon, 8 Aug 2005 21:12:14 +0000 (21:12 +0000)
committer Planet-Lab Support <support@planet-lab.org>
Mon, 8 Aug 2005 21:12:14 +0000 (21:12 +0000)
'after-kexec-2-6-10-patch'.

27 files changed:
configs/kernel-2.6.10-i686-planetlab.config
fs/exec.c
fs/ioctl.c
fs/posix_acl.c
include/linux/ckrm_mem.h
include/linux/ckrm_mem_inline.h
include/linux/ckrm_tsk.h
include/linux/ext2_fs.h
include/linux/ext3_fs.h
include/linux/mm.h
include/linux/page-flags.h
include/linux/sched.h
include/linux/vserver/inode.h
init/Kconfig
kernel/ckrm/Makefile
kernel/ckrm/ckrm_cpu_class.c
kernel/ckrm/ckrm_cpu_monitor.c
kernel/ckrm/ckrm_mem.c [deleted file]
kernel/ckrm/ckrm_numtasks.c
kernel/ckrm/ckrm_numtasks_stub.c
kernel/exit.c
kernel/fork.c
kernel/vserver/inode.c
mm/page_alloc.c
mm/swap.c
mm/vmscan.c
scripts/kernel-2.6-planetlab.spec

index bd63671..8e46fe3 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.10-1.14_FC2.1.planetlab.2005.04.14
-# Sat May  7 01:45:01 2005
+# Linux kernel version: 2.6.10-1.14_FC2.1.planetlab.2005.03.31
+# Thu Mar 31 11:50:25 2005
 #
 CONFIG_X86=y
 CONFIG_MMU=y
@@ -33,10 +33,9 @@ CONFIG_CKRM=y
 CONFIG_RCFS_FS=y
 CONFIG_CKRM_TYPE_TASKCLASS=y
 CONFIG_CKRM_RES_NULL=m
-# CONFIG_CKRM_RES_MEM is not set
+CONFIG_CKRM_RES_MEM=y
 # CONFIG_CKRM_TYPE_SOCKETCLASS is not set
 CONFIG_CKRM_RES_NUMTASKS=y
-# CONFIG_CKRM_RES_NUMTASKS_FORKRATE is not set
 CONFIG_CKRM_CPU_SCHEDULE=y
 # CONFIG_CKRM_RES_BLKIO is not set
 CONFIG_CKRM_CPU_SCHEDULE_AT_BOOT=y
@@ -702,7 +701,7 @@ CONFIG_MD_RAID5=m
 CONFIG_MD_RAID6=m
 CONFIG_MD_MULTIPATH=m
 CONFIG_MD_FAULTY=m
-CONFIG_BLK_DEV_DM=y
+CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_MIRROR=m
@@ -791,7 +790,7 @@ CONFIG_INET_IPCOMP=m
 CONFIG_INET_TUNNEL=m
 # CONFIG_ACCEPT_QUEUES is not set
 CONFIG_IP_TCPDIAG=m
-# CONFIG_IP_TCPDIAG_IPV6 is not set
+CONFIG_IP_TCPDIAG_IPV6=y
 
 #
 # IP: Virtual Server Configuration
@@ -827,7 +826,13 @@ CONFIG_IP_VS_NQ=m
 #
 CONFIG_IP_VS_FTP=m
 CONFIG_ICMP_IPOD=y
-# CONFIG_IPV6 is not set
+CONFIG_IPV6=m
+CONFIG_IPV6_PRIVACY=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_INET6_TUNNEL=m
+CONFIG_IPV6_TUNNEL=m
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_DEBUG is not set
 CONFIG_BRIDGE_NETFILTER=y
@@ -904,6 +909,31 @@ CONFIG_IP_NF_ARP_MANGLE=m
 # CONFIG_IP_NF_COMPAT_IPFWADM is not set
 # CONFIG_IP_NF_CT_PROTO_GRE is not set
 
+#
+# IPv6: Netfilter Configuration
+#
+# CONFIG_IP6_NF_QUEUE is not set
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_LIMIT=m
+CONFIG_IP6_NF_MATCH_MAC=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_MULTIPORT=m
+CONFIG_IP6_NF_MATCH_OWNER=m
+CONFIG_IP6_NF_MATCH_MARK=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_AHESP=m
+CONFIG_IP6_NF_MATCH_LENGTH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_PHYSDEV=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_TARGET_MARK=m
+CONFIG_IP6_NF_RAW=m
+
 #
 # Bridge: Netfilter Configuration
 #
@@ -949,7 +979,7 @@ CONFIG_ATM_BR2684=m
 CONFIG_BRIDGE=m
 CONFIG_VLAN_8021Q=m
 # CONFIG_DECNET is not set
-CONFIG_LLC=m
+CONFIG_LLC=y
 # CONFIG_LLC2 is not set
 CONFIG_IPX=m
 # CONFIG_IPX_INTERN is not set
@@ -1008,9 +1038,98 @@ CONFIG_NETPOLL=y
 CONFIG_NETPOLL_TRAP=y
 CONFIG_NET_POLL_CONTROLLER=y
 # CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_TUX is not set
+CONFIG_IRDA=m
+
+#
+# IrDA protocols
+#
+CONFIG_IRLAN=m
+CONFIG_IRNET=m
+CONFIG_IRCOMM=m
+# CONFIG_IRDA_ULTRA is not set
+
+#
+# IrDA options
+#
+CONFIG_IRDA_CACHE_LAST_LSAP=y
+CONFIG_IRDA_FAST_RR=y
+# CONFIG_IRDA_DEBUG is not set
+
+#
+# Infrared-port device drivers
+#
+
+#
+# SIR device drivers
+#
+CONFIG_IRTTY_SIR=m
+
+#
+# Dongle support
+#
+CONFIG_DONGLE=y
+CONFIG_ESI_DONGLE=m
+CONFIG_ACTISYS_DONGLE=m
+CONFIG_TEKRAM_DONGLE=m
+CONFIG_LITELINK_DONGLE=m
+CONFIG_MA600_DONGLE=m
+CONFIG_GIRBIL_DONGLE=m
+CONFIG_MCP2120_DONGLE=m
+CONFIG_OLD_BELKIN_DONGLE=m
+CONFIG_ACT200L_DONGLE=m
+
+#
+# Old SIR device drivers
+#
+CONFIG_IRPORT_SIR=m
+
+#
+# Old Serial dongle support
+#
+# CONFIG_DONGLE_OLD is not set
+
+#
+# FIR device drivers
+#
+CONFIG_USB_IRDA=m
+CONFIG_SIGMATEL_FIR=m
+CONFIG_TOSHIBA_FIR=m
+CONFIG_VLSI_FIR=m
+CONFIG_BT=m
+CONFIG_BT_L2CAP=m
+CONFIG_BT_SCO=m
+CONFIG_BT_RFCOMM=m
+CONFIG_BT_RFCOMM_TTY=y
+CONFIG_BT_BNEP=m
+CONFIG_BT_BNEP_MC_FILTER=y
+CONFIG_BT_BNEP_PROTO_FILTER=y
+CONFIG_BT_CMTP=m
+CONFIG_BT_HIDP=m
+
+#
+# Bluetooth device drivers
+#
+CONFIG_BT_HCIUSB=m
+CONFIG_BT_HCIUSB_SCO=y
+CONFIG_BT_HCIUART=m
+CONFIG_BT_HCIUART_H4=y
+CONFIG_BT_HCIUART_BCSP=y
+CONFIG_BT_HCIUART_BCSP_TXCRC=y
+CONFIG_BT_HCIBCM203X=m
+CONFIG_BT_HCIBFUSB=m
+CONFIG_BT_HCIDTL1=m
+CONFIG_BT_HCIBT3C=m
+CONFIG_BT_HCIBLUECARD=m
+CONFIG_BT_HCIBTUART=m
+CONFIG_BT_HCIVHCI=m
+CONFIG_TUX=m
+
+#
+# TUX options
+#
+CONFIG_TUX_EXTCGI=y
+CONFIG_TUX_EXTENDED_LOG=y
+# CONFIG_TUX_DEBUG is not set
 CONFIG_NETDEVICES=y
 CONFIG_DUMMY=m
 CONFIG_BONDING=m
@@ -1108,7 +1227,13 @@ CONFIG_S2IO_NAPI=y
 #
 # Token Ring devices
 #
-# CONFIG_TR is not set
+CONFIG_TR=y
+CONFIG_IBMOL=m
+CONFIG_IBMLS=m
+CONFIG_3C359=m
+CONFIG_TMS380TR=m
+CONFIG_TMSPCI=m
+CONFIG_ABYSS=m
 
 #
 # Wireless LAN (non-hamradio)
@@ -1174,6 +1299,7 @@ CONFIG_PCMCIA_NMCLAN=m
 CONFIG_PCMCIA_SMC91C92=m
 CONFIG_PCMCIA_XIRC2PS=m
 CONFIG_PCMCIA_AXNET=m
+CONFIG_PCMCIA_IBMTR=m
 
 #
 # Wan interfaces
@@ -1210,9 +1336,20 @@ CONFIG_FDDI=y
 # CONFIG_DEFXX is not set
 CONFIG_SKFP=m
 # CONFIG_HIPPI is not set
-# CONFIG_PLIP is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
+CONFIG_PLIP=m
+CONFIG_PPP=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_PPP_DEFLATE=m
+# CONFIG_PPP_BSDCOMP is not set
+CONFIG_PPPOE=m
+CONFIG_PPPOATM=m
+CONFIG_SLIP=m
+CONFIG_SLIP_COMPRESSED=y
+CONFIG_SLIP_SMART=y
+# CONFIG_SLIP_MODE_SLIP6 is not set
 CONFIG_NET_FC=y
 # CONFIG_SHAPER is not set
 CONFIG_NETCONSOLE=m
@@ -1886,7 +2023,95 @@ CONFIG_LOGO_LINUX_CLUT224=y
 #
 # Sound
 #
-# CONFIG_SOUND is not set
+CONFIG_SOUND=m
+
+#
+# Advanced Linux Sound Architecture
+#
+CONFIG_SND=m
+CONFIG_SND_TIMER=m
+CONFIG_SND_PCM=m
+CONFIG_SND_HWDEP=m
+CONFIG_SND_RAWMIDI=m
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_RTCTIMER=m
+# CONFIG_SND_VERBOSE_PRINTK is not set
+# CONFIG_SND_DEBUG is not set
+
+#
+# Generic devices
+#
+CONFIG_SND_MPU401_UART=m
+CONFIG_SND_OPL3_LIB=m
+CONFIG_SND_VX_LIB=m
+CONFIG_SND_DUMMY=m
+CONFIG_SND_VIRMIDI=m
+CONFIG_SND_MTPAV=m
+# CONFIG_SND_SERIAL_U16550 is not set
+CONFIG_SND_MPU401=m
+
+#
+# PCI devices
+#
+CONFIG_SND_AC97_CODEC=m
+CONFIG_SND_ALI5451=m
+CONFIG_SND_ATIIXP=m
+CONFIG_SND_ATIIXP_MODEM=m
+CONFIG_SND_AU8810=m
+CONFIG_SND_AU8820=m
+CONFIG_SND_AU8830=m
+CONFIG_SND_AZT3328=m
+CONFIG_SND_BT87X=m
+# CONFIG_SND_BT87X_OVERCLOCK is not set
+CONFIG_SND_CS46XX=m
+CONFIG_SND_CS46XX_NEW_DSP=y
+CONFIG_SND_CS4281=m
+CONFIG_SND_EMU10K1=m
+CONFIG_SND_KORG1212=m
+CONFIG_SND_MIXART=m
+CONFIG_SND_NM256=m
+CONFIG_SND_RME32=m
+CONFIG_SND_RME96=m
+CONFIG_SND_RME9652=m
+CONFIG_SND_HDSP=m
+CONFIG_SND_TRIDENT=m
+CONFIG_SND_YMFPCI=m
+CONFIG_SND_ALS4000=m
+CONFIG_SND_CMIPCI=m
+CONFIG_SND_ENS1370=m
+CONFIG_SND_ENS1371=m
+CONFIG_SND_ES1938=m
+CONFIG_SND_ES1968=m
+CONFIG_SND_MAESTRO3=m
+CONFIG_SND_FM801=m
+CONFIG_SND_FM801_TEA575X=m
+CONFIG_SND_ICE1712=m
+CONFIG_SND_ICE1724=m
+CONFIG_SND_INTEL8X0=m
+CONFIG_SND_INTEL8X0M=m
+CONFIG_SND_SONICVIBES=m
+CONFIG_SND_VIA82XX=m
+CONFIG_SND_VX222=m
+
+#
+# USB devices
+#
+CONFIG_SND_USB_AUDIO=m
+CONFIG_SND_USB_USX2Y=m
+
+#
+# PCMCIA devices
+#
+
+#
+# Open Sound System
+#
+# CONFIG_SOUND_PRIME is not set
 
 #
 # USB support
@@ -1918,7 +2143,12 @@ CONFIG_USB_SL811_HCD=m
 #
 # USB Device Class drivers
 #
-# CONFIG_USB_BLUETOOTH_TTY is not set
+# CONFIG_USB_AUDIO is not set
+
+#
+# USB Bluetooth TTY can only be used with disabled Bluetooth subsystem
+#
+CONFIG_USB_MIDI=m
 CONFIG_USB_ACM=m
 CONFIG_USB_PRINTER=m
 
index 95ae49b..5f7f092 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -564,7 +564,7 @@ static int exec_mmap(struct mm_struct *mm)
        activate_mm(active_mm, mm);
        task_unlock(tsk);
        arch_pick_mmap_layout(mm);
-       ckrm_task_change_mm(tsk, old_mm, mm);
+       ckrm_task_mm_change(tsk, old_mm, mm);
        if (old_mm) {
                if (active_mm != old_mm) BUG();
                mmput(old_mm);
index 6af7a74..19e902d 100644 (file)
@@ -174,19 +174,6 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
                                error = vx_proc_ioctl(filp->f_dentry->d_inode, filp, cmd, arg);
                        break;
 #endif
-               case FIOC_SETIATTR:
-               case FIOC_GETIATTR:
-                       /*
-                        * Verify that this filp is a file object,
-                        * not (say) a socket.
-                        */
-                       error = -ENOTTY;
-                       if (S_ISREG(filp->f_dentry->d_inode->i_mode) ||
-                           S_ISDIR(filp->f_dentry->d_inode->i_mode))
-                               error = vc_iattr_ioctl(filp->f_dentry,
-                                                      cmd, arg);
-                       break;
-
                default:
                        error = -ENOTTY;
                        if (S_ISREG(filp->f_dentry->d_inode->i_mode))
index 9c67690..97fbb86 100644 (file)
@@ -215,10 +215,6 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want)
        const struct posix_acl_entry *pa, *pe, *mask_obj;
        int found = 0;
 
-       /* Prevent vservers from escaping chroot() barriers */
-       if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN))
-               return -EACCES;
-
        FOREACH_ACL_ENTRY(pa, acl, pe) {
                 switch(pa->e_tag) {
                         case ACL_USER_OBJ:
index 3712aef..1e4c70f 100644 (file)
@@ -29,8 +29,8 @@ struct ckrm_zone {
        struct list_head active_list;
        struct list_head inactive_list;
 
-       unsigned long nr_active;        // # of pages in the active list
-       unsigned long nr_inactive;      // # of pages in the inactive list
+       unsigned long nr_active;
+       unsigned long nr_inactive;
        unsigned long active_over;
        unsigned long inactive_over;
 
@@ -38,68 +38,72 @@ struct ckrm_zone {
        unsigned long shrink_inactive;
        long shrink_weight;
        unsigned long shrink_flag;
-
-       struct list_head victim_list;   // list of ckrm_zones chosen for shrinking
+       struct list_head victim_list;   /* list of ckrm_zones chosen for
+                                        * shrinking. These are over their
+                                        * 'guarantee'
+                                        */
        struct zone *zone;
        struct ckrm_mem_res *memcls;
 };
 
 struct ckrm_mem_res {
        unsigned long flags;
-       struct ckrm_core_class *core;   // the core i am part of...
-       struct ckrm_core_class *parent; // parent of the core i am part of....
-       struct ckrm_shares shares;
-       struct list_head mcls_list;     // list of all 1-level classes
-       struct list_head shrink_list;   // list of classes need to be shrunk
-       struct kref nr_users;           // # of references to this class/data structure
-       atomic_t pg_total;              // # of pages used by this class
-       int pg_guar;                    // # of pages this class is guaranteed
-       int pg_limit;                   // max # of pages this class can get
-       int pg_borrowed;                // # of pages this class borrowed from its parent
-       int pg_lent;                    // # of pages this class lent to its children
-       int pg_unused;                  // # of pages left to this class (after giving the
-                                       // guarantees to children. need to borrow from parent if
-                                       // more than this is needed.
-       int impl_guar;                  // implicit guarantee for class with don't care guar
-       int nr_dontcare;                // # of children with don't care guarantee
+       struct ckrm_core_class *core;   /* the core i am part of... */
+       struct ckrm_core_class *parent; /* parent of the core i am part of */
+       struct ckrm_shares shares;      
+       struct list_head mcls_list;     /* list of all 1-level classes */
+       struct kref nr_users;           /* ref count */
+       atomic_t pg_total;              /* # of pages used by this class */
+       int pg_guar;                    /* absolute # of guarantee */
+       int pg_limit;                   /* absolute # of limit */
+       int pg_borrowed;                /* # of pages borrowed from parent */
+       int pg_lent;                    /* # of pages lent to children */
+       int pg_unused;                  /* # of pages left to this class
+                                        * (after giving the guarantees to
+                                        * children. need to borrow from
+                                        * parent if more than this is needed.
+                                        */
+       int hier;                       /* hierarchy level, root = 0 */
+       int impl_guar;                  /* for classes with don't care guar */
+       int nr_dontcare;                /* # of dont care children */
+
        struct ckrm_zone ckrm_zone[MAX_NR_ZONES];
+
+       struct list_head shrink_list;   /* list of classes that are near
+                                        * limit and need to be shrunk
+                                        */
        int shrink_count;
        unsigned long last_shrink;
-       int over_limit_failures;
-       int shrink_pages;               // # of pages to free in this class
-       int hier;                       // hiearchy, root = 0
 };
 
+#define CLS_SHRINK_BIT         (1)
+
+#define CLS_AT_LIMIT           (1)
+
 extern atomic_t ckrm_mem_real_count;
-extern unsigned int ckrm_tot_lru_pages;
-extern int ckrm_nr_mem_classes;
-extern struct list_head ckrm_shrink_list;
-extern struct list_head ckrm_memclass_list;
-extern spinlock_t ckrm_mem_lock;
 extern struct ckrm_res_ctlr mem_rcbs;
 extern struct ckrm_mem_res *ckrm_mem_root_class;
+extern struct list_head ckrm_memclass_list;
+extern struct list_head ckrm_shrink_list;
+extern spinlock_t ckrm_mem_lock;
+extern int ckrm_nr_mem_classes;
+extern unsigned int ckrm_tot_lru_pages;
+extern int ckrm_mem_shrink_count;
+extern int ckrm_mem_shrink_to;
+extern int ckrm_mem_shrink_interval ;
 
-#define page_ckrmzone(page)    ((page)->ckrm_zone)
-
-#define CLS_SHRINK_BIT (1)
-
-// used in flags. set when a class is more than 90% of its maxlimit
-#define MEM_AT_LIMIT   1
-
-extern void ckrm_init_mm_to_task(struct mm_struct *, struct task_struct *);
-extern void ckrm_mem_evaluate_mm(struct mm_struct *, struct ckrm_mem_res *);
-extern void ckrm_at_limit(struct ckrm_mem_res *);
-extern int ckrm_memclass_valid(struct ckrm_mem_res *);
-extern int ckrm_mem_get_shrink_to(void);
-extern void check_memclass(struct ckrm_mem_res *, char *);
+extern void ckrm_mem_migrate_mm(struct mm_struct *, struct ckrm_mem_res *);
+extern void ckrm_mem_migrate_all_pages(struct ckrm_mem_res *,
+                                               struct ckrm_mem_res *);
 extern void memclass_release(struct kref *);
-
+extern void shrink_get_victims(struct zone *, unsigned long ,
+                               unsigned long, struct list_head *);
+extern void ckrm_shrink_atlimit(struct ckrm_mem_res *);
 #else
 
-#define ckrm_init_mm_to_current(a)                     do {} while (0)
-#define ckrm_mem_evaluate_mm(a)                                do {} while (0)
-#define ckrm_init_mm_to_task(a,b)                      do {} while (0)
+#define ckrm_mem_migrate_mm(a, b)                      do {} while (0)
+#define ckrm_mem_migrate_all_pages(a, b)               do {} while (0)
 
-#endif // CONFIG_CKRM_RES_MEM
+#endif /* CONFIG_CKRM_RES_MEM */
 
-#endif //_LINUX_CKRM_MEM_H
+#endif /* _LINUX_CKRM_MEM_H */
index 1166956..fe75227 100644 (file)
@@ -26,8 +26,7 @@
 
 #ifdef CONFIG_CKRM_RES_MEM
 
-#define INACTIVE       0
-#define ACTIVE         1
+#define ckrm_shrink_list_empty() list_empty(&ckrm_shrink_list)
 
 static inline struct ckrm_mem_res *
 ckrm_get_mem_class(struct task_struct *tsk)
@@ -36,8 +35,6 @@ ckrm_get_mem_class(struct task_struct *tsk)
                struct ckrm_mem_res);
 }
 
-#define ckrm_shrink_list_empty()       list_empty(&ckrm_shrink_list)
-
 static inline void
 ckrm_set_shrink(struct ckrm_zone *cz)
 {
@@ -56,6 +53,18 @@ ckrm_clear_shrink(struct ckrm_zone *cz)
        clear_bit(CLS_SHRINK_BIT, &cz->shrink_flag);
 }
 
+static inline void
+set_page_ckrmzone( struct page *page, struct ckrm_zone *cz)
+{
+       page->ckrm_zone = cz;
+}
+
+static inline struct ckrm_zone *
+page_ckrmzone(struct page *page)
+{
+       return page->ckrm_zone;
+}
+
 /*
  * Currently, a shared page that is shared by multiple classes is charged
  * to a class with max available guarantee. Simply replace this function
@@ -67,7 +76,7 @@ ckrm_mem_share_compare(struct ckrm_mem_res *a, struct ckrm_mem_res *b)
        if (a == NULL)
                return -(b != NULL);
        if (b == NULL)
-               return 0;
+               return 1;
        if (a->pg_guar == b->pg_guar)
                return 0;
        if (a->pg_guar == CKRM_SHARE_DONTCARE)
@@ -81,29 +90,30 @@ static inline void
 incr_use_count(struct ckrm_mem_res *cls, int borrow)
 {
        extern int ckrm_mem_shrink_at;
-       if (unlikely(!cls))
+       struct ckrm_mem_res *parcls = ckrm_get_res_class(cls->parent,
+                               mem_rcbs.resid, struct ckrm_mem_res);
+
+       if (!cls)
                return;
-       BUG_ON(!ckrm_memclass_valid(cls));
-       atomic_inc(&cls->pg_total);
 
+       atomic_inc(&cls->pg_total);
        if (borrow)
                cls->pg_lent++;
-       if ((cls->pg_guar == CKRM_SHARE_DONTCARE) ||
-                       (atomic_read(&cls->pg_total) > cls->pg_unused)) {
-               struct ckrm_mem_res *parcls = ckrm_get_res_class(cls->parent,
+
+       parcls = ckrm_get_res_class(cls->parent,
                                mem_rcbs.resid, struct ckrm_mem_res);
-               if (parcls) {
-                       incr_use_count(parcls, 1);
-                       cls->pg_borrowed++;
-               }
-       } else {
+       if (parcls && ((cls->pg_guar == CKRM_SHARE_DONTCARE) ||
+                       (atomic_read(&cls->pg_total) > cls->pg_unused))) {
+               incr_use_count(parcls, 1);
+               cls->pg_borrowed++;
+       } else
                atomic_inc(&ckrm_mem_real_count);
-       }
-       if (unlikely((cls->pg_limit != CKRM_SHARE_DONTCARE) &&
+
+       if ((cls->pg_limit != CKRM_SHARE_DONTCARE) &&
                        (atomic_read(&cls->pg_total) >=
                        ((ckrm_mem_shrink_at * cls->pg_limit) / 100)) &&
-                       ((cls->flags & MEM_AT_LIMIT) != MEM_AT_LIMIT))) {
-               ckrm_at_limit(cls);
+                       ((cls->flags & CLS_AT_LIMIT) != CLS_AT_LIMIT)) {
+               ckrm_shrink_atlimit(cls);
        }
        return;
 }
@@ -111,9 +121,8 @@ incr_use_count(struct ckrm_mem_res *cls, int borrow)
 static inline void
 decr_use_count(struct ckrm_mem_res *cls, int borrowed)
 {
-       if (unlikely(!cls))
+       if (!cls)
                return;
-       BUG_ON(!ckrm_memclass_valid(cls));
        atomic_dec(&cls->pg_total);
        if (borrowed)
                cls->pg_lent--;
@@ -132,64 +141,50 @@ decr_use_count(struct ckrm_mem_res *cls, int borrowed)
 static inline void
 ckrm_set_page_class(struct page *page, struct ckrm_mem_res *cls)
 {
-       if (unlikely(cls == NULL)) {
-               cls = ckrm_mem_root_class;
-       }
-       if (likely(cls != NULL)) {
-               struct ckrm_zone *czone = &cls->ckrm_zone[page_zonenum(page)];
-               if (unlikely(page->ckrm_zone)) {
-                       kref_put(&cls->nr_users, memclass_release);
-               }
-               page->ckrm_zone = czone;
-               kref_get(&cls->nr_users);
-       } else {
-               page->ckrm_zone = NULL;
-       }
-}
+       struct ckrm_zone *new_czone, *old_czone;
 
-static inline void
-ckrm_set_pages_class(struct page *pages, int numpages, struct ckrm_mem_res *cls)
-{
-       int i;
-       for (i = 0; i < numpages; pages++, i++) {
-               ckrm_set_page_class(pages, cls);
-       }
-}
-
-static inline void
-ckrm_clear_page_class(struct page *page)
-{
-       if (likely(page->ckrm_zone != NULL)) {
-               if (CkrmAccount(page)) {
-                       decr_use_count(page->ckrm_zone->memcls, 0);
-                       ClearCkrmAccount(page);
+       if (!cls) {
+               if (!ckrm_mem_root_class) {
+                       set_page_ckrmzone(page, NULL);
+                       return;
                }
-               kref_put(&page->ckrm_zone->memcls->nr_users, memclass_release);
-               page->ckrm_zone = NULL;
+               cls = ckrm_mem_root_class;
        }
+       new_czone = &cls->ckrm_zone[page_zonenum(page)];
+       old_czone = page_ckrmzone(page);
+       
+       if (old_czone)
+               kref_put(&old_czone->memcls->nr_users, memclass_release);
+
+       set_page_ckrmzone(page, new_czone);
+       kref_get(&cls->nr_users);
+       incr_use_count(cls, 0);
+       SetPageCkrmAccount(page);
 }
 
 static inline void
 ckrm_change_page_class(struct page *page, struct ckrm_mem_res *newcls)
 {
-       struct ckrm_zone *old_czone = page->ckrm_zone, *new_czone;
+       struct ckrm_zone *old_czone = page_ckrmzone(page), *new_czone;
        struct ckrm_mem_res *oldcls;
 
-       if (unlikely(!old_czone || !newcls)) {
-               BUG_ON(CkrmAccount(page));
-               return;
+       if  (!newcls) {
+               if (!ckrm_mem_root_class)
+                       return;
+               newcls = ckrm_mem_root_class;
        }
-       BUG_ON(!CkrmAccount(page));
 
        oldcls = old_czone->memcls;
-       if (oldcls == NULL || (oldcls == newcls))
+       if (oldcls == newcls)
                return;
 
-       kref_put(&oldcls->nr_users, memclass_release);
-       decr_use_count(oldcls, 0);
-
-       page->ckrm_zone = new_czone = &newcls->ckrm_zone[page_zonenum(page)];
+       if (oldcls) {
+               kref_put(&oldcls->nr_users, memclass_release);
+               decr_use_count(oldcls, 0);
+       }
 
+       new_czone = &newcls->ckrm_zone[page_zonenum(page)];
+       set_page_ckrmzone(page, new_czone);
        kref_get(&newcls->nr_users);
        incr_use_count(newcls, 0);
 
@@ -205,34 +200,45 @@ ckrm_change_page_class(struct page *page, struct ckrm_mem_res *newcls)
        }
 }
 
+static inline void
+ckrm_clear_page_class(struct page *page)
+{
+       struct ckrm_zone *czone = page_ckrmzone(page);
+       if (czone != NULL) {
+               if (PageCkrmAccount(page)) {
+                       decr_use_count(czone->memcls, 0);
+                       ClearPageCkrmAccount(page);
+               }
+               kref_put(&czone->memcls->nr_users, memclass_release);
+               set_page_ckrmzone(page, NULL);
+       }
+}
+
 static inline void
 ckrm_mem_inc_active(struct page *page)
 {
-       struct ckrm_mem_res *cls = ckrm_get_mem_class(current) ?: ckrm_mem_root_class;
+       struct ckrm_mem_res *cls = ckrm_get_mem_class(current)
+                                               ?: ckrm_mem_root_class;
+       struct ckrm_zone *czone;
 
        if (cls == NULL)
                return;
-       BUG_ON(CkrmAccount(page));
-       BUG_ON(page->ckrm_zone != NULL);
 
        ckrm_set_page_class(page, cls);
-       incr_use_count(cls, 0);
-       SetCkrmAccount(page);
-       BUG_ON(page->ckrm_zone == NULL);
-       page->ckrm_zone->nr_active++;
-       list_add(&page->lru, &page->ckrm_zone->active_list);
+       czone = page_ckrmzone(page);
+       czone->nr_active++;
+       list_add(&page->lru, &czone->active_list);
 }
 
 static inline void
 ckrm_mem_dec_active(struct page *page)
 {
-       if (page->ckrm_zone == NULL)
+       struct ckrm_zone *czone = page_ckrmzone(page);
+       if (czone == NULL)
                return;
-       BUG_ON(page->ckrm_zone->memcls == NULL);
-       BUG_ON(!CkrmAccount(page));
 
        list_del(&page->lru);
-       page->ckrm_zone->nr_active--;
+       czone->nr_active--;
        ckrm_clear_page_class(page);
 }
 
@@ -240,39 +246,59 @@ ckrm_mem_dec_active(struct page *page)
 static inline void
 ckrm_mem_inc_inactive(struct page *page)
 {
-       struct ckrm_mem_res *cls = ckrm_get_mem_class(current) ?: ckrm_mem_root_class;
+       struct ckrm_mem_res *cls = ckrm_get_mem_class(current)
+                                               ?: ckrm_mem_root_class;
+       struct ckrm_zone *czone;
 
        if (cls == NULL)
                return;
-       BUG_ON(CkrmAccount(page));
-       BUG_ON(page->ckrm_zone != NULL);
 
        ckrm_set_page_class(page, cls);
-       incr_use_count(cls, 0);
-       SetCkrmAccount(page);
-       BUG_ON(page->ckrm_zone == NULL);
-       page->ckrm_zone->nr_inactive++;
-       list_add(&page->lru, &page->ckrm_zone->inactive_list);
+       czone = page_ckrmzone(page);
+       czone->nr_inactive++;
+       list_add(&page->lru, &czone->inactive_list);
 }
 
 static inline void
 ckrm_mem_dec_inactive(struct page *page)
 {
-       if (page->ckrm_zone == NULL)
+       struct ckrm_zone *czone = page_ckrmzone(page);
+       if (czone == NULL)
                return;
-       BUG_ON(page->ckrm_zone->memcls == NULL);
-       BUG_ON(!CkrmAccount(page));
 
-       page->ckrm_zone->nr_inactive--;
+       czone->nr_inactive--;
        list_del(&page->lru);
        ckrm_clear_page_class(page);
 }
 
+static inline void
+ckrm_zone_add_active(struct ckrm_zone *czone, int cnt)
+{
+       czone->nr_active += cnt;
+}
+
+static inline void
+ckrm_zone_add_inactive(struct ckrm_zone *czone, int cnt)
+{
+       czone->nr_inactive += cnt;
+}
+
+static inline void
+ckrm_zone_sub_active(struct ckrm_zone *czone, int cnt)
+{
+       czone->nr_active -= cnt;
+}
+
+static inline void
+ckrm_zone_sub_inactive(struct ckrm_zone *czone, int cnt)
+{
+       czone->nr_inactive -= cnt;
+}
+
 static inline int
 ckrm_class_limit_ok(struct ckrm_mem_res *cls)
 {
        int ret;
-       extern int ckrm_mem_fail_over;
 
        if ((mem_rcbs.resid == -1) || !cls) {
                return 1;
@@ -281,19 +307,25 @@ ckrm_class_limit_ok(struct ckrm_mem_res *cls)
                struct ckrm_mem_res *parcls = ckrm_get_res_class(cls->parent,
                                        mem_rcbs.resid, struct ckrm_mem_res);
                ret = (parcls ? ckrm_class_limit_ok(parcls) : 0);
-       } else {
-               ret = (atomic_read(&cls->pg_total) <=
-                       ((ckrm_mem_fail_over * cls->pg_limit) / 100));
-       }
+       } else
+               ret = (atomic_read(&cls->pg_total) <= cls->pg_limit);
+
+       /* If we are failing, just nudge the back end */
+       if (ret == 0)
+               ckrm_shrink_atlimit(cls);
 
-       if (ret == 0) {
-               // if we are failing... just nudge the back end
-               ckrm_at_limit(cls);
-       }
        return ret;
 }
 
-// task/mm initializations/cleanup
+/* Reset a page's CKRM state: clear its PG_ckrm_account bit, drop its ckrm_zone. */
+static inline void
+ckrm_page_init(struct page *page)
+{
+       page->flags &= ~(1UL << PG_ckrm_account);
+       set_page_ckrmzone(page, NULL);
+}
+
+/* task/mm initializations/cleanup */
 
 static inline void
 ckrm_task_mm_init(struct task_struct *tsk)
@@ -302,26 +334,42 @@ ckrm_task_mm_init(struct task_struct *tsk)
 }
 
 static inline void
-ckrm_task_change_mm(struct task_struct *tsk, struct mm_struct *oldmm, struct mm_struct *newmm)
+ckrm_task_mm_set(struct mm_struct * mm, struct task_struct *task)
+{
+       spin_lock(&mm->peertask_lock);
+       if (!list_empty(&task->mm_peers)) {
+               printk(KERN_ERR "MEM_RC: Task list NOT empty!! emptying...\n");
+               list_del_init(&task->mm_peers);
+       }
+       list_add_tail(&task->mm_peers, &mm->tasklist);
+       spin_unlock(&mm->peertask_lock);
+       if (mm->memclass != ckrm_get_mem_class(task))
+               ckrm_mem_migrate_mm(mm, NULL);
+       return;
+}
+
+static inline void
+ckrm_task_mm_change(struct task_struct *tsk,
+               struct mm_struct *oldmm, struct mm_struct *newmm)
 {
        if (oldmm) {
                spin_lock(&oldmm->peertask_lock);
                list_del(&tsk->mm_peers);
-               ckrm_mem_evaluate_mm(oldmm, NULL);
+               ckrm_mem_migrate_mm(oldmm, NULL);
                spin_unlock(&oldmm->peertask_lock);
        }
        spin_lock(&newmm->peertask_lock);
        list_add_tail(&tsk->mm_peers, &newmm->tasklist);
-       ckrm_mem_evaluate_mm(newmm, NULL);
+       ckrm_mem_migrate_mm(newmm, NULL);
        spin_unlock(&newmm->peertask_lock);
 }
 
 static inline void
-ckrm_task_clear_mm(struct task_struct *tsk, struct mm_struct *mm)
+ckrm_task_mm_clear(struct task_struct *tsk, struct mm_struct *mm)
 {
        spin_lock(&mm->peertask_lock);
        list_del_init(&tsk->mm_peers);
-       ckrm_mem_evaluate_mm(mm, NULL);
+       ckrm_mem_migrate_mm(mm, NULL);
        spin_unlock(&mm->peertask_lock);
 }
 
@@ -348,56 +396,65 @@ ckrm_mm_clearclass(struct mm_struct *mm)
        }
 }
 
-static inline void
-ckrm_zone_inc_active(struct ckrm_zone *czone, int cnt)
+/*
+ * Empty in this branch of the header; the variant after the #else below
+ * initialises zone->active_list and zone->inactive_list instead.
+ */
+static inline void ckrm_init_lists(struct zone *zone)                  {}
+
+/*
+ * Append page to the tail of the inactive_list of the ckrm_zone returned
+ * by page_ckrmzone(page).  Only the list is touched here, no counters.
+ * NOTE(review): assumes page_ckrmzone(page) is non-NULL - confirm, since
+ * ckrm_page_init() sets the page's ckrm zone to NULL.
+ */
+static inline void ckrm_add_tail_inactive(struct page *page)
 {
-       czone->nr_active += cnt;
+        struct ckrm_zone *ckrm_zone = page_ckrmzone(page);
+        list_add_tail(&page->lru, &ckrm_zone->inactive_list);
 }
 
-static inline void
-ckrm_zone_inc_inactive(struct ckrm_zone *czone, int cnt)
+#else
+
+#define ckrm_shrink_list_empty()               (1)
+
+/* Stub for the #else branch: there is no memory class, always NULL. */
+static inline void *
+ckrm_get_memclass(struct task_struct *tsk)
 {
-       czone->nr_inactive += cnt;
+       return NULL;
 }
 
-static inline void
-ckrm_zone_dec_active(struct ckrm_zone *czone, int cnt)
+static inline void ckrm_clear_page_class(struct page *p)               {}
+
+static inline void ckrm_mem_inc_active(struct page *p)                 {}
+static inline void ckrm_mem_dec_active(struct page *p)                 {}
+static inline void ckrm_mem_inc_inactive(struct page *p)               {}
+static inline void ckrm_mem_dec_inactive(struct page *p)               {}
+
+#define ckrm_zone_add_active(a, b)     do {} while (0)
+#define ckrm_zone_add_inactive(a, b)   do {} while (0)
+#define ckrm_zone_sub_active(a, b)     do {} while (0)
+#define ckrm_zone_sub_inactive(a, b)   do {} while (0)
+
+#define ckrm_class_limit_ok(a)                                         (1)
+
+static inline void ckrm_page_init(struct page *p)                      {}
+static inline void ckrm_task_mm_init(struct task_struct *tsk)          {}
+static inline void ckrm_task_mm_set(struct mm_struct * mm,
+                                       struct task_struct *task)       {}
+static inline void ckrm_task_mm_change(struct task_struct *tsk,
+               struct mm_struct *oldmm, struct mm_struct *newmm)       {}
+static inline void ckrm_task_mm_clear(struct task_struct *tsk,
+                                               struct mm_struct *mm)   {}
+
+static inline void ckrm_mm_init(struct mm_struct *mm)                  {}
+
+/* using #define instead of static inline as the prototype requires   *
+ * data structures that are available only with the controller enabled */
+#define ckrm_mm_setclass(a, b)                                         do {} while(0)
+
+static inline void ckrm_mm_clearclass(struct mm_struct *mm)            {}
+
+/*
+ * #else-branch variant: initialise the zone's own active and inactive
+ * list heads.
+ */
+static inline void ckrm_init_lists(struct zone *zone)
 {
-       czone->nr_active -= cnt;
+       INIT_LIST_HEAD(&zone->active_list);
+       INIT_LIST_HEAD(&zone->inactive_list);
 }
 
-static inline void
-ckrm_zone_dec_inactive(struct ckrm_zone *czone, int cnt)
+/*
+ * #else-branch variant: append page to the tail of the inactive_list of
+ * the zone returned by page_zone(page).  Only the list is touched here.
+ */
+static inline void ckrm_add_tail_inactive(struct page *page)
 {
-       czone->nr_inactive -= cnt;
+        struct zone *zone = page_zone(page);
+        list_add_tail(&page->lru, &zone->inactive_list);
 }
-
-#else // !CONFIG_CKRM_RES_MEM
-
-#define ckrm_set_page_class(a,b)       do{}while(0)
-#define ckrm_set_pages_class(a,b,c)    do{}while(0)
-#define ckrm_clear_page_class(a)       do{}while(0)
-#define ckrm_clear_pages_class(a,b)    do{}while(0)
-#define ckrm_change_page_class(a,b)    do{}while(0)
-#define ckrm_change_pages_class(a,b,c) do{}while(0)
-#define ckrm_mem_inc_active(a)         do{}while(0)
-#define ckrm_mem_dec_active(a)         do{}while(0)
-#define ckrm_mem_inc_inactive(a)       do{}while(0)
-#define ckrm_mem_dec_inactive(a)       do{}while(0)
-#define ckrm_shrink_list_empty()       (1)
-#define ckrm_kick_page(a,b)            (0)
-#define ckrm_class_limit_ok(a)         (1)
-#define ckrm_task_mm_init(a)           do{}while(0)
-#define ckrm_task_clear_mm(a, b)       do{}while(0)
-#define ckrm_task_change_mm(a, b, c)   do{}while(0)
-#define ckrm_mm_init(a)                        do{}while(0)
-#define ckrm_mm_setclass(a, b)         do{}while(0)
-#define ckrm_mm_clearclass(a)          do{}while(0)
-#define ckrm_zone_inc_active(a, b)     do{}while(0)
-#define ckrm_zone_inc_inactive(a, b)   do{}while(0)
-#define ckrm_zone_dec_active(a, b)     do{}while(0)
-#define ckrm_zone_dec_inactive(a, b)   do{}while(0)
-
-#endif // CONFIG_CKRM_RES_MEM
-
-#endif // _LINUX_CKRM_MEM_INLINE_H_
+#endif 
+#endif /* _LINUX_CKRM_MEM_INLINE_H_ */
index f614539..9ef07a2 100644 (file)
  *
  */
 
+/* Changes
+ *
+ * 31 Mar 2004
+ *    Created.
+ */
+
 #ifndef _LINUX_CKRM_TSK_H
 #define _LINUX_CKRM_TSK_H
 
 #ifdef CONFIG_CKRM_TYPE_TASKCLASS
 #include <linux/ckrm_rc.h>
 
-typedef int (*get_ref_t) (struct ckrm_core_class *, int);
-typedef void (*put_ref_t) (struct ckrm_core_class *);
+typedef int (*get_ref_t) (void *, int);
+typedef void (*put_ref_t) (void *);
 
-extern int numtasks_get_ref(struct ckrm_core_class *, int);
-extern void numtasks_put_ref(struct ckrm_core_class *);
+extern int numtasks_get_ref(void *, int);
+extern void numtasks_put_ref(void *);
 extern void ckrm_numtasks_register(get_ref_t, put_ref_t);
 
 #else /* CONFIG_CKRM_TYPE_TASKCLASS */
 
-#define numtasks_get_ref(core_class, ref) (1)
-#define numtasks_put_ref(core_class)  do {} while (0)
+#define numtasks_get_ref(a, b) (1)
+#define numtasks_put_ref(a)    do {} while(0)
 
 #endif /* CONFIG_CKRM_TYPE_TASKCLASS */
 #endif /* _LINUX_CKRM_RES_H */
index 12788c8..a985802 100644 (file)
@@ -197,8 +197,8 @@ struct ext2_group_desc
 #define EXT2_RESERVED_FL               0x80000000 /* reserved for ext2 lib */
 
 #ifdef CONFIG_VSERVER_LEGACY
-#define EXT2_FL_USER_VISIBLE           0x0C03DFFF /* User visible flags */
-#define EXT2_FL_USER_MODIFIABLE                0x0C0380FF /* User modifiable flags */
+#define EXT2_FL_USER_VISIBLE           0x0803DFFF /* User visible flags */
+#define EXT2_FL_USER_MODIFIABLE                0x080380FF /* User modifiable flags */
 #else
 #define EXT2_FL_USER_VISIBLE           0x0003DFFF /* User visible flags */
 #define EXT2_FL_USER_MODIFIABLE                0x000380FF /* User modifiable flags */
index 818516b..f2d1cd9 100644 (file)
@@ -190,8 +190,8 @@ struct ext3_group_desc
 #define EXT3_RESERVED_FL               0x80000000 /* reserved for ext3 lib */
 
 #ifdef CONFIG_VSERVER_LEGACY
-#define EXT3_FL_USER_VISIBLE           0x0C03DFFF /* User visible flags */
-#define EXT3_FL_USER_MODIFIABLE                0x0C0380FF /* User modifiable flags */
+#define EXT3_FL_USER_VISIBLE           0x0803DFFF /* User visible flags */
+#define EXT3_FL_USER_MODIFIABLE                0x080380FF /* User modifiable flags */
 #else
 #define EXT3_FL_USER_VISIBLE           0x0003DFFF /* User visible flags */
 #define EXT3_FL_USER_MODIFIABLE                0x000380FF /* User modifiable flags */
index d025bcb..447e469 100644 (file)
@@ -240,7 +240,7 @@ struct page {
 #endif /* WANT_PAGE_VIRTUAL */
 #ifdef CONFIG_CKRM_RES_MEM
        struct ckrm_zone *ckrm_zone;
-#endif // CONFIG_CKRM_RES_MEM
+#endif
 };
 
 /*
index c99f570..282141e 100644 (file)
 #define PG_mappedtodisk                17      /* Has blocks allocated on-disk */
 #define PG_reclaim             18      /* To be reclaimed asap */
 
-#ifdef CONFIG_CKRM_RES_MEM
-#define PG_ckrm_account                19      /* This page is accounted by CKRM */
-#endif
-
+#define PG_ckrm_account                20      /* CKRM accounting */
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -303,9 +300,9 @@ extern unsigned long __read_page_state(unsigned offset);
 #endif
 
 #ifdef CONFIG_CKRM_RES_MEM
-#define CkrmAccount(page)      test_bit(PG_ckrm_account, &(page)->flags)
-#define SetCkrmAccount(page)   set_bit(PG_ckrm_account, &(page)->flags)
-#define ClearCkrmAccount(page) clear_bit(PG_ckrm_account, &(page)->flags)
+#define PageCkrmAccount(page)          test_bit(PG_ckrm_account, &(page)->flags)
+#define SetPageCkrmAccount(page)       set_bit(PG_ckrm_account, &(page)->flags)
+#define ClearPageCkrmAccount(page)     clear_bit(PG_ckrm_account, &(page)->flags)
 #endif
 
 struct page;   /* forward declaration */
index 9cb07d1..74719a9 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/percpu.h>
 #include <linux/topology.h>
 #include <linux/vs_base.h>
+#include <linux/taskdelays.h>
 
 struct exec_domain;
 extern int exec_shield;
@@ -267,8 +268,8 @@ struct mm_struct {
        struct kioctx           default_kioctx;
 #ifdef CONFIG_CKRM_RES_MEM
        struct ckrm_mem_res *memclass;
-       struct list_head        tasklist; /* list of all tasks sharing this address space */
-       spinlock_t              peertask_lock; /* protect above tasklist */
+       struct list_head tasklist;      /* tasks sharing this address space */
+       spinlock_t peertask_lock;       /* protect tasklist above */
 #endif
 };
 
@@ -718,25 +719,25 @@ struct task_struct {
        struct mempolicy *mempolicy;
        short il_next;          /* could be shared with used_math */
 #endif
-
 #ifdef CONFIG_CKRM
-       spinlock_t  ckrm_tsklock; 
+       spinlock_t  ckrm_tsklock;
        void       *ce_data;
 #ifdef CONFIG_CKRM_TYPE_TASKCLASS
-       // .. Hubertus should change to CONFIG_CKRM_TYPE_TASKCLASS 
        struct ckrm_task_class *taskclass;
-       struct list_head        taskclass_link;
+       struct list_head taskclass_link;
 #ifdef CONFIG_CKRM_CPU_SCHEDULE
         struct ckrm_cpu_class *cpu_class;
-       //track cpu demand of this task
+       /* track cpu demand of this task */
        struct ckrm_cpu_demand_stat demand_stat;
-#endif //CONFIG_CKRM_CPU_SCHEDULE
-#endif // CONFIG_CKRM_TYPE_TASKCLASS
+#endif /* CONFIG_CKRM_CPU_SCHEDULE */
+#endif /* CONFIG_CKRM_TYPE_TASKCLASS */
 #ifdef CONFIG_CKRM_RES_MEM
-       struct list_head        mm_peers; // list of tasks using same mm_struct
-#endif // CONFIG_CKRM_RES_MEM
-#endif // CONFIG_CKRM
-       struct task_delay_info  delays;
+       struct list_head mm_peers; /* list of tasks using same mm_struct */
+#endif
+#endif /* CONFIG_CKRM */
+#ifdef CONFIG_DELAY_ACCT
+       struct task_delay_info delays;
+#endif
 };
 
 static inline pid_t process_group(struct task_struct *tsk)
@@ -1303,6 +1304,86 @@ extern void normalize_rt_tasks(void);
 
 #endif
 
+/* API for registering delay info */
+#ifdef CONFIG_DELAY_ACCT
+
+#define test_delay_flag(tsk,flg)                ((tsk)->flags & (flg))
+#define set_delay_flag(tsk,flg)                 ((tsk)->flags |= (flg))
+#define clear_delay_flag(tsk,flg)               ((tsk)->flags &= ~(flg))
+
+#define def_delay_var(var)                     unsigned long long var
+#define get_delay(tsk,field)                    ((tsk)->delays.field)
+
+#define start_delay(var)                        ((var) = sched_clock())
+#define start_delay_set(var,flg)                (set_delay_flag(current,flg),(var) = sched_clock())
+
+#define inc_delay(tsk,field) (((tsk)->delays.field)++)
+
+/* Because of hardware timer drift on SMP systems, a task may continue on a
+ * different cpu than the one where start_ts was taken, so it is possible
+ * that end_ts < start_ts by some usecs. In this case we ignore the diff
+ * and add nothing to the total.
+ */
+#ifdef CONFIG_SMP
+#define test_ts_integrity(start_ts,end_ts)  (likely((end_ts) > (start_ts)))
+#else
+#define test_ts_integrity(start_ts,end_ts)  (1)
+#endif
+
+#define add_delay_ts(tsk,field,start_ts,end_ts) \
+       do { if (test_ts_integrity(start_ts,end_ts)) (tsk)->delays.field += ((end_ts)-(start_ts)); } while (0)
+
+/*
+ * add_delay_clear - account the time since start_ts, then clear flg.
+ *
+ * Samples sched_clock() once, adds (sample - start_ts) to
+ * (tsk)->delays.field through add_delay_ts() (which drops the sample when
+ * test_ts_integrity() fails), and clears flg in (tsk)->flags.
+ *
+ * The local timestamp has a namespaced name so that a caller passing a
+ * variable called "now" as start_ts is not shadowed by the macro's own
+ * local, which would silently account a zero/ignored delay.
+ * tsk is expanded more than once; pass an expression without side effects.
+ */
+#define add_delay_clear(tsk,field,start_ts,flg)        \
+       do {                                           \
+               unsigned long long __delay_now = sched_clock();\
+               add_delay_ts(tsk,field,start_ts,__delay_now);  \
+               clear_delay_flag(tsk,flg);             \
+        } while (0)
+
+/*
+ * add_io_delay - charge the current task for an I/O wait that began at
+ * dstart.
+ *
+ * The elapsed time is sched_clock() - dstart, or 0 when
+ * test_ts_integrity() rejects the pair of timestamps.  It is added to the
+ * memory-I/O totals when PF_MEMIO is set on the task and to the plain I/O
+ * totals otherwise; the matching wait counter is incremented in both
+ * cases.  PF_IOWAIT is cleared on the task before returning.
+ */
+static inline void add_io_delay(unsigned long long dstart)
+{
+       struct task_struct *me = current;
+       unsigned long long end_ts = sched_clock();
+       unsigned long long elapsed = 0;
+
+       if (test_ts_integrity(dstart, end_ts))
+               elapsed = end_ts - dstart;
+
+       if (!test_delay_flag(me, PF_MEMIO)) {
+               me->delays.iowait_total += elapsed;
+               me->delays.num_iowaits++;
+       } else {
+               me->delays.mem_iowait_total += elapsed;
+               me->delays.num_memwaits++;
+       }
+       clear_delay_flag(me, PF_IOWAIT);
+}
+
+/* Zero the whole delay-accounting record embedded in the task. */
+static inline void init_delays(struct task_struct *tsk)
+{
+       struct task_delay_info *info = &tsk->delays;
+
+       memset(info, 0, sizeof(*info));
+}
+
+#else
+
+#define test_delay_flag(tsk,flg)                (0)
+#define set_delay_flag(tsk,flg)                 do { } while (0)
+#define clear_delay_flag(tsk,flg)               do { } while (0)
+
+#define def_delay_var(var)                           
+#define get_delay(tsk,field)                    (0)
+
+#define start_delay(var)                        do { } while (0)
+#define start_delay_set(var,flg)                do { } while (0)
+
+#define inc_delay(tsk,field)                    do { } while (0)
+#define add_delay_ts(tsk,field,start_ts,now)    do { } while (0)
+#define add_delay_clear(tsk,field,start_ts,flg) do { } while (0)
+#define add_io_delay(dstart)                   do { } while (0) 
+#define init_delays(tsk)                        do { } while (0)
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif
index d9587f2..a1054e8 100644 (file)
@@ -57,10 +57,6 @@ extern int vc_set_iattr_v0(uint32_t, void __user *);
 extern int vc_get_iattr(uint32_t, void __user *);
 extern int vc_set_iattr(uint32_t, void __user *);
 
-extern int vc_iattr_ioctl(struct dentry *de,
-                         unsigned int cmd,
-                         unsigned long arg);
-
 #endif /* __KERNEL__ */
 
 /* inode ioctls */
@@ -68,9 +64,6 @@ extern int vc_iattr_ioctl(struct dentry *de,
 #define FIOC_GETXFLG   _IOR('x', 5, long)
 #define FIOC_SETXFLG   _IOW('x', 6, long)
 
-#define FIOC_GETIATTR   _IOR('x', 7, long)
-#define FIOC_SETIATTR   _IOR('x', 8, long)
-
 #else  /* _VX_INODE_H */
 #warning duplicate inclusion
 #endif /* _VX_INODE_H */
index 5091195..b425cfb 100644 (file)
@@ -202,22 +202,11 @@ config CKRM_RES_NUMTASKS
        depends on CKRM_TYPE_TASKCLASS
        default m
        help
-         Provides a Resource Controller for CKRM that allows limiting number of
+         Provides a Resource Controller for CKRM that allows limiting no of
          tasks a task class can have.
        
          Say N if unsure, Y to use the feature.
 
-config CKRM_RES_NUMTASKS_FORKRATE
-       tristate "Number of Tasks Resource Manager for Fork Rate"
-       depends on CKRM_RES_NUMTASKS
-       default y
-       help
-         Provides a Resource Controller for CKRM that allows limiting the rate
-         of tasks a task class can fork per hour.
-       
-         Say N if unsure, Y to use the feature.
-
-
 config CKRM_CPU_SCHEDULE
        bool "CKRM CPU scheduler"
        depends on CKRM_TYPE_TASKCLASS
index 0c3c980..7ee24fb 100644 (file)
@@ -11,5 +11,5 @@ obj-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += ckrm_sockc.o
 obj-$(CONFIG_CKRM_RES_NUMTASKS) += ckrm_numtasks.o
 obj-$(CONFIG_CKRM_RES_LISTENAQ) += ckrm_listenaq.o
 obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_cpu_class.o ckrm_cpu_monitor.o
-obj-$(CONFIG_CKRM_RES_MEM) += ckrm_mem.o
+obj-$(CONFIG_CKRM_RES_MEM) += ckrm_memcore.o ckrm_memctlr.o
 obj-$(CONFIG_CKRM_RES_NULL) += ckrm_null_class.o
index 929c22d..301ccbb 100644 (file)
@@ -145,8 +145,6 @@ static void ckrm_free_cpu_class(void *my_res)
        struct ckrm_cpu_class *cls = my_res, *parres, *childres;
        ckrm_core_class_t *child = NULL;
        int maxlimit;
-       ckrm_lrq_t* queue;
-       int i;
 
        if (!cls) 
                return;
@@ -154,15 +152,6 @@ static void ckrm_free_cpu_class(void *my_res)
        /*the default class can't be freed*/
        if (cls == get_default_cpu_class()) 
                return;
-#if 1
-#warning "ACB: Remove freed class from any classqueues [PL #4233]"
-       for (i = 0 ; i < NR_CPUS ; i++) {
-         queue = &cls->local_queues[i];
-         if (cls_in_classqueue(&queue->classqueue_linkobj))
-           classqueue_dequeue(queue->classqueue,
-                              &queue->classqueue_linkobj);
-       }
-#endif
 
        // Assuming there will be no children when this function is called
        parres = ckrm_get_cpu_class(cls->parent);
index 5f59b37..23f48ec 100644 (file)
@@ -841,9 +841,8 @@ static void adjust_lrq_weight(struct ckrm_cpu_class *clsptr, int cpu_online)
                total_pressure += lrq->lrq_load;
        }
 
-#define FIX_SHARES 
-#ifdef FIX_SHARES
-#warning "ACB: fix share initialization problem [PL #4227]"
+#if 1
+#warning "ACB taking out suspicious early return"
 #else
        if (! total_pressure)
                return;
@@ -860,10 +859,6 @@ static void adjust_lrq_weight(struct ckrm_cpu_class *clsptr, int cpu_online)
                        /*give idle class a high share to boost interactiveness */
                        lw = cpu_class_weight(clsptr); 
                else {
-#ifdef FIX_SHARES
-                       if (! total_pressure)
-                               return;
-#endif                 
                        lw = lrq->lrq_load * class_weight;
                        do_div(lw,total_pressure);
                        if (!lw)
@@ -965,11 +960,9 @@ static int thread_exit = 0;
 static int ckrm_cpu_monitord(void *nothing)
 {
        daemonize("ckrm_cpu_ctrld");
-       current->flags |= PF_NOFREEZE;
-
        for (;;) {
                /*sleep for sometime before next try*/
-               set_current_state(TASK_INTERRUPTIBLE);
+               set_current_state(TASK_UNINTERRUPTIBLE);
                schedule_timeout(CPU_MONITOR_INTERVAL);
                ckrm_cpu_monitor(1);
                if (thread_exit) {
diff --git a/kernel/ckrm/ckrm_mem.c b/kernel/ckrm/ckrm_mem.c
deleted file mode 100644 (file)
index 736b579..0000000
+++ /dev/null
@@ -1,981 +0,0 @@
-/* ckrm_mem.c - Memory Resource Manager for CKRM
- *
- * Copyright (C) Chandra Seetharaman, IBM Corp. 2004
- *
- * Provides a Memory Resource controller for CKRM
- *
- * Latest version, more details at http://ckrm.sf.net
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/pagemap.h>
-#include <linux/swap.h>
-#include <linux/swapops.h>
-#include <linux/cache.h>
-#include <linux/percpu.h>
-#include <linux/pagevec.h>
-#include <linux/parser.h>
-#include <linux/ckrm_mem_inline.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/errno.h>
-
-#define MEM_NAME "mem"
-
-#define CKRM_MEM_MAX_HIERARCHY 2 // allows only upto 2 levels - 0, 1 & 2
-
-/* all 1-level memory_share_class are chained together */
-LIST_HEAD(ckrm_memclass_list);
-LIST_HEAD(ckrm_shrink_list);
-spinlock_t ckrm_mem_lock; // protects both lists above
-unsigned int ckrm_tot_lru_pages; // total # of pages in the system
-                                // currently doesn't handle memory add/remove
-struct ckrm_mem_res *ckrm_mem_root_class;
-atomic_t ckrm_mem_real_count = ATOMIC_INIT(0);
-static void ckrm_mem_evaluate_all_pages(struct ckrm_mem_res *);
-int ckrm_nr_mem_classes = 0;
-
-EXPORT_SYMBOL_GPL(ckrm_memclass_list);
-EXPORT_SYMBOL_GPL(ckrm_shrink_list);
-EXPORT_SYMBOL_GPL(ckrm_mem_lock);
-EXPORT_SYMBOL_GPL(ckrm_tot_lru_pages);
-EXPORT_SYMBOL_GPL(ckrm_mem_root_class);
-EXPORT_SYMBOL_GPL(ckrm_mem_real_count);
-EXPORT_SYMBOL_GPL(ckrm_nr_mem_classes);
-
-/* Initialize rescls values
- * May be called on each rcfs unmount or as part of error recovery
- * to make share values sane.
- * Does not traverse hierarchy reinitializing children.
- */
-
-void
-memclass_release(struct kref *kref)
-{
-       struct ckrm_mem_res *cls = container_of(kref, struct ckrm_mem_res, nr_users);
-       BUG_ON(ckrm_memclass_valid(cls));
-       kfree(cls);
-}
-EXPORT_SYMBOL_GPL(memclass_release);
-
-static void
-set_ckrm_tot_pages(void)
-{
-       struct zone *zone;
-       int tot_lru_pages = 0;
-
-       for_each_zone(zone) {
-               tot_lru_pages += zone->nr_active;
-               tot_lru_pages += zone->nr_inactive;
-               tot_lru_pages += zone->free_pages;
-       }
-       ckrm_tot_lru_pages = tot_lru_pages;
-}
-
-static void
-mem_res_initcls_one(struct ckrm_mem_res *res)
-{
-       int zindex = 0;
-       struct zone *zone;
-
-       memset(res, 0, sizeof(struct ckrm_mem_res));
-
-       res->shares.my_guarantee     = CKRM_SHARE_DONTCARE;
-       res->shares.my_limit         = CKRM_SHARE_DONTCARE;
-       res->shares.total_guarantee  = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
-       res->shares.max_limit        = CKRM_SHARE_DFLT_MAX_LIMIT;
-       res->shares.unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
-       res->shares.cur_max_limit    = 0;
-
-       res->pg_guar = CKRM_SHARE_DONTCARE;
-       res->pg_limit = CKRM_SHARE_DONTCARE;
-
-       INIT_LIST_HEAD(&res->shrink_list);
-       INIT_LIST_HEAD(&res->mcls_list);
-
-       for_each_zone(zone) {
-               INIT_LIST_HEAD(&res->ckrm_zone[zindex].active_list);
-               INIT_LIST_HEAD(&res->ckrm_zone[zindex].inactive_list);
-               INIT_LIST_HEAD(&res->ckrm_zone[zindex].victim_list);
-               res->ckrm_zone[zindex].nr_active = 0;
-               res->ckrm_zone[zindex].nr_inactive = 0;
-               res->ckrm_zone[zindex].zone = zone;
-               res->ckrm_zone[zindex].memcls = res;
-               zindex++;
-       }
-
-       res->pg_unused = 0;
-       res->nr_dontcare = 1; // for default class
-       kref_init(&res->nr_users);
-}
-
-static void
-set_impl_guar_children(struct ckrm_mem_res *parres)
-{
-       ckrm_core_class_t *child = NULL;
-       struct ckrm_mem_res *cres;
-       int nr_dontcare = 1; // for defaultclass
-       int guar, impl_guar;
-       int resid = mem_rcbs.resid;
-
-       ckrm_lock_hier(parres->core);
-       while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
-               cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
-               // treat NULL cres as don't care as that child is just being
-               // created.
-               // FIXME: need a better way to handle this case.
-               if (!cres || cres->pg_guar == CKRM_SHARE_DONTCARE) {
-                       nr_dontcare++;
-               }
-       }
-
-       parres->nr_dontcare = nr_dontcare;
-       guar = (parres->pg_guar == CKRM_SHARE_DONTCARE) ?
-                       parres->impl_guar : parres->pg_unused;
-       impl_guar = guar / parres->nr_dontcare;
-
-       while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
-               cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
-               if (cres && cres->pg_guar == CKRM_SHARE_DONTCARE) {
-                       cres->impl_guar = impl_guar;
-                       set_impl_guar_children(cres);
-               }
-       }
-       ckrm_unlock_hier(parres->core);
-
-}
-
-void
-check_memclass(struct ckrm_mem_res *res, char *str)
-{
-       int i, act = 0, inact = 0;
-       struct zone *zone;
-       struct ckrm_zone *ckrm_zone;
-       struct list_head *pos;
-       struct page *page;
-
-#if 0
-       printk("Check<%s> %s: total=%d\n",
-               str, res->core->name, atomic_read(&res->pg_total));
-#endif
-       for (i = 0; i < MAX_NR_ZONES; i++) {
-               act = 0; inact = 0;
-               ckrm_zone = &res->ckrm_zone[i];
-               zone = ckrm_zone->zone;
-               spin_lock_irq(&zone->lru_lock);
-               pos = ckrm_zone->inactive_list.next;
-               while (pos != &ckrm_zone->inactive_list) {
-                       page = list_entry(pos, struct page, lru);
-                       pos = pos->next;
-                       inact++;
-               }
-               pos = ckrm_zone->active_list.next;
-               while (pos != &ckrm_zone->active_list) {
-                       page = list_entry(pos, struct page, lru);
-                       pos = pos->next;
-                       act++;
-               }
-               spin_unlock_irq(&zone->lru_lock);
-#if 0
-               printk("Check<%s>(zone=%d): act %ld, inae %ld lact %d lina %d\n",
-                       str, i, ckrm_zone->nr_active, ckrm_zone->nr_inactive,
-                       act, inact);
-#endif
-       }
-}
-EXPORT_SYMBOL_GPL(check_memclass);
-
-static void *
-mem_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent)
-{
-       struct ckrm_mem_res *res, *pres;
-
-       if (mem_rcbs.resid == -1) {
-               return NULL;
-       }
-
-       pres = ckrm_get_res_class(parent, mem_rcbs.resid, struct ckrm_mem_res);
-       if (pres && (pres->hier == CKRM_MEM_MAX_HIERARCHY)) {
-               printk(KERN_ERR "MEM_RC: only allows hieararchy of %d\n",
-                                               CKRM_MEM_MAX_HIERARCHY);
-               return NULL;
-       }
-
-       if (unlikely((parent == NULL) && (ckrm_mem_root_class != NULL))) {
-               printk(KERN_ERR "MEM_RC: Only one root class is allowed\n");
-               return NULL;
-       }
-
-       if (unlikely((parent != NULL) && (ckrm_mem_root_class == NULL))) {
-               printk(KERN_ERR "MEM_RC: child class with no root class!!");
-               return NULL;
-       }
-
-       res = kmalloc(sizeof(struct ckrm_mem_res), GFP_ATOMIC);
-
-       if (res) {
-               mem_res_initcls_one(res);
-               res->core = core;
-               res->parent = parent;
-               spin_lock_irq(&ckrm_mem_lock);
-               list_add(&res->mcls_list, &ckrm_memclass_list);
-               spin_unlock_irq(&ckrm_mem_lock);
-               if (parent == NULL) {
-                       // I am part of the root class. So, set the max to
-                       // number of pages available
-                       res->pg_guar = ckrm_tot_lru_pages;
-                       res->pg_unused = ckrm_tot_lru_pages;
-                       res->pg_limit = ckrm_tot_lru_pages;
-                       res->hier = 0;
-                       ckrm_mem_root_class = res;
-               } else {
-                       int guar;
-                       res->hier = pres->hier + 1;
-                       set_impl_guar_children(pres);
-                       guar = (pres->pg_guar == CKRM_SHARE_DONTCARE) ?
-                               pres->impl_guar : pres->pg_unused;
-                       res->impl_guar = guar / pres->nr_dontcare;
-               }
-               ckrm_nr_mem_classes++;
-       }
-       else
-               printk(KERN_ERR "MEM_RC: alloc: GFP_ATOMIC failed\n");
-       return res;
-}
-
-/*
- * It is the caller's responsibility to make sure that the parent only
- * has chilren that are to be accounted. i.e if a new child is added
- * this function should be called after it has been added, and if a
- * child is deleted this should be called after the child is removed.
- */
-static void
-child_maxlimit_changed_local(struct ckrm_mem_res *parres)
-{
-       int maxlimit = 0;
-       struct ckrm_mem_res *childres;
-       ckrm_core_class_t *child = NULL;
-
-       // run thru parent's children and get the new max_limit of the parent
-       ckrm_lock_hier(parres->core);
-       while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
-               childres = ckrm_get_res_class(child, mem_rcbs.resid,
-                               struct ckrm_mem_res);
-               if (maxlimit < childres->shares.my_limit) {
-                       maxlimit = childres->shares.my_limit;
-               }
-       }
-       ckrm_unlock_hier(parres->core);
-       parres->shares.cur_max_limit = maxlimit;
-}
-
-/*
- * Recalculate the guarantee and limit in # of pages... and propagate the
- * same to children.
- * Caller is responsible for protecting res and for the integrity of parres
- */
-static void
-recalc_and_propagate(struct ckrm_mem_res * res, struct ckrm_mem_res * parres)
-{
-       ckrm_core_class_t *child = NULL;
-       struct ckrm_mem_res *cres;
-       int resid = mem_rcbs.resid;
-       struct ckrm_shares *self = &res->shares;
-
-       if (parres) {
-               struct ckrm_shares *par = &parres->shares;
-
-               // calculate pg_guar and pg_limit
-               //
-               if (parres->pg_guar == CKRM_SHARE_DONTCARE ||
-                               self->my_guarantee == CKRM_SHARE_DONTCARE) {
-                       res->pg_guar = CKRM_SHARE_DONTCARE;
-               } else if (par->total_guarantee) {
-                       u64 temp = (u64) self->my_guarantee * parres->pg_guar;
-                       do_div(temp, par->total_guarantee);
-                       res->pg_guar = (int) temp;
-                       res->impl_guar = CKRM_SHARE_DONTCARE;
-               } else {
-                       res->pg_guar = 0;
-                       res->impl_guar = CKRM_SHARE_DONTCARE;
-               }
-
-               if (parres->pg_limit == CKRM_SHARE_DONTCARE ||
-                               self->my_limit == CKRM_SHARE_DONTCARE) {
-                       res->pg_limit = CKRM_SHARE_DONTCARE;
-               } else if (par->max_limit) {
-                       u64 temp = (u64) self->my_limit * parres->pg_limit;
-                       do_div(temp, par->max_limit);
-                       res->pg_limit = (int) temp;
-               } else {
-                       res->pg_limit = 0;
-               }
-       }
-
-       // Calculate unused units
-       if (res->pg_guar == CKRM_SHARE_DONTCARE) {
-               res->pg_unused = CKRM_SHARE_DONTCARE;
-       } else if (self->total_guarantee) {
-               u64 temp = (u64) self->unused_guarantee * res->pg_guar;
-               do_div(temp, self->total_guarantee);
-               res->pg_unused = (int) temp;
-       } else {
-               res->pg_unused = 0;
-       }
-
-       // propagate to children
-       ckrm_lock_hier(res->core);
-       while ((child = ckrm_get_next_child(res->core, child)) != NULL) {
-               cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
-               recalc_and_propagate(cres, res);
-       }
-       ckrm_unlock_hier(res->core);
-       return;
-}
-
-static void
-mem_res_free(void *my_res)
-{
-       struct ckrm_mem_res *res = my_res;
-       struct ckrm_mem_res *pres;
-
-       if (!res)
-               return;
-
-       ckrm_mem_evaluate_all_pages(res);
-
-       pres = ckrm_get_res_class(res->parent, mem_rcbs.resid,
-                       struct ckrm_mem_res);
-
-       if (pres) {
-               child_guarantee_changed(&pres->shares,
-                               res->shares.my_guarantee, 0);
-               child_maxlimit_changed_local(pres);
-               recalc_and_propagate(pres, NULL);
-               set_impl_guar_children(pres);
-       }
-
-       res->shares.my_guarantee = 0;
-       res->shares.my_limit = 0;
-       res->pg_guar = 0;
-       res->pg_limit = 0;
-       res->pg_unused = 0;
-
-       spin_lock_irq(&ckrm_mem_lock);
-       list_del_init(&res->mcls_list);
-       spin_unlock_irq(&ckrm_mem_lock);
-
-       res->core = NULL;
-       res->parent = NULL;
-       kref_put(&res->nr_users, memclass_release);
-       ckrm_nr_mem_classes--;
-       return;
-}
-
-static int
-mem_set_share_values(void *my_res, struct ckrm_shares *shares)
-{
-       struct ckrm_mem_res *res = my_res;
-       struct ckrm_mem_res *parres;
-       int rc;
-
-       if (!res)
-               return -EINVAL;
-
-       parres = ckrm_get_res_class(res->parent, mem_rcbs.resid,
-                       struct ckrm_mem_res);
-
-       rc = set_shares(shares, &res->shares, parres ? &parres->shares : NULL);
-
-       if ((rc == 0) && (parres != NULL)) {
-               child_maxlimit_changed_local(parres);
-               recalc_and_propagate(parres, NULL);
-               set_impl_guar_children(parres);
-       }
-
-       return rc;
-}
-
-static int
-mem_get_share_values(void *my_res, struct ckrm_shares *shares)
-{
-       struct ckrm_mem_res *res = my_res;
-
-       if (!res)
-               return -EINVAL;
-       *shares = res->shares;
-       return 0;
-}
-
-static int
-mem_get_stats(void *my_res, struct seq_file *sfile)
-{
-       struct ckrm_mem_res *res = my_res;
-       struct zone *zone;
-       int active = 0, inactive = 0, fr = 0;
-
-       if (!res)
-               return -EINVAL;
-
-       seq_printf(sfile, "--------- Memory Resource stats start ---------\n");
-       if (res == ckrm_mem_root_class) {
-               int i = 0;
-               for_each_zone(zone) {
-                       active += zone->nr_active;
-                       inactive += zone->nr_inactive;
-                       fr += zone->free_pages;
-                       i++;
-               }
-               seq_printf(sfile,"System: tot_pages=%d,active=%d,inactive=%d"
-                               ",free=%d\n", ckrm_tot_lru_pages,
-                               active, inactive, fr);
-       }
-       seq_printf(sfile, "Number of pages used(including pages lent to"
-                       " children): %d\n", atomic_read(&res->pg_total));
-       seq_printf(sfile, "Number of pages guaranteed: %d\n",
-                       res->pg_guar);
-       seq_printf(sfile, "Maximum limit of pages: %d\n",
-                       res->pg_limit);
-       seq_printf(sfile, "Total number of pages available"
-                       "(after serving guarantees to children): %d\n",
-                       res->pg_unused);
-       seq_printf(sfile, "Number of pages lent to children: %d\n",
-                       res->pg_lent);
-       seq_printf(sfile, "Number of pages borrowed from the parent: %d\n",
-                       res->pg_borrowed);
-       seq_printf(sfile, "---------- Memory Resource stats end ----------\n");
-
-       return 0;
-}
-
-static void
-mem_change_resclass(void *tsk, void *old, void *new)
-{
-       struct mm_struct *mm;
-       struct task_struct *task = tsk, *t1;
-       struct ckrm_mem_res *prev_mmcls;
-
-       if (!task->mm || (new == old) || (old == (void *) -1))
-               return;
-
-       mm = task->active_mm;
-       spin_lock(&mm->peertask_lock);
-       prev_mmcls = mm->memclass;
-
-       if (new == NULL) {
-               list_del_init(&task->mm_peers);
-       } else {
-               int found = 0;
-               list_for_each_entry(t1, &mm->tasklist, mm_peers) {
-                       if (t1 == task) {
-                               found++;
-                               break;
-                       }
-               }
-               if (!found) {
-                       list_del_init(&task->mm_peers);
-                       list_add_tail(&task->mm_peers, &mm->tasklist);
-               }
-       }
-
-       spin_unlock(&mm->peertask_lock);
-       ckrm_mem_evaluate_mm(mm, (struct ckrm_mem_res *) new);
-       return;
-}
-
-#define MEM_FAIL_OVER "fail_over"
-#define MEM_SHRINK_AT "shrink_at"
-#define MEM_SHRINK_TO "shrink_to"
-#define MEM_SHRINK_COUNT "num_shrinks"
-#define MEM_SHRINK_INTERVAL "shrink_interval"
-
-int ckrm_mem_fail_over = 110;
-int ckrm_mem_shrink_at = 90;
-static int ckrm_mem_shrink_to = 80;
-static int ckrm_mem_shrink_count = 10;
-static int ckrm_mem_shrink_interval = 10;
-
-EXPORT_SYMBOL_GPL(ckrm_mem_fail_over);
-EXPORT_SYMBOL_GPL(ckrm_mem_shrink_at);
-
-static int
-mem_show_config(void *my_res, struct seq_file *sfile)
-{
-       struct ckrm_mem_res *res = my_res;
-
-       if (!res)
-               return -EINVAL;
-
-       seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d\n",
-               MEM_NAME,
-               MEM_FAIL_OVER, ckrm_mem_fail_over,
-               MEM_SHRINK_AT, ckrm_mem_shrink_at,
-               MEM_SHRINK_TO, ckrm_mem_shrink_to,
-               MEM_SHRINK_COUNT, ckrm_mem_shrink_count,
-               MEM_SHRINK_INTERVAL, ckrm_mem_shrink_interval);
-
-       return 0;
-}
-
-// config file is available only at the root level,
-// so assuming my_res to be the system level class
-enum memclass_token {
-       mem_fail_over,
-       mem_shrink_at,
-       mem_shrink_to,
-       mem_shrink_count,
-       mem_shrink_interval,
-       mem_err
-};
-
-static match_table_t mem_tokens = {
-       {mem_fail_over, MEM_FAIL_OVER "=%d"},
-       {mem_shrink_at, MEM_SHRINK_AT "=%d"},
-       {mem_shrink_to, MEM_SHRINK_TO "=%d"},
-       {mem_shrink_count, MEM_SHRINK_COUNT "=%d"},
-       {mem_shrink_interval, MEM_SHRINK_INTERVAL "=%d"},
-       {mem_err, NULL},
-};
-
-static int
-mem_set_config(void *my_res, const char *cfgstr)
-{
-       char *p;
-       struct ckrm_mem_res *res = my_res;
-       int err = 0, val;
-
-       if (!res)
-               return -EINVAL;
-
-       while ((p = strsep((char**)&cfgstr, ",")) != NULL) {
-               substring_t args[MAX_OPT_ARGS];
-               int token;
-               if (!*p)
-                       continue;
-
-               token = match_token(p, mem_tokens, args);
-               switch (token) {
-               case mem_fail_over:
-                       if (match_int(args, &val) || (val <= 0)) {
-                               err = -EINVAL;
-                       } else {
-                               ckrm_mem_fail_over = val;
-                       }
-                       break;
-               case mem_shrink_at:
-                       if (match_int(args, &val) || (val <= 0)) {
-                               err = -EINVAL;
-                       } else {
-                               ckrm_mem_shrink_at = val;
-                       }
-                       break;
-               case mem_shrink_to:
-                       if (match_int(args, &val) || (val < 0) || (val > 100)) {
-                               err = -EINVAL;
-                       } else {
-                               ckrm_mem_shrink_to = val;
-                       }
-                       break;
-               case mem_shrink_count:
-                       if (match_int(args, &val) || (val <= 0)) {
-                               err = -EINVAL;
-                       } else {
-                               ckrm_mem_shrink_count = val;
-                       }
-                       break;
-               case mem_shrink_interval:
-                       if (match_int(args, &val) || (val <= 0)) {
-                               err = -EINVAL;
-                       } else {
-                               ckrm_mem_shrink_interval = val;
-                       }
-                       break;
-               default:
-                       err = -EINVAL;
-               }
-       }
-       return err;
-}
-
-static int
-mem_reset_stats(void *my_res)
-{
-       struct ckrm_mem_res *res = my_res;
-       printk(KERN_INFO "MEM_RC: reset stats called for class %s\n",
-                               res->core->name);
-       return 0;
-}
-
-struct ckrm_res_ctlr mem_rcbs = {
-       .res_name          = MEM_NAME,
-       .res_hdepth        = CKRM_MEM_MAX_HIERARCHY,
-       .resid             = -1,
-       .res_alloc         = mem_res_alloc,
-       .res_free          = mem_res_free,
-       .set_share_values  = mem_set_share_values,
-       .get_share_values  = mem_get_share_values,
-       .get_stats         = mem_get_stats,
-       .change_resclass   = mem_change_resclass,
-       .show_config       = mem_show_config,
-       .set_config        = mem_set_config,
-       .reset_stats       = mem_reset_stats,
-};
-
-EXPORT_SYMBOL_GPL(mem_rcbs);
-
-int __init
-init_ckrm_mem_res(void)
-{
-       struct ckrm_classtype *clstype;
-       int resid = mem_rcbs.resid;
-
-       set_ckrm_tot_pages();
-       spin_lock_init(&ckrm_mem_lock);
-       clstype = ckrm_find_classtype_by_name("taskclass");
-       if (clstype == NULL) {
-               printk(KERN_INFO " Unknown ckrm classtype<taskclass>");
-               return -ENOENT;
-       }
-
-       if (resid == -1) {
-               resid = ckrm_register_res_ctlr(clstype, &mem_rcbs);
-               if (resid != -1) {
-                       mem_rcbs.classtype = clstype;
-               }
-       }
-       return ((resid < 0) ? resid : 0);
-}
-
-void __exit
-exit_ckrm_mem_res(void)
-{
-       ckrm_unregister_res_ctlr(&mem_rcbs);
-       mem_rcbs.resid = -1;
-}
-
-module_init(init_ckrm_mem_res)
-module_exit(exit_ckrm_mem_res)
-
-int
-ckrm_mem_get_shrink_to(void)
-{
-       return ckrm_mem_shrink_to;
-}
-
-void
-ckrm_at_limit(struct ckrm_mem_res *cls)
-{
-       struct zone *zone;
-       unsigned long now = jiffies;
-
-       if (!cls || (cls->pg_limit == CKRM_SHARE_DONTCARE) ||
-                       ((cls->flags & MEM_AT_LIMIT) == MEM_AT_LIMIT)) {
-               return;
-       }
-       if ((cls->last_shrink > now) /* jiffies wrapped around */ ||
-                  (cls->last_shrink + (ckrm_mem_shrink_interval * HZ)) < now) {
-               cls->last_shrink = now;
-               cls->shrink_count = 0;
-       }
-       cls->shrink_count++;
-       if (cls->shrink_count > ckrm_mem_shrink_count) {
-               return;
-       }
-       spin_lock_irq(&ckrm_mem_lock);
-       list_add(&cls->shrink_list, &ckrm_shrink_list);
-       spin_unlock_irq(&ckrm_mem_lock);
-       cls->flags |= MEM_AT_LIMIT;
-       for_each_zone(zone) {
-               wakeup_kswapd(zone);
-               break; // only once is enough
-       }
-}
-
-static int
-ckrm_mem_evaluate_page_anon(struct page* page)
-{
-       struct ckrm_mem_res* pgcls = page_ckrmzone(page)->memcls;
-       struct ckrm_mem_res* maxshareclass = NULL;
-       struct anon_vma *anon_vma = (struct anon_vma *) page->mapping;
-       struct vm_area_struct *vma;
-       struct mm_struct* mm;
-       int ret = 0;
-
-       spin_lock(&anon_vma->lock);
-       BUG_ON(list_empty(&anon_vma->head));
-       list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-               mm = vma->vm_mm;
-               if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,
-                               mm->memclass) < 0) {
-                       maxshareclass = mm->memclass;
-               }
-       }
-       spin_unlock(&anon_vma->lock);
-
-       if (!maxshareclass) {
-               maxshareclass = ckrm_mem_root_class;
-       }
-       if (pgcls != maxshareclass) {
-               ckrm_change_page_class(page, maxshareclass);
-               ret = 1;
-       }
-       return ret;
-}
-
-static int
-ckrm_mem_evaluate_page_file(struct page* page)
-{
-       struct ckrm_mem_res* pgcls = page_ckrmzone(page)->memcls;
-       struct ckrm_mem_res* maxshareclass = NULL;
-       struct address_space *mapping = page->mapping;
-       struct vm_area_struct *vma = NULL;
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       struct prio_tree_iter iter;
-       struct mm_struct* mm;
-       int ret = 0;
-
-       if (!mapping)
-               return 0;
-
-       if (!spin_trylock(&mapping->i_mmap_lock))
-               return 0;
-
-       vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap,
-                                       pgoff, pgoff) {
-               mm = vma->vm_mm;
-               if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,
-                               mm->memclass)<0)
-                       maxshareclass = mm->memclass;
-       }
-       spin_unlock(&mapping->i_mmap_lock);
-
-       if (!maxshareclass) {
-               maxshareclass = ckrm_mem_root_class;
-       }
-       if (pgcls != maxshareclass) {
-               ckrm_change_page_class(page, maxshareclass);
-               ret = 1;
-       }
-       return ret;
-}
-
-static int
-ckrm_mem_evaluate_page(struct page* page)
-{
-       int ret = 0;
-       BUG_ON(page->ckrm_zone == NULL);
-       if (page->mapping) {
-               if (PageAnon(page))
-                       ret = ckrm_mem_evaluate_page_anon(page);
-               else
-                       ret = ckrm_mem_evaluate_page_file(page);
-       }
-       return ret;
-}
-
-static void
-ckrm_mem_evaluate_all_pages(struct ckrm_mem_res* res)
-{
-       struct page *page;
-       struct ckrm_zone *ckrm_zone;
-       struct zone *zone;
-       struct list_head *pos, *next;
-       int i;
-
-       check_memclass(res, "bef_eval_all_pgs");
-       for (i = 0; i < MAX_NR_ZONES; i++) {
-               ckrm_zone = &res->ckrm_zone[i];
-               zone = ckrm_zone->zone;
-               spin_lock_irq(&zone->lru_lock);
-               pos = ckrm_zone->inactive_list.next;
-               while (pos != &ckrm_zone->inactive_list) {
-                       next = pos->next;
-                       page = list_entry(pos, struct page, lru);
-                       if (!ckrm_mem_evaluate_page(page))
-                               ckrm_change_page_class(page,
-                                               ckrm_mem_root_class);
-                       pos = next;
-               }
-               pos = ckrm_zone->active_list.next;
-               while (pos != &ckrm_zone->active_list) {
-                       next = pos->next;
-                       page = list_entry(pos, struct page, lru);
-                       if (!ckrm_mem_evaluate_page(page))
-                               ckrm_change_page_class(page,
-                                               ckrm_mem_root_class);
-                       pos = next;
-               }
-               spin_unlock_irq(&zone->lru_lock);
-       }
-       check_memclass(res, "aft_eval_all_pgs");
-       return;
-}
-
-static inline int
-class_migrate_pmd(struct mm_struct* mm, struct vm_area_struct* vma,
-               pmd_t* pmdir, unsigned long address, unsigned long end)
-{
-       pte_t *pte;
-       unsigned long pmd_end;
-
-       if (pmd_none(*pmdir))
-               return 0;
-       BUG_ON(pmd_bad(*pmdir));
-
-       pmd_end = (address+PMD_SIZE)&PMD_MASK;
-       if (end>pmd_end)
-               end = pmd_end;
-
-       do {
-               pte = pte_offset_map(pmdir,address);
-               if (pte_present(*pte)) {
-                       struct page *page = pte_page(*pte);
-                       BUG_ON(mm->memclass == NULL);
-                       if (page->mapping && page->ckrm_zone) {
-                               struct zone *zone = page->ckrm_zone->zone;
-                               spin_lock_irq(&zone->lru_lock);
-                               ckrm_change_page_class(page, mm->memclass);
-                               spin_unlock_irq(&zone->lru_lock);
-                       }
-               }
-               address += PAGE_SIZE;
-               pte_unmap(pte);
-               pte++;
-       } while(address && (address<end));
-       return 0;
-}
-
-static inline int
-class_migrate_pgd(struct mm_struct* mm, struct vm_area_struct* vma,
-               pgd_t* pgdir, unsigned long address, unsigned long end)
-{
-       pmd_t* pmd;
-       unsigned long pgd_end;
-
-       if (pgd_none(*pgdir))
-               return 0;
-       BUG_ON(pgd_bad(*pgdir));
-
-       pmd = pmd_offset(pgdir,address);
-       pgd_end = (address+PGDIR_SIZE)&PGDIR_MASK;
-
-       if (pgd_end && (end>pgd_end))
-               end = pgd_end;
-
-       do {
-               class_migrate_pmd(mm,vma,pmd,address,end);
-               address = (address+PMD_SIZE)&PMD_MASK;
-               pmd++;
-       } while (address && (address<end));
-       return 0;
-}
-
-static inline int
-class_migrate_vma(struct mm_struct* mm, struct vm_area_struct* vma)
-{
-       pgd_t* pgdir;
-       unsigned long address, end;
-
-       address = vma->vm_start;
-       end = vma->vm_end;
-
-       pgdir = pgd_offset(vma->vm_mm, address);
-       do {
-               class_migrate_pgd(mm,vma,pgdir,address,end);
-               address = (address + PGDIR_SIZE) & PGDIR_MASK;
-               pgdir++;
-       } while(address && (address<end));
-       return 0;
-}
-
-/* this function is called with mm->peertask_lock hold */
-void
-ckrm_mem_evaluate_mm(struct mm_struct* mm, struct ckrm_mem_res *def)
-{
-       struct task_struct *task;
-       struct ckrm_mem_res *maxshareclass = def;
-       struct vm_area_struct *vma;
-
-       if (list_empty(&mm->tasklist)) {
-               /* We leave the mm->memclass untouched since we believe that one
-                * mm with no task associated will be deleted soon or attach
-                * with another task later.
-                */
-               return;
-       }
-
-       list_for_each_entry(task, &mm->tasklist, mm_peers) {
-               struct ckrm_mem_res* cls = ckrm_get_mem_class(task);
-               if (!cls)
-                       continue;
-               if (!maxshareclass ||
-                               ckrm_mem_share_compare(maxshareclass,cls)<0 )
-                       maxshareclass = cls;
-       }
-
-       if (maxshareclass && (mm->memclass != maxshareclass)) {
-               if (mm->memclass) {
-                       kref_put(&mm->memclass->nr_users, memclass_release);
-               }
-               mm->memclass = maxshareclass;
-               kref_get(&maxshareclass->nr_users);
-
-               /* Go through all VMA to migrate pages */
-               down_read(&mm->mmap_sem);
-               vma = mm->mmap;
-               while(vma) {
-                       class_migrate_vma(mm, vma);
-                       vma = vma->vm_next;
-               }
-               up_read(&mm->mmap_sem);
-       }
-       return;
-}
-
-void
-ckrm_init_mm_to_task(struct mm_struct * mm, struct task_struct *task)
-{
-       spin_lock(&mm->peertask_lock);
-       if (!list_empty(&task->mm_peers)) {
-               printk(KERN_ERR "MEM_RC: Task list NOT empty!! emptying...\n");
-               list_del_init(&task->mm_peers);
-       }
-       list_add_tail(&task->mm_peers, &mm->tasklist);
-       spin_unlock(&mm->peertask_lock);
-       if (mm->memclass != ckrm_get_mem_class(task))
-               ckrm_mem_evaluate_mm(mm, NULL);
-       return;
-}
-
-int
-ckrm_memclass_valid(struct ckrm_mem_res *cls)
-{
-       struct ckrm_mem_res *tmp;
-       unsigned long flags;
-
-       if (!cls || list_empty(&cls->mcls_list)) {
-               return 0;
-       }
-       spin_lock_irqsave(&ckrm_mem_lock, flags);
-       list_for_each_entry(tmp, &ckrm_memclass_list, mcls_list) {
-               if (tmp == cls) {
-                       spin_unlock(&ckrm_mem_lock);
-                       return 1;
-               }
-       }
-       spin_unlock_irqrestore(&ckrm_mem_lock, flags);
-       return 0;
-}
-
-MODULE_LICENSE("GPL");
index c058305..21d8f9b 100644 (file)
  *
  */
 
+/* Changes
+ * 
+ * 31 Mar 2004: Created
+ * 
+ */
+
 /*
- * CKRM Resource controller for tracking number of tasks in a class.
+ * Code Description: TBD
  */
 
 #include <linux/module.h>
 #include <asm/div64.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/parser.h>
 #include <linux/ckrm_rc.h>
 #include <linux/ckrm_tc.h>
 #include <linux/ckrm_tsk.h>
 
-#define TOTAL_NUM_TASKS (131072)       /* 128 K */
+#define DEF_TOTAL_NUM_TASKS (131072)   // 128 K
+#define DEF_FORKRATE (1000000)                 // 1 million tasks
+#define DEF_FORKRATE_INTERVAL (3600)    // per hour
 #define NUMTASKS_DEBUG
 #define NUMTASKS_NAME "numtasks"
-
-struct ckrm_numtasks {
-       struct ckrm_core_class *core;   /* the core i am part of... */
-       struct ckrm_core_class *parent; /* parent of the core above. */
+#define SYS_TOTAL_TASKS "sys_total_tasks"
+#define FORKRATE "forkrate"
+#define FORKRATE_INTERVAL "forkrate_interval"
+
+static int total_numtasks = DEF_TOTAL_NUM_TASKS;
+static int total_cnt_alloc = 0;
+static int forkrate = DEF_FORKRATE;
+static int forkrate_interval = DEF_FORKRATE_INTERVAL;
+static ckrm_core_class_t *root_core;
+
+typedef struct ckrm_numtasks {
+       struct ckrm_core_class *core;   // the core i am part of...
+       struct ckrm_core_class *parent; // parent of the core above.
        struct ckrm_shares shares;
-       spinlock_t cnt_lock;    /* always grab parent's lock before child's */
-       int cnt_guarantee;      /* num_tasks guarantee in local units */
-       int cnt_unused;         /* has to borrow if more than this is needed */
-       int cnt_limit;          /* no tasks over this limit. */
-       atomic_t cnt_cur_alloc; /* current alloc from self */
-       atomic_t cnt_borrowed;  /* borrowed from the parent */
-
-       int over_guarantee;     /* turn on/off when cur_alloc goes  */
-                               /* over/under guarantee */
-
-       /* internally maintained statictics to compare with max numbers */
-       int limit_failures;     /* # failures as request was over the limit */
-       int borrow_sucesses;    /* # successful borrows */
-       int borrow_failures;    /* # borrow failures */
-
-       /* Maximum the specific statictics has reached. */
+       spinlock_t cnt_lock;    // always grab parent's lock before child's
+       int cnt_guarantee;      // num_tasks guarantee in local units
+       int cnt_unused;         // has to borrow if more than this is needed
+       int cnt_limit;          // no tasks over this limit.
+       atomic_t cnt_cur_alloc; // current alloc from self
+       atomic_t cnt_borrowed;  // borrowed from the parent
+
+       int over_guarantee;     // turn on/off when cur_alloc goes 
+                               // over/under guarantee
+
+       // internally maintained statictics to compare with max numbers
+       int limit_failures;     // # failures as request was over the limit
+       int borrow_sucesses;    // # successful borrows
+       int borrow_failures;    // # borrow failures
+
+       // Maximum the specific statictics has reached.
        int max_limit_failures;
        int max_borrow_sucesses;
        int max_borrow_failures;
 
-       /* Total number of specific statistics */
+       // Total number of specific statistics
        int tot_limit_failures;
        int tot_borrow_sucesses;
        int tot_borrow_failures;
-};
+
+       // fork rate fields
+       int forks_in_period;
+       unsigned long period_start;
+} ckrm_numtasks_t;
 
 struct ckrm_res_ctlr numtasks_rcbs;
 
@@ -67,7 +89,7 @@ struct ckrm_res_ctlr numtasks_rcbs;
  * to make share values sane.
  * Does not traverse hierarchy reinitializing children.
  */
-static void numtasks_res_initcls_one(struct ckrm_numtasks * res)
+static void numtasks_res_initcls_one(ckrm_numtasks_t * res)
 {
        res->shares.my_guarantee = CKRM_SHARE_DONTCARE;
        res->shares.my_limit = CKRM_SHARE_DONTCARE;
@@ -94,23 +116,58 @@ static void numtasks_res_initcls_one(struct ckrm_numtasks * res)
        res->tot_borrow_sucesses = 0;
        res->tot_borrow_failures = 0;
 
+       res->forks_in_period = 0;
+       res->period_start = jiffies;
+
        atomic_set(&res->cnt_cur_alloc, 0);
        atomic_set(&res->cnt_borrowed, 0);
        return;
 }
 
-static int numtasks_get_ref_local(struct ckrm_core_class *core, int force)
+#if 0
+static void numtasks_res_initcls(void *my_res)
 {
-       int rc, resid = numtasks_rcbs.resid;
-       struct ckrm_numtasks *res;
+       ckrm_numtasks_t *res = my_res;
+
+       /* Write a version which propagates values all the way down 
+          and replace rcbs callback with that version */
+
+}
+#endif
+
+static int numtasks_get_ref_local(void *arg, int force)
+{
+       int rc, resid = numtasks_rcbs.resid, borrowed = 0;
+       unsigned long now = jiffies, chg_at;
+       ckrm_numtasks_t *res;
+       ckrm_core_class_t *core = arg;
 
        if ((resid < 0) || (core == NULL))
                return 1;
 
-       res = ckrm_get_res_class(core, resid, struct ckrm_numtasks);
+       res = ckrm_get_res_class(core, resid, ckrm_numtasks_t);
        if (res == NULL)
                return 1;
 
+       // force is not associated with fork. So, if force is specified
+       // we don't have to bother about forkrate.
+       if (!force) {
+               // Take care of wraparound situation
+               chg_at = res->period_start + forkrate_interval * HZ;
+               if (chg_at < res->period_start) {
+                       chg_at += forkrate_interval * HZ;
+                       now += forkrate_interval * HZ;
+               }
+               if (chg_at <= now) {
+                       res->period_start = now;
+                       res->forks_in_period = 0;
+               }
+       
+               if (res->forks_in_period >= forkrate) {
+                       return 0;
+               }
+       }
+
        atomic_inc(&res->cnt_cur_alloc);
 
        rc = 1;
@@ -129,76 +186,91 @@ static int numtasks_get_ref_local(struct ckrm_core_class *core, int force)
                                res->borrow_sucesses++;
                                res->tot_borrow_sucesses++;
                                res->over_guarantee = 1;
+                               borrowed++;
                        } else {
                                res->borrow_failures++;
                                res->tot_borrow_failures++;
                        }
-               } else
+               } else {
                        rc = force;
+               }
        } else if (res->over_guarantee) {
                res->over_guarantee = 0;
 
-               if (res->max_limit_failures < res->limit_failures)
+               if (res->max_limit_failures < res->limit_failures) {
                        res->max_limit_failures = res->limit_failures;
-               if (res->max_borrow_sucesses < res->borrow_sucesses)
+               }
+               if (res->max_borrow_sucesses < res->borrow_sucesses) {
                        res->max_borrow_sucesses = res->borrow_sucesses;
-               if (res->max_borrow_failures < res->borrow_failures)
+               }
+               if (res->max_borrow_failures < res->borrow_failures) {
                        res->max_borrow_failures = res->borrow_failures;
+               }
                res->limit_failures = 0;
                res->borrow_sucesses = 0;
                res->borrow_failures = 0;
        }
 
-       if (!rc)
+       if (!rc) {
                atomic_dec(&res->cnt_cur_alloc);
+       } else if (!borrowed) { 
+               total_cnt_alloc++;
+               if (!force) { // force is not associated with a real fork.
+                       res->forks_in_period++;
+               }
+       }
        return rc;
 }
 
-static void numtasks_put_ref_local(struct ckrm_core_class *core)
+static void numtasks_put_ref_local(void *arg)
 {
        int resid = numtasks_rcbs.resid;
-       struct ckrm_numtasks *res;
+       ckrm_numtasks_t *res;
+       ckrm_core_class_t *core = arg;
 
-       if ((resid == -1) || (core == NULL))
+       if ((resid == -1) || (core == NULL)) {
                return;
+       }
 
-       res = ckrm_get_res_class(core, resid, struct ckrm_numtasks);
+       res = ckrm_get_res_class(core, resid, ckrm_numtasks_t);
        if (res == NULL)
                return;
-
-       if (atomic_read(&res->cnt_cur_alloc)==0)
+       if (unlikely(atomic_read(&res->cnt_cur_alloc) == 0)) {
+               printk(KERN_WARNING "numtasks_put_ref: Trying to decrement "
+                                       "counter below 0\n");
                return;
-
+       }
        atomic_dec(&res->cnt_cur_alloc);
-
        if (atomic_read(&res->cnt_borrowed) > 0) {
                atomic_dec(&res->cnt_borrowed);
                numtasks_put_ref_local(res->parent);
+       } else {
+               total_cnt_alloc--;
        }
+               
        return;
 }
 
 static void *numtasks_res_alloc(struct ckrm_core_class *core,
                                struct ckrm_core_class *parent)
 {
-       struct ckrm_numtasks *res;
+       ckrm_numtasks_t *res;
 
-       res = kmalloc(sizeof(struct ckrm_numtasks), GFP_ATOMIC);
+       res = kmalloc(sizeof(ckrm_numtasks_t), GFP_ATOMIC);
 
        if (res) {
-               memset(res, 0, sizeof(struct ckrm_numtasks));
+               memset(res, 0, sizeof(ckrm_numtasks_t));
                res->core = core;
                res->parent = parent;
                numtasks_res_initcls_one(res);
                res->cnt_lock = SPIN_LOCK_UNLOCKED;
                if (parent == NULL) {
-                       /*
-                        * I am part of root class. So set the max tasks 
-                        * to available default.
-                        */
-                       res->cnt_guarantee = TOTAL_NUM_TASKS;
-                       res->cnt_unused = TOTAL_NUM_TASKS;
-                       res->cnt_limit = TOTAL_NUM_TASKS;
+                       // I am part of root class. So set the max tasks 
+                       // to available default
+                       res->cnt_guarantee = total_numtasks;
+                       res->cnt_unused = total_numtasks;
+                       res->cnt_limit = total_numtasks;
+                       root_core = core; // store the root core.
                }
                try_module_get(THIS_MODULE);
        } else {
@@ -214,36 +286,47 @@ static void *numtasks_res_alloc(struct ckrm_core_class *core,
  */
 static void numtasks_res_free(void *my_res)
 {
-       struct ckrm_numtasks *res = my_res, *parres, *childres;
-       struct ckrm_core_class *child = NULL;
+       ckrm_numtasks_t *res = my_res, *parres, *childres;
+       ckrm_core_class_t *child = NULL;
        int i, borrowed, maxlimit, resid = numtasks_rcbs.resid;
 
        if (!res)
                return;
 
-       /* Assuming there will be no children when this function is called */
+       // Assuming there will be no children when this function is called
 
-       parres = ckrm_get_res_class(res->parent, resid, struct ckrm_numtasks);
+       parres = ckrm_get_res_class(res->parent, resid, ckrm_numtasks_t);
 
-       if ((borrowed = atomic_read(&res->cnt_borrowed)) > 0)
-               for (i = 0; i < borrowed; i++)
-                       numtasks_put_ref_local(parres->core);
-
-       /* return child's limit/guarantee to parent node */
+       if (unlikely(atomic_read(&res->cnt_cur_alloc) < 0)) {
+               printk(KERN_WARNING "numtasks_res: counter below 0\n");
+       }
+       if (unlikely(atomic_read(&res->cnt_cur_alloc) > 0 ||
+                               atomic_read(&res->cnt_borrowed) > 0)) {
+               printk(KERN_WARNING "numtasks_res_free: resource still "
+                      "alloc'd %p\n", res);
+               if ((borrowed = atomic_read(&res->cnt_borrowed)) > 0) {
+                       for (i = 0; i < borrowed; i++) {
+                               numtasks_put_ref_local(parres->core);
+                       }
+               }
+       }
+       // return child's limit/guarantee to parent node
        spin_lock(&parres->cnt_lock);
        child_guarantee_changed(&parres->shares, res->shares.my_guarantee, 0);
 
-       /* run thru parent's children and get the new max_limit of the parent */
+       // run thru parent's children and get the new max_limit of the parent
        ckrm_lock_hier(parres->core);
        maxlimit = 0;
        while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
-               childres = ckrm_get_res_class(child, resid, struct ckrm_numtasks);
-               if (maxlimit < childres->shares.my_limit)
+               childres = ckrm_get_res_class(child, resid, ckrm_numtasks_t);
+               if (maxlimit < childres->shares.my_limit) {
                        maxlimit = childres->shares.my_limit;
+               }
        }
        ckrm_unlock_hier(parres->core);
-       if (parres->shares.cur_max_limit < maxlimit)
+       if (parres->shares.cur_max_limit < maxlimit) {
                parres->shares.cur_max_limit = maxlimit;
+       }
 
        spin_unlock(&parres->cnt_lock);
        kfree(res);
@@ -251,63 +334,67 @@ static void numtasks_res_free(void *my_res)
        return;
 }
 
+
 /*
  * Recalculate the guarantee and limit in real units... and propagate the
  * same to children.
  * Caller is responsible for protecting res and for the integrity of parres
  */
 static void
-recalc_and_propagate(struct ckrm_numtasks * res, struct ckrm_numtasks * parres)
+recalc_and_propagate(ckrm_numtasks_t * res, ckrm_numtasks_t * parres)
 {
-       struct ckrm_core_class *child = NULL;
-       struct ckrm_numtasks *childres;
+       ckrm_core_class_t *child = NULL;
+       ckrm_numtasks_t *childres;
        int resid = numtasks_rcbs.resid;
 
        if (parres) {
                struct ckrm_shares *par = &parres->shares;
                struct ckrm_shares *self = &res->shares;
 
-               /* calculate cnt_guarantee and cnt_limit */
-               if ((parres->cnt_guarantee == CKRM_SHARE_DONTCARE) ||
-                               (self->my_guarantee == CKRM_SHARE_DONTCARE))
+               // calculate cnt_guarantee and cnt_limit
+               //
+               if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) {
                        res->cnt_guarantee = CKRM_SHARE_DONTCARE;
-               else if (par->total_guarantee) {
+               else if (par->total_guarantee) {
                        u64 temp = (u64) self->my_guarantee * parres->cnt_guarantee;
                        do_div(temp, par->total_guarantee);
                        res->cnt_guarantee = (int) temp;
-               } else
+               } else {
                        res->cnt_guarantee = 0;
+               }
 
-               if ((parres->cnt_limit == CKRM_SHARE_DONTCARE) ||
-                               (self->my_limit == CKRM_SHARE_DONTCARE))
+               if (parres->cnt_limit == CKRM_SHARE_DONTCARE) {
                        res->cnt_limit = CKRM_SHARE_DONTCARE;
-               else if (par->max_limit) {
+               else if (par->max_limit) {
                        u64 temp = (u64) self->my_limit * parres->cnt_limit;
                        do_div(temp, par->max_limit);
                        res->cnt_limit = (int) temp;
-               } else
+               } else {
                        res->cnt_limit = 0;
+               }
 
-               /* Calculate unused units */
-               if ((res->cnt_guarantee == CKRM_SHARE_DONTCARE) ||
-                               (self->my_guarantee == CKRM_SHARE_DONTCARE))
+               // Calculate unused units
+               if (res->cnt_guarantee == CKRM_SHARE_DONTCARE) {
                        res->cnt_unused = CKRM_SHARE_DONTCARE;
-               else if (self->total_guarantee) {
+               else if (self->total_guarantee) {
                        u64 temp = (u64) self->unused_guarantee * res->cnt_guarantee;
                        do_div(temp, self->total_guarantee);
                        res->cnt_unused = (int) temp;
-               } else
+               } else {
                        res->cnt_unused = 0;
+               }
        }
-
-       /* propagate to children */
+       // propagate to children
        ckrm_lock_hier(res->core);
        while ((child = ckrm_get_next_child(res->core, child)) != NULL) {
-               childres = ckrm_get_res_class(child, resid, struct ckrm_numtasks);
-
-               spin_lock(&childres->cnt_lock);
-               recalc_and_propagate(childres, res);
-               spin_unlock(&childres->cnt_lock);
+               childres = ckrm_get_res_class(child, resid, ckrm_numtasks_t);
+               if (childres) {
+                   spin_lock(&childres->cnt_lock);
+                   recalc_and_propagate(childres, res);
+                   spin_unlock(&childres->cnt_lock);
+               } else {
+                       printk(KERN_ERR "%s: numtasks resclass missing\n",__FUNCTION__);
+               }
        }
        ckrm_unlock_hier(res->core);
        return;
@@ -315,7 +402,7 @@ recalc_and_propagate(struct ckrm_numtasks * res, struct ckrm_numtasks * parres)
 
 static int numtasks_set_share_values(void *my_res, struct ckrm_shares *new)
 {
-       struct ckrm_numtasks *parres, *res = my_res;
+       ckrm_numtasks_t *parres, *res = my_res;
        struct ckrm_shares *cur = &res->shares, *par;
        int rc = -EINVAL, resid = numtasks_rcbs.resid;
 
@@ -324,7 +411,7 @@ static int numtasks_set_share_values(void *my_res, struct ckrm_shares *new)
 
        if (res->parent) {
                parres =
-                   ckrm_get_res_class(res->parent, resid, struct ckrm_numtasks);
+                   ckrm_get_res_class(res->parent, resid, ckrm_numtasks_t);
                spin_lock(&parres->cnt_lock);
                spin_lock(&res->cnt_lock);
                par = &parres->shares;
@@ -337,26 +424,28 @@ static int numtasks_set_share_values(void *my_res, struct ckrm_shares *new)
        rc = set_shares(new, cur, par);
 
        if ((rc == 0) && parres) {
-               /* Calculate parent's unused units */
-               if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE)
+               // Calculate parent's unused units
+               if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) {
                        parres->cnt_unused = CKRM_SHARE_DONTCARE;
-               else if (par->total_guarantee) {
+               else if (par->total_guarantee) {
                        u64 temp = (u64) par->unused_guarantee * parres->cnt_guarantee;
                        do_div(temp, par->total_guarantee);
                        parres->cnt_unused = (int) temp;
-               } else
+               } else {
                        parres->cnt_unused = 0;
+               }
                recalc_and_propagate(res, parres);
        }
        spin_unlock(&res->cnt_lock);
-       if (res->parent)
+       if (res->parent) {
                spin_unlock(&parres->cnt_lock);
+       }
        return rc;
 }
 
 static int numtasks_get_share_values(void *my_res, struct ckrm_shares *shares)
 {
-       struct ckrm_numtasks *res = my_res;
+       ckrm_numtasks_t *res = my_res;
 
        if (!res)
                return -EINVAL;
@@ -366,12 +455,12 @@ static int numtasks_get_share_values(void *my_res, struct ckrm_shares *shares)
 
 static int numtasks_get_stats(void *my_res, struct seq_file *sfile)
 {
-       struct ckrm_numtasks *res = my_res;
+       ckrm_numtasks_t *res = my_res;
 
        if (!res)
                return -EINVAL;
 
-       seq_printf(sfile, "---------Number of tasks stats start---------\n");
+       seq_printf(sfile, "Number of tasks resource:\n");
        seq_printf(sfile, "Total Over limit failures: %d\n",
                   res->tot_limit_failures);
        seq_printf(sfile, "Total Over guarantee sucesses: %d\n",
@@ -385,7 +474,6 @@ static int numtasks_get_stats(void *my_res, struct seq_file *sfile)
                   res->max_borrow_sucesses);
        seq_printf(sfile, "Maximum Over guarantee failures: %d\n",
                   res->max_borrow_failures);
-       seq_printf(sfile, "---------Number of tasks stats end---------\n");
 #ifdef NUMTASKS_DEBUG
        seq_printf(sfile,
                   "cur_alloc %d; borrowed %d; cnt_guar %d; cnt_limit %d "
@@ -402,29 +490,114 @@ static int numtasks_get_stats(void *my_res, struct seq_file *sfile)
 
 static int numtasks_show_config(void *my_res, struct seq_file *sfile)
 {
-       struct ckrm_numtasks *res = my_res;
+       ckrm_numtasks_t *res = my_res;
 
        if (!res)
                return -EINVAL;
 
-       seq_printf(sfile, "res=%s,parameter=somevalue\n", NUMTASKS_NAME);
+       seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d\n", NUMTASKS_NAME,
+                       SYS_TOTAL_TASKS, total_numtasks,
+                       FORKRATE, forkrate,
+                       FORKRATE_INTERVAL, forkrate_interval);
        return 0;
 }
 
+enum numtasks_token_t {
+       numtasks_token_total,
+       numtasks_token_forkrate,
+       numtasks_token_interval,
+       numtasks_token_err
+};
+
+static match_table_t numtasks_tokens = {
+       {numtasks_token_total, SYS_TOTAL_TASKS "=%d"},
+       {numtasks_token_forkrate, FORKRATE "=%d"},
+       {numtasks_token_interval, FORKRATE_INTERVAL "=%d"},
+       {numtasks_token_err, NULL},
+};
+
+static void reset_forkrates(ckrm_core_class_t *parent, unsigned long now)
+{
+       ckrm_numtasks_t *parres;
+       ckrm_core_class_t *child = NULL;
+
+       parres = ckrm_get_res_class(parent, numtasks_rcbs.resid,
+                                ckrm_numtasks_t);
+       if (!parres) {
+               return;
+       }
+       parres->forks_in_period = 0;
+       parres->period_start = now;
+
+       ckrm_lock_hier(parent);
+       while ((child = ckrm_get_next_child(parent, child)) != NULL) {
+               reset_forkrates(child, now);
+       }
+       ckrm_unlock_hier(parent);
+}
+
 static int numtasks_set_config(void *my_res, const char *cfgstr)
 {
-       struct ckrm_numtasks *res = my_res;
+       char *p;
+       ckrm_numtasks_t *res = my_res;
+       int new_total, fr = 0, itvl = 0, err = 0;
 
        if (!res)
                return -EINVAL;
-       printk("numtasks config='%s'\n", cfgstr);
-       return 0;
+
+       while ((p = strsep((char**)&cfgstr, ",")) != NULL) {
+               substring_t args[MAX_OPT_ARGS];
+               int token;
+               if (!*p)
+                       continue;
+
+               token = match_token(p, numtasks_tokens, args);
+               switch (token) {
+               case numtasks_token_total:
+                       if (match_int(args, &new_total) ||
+                                               (new_total < total_cnt_alloc)) {
+                               err = -EINVAL;
+                       } else {
+                               total_numtasks = new_total;
+                       
+                               // res is the default class, as config is present only
+                               // in that directory
+                               spin_lock(&res->cnt_lock);
+                               res->cnt_guarantee = total_numtasks;
+                               res->cnt_unused = total_numtasks;
+                               res->cnt_limit = total_numtasks;
+                               recalc_and_propagate(res, NULL);
+                               spin_unlock(&res->cnt_lock);
+                       }
+                       break;
+               case numtasks_token_forkrate:
+                       if (match_int(args, &fr) || (fr <= 0)) {
+                               err = -EINVAL;
+                       } else {
+                               forkrate = fr;
+                       }
+                       break;
+               case numtasks_token_interval:
+                       if (match_int(args, &itvl) || (itvl <= 0)) {
+                               err = -EINVAL;
+                       } else {
+                               forkrate_interval = itvl;
+                       }
+                       break;
+               default:
+                       err = -EINVAL;
+               }
+       }
+       if ((fr > 0) || (itvl > 0)) {
+               reset_forkrates(root_core, jiffies);
+       }
+       return err;
 }
 
 static void numtasks_change_resclass(void *task, void *old, void *new)
 {
-       struct ckrm_numtasks *oldres = old;
-       struct ckrm_numtasks *newres = new;
+       ckrm_numtasks_t *oldres = old;
+       ckrm_numtasks_t *newres = new;
 
        if (oldres != (void *)-1) {
                struct task_struct *tsk = task;
@@ -433,13 +606,13 @@ static void numtasks_change_resclass(void *task, void *old, void *new)
                            &(tsk->parent->taskclass->core);
                        oldres =
                            ckrm_get_res_class(old_core, numtasks_rcbs.resid,
-                                              struct ckrm_numtasks);
+                                              ckrm_numtasks_t);
                }
-               if (oldres)
-                       numtasks_put_ref_local(oldres->core);
+               numtasks_put_ref_local(oldres->core);
        }
-       if (newres)
+       if (newres) {
                (void)numtasks_get_ref_local(newres->core, 1);
+       }
 }
 
 struct ckrm_res_ctlr numtasks_rcbs = {
@@ -469,7 +642,7 @@ int __init init_ckrm_numtasks_res(void)
 
        if (resid == -1) {
                resid = ckrm_register_res_ctlr(clstype, &numtasks_rcbs);
-               printk("........init_ckrm_numtasks_res -> %d\n", resid);
+               printk(KERN_DEBUG "........init_ckrm_numtasks_res -> %d\n", resid);
                if (resid != -1) {
                        ckrm_numtasks_register(numtasks_get_ref_local,
                                               numtasks_put_ref_local);
@@ -481,13 +654,14 @@ int __init init_ckrm_numtasks_res(void)
 
 void __exit exit_ckrm_numtasks_res(void)
 {
-       if (numtasks_rcbs.resid != -1)
+       if (numtasks_rcbs.resid != -1) {
                ckrm_numtasks_register(NULL, NULL);
+       }
        ckrm_unregister_res_ctlr(&numtasks_rcbs);
        numtasks_rcbs.resid = -1;
 }
 
 module_init(init_ckrm_numtasks_res)
-module_exit(exit_ckrm_numtasks_res)
+    module_exit(exit_ckrm_numtasks_res)
 
-MODULE_LICENSE("GPL");
+    MODULE_LICENSE("GPL");
index d9f15c9..179e6b5 100644 (file)
  *
  */
 
+/* Changes
+ * 
+ * 16 May 2004: Created
+ * 
+ */
+
 #include <linux/spinlock.h>
 #include <linux/module.h>
 #include <linux/ckrm_tsk.h>
@@ -28,7 +34,7 @@ void ckrm_numtasks_register(get_ref_t gr, put_ref_t pr)
        spin_unlock(&stub_lock);
 }
 
-int numtasks_get_ref(struct ckrm_core_class *arg, int force)
+int numtasks_get_ref(void *arg, int force)
 {
        int ret = 1;
        spin_lock(&stub_lock);
@@ -39,7 +45,7 @@ int numtasks_get_ref(struct ckrm_core_class *arg, int force)
        return ret;
 }
 
-void numtasks_put_ref(struct ckrm_core_class *arg)
+void numtasks_put_ref(void *arg)
 {
        spin_lock(&stub_lock);
        if (real_put_ref) {
index 8ca3c17..0d55d38 100644 (file)
@@ -514,7 +514,7 @@ static inline void __exit_mm(struct task_struct * tsk)
        task_lock(tsk);
        tsk->mm = NULL;
        up_read(&mm->mmap_sem);
-       ckrm_task_clear_mm(tsk, mm);
+       ckrm_task_mm_clear(tsk, mm);
        enter_lazy_tlb(mm, current);
        task_unlock(tsk);
        mmput(mm);
index 1902e9d..20e1031 100644 (file)
@@ -41,6 +41,7 @@
 #include <linux/rmap.h>
 #include <linux/ckrm_events.h>
 #include <linux/ckrm_tsk.h>
+#include <linux/ckrm_tc.h>
 #include <linux/ckrm_mem_inline.h>
 #include <linux/vs_network.h>
 #include <linux/vs_limit.h>
@@ -309,7 +310,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
        mm->ioctx_list = NULL;
        mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
        mm->free_area_cache = TASK_UNMAPPED_BASE;
-       ckrm_mm_init(mm);
+       ckrm_mm_init(mm);
 
        if (likely(!mm_alloc_pgd(mm))) {
                mm->def_flags = 0;
@@ -489,7 +490,8 @@ good_mm:
        ckrm_mm_setclass(mm, oldmm->memclass);
        tsk->mm = mm;
        tsk->active_mm = mm;
-       ckrm_init_mm_to_task(mm, tsk);
+       ckrm_mm_setclass(mm, oldmm->memclass);
+       ckrm_task_mm_set(mm, tsk);
        return 0;
 
 free_pt:
index ca16e0c..8fdd30c 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/config.h>
 #include <linux/sched.h>
 #include <linux/vs_context.h>
-#include <linux/fs.h>
 #include <linux/proc_fs.h>
 #include <linux/devpts_fs.h>
 #include <linux/namei.h>
@@ -189,37 +188,6 @@ int vc_set_iattr(uint32_t id, void __user *data)
        return ret;
 }
 
-int vc_iattr_ioctl(struct dentry *de, unsigned int cmd, unsigned long arg)
-{
-       void __user *data = (void __user *)arg;
-       struct vcmd_ctx_iattr_v1 vc_data;
-       int ret;
-
-       /*
-        * I don't think we need any dget/dput pairs in here as long as
-        * this function is always called from sys_ioctl i.e., de is
-         * a field of a struct file that is guaranteed not to be freed.
-        */
-       if (cmd == FIOC_SETIATTR) {
-               if (!capable(CAP_SYS_ADMIN) || !capable(CAP_LINUX_IMMUTABLE))
-                       return -EPERM;
-               if (copy_from_user (&vc_data, data, sizeof(vc_data)))
-                       return -EFAULT;
-               ret = __vc_set_iattr(de,
-                       &vc_data.xid, &vc_data.flags, &vc_data.mask);
-       }
-       else {
-               if (!vx_check(0, VX_ADMIN))
-                       return -ENOSYS;
-               ret = __vc_get_iattr(de->d_inode,
-                       &vc_data.xid, &vc_data.flags, &vc_data.mask);
-       }
-
-       if (!ret && copy_to_user (data, &vc_data, sizeof(vc_data)))
-               ret = -EFAULT;
-       return ret;
-}
-
 
 #ifdef CONFIG_VSERVER_LEGACY
 
index 8c206e4..2aedd4d 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/vs_base.h>
 #include <linux/vs_limit.h>
 #include <linux/nodemask.h>
+#include <linux/ckrm_mem_inline.h>
 
 #include <asm/tlbflush.h>
 
@@ -275,7 +276,7 @@ free_pages_bulk(struct zone *zone, int count,
                /* have to delete it as __free_pages_bulk list manipulates */
                list_del(&page->lru);
                __free_pages_bulk(page, base, zone, area, order);
-               ckrm_clear_page_class(page);
+               ckrm_clear_page_class(page);
                ret++;
        }
        spin_unlock_irqrestore(&zone->lock, flags);
@@ -371,9 +372,7 @@ static void prep_new_page(struct page *page, int order)
 #endif
                        1 << PG_checked | 1 << PG_mappedtodisk);
        page->private = 0;
-#ifdef CONFIG_CKRM_RES_MEM
-       page->ckrm_zone = NULL;
-#endif
+       ckrm_page_init(page);
        set_page_refs(page, order);
 }
 
@@ -636,9 +635,8 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
         */
        can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
 
-       if (!ckrm_class_limit_ok((ckrm_get_mem_class(current)))) {
+       if (!in_interrupt() && !ckrm_class_limit_ok(ckrm_get_mem_class(p)))
                return NULL;
-       }
 
        zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
 
@@ -1573,10 +1571,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
                }
                printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
                                zone_names[j], realsize, batch);
-#ifndef CONFIG_CKRM_RES_MEM
-               INIT_LIST_HEAD(&zone->active_list);
-               INIT_LIST_HEAD(&zone->inactive_list);
-#endif
+               ckrm_init_lists(zone);
                zone->nr_scan_active = 0;
                zone->nr_scan_inactive = 0;
                zone->nr_active = 0;
index a7eb649..015dc5e 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -92,11 +92,7 @@ int rotate_reclaimable_page(struct page *page)
        spin_lock_irqsave(&zone->lru_lock, flags);
        if (PageLRU(page) && !PageActive(page)) {
                list_del(&page->lru);
-#ifdef CONFIG_CKRM_RES_MEM
-               list_add_tail(&page->lru, &ckrm_zone->inactive_list);
-#else
-               list_add_tail(&page->lru, &zone->inactive_list);
-#endif
+               ckrm_add_tail_inactive(page);
                inc_page_state(pgrotated);
        }
        if (!test_clear_page_writeback(page))
index 6f7fba5..8fc4a3d 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/rwsem.h>
+#include <linux/ckrm_mem.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -589,7 +590,7 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
                        nr_taken++;
                }
                zone->nr_inactive -= nr_taken;
-               ckrm_zone_dec_inactive(ckrm_zone, nr_taken);
+               ckrm_zone_sub_inactive(ckrm_zone, nr_taken);
                spin_unlock_irq(&zone->lru_lock);
 
                if (nr_taken == 0)
@@ -616,11 +617,11 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
                                BUG();
                        list_del(&page->lru);
                        if (PageActive(page)) {
-                               ckrm_zone_inc_active(ckrm_zone, 1);
+                               ckrm_zone_add_active(ckrm_zone, 1);
                                zone->nr_active++;
                                list_add(&page->lru, active_list);
                        } else {
-                               ckrm_zone_inc_inactive(ckrm_zone, 1);
+                               ckrm_zone_add_inactive(ckrm_zone, 1);
                                zone->nr_inactive++;
                                list_add(&page->lru, inactive_list);
                        }
@@ -709,7 +710,7 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
        }
        zone->pages_scanned += pgscanned;
        zone->nr_active -= pgmoved;
-       ckrm_zone_dec_active(ckrm_zone, pgmoved);
+       ckrm_zone_sub_active(ckrm_zone, pgmoved);
        spin_unlock_irq(&zone->lru_lock);
 
        /*
@@ -770,8 +771,8 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
                list_move(&page->lru, inactive_list);
                pgmoved++;
                if (!pagevec_add(&pvec, page)) {
-                       ckrm_zone_inc_inactive(ckrm_zone, pgmoved);
                        zone->nr_inactive += pgmoved;
+                       ckrm_zone_add_inactive(ckrm_zone, pgmoved);
                        spin_unlock_irq(&zone->lru_lock);
                        pgdeactivate += pgmoved;
                        pgmoved = 0;
@@ -781,8 +782,8 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
                        spin_lock_irq(&zone->lru_lock);
                }
        }
-       ckrm_zone_inc_inactive(ckrm_zone, pgmoved);
        zone->nr_inactive += pgmoved;
+       ckrm_zone_add_inactive(ckrm_zone, pgmoved);
        pgdeactivate += pgmoved;
        if (buffer_heads_over_limit) {
                spin_unlock_irq(&zone->lru_lock);
@@ -800,16 +801,16 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
                list_move(&page->lru, active_list);
                pgmoved++;
                if (!pagevec_add(&pvec, page)) {
-                       ckrm_zone_inc_active(ckrm_zone, pgmoved);
                        zone->nr_active += pgmoved;
+                       ckrm_zone_add_active(ckrm_zone, pgmoved);
                        pgmoved = 0;
                        spin_unlock_irq(&zone->lru_lock);
                        __pagevec_release(&pvec);
                        spin_lock_irq(&zone->lru_lock);
                }
        }
-       ckrm_zone_inc_active(ckrm_zone, pgmoved);
        zone->nr_active += pgmoved;
+       ckrm_zone_add_active(ckrm_zone, pgmoved);
        spin_unlock_irq(&zone->lru_lock);
        pagevec_release(&pvec);
 
@@ -818,45 +819,6 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
 }
 
 #ifdef CONFIG_CKRM_RES_MEM
-static int
-shrink_weight(struct ckrm_zone *czone)
-{
-       u64 temp;
-       struct zone *zone = czone->zone;
-       struct ckrm_mem_res *cls = czone->memcls;
-       int zone_usage, zone_guar, zone_total, guar, ret, cnt;
-
-       zone_usage = czone->nr_active + czone->nr_inactive;
-       czone->active_over = czone->inactive_over = 0;
-
-       if (zone_usage < SWAP_CLUSTER_MAX * 4)
-               return 0;
-
-       if (cls->pg_guar == CKRM_SHARE_DONTCARE) {
-               // no guarantee for this class. use implicit guarantee
-               guar = cls->impl_guar / cls->nr_dontcare;
-       } else {
-               guar = cls->pg_unused / cls->nr_dontcare;
-       }
-       zone_total = zone->nr_active + zone->nr_inactive + zone->free_pages;
-       temp = (u64) guar * zone_total;
-       do_div(temp, ckrm_tot_lru_pages);
-       zone_guar = (int) temp;
-
-       ret = ((zone_usage - zone_guar) > SWAP_CLUSTER_MAX) ?
-                               (zone_usage - zone_guar) : 0;
-       if (ret) {
-               cnt = czone->nr_active - (2 * zone_guar / 3);
-               if (cnt > 0)
-                       czone->active_over = cnt;
-               cnt = czone->active_over + czone->nr_inactive
-                                       - zone_guar / 3;
-               if (cnt > 0)
-                       czone->inactive_over = cnt;
-       }
-       return ret;
-}
-
 static void
 shrink_ckrmzone(struct ckrm_zone *czone, struct scan_control *sc)
 {
@@ -878,121 +840,96 @@ shrink_ckrmzone(struct ckrm_zone *czone, struct scan_control *sc)
                                break;
                        }
                }
-
-               throttle_vm_writeout();
        }
 }
 
-/* insert an entry to the list and sort decendently*/
+/* FIXME: This function needs to be given more thought. */
 static void
-list_add_sort(struct list_head *entry, struct list_head *head)
+ckrm_shrink_class(struct ckrm_mem_res *cls)
 {
-       struct ckrm_zone *czone, *new =
-                       list_entry(entry, struct ckrm_zone, victim_list);
-       struct list_head* pos = head->next;
-
-       while (pos != head) {
-               czone = list_entry(pos, struct ckrm_zone, victim_list);
-               if (new->shrink_weight > czone->shrink_weight) {
-                       __list_add(entry, pos->prev, pos);
-                       return;
-               }
-               pos = pos->next;
-       }
-       list_add_tail(entry, head);
-       return; 
-}
+       struct scan_control sc;
+       struct zone *zone;
+       int zindex = 0, cnt, act_credit = 0, inact_credit = 0;
 
-static void
-shrink_choose_victims(struct list_head *victims,
-               unsigned long nr_active, unsigned long nr_inactive)
-{
-       unsigned long nr;
-       struct ckrm_zone* czone;
-       struct list_head *pos, *next;
-
-       pos = victims->next;
-       while ((pos != victims) && (nr_active || nr_inactive)) {
-               czone = list_entry(pos, struct ckrm_zone, victim_list);
-               
-               if (nr_active && czone->active_over) {
-                       nr = min(nr_active, czone->active_over);
-                       czone->shrink_active += nr;
-                       czone->active_over -= nr;
-                       nr_active -= nr;
+       sc.nr_mapped = read_page_state(nr_mapped);
+       sc.nr_scanned = 0;
+       sc.nr_reclaimed = 0;
+       sc.priority = 0; // always very high priority
+
+       for_each_zone(zone) {
+               int zone_total, zone_limit, active_limit,
+                                       inactive_limit, clszone_limit;
+               struct ckrm_zone *czone;
+               u64 temp;
+
+               czone = &cls->ckrm_zone[zindex];
+               if (ckrm_test_set_shrink(czone))
+                       continue;
+
+               zone->temp_priority = zone->prev_priority;
+               zone->prev_priority = sc.priority;
+
+               zone_total = zone->nr_active + zone->nr_inactive 
+                                               + zone->free_pages;
+
+               temp = (u64) cls->pg_limit * zone_total;
+               do_div(temp, ckrm_tot_lru_pages);
+               zone_limit = (int) temp;
+               clszone_limit = (ckrm_mem_shrink_to * zone_limit) / 100;
+               active_limit = (2 * clszone_limit) / 3; // 2/3rd in active list
+               inactive_limit = clszone_limit / 3; // 1/3rd in inactive list
+
+               czone->shrink_active = 0;
+               cnt = czone->nr_active + act_credit - active_limit;
+               if (cnt > 0) {
+                       czone->shrink_active = (unsigned long) cnt;
+                       act_credit = 0;
+               } else {
+                       act_credit += cnt;
                }
 
-               if (nr_inactive && czone->inactive_over) {
-                       nr = min(nr_inactive, czone->inactive_over);
-                       czone->shrink_inactive += nr;
-                       czone->inactive_over -= nr;
-                       nr_inactive -= nr;
+               czone->shrink_inactive = 0;
+               cnt = czone->shrink_active + inact_credit +
+                                       (czone->nr_inactive - inactive_limit);
+               if (cnt > 0) {
+                       czone->shrink_inactive = (unsigned long) cnt;
+                       inact_credit = 0;
+               } else {
+                       inact_credit += cnt;
                }
-               pos = pos->next;
-       }
 
-       pos = victims->next;
-       while (pos != victims) {
-               czone = list_entry(pos, struct ckrm_zone, victim_list);
-               next = pos->next;
-               if (czone->shrink_active == 0 && czone->shrink_inactive == 0) {
-                       list_del_init(pos);
-                       ckrm_clear_shrink(czone);
+
+               if (czone->shrink_active || czone->shrink_inactive) {
+                       sc.nr_to_reclaim = czone->shrink_inactive;
+                       shrink_ckrmzone(czone, &sc);
                }
-               pos = next;
-       }       
-       return;
+               zone->prev_priority = zone->temp_priority;
+               zindex++;
+               ckrm_clear_shrink(czone);
+       }
 }
 
 static void
-shrink_get_victims(struct zone *zone, unsigned long nr_active,
-               unsigned long nr_inactive, struct list_head *victims)
+ckrm_shrink_classes(void)
 {
-       struct list_head *pos;
        struct ckrm_mem_res *cls;
-       struct ckrm_zone *czone;
-       int zoneindex = zone_idx(zone);
-       
-       if (ckrm_nr_mem_classes <= 1) {
-               if (ckrm_mem_root_class) {
-                       czone = ckrm_mem_root_class->ckrm_zone + zoneindex;
-                       if (!ckrm_test_set_shrink(czone)) {
-                               list_add(&czone->victim_list, victims);
-                               czone->shrink_active = nr_active;
-                               czone->shrink_inactive = nr_inactive;
-                       }
-               }
-               return;
-       }
-       spin_lock_irq(&ckrm_mem_lock);
-       list_for_each_entry(cls, &ckrm_memclass_list, mcls_list) {
-               czone = cls->ckrm_zone + zoneindex;
-               if (ckrm_test_set_shrink(czone))
-                       continue;
 
-               czone->shrink_active = 0;
-               czone->shrink_inactive = 0;
-               czone->shrink_weight = shrink_weight(czone);
-               if (czone->shrink_weight) {
-                       list_add_sort(&czone->victim_list, victims);
-               } else {
-                       ckrm_clear_shrink(czone);
-               }
-       }
-       pos = victims->next;
-       while (pos != victims) {
-               czone = list_entry(pos, struct ckrm_zone, victim_list);
-               pos = pos->next;
-       }
-       shrink_choose_victims(victims, nr_active, nr_inactive);
-       spin_unlock_irq(&ckrm_mem_lock);
-       pos = victims->next;
-       while (pos != victims) {
-               czone = list_entry(pos, struct ckrm_zone, victim_list);
-               pos = pos->next;
+       spin_lock(&ckrm_mem_lock);
+       while (!ckrm_shrink_list_empty()) {
+               cls =  list_entry(ckrm_shrink_list.next, struct ckrm_mem_res,
+                               shrink_list);
+               list_del(&cls->shrink_list);
+               cls->flags &= ~CLS_AT_LIMIT;
+               spin_unlock(&ckrm_mem_lock);
+               ckrm_shrink_class(cls);
+               spin_lock(&ckrm_mem_lock);
        }
+       spin_unlock(&ckrm_mem_lock);
 }
-#endif /* CONFIG_CKRM_RES_MEM */
+
+#else
+#define ckrm_shrink_classes()  do { } while(0)
+#endif
 
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
@@ -1037,9 +974,9 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
                        czone = list_entry(pos, struct ckrm_zone, victim_list);
                        next = pos->next;
                        list_del_init(pos);
-                       ckrm_clear_shrink(czone);
                        sc->nr_to_reclaim = czone->shrink_inactive;
                        shrink_ckrmzone(czone, sc);
+                       ckrm_clear_shrink(czone);
                        pos = next;
                }
        }
@@ -1064,97 +1001,6 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
 #endif
 }
 
-#ifdef CONFIG_CKRM_RES_MEM
-// This function needs to be given more thought.
-// Shrink the class to be at shrink_to%" of its limit
-static void
-ckrm_shrink_class(struct ckrm_mem_res *cls)
-{
-       struct scan_control sc;
-       struct zone *zone;
-       int zindex = 0, cnt, act_credit = 0, inact_credit = 0;
-       int shrink_to = ckrm_mem_get_shrink_to();
-
-       sc.nr_mapped = read_page_state(nr_mapped);
-       sc.nr_scanned = 0;
-       sc.nr_reclaimed = 0;
-       sc.priority = 0; // always very high priority
-
-       check_memclass(cls, "bef_shnk_cls");
-       for_each_zone(zone) {
-               int zone_total, zone_limit, active_limit,
-                                       inactive_limit, clszone_limit;
-               struct ckrm_zone *czone;
-               u64 temp;
-
-               czone = &cls->ckrm_zone[zindex];
-               if (ckrm_test_set_shrink(czone))
-                       continue;
-
-               zone->temp_priority = zone->prev_priority;
-               zone->prev_priority = sc.priority;
-
-               zone_total = zone->nr_active + zone->nr_inactive 
-                                               + zone->free_pages;
-
-               temp = (u64) cls->pg_limit * zone_total;
-               do_div(temp, ckrm_tot_lru_pages);
-               zone_limit = (int) temp;
-               clszone_limit = (shrink_to * zone_limit) / 100;
-               active_limit = (2 * clszone_limit) / 3; // 2/3rd in active list
-               inactive_limit = clszone_limit / 3; // 1/3rd in inactive list
-
-               czone->shrink_active = 0;
-               cnt = czone->nr_active + act_credit - active_limit;
-               if (cnt > 0) {
-                       czone->shrink_active = (unsigned long) cnt;
-               } else {
-                       act_credit += cnt;
-               }
-
-               czone->shrink_inactive = 0;
-               cnt = czone->shrink_active + inact_credit +
-                                       (czone->nr_inactive - inactive_limit);
-               if (cnt > 0) {
-                       czone->shrink_inactive = (unsigned long) cnt;
-               } else {
-                       inact_credit += cnt;
-               }
-
-
-               if (czone->shrink_active || czone->shrink_inactive) {
-                       sc.nr_to_reclaim = czone->shrink_inactive;
-                       shrink_ckrmzone(czone, &sc);
-               }
-               zone->prev_priority = zone->temp_priority;
-               zindex++;
-               ckrm_clear_shrink(czone);
-       }
-       check_memclass(cls, "aft_shnk_cls");
-}
-
-static void
-ckrm_shrink_classes(void)
-{
-       struct ckrm_mem_res *cls;
-
-       spin_lock_irq(&ckrm_mem_lock);
-       while (!ckrm_shrink_list_empty()) {
-               cls =  list_entry(ckrm_shrink_list.next, struct ckrm_mem_res,
-                               shrink_list);
-               list_del(&cls->shrink_list);
-               cls->flags &= ~MEM_AT_LIMIT;
-               spin_unlock_irq(&ckrm_mem_lock);
-               ckrm_shrink_class(cls);
-               spin_lock_irq(&ckrm_mem_lock);
-       }
-       spin_unlock_irq(&ckrm_mem_lock);
-}
-
-#else
-#define ckrm_shrink_classes()  do { } while(0)
-#endif
-
 /*
  * This is the direct reclaim path, for page-allocating processes.  We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
@@ -1492,7 +1338,7 @@ static int kswapd(void *p)
 
                if (!ckrm_shrink_list_empty())
                        ckrm_shrink_classes();
-               else
+               else 
                        balance_pgdat(pgdat, 0);
        }
        return 0;
index e516b27..ca2935d 100644 (file)
@@ -22,7 +22,7 @@ Summary: The Linux kernel (the core of the Linux operating system)
 %define kversion 2.6.%{sublevel}
 %define rpmversion 2.6.%{sublevel}
 %define rhbsys  %([ -r /etc/beehive-root ] && echo  || echo .`whoami`)
-%define release 1.14_FC2.2.planetlab%{?date:.%{date}}
+%define release 1.14_FC2.1.planetlab%{?date:.%{date}}
 %define signmodules 0
 
 %define KVERREL %{PACKAGE_VERSION}-%{PACKAGE_RELEASE}