linux 2.6.16.38 w/ vs2.0.3-rc1

[linux-2.6.git] / Documentation / filesystems / proc.txt
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt

index 378722d..944cf10 100644 (file)
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -55,7 +55,7 @@ This work is  based on the 2.2.*  kernel version and the  upcoming 2.4.*. I'm
  afraid it's still far from complete, but we  hope it will be useful. As far as
  we know, it is the first 'all-in-one' document about the /proc file system. It
  is focused  on the Intel  x86 hardware,  so if you  are looking for  PPC, ARM,
-SPARC, APX, etc., features, you probably  won't find what you are looking for.
+SPARC, AXP, etc., features, you probably  won't find what you are looking for.
  It also only covers IPv4 networking, not IPv6 nor other protocols - sorry. But
  additions and patches  are welcome and will  be added to this  document if you
  mail them to Bodo.
@@ -133,6 +133,7 @@ Table 1-1: Process specific entries in /proc
   statm   Process memory status information              
   status  Process status in human readable form          
   wchan   If CONFIG_KALLSYMS is set, a pre-decoded wchan
+ smaps  Extension based on maps, presenting the rss size for each mapped file
  ..............................................................................
  
  For example, to get the status information of a process, all you have to do is
@@ -169,16 +170,18 @@ information. The  statm  file  contains  more  detailed  information about the
  process memory usage. Its seven fields are explained in Table 1-2.
  
  
-Table 1-2: Contents of the statm files 
+Table 1-2: Contents of the statm files (as of 2.6.8-rc3)
  ..............................................................................
- File     Content                         
- size     total program size              
- resident size of memory portions         
- shared   number of pages that are shared 
- trs      number of pages that are 'code' 
- drs      number of pages of data/stack   
- lrs      number of pages of library      
- dt       number of dirty pages           
+ Field    Content
+ size     total program size (pages)           (same as VmSize in status)
+ resident size of memory portions (pages)      (same as VmRSS in status)
+ shared   number of pages that are shared      (i.e. backed by a file)
+ trs      number of pages that are 'code'      (not including libs; broken,
+                                                       includes data segment)
+ lrs      number of pages of library           (always 0 on 2.6)
+ drs      number of pages of data/stack                (including libs; broken,
+                                                       includes library text)
+ dt       number of dirty pages                        (always 0 on 2.6)
  ..............................................................................
  
  1.2 Kernel data
@@ -201,7 +204,7 @@ Table 1-3: Kernel info in /proc
   devices     Available devices (block and character)           
   dma         Used DMS channels                                 
   filesystems Supported filesystems                             
- driver             Various drivers grouped here, currently rtc        (2.4)
+ driver             Various drivers grouped here, currently rtc (2.4)
   execdomains Execdomains, related to security                  (2.4)
   fb         Frame Buffer devices                               (2.4)
   fs         File system parameters, currently nfs/exports      (2.4)
@@ -348,22 +351,6 @@ available.  In this case, there are 0 chunks of 2^0*PAGE_SIZE available in
  ZONE_DMA, 4 chunks of 2^1*PAGE_SIZE in ZONE_DMA, 101 chunks of 2^4*PAGE_SIZE 
  available in ZONE_NORMAL, etc... 
  
-
-1.3 IDE devices in /proc/ide
-----------------------------
-
-The subdirectory /proc/ide contains information about all IDE devices of which
-the kernel  is  aware.  There is one subdirectory for each IDE controller, the
-file drivers  and a link for each IDE device, pointing to the device directory
-in the controller specific subtree.
-
-The file  drivers  contains general information about the drivers used for the
-IDE devices:
-
-  > cat /proc/ide/drivers 
-  ide-cdrom version 4.53 
-  ide-disk version 1.08 
-
  ..............................................................................
  
  meminfo:
@@ -392,9 +379,9 @@ Dirty:             968 kB
  Writeback:           0 kB
  Mapped:         280372 kB
  Slab:           684068 kB
-Committed_AS:  1576424 kB
+CommitLimit:   7669796 kB
+Committed_AS:   100056 kB
  PageTables:      24448 kB
-ReverseMaps:   1080904
  VmallocTotal:   112216 kB
  VmallocUsed:       428 kB
  VmallocChunk:   111088 kB
@@ -431,27 +418,55 @@ VmallocChunk:   111088 kB
         Dirty: Memory which is waiting to get written back to the disk
     Writeback: Memory which is actively being written back to the disk
        Mapped: files which have been mmaped, such as libraries
-              Slab: in-kernel data structures cache
-Committed_AS: An estimate of how much RAM you would need to make a
-              99.99% guarantee that there never is OOM (out of memory)
-              for this workload. Normally the kernel will overcommit
-              memory. That means, say you do a 1GB malloc, nothing
-              happens, really. Only when you start USING that malloc
-              memory you will get real memory on demand, and just as
-              much as you use. So you sort of take a mortgage and hope
-              the bank doesn't go bust. Other cases might include when
-              you mmap a file that's shared only when you write to it
-              and you get a private copy of that data. While it normally
-              is shared between processes. The Committed_AS is a
-              guesstimate of how much RAM/swap you would need
-              worst-case.
+        Slab: in-kernel data structures cache
+ CommitLimit: Based on the overcommit ratio ('vm.overcommit_ratio'),
+              this is the total amount of  memory currently available to
+              be allocated on the system. This limit is only adhered to
+              if strict overcommit accounting is enabled (mode 2 in
+              'vm.overcommit_memory').
+              The CommitLimit is calculated with the following formula:
+              CommitLimit = ('vm.overcommit_ratio' / 100 * Physical RAM) + Swap
+              For example, on a system with 1G of physical RAM and 7G
+              of swap with a 'vm.overcommit_ratio' of 30 it would
+              yield a CommitLimit of 7.3G.
+              For more details, see the memory overcommit documentation
+              in vm/overcommit-accounting.
+Committed_AS: The amount of memory presently allocated on the system.
+              The committed memory is a sum of all of the memory which
+              has been allocated by processes, even if it has not been
+              "used" by them as of yet. A process which malloc()'s 1G
+              of memory, but only touches 300M of it will only show up
+              as using 300M of memory even if it has the address space
+              allocated for the entire 1G. This 1G is memory which has
+              been "committed" to by the VM and can be used at any time
+              by the allocating application. With strict overcommit
+              enabled on the system (mode 2 in 'vm.overcommit_memory'),
+              allocations which would exceed the CommitLimit (detailed
+              above) will not be permitted. This is useful if one needs
+              to guarantee that processes will not fail due to lack of
+              memory once that memory has been successfully allocated.
    PageTables: amount of memory dedicated to the lowest level of page
                tables.
- ReverseMaps: number of reverse mappings performed
  VmallocTotal: total size of vmalloc memory area
   VmallocUsed: amount of vmalloc area which is used
  VmallocChunk: largest contigious block of vmalloc area which is free
  
+
+1.3 IDE devices in /proc/ide
+----------------------------
+
+The subdirectory /proc/ide contains information about all IDE devices of which
+the kernel  is  aware.  There is one subdirectory for each IDE controller, the
+file drivers  and a link for each IDE device, pointing to the device directory
+in the controller specific subtree.
+
+The file  drivers  contains general information about the drivers used for the
+IDE devices:
+
+  > cat /proc/ide/drivers
+  ide-cdrom version 4.53
+  ide-disk version 1.08
+
  More detailed  information  can  be  found  in  the  controller  specific
  subdirectories. These  are  named  ide0,  ide1  and  so  on.  Each  of  these
  directories contains the files shown in table 1-4.
@@ -852,7 +867,8 @@ this time.
  The value  in  file-max  denotes  the  maximum number of file handles that the
  Linux kernel will allocate. When you get a lot of error messages about running
  out of  file handles, you might want to raise this limit. The default value is
-4096. To change it, just write the new number into the file:
+10% of  RAM in kilobytes.  To  change it, just  write the new number  into the
+file:
  
    # cat /proc/sys/fs/file-max 
    4096 
@@ -864,11 +880,14 @@ out of  file handles, you might want to raise this limit. The default value is
  This method  of  revision  is  useful  for  all customizable parameters of the
  kernel - simply echo the new value to the corresponding file.
  
-The three  values  in file-nr denote the number of allocated file handles, the
-number of  used file handles, and the maximum number of file handles. When the
-allocated file  handles  come close to the maximum, but the number of actually
-used ones  is  far  behind,  you've  encountered  a peak in your usage of file
-handles and you don't need to increase the maximum.
+Historically, the three values in file-nr denoted the number of allocated file
+handles,  the number of  allocated but  unused file  handles, and  the maximum
+number of file handles. Linux 2.6 always  reports 0 as the number of free file
+handles -- this  is not an error,  it just means that the  number of allocated
+file handles exactly matches the number of used file handles.
+
+Attempts to  allocate more  file descriptors than  file-max are  reported with
+printk, look for "VFS: file-max limit <number> reached".
  
  inode-state and inode-nr
  ------------------------
@@ -891,16 +910,6 @@ nr_free_inodes
  Represents the  number of free inodes. Ie. The number of inuse inodes is
  (nr_inodes - nr_free_inodes).
  
-super-nr and super-max
-----------------------
-
-Again, super  block structures are allocated by the kernel, but not freed. The
-file super-max  contains  the  maximum  number  of super block handlers, where
-super-nr shows the number of currently allocated ones.
-
-Every mounted file system needs a super block, so if you plan to mount lots of
-file systems, you may want to increase these numbers.
-
  aio-nr and aio-max-nr
  ---------------------
  
@@ -1105,12 +1114,41 @@ modprobe
  The location  where  the  modprobe  binary  is  located.  The kernel uses this
  program to load modules on demand.
  
+unknown_nmi_panic
+-----------------
+
+The value in this file affects how NMIs are handled. When the value is
+non-zero, an unknown NMI is trapped and the kernel panics. At that time, kernel
+debugging information is displayed on the console.
+
+For example, the NMI switch found on most IA32 servers raises an unknown NMI.
+If a system hangs up, try pressing the NMI switch.
+
+[NOTE]
+   This function and oprofile share an NMI callback. Therefore this function
+   cannot be enabled while oprofile is active.
+   The NMI watchdog is also disabled when the value in this file is set to
+   non-zero.
+
+
  2.4 /proc/sys/vm - The virtual memory subsystem
  -----------------------------------------------
  
  The files  in  this directory can be used to tune the operation of the virtual
  memory (VM)  subsystem  of  the  Linux  kernel.
  
+vfs_cache_pressure
+------------------
+
+Controls the tendency of the kernel to reclaim the memory which is used for
+caching of directory and inode objects.
+
+At the default value of vfs_cache_pressure=100 the kernel will attempt to
+reclaim dentries and inodes at a "fair" rate with respect to pagecache and
+swapcache reclaim.  Decreasing vfs_cache_pressure causes the kernel to prefer
+to retain dentry and inode caches.  Increasing vfs_cache_pressure beyond 100
+causes the kernel to prefer to reclaim dentries and inodes.
+
  dirty_background_ratio
  ----------------------
  
@@ -1141,6 +1179,12 @@ for writeout by the pdflush daemons.  It is expressed in 100'ths of a second.
  Data which has been dirty in-memory for longer than this interval will be
  written out next time a pdflush daemon wakes up.
  
+legacy_va_layout
+----------------
+
+If non-zero, this sysctl disables the new 32-bit mmap layout - the kernel
+will use the legacy (2.4) layout for all processes.
+
  lower_zone_protection
  ---------------------
  
@@ -1197,16 +1241,83 @@ swap-intensive.
  overcommit_memory
  -----------------
  
-This file  contains  one  value.  The following algorithm is used to decide if
-there's enough  memory:  if  the  value of overcommit_memory is positive, then
-there's always  enough  memory. This is a useful feature, since programs often
-malloc() huge  amounts  of  memory 'just in case', while they only use a small
-part of  it.  Leaving  this value at 0 will lead to the failure of such a huge
-malloc(), when in fact the system has enough memory for the program to run.
+Controls overcommit of system memory, possibly allowing processes
+to allocate (but not use) more memory than is actually available.
+
+
+0      -       Heuristic overcommit handling. Obvious overcommits of
+               address space are refused. Used for a typical system. It
+               ensures a seriously wild allocation fails while allowing
+               overcommit to reduce swap usage.  root is allowed to
+               allocate slightly more memory in this mode. This is the
+               default.
+
+1      -       Always overcommit. Appropriate for some scientific
+               applications.
+
+2      -       Don't overcommit. The total address space commit
+               for the system is not permitted to exceed swap plus a
+               configurable percentage (default is 50) of physical RAM.
+               Depending on the percentage you use, in most situations
+               this means a process will not be killed while attempting
+               to use already-allocated memory but will receive errors
+               on memory allocation as appropriate.
+
+overcommit_ratio
+----------------
+
+Percentage of physical memory size to include in overcommit calculations
+(see above.)
+
+Memory allocation limit = swapspace + physmem * (overcommit_ratio / 100)
+
+       swapspace = total size of all swap areas
+       physmem = size of physical memory in system
+
+nr_hugepages and hugetlb_shm_group
+----------------------------------
+
+nr_hugepages configures the number of hugetlb pages reserved for the system.
+
+hugetlb_shm_group contains the group id that is allowed to create SysV shared
+memory segments using hugetlb pages.
+
+laptop_mode
+-----------
+
+laptop_mode is a knob that controls "laptop mode". All the things that are
+controlled by this knob are discussed in Documentation/laptop-mode.txt.
+
+block_dump
+----------
+
+block_dump enables block I/O debugging when set to a nonzero value. More
+information on block I/O debugging is in Documentation/laptop-mode.txt.
+
+swap_token_timeout
+------------------
+
+This file contains the valid hold time of the swap out protection token. The
+Linux VM has a token based thrashing control mechanism and uses the token to
+prevent unnecessary page faults in thrashing situations. The value is given
+in seconds and can be used to tune thrashing behavior.
+
+drop_caches
+-----------
+
+Writing to this will cause the kernel to drop clean caches, dentries and
+inodes from memory, causing that memory to become free.
+
+To free pagecache:
+       echo 1 > /proc/sys/vm/drop_caches
+To free dentries and inodes:
+       echo 2 > /proc/sys/vm/drop_caches
+To free pagecache, dentries and inodes:
+       echo 3 > /proc/sys/vm/drop_caches
+
+As this is a non-destructive operation and dirty objects are not freeable, the
+user should run `sync' first.
  
-On the  other  hand,  enabling this feature can cause you to run out of memory
-and thrash the system to death, so large and/or important servers will want to
-set this value to 0.
  
  2.5 /proc/sys/dev - Device specific parameters
  ----------------------------------------------
@@ -1628,11 +1739,13 @@ flush
  
  Writing to this file results in a flush of the routing cache.
  
-gc_elastic, gc_interval, gc_min_interval, gc_tresh, gc_timeout
---------------------------------------------------------------
+gc_elasticity, gc_interval, gc_min_interval_ms, gc_timeout, gc_thresh
+---------------------------------------------------------------------
  
  Values to  control  the  frequency  and  behavior  of  the  garbage collection
-algorithm for the routing cache.
+algorithm for the routing cache. gc_min_interval is deprecated and replaced
+by gc_min_interval_ms.
+
  
  max_size
  --------
@@ -1671,18 +1784,25 @@ settings contain additional options to set garbage collection parameters.
  
  In the interface directories you'll find the following entries:
  
-base_reachable_time
--------------------
+base_reachable_time, base_reachable_time_ms
+-------------------------------------------
  
  A base  value  used for computing the random reachable time value as specified
  in RFC2461.
  
-retrans_time
-------------
+base_reachable_time, which is deprecated, is expressed in seconds.
+base_reachable_time_ms is expressed in milliseconds.
  
-The time,  expressed  in  jiffies  (1/100 sec), between retransmitted Neighbor
-Solicitation messages.  Used  for  address  resolution  and  to determine if a
-neighbor is unreachable.
+retrans_time, retrans_time_ms
+-----------------------------
+
+The time between retransmitted Neighbor Solicitation messages.
+Used for address resolution and to determine if a neighbor is
+unreachable.
+
+retrans_time, which is deprecated, is expressed in 1/100 seconds (for
+IPv4) or in jiffies (for IPv6).
+retrans_time_ms is expressed in milliseconds.
  
  unres_qlen
  ----------
@@ -1848,10 +1968,3 @@ need to  recompile  the kernel, or even to reboot the system. The files in the
  command to write value into these files, thereby changing the default settings
  of the kernel.
  ------------------------------------------------------------------------------
-
-
-
-
-
-
-