This commit was manufactured by cvs2svn to create branch 'vserver'.
author     Planet-Lab Support <support@planet-lab.org>
Wed, 2 Jun 2004 20:45:38 +0000 (20:45 +0000)
committer  Planet-Lab Support <support@planet-lab.org>
Wed, 2 Jun 2004 20:45:38 +0000 (20:45 +0000)
201 files changed:
Documentation/arm/SA1100/PCMCIA [new file with mode: 0644]
Documentation/arm/XScale/ADIFCC/80200EVB [new file with mode: 0644]
Documentation/arm/XScale/IOP3XX/IQ80310 [new file with mode: 0644]
Documentation/arm/XScale/IOP3XX/IQ80321 [new file with mode: 0644]
Documentation/arm/XScale/IOP3XX/aau.txt [new file with mode: 0644]
Documentation/arm/XScale/IOP3XX/dma.txt [new file with mode: 0644]
Documentation/arm/XScale/IOP3XX/message.txt [new file with mode: 0644]
Documentation/arm/XScale/IOP3XX/pmon.txt [new file with mode: 0644]
Documentation/arm/XScale/cache-lock.txt [new file with mode: 0644]
Documentation/arm/XScale/pmu.txt [new file with mode: 0644]
Documentation/arm/XScale/tlb-lock.txt [new file with mode: 0644]
arch/arm/mach-omap/innovator1510.c [new file with mode: 0644]
arch/arm/mach-omap/innovator1610.c [new file with mode: 0644]
arch/arm/mach-omap/irq.h [new file with mode: 0644]
arch/arm/mach-omap/omap-generic.c [new file with mode: 0644]
arch/arm/mach-omap/omap-perseus2.c [new file with mode: 0644]
arch/i386/mach-es7000/es7000.c [new file with mode: 0644]
arch/i386/mach-es7000/setup.c [new file with mode: 0644]
arch/i386/mach-es7000/topology.c [new file with mode: 0644]
arch/ia64/kernel/perfmon_hpsim.h [new file with mode: 0644]
arch/ppc/mm/cachemap.c [new file with mode: 0644]
arch/ppc/ocp/Makefile [new file with mode: 0644]
arch/ppc/ocp/ocp-driver.c [new file with mode: 0644]
arch/ppc/ocp/ocp-probe.c [new file with mode: 0644]
arch/ppc/ocp/ocp.c [new file with mode: 0644]
drivers/char/dz.c [new file with mode: 0644]
drivers/char/dz.h [new file with mode: 0644]
drivers/char/sh-sci.c [new file with mode: 0644]
drivers/char/sh-sci.h [new file with mode: 0644]
drivers/i2c/busses/i2c-ixp42x.c [new file with mode: 0644]
drivers/ide/pci/cmd640.h [new file with mode: 0644]
drivers/ide/ppc/swarm.c [new file with mode: 0644]
drivers/net/auto_irq.c [new file with mode: 0644]
drivers/net/rcif.h [new file with mode: 0644]
drivers/net/rclanmtl.c [new file with mode: 0644]
drivers/net/rclanmtl.h [new file with mode: 0644]
drivers/net/rcpci45.c [new file with mode: 0644]
drivers/net/wan/comx-hw-comx.c [new file with mode: 0644]
drivers/net/wan/comx-hw-locomx.c [new file with mode: 0644]
drivers/net/wan/comx-hw-mixcom.c [new file with mode: 0644]
drivers/net/wan/comx-hw-munich.c [new file with mode: 0644]
drivers/net/wan/comx-proto-fr.c [new file with mode: 0644]
drivers/net/wan/comx-proto-lapb.c [new file with mode: 0644]
drivers/net/wan/comx-proto-ppp.c [new file with mode: 0644]
drivers/net/wan/comx.c [new file with mode: 0644]
drivers/net/wan/comx.h [new file with mode: 0644]
drivers/net/wan/comxhw.h [new file with mode: 0644]
drivers/net/wan/falc-lh.h [new file with mode: 0644]
drivers/net/wan/hscx.h [new file with mode: 0644]
drivers/net/wan/mixcom.h [new file with mode: 0644]
drivers/net/wan/munich32x.h [new file with mode: 0644]
drivers/pcmcia/sa1100.h [new file with mode: 0644]
drivers/pcmcia/sa11xx_core.c [new file with mode: 0644]
drivers/pcmcia/sa11xx_core.h [new file with mode: 0644]
drivers/scsi/pcmcia/qlogic_core.c [new file with mode: 0644]
drivers/scsi/qlogicfas.h [new file with mode: 0644]
drivers/usb/core/driverfs.c [new file with mode: 0644]
fs/intermezzo/Makefile [new file with mode: 0644]
fs/intermezzo/cache.c [new file with mode: 0644]
fs/intermezzo/dcache.c [new file with mode: 0644]
fs/intermezzo/dir.c [new file with mode: 0644]
fs/intermezzo/ext_attr.c [new file with mode: 0644]
fs/intermezzo/file.c [new file with mode: 0644]
fs/intermezzo/fileset.c [new file with mode: 0644]
fs/intermezzo/inode.c [new file with mode: 0644]
fs/intermezzo/intermezzo_fs.h [new file with mode: 0644]
fs/intermezzo/intermezzo_idl.h [new file with mode: 0644]
fs/intermezzo/intermezzo_journal.h [new file with mode: 0644]
fs/intermezzo/intermezzo_kml.h [new file with mode: 0644]
fs/intermezzo/intermezzo_lib.h [new file with mode: 0644]
fs/intermezzo/intermezzo_psdev.h [new file with mode: 0644]
fs/intermezzo/intermezzo_upcall.h [new file with mode: 0644]
fs/intermezzo/journal.c [new file with mode: 0644]
fs/intermezzo/journal_ext2.c [new file with mode: 0644]
fs/intermezzo/journal_ext3.c [new file with mode: 0644]
fs/intermezzo/journal_obdfs.c [new file with mode: 0644]
fs/intermezzo/journal_reiserfs.c [new file with mode: 0644]
fs/intermezzo/journal_tmpfs.c [new file with mode: 0644]
fs/intermezzo/journal_xfs.c [new file with mode: 0644]
fs/intermezzo/kml.c [new file with mode: 0644]
fs/intermezzo/kml_decode.c [new file with mode: 0644]
fs/intermezzo/kml_reint.c [new file with mode: 0644]
fs/intermezzo/kml_setup.c [new file with mode: 0644]
fs/intermezzo/kml_unpack.c [new file with mode: 0644]
fs/intermezzo/kml_utils.c [new file with mode: 0644]
fs/intermezzo/methods.c [new file with mode: 0644]
fs/intermezzo/presto.c [new file with mode: 0644]
fs/intermezzo/psdev.c [new file with mode: 0644]
fs/intermezzo/replicator.c [new file with mode: 0644]
fs/intermezzo/super.c [new file with mode: 0644]
fs/intermezzo/sysctl.c [new file with mode: 0644]
fs/intermezzo/upcall.c [new file with mode: 0644]
fs/intermezzo/vfs.c [new file with mode: 0644]
fs/xfs/linux/kmem.h [new file with mode: 0644]
fs/xfs/linux/mrlock.h [new file with mode: 0644]
fs/xfs/linux/mutex.h [new file with mode: 0644]
fs/xfs/linux/sema.h [new file with mode: 0644]
fs/xfs/linux/spin.h [new file with mode: 0644]
fs/xfs/linux/sv.h [new file with mode: 0644]
fs/xfs/linux/time.h [new file with mode: 0644]
fs/xfs/linux/xfs_aops.c [new file with mode: 0644]
fs/xfs/linux/xfs_buf.c [new file with mode: 0644]
fs/xfs/linux/xfs_buf.h [new file with mode: 0644]
fs/xfs/linux/xfs_cred.h [new file with mode: 0644]
fs/xfs/linux/xfs_file.c [new file with mode: 0644]
fs/xfs/linux/xfs_fs_subr.c [new file with mode: 0644]
fs/xfs/linux/xfs_fs_subr.h [new file with mode: 0644]
fs/xfs/linux/xfs_globals.c [new file with mode: 0644]
fs/xfs/linux/xfs_globals.h [new file with mode: 0644]
fs/xfs/linux/xfs_ioctl.c [new file with mode: 0644]
fs/xfs/linux/xfs_iops.c [new file with mode: 0644]
fs/xfs/linux/xfs_iops.h [new file with mode: 0644]
fs/xfs/linux/xfs_linux.h [new file with mode: 0644]
fs/xfs/linux/xfs_lrw.c [new file with mode: 0644]
fs/xfs/linux/xfs_lrw.h [new file with mode: 0644]
fs/xfs/linux/xfs_stats.c [new file with mode: 0644]
fs/xfs/linux/xfs_stats.h [new file with mode: 0644]
fs/xfs/linux/xfs_super.c [new file with mode: 0644]
fs/xfs/linux/xfs_super.h [new file with mode: 0644]
fs/xfs/linux/xfs_sysctl.c [new file with mode: 0644]
fs/xfs/linux/xfs_sysctl.h [new file with mode: 0644]
fs/xfs/linux/xfs_version.h [new file with mode: 0644]
fs/xfs/linux/xfs_vfs.c [new file with mode: 0644]
fs/xfs/linux/xfs_vfs.h [new file with mode: 0644]
fs/xfs/linux/xfs_vnode.c [new file with mode: 0644]
fs/xfs/linux/xfs_vnode.h [new file with mode: 0644]
include/asm-alpha/rmap.h [new file with mode: 0644]
include/asm-arm/arch-cl7500/ide.h [new file with mode: 0644]
include/asm-arm/arch-cl7500/keyboard.h [new file with mode: 0644]
include/asm-arm/arch-clps711x/keyboard.h [new file with mode: 0644]
include/asm-arm/arch-ebsa110/ide.h [new file with mode: 0644]
include/asm-arm/arch-ebsa285/ide.h [new file with mode: 0644]
include/asm-arm/arch-iop3xx/ide.h [new file with mode: 0644]
include/asm-arm/arch-l7200/ide.h [new file with mode: 0644]
include/asm-arm/arch-l7200/keyboard.h [new file with mode: 0644]
include/asm-arm/arch-nexuspci/ide.h [new file with mode: 0644]
include/asm-arm/arch-pxa/ide.h [new file with mode: 0644]
include/asm-arm/arch-pxa/keyboard.h [new file with mode: 0644]
include/asm-arm/arch-rpc/ide.h [new file with mode: 0644]
include/asm-arm/arch-s3c2410/ide.h [new file with mode: 0644]
include/asm-arm/arch-sa1100/keyboard.h [new file with mode: 0644]
include/asm-arm/arch-shark/ide.h [new file with mode: 0644]
include/asm-arm/arch-shark/keyboard.h [new file with mode: 0644]
include/asm-arm/arch-tbox/ide.h [new file with mode: 0644]
include/asm-arm/rmap.h [new file with mode: 0644]
include/asm-arm26/rmap.h [new file with mode: 0644]
include/asm-cris/rmap.h [new file with mode: 0644]
include/asm-generic/rmap.h [new file with mode: 0644]
include/asm-h8300/aki3068net/machine-depend.h [new file with mode: 0644]
include/asm-h8300/edosk2674/machine-depend.h [new file with mode: 0644]
include/asm-h8300/generic/machine-depend.h [new file with mode: 0644]
include/asm-h8300/generic/timer_rate.h [new file with mode: 0644]
include/asm-h8300/h8300_smsc.h [new file with mode: 0644]
include/asm-h8300/h8max/machine-depend.h [new file with mode: 0644]
include/asm-i386/rmap.h [new file with mode: 0644]
include/asm-ia64/rmap.h [new file with mode: 0644]
include/asm-m68k/rmap.h [new file with mode: 0644]
include/asm-m68knommu/rmap.h [new file with mode: 0644]
include/asm-mips/rmap.h [new file with mode: 0644]
include/asm-parisc/rmap.h [new file with mode: 0644]
include/asm-ppc/rmap.h [new file with mode: 0644]
include/asm-ppc64/rmap.h [new file with mode: 0644]
include/asm-s390/rmap.h [new file with mode: 0644]
include/asm-sh/rmap.h [new file with mode: 0644]
include/asm-sparc/rmap.h [new file with mode: 0644]
include/asm-sparc64/rmap.h [new file with mode: 0644]
include/asm-um/rmap.h [new file with mode: 0644]
include/asm-v850/rmap.h [new file with mode: 0644]
include/asm-x86_64/rmap.h [new file with mode: 0644]
include/linux/ninline.h [new file with mode: 0644]
include/linux/vinline.h [new file with mode: 0644]
include/linux/vserver.h [new file with mode: 0644]
include/linux/vserver/context.h [new file with mode: 0644]
include/linux/vserver/cvirt.h [new file with mode: 0644]
include/linux/vserver/inode.h [new file with mode: 0644]
include/linux/vserver/legacy.h [new file with mode: 0644]
include/linux/vserver/limit.h [new file with mode: 0644]
include/linux/vserver/namespace.h [new file with mode: 0644]
include/linux/vserver/network.h [new file with mode: 0644]
include/linux/vserver/sched.h [new file with mode: 0644]
include/linux/vserver/signal.h [new file with mode: 0644]
include/linux/vserver/switch.h [new file with mode: 0644]
include/linux/vserver/xid.h [new file with mode: 0644]
kernel/vserver/Kconfig [new file with mode: 0644]
kernel/vserver/Makefile [new file with mode: 0644]
kernel/vserver/context.c [new file with mode: 0644]
kernel/vserver/cvirt.c [new file with mode: 0644]
kernel/vserver/init.c [new file with mode: 0644]
kernel/vserver/inode.c [new file with mode: 0644]
kernel/vserver/legacy.c [new file with mode: 0644]
kernel/vserver/limit.c [new file with mode: 0644]
kernel/vserver/namespace.c [new file with mode: 0644]
kernel/vserver/network.c [new file with mode: 0644]
kernel/vserver/proc.c [new file with mode: 0644]
kernel/vserver/sched.c [new file with mode: 0644]
kernel/vserver/signal.c [new file with mode: 0644]
kernel/vserver/switch.c [new file with mode: 0644]
kernel/vserver/sysctl.c [new file with mode: 0644]
net/bluetooth/syms.c [new file with mode: 0644]
sound/pci/ice1712/prodigy.c [new file with mode: 0644]
sound/pci/ice1712/prodigy.h [new file with mode: 0644]

diff --git a/Documentation/arm/SA1100/PCMCIA b/Documentation/arm/SA1100/PCMCIA
new file mode 100644 (file)
index 0000000..5eb5d3a
--- /dev/null
@@ -0,0 +1,374 @@
+Kernel Low-Level PCMCIA Interface Documentation
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+John G Dorsey <john+@cs.cmu.edu>
+Updated: 30 June, 2000
+
+
+Note: this interface has not been finalized!
+See also: http://www.cs.cmu.edu/~wearable/software/pcmcia-arm.html
+
+
+Introduction
+
+Early versions of PCMCIA Card Services for StrongARM were designed to
+permit a single socket driver to run on a variety of SA-1100 boards by
+using a userland configuration process. During the conversion to the 2.3
+kernel series, all of the configuration has moved into sub-drivers in the
+kernel proper (see linux/drivers/pcmcia/sa1100*). This document describes
+the low-level interface between those sub-drivers and the sa1100 socket
+driver module.
+
+Presently, there are six operations which must be provided by the
+board-specific code. Only functions whose implementation is likely to
+differ across board designs are required at this level. Some examples
+include:
+
+  - configuring card detect lines to generate interrupts
+  - sensing the legal voltage levels for inserted cards
+  - asserting the reset signal for a card
+
+Functions which are assumed to be the same across all designs are
+performed within the generic socket driver itself. Some examples of these
+kinds of operations include:
+
+  - configuring memory access times based on the core clock frequency
+  - reads/writes on memory, byte swizzling, ...
+
+The current implementation allows the specific per-board set of low-level
+operations to be determined at run time. For each specific board, the
+following structure should be filled in:
+
+  struct pcmcia_low_level {
+    int (*init)(struct pcmcia_init *);
+    int (*shutdown)(void);
+    int (*socket_state)(struct pcmcia_state_array *);
+    int (*get_irq_info)(struct pcmcia_irq_info *);
+    int (*configure_socket)(const struct pcmcia_configure *);
+  };
+
+The component functions are described in detail below. Using the
+machine_is_*() tests, the pointer `pcmcia_low_level' should be assigned to
+the location of the table for your board.
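+
+As a sketch, a board might provide its table and hook it up roughly as
+follows (the myboard_* functions and the machine_is_myboard() test are
+hypothetical placeholders, not part of this interface):
+
+  static struct pcmcia_low_level myboard_pcmcia_ops = {
+    .init             = myboard_pcmcia_init,
+    .shutdown         = myboard_pcmcia_shutdown,
+    .socket_state     = myboard_pcmcia_socket_state,
+    .get_irq_info     = myboard_pcmcia_get_irq_info,
+    .configure_socket = myboard_pcmcia_configure_socket,
+  };
+
+  if (machine_is_myboard())
+    pcmcia_low_level = &myboard_pcmcia_ops;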
+
+
+0. init(struct pcmcia_init *init)
+
+This operation has three responsibilities:
+
+  - perform any board-specific initialization tasks
+  - associate the given handler with any interrupt-generating signals
+    such as card detection, or battery voltage detection
+  - set up any necessary edge detection for card ready signals
+
+Argument passing for this operation is implemented by the following
+structure:
+
+  struct pcmcia_init {
+    void (*handler)(int irq, void *dev, struct pt_regs *regs);
+    struct pcmcia_maps *maps;
+  };
+
+Here, `handler' is provided by the socket driver, and `maps' must be
+modified if the default mapping isn't appropriate. This operation should
+return one of two values:
+
+  - the highest-numbered socket available, plus one
+  - a negative number, indicating an error in configuration
+
+Note that the former case is _not_ the same as "the number of sockets
+available." In particular, if your design uses SA-1100 slot "one" but
+not slot "zero," you MUST report "2" to the socket driver.
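+
+A minimal sketch of an init() implementation, assuming a board whose card
+detect lines are wired to two hypothetical GPIO interrupts, MYBOARD_IRQ_CD0
+and MYBOARD_IRQ_CD1 (names invented for illustration):
+
+  static int myboard_pcmcia_init(struct pcmcia_init *init)
+  {
+    /* board-specific controller setup would go here */
+
+    if (request_irq(MYBOARD_IRQ_CD0, init->handler, 0,
+                    "PCMCIA CD0", NULL) < 0)
+      return -1;
+    if (request_irq(MYBOARD_IRQ_CD1, init->handler, 0,
+                    "PCMCIA CD1", NULL) < 0)
+      return -1;
+
+    /* both SA-1100 sockets are used, so report 1 + 1 = 2 */
+    return 2;
+  }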
+
+
+1. shutdown(void)
+
+This operation takes no arguments, and will be called during cleanup for
+the socket driver module. Any state associated with the socket controller,
+including allocated data structures, reserved IRQs, etc. should be
+released in this routine.
+
+The return value for this operation is not examined.
+
+
+2. socket_state(struct pcmcia_state_array *state_array)
+
+This operation will be invoked from the interrupt handler which was set up
+in the earlier call to init(). Note, however, that it should not include
+any side effects which would be inappropriate if the operation were to
+occur when no interrupt is pending. (An extra invocation of this operation
+currently takes place to initialize state in the socket driver.)
+
+Argument passing for this operation is handled by a structure which
+contains an array of the following type:
+
+  struct pcmcia_state {
+    unsigned detect: 1,
+              ready: 1,
+               bvd1: 1,
+               bvd2: 1,
+             wrprot: 1,
+              vs_3v: 1,
+              vs_Xv: 1;
+  };
+
+Upon return from the operation, a struct pcmcia_state should be filled in
+for each socket available in the hardware. For every array element (up to
+`size' in the struct pcmcia_state_array) which does not correspond to an
+available socket, zero the element bits. (This includes element [0] if
+socket zero is not used.)
+
+Regardless of how the various signals are routed to the SA-1100, the bits
+in struct pcmcia_state always have the following semantics:
+
+  detect - 1 if a card is fully inserted, 0 otherwise
+  ready  - 1 if the card ready signal is asserted, 0 otherwise
+  bvd1   - the value of the Battery Voltage Detect 1 signal
+  bvd2   - the value of the Battery Voltage Detect 2 signal
+  wrprot - 1 if the card is write-protected, 0 otherwise
+  vs_3v  - 1 if the card must be operated at 3.3V, 0 otherwise
+  vs_Xv  - 1 if the card must be operated at X.XV, 0 otherwise
+
+A note about the BVD signals: if your board does not make both lines
+directly observable to the processor, just return reasonable values. The
+standard interpretation of the BVD signals is:
+
+  BVD1  BVD2
+
+   0     x    battery is dead
+   1     0    battery warning
+   1     1    battery ok
+
+Regarding the voltage sense flags (vs_3v, vs_Xv), these bits should be set
+based on a sampling of the Voltage Sense pins, if available. The standard
+interpretation of the VS signals (for a "low-voltage" socket) is:
+
+  VS1   VS2
+
+   0     0    X.XV, else 3.3V, else none
+   0     1    3.3V, else none
+   1     0    X.XV, else none
+   1     1    5V, else none
+
+More information about the BVD and VS conventions is available in chapter
+5 of "PCMCIA System Architecture," 2nd ed., by Don Anderson.
+
+This operation should return 1 if an IRQ is actually pending for the
+socket controller, 0 if no IRQ is pending (but no error condition exists,
+such as an undersized state array), or -1 on any error.
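+
+A minimal sketch of a single-socket socket_state() implementation. It
+assumes struct pcmcia_state_array carries a `size' field and a `state'
+array, as implied above, and the myboard_*() helpers stand in for whatever
+GPIO or register reads the hardware actually requires:
+
+  static int myboard_pcmcia_socket_state(struct pcmcia_state_array *state)
+  {
+    if (state->size < 1)
+      return -1;
+
+    memset(state->state, 0, state->size * sizeof(struct pcmcia_state));
+
+    state->state[0].detect = myboard_card_detect() ? 1 : 0;
+    state->state[0].ready  = myboard_card_ready()  ? 1 : 0;
+    state->state[0].bvd1   = 1;  /* BVD lines not observable: report "ok" */
+    state->state[0].bvd2   = 1;
+    state->state[0].wrprot = 0;
+    state->state[0].vs_3v  = myboard_card_is_3v()  ? 1 : 0;
+    state->state[0].vs_Xv  = 0;
+
+    return myboard_irq_pending() ? 1 : 0;
+  }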
+
+
+3. get_irq_info(struct pcmcia_irq_info *info)
+
+This operation obtains the IRQ assignment which is legal for the given
+socket. An argument of the following type is passed:
+
+  struct pcmcia_irq_info {
+    unsigned int sock;
+    unsigned int irq;
+  };
+
+The `sock' field contains the socket index being queried. The `irq' field
+should contain the IRQ number corresponding to the card ready signal from
+the device.
+
+This operation should return 0 on success, or -1 on any error.
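+
+A minimal sketch, assuming the card ready signal for socket 0 is routed to
+a hypothetical interrupt MYBOARD_IRQ_READY0:
+
+  static int myboard_pcmcia_get_irq_info(struct pcmcia_irq_info *info)
+  {
+    if (info->sock != 0)
+      return -1;
+    info->irq = MYBOARD_IRQ_READY0;
+    return 0;
+  }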
+
+
+4. configure_socket(const struct pcmcia_configure *configure)
+
+This operation allows the caller to apply power to the socket, issue a
+reset, or enable various outputs. The argument is of the following type:
+
+  struct pcmcia_configure {
+    unsigned sock: 8,
+              vcc: 8,
+              vpp: 8,
+           output: 1,
+          speaker: 1,
+            reset: 1;
+  };
+
+The `sock' field contains the index of the socket to be configured. The
+`vcc' and `vpp' fields contain the voltages to be applied for Vcc and Vpp,
+respectively, in units of 0.1V. (Note that vpp==120 indicates that
+programming voltage should be applied.)
+
+The two output enables, `output' and `speaker', refer to the card data
+signal enable and the card speaker enable, respectively. The `reset' bit,
+when set, indicates that the card reset should be asserted.
+
+This operation should return 0 on success, or -1 on any error.
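+
+A minimal sketch of a configure_socket() implementation, assuming
+hypothetical myboard_set_*() helpers that drive the board's power
+controller, output enables, and reset line:
+
+  static int myboard_pcmcia_configure_socket(const struct pcmcia_configure *conf)
+  {
+    if (conf->sock != 0)
+      return -1;
+
+    switch (conf->vcc) {        /* Vcc is given in units of 0.1V */
+    case 0:  myboard_set_vcc_off(); break;
+    case 33: myboard_set_vcc_3v3(); break;
+    case 50: myboard_set_vcc_5v();  break;
+    default: return -1;
+    }
+
+    myboard_set_vpp(conf->vpp);      /* vpp == 120 requests programming voltage */
+    myboard_set_reset(conf->reset);  /* assert or deassert card reset */
+
+    return 0;
+  }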
+
+
+Board-Specific Notes
+
+The following information is known about various SA-11x0 board designs
+which may be used as reference while adding support to the kernel.
+
+
+Carnegie Mellon Itsy/Cue (http://www.cs.cmu.edu/~wearable/itsy/)
+
+  Itsy Chip Select 3 (CS3) Interface
+  ("ITSY MEMORY/PCMCIA ADD-ON BOARD with BATTERY and CHARGER CIRCUITRY,"
+   memo dated 5-20-99, from Tim Manns to Richard Martin, et. al)
+
+  Read:
+    ABVD2    (SS)D0          A slot, Battery Voltage Detect
+    ABVD1    (SS)D1
+    AVSS2    (SS)D2          A slot, Voltage Sense
+    AVSS1    (SS)D3
+    GND      (SS)D4
+    GND      (SS)D5
+    GND      (SS)D6
+    GND      (SS)D7
+  
+    BBVD2    (SS)D8          B slot, Battery Voltage Detect
+    BBVD1    (SS)D9
+    BVSS2    (SS)D10         B slot, Voltage Sense
+    BVSS1    (SS)D11
+    GND      (SS)D12
+    GND      (SS)D13
+    GND      (SS)D14
+    GND      (SS)D15
+  
+  Write:
+    (SS)D0   A_VPP_VCC       LTC1472 VPPEN1
+    (SS)D1   A_VPP_PGM       LTC1472 VPPEN0
+    (SS)D2   A_VCC_3         LTC1472 VCCEN0
+    (SS)D3   A_VCC_5         LTC1472 VCCEN1
+    (SS)D4   RESET (A SLOT)
+    (SS)D5   GND
+    (SS)D6   GND
+    (SS)D7   GND
+    (SS)D8   B_VPP_VCC       LTC1472 VPPEN1
+    (SS)D9   B_VPP_PGM       LTC1472 VPPEN0
+    (SS)D10  B_VCC_3         LTC1472 VCCEN0
+    (SS)D11  B_VCC_5         LTC1472 VCCEN1
+    (SS)D12  RESET (B SLOT)
+    (SS)D13  GND
+    (SS)D14  GND
+    (SS)D15  GND
+  GPIO pin assignments are as follows: (from schematics)
+    GPIO 10                  Slot 0 Card Detect
+    GPIO 11                  Slot 1 Card Detect
+    GPIO 12                  Slot 0 Ready/Interrupt
+    GPIO 13                  Slot 1 Ready/Interrupt
+
+
+
+Intel SA-1100 Multimedia Board (http://developer.intel.com/design/strong/)
+
+  CPLD Registers
+  SA-1100 Multimedia Development Board with Companion SA-1101 Development
+    Board User's Guide, p.4-42
+
+  This SA-1100/1101 development package uses only one GPIO pin (24) to
+  signal changes in card status, and requires software to inspect a
+  PCMCIA status register to determine the source.
+
+  Read: (PCMCIA Power Sense Register - 0x19400000)
+    S0VS1           0        Slot 0 voltage sense
+    S0VS2           1
+    S0BVD1          2        Slot 0 battery voltage sense
+    S0BVD2          3
+    S1VS1           4        Slot 1 voltage sense
+    S1VS2           5
+    S1BVD1          6        Slot 1 battery voltage sense
+    S1BVD2          7
+
+  Read/Write: (PCMCIA Power Control Register - 0x19400002)
+    S0VPP0          0        Slot 0 Vpp
+    S0VPP1          1
+    S0VCC0          2        Slot 0 Vcc
+    S0VCC1          3
+    S1VPP0          4        Slot 1 Vpp
+    S1VPP1          5
+    S1VCC0          6        Slot 1 Vcc
+    S1VCC1          7
+
+  Read: (PCMCIA Status Register - 0x19400004)
+    S0CD1           0        Slot 0 Card Detect 1
+    S0RDY           1        Slot 0 Ready/Interrupt
+    S0STSCHG        2        Slot 0 Status Change
+    S0Reset         3        Slot 0 Reset (RW)
+    S1CD1           4        Slot 1 Card Detect 1
+    S1RDY           5        Slot 1 Ready/Interrupt
+    S1STSCHG        6        Slot 1 Status Change
+    S1Reset         7        Slot 1 Reset (RW)
+
+
+
+Intel SA-1100 Evaluation Platform (http://developer.intel.com/design/strong/)
+
+  Brutus I/O Pins and Chipselect Register
+  pcmcia-brutus.c, by Ivo Clarysse
+  (What's the official reference for this info?)
+
+  This SA-1100 development board uses more GPIO pins than say, the Itsy
+  or the SA-1100/1101 multimedia package. The pin assignments are as
+  follows:
+
+    GPIO 2                   Slot 0 Battery Voltage Detect 1
+    GPIO 3                   Slot 0 Ready/Interrupt
+    GPIO 4                   Slot 0 Card Detect
+    GPIO 5                   Slot 1 Battery Voltage Detect 1
+    GPIO 6                   Slot 1 Ready/Interrupt
+    GPIO 7                   Slot 1 Card Detect
+
+  Like the Itsy, Brutus uses a chipselect register in static memory
+  bank 3 for the other signals, such as voltage sense or reset:
+
+  Read:
+    P0_VS1          8        Slot 0 Voltage Sense
+    P0_VS2          9
+    P0_STSCHG      10        Slot 0 Status Change
+    P1_VS1         12        Slot 1 Voltage Sense
+    P1_VS2         13
+    P1_STSCHG      14        Slot 1 Status Change
+
+  Read/Write:
+    P0_            16        Slot 0 MAX1600EAI control line
+    P0_            17        Slot 0 MAX1600EAI control line
+    P0_            18        Slot 0 MAX1600EAI control line
+    P0_            19        Slot 0 MAX1600EAI control line
+    P0_            20        Slot 0 12V
+    P0_            21        Slot 0 Vpp to Vcc (CONFIRM?)
+    P0_            22        Slot 0 enable fan-out drivers & xcvrs
+    P0_SW_RST      23        Slot 0 Reset
+    P1_            24        Slot 1 MAX1600EAI control line
+    P1_            25        Slot 1 MAX1600EAI control line
+    P1_            26        Slot 1 MAX1600EAI control line
+    P1_            27        Slot 1 MAX1600EAI control line
+    P1_            28        Slot 1 12V
+    P1_            29        Slot 1 Vpp to Vcc (CONFIRM?)
+    P1_            30        Slot 1 enable fan-out drivers & xcvrs
+    P1_SW_RST      31        Slot 1 Reset
+
+  For each slot, the bits labelled "MAX1600EAI" should (apparently)
+  be written with the value 0101 for Vcc 3.3V, and 1001 for Vcc 5V.
+
+
+
+Intel SA-1110 Development Platform (http://developer.intel.com/design/strong/)
+
+  GPIO Pin Descriptions and Board Control Register
+  SA-1110 Microprocessor Development Board User's Guide, p.4-7, 4-10
+
+  The Assabet board contains only a single Compact Flash slot,
+  attached to slot 1 on the SA-1110. Card detect, ready, and BVD
+  signals are routed through GPIO, with power and reset placed in a
+  control register. Note that the CF bus must be enabled before use.
+
+    GPIO 21                  Slot 1 Compact Flash interrupt
+    GPIO 22                  Slot 1 card detect (CD1 NOR CD2)
+    GPIO 24                  Slot 1 Battery Voltage Detect 2
+    GPIO 25                  Slot 1 Battery Voltage Detect 1
+
+  Write-only: (Board Control Register - 0x12000000)
+    CF_PWR          0        CF bus power (3.3V)
+    CF_RST          1        CF reset
+    CF_Bus_On       7        CF bus enable
+
diff --git a/Documentation/arm/XScale/ADIFCC/80200EVB b/Documentation/arm/XScale/ADIFCC/80200EVB
new file mode 100644 (file)
index 0000000..3762de4
--- /dev/null
@@ -0,0 +1,110 @@
+
+Board Overview
+-----------------------------
+
+This is a beta release of the XScale Linux port to the ADI 80200EVB
+evaluation board.
+
+The 80200EVB is an evaluation platform for ADI Engineering's high-performance
+80200FCC chipset for the Intel 80200 XScale CPU. The 80200FCC is an open
+source FPGA based system that contains a PCI unit and a high performance
+memory controller.
+
+In addition to the 80200FCC, the board also contains a 16C550 UART, and 4MB
+of flash.
+
+The board is still under development and currently only the UART is functional
+as the PCI bits have not been programmed into the FPGA.
+
+For more information on the board, see http://www.adiengineering.com
+
+Port Status
+-----------------------------
+
+Supported:
+
+- Onboard UART (Polled operation only)
+- Cache/TLB locking on 80200 CPU
+
+TODO:
+
+- PCI when hardware supports it
+
+Building the Kernel
+-----------------------------
+edit the top-level Linux Makefile for your cross-compiler settings, then:
+make adi_evb_config
+make oldconfig
+make zImage
+
+Loading Linux
+-----------------------------
+
+Before you can use Linux on the ADI board, you need to grab the following:
+
+ADI 80200EVB Monitor:
+       ftp://source.mvista.com/pub/xscale/ADI_EVB/monitor.srec
+
+ADI JFFS2 Image:
+       ftp://source.mvista.com/pub/xscale/ADI_EVB/adi.jffs2
+
+Once you've got the Cygnus prompt, type in the following command:
+
+       load
+
+On another terminal window:
+
+       cat monitor.srec > /dev/ttyS0
+
+(replace ttyS0 with the serial port you are using)
+
+Once completed, just type 'go' at the cygmon prompt and you should see:
+
+       MontaVista IQ80310 Monitor Version 0.1
+       monitor>
+
+Type 'b 115200' at the prompt and change your terminal speed to 115200
+
+The first thing to do is to upload and burn the jffs2 filesystem image
+onto the board's 4MB of flash:
+
+       monitor> u c1000000
+       Uploading file at 0xc1000000
+       Now send file with ymodem
+
+Do as the monitor says and transfer the file adi.jffs2.  Once complete,
+the following will copy the jffs2 image to location 0x80000 in the flash.
+
+       monitor> f 8000 c1000000 200000
+       Erasing sector 0x00080000
+       Writing sector 0x00080000 with data at 0xC1000000
+       Erasing sector 0x000A0000
+       Writing sector 0x000A0000 with data at 0xC1020000
+       Erasing sector 0x000C0000
+       ...
+
+Now use the same command as above to upload your zImage to location c1000000.
+When you've done that, type 'j c1000000' to run Linux.  Login as
+root and you're all set to go.
+
+Misc Notes
+-----------------------------
+
+The current version of the HW does not have an onboard timer, so the 80200
+PMU is not available for general use as it is being used for a timer source.
+
+By default, the MTD driver reserves the first 512K for bootloaders and
+the remaining 3.5MB for the filesystem. You can edit drivers/mtd/maps/adi_evb.c
+to change this as needed for your application.
+
+Contributors
+-----------------------------
+
+Thanks to ADI Engineering for providing the hardware for development
+
+Deepak Saxena <dsaxena@mvista.com> - Initial port
+
+-----------------------------
+Enjoy.  If you have any problem please contact Deepak Saxena
+dsaxena@mvista.com
+
diff --git a/Documentation/arm/XScale/IOP3XX/IQ80310 b/Documentation/arm/XScale/IOP3XX/IQ80310
new file mode 100644 (file)
index 0000000..5312a57
--- /dev/null
@@ -0,0 +1,247 @@
+
+Board Overview
+-----------------------------
+
+The Cyclone IQ80310 board is an evaluation platform for Intel's 80200 Xscale
+CPU and 80312 Intelligent I/O chipset (collectively called IOP310 chipset).
+
+The 80312 contains dual PCI hoses (called the ATUs), a PCI-to-PCI bridge,
+three DMA channels (two on primary PCI, one on secondary PCI), I2C, I2O
+messaging unit, XOR unit for RAID operations, a bus performance monitoring
+unit, and a memory controller with ECC features.
+
+For more information on the board, see http://developer.intel.com/iio
+
+Port Status
+-----------------------------
+
+Supported:
+
+- MTD/JFFS/JFFS2
+- NFS root
+- RAMDISK root
+- 2ndary PCI slots
+- Onboard ethernet
+- Serial ports (ttyS0/S1)
+- Cache/TLB locking on 80200 CPU
+- Performance monitoring unit on 80200 CPU
+- Acting as a system controller on Cyclone 80303BP PCI backplane
+- DMA engines (EXPERIMENTAL)
+- 80312 Bus Performance Monitor (EXPERIMENTAL)
+- Application Accelerator Unit (XOR engine for RAID) (EXPERIMENTAL)
+- Messaging Unit (EXPERIMENTAL)
+
+TODO:
+- I2C
+
+Building the Kernel
+-----------------------------
+make iq80310_config
+make oldconfig
+make zImage
+
+This will build an image set up for BOOTP/NFS root support.  To change this,
+just run make menuconfig and disable nfs root or add a "root=" option.
+
+Preparing the Hardware
+-----------------------------
+
+This document assumes you're using a Rev D or newer board running
+Redboot as the bootloader.  Note that the version of RedBoot provided
+with the boards has a major issue and you need to replace it with the
+latest RedBoot. You can grab the source from the ECOS CVS or you can
+get a prebuilt image and burn it in using FRU at:
+
+   ftp://source.mvista.com/pub/xscale/iq80310/redboot.bin
+
+Make sure you do an 'fis init' command once you boot with the new
+RedBoot image.
+
+
+
+Downloading Linux
+-----------------------------
+
+Assuming you have your development system setup to act as a bootp/dhcp
+server and running tftp:
+
+   RedBoot> load -r -b 0xa1008000 /tftpboot/zImage.xs
+   Raw file loaded 0xa1008000-0xa1094bd8
+
+If you're not using dhcp/tftp, you can use y-modem instead:
+
+   RedBoot> load -r -b 0xa1008000 -m y
+
+Note that on Rev D. of the board, tftp does not work due to intermittent
+interrupt issues, so you need to download using ymodem.
+
+Once the download is completed:
+
+   RedBoot> go 0xa1008000
+
+Root Devices
+-----------------------------
+
+A kernel is not useful without a root filesystem, and you have several
+choices with this board:  NFS root, RAMDISK, or JFFS/JFFS2.  For development
+purposes, it is suggested that you use NFS root for easy access to various
+tools.  Once you're ready to deploy, you'll probably want to use JFFS/JFFS2 on
+the flash device.
+
+MTD on the IQ80310
+-----------------------------
+
+Linux on the IQ80310 supports RedBoot FIS partitioning if it is enabled.
+Out of the box, once you've done 'fis init' on RedBoot, you will get
+the following partitioning scheme:
+
+   root@192.168.0.14:~# cat /proc/mtd
+   dev:    size   erasesize  name
+   mtd0: 00040000 00020000 "RedBoot"
+   mtd1: 00040000 00020000 "RedBoot[backup]"
+   mtd2: 0075f000 00020000 "unallocated space"
+   mtd3: 00001000 00020000 "RedBoot config"
+   mtd4: 00020000 00020000 "FIS directory"
+
+To create an FIS directory, you need to use the fis command in RedBoot.
+As an example, you can burn the kernel into the flash once it's downloaded:
+
+   RedBoot> fis create -b 0xa1008000 -l 0x8CBAC -r 0xa1008000 -f 0x80000 kernel
+   ... Erase from 0x00080000-0x00120000: .....
+   ... Program from 0xa1008000-0xa1094bac at 0x00080000: .....
+   ... Unlock from 0x007e0000-0x00800000: .
+   ... Erase from 0x007e0000-0x00800000: .
+   ... Program from 0xa1fdf000-0xa1fff000 at 0x007e0000: .
+   ... Lock from 0x007e0000-0x00800000: .
+
+   RedBoot> fis list
+   Name              FLASH addr  Mem addr    Length      Entry point
+   RedBoot           0x00000000  0x00000000  0x00040000  0x00000000
+   RedBoot[backup]   0x00040000  0x00040000  0x00040000  0x00000000
+   RedBoot config    0x007DF000  0x007DF000  0x00001000  0x00000000
+   FIS directory     0x007E0000  0x007E0000  0x00020000  0x00000000
+   kernel            0x00080000  0xA1008000  0x000A0000  0x00000000
+
+This leads to the following Linux MTD setup:
+
+   mtroot@192.168.0.14:~# cat /proc/mtd
+   dev:    size   erasesize  name
+   mtd0: 00040000 00020000 "RedBoot"
+   mtd1: 00040000 00020000 "RedBoot[backup]"
+   mtd2: 000a0000 00020000 "kernel"
+   mtd3: 006bf000 00020000 "unallocated space"
+   mtd4: 00001000 00020000 "RedBoot config"
+   mtd5: 00020000 00020000 "FIS directory"
+
+Note that there is not a 1:1 mapping from RedBoot partitions to MTD
+partitions, as unused space also gets allocated into MTD partitions.
+
+As an aside, the -r option when creating the Kernel entry allows you to
+simply do an 'fis load kernel' to copy the image from flash into memory.
+You can then do an 'fis go 0xa1008000' to start Linux.
+
+If you choose to use static partitioning instead of the RedBoot partitioning:
+
+   /dev/mtd0  0x00000000 - 0x0007ffff: Boot Monitor     (512k)
+   /dev/mtd1  0x00080000 - 0x0011ffff: Kernel Image     (640K)
+   /dev/mtd2  0x00120000 - 0x0071ffff: File System      (6M)
+   /dev/mtd3  0x00720000 - 0x00800000: RedBoot Reserved (896K)
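+
+   As a rough sketch, that static layout corresponds to an MTD partition
+   table along the following lines (the exact map driver file and kernel
+   API details depend on the kernel version in use, so treat this as
+   illustrative only):
+
+      static struct mtd_partition iq80310_partitions[] = {
+        { .name = "Boot Monitor",     .offset = 0x00000000, .size = 0x00080000 },
+        { .name = "Kernel Image",     .offset = 0x00080000, .size = 0x000a0000 },
+        { .name = "File System",      .offset = 0x00120000, .size = 0x00600000 },
+        { .name = "RedBoot Reserved", .offset = 0x00720000, .size = 0x000e0000 },
+      };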
+
+To use a JFFS1/2 root FS, you need to download the JFFS image using either
+tftp or ymodem, and then copy it to flash:
+
+   RedBoot> load -r -b 0xa1000000 /tftpboot/jffs.img
+   Raw file loaded 0xa1000000-0xa1600000
+   RedBoot> fis create -b 0xa1000000 -l 0x600000 -f 0x120000 jffs
+   ... Erase from 0x00120000-0x00720000: ..................................
+   ... Program from 0xa1000000-0xa1600000 at 0x00120000: ..................
+   ......................
+   ... Unlock from 0x007e0000-0x00800000: .
+   ... Erase from 0x007e0000-0x00800000: .
+   ... Program from 0xa1fdf000-0xa1fff000 at 0x007e0000: .
+   ... Lock from 0x007e0000-0x00800000: .
+   RedBoot> fis list
+   Name              FLASH addr  Mem addr    Length      Entry point
+   RedBoot           0x00000000  0x00000000  0x00040000  0x00000000
+   RedBoot[backup]   0x00040000  0x00040000  0x00040000  0x00000000
+   RedBoot config    0x007DF000  0x007DF000  0x00001000  0x00000000
+   FIS directory     0x007E0000  0x007E0000  0x00020000  0x00000000
+   kernel            0x00080000  0xA1008000  0x000A0000  0xA1008000
+   jffs              0x00120000  0x00120000  0x00600000  0x00000000
+
+This looks like this in Linux:
+
+   root@192.168.0.14:~# cat /proc/mtd
+   dev:    size   erasesize  name
+   mtd0: 00040000 00020000 "RedBoot"
+   mtd1: 00040000 00020000 "RedBoot[backup]"
+   mtd2: 000a0000 00020000 "kernel"
+   mtd3: 00600000 00020000 "jffs"
+   mtd4: 000bf000 00020000 "unallocated space"
+   mtd5: 00001000 00020000 "RedBoot config"
+   mtd6: 00020000 00020000 "FIS directory"
+
+You need to boot the kernel once and watch the boot messages to see how the
+JFFS RedBoot partition maps into the MTD partition scheme.
+
+You can grab a pre-built JFFS image to use as a root file system at:
+
+   ftp://source.mvista.com/pub/xscale/iq80310/jffs.img
+
+For detailed info on using MTD and creating a JFFS image go to:
+
+   http://www.linux-mtd.infradead.org.
+
+For details on using RedBoot's FIS commands, type 'fis help' or consult
+your RedBoot manual.
+
+Contributors
+-----------------------------
+
+Thanks to Intel Corporation for providing the hardware.
+
+John Clark <jclark@teamasa.com> - Initial discovery of RedBoot issues
+Dave Jiang <dave.jiang@intel.com> - IRQ demux fixes, AAU, DMA, MU
+Nicolas Pitre <nico@cam.org> - Initial port, cleanup, debugging
+Matt Porter <mporter@mvista.com> - PCI subsystem development, debugging
+Tim Sanders <tsanders@sanders.org> - Initial PCI code
+Mark Salter <msalter@redhat.com> - RedBoot fixes
+Deepak Saxena <dsaxena@mvista.com> - Cleanup, debug, cache lock, PMU
+
+-----------------------------
+Enjoy.
+
+If you have any problems please contact Deepak Saxena <dsaxena@mvista.com>
+
+A few notes from rmk
+-----------------------------
+
+These are notes of my initial experience getting the IQ80310 Rev D up and
+running.  In total, it has taken many hours to work out what's going on...
+The version of redboot used is:
+
+ RedBoot(tm) bootstrap and debug environment, version UNKNOWN - built 14:58:21, Aug 15 2001
+
+
+1. I've had a corrupted download of the redboot.bin file from Montavista's
+   FTP site.  It would be a good idea if there were md5sums, sum or gpg
+   signatures available to ensure the integrity of the downloaded files.
+   The result of this was an apparently 100% dead card.
+
+2. RedBoot's Intel EtherExpress Pro 100 driver seems to be very unstable -
+   I've had it take out the whole of a 100Mbit network for several minutes.
+   The hub indicates ZERO activity, despite machines attempting to communicate.
+   Further to this, while tftping the kernel, the transfer will stall regularly,
+   and might even drop the link LED.
+
+3. There appears to be a bug in the Intel Documentation Pack that comes with
+   the IQ80310 board.  Serial port 1, which is the socket next to the LEDs,
+   is address 0xfe810000, not 0xfe800000.
+
+   Note that RedBoot uses either serial port 1 OR serial port 2, so if you
+   have your console connected to the wrong port, you'll see redboot messages
+   but not kernel boot messages.
+
+4. Trying to use fconfig to setup a boot script fails - it hangs when trying
+   to erase the flash.
diff --git a/Documentation/arm/XScale/IOP3XX/IQ80321 b/Documentation/arm/XScale/IOP3XX/IQ80321
new file mode 100644 (file)
index 0000000..e325327
--- /dev/null
@@ -0,0 +1,215 @@
+
+Board Overview
+-----------------------------
+
+The Worcester IQ80321 board is an evaluation platform for Intel's 80321 Xscale
+CPU (sometimes called IOP321 chipset).
+
+The 80321 contains a single PCI hose (called the ATU), a PCI-to-PCI bridge,
+two DMA channels, I2C, I2O messaging unit, XOR unit for RAID operations,
+a bus performance monitoring unit, and a memory controller with ECC features.
+
+For more information on the board, see http://developer.intel.com/iio
+
+Port Status
+-----------------------------
+
+Supported:
+
+- MTD/JFFS/JFFS2 root
+- NFS root
+- RAMDISK root
+- Serial port (ttyS0)
+- Cache/TLB locking on 80321 CPU
+- Performance monitoring unit on 80321 CPU
+
+TODO:
+
+- DMA engines
+- 80321 Bus Performance Monitor
+- Application Accelerator Unit (XOR engine for RAID)
+- I2O Messaging Unit
+- I2C unit
+- SSP
+
+Building the Kernel
+-----------------------------
+make iq80321_config
+make oldconfig
+make zImage
+
+This will build an image set up for BOOTP/NFS root support.  To change this,
+just run make menuconfig and disable nfs root or add a "root=" option.
+
+Preparing the Hardware
+-----------------------------
+
+Make sure you do an 'fis init' command once you boot with the new
+RedBoot image.
+
+Downloading Linux
+-----------------------------
+
+Assuming you have your development system setup to act as a bootp/dhcp
+server and running tftp:
+
+NOTE: The 80321 board uses a different default memory map than the 80310.
+
+   RedBoot> load -r -b 0x01008000 -m y
+
+Once the download is completed:
+
+   RedBoot> go 0x01008000
+
+There is a version of RedBoot floating around that has DHCP support, but
+I've never been able to cleanly transfer a kernel image and have it run.
+
+Root Devices
+-----------------------------
+
+A kernel is not useful without a root filesystem, and you have several
+choices with this board:  NFS root, RAMDISK, or JFFS/JFFS2.  For development
+purposes, it is suggested that you use NFS root for easy access to various
+tools.  Once you're ready to deploy, you'll probably want to use JFFS/JFFS2 on
+the flash device.
+
+MTD on the IQ80321
+-----------------------------
+
+Linux on the IQ80321 supports RedBoot FIS partitioning if it is enabled.
+Out of the box, once you've done 'fis init' on RedBoot, you will get
+the following partitioning scheme:
+
+   root@192.168.0.14:~# cat /proc/mtd
+   dev:    size   erasesize  name
+   mtd0: 00040000 00020000 "RedBoot"
+   mtd1: 00040000 00020000 "RedBoot[backup]"
+   mtd2: 0075f000 00020000 "unallocated space"
+   mtd3: 00001000 00020000 "RedBoot config"
+   mtd4: 00020000 00020000 "FIS directory"
+
+To create an FIS directory, you need to use the fis command in RedBoot.
+As an example, you can burn the kernel into the flash once it's downloaded:
+
+   RedBoot> fis create -b 0x01008000 -l 0x8CBAC -r 0x01008000 -f 0x80000 kernel
+   ... Erase from 0x00080000-0x00120000: .....
+   ... Program from 0x01008000-0x01094bac at 0x00080000: .....
+   ... Unlock from 0x007e0000-0x00800000: .
+   ... Erase from 0x007e0000-0x00800000: .
+   ... Program from 0x01fdf000-0x01fff000 at 0x007e0000: .
+   ... Lock from 0x007e0000-0x00800000: .
+
+   RedBoot> fis list
+   Name              FLASH addr  Mem addr    Length      Entry point
+   RedBoot           0x00000000  0x00000000  0x00040000  0x00000000
+   RedBoot[backup]   0x00040000  0x00040000  0x00040000  0x00000000
+   RedBoot config    0x007DF000  0x007DF000  0x00001000  0x00000000
+   FIS directory     0x007E0000  0x007E0000  0x00020000  0x00000000
+   kernel            0x00080000  0x01008000  0x000A0000  0x00000000
+
+This leads to the following Linux MTD setup:
+
+   mtroot@192.168.0.14:~# cat /proc/mtd
+   dev:    size   erasesize  name
+   mtd0: 00040000 00020000 "RedBoot"
+   mtd1: 00040000 00020000 "RedBoot[backup]"
+   mtd2: 000a0000 00020000 "kernel"
+   mtd3: 006bf000 00020000 "unallocated space"
+   mtd4: 00001000 00020000 "RedBoot config"
+   mtd5: 00020000 00020000 "FIS directory"
+
+Note that there is not a 1:1 mapping from RedBoot partitions to MTD
+partitions, as unused space also gets allocated into MTD partitions.
+
+As an aside, the -r option when creating the Kernel entry allows you to
+simply do an 'fis load kernel' to copy the image from flash into memory.
+You can then do an 'fis go 0x01008000' to start Linux.
+
+If you choose to use static partitioning instead of the RedBoot partitioning:
+
+   /dev/mtd0  0x00000000 - 0x0007ffff: Boot Monitor     (512k)
+   /dev/mtd1  0x00080000 - 0x0011ffff: Kernel Image     (640K)
+   /dev/mtd2  0x00120000 - 0x0071ffff: File System      (6M)
+   /dev/mtd3  0x00720000 - 0x00800000: RedBoot Reserved (896K)
+
+To use a JFFS1/2 root FS, you need to download the JFFS image using either
+tftp or ymodem, and then copy it to flash:
+
+   RedBoot> load -r -b 0x01000000 /tftpboot/jffs.img
+   Raw file loaded 0x01000000-0x01600000
+   RedBoot> fis create -b 0x01000000 -l 0x600000 -f 0x120000 jffs
+   ... Erase from 0x00120000-0x00720000: ..................................
+   ... Program from 0x01000000-0x01600000 at 0x00120000: ..................
+   ......................
+   ... Unlock from 0x007e0000-0x00800000: .
+   ... Erase from 0x007e0000-0x00800000: .
+   ... Program from 0x01fdf000-0x01fff000 at 0x007e0000: .
+   ... Lock from 0x007e0000-0x00800000: .
+   RedBoot> fis list
+   Name              FLASH addr  Mem addr    Length      Entry point
+   RedBoot           0x00000000  0x00000000  0x00040000  0x00000000
+   RedBoot[backup]   0x00040000  0x00040000  0x00040000  0x00000000
+   RedBoot config    0x007DF000  0x007DF000  0x00001000  0x00000000
+   FIS directory     0x007E0000  0x007E0000  0x00020000  0x00000000
+   kernel            0x00080000  0x01008000  0x000A0000  0x01008000
+   jffs              0x00120000  0x00120000  0x00600000  0x00000000
+
+This looks like this in Linux:
+
+   root@192.168.0.14:~# cat /proc/mtd
+   dev:    size   erasesize  name
+   mtd0: 00040000 00020000 "RedBoot"
+   mtd1: 00040000 00020000 "RedBoot[backup]"
+   mtd2: 000a0000 00020000 "kernel"
+   mtd3: 00600000 00020000 "jffs"
+   mtd4: 000bf000 00020000 "unallocated space"
+   mtd5: 00001000 00020000 "RedBoot config"
+   mtd6: 00020000 00020000 "FIS directory"
+
+You need to boot the kernel once and watch the boot messages to see how the
+JFFS RedBoot partition maps into the MTD partition scheme.
+
+You can grab a pre-built JFFS image to use as a root file system at:
+
+   ftp://source.mvista.com/pub/xscale/iq80310/jffs.img
+
+For detailed info on using MTD and creating a JFFS image go to:
+
+   http://www.linux-mtd.infradead.org.
+
+For details on using RedBoot's FIS commands, type 'fis help' or consult
+your RedBoot manual.
+
+BUGS and ISSUES
+-----------------------------
+
+* As shipped from Intel, pre-production boards have the following issues:
+
+- The on-board ethernet is disabled (S8E1-2 is off). You will need to turn it on.
+
+- The PCIXCAPs are configured for a 100MHz clock, but the clock selected is
+  actually only 66MHz. This causes the wrong PLL multiplier to be used and the
+  board only runs at 400MHz instead of 600MHz. The way to observe this is to
+  use an independent clock to time a "sleep 10" command from the prompt. If it
+  takes 15 seconds instead of 10, you are running at 400MHz.
+
+- The experimental IOP310 drivers for the AAU, DMA, etc. are not supported yet.
+
+Contributors
+-----------------------------
+The port to the IQ80321 was performed by:
+
+Rory Bolt <rorybolt@pacbell.net> - Initial port, debugging.
+
+This port was based on the IQ80310 port with the following contributors:
+
+Nicolas Pitre <nico@cam.org> - Initial port, cleanup, debugging
+Matt Porter <mporter@mvista.com> - PCI subsystem development, debugging
+Tim Sanders <tsanders@sanders.org> - Initial PCI code
+Deepak Saxena <dsaxena@mvista.com> - Cleanup, debug, cache lock, PMU
+
+The port is currently maintained by Deepak Saxena <dsaxena@mvista.com>
+
+-----------------------------
+Enjoy.
diff --git a/Documentation/arm/XScale/IOP3XX/aau.txt b/Documentation/arm/XScale/IOP3XX/aau.txt
new file mode 100644 (file)
index 0000000..e3852cc
--- /dev/null
@@ -0,0 +1,178 @@
+Support functions for the Intel 80310 AAU
+===========================================
+
+Dave Jiang <dave.jiang@intel.com>
+Last updated: 09/18/2001
+
+The Intel 80312 companion chip in the 80310 chipset contains an AAU. The
+AAU is capable of processing up to 8 data block sources and performing XOR
+operations on them. This unit is typically used to accelerate the XOR
+operations used by RAID storage device drivers such as RAID 5. This API
+provides a set of functions to take advantage of the AAU. The AAU can also
+be used to transfer data blocks and act as a memory copier; it moves memory
+faster than a CPU copy, so it is recommended for large memory copies.
+
+------------------
+int aau_request(u32 *aau_context, const char *device_id);
+This function allows the user to acquire control of the AAU. The function
+returns an AAU context to the user and allocates an interrupt for the AAU.
+The user must pass the context as a parameter to the various AAU API calls.
+
+int aau_queue_buffer(u32 aau_context, aau_head_t *listhead);
+This function starts the AAU operation. The user must create an SGL
+header with an SGL attached. The format is presented below. The SGL is
+built from kernel memory.
+
+/* hardware descriptor */
+typedef struct _aau_desc
+{
+    u32 NDA;                    /* next descriptor address [READONLY] */
+    u32 SAR[AAU_SAR_GROUP];     /* src addrs */
+    u32 DAR;                    /* destination addr */
+    u32 BC;                     /* byte count */
+    u32 DC;                     /* descriptor control */
+    u32 SARE[AAU_SAR_GROUP];    /* extended src addrs */
+} aau_desc_t;
+
+/* user SGL format */
+typedef struct _aau_sgl
+{
+    aau_desc_t          aau_desc;  /* AAU HW Desc */
+    u32                        status;    /* status of SGL [READONLY] */
+    struct _aau_sgl    *next;     /* pointer to next SG [READONLY] */
+    void                *dest;     /* destination addr */
+    void                *src[AAU_SAR_GROUP];       /* source addr[4] */
+    void                *ext_src[AAU_SAR_GROUP];    /* ext src addr[4] */
+    u32                 total_src; /* total number of source */
+} aau_sgl_t;
+
+/* header for user SGL */
+typedef struct _aau_head
+{
+    u32                total;      /* total descriptors allocated */
+    u32         status;     /* SGL status */
+    aau_sgl_t   *list;      /* ptr to head of list */
+    aau_callback_t  callback;  /* callback func ptr */
+} aau_head_t;
+
+
+The function will call aau_start() and start the AAU after it queues
+the SGL to the processing queue. The function will then either
+a. sleep on the wait queue aau->wait_q if no callback has been provided, or
+b. continue, and the provided callback function will be called when the
+   AAU interrupt has been triggered.
+
+int aau_suspend(u32 aau_context);
+Stops/suspends the AAU operation.
+
+int aau_free(u32 aau_context);
+Frees ownership of the AAU. Called when the AAU service is no longer needed.
+
+aau_sgl_t * aau_get_buffer(u32 aau_context, int num_buf);
+This function obtains an AAU SGL for the user. The user must specify the
+number of descriptors to be allocated in the chain that is returned.
+
+void aau_return_buffer(u32 aau_context, aau_sgl_t *list);
+This function returns all SGLs back to the API after the user is done.
+
+int aau_memcpy(void *dest, void *src, u32 size);
+This function is a shortcut that lets the user perform a memory copy using
+the AAU, which is faster than the CPU for large blocks. It is similar to a
+typical memcpy() call.
+
+* User is responsible for the source address(es) and the destination address.
+  The source and destination should all be cached memory.
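+
+As a short usage sketch (assuming, as with the other calls above, that a
+negative return value indicates failure):
+
+       void *src = kmalloc(4096, GFP_KERNEL);
+       void *dst = kmalloc(4096, GFP_KERNEL);
+
+       memset(src, 0xAA, 4096);
+       if (aau_memcpy(dst, src, 4096) < 0)
+               printk("aau_memcpy failed\n");
+
+       /* use dst, then kfree() both buffers when done */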
+
+
+
+void aau_test()
+{
+       u32 aau;
+       char dev_id[] = "AAU";
+       int size = 2;
+       int err = 0;
+       aau_head_t *head;
+       aau_sgl_t *list;
+       u32 i;
+       u32 result = 0;
+       void *src, *dest;
+
+       printk("Starting AAU test\n");
+       if((err = aau_request(&aau, dev_id))<0)
+       {
+               printk("test - AAU request failed: %d\n", err);
+               return;
+       }
+       else
+       {
+               printk("test - AAU request successful\n");
+       }
+
+       head = kmalloc(sizeof(aau_head_t), GFP_KERNEL);
+       head->total = size;
+       head->status = 0;
+       head->callback = NULL;
+
+       list = aau_get_buffer(aau, size);
+       if(!list)
+       {
+               printk("Can't get buffers\n");
+               return;
+       }
+       head->list = list;
+
+       src = kmalloc(1024, GFP_KERNEL);
+       dest = kmalloc(1024, GFP_KERNEL);
+
+       while(list)
+       {
+               list->status = 0;
+               /* aau_desc is embedded in aau_sgl_t, so use '.' rather than '->' */
+               list->aau_desc.SAR[0] = (u32)src;
+               list->aau_desc.DAR = (u32)dest;
+               list->aau_desc.BC = 1024;
+
+               /* see iop310-aau.h for more DCR commands */
+               list->aau_desc.DC = AAU_DCR_WRITE | AAU_DCR_BLKCTRL_1_DF;
+               if(!list->next)
+               {
+                       /* last descriptor: also enable the interrupt bit */
+                       list->aau_desc.DC |= AAU_DCR_IE;
+                       break;
+               }
+               list = list->next;
+       }
+
+       printk("test- Queueing buffer for AAU operation\n");
+       err = aau_queue_buffer(aau, head);
+       if(err >= 0)
+       {
+               printk("AAU Queue Buffer is done...\n");
+       }
+       else
+       {
+               printk("AAU Queue Buffer failed...: %d\n", err);
+       }
+
+
+
+#if 1
+       printk("freeing the AAU\n");
+       aau_return_buffer(aau, head->list);
+       aau_free(aau);
+       kfree(src);
+       kfree(dest);
+       kfree((void *)head);
+#endif
+}
+
+All Disclaimers apply. Use this at your own discretion. Neither Intel nor I
+will be responsible if anything goes wrong. =)
+
+
+TODO
+____
+* Testing
+* Do zero-size AAU transfer/channel at init
+  so all we have to do is chaining
+
diff --git a/Documentation/arm/XScale/IOP3XX/dma.txt b/Documentation/arm/XScale/IOP3XX/dma.txt
new file mode 100644 (file)
index 0000000..50c7f99
--- /dev/null
@@ -0,0 +1,214 @@
+Support functions for the Intel 80310 DMA channels
+==================================================
+
+Dave Jiang <dave.jiang@intel.com>
+Last updated: 09/18/2001
+
+The Intel 80310 XScale chipset provides 3 DMA channels via the 80312 I/O
+companion chip. Two of them reside on the primary PCI bus and one on the
+secondary PCI bus.
+
+Unfortunately, the DMA API provided is not compatible with the generic
+interface in the ARM tree, due to how the 80312 DMACs work. Hopefully a
+software interface to bridge the differences can be written in the near
+future. The DMA API has been modeled after Nicolas Pitre's SA11x0 DMA API,
+so the two look somewhat similar.
+
+
+80310 DMA API
+-------------
+
+int dma_request(dmach_t channel, const char *device_id);
+
+This function will attempt to allocate the channel depending on what the
+user requests:
+
+IOP310_DMA_P0: PCI Primary 1
+IOP310_DMA_P1: PCI Primary 2
+IOP310_DMA_S0: PCI Secondary 1
+/*EOF*/
+
+Once the user allocates the DMA channel it is owned until released. Other
+users can also use the same DMA channel, but no new resources will be
+allocated. The function returns the allocated channel number if successful.
+
+int dma_queue_buffer(dmach_t channel, dma_sghead_t *listhead);
+
+The user will construct an SGL in the form shown below:
+/*
+ * Scattered Gather DMA List for user
+ */
+typedef struct _dma_desc
+{
+    u32  NDAR;       /* next descriptor address [READONLY] */
+    u32  PDAR;       /* PCI address */
+    u32  PUADR;      /* upper PCI address */
+    u32  LADR;       /* local address */
+    u32  BC;         /* byte count */
+    u32  DC;         /* descriptor control */
+} dma_desc_t;
+
+typedef struct _dma_sgl
+{
+    dma_desc_t      dma_desc;     /* DMA descriptor */
+    u32             status;       /* descriptor status [READONLY] */
+    u32                    data;         /* user defined data */
+    struct _dma_sgl *next;       /* next descriptor [READONLY] */
+} dma_sgl_t;
+
+/* dma sgl head */
+typedef struct _dma_head
+{
+    u32                    total;      /* total elements in SGL */
+    u32                    status;     /* status of sgl */
+    u32                    mode;       /* read or write mode */
+    dma_sgl_t      *list;      /* pointer to list */
+    dma_callback_t  callback;   /* callback function */
+} dma_head_t;
+
+
+The user allocates SGL elements by calling dma_get_buffer(). This function
+gives the user an SGL element. The user is responsible for creating the SGL
+head, however, and for allocating the memory for the DMA data. The following
+code segment shows how a DMA operation can be performed:
+
+#include <asm/arch/iop310-dma.h>
+
+void dma_test(void)
+{
+       char dev_id[] = "Primary 0";
+       dma_head_t *sgl_head = NULL;
+       dma_sgl_t *sgl = NULL;
+       int err = 0;
+       int channel = -1;
+       u32 *test_ptr = 0;
+       DECLARE_WAIT_QUEUE_HEAD(wait_q);
+
+
+       *(IOP310_ATUCR) = (IOP310_ATUCR_PRIM_OUT_ENAB |
+                       IOP310_ATUCR_DIR_ADDR_ENAB);
+
+       channel = dma_request(IOP310_DMA_P0, dev_id);
+
+       sgl_head = (dma_head_t *)kmalloc(sizeof(dma_head_t), GFP_KERNEL);
+       sgl_head->callback = NULL;      /* no callback created */
+       sgl_head->total = 2; /* allocating 2 DMA descriptors */
+       sgl_head->mode = (DMA_MOD_WRITE);
+       sgl_head->status = 0;
+
+       /* now we get the two descriptors */
+       sgl = dma_get_buffer(channel, 2);
+
+       /* we set the header to point to the list we allocated */
+       sgl_head->list = sgl;
+
+       /* allocate 1k of DMA data */
+       sgl->data = (u32)kmalloc(1024, GFP_KERNEL);
+
+       /* Local address is physical */
+       sgl->dma_desc.LADR = (u32)virt_to_phys(sgl->data);
+
+       /* write to arbitrary location over the PCI bus */
+       sgl->dma_desc.PDAR = 0x00600000;
+       sgl->dma_desc.PUADR = 0;
+       sgl->dma_desc.BC = 1024;
+
+       /* set write & invalidate PCI command */
+       sgl->dma_desc.DC = DMA_DCR_PCI_MWI;
+       sgl->status = 0;
+
+       /* set a pattern */
+       memset(sgl->data, 0xFF, 1024);
+
+       /* User's responsibility to keep buffers cached coherent */
+       cpu_dcache_clean(sgl->data, sgl->data + 1024);
+
+       sgl = sgl->next;
+
+       sgl->data = (u32)kmalloc(1024, GFP_KERNEL);
+       sgl->dma_desc.LADR = (u32)virt_to_phys(sgl->data);
+       sgl->dma_desc.PDAR = 0x00610000;
+       sgl->dma_desc.PUADR = 0;
+       sgl->dma_desc.BC = 1024;
+
+       /* second descriptor has interrupt flag enabled */
+       sgl->dma_desc.DC = (DMA_DCR_PCI_MWI | DMA_DCR_IE);
+
+       /* must set end of chain flag */
+       sgl->status = DMA_END_CHAIN; /* DO NOT FORGET THIS!!!! */
+
+       memset(sgl->data, 0x0f, 1024);
+       /* User's responsibility to keep buffers cached coherent */
+       cpu_dcache_clean(sgl->data, sgl->data + 1024);
+
+       /* queuing the buffer, this function will sleep since no callback */
+       err = dma_queue_buffer(channel, sgl_head);
+
+       /* now we are woken from DMA complete */
+
+       /* do data operations here */
+
+       /* free DMA data if necessary */
+
+       /* return the descriptors */
+       dma_return_buffer(channel, sgl_head->list);
+
+       /* free the DMA */
+       dma_free(channel);
+
+       kfree((void *)sgl_head);
+}
+
+
+dma_sgl_t * dma_get_buffer(dmach_t channel, int buf_num);
+
+This call allocates DMA descriptors for the user.
+
+
+void dma_return_buffer(dmach_t channel, dma_sgl_t *list);
+
+This call returns the allocated descriptors back to the API.
+
+
+int dma_suspend(dmach_t channel);
+
+This call suspends any DMA transfer on the given channel.
+
+
+
+int dma_resume(dmach_t channel);
+
+This call resumes a DMA transfer that was previously stopped through
+dma_suspend().
+
+
+int dma_flush_all(dmach_t channel);
+
+This completely flushes all queued buffers and ongoing DMA transfers on a
+given channel. It is called when DMA channel errors have occurred.
+
+
+void dma_free(dmach_t channel);
+
+This clears all activities on a given DMA channel and releases it for future
+requests.
+
+
+
+Buffer Allocation
+-----------------
+It is the user's responsibility to allocate, free, and keep track of the
+allocated DMA data memory. Upon calling dma_queue_buffer() the user
+relinquishes control of the buffers to the kernel and must not change the
+state of the buffers it has passed in. The user regains control of the
+buffers when woken up by the bottom half of the DMA interrupt handler. The
+user can allocate either cached buffers or non-cached buffers via
+pci_alloc_consistent(). In either case it is the user's responsibility to
+ensure that the data is cache coherent.
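+
+As a sketch only, a coherent buffer could be wired into a descriptor as
+follows instead of the kmalloc()/cpu_dcache_clean() pair used in the example
+above. The pci_dev pointer "dev", the buffer size, and the use of the
+returned dma_addr_t as LADR are assumptions, not taken from this document:
+
+       dma_addr_t bus_addr;
+       void *buf = pci_alloc_consistent(dev, 1024, &bus_addr);
+
+       if (buf) {
+               sgl->data = (u32)buf;
+               /* coherent memory: no cpu_dcache_clean() before queueing */
+               sgl->dma_desc.LADR = (u32)bus_addr;
+               sgl->dma_desc.BC = 1024;
+       }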
+
+*Reminder*
+The user is responsible for ensuring the ATU is set up properly for DMA
+transfers.
+
+All Disclaimers apply. Use this at your own discretion. Neither Intel nor I
+will be responsible if anything goes wrong.
diff --git a/Documentation/arm/XScale/IOP3XX/message.txt b/Documentation/arm/XScale/IOP3XX/message.txt
new file mode 100644 (file)
index 0000000..480d13e
--- /dev/null
@@ -0,0 +1,110 @@
+Support functions for the Intel 80310 MU
+===========================================
+
+Dave Jiang <dave.jiang@intel.com>
+Last updated: 10/11/2001
+
+The messaging unit of the IOP310 contains 4 components and is utilized for
+passing messages between the PCI agents on the primary bus and the Intel(R)
+80200 CPU. The four components are:
+Messaging Component
+Doorbell Component
+Circular Queues Component
+Index Registers Component
+
+Messaging Component:
+Contains four 32-bit registers, two inbound and two outbound. Writing to a
+register asserts an interrupt either on the PCI bus or to the 80200,
+depending on whether the register is outbound or inbound.
+
+int mu_msg_request(u32 *mu_context);
+Requests the usage of the Messaging Component. mu_context is written back by
+the API and is passed to the other messaging calls as a parameter.
+
+int mu_msg_set_callback(u32 mu_context, u8 reg, mu_msg_cb_t func);
+Sets up the callback function for incoming messages. A callback can be set
+up for outbound register 0, 1, or both outbound registers.
+
+int mu_msg_post(u32 mu_context, u32 val, u8 reg);
+Posts the message passed in the val parameter. The reg parameter selects
+register 0 or 1.
+
+int mu_msg_free(u32 mu_context, u8 mode);
+Frees the usage of the Messaging Component. mode can be either soft or hard;
+in hard mode all resources are deallocated.
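+
+A minimal usage sketch of the calls above; the return-value check and the
+mode value passed to mu_msg_free() are illustrative assumptions, not taken
+from this document:
+
+       u32 mu;
+
+       if (mu_msg_request(&mu) == 0) {
+               /* post a value to outbound message register 1 */
+               mu_msg_post(mu, 0xdeadbeef, 1);
+               /* 0 stands in for "soft" mode; the real constant is not named above */
+               mu_msg_free(mu, 0);
+       }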
+
+Doorbell Component:
+The Doorbell Component contains one inbound and one outbound register.
+Different interrupts are asserted depending on which bits are set.
+
+int mu_db_request(u32 *mu_context);
+Requests the usage of the doorbell registers.
+
+int mu_db_set_callback(u32 mu_context, mu_db_cb_t func);
+Sets up the inbound doorbell callback.
+
+void mu_db_ring(u32 mu_context, u32 mask);
+Writes mask to the outbound doorbell register.
+
+int mu_db_free(u32 mu_context);
+Frees the usage of the Doorbell Component.
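+
+A doorbell sketch along the same lines; the handler name and the bit mask
+are hypothetical:
+
+       u32 mu;
+
+       if (mu_db_request(&mu) == 0) {
+               mu_db_set_callback(mu, my_db_handler);  /* my_db_handler is hypothetical */
+               mu_db_ring(mu, 1 << 0);                 /* ring outbound doorbell bit 0 */
+               mu_db_free(mu);
+       }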
+
+Circular Queues Component:
+The Circular Queues Component has four circular queues: inbound post,
+inbound free, outbound post, and outbound free. These queues are used to
+pass messages.
+
+int mu_cq_request(u32 *mu_context, u32 q_size);
+Requests the usage of the circular queues. q_size tells the API how big the
+queues should be; see the code comment header for the accepted values.
+
+int mu_cq_inbound_init(u32 mu_context, mfa_list_t *list, u32 size,
+                       mu_cq_cb_t func);
+Initializes the inbound queues. The user must provide a list of free message
+frames to be placed in the inbound free queue and a callback function to
+handle the inbound messages.
+
+int mu_cq_enable(u32 mu_context);
+Enables the circular queues mechanism. Call this once all the setup
+functions have been called.
+
+u32 mu_cq_get_frame(u32 mu_context);
+Obtains the address of an outbound free frame for the user.
+
+int mu_cq_post_frame(u32 mu_context, u32 mfa);
+Posts a frame once the user has obtained it and filled in the message
+information.
+
+int mu_cq_free(u32 mu_context);
+Frees the usage of the Circular Queues Component.
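+
+Once mu_cq_request(), mu_cq_inbound_init() and mu_cq_enable() have been
+called, an outbound message could be posted roughly as follows; the zero
+check on the returned MFA is an assumption:
+
+       u32 mfa;
+
+       mfa = mu_cq_get_frame(mu);      /* mu from the earlier mu_cq_request() */
+       if (mfa) {
+               /* fill in the message frame at mfa here */
+               mu_cq_post_frame(mu, mfa);
+       }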
+
+Index Registers Component:
+The index registers provide a mechanism to receive inbound messages.
+
+int mu_ir_request(u32 *mu_context);
+Requests the usage of the Index Registers Component.
+
+int mu_ir_set_callback(u32 mu_context, mu_ir_cb_t callback);
+Sets up the callback for inbound messages. The callback receives the value
+of the register that the IAR offsets to.
+
+int mu_ir_free(u32 mu_context);
+Frees the usage of the Index Registers Component.
+
+void mu_set_irq_threshold(u32 mu_context, int thresh);
+Sets the IRQ threshold, i.e. the number of loops processed in IRQ context
+before the handler relinquishes it. The default is 10 loops.
+
+
+*NOTE: An example of a host driver that utilizes the MU can be found in the
+Linux I2O driver, specifically i2o_pci and some functions of i2o_core. The
+I2O driver only utilizes the circular queues mechanism; the other three
+components are simple enough to set up easily. The MU API provides no flow
+control for the messaging mechanism. Flow control of the messaging needs to
+be established by a higher layer of software on the IOP or by the host
+driver.
+
+All Disclaimers apply. Use this at your own discretion. Neither Intel nor I
+will be responsible if anything goes wrong. =)
+
+
+TODO
+____
+
diff --git a/Documentation/arm/XScale/IOP3XX/pmon.txt b/Documentation/arm/XScale/IOP3XX/pmon.txt
new file mode 100644 (file)
index 0000000..7978494
--- /dev/null
@@ -0,0 +1,71 @@
+
+Intel's XScale Microarchitecture 80312 companion processor provides a
+Performance Monitoring Unit (PMON) that can be utilized to provide
+information useful for fine tuning of code.  This text file describes
+the API that's been developed for use by Linux kernel programmers.
+Note that to get the most out of the PMON, I highly recommend getting
+the XScale reference manual from Intel[1] and looking at chapter 12.
+
+To use the PMON, you must #include <asm-arm/arch-iop310/pmon.h> in your
+source file.
+
+Since there's only one PMON, only one user can currently use the PMON
+at a given time.  To claim the PMON for usage, call iop310_pmon_claim() which
+returns an identifier.  When you are done using the PMON, call
+iop310_pmon_release() with the id you were given earlier.
+
+The PMON consists of 14 registers that can be used for performance measurements.
+By combining different statistics, you can derive complex performance metrics.
+
+To start the PMON, just call iop310_pmon_start(mode).  mode tells the PMON
+what statistics to capture and can be one of:
+
+    IOP310_PMU_MODE0
+    Performance Monitoring Disabled
+
+    IOP310_PMU_MODE1
+    Primary PCI bus and internal agents (bridge, dma Ch0, dma Ch1, patu)
+
+    IOP310_PMU_MODE2
+    Secondary PCI bus and internal agents (bridge, dma Ch0, dma Ch1, patu)
+
+    IOP310_PMU_MODE3
+    Secondary PCI bus and internal agents (external masters 0..2 and Intel
+    80312 I/O companion chip)
+
+    IOP310_PMU_MODE4
+    Secondary PCI bus and internal agents (external masters 3..5 and Intel
+    80312 I/O companion chip)
+
+    IOP310_PMU_MODE5
+    Intel 80312 I/O companion chip internal bus, DMA Channels and Application
+    Accelerator
+
+    IOP310_PMU_MODE6
+    Intel 80312 I/O companion chip internal bus, PATU, SATU and Intel 80200
+    processor
+
+    IOP310_PMU_MODE7
+    Intel 80312 I/O companion chip internal bus, Primary PCI bus, Secondary
+    PCI bus and Secondary PCI agents (external masters 0..5 & Intel 80312 I/O
+    companion chip)
+
+To get the results back, call iop310_pmon_stop(&results) where results is
+defined as follows:
+
+typedef struct _iop310_pmon_result
+{
+       u32 timestamp;                  /* Global Time Stamp Register */
+       u32 timestamp_overflow;         /* Time Stamp overflow count */
+       u32 event_count[14];            /* Programmable Event Counter
+                                          Registers 1-14 */
+       u32 event_overflow[14];         /* Overflow counter for PECR1-14 */
+} iop310_pmon_res_t;
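+
+Putting it together, a measurement run might look like the sketch below;
+the error handling and the printk format are assumptions, not part of the
+API description:
+
+       #include <asm-arm/arch-iop310/pmon.h>
+
+       int id;
+       iop310_pmon_res_t results;
+
+       id = iop310_pmon_claim();
+       iop310_pmon_start(IOP310_PMU_MODE1);
+       /* ... exercise the primary PCI bus here ... */
+       iop310_pmon_stop(&results);
+       printk("GTSR %u, PECR1 %u\n", results.timestamp, results.event_count[0]);
+       iop310_pmon_release(id);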
+
+
+--
+This code is still under development, so please feel free to send patches,
+questions, comments, etc to me.
+
+Deepak Saxena <dsaxena@mvista.com>
diff --git a/Documentation/arm/XScale/cache-lock.txt b/Documentation/arm/XScale/cache-lock.txt
new file mode 100644 (file)
index 0000000..9728c94
--- /dev/null
@@ -0,0 +1,123 @@
+
+Intel's XScale Microarchitecture provides support for locking of data
+and instructions into the appropriate caches. This  file provides
+an overview of the API that has been developed to take advantage of this
+feature from kernel space. Note that there is NO support for user space
+cache locking.
+
+For example usage of this code, grab:
+
+       ftp://source.mvista.com/pub/xscale/cache-test.c
+
+If you have any questions, comments, patches, etc, please contact me.
+
+Deepak Saxena <dsaxena@mvista.com>
+
+API DESCRIPTION
+
+
+I. Header File
+
+   #include <asm/xscale-lock.h>
+
+II. Cache Capability Discovery
+
+   SYNOPSIS
+
+   int cache_query(u8 cache_type,
+                           struct cache_capabilities *pcache);
+
+   struct cache_capabilities
+   {
+      u32   flags;      /* Flags defining capabilities  */
+      u32   cache_size; /* Cache size in K (1024 bytes) */
+      u32   max_lock;   /* Maximum lockable region in K */
+   }
+
+   /*
+    * Flags
+    */
+
+   /*
+    * Bit 0: Cache lockability
+    * Bits 1-31: Reserved for future use
+    */
+   #define CACHE_LOCKABLE    0x00000001   /* Cache can be locked */
+
+   /*
+    * Cache Types
+    */
+   #define ICACHE            0x00
+   #define DCACHE            0x01
+
+   DESCRIPTION
+
+   This function fills out the pcache capability identifier for the
+   requested cache. cache_type is either DCACHE or ICACHE. This
+   function is not very useful at the moment as all XScale CPUs
+   have the same cache size, but it is provided for future XScale
+   based processors that may have larger cache sizes.
+
+   RETURN VALUE
+
+   This function returns 0 if no error occurs, otherwise it returns
+   a negative, errno compatible value.
+
+      -EIO   Unknown hardware error
+
+III. Cache Locking
+
+   SYNOPSIS
+
+   int cache_lock(void *addr, u32 len, u8 cache_type, const char *desc);
+
+   DESCRIPTION
+
+   This function locks a physically contiguous portion of memory starting
+   at the virtual address pointed to by addr into the cache referenced
+   by cache_type.
+
+   The address of the data/instruction that is to be locked must be
+   aligned on a cache line boundary (L1_CACHE_ALIGNEMENT).
+
+   The desc parameter is an optional (pass NULL if not used) human readable
+   descriptor of the locked memory region that is used by the cache
+   management code to build the /proc/cache_locks table.
+
+   Note that this function does not check whether the address is valid
+   or not before locking it into the cache.  That duty is up to the
+   caller.  Also, it does not check for duplicate or overlapping
+   entries.
+
+   RETURN VALUE
+
+   If the function is successful in locking the entry into cache, a
+   zero is returned.
+
+   If an error occurs, an appropriate error value is returned.
+
+      -EINVAL   The memory address provided was not cache line aligned
+      -ENOMEM   Could not allocate memory to complete operation
+      -ENOSPC   Not enough space left on cache to lock in requested region
+      -EIO      Unknown error
+
+IV. Cache Unlocking
+
+   SYNOPSIS
+
+   int cache_unlock(void *addr)
+
+   DESCRIPTION
+
+   This function unlocks a portion of memory that was previously locked
+   into either the I or D cache.
+
+   RETURN VALUE
+
+   If the entry is cleanly unlocked from the cache, a 0 is returned.
+   In the case of an error, an appropriate error is returned.
+
+      -ENOENT    No entry with given address associated with this cache
+      -EIO       Unknown error
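+
+V. Example
+
+   A short usage sketch combining the two calls above.  The 32-byte
+   alignment value, the printk, and the error handling are assumptions,
+   not part of this API description:
+
+      #include <asm/xscale-lock.h>
+
+      static u8 hot_table[1024] __attribute__((aligned(32)));
+      int err;
+
+      err = cache_lock(hot_table, sizeof(hot_table), DCACHE, "hot_table");
+      if (err)
+         printk("cache_lock failed: %d\n", err);
+      /* ... use hot_table ... */
+      cache_unlock(hot_table);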
+
+
diff --git a/Documentation/arm/XScale/pmu.txt b/Documentation/arm/XScale/pmu.txt
new file mode 100644 (file)
index 0000000..508575d
--- /dev/null
@@ -0,0 +1,168 @@
+
+Intel's XScale Microarchitecture processors provide a Performance
+Monitoring Unit (PMU) that can be utilized to provide information
+that can be useful for fine tuning of code.  This text file describes
+the API that's been developed for use by Linux kernel programmers.
+When I have some extra time on my hands, I will extend the code to
+provide support for user mode performance monitoring (which is
+probably much more useful).  Note that to get the most usage out
+of the PMU, I highly recommend getting the XScale reference manual
+from Intel and looking at chapter 12.
+
+To use the PMU, you must #include <asm/xscale-pmu.h> in your source file.
+
+Since there's only one PMU, only one user can currently use the PMU
+at a given time.  To claim the PMU for usage, call pmu_claim() which
+returns an identifier.  When you are done using the PMU, call
+pmu_release() with the identifier that you were given by pmu_claim.
+
+In addition, the PMU can only be used on XScale based systems that
+provide an external timer.  Systems that the PMU is currently supported
+on are:
+
+       - Cyclone IQ80310
+
+Before delving into how to use the PMU code, let's do a quick overview
+of the PMU itself.  The PMU consists of three registers that can be
+used for performance measurements.  The first is the CCNT register, which
+provides the number of clock cycles elapsed since the PMU was started.
+The next two registers, PMN0 and PMN1, are each user programmable to
+provide 1 of 20 different performance statistics.  By combining different
+statistics, you can derive complex performance metrics.
+
+To start the PMU, just call pmu_start(pmn0, pmn1).  pmn0 and pmn1 tell
+the PMU what statistics to capture and can each be one of:
+
+EVT_ICACHE_MISS
+       Instruction fetches requiring access to external memory
+
+EVT_ICACHE_NO_DELIVER
+       Instruction cache could not deliver an instruction.  Either an
+       ICACHE miss or an instruction TLB miss.
+
+EVT_ICACHE_DATA_STALL
+       Stall in execution due to a data dependency. This counter is
+       incremented each cycle in which the condition is present.
+
+EVT_ITLB_MISS
+       Instruction TLB miss
+
+EVT_DTLB_MISS
+       Data TLB miss
+
+EVT_BRANCH
+       A branch instruction was executed and it may or may not have
+       changed program flow
+
+EVT_BRANCH_MISS
+       A branch (B or BL instructions only) was mispredicted
+
+EVT_INSTRUCTION
+       An instruction was executed
+
+EVT_DCACHE_FULL_STALL
+       Stall because data cache buffers are full.  Incremented on every
+       cycle in which condition is present.
+
+EVT_DCACHE_FULL_STALL_CONTIG
+       Stall because data cache buffers are full.  Incremented on every
+       cycle in which the condition is contiguous.
+
+EVT_DCACHE_ACCESS
+       Data cache access (data fetch)
+
+EVT_DCACHE_MISS
+       Data cache miss
+
+EVT_DCACHE_WRITE_BACK
+       Data cache write back.  This counter is incremented for every
+       1/2 line (four words) that is written back.
+
+EVT_PC_CHANGED
+       Software changed the PC.  This is incremented only when the
+       software changes the PC and there is no mode change.  For example,
+       a MOV instruction that targets the PC would increment the counter.
+       An SWI would not as it triggers a mode change.
+
+EVT_BCU_REQUEST
+       The Bus Control Unit (BCU) received a request from the core
+
+EVT_BCU_FULL
+       The BCU request queue is full.  A high value for this event means
+       that the BCU is often waiting for transactions to complete on the
+       external bus.
+
+EVT_BCU_DRAIN
+       The BCU queues were drained due to either a Drain Write Buffer
+       command or an I/O transaction for a page that was marked as
+       uncacheable and unbufferable.
+
+EVT_BCU_ECC_NO_ELOG
+       The BCU detected an ECC error on the memory bus but no ELOG
+       register was available to log the error.
+
+EVT_BCU_1_BIT_ERR
+       The BCU detected a 1-bit error while reading from the bus.
+
+EVT_RMW
+       An RMW cycle occurred due to a narrow write on ECC-protected memory.
+
+To get the results back, call pmu_stop(&results) where results is defined
+as a struct pmu_results:
+
+       struct pmu_results
+       {
+               u32     ccnt;           /* Clock Counter Register */
+               u32     ccnt_of;        /* Clock Counter overflow count */
+               u32     pmn0;           /* Performance Counter Register 0 */
+               u32     pmn0_of;        /* Performance Counter 0 overflow count */
+               u32     pmn1;           /* Performance Counter Register 1 */
+               u32     pmn1_of;        /* Performance Counter 1 overflow count */
+       };
+
+Pretty simple huh?  Following are some examples of how to get some commonly
+wanted numbers out of the PMU data.  Note that since you will be dividing
+things (and the kernel has no floating point), these aren't very useful to
+compute in the kernel itself; printk the raw data out to syslog and do the
+math in user space.  See [1] for more examples.
+
+Instruction Cache Efficiency
+
+       pmu_start(EVT_INSTRUCTION, EVT_ICACHE_MISS);
+       ...
+       pmu_stop(&results);
+
+       icache_miss_rate = results.pmn1 / results.pmn0;
+       cycles_per_instruction = results.ccnt / results.pmn0;
+
+Data Cache Efficiency
+
+       pmu_start(EVT_DCACHE_ACCESS, EVT_DCACHE_MISS);
+       ...
+       pmu_stop(&results);
+
+       dcache_miss_rate = results.pmn1 / results.pmn0;
+
+Instruction Fetch Latency
+
+       pmu_start(EVT_ICACHE_NO_DELIVER, EVT_ICACHE_MISS);
+       ...
+       pmu_stop(&results);
+
+       average_stall_waiting_for_instruction_fetch =
+               results.pmn0 / results.pmn1;
+
+       percent_stall_cycles_due_to_instruction_fetch =
+               results.pmn0 / results.ccnt;
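+
+Complete claim-to-release flow (a sketch; the negative-return check on
+pmu_claim() and the printk are assumptions, not part of the API description)
+
+       int id;
+       struct pmu_results results;
+
+       id = pmu_claim();
+       if (id < 0)
+               return;         /* could not claim the PMU */
+       pmu_start(EVT_DCACHE_ACCESS, EVT_DCACHE_MISS);
+       /* ... run the code being measured ... */
+       pmu_stop(&results);
+       printk("dcache: %u accesses, %u misses\n", results.pmn0, results.pmn1);
+       pmu_release(id);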
+
+
+ToDo:
+
+- Add support for usermode PMU usage.  This might require hooking into
+  the scheduler so that we pause the PMU when the task that requested
+  statistics is scheduled out.
+
+--
+This code is still under development, so please feel free to send patches,
+questions, comments, etc to me.
+
+Deepak Saxena <dsaxena@mvista.com>
+
diff --git a/Documentation/arm/XScale/tlb-lock.txt b/Documentation/arm/XScale/tlb-lock.txt
new file mode 100644 (file)
index 0000000..1ba3e11
--- /dev/null
@@ -0,0 +1,64 @@
+
+Intel's XScale Microarchitecture provides support for locking of TLB
+entries in both the instruction and data TLBs.  This  file provides
+an overview of the API that has been developed to take advantage of this
+feature from kernel space. Note that there is NO support for user space.
+
+In general, this feature should be used in conjunction with locking
+data or instructions into the appropriate caches.  See the file
+cache-lock.txt in this directory.
+
+If you have any questions, comments, patches, etc, please contact me.
+
+Deepak Saxena <dsaxena@mvista.com>
+
+
+API DESCRIPTION
+
+I. Header file
+
+   #include <asm/xscale-lock.h>
+
+II. Locking an entry into the TLB
+
+    SYNOPSIS
+
+    xscale_tlb_lock(u8 tlb_type, u32 addr);
+
+    /*
+     * TLB types
+     */
+    #define ITLB       0x0
+    #define DTLB       0x1
+
+    DESCRIPTION
+
+    This function locks the virtual to physical mapping for virtual
+    address addr into the requested TLB.
+
+    RETURN VALUE
+
+    If the entry is properly locked into the TLB, a 0 is returned.
+    In case of an error, an appropriate error is returned.
+
+       -ENOSPC No more entries left in the TLB
+       -EIO    Unknown error
+
+III. Unlocking an entry from a TLB
+
+     SYNOPSIS
+
+     xscale_tlb_unlock(u8 tlb_type, u32 addr);
+
+     DESCRIPTION
+
+     This function unlocks the entry for virtual address addr from the
+     specified TLB.
+
+     RETURN VALUE
+
+     If the TLB entry is properly unlocked, a 0 is returned.
+     In case of an error, an appropriate error is returned.
+
+        -ENOENT  No entry for given address in specified TLB
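+
+IV. Example
+
+     A short sketch combining the two calls above, intended to be used
+     alongside cache locking as suggested earlier.  The hot_table buffer
+     and the zero-return check are assumptions, not part of this API
+     description:
+
+        #include <asm/xscale-lock.h>
+
+        extern u8 hot_table[];          /* hypothetical locked data */
+
+        if (xscale_tlb_lock(DTLB, (u32)hot_table) == 0) {
+                /* ... hot_table's mapping stays resident while in use ... */
+                xscale_tlb_unlock(DTLB, (u32)hot_table);
+        }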
+
diff --git a/arch/arm/mach-omap/innovator1510.c b/arch/arm/mach-omap/innovator1510.c
new file mode 100644 (file)
index 0000000..1309f96
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * linux/arch/arm/mach-omap/innovator1510.c
+ *
+ * Board specific inits for OMAP-1510 Innovator
+ *
+ * Copyright (C) 2001 RidgeRun, Inc.
+ * Author: Greg Lonnon <glonnon@ridgerun.com>
+ *
+ * Copyright (C) 2002 MontaVista Software, Inc.
+ *
+ * Separated FPGA interrupts from innovator1510.c and cleaned up for 2.6
+ * Copyright (C) 2004 Nokia Corporation by Tony Lindgren <tony@atomide.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/device.h>
+
+#include <asm/hardware.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+
+#include <asm/arch/clocks.h>
+#include <asm/arch/gpio.h>
+#include <asm/arch/fpga.h>
+
+#include "common.h"
+
+extern int omap_gpio_init(void);
+
+void innovator_init_irq(void)
+{
+       omap_init_irq();
+       omap_gpio_init();
+       fpga_init_irq();
+}
+
+static struct resource smc91x_resources[] = {
+       [0] = {
+               .start  = OMAP1510P1_FPGA_ETHR_START,   /* Physical */
+               .end    = OMAP1510P1_FPGA_ETHR_START + 16,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = INT_ETHER,
+               .end    = INT_ETHER,
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device smc91x_device = {
+       .name           = "smc91x",
+       .id             = 0,
+       .num_resources  = ARRAY_SIZE(smc91x_resources),
+       .resource       = smc91x_resources,
+};
+
+static struct platform_device *devices[] __initdata = {
+       &smc91x_device,
+};
+
+static void __init innovator_init(void)
+{
+       if (!machine_is_innovator())
+               return;
+
+       (void) platform_add_devices(devices, ARRAY_SIZE(devices));
+}
+
+/* Only FPGA needs to be mapped here. All others are done with ioremap */
+static struct map_desc innovator_io_desc[] __initdata = {
+{ OMAP1510P1_FPGA_BASE, OMAP1510P1_FPGA_START, OMAP1510P1_FPGA_SIZE,
+       MT_DEVICE },
+};
+
+static void __init innovator_map_io(void)
+{
+       omap_map_io();
+       iotable_init(innovator_io_desc, ARRAY_SIZE(innovator_io_desc));
+
+       /* Dump the Innovator FPGA rev early - useful info for support. */
+       printk("Innovator FPGA Rev %d.%d Board Rev %d\n",
+              fpga_read(OMAP1510P1_FPGA_REV_HIGH),
+              fpga_read(OMAP1510P1_FPGA_REV_LOW),
+              fpga_read(OMAP1510P1_FPGA_BOARD_REV));
+}
+
+MACHINE_START(INNOVATOR, "TI-Innovator/OMAP1510")
+       MAINTAINER("MontaVista Software, Inc.")
+       BOOT_MEM(0x10000000, 0xe0000000, 0xe0000000)
+       BOOT_PARAMS(0x10000100)
+       MAPIO(innovator_map_io)
+       INITIRQ(innovator_init_irq)
+       INIT_MACHINE(innovator_init)
+MACHINE_END
diff --git a/arch/arm/mach-omap/innovator1610.c b/arch/arm/mach-omap/innovator1610.c
new file mode 100644 (file)
index 0000000..4081735
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * linux/arch/arm/mach-omap/innovator1610.c
+ *
+ * This file contains Innovator-specific code.
+ *
+ * Copyright (C) 2002 MontaVista Software, Inc.
+ *
+ * Copyright (C) 2001 RidgeRun, Inc.
+ * Author: Greg Lonnon <glonnon@ridgerun.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/major.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/hardware.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/arch/irqs.h>
+
+#include "common.h"
+
+void
+innovator_init_irq(void)
+{
+       omap_init_irq();
+}
+
+static struct resource smc91x_resources[] = {
+       [0] = {
+               .start  = OMAP1610_ETHR_START,          /* Physical */
+               .end    = OMAP1610_ETHR_START + SZ_4K,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = 0,                            /* Really GPIO 0 */
+               .end    = 0,
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device smc91x_device = {
+       .name           = "smc91x",
+       .id             = 0,
+       .num_resources  = ARRAY_SIZE(smc91x_resources),
+       .resource       = smc91x_resources,
+};
+
+static struct platform_device *devices[] __initdata = {
+       &smc91x_device,
+};
+
+static void __init innovator_init(void)
+{
+       if (!machine_is_innovator())
+               return;
+
+       (void) platform_add_devices(devices, ARRAY_SIZE(devices));
+}
+
+static struct map_desc innovator_io_desc[] __initdata = {
+{ OMAP1610_ETHR_BASE, OMAP1610_ETHR_START, OMAP1610_ETHR_SIZE,MT_DEVICE },
+{ OMAP1610_NOR_FLASH_BASE, OMAP1610_NOR_FLASH_START, OMAP1610_NOR_FLASH_SIZE,
+       MT_DEVICE },
+};
+
+static void __init innovator_map_io(void)
+{
+       omap_map_io();
+       iotable_init(innovator_io_desc, ARRAY_SIZE(innovator_io_desc));
+}
+
+MACHINE_START(INNOVATOR, "TI-Innovator/OMAP1610")
+       MAINTAINER("MontaVista Software, Inc.")
+       BOOT_MEM(0x10000000, 0xe0000000, 0xe0000000)
+       BOOT_PARAMS(0x10000100)
+       MAPIO(innovator_map_io)
+       INITIRQ(innovator_init_irq)
+       INIT_MACHINE(innovator_init)
+MACHINE_END
+
diff --git a/arch/arm/mach-omap/irq.h b/arch/arm/mach-omap/irq.h
new file mode 100644 (file)
index 0000000..8e1aa78
--- /dev/null
@@ -0,0 +1,172 @@
+/*
+ * linux/arch/arm/mach-omap/irq.h
+ *
+ * OMAP specific interrupt bank definitions
+ *
+ * Copyright (C) 2004 Nokia Corporation
+ * Written by Tony Lindgren <tony@atomide.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You should have received a copy of the  GNU General Public License along
+ * with this program; if not, write  to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define OMAP_IRQ_TYPE710       1
+#define OMAP_IRQ_TYPE730       2
+#define OMAP_IRQ_TYPE1510      3
+#define OMAP_IRQ_TYPE1610      4
+#define OMAP_IRQ_TYPE1710      5
+
+#define MAX_NR_IRQ_BANKS       4
+
+#define BANK_NR_IRQS           32
+
+struct omap_irq_desc {
+       unsigned int    cpu_type;
+       unsigned int    start_irq;
+       unsigned long   level_map;
+       unsigned long   base_reg;
+       unsigned long   mask_reg;
+       unsigned long   ack_reg;
+       struct irqchip  *handler;
+};
+
+struct omap_irq_bank {
+       unsigned int    start_irq;
+       unsigned long   level_map;
+       unsigned long   base_reg;
+       unsigned long   mask_reg;
+       unsigned long   ack_reg;
+       struct irqchip  *handler;
+};
+
+static void omap_offset_ack_irq(unsigned int irq);
+static void omap_offset_mask_irq(unsigned int irq);
+static void omap_offset_unmask_irq(unsigned int irq);
+static void omap_offset_mask_ack_irq(unsigned int irq);
+
+/* NOTE: These will not work if irq bank offset != 0x100 */
+#define IRQ_TO_BANK(irq)       (irq >> 5)
+#define IRQ_BIT(irq)           (irq & 0x1f)
+#define BANK_OFFSET(bank)      ((bank - 1) * 0x100)
+
+static struct irqchip omap_offset_irq = {
+       .ack    =  omap_offset_mask_ack_irq,
+       .mask   =  omap_offset_mask_irq,
+       .unmask =  omap_offset_unmask_irq,
+};
+
+/*
+ * OMAP-730 interrupt banks
+ */
+static struct omap_irq_desc omap730_bank0_irqs __initdata = {
+       .cpu_type       = OMAP_IRQ_TYPE730,
+       .start_irq      = 0,
+       .level_map      = 0xb3f8e22f,
+       .base_reg       = OMAP_IH1_BASE,
+       .mask_reg       = OMAP_IH1_BASE + IRQ_MIR,
+       .ack_reg        = OMAP_IH1_BASE + IRQ_CONTROL_REG,
+       .handler        = &omap_offset_irq,     /* IH2 regs at 0x100 offsets */
+};
+
+static struct omap_irq_desc omap730_bank1_irqs __initdata = {
+       .cpu_type       = OMAP_IRQ_TYPE730,
+       .start_irq      = 32,
+       .level_map      = 0xfdb9c1f2,
+       .base_reg       = OMAP_IH2_BASE,
+       .mask_reg       = OMAP_IH2_BASE + IRQ_MIR,
+       .ack_reg        = OMAP_IH2_BASE + IRQ_CONTROL_REG,
+       .handler        = &omap_offset_irq,     /* IH2 regs at 0x100 offsets */
+};
+
+static struct omap_irq_desc omap730_bank2_irqs __initdata = {
+       .cpu_type       = OMAP_IRQ_TYPE730,
+       .start_irq      = 64,
+       .level_map      = 0x800040f3,
+       .base_reg       = OMAP_IH2_BASE + 0x100,
+       .mask_reg       = OMAP_IH2_BASE + 0x100 + IRQ_MIR,
+       .ack_reg        = OMAP_IH2_BASE + IRQ_CONTROL_REG, /* Not replicated */
+       .handler        = &omap_offset_irq,     /* IH2 regs at 0x100 offsets */
+};
+
+/*
+ * OMAP-1510 interrupt banks
+ */
+static struct omap_irq_desc omap1510_bank0_irqs __initdata = {
+       .cpu_type       = OMAP_IRQ_TYPE1510,
+       .start_irq      = 0,
+       .level_map      = 0xb3febfff,
+       .base_reg       = OMAP_IH1_BASE,
+       .mask_reg       = OMAP_IH1_BASE + IRQ_MIR,
+       .ack_reg        = OMAP_IH1_BASE + IRQ_CONTROL_REG,
+       .handler        = &omap_offset_irq,     /* IH2 regs at 0x100 offsets */
+};
+
+static struct omap_irq_desc omap1510_bank1_irqs __initdata = {
+       .cpu_type       = OMAP_IRQ_TYPE1510,
+       .start_irq      = 32,
+       .level_map      = 0xffbfffed,
+       .base_reg       = OMAP_IH2_BASE,
+       .mask_reg       = OMAP_IH2_BASE + IRQ_MIR,
+       .ack_reg        = OMAP_IH2_BASE + IRQ_CONTROL_REG,
+       .handler        = &omap_offset_irq,     /* IH2 regs at 0x100 offsets */
+};
+
+/*
+ * OMAP-1610 interrupt banks
+ */
+static struct omap_irq_desc omap1610_bank0_irqs __initdata = {
+       .cpu_type       = OMAP_IRQ_TYPE1610,
+       .start_irq      = 0,
+       .level_map      = 0xb3fefe8f,
+       .base_reg       = OMAP_IH1_BASE,
+       .mask_reg       = OMAP_IH1_BASE + IRQ_MIR,
+       .ack_reg        = OMAP_IH1_BASE + IRQ_CONTROL_REG,
+       .handler        = &omap_offset_irq,     /* IH2 regs at 0x100 offsets */
+};
+
+static struct omap_irq_desc omap1610_bank1_irqs __initdata = {
+       .cpu_type       = OMAP_IRQ_TYPE1610,
+       .start_irq      = 32,
+       .level_map      = 0xfffff7ff,
+       .base_reg       = OMAP_IH2_BASE,
+       .mask_reg       = OMAP_IH2_BASE + IRQ_MIR,
+       .ack_reg        = OMAP_IH2_BASE + IRQ_CONTROL_REG,
+       .handler        = &omap_offset_irq,     /* IH2 regs at 0x100 offsets */
+};
+
+static struct omap_irq_desc omap1610_bank2_irqs __initdata = {
+       .cpu_type       = OMAP_IRQ_TYPE1610,
+       .start_irq      = 64,
+       .level_map      = 0xffffffff,
+       .base_reg       = OMAP_IH2_BASE + 0x100,
+       .mask_reg       = OMAP_IH2_BASE + 0x100 + IRQ_MIR,
+       .ack_reg        = OMAP_IH2_BASE + IRQ_CONTROL_REG, /* Not replicated */
+       .handler        = &omap_offset_irq,     /* IH2 regs at 0x100 offsets */
+};
+
+static struct omap_irq_desc omap1610_bank3_irqs __initdata = {
+       .cpu_type       = OMAP_IRQ_TYPE1610,
+       .start_irq      = 96,
+       .level_map      = 0xffffffff,
+       .base_reg       = OMAP_IH2_BASE + 0x200,
+       .mask_reg       = OMAP_IH2_BASE + 0x200 + IRQ_MIR,
+       .ack_reg        = OMAP_IH2_BASE + IRQ_CONTROL_REG, /* Not replicated */
+       .handler        = &omap_offset_irq,     /* IH2 regs at 0x100 offsets */
+};
diff --git a/arch/arm/mach-omap/omap-generic.c b/arch/arm/mach-omap/omap-generic.c
new file mode 100644 (file)
index 0000000..982830d
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ * linux/arch/arm/mach-omap/generic.c
+ *
+ * Modified from innovator.c
+ *
+ * Code for generic OMAP board. Should work on many OMAP systems where
+ * the device drivers take care of all the necessary hardware initialization.
+ * Do not put any board specific code to this file; create a new machine
+ * type if you need custom low-level initializations.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/device.h>
+
+#include <asm/hardware.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+
+#include <asm/arch/clocks.h>
+#include <asm/arch/gpio.h>
+#include <asm/arch/mux.h>
+
+#include "common.h"
+
+static void __init omap_generic_init_irq(void)
+{
+       omap_init_irq();
+}
+
+/*
+ * Muxes the serial ports on
+ */
+static void __init omap_early_serial_init(void)
+{
+       omap_cfg_reg(UART1_TX);
+       omap_cfg_reg(UART1_RTS);
+
+       omap_cfg_reg(UART2_TX);
+       omap_cfg_reg(UART2_RTS);
+
+       omap_cfg_reg(UART3_TX);
+       omap_cfg_reg(UART3_RX);
+}
+
+static void __init omap_generic_init(void)
+{
+       if (!machine_is_omap_generic())
+               return;
+
+       /*
+        * Make sure the serial ports are muxed on at this point.
+        * You have to mux them off in device drivers later on
+        * if not needed.
+        */
+       if (cpu_is_omap1510()) {
+               omap_early_serial_init();
+       }
+}
+
+static void __init omap_generic_map_io(void)
+{
+       omap_map_io();
+}
+
+MACHINE_START(OMAP_GENERIC, "Generic OMAP-1510/1610")
+       MAINTAINER("Tony Lindgren <tony@atomide.com>")
+       BOOT_MEM(0x10000000, 0xe0000000, 0xe0000000)
+       BOOT_PARAMS(0x10000100)
+       MAPIO(omap_generic_map_io)
+       INITIRQ(omap_generic_init_irq)
+       INIT_MACHINE(omap_generic_init)
+MACHINE_END
diff --git a/arch/arm/mach-omap/omap-perseus2.c b/arch/arm/mach-omap/omap-perseus2.c
new file mode 100644 (file)
index 0000000..ec05093
--- /dev/null
@@ -0,0 +1,116 @@
+/*
+ * linux/arch/arm/mach-omap/omap-perseus2.c
+ *
+ * Modified from omap-generic.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/device.h>
+
+#include <asm/hardware.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+
+#include <asm/arch/clocks.h>
+#include <asm/arch/gpio.h>
+#include <asm/arch/mux.h>
+
+#include <asm/arch/omap-perseus2.h>
+
+#include "common.h"
+
+void omap_perseus2_init_irq(void)
+{
+       omap_init_irq();
+}
+
+static struct resource smc91x_resources[] = {
+       [0] = {
+               .start  = OMAP730_FPGA_ETHR_START,      /* Physical */
+               .end    = OMAP730_FPGA_ETHR_START + SZ_4K,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = INT_ETHER,
+               .end    = INT_ETHER,
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device smc91x_device = {
+       .name           = "smc91x",
+       .id             = 0,
+       .num_resources  = ARRAY_SIZE(smc91x_resources),
+       .resource       = smc91x_resources,
+};
+
+static struct platform_device *devices[] __initdata = {
+       &smc91x_device,
+};
+
+static void __init omap_perseus2_init(void)
+{
+       if (!machine_is_omap_perseus2())
+               return;
+
+       (void) platform_add_devices(devices, ARRAY_SIZE(devices));
+}
+
+/* Only FPGA needs to be mapped here. All others are done with ioremap */
+static struct map_desc omap_perseus2_io_desc[] __initdata = {
+       {OMAP730_FPGA_BASE, OMAP730_FPGA_START, OMAP730_FPGA_SIZE,
+        MT_DEVICE},
+};
+
+static void __init omap_perseus2_map_io(void)
+{
+       omap_map_io();
+       iotable_init(omap_perseus2_io_desc,
+                    ARRAY_SIZE(omap_perseus2_io_desc));
+
+       /* Early, board-dependent init */
+
+       /*
+        * Hold GSM Reset until needed
+        */
+       *DSP_M_CTL &= ~1;
+
+       /*
+        * UARTs -> done automagically by 8250 driver
+        */
+
+       /*
+        * CSx timings, GPIO Mux ... setup
+        */
+
+       /* Flash: CS0 timings setup */
+       *((volatile __u32 *) OMAP_FLASH_CFG_0) = 0x0000fff3;
+       *((volatile __u32 *) OMAP_FLASH_ACFG_0) = 0x00000088;
+
+       /*
+        * Ethernet support through the debug board
+        * CS1 timings setup
+        */
+       *((volatile __u32 *) OMAP_FLASH_CFG_1) = 0x0000fff3;
+       *((volatile __u32 *) OMAP_FLASH_ACFG_1) = 0x00000000;
+
+       /*
+        * Configure MPU_EXT_NIRQ IO in IO_CONF9 register,
+        * It is used as the Ethernet controller interrupt
+        */
+       *((volatile __u32 *) PERSEUS2_IO_CONF_9) &= 0x1FFFFFFF;
+}
+
+MACHINE_START(OMAP_PERSEUS2, "OMAP730 Perseus2")
+       MAINTAINER("Kevin Hilman <k-hilman@ti.com>")
+       BOOT_MEM(0x10000000, 0xe0000000, 0xe0000000)
+       BOOT_PARAMS(0x10000100)
+       MAPIO(omap_perseus2_map_io)
+       INITIRQ(omap_perseus2_init_irq)
+       INIT_MACHINE(omap_perseus2_init)
+MACHINE_END
diff --git a/arch/i386/mach-es7000/es7000.c b/arch/i386/mach-es7000/es7000.c
new file mode 100644 (file)
index 0000000..defe41e
--- /dev/null
@@ -0,0 +1,279 @@
+/*
+ * Written by: Garry Forsgren, Unisys Corporation
+ *             Natalie Protasevich, Unisys Corporation
+ * This file contains the code to configure and interface 
+ * with Unisys ES7000 series hardware system manager.
+ *
+ * Copyright (c) 2003 Unisys Corporation.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Unisys Corporation, Township Line & Union Meeting 
+ * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
+ *
+ * http://www.unisys.com
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/notifier.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <asm/io.h>
+#include <asm/nmi.h>
+#include <asm/smp.h>
+#include <asm/apicdef.h>
+#include "es7000.h"
+
+/*
+ * ES7000 Globals
+ */
+
+volatile unsigned long *psai = NULL;
+struct mip_reg         *mip_reg;  
+struct mip_reg         *host_reg;
+int                    mip_port;
+unsigned long          mip_addr, host_addr;
+
+/*
+ * Parse the OEM Table
+ */
+
+void __init
+parse_unisys_oem (char *oemptr, int oem_entries)
+{
+       int                     i;
+       int                     success = 0;
+       unsigned char           type, size;
+       unsigned long           val;
+       char                    *tp = NULL;  
+       struct psai             *psaip = NULL;
+       struct mip_reg_info     *mi;
+       struct mip_reg          *host, *mip;
+
+       tp = oemptr;
+
+       tp += 8;
+
+       for (i=0; i <= oem_entries; i++) {
+               type = *tp++;
+               size = *tp++;
+               tp -= 2;
+               switch (type) {
+               case MIP_REG:
+                       mi = (struct mip_reg_info *)tp;
+                       val = MIP_RD_LO(mi->host_reg);
+                       host_addr = val;
+                       host = (struct mip_reg *)val;
+                       host_reg = __va(host);
+                       val = MIP_RD_LO(mi->mip_reg);
+                       mip_addr = val;
+                       mip = (struct mip_reg *)val;
+                       mip_reg = __va(mip);
+                       Dprintk("es7000_mipcfg: host_reg = 0x%lx \n", 
+                               (unsigned long)host_reg);
+                       Dprintk("es7000_mipcfg: mip_reg = 0x%lx \n", 
+                               (unsigned long)mip_reg);
+                       success++;
+                       break;
+               case MIP_PSAI_REG:
+                       psaip = (struct psai *)tp;
+                       if (tp != NULL) {
+                               if (psaip->addr)
+                                       psai = __va(psaip->addr);
+                               else
+                                       psai = NULL;
+                               success++;
+                       }
+                       break;
+               default:
+                       break;
+               }
+               if (i == 6) break;
+               tp += size;
+       }
+
+       if (success < 2) {
+               printk("\nNo ES7000 found.\n");
+               es7000_plat = 0;
+       } else {
+               printk("\nEnabling ES7000 specific features...\n");
+               es7000_plat = 1;
+       }
+       return;
+}
+
+int __init 
+find_unisys_acpi_oem_table(unsigned long *oem_addr, int *length) 
+{
+       struct acpi_table_rsdp          *rsdp = NULL;
+       unsigned long                   rsdp_phys = 0;
+       struct acpi_table_header        *header = NULL;
+       int                             i;
+       struct acpi_table_sdt           sdt;
+
+       rsdp_phys = acpi_find_rsdp();
+       rsdp = __va(rsdp_phys);
+       if (rsdp->rsdt_address) {
+               struct acpi_table_rsdt  *mapped_rsdt = NULL;
+               sdt.pa = rsdp->rsdt_address;
+
+               header = (struct acpi_table_header *)
+                       __acpi_map_table(sdt.pa, sizeof(struct acpi_table_header));
+               if (!header)
+                       return -ENODEV;
+
+               sdt.count = (header->length - sizeof(struct acpi_table_header)) >> 3;
+               mapped_rsdt = (struct acpi_table_rsdt *)
+                       __acpi_map_table(sdt.pa, header->length);
+               if (!mapped_rsdt)
+                       return -ENODEV;
+
+               header = &mapped_rsdt->header;
+
+               for (i = 0; i < sdt.count; i++)
+                       sdt.entry[i].pa = (unsigned long) mapped_rsdt->entry[i];
+       };
+       for (i = 0; i < sdt.count; i++) {
+
+               header = (struct acpi_table_header *)
+                       __acpi_map_table(sdt.entry[i].pa,
+                               sizeof(struct acpi_table_header));
+               if (!header)
+                       continue;
+               if (!strncmp((char *) &header->signature, "OEM1", 4)) {
+                       if (!strncmp((char *) &header->oem_id, "UNISYS", 6)) {
+                               void *addr;
+                               struct oem_table *t;
+                               acpi_table_print(header, sdt.entry[i].pa);
+                               t = (struct oem_table *) __acpi_map_table(sdt.entry[i].pa, header->length);
+                               addr = (void *) __acpi_map_table(t->OEMTableAddr, t->OEMTableSize);
+                               *length = header->length;
+                               *oem_addr = (unsigned long) addr;
+                               return 0;
+                       }
+               }
+       }
+       printk("ES7000: did not find Unisys ACPI OEM table!\n");
+       return -1;
+}
+
+static void
+es7000_spin(int n)
+{
+       int i = 0;
+
+       while (i++ < n) 
+               rep_nop();
+}
+
+static int __init
+es7000_mip_write(struct mip_reg *mip_reg)
+{
+       int                     status = 0;
+       int                     spin;
+
+       spin = MIP_SPIN;
+       while (((unsigned long long)host_reg->off_38 &
+               (unsigned long long)MIP_VALID) != 0) {
+                       if (--spin <= 0) {
+                               printk("es7000_mip_write: Timeout waiting for Host Valid Flag");
+                               return -1;
+                       }
+               es7000_spin(MIP_SPIN);
+       }
+
+       memcpy(host_reg, mip_reg, sizeof(struct mip_reg));
+       outb(1, mip_port);
+
+       spin = MIP_SPIN;
+
+       while (((unsigned long long)mip_reg->off_38 &
+               (unsigned long long)MIP_VALID) == 0) {
+               if (--spin <= 0) {
+                       printk("es7000_mip_write: Timeout waiting for MIP Valid Flag");
+                       return -1;
+               }
+               es7000_spin(MIP_SPIN);
+       }
+
+       status = ((unsigned long long)mip_reg->off_0 &
+               (unsigned long long)0xffff0000000000) >> 48;
+       mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 &
+               (unsigned long long)~MIP_VALID);
+       return status;
+}
+
+int 
+es7000_start_cpu(int cpu, unsigned long eip)
+{
+       unsigned long vect = 0, psaival = 0;
+
+       if (psai == NULL)
+               return -1;
+
+       vect = ((unsigned long)__pa(eip)/0x1000) << 16;
+       psaival = (0x1000000 | vect | cpu);
+
+       while (*psai & 0x1000000)
+                ;
+
+       *psai = psaival;
+
+       return 0;
+
+}
+
+int 
+es7000_stop_cpu(int cpu)
+{
+       int startup;
+
+       if (psai == NULL)
+               return -1;
+
+       startup= (0x1000000 | cpu);
+
+       while ((*psai & 0xff00ffff) != startup)
+               ;
+
+       startup = (*psai & 0xff0000) >> 16;
+       *psai &= 0xffffff;
+
+       return 0;
+
+}
+
+void __init
+es7000_sw_apic()
+{
+       if (es7000_plat) {
+               int mip_status;
+               struct mip_reg es7000_mip_reg;
+
+               printk("ES7000: Enabling APIC mode.\n");
+               memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
+               es7000_mip_reg.off_0 = MIP_SW_APIC;
+               es7000_mip_reg.off_38 = (MIP_VALID);
+               while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0)
+                       printk("es7000_sw_apic: command failed, status = %x\n", 
+                               mip_status);
+               return;
+       }
+}
diff --git a/arch/i386/mach-es7000/setup.c b/arch/i386/mach-es7000/setup.c
new file mode 100644 (file)
index 0000000..4caed0e
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ *     Machine specific setup for es7000
+ */
+
+#include <linux/config.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <asm/acpi.h>
+#include <asm/arch_hooks.h>
+
+/**
+ * pre_intr_init_hook - initialisation prior to setting up interrupt vectors
+ *
+ * Description:
+ *     Perform any necessary interrupt initialisation prior to setting up
+ *     the "ordinary" interrupt call gates.  For legacy reasons, the ISA
+ *     interrupts should be initialised here if the machine emulates a PC
+ *     in any way.
+ **/
+void __init pre_intr_init_hook(void)
+{
+       init_ISA_irqs();
+}
+
+/*
+ * IRQ2 is cascade interrupt to second interrupt controller
+ */
+static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL};
+
+/**
+ * intr_init_hook - post gate setup interrupt initialisation
+ *
+ * Description:
+ *     Fill in any interrupts that may have been left out by the general
+ *     init_IRQ() routine.  Interrupts having to do with the machine rather
+ *     than the devices on the I/O bus (like APIC interrupts in Intel MP
+ *     systems) are started here.
+ **/
+void __init intr_init_hook(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+       apic_intr_init();
+#endif
+
+       if (!acpi_ioapic)
+               setup_irq(2, &irq2);
+}
+
+/**
+ * pre_setup_arch_hook - hook called prior to any setup_arch() execution
+ *
+ * Description:
+ *     generally used to activate any machine specific identification
+ *     routines that may be needed before setup_arch() runs.  On VISWS
+ *     this is used to get the board revision and type.
+ **/
+void __init pre_setup_arch_hook(void)
+{
+}
+
+/**
+ * trap_init_hook - initialise system specific traps
+ *
+ * Description:
+ *     Called as the final act of trap_init().  Used in VISWS to initialise
+ *     the various board specific APIC traps.
+ **/
+void __init trap_init_hook(void)
+{
+}
+
+static struct irqaction irq0  = { timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL};
+
+/**
+ * time_init_hook - do any specific initialisations for the system timer.
+ *
+ * Description:
+ *     Must plug the system timer interrupt source at HZ into the IRQ listed
+ *     in irq_vectors.h:TIMER_IRQ
+ **/
+void __init time_init_hook(void)
+{
+       setup_irq(0, &irq0);
+}
+
+#ifdef CONFIG_MCA
+/**
+ * mca_nmi_hook - hook into MCA specific NMI chain
+ *
+ * Description:
+ *     The MCA (Microchannel Architecture) has an NMI chain for NMI sources
+ *     along the MCA bus.  Use this to hook into that chain if you will need
+ *     it.
+ **/
+void __init mca_nmi_hook(void)
+{
+       /* If I recall correctly, there's a whole bunch of other things that
+        * we can do to check for NMI problems, but that's all I know about
+        * at the moment.
+        */
+
+       printk("NMI generated from unknown source!\n");
+}
+
+#endif
diff --git a/arch/i386/mach-es7000/topology.c b/arch/i386/mach-es7000/topology.c
new file mode 100644 (file)
index 0000000..e96d891
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * arch/i386/mach-es7000/topology.c - Populate driverfs with topology information
+ *
+ * Written by: Matthew Dobson, IBM Corporation
+ * Original Code: Paul Dorwin, IBM Corporation, Patrick Mochel, OSDL
+ *
+ * Copyright (C) 2002, IBM Corp.
+ *
+ * All rights reserved.          
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to <colpatch@us.ibm.com>
+ */
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <asm/cpu.h>
+
+struct i386_cpu cpu_devices[NR_CPUS];
+
+#ifdef CONFIG_NUMA
+#include <linux/mmzone.h>
+#include <asm/node.h>
+
+struct i386_node node_devices[MAX_NUMNODES];
+
+static int __init topology_init(void)
+{
+       int i;
+
+       for (i = 0; i < num_online_nodes(); i++)
+               arch_register_node(i);
+       for (i = 0; i < NR_CPUS; i++)
+               if (cpu_possible(i)) arch_register_cpu(i);
+       return 0;
+}
+
+#else /* !CONFIG_NUMA */
+
+static int __init topology_init(void)
+{
+       int i;
+
+       for (i = 0; i < NR_CPUS; i++)
+               if (cpu_possible(i)) arch_register_cpu(i);
+       return 0;
+}
+
+#endif /* CONFIG_NUMA */
+
+subsys_initcall(topology_init);
diff --git a/arch/ia64/kernel/perfmon_hpsim.h b/arch/ia64/kernel/perfmon_hpsim.h
new file mode 100644 (file)
index 0000000..9c6fe7f
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * This file contains the HP SKI Simulator PMU register description tables
+ * and pmc checkers used by perfmon.c.
+ *
+ * Copyright (C) 2002-2003  Hewlett Packard Co
+ *               Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * File mostly contributed by Ian Wienand <ianw@gelato.unsw.edu.au>
+ *
+ * This file is included as a dummy template so the kernel does not
+ * try to initialize registers the simulator can't handle.
+ *
+ * Note the simulator does not (currently) implement these registers, i.e.,
+ * they do not count anything. But you can read/write them.
+ */
+
+#define RDEP(x)        (1UL<<(x))
+
+#ifndef CONFIG_IA64_HP_SIM
+#error "This file should only be included for the HP Simulator"
+#endif
+
+static pfm_reg_desc_t pfm_hpsim_pmc_desc[PMU_MAX_PMCS]={
+/* pmc0  */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL, 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL, 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL, 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL, 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc8  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc9  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc10 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(10), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc11 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(11), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc12 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(12), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc13 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(13), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc14 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(14), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmc15 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(15), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+           { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+static pfm_reg_desc_t pfm_hpsim_pmd_desc[PMU_MAX_PMDS]={
+/* pmd0  */ { PFM_REG_BUFFER, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmd1  */ { PFM_REG_BUFFER, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmd2  */ { PFM_REG_BUFFER, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmd3  */ { PFM_REG_BUFFER, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
+/* pmd4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
+/* pmd5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
+/* pmd6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
+/* pmd7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
+/* pmd8  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(8),0UL, 0UL, 0UL}},
+/* pmd9  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(9),0UL, 0UL, 0UL}},
+/* pmd10 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
+/* pmd11 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
+/* pmd12 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
+/* pmd13 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(13),0UL, 0UL, 0UL}},
+/* pmd14 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(14),0UL, 0UL, 0UL}},
+/* pmd15 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(15),0UL, 0UL, 0UL}},
+           { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
+};
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static pmu_config_t pmu_conf={
+       .pmu_name   = "hpsim",
+       .pmu_family = 0x7, /* ski emulator reports as Itanium */
+       .enabled    = 0,
+       .ovfl_val   = (1UL << 32) - 1,
+       .num_ibrs   = 0, /* does not use */
+       .num_dbrs   = 0, /* does not use */
+       .pmd_desc   = pfm_hpsim_pmd_desc,
+       .pmc_desc   = pfm_hpsim_pmc_desc
+};
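+
+/*
+ * Illustrative note (not part of the original header): RDEP(n) is just a
+ * one-bit mask, so the pmc4 entry above carries {RDEP(4), 0UL, 0UL, 0UL},
+ * i.e. a first word of 1UL << 4 == 0x10.  Read against the table layout,
+ * this records that pmc4 controls pmd4; the pmd4 entry mirrors it with
+ * RDEP(4) in its final mask, which perfmon.c presumably reads as the
+ * reverse dependency (pmd4 is driven by pmc4).
+ */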
diff --git a/arch/ppc/mm/cachemap.c b/arch/ppc/mm/cachemap.c
new file mode 100644 (file)
index 0000000..2033eec
--- /dev/null
@@ -0,0 +1,174 @@
+/*
+ *  PowerPC version derived from arch/arm/mm/consistent.c
+ *    Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
+ *
+ *  arch/ppc/mm/cachemap.c
+ *
+ *  Copyright (C) 2000 Russell King
+ *
+ * Consistent memory allocators.  Used for DMA devices that want to
+ * share uncached memory with the processor core.  The function return
+ * is the virtual address and 'dma_handle' is the physical address.
+ * Mostly stolen from the ARM port, with some changes for PowerPC.
+ *                                             -- Dan
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/hardirq.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+
+int map_page(unsigned long va, phys_addr_t pa, int flags);
+
+/* This function will allocate the requested contiguous pages and
+ * map them into the kernel's vmalloc() space.  This is done so we
+ * get unique mapping for these pages, outside of the kernel's 1:1
+ * virtual:physical mapping.  This is necessary so we can cover large
+ * portions of the kernel with single large page TLB entries, and
+ * still get unique uncached pages for consistent DMA.
+ */
+void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle)
+{
+       int order, err;
+       struct page *page, *free, *end;
+       phys_addr_t pa;
+       unsigned long flags, offset;
+       struct vm_struct *area = NULL;
+       unsigned long va = 0;
+
+       BUG_ON(in_interrupt());
+
+       /* Only allocate page size areas */
+       size = PAGE_ALIGN(size);
+       order = get_order(size);
+
+       free = page = alloc_pages(gfp, order);
+       if (!page)
+               return NULL;
+
+       pa = page_to_phys(page);
+       *dma_handle = page_to_bus(page);
+       end = page + (1 << order);
+
+       /*
+        * we need to ensure that there are no cache lines in use,
+        * or, worse, dirty, in this area.
+        */
+       invalidate_dcache_range((unsigned long)page_address(page),
+                               (unsigned long)page_address(page) + size);
+
+       /*
+        * alloc_pages() expects the block to be handled as a unit, so
+        * it only sets the page count on the first page.  We set the
+        * counts on each page so they can be freed individually
+        */
+       for (; page < end; page++)
+               set_page_count(page, 1);
+
+
+       /* Allocate some common virtual space to map the new pages */
+       area = get_vm_area(size, VM_ALLOC);
+       if (!area)
+               goto out;
+
+       va = (unsigned long) area->addr;
+
+       flags = _PAGE_KERNEL | _PAGE_NO_CACHE;
+       
+       for (offset = 0; offset < size; offset += PAGE_SIZE) {
+               err = map_page(va+offset, pa+offset, flags);
+               if (err) {
+                       vfree((void *)va);
+                       va = 0;
+                       goto out;
+               }
+
+               free++;
+       }
+
+ out:
+       /* Free pages which weren't mapped */
+       for (; free < end; free++) {
+               __free_page(free);
+       }
+
+       return (void *)va;
+}
+
+/*
+ * free page(s) as defined by the above mapping.
+ */
+void consistent_free(void *vaddr)
+{
+       BUG_ON(in_interrupt());
+       vfree(vaddr);
+}
+
+/*
+ * make an area consistent.
+ */
+void consistent_sync(void *vaddr, size_t size, int direction)
+{
+       unsigned long start = (unsigned long)vaddr;
+       unsigned long end   = start + size;
+
+       switch (direction) {
+       case DMA_NONE:
+               BUG();
+       case DMA_FROM_DEVICE:   /* invalidate only */
+               invalidate_dcache_range(start, end);
+               break;
+       case DMA_TO_DEVICE:             /* writeback only */
+               clean_dcache_range(start, end);
+               break;
+       case DMA_BIDIRECTIONAL: /* writeback and invalidate */
+               flush_dcache_range(start, end);
+               break;
+       }
+}
+
+/*
+ * consistent_sync_page() makes an area of a page consistent; identical
+ * to consistent_sync(), but takes a struct page instead of a virtual address.
+ */
+
+void consistent_sync_page(struct page *page, unsigned long offset,
+       size_t size, int direction)
+{
+       unsigned long start;
+
+       start = (unsigned long)page_address(page) + offset;
+       consistent_sync((void *)start, size, direction);
+}
+
+EXPORT_SYMBOL(consistent_sync_page);
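+
+/*
+ * Usage sketch (illustrative only, not part of this file).  The names,
+ * sizes and DMA direction below are arbitrary examples.
+ *
+ * Coherent (uncached) allocation, e.g. for a descriptor ring:
+ *
+ *        dma_addr_t dma;
+ *        void *ring = consistent_alloc(GFP_KERNEL, PAGE_SIZE, &dma);
+ *
+ *        if (ring == NULL)
+ *                return -ENOMEM;
+ *        ... hand 'dma' to the device, use 'ring' from the CPU ...
+ *        consistent_free(ring);
+ *
+ * Streaming use of an ordinary cached buffer before a device reads it:
+ *
+ *        consistent_sync(buf, len, DMA_TO_DEVICE);
+ */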
diff --git a/arch/ppc/ocp/Makefile b/arch/ppc/ocp/Makefile
new file mode 100644 (file)
index 0000000..f669ee0
--- /dev/null
@@ -0,0 +1,6 @@
+#
+# Makefile for the linux kernel.
+#
+
+obj-y          := ocp.o ocp-driver.o ocp-probe.o
+
diff --git a/arch/ppc/ocp/ocp-driver.c b/arch/ppc/ocp/ocp-driver.c
new file mode 100644 (file)
index 0000000..9f6bb3f
--- /dev/null
@@ -0,0 +1,195 @@
+/*
+ * FILE NAME: ocp-driver.c
+ *
+ * BRIEF MODULE DESCRIPTION:
+ * driver callback, id matching and registration
+ * Based on drivers/pci/pci-driver, Copyright (c) 1997--1999 Martin Mares
+ *
+ * Maintained by: Armin <akuster@mvista.com>
+ *
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+ *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+ *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <asm/ocp.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+/*
+ *  Registration of OCP drivers and handling of hot-pluggable devices.
+ */
+
+static int
+ocp_device_probe(struct device *dev)
+{
+       int error = 0;
+       struct ocp_driver *drv;
+       struct ocp_device *ocp_dev;
+
+       drv = to_ocp_driver(dev->driver);
+       ocp_dev = to_ocp_dev(dev);
+
+       if (drv->probe) {
+               error = drv->probe(ocp_dev);
+               DBG("probe return code %d\n", error);
+               if (error >= 0) {
+                       ocp_dev->driver = drv;
+                       error = 0;
+               }
+       }
+       return error;
+}
+
+static int
+ocp_device_remove(struct device *dev)
+{
+       struct ocp_device *ocp_dev = to_ocp_dev(dev);
+
+       if (ocp_dev->driver) {
+               if (ocp_dev->driver->remove)
+                       ocp_dev->driver->remove(ocp_dev);
+               ocp_dev->driver = NULL;
+       }
+       return 0;
+}
+
+static int
+ocp_device_suspend(struct device *dev, u32 state, u32 level)
+{
+       struct ocp_device *ocp_dev = to_ocp_dev(dev);
+
+       int error = 0;
+
+       if (ocp_dev->driver) {
+               if (level == SUSPEND_SAVE_STATE && ocp_dev->driver->save_state)
+                       error = ocp_dev->driver->save_state(ocp_dev, state);
+               else if (level == SUSPEND_POWER_DOWN
+                        && ocp_dev->driver->suspend)
+                       error = ocp_dev->driver->suspend(ocp_dev, state);
+       }
+       return error;
+}
+
+static int
+ocp_device_resume(struct device *dev, u32 level)
+{
+       struct ocp_device *ocp_dev = to_ocp_dev(dev);
+
+       if (ocp_dev->driver) {
+               if (level == RESUME_POWER_ON && ocp_dev->driver->resume)
+                       ocp_dev->driver->resume(ocp_dev);
+       }
+       return 0;
+}
+
+/**
+ * ocp_bus_match - Works out whether an OCP device matches any
+ * of the IDs listed for a given OCP driver.
+ * @dev: the generic device struct for the OCP device
+ * @drv: the generic driver struct for the OCP driver
+ *
+ * Used by a driver to check whether an OCP device present in the
+ * system is in its list of supported devices.  Returns 1 for a
+ * match, or 0 if there is no match.
+ */
+static int
+ocp_bus_match(struct device *dev, struct device_driver *drv)
+{
+       struct ocp_device *ocp_dev = to_ocp_dev(dev);
+       struct ocp_driver *ocp_drv = to_ocp_driver(drv);
+       const struct ocp_device_id *ids = ocp_drv->id_table;
+
+       if (!ids)
+               return 0;
+
+       while (ids->vendor || ids->device) {
+               if ((ids->vendor == OCP_ANY_ID
+                    || ids->vendor == ocp_dev->vendor)
+                   && (ids->device == OCP_ANY_ID
+                       || ids->device == ocp_dev->device)) {
+                       DBG("Bus match -vendor:%x device:%x\n", ids->vendor,
+                           ids->device);
+                       return 1;
+               }
+               ids++;
+       }
+       return 0;
+}
+
+struct bus_type ocp_bus_type = {
+       .name = "ocp",
+       .match = ocp_bus_match,
+};
+
+static int __init
+ocp_driver_init(void)
+{
+       return bus_register(&ocp_bus_type);
+}
+
+postcore_initcall(ocp_driver_init);
+
+/**
+ * ocp_register_driver - register a new ocp driver
+ * @drv: the driver structure to register
+ *
+ * Adds the driver structure to the list of registered drivers.
+ * Returns the number of ocp devices which were claimed by the driver
+ * during registration.  The driver remains registered even if the
+ * return value is zero.
+ */
+int
+ocp_register_driver(struct ocp_driver *drv)
+{
+       int count = 0;
+
+       /* initialize common driver fields */
+       drv->driver.name = drv->name;
+       drv->driver.bus = &ocp_bus_type;
+       drv->driver.probe = ocp_device_probe;
+       drv->driver.resume = ocp_device_resume;
+       drv->driver.suspend = ocp_device_suspend;
+       drv->driver.remove = ocp_device_remove;
+
+       /* register with core */
+       count = driver_register(&drv->driver);
+       return count ? count : 1;
+}
+
+/**
+ * ocp_unregister_driver - unregister an ocp driver
+ * @drv: the driver structure to unregister
+ *
+ * Deletes the driver structure from the list of registered OCP drivers,
+ * gives it a chance to clean up by calling its remove() function for
+ * each device it was responsible for, and marks those devices as
+ * driverless.
+ */
+
+void
+ocp_unregister_driver(struct ocp_driver *drv)
+{
+       driver_unregister(&drv->driver);
+}
+
+EXPORT_SYMBOL(ocp_register_driver);
+EXPORT_SYMBOL(ocp_unregister_driver);
+EXPORT_SYMBOL(ocp_bus_type);
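+
+/*
+ * Registration sketch (illustrative, not part of this file).  The
+ * "example_*" names are hypothetical; a real driver would list the
+ * vendor/device IDs it actually supports instead of OCP_ANY_ID.
+ *
+ *        static struct ocp_device_id example_ids[] = {
+ *                { .vendor = OCP_ANY_ID, .device = OCP_ANY_ID },
+ *                { .vendor = 0, .device = 0 }    (table terminator)
+ *        };
+ *
+ *        static int example_probe(struct ocp_device *dev)
+ *        {
+ *                return 0;    (>= 0 means the device is claimed)
+ *        }
+ *
+ *        static struct ocp_driver example_driver = {
+ *                .name     = "example",
+ *                .id_table = example_ids,
+ *                .probe    = example_probe,
+ *        };
+ *
+ * and then ocp_register_driver(&example_driver) from the module init
+ * path, with ocp_unregister_driver(&example_driver) on exit.
+ */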
diff --git a/arch/ppc/ocp/ocp-probe.c b/arch/ppc/ocp/ocp-probe.c
new file mode 100644 (file)
index 0000000..bb4aff7
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * FILE NAME: ocp-probe.c
+ *
+ * BRIEF MODULE DESCRIPTION:
+ * Device scanning & bus set routines
+ * Based on drivers/pci/probe, Copyright (c) 1997--1999 Martin Mares
+ *
+ * Maintained by: Armin <akuster@mvista.com>
+ *
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+ *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+ *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <asm/ocp.h>
+
+LIST_HEAD(ocp_devices);
+struct device *ocp_bus;
+
+static struct ocp_device * __devinit
+ocp_setup_dev(struct ocp_def *odef, unsigned int index)
+{
+       struct ocp_device *dev;
+
+       dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+       if (!dev)
+               return NULL;
+       memset(dev, 0, sizeof(*dev));
+
+       dev->vendor = odef->vendor;
+       dev->device = odef->device;
+       dev->num = ocp_get_num(dev->device);
+       dev->paddr = odef->paddr;
+       dev->irq = odef->irq;
+       dev->pm = odef->pm;
+       dev->current_state = 4;
+
+       sprintf(dev->name, "OCP device %04x:%04x", dev->vendor, dev->device);
+
+       DBG("%s %s 0x%lx irq:%d pm:0x%lx \n", dev->slot_name, dev->name,
+           (unsigned long) dev->paddr, dev->irq, dev->pm);
+
+       /* now put in global tree */
+       sprintf(dev->dev.bus_id, "%d", index);
+       dev->dev.parent = ocp_bus;
+       dev->dev.bus = &ocp_bus_type;
+       device_register(&dev->dev);
+
+       return dev;
+}
+
+static struct device * __devinit ocp_alloc_primary_bus(void)
+{
+       struct device *b;
+
+       b = kmalloc(sizeof(struct device), GFP_KERNEL);
+       if (b == NULL)
+               return NULL;
+       memset(b, 0, sizeof(struct device));
+       strcpy(b->bus_id, "ocp");
+
+       device_register(b);
+
+       return b;
+}
+
+void __devinit ocp_setup_devices(struct ocp_def *odef)
+{
+       int index;
+       struct ocp_device *dev;
+
+       if (ocp_bus == NULL)
+               ocp_bus = ocp_alloc_primary_bus();
+       for (index = 0; odef->vendor != OCP_VENDOR_INVALID; ++index, ++odef) {
+               dev = ocp_setup_dev(odef, index);
+               if (dev != NULL)
+                       list_add_tail(&dev->global_list, &ocp_devices);
+       }
+}
+
+extern struct ocp_def core_ocp[];
+
+static int __init
+ocparch_init(void)
+{
+       ocp_setup_devices(core_ocp);
+       return 0;
+}
+
+subsys_initcall(ocparch_init);
+
+EXPORT_SYMBOL(ocp_devices);
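+
+/*
+ * Board-definition sketch (illustrative, not part of this file): core_ocp[]
+ * is simply an array of ocp_def entries terminated by OCP_VENDOR_INVALID.
+ * The vendor/device constants, physical address and IRQ below are made-up
+ * placeholders.
+ *
+ *        struct ocp_def core_ocp[] = {
+ *                { .vendor = OCP_VENDOR_EXAMPLE, .device = OCP_DEVICE_UART,
+ *                  .paddr  = 0xef600300, .irq = 5, .pm = 0 },
+ *                { .vendor = OCP_VENDOR_INVALID }
+ *        };
+ */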
diff --git a/arch/ppc/ocp/ocp.c b/arch/ppc/ocp/ocp.c
new file mode 100644 (file)
index 0000000..8df60d7
--- /dev/null
@@ -0,0 +1,109 @@
+/*
+ * ocp.c
+ *
+ *     This is derived from pci.c
+ *
+ *     Current Maintainer
+ *      Armin Kuster akuster@dslextreme.com
+ *      Jan, 2002
+ *
+ *
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR   IMPLIED
+ *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT,  INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+ *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <asm/io.h>
+#include <asm/ocp.h>
+#include <asm/errno.h>
+
+/**
+ * ocp_get_num - determine how many OCP devices of a given
+ * device type are registered
+ * @device: OCP device type such as HOST, PCI, GPT, UART, OPB, IIC, GPIO, EMAC, ZMII
+ *
+ * The routine returns the number of such devices that are registered.
+ */
+unsigned int ocp_get_num(unsigned int device)
+{
+       unsigned int count = 0;
+       struct ocp_device *ocp;
+       struct list_head *ocp_l;
+
+       list_for_each(ocp_l, &ocp_devices) {
+               ocp = list_entry(ocp_l, struct ocp_device, global_list);
+               if (device == ocp->device)
+                       count++;
+       }
+       return count;
+}
+
+/**
+ * ocp_get_dev - get the ocp device pointer for a device type and instance
+ * @device: OCP device type such as PCI, GPT, UART, OPB, IIC, GPIO, EMAC, ZMII
+ * @dev_num: instance number of the ocp device you want
+ *
+ * The routine returns the ocp device pointer found in the list, based on
+ * the device type and the requested instance of that device.
+ *
+ */
+struct ocp_device *
+ocp_get_dev(unsigned int device, int dev_num)
+{
+       struct ocp_device *ocp;
+       struct list_head *ocp_l;
+       int count = 0;
+
+       list_for_each(ocp_l, &ocp_devices) {
+               ocp = list_entry(ocp_l, struct ocp_device, global_list);
+               if (device == ocp->device) {
+                       if (dev_num == count)
+                               return ocp;
+                       count++;
+               }
+       }
+       return NULL;
+}
+
+EXPORT_SYMBOL(ocp_get_dev);
+EXPORT_SYMBOL(ocp_get_num);
+
+#ifdef CONFIG_PM
+int ocp_generic_suspend(struct ocp_device *pdev, u32 state)
+{
+       ocp_force_power_off(pdev);
+       return 0;
+}
+
+int ocp_generic_resume(struct ocp_device *pdev)
+{
+       ocp_force_power_on(pdev);
+       return 0;
+}
+
+EXPORT_SYMBOL(ocp_generic_suspend);
+EXPORT_SYMBOL(ocp_generic_resume);
+#endif /* CONFIG_PM */
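+
+/*
+ * Lookup sketch (illustrative, not part of this file): walking every
+ * registered instance of one device type.  OCP_DEVICE_UART and
+ * init_one_uart() are hypothetical placeholders.
+ *
+ *        int i, n = ocp_get_num(OCP_DEVICE_UART);
+ *
+ *        for (i = 0; i < n; i++) {
+ *                struct ocp_device *dev = ocp_get_dev(OCP_DEVICE_UART, i);
+ *
+ *                if (dev != NULL)
+ *                        init_one_uart(dev);
+ *        }
+ */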
diff --git a/drivers/char/dz.c b/drivers/char/dz.c
new file mode 100644 (file)
index 0000000..2363003
--- /dev/null
@@ -0,0 +1,1540 @@
+/*
+ * dz.c: Serial port driver for DECstations equipped
+ *       with the DZ chipset.
+ *
+ * Copyright (C) 1998 Olivier A. D. Lebaillif 
+ *             
+ * Email: olivier.lebaillif@ifrsys.com
+ *
+ * [31-AUG-98] triemer
+ * Changed IRQ to use Harald's dec internals interrupts.h
+ * removed base_addr code - moving address assignment to setup.c
+ * Changed name of dz_init to rs_init to be consistent with tc code
+ * [13-NOV-98] triemer fixed code to receive characters
+ *    after patches by harald to irq code.  
+ * [09-JAN-99] triemer minor fix for schedule - due to removal of timeout
+ *            field from "current" - somewhere between 2.1.121 and 2.1.131
+ * [27-JUN-2001] Arnaldo Carvalho de Melo <acme@conectiva.com.br> - cleanups
+ *  
+ * Parts (C) 1999 David Airlie, airlied@linux.ie 
+ * [07-SEP-99] Bugfixes 
+ */
+
+/* #define DEBUG_DZ 1 */
+
+#include <linux/module.h>
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h> 
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/major.h>
+#include <linux/param.h>
+#include <linux/interrupt.h>
+#include <linux/serial.h>
+#include <linux/serialP.h>
+#include <asm/wbflush.h>
+#include <asm/dec/interrupts.h>                        /* for definition of SERIAL */
+
+/* for definition of struct console */
+#ifdef CONFIG_SERIAL_CONSOLE
+#define CONSOLE_LINE (3)
+#endif /* ifdef CONFIG_SERIAL_CONSOLE */
+#if defined(CONFIG_SERIAL_CONSOLE) || defined(DEBUG_DZ)
+#include <linux/console.h>
+#endif /* if defined(CONFIG_SERIAL_CONSOLE) || defined(DEBUG_DZ) */
+
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+
+#include <asm/uaccess.h>
+#include <asm/irq.h>
+#include <asm/dec/machtype.h>
+#include <asm/dec/kn01.h>
+#include <asm/dec/kn02.h>
+
+#ifdef DEBUG_DZ
+#include <linux/ptrace.h>
+#include <linux/fs.h>
+#include <asm/bootinfo.h>
+
+extern int (*prom_printf) (char *,...);
+#endif
+
+
+
+#include "dz.h"
+
+#define DZ_INTR_DEBUG 1
+
+DECLARE_TASK_QUEUE(tq_serial);
+
+static struct dz_serial *lines[4];
+static unsigned char tmp_buffer[256];
+
+
+
+#ifdef DEBUG_DZ
+/*
+ * debugging code to send out chars via prom 
+ */
+static void debug_console(const char *s, int count)
+{
+       unsigned i;
+
+       for (i = 0; i < count; i++) {
+               if (*s == 10)
+                       prom_printf("%c", 13);
+               prom_printf("%c", *s++);
+       }
+}
+#endif
+
+/*
+ * ------------------------------------------------------------
+ * dz_in () and dz_out ()
+ *
+ * These routines are used to access the registers of the DZ 
+ * chip, hiding relocation differences between implementations.
+ * ------------------------------------------------------------
+ */
+
+static inline unsigned short dz_in (struct dz_serial *info, unsigned offset)
+{
+       volatile u16 *addr = (volatile u16 *)(info->port + offset);
+
+       return *addr;
+}
+
+static inline void dz_out (struct dz_serial *info, unsigned offset,
+                           unsigned short value)
+{
+       volatile u16 *addr = (volatile u16 *)(info->port + offset);
+       *addr = value;
+}
+
+/*
+ * ------------------------------------------------------------
+ * rs_stop () and rs_start ()
+ *
+ * These routines are called before setting or resetting 
+ * tty->stopped. They enable or disable transmitter interrupts, 
+ * as necessary.
+ * ------------------------------------------------------------
+ */
+
+static void dz_stop (struct tty_struct *tty)
+{
+       struct dz_serial *info; 
+       unsigned short mask, tmp;
+
+       if (!tty) 
+               return; 
+       info = (struct dz_serial *)tty->driver_data; 
+
+       mask = 1 << info->line;
+       tmp = dz_in (info, DZ_TCR);       /* read the TX flag */
+
+       tmp &= ~mask;                   /* clear the TX flag */
+       dz_out (info, DZ_TCR, tmp);
+}
+
+static void dz_start (struct tty_struct *tty)
+{
+       struct dz_serial *info = (struct dz_serial *)tty->driver_data;
+       unsigned short mask, tmp;
+
+       mask = 1 << info->line;
+       tmp = dz_in (info, DZ_TCR);      /* read the TX flag */
+
+       tmp |= mask;                   /* set the TX flag */
+       dz_out (info, DZ_TCR, tmp);
+}
+
+/*
+ * ------------------------------------------------------------
+ * Here starts the interrupt handling routines.  All of the 
+ * following subroutines are declared as inline and are folded 
+ * into dz_interrupt.  They were separated out for readability's 
+ * sake. 
+ *
+ * Note: rs_interrupt() is a "fast" interrupt, which means that it
+ * runs with interrupts turned off.  People who may want to modify
+ * rs_interrupt() should try to keep the interrupt handler as fast as
+ * possible.  After you are done making modifications, it is not a bad
+ * idea to do:
+ * 
+ * gcc -S -DKERNEL -Wall -Wstrict-prototypes -O6 -fomit-frame-pointer dz.c
+ *
+ * and look at the resulting assemble code in serial.s.
+ *
+ * ------------------------------------------------------------
+ */
+
+/*
+ * ------------------------------------------------------------
+ * dz_sched_event ()
+ *
+ * This routine is used by the interrupt handler to schedule
+ * processing in the software interrupt portion of the driver.
+ * ------------------------------------------------------------
+ */
+static inline void dz_sched_event (struct dz_serial *info, int event)
+{
+       info->event |= 1 << event;
+       queue_task(&info->tqueue, &tq_serial);
+       mark_bh(SERIAL_BH);
+}
+
+/*
+ * ------------------------------------------------------------
+ * receive_chars ()
+ *
+ * This routine deals with inputs from any lines.
+ * ------------------------------------------------------------
+ */
+static inline void receive_chars (struct dz_serial *info_in)
+{
+       struct dz_serial *info;
+       struct tty_struct *tty = 0;
+       struct async_icount *icount;
+       int ignore = 0;
+       unsigned short status, tmp;
+       unsigned char ch;
+
+       /*
+        * This code is going to be a problem...  the call to tty_flip_buffer
+        * is going to need to be rethought...
+        */
+       do {
+               status = dz_in (info_in, DZ_RBUF);
+               info = lines[LINE(status)];
+
+               /* punt so we don't get duplicate characters */
+               if (!(status & DZ_DVAL))
+                       goto ignore_char;
+
+               ch = UCHAR(status);                     /* grab the char */
+
+#if 0
+               if (info->is_console) {
+                       if (ch == 0)
+                               return;                 /* it's a break ... */
+               }
+#endif
+
+               tty = info->tty;        /* now tty points to the proper dev */
+               icount = &info->icount;
+
+               if (!tty)
+                       break;
+               if (tty->flip.count >= TTY_FLIPBUF_SIZE) break;
+
+               *tty->flip.char_buf_ptr = ch;
+               *tty->flip.flag_buf_ptr = 0;
+               icount->rx++;
+
+               /* keep track of the statistics */
+               if (status & (DZ_OERR | DZ_FERR | DZ_PERR)) {
+                       if (status & DZ_PERR)           /* parity error */
+                               icount->parity++;
+                       else if (status & DZ_FERR)      /* frame error */
+                               icount->frame++;
+                       if (status & DZ_OERR)           /* overrun error */
+                               icount->overrun++;
+
+                       /*
+                        * Check to see if we should ignore the character and
+                        * mask off conditions that should be ignored
+                        */
+
+                       if (status & info->ignore_status_mask) {
+                               if (++ignore > 100)
+                                       break;
+                               goto ignore_char;
+                       }
+
+                       /* mask off the error conditions we want to ignore */
+                       tmp = status & info->read_status_mask;
+
+                       if (tmp & DZ_PERR) {
+                               *tty->flip.flag_buf_ptr = TTY_PARITY;
+#ifdef DEBUG_DZ
+                               debug_console("PERR\n",5);
+#endif /* DEBUG_DZ */
+                       } else if (tmp & DZ_FERR) {
+                               *tty->flip.flag_buf_ptr = TTY_FRAME;
+#ifdef DEBUG_DZ
+                               debug_console("FERR\n",5);
+#endif /* DEBUG_DZ */
+                       } if (tmp & DZ_OERR) { 
+#ifdef DEBUG_DZ
+                               debug_console("OERR\n",5);
+#endif /* DEBUG_DZ */
+                               if (tty->flip.count < TTY_FLIPBUF_SIZE) {
+                                       tty->flip.count++;
+                                       tty->flip.flag_buf_ptr++;
+                                       tty->flip.char_buf_ptr++;
+                                       *tty->flip.flag_buf_ptr = TTY_OVERRUN;
+                               }
+                       }
+               }
+               tty->flip.flag_buf_ptr++;
+               tty->flip.char_buf_ptr++;
+               tty->flip.count++;
+ignore_char:
+       ;
+       } while (status & DZ_DVAL);
+
+       if (tty)
+               tty_flip_buffer_push(tty);
+}
+
+/*
+ * ------------------------------------------------------------
+ * transmit_chars ()
+ *
+ * This routine deals with outputs to any lines.
+ * ------------------------------------------------------------
+ */
+static inline void transmit_chars (struct dz_serial *info)
+{
+       unsigned char tmp;
+
+       if (info->x_char) {           /* XON/XOFF chars */
+               dz_out(info, DZ_TDR, info->x_char);
+               info->icount.tx++;
+               info->x_char = 0;
+               return;
+       }
+
+       /* if nothing to do or stopped or hardware stopped */
+       if ((info->xmit_cnt <= 0) || info->tty->stopped ||
+           info->tty->hw_stopped) {
+               dz_stop(info->tty);
+               return;
+       }
+
+       /*
+        * If something to do ... (remember the dz has no output fifo, so
+        * we go one char at a time :-<)
+        */
+       tmp = (unsigned short) info->xmit_buf[info->xmit_tail++];
+       dz_out(info, DZ_TDR, tmp);
+       info->xmit_tail = info->xmit_tail & (DZ_XMIT_SIZE - 1);
+       info->icount.tx++;
+
+       if (--info->xmit_cnt < WAKEUP_CHARS)
+               dz_sched_event(info, DZ_EVENT_WRITE_WAKEUP);
+
+       /* Are we done */
+       if (info->xmit_cnt <= 0)
+               dz_stop(info->tty);
+}
+
+/*
+ * ------------------------------------------------------------
+ * check_modem_status ()
+ *
+ * Only valid for the MODEM line duh !
+ * ------------------------------------------------------------
+ */
+static inline void check_modem_status (struct dz_serial *info)
+{
+       unsigned short status;
+
+       /* if not the modem line, just return */
+       if (info->line != DZ_MODEM)
+               return;
+
+       status = dz_in(info, DZ_MSR);
+  
+       /* it's easy, since DSR2 is the only bit in the register */
+       if (status)
+               info->icount.dsr++;
+}
+
+/*
+ * ------------------------------------------------------------
+ * dz_interrupt ()
+ *
+ * this is the main interrupt routine for the DZ chip.
+ * It deals with the multiple ports.
+ * ------------------------------------------------------------
+ */
+static void dz_interrupt (int irq, void *dev, struct pt_regs *regs)
+{
+       struct dz_serial *info;
+       unsigned short status;
+
+        /* get the reason why we just got an irq */
+       status = dz_in((struct dz_serial *)dev, DZ_CSR);
+       info = lines[LINE(status)];     /* point info at the proper port */
+
+       if (status & DZ_RDONE) 
+               receive_chars(info);    /* the receive function */
+
+       if (status & DZ_TRDY) 
+               transmit_chars (info);
+}
+
+/*
+ * -------------------------------------------------------------------
+ * Here ends the DZ interrupt routines.
+ * -------------------------------------------------------------------
+ */
+
+/*
+ * This routine is used to handle the "bottom half" processing for the
+ * serial driver, also known as the "software interrupt" processing.
+ * This processing is done at the kernel interrupt level, after the
+ * rs_interrupt() has returned, BUT WITH INTERRUPTS TURNED ON.  This
+ * is where time-consuming activities which can not be done in the
+ * interrupt driver proper are done; the interrupt driver schedules
+ * them using rs_sched_event(), and they get done here.
+ */
+static void do_serial_bh (void)
+{
+       run_task_queue (&tq_serial);
+}
+
+static void do_softint (void *private_data)
+{
+       struct dz_serial *info = (struct dz_serial *) private_data;
+       struct tty_struct *tty = info->tty;
+
+       if (!tty)
+               return;
+
+       if (test_and_clear_bit(DZ_EVENT_WRITE_WAKEUP, &info->event)) {
+               if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
+                   tty->ldisc.write_wakeup)
+                       (tty->ldisc.write_wakeup) (tty);
+               wake_up_interruptible (&tty->write_wait);
+       }
+}
+
+/*
+ * -------------------------------------------------------------------
+ * This routine is called from the scheduler tqueue when the interrupt
+ * routine has signalled that a hangup has occurred.  The path of
+ * hangup processing is:
+ *
+ *      serial interrupt routine -> (scheduler tqueue) ->
+ *      do_serial_hangup() -> tty->hangup() -> rs_hangup()
+ * ------------------------------------------------------------------- 
+ */
+static void do_serial_hangup (void *private_data)
+{
+       struct dz_serial *info = (struct dz_serial *) private_data;
+       struct tty_struct *tty = info->tty;
+        
+       if (!tty)
+               return;
+
+       tty_hangup(tty);
+}
+
+/*
+ * -------------------------------------------------------------------
+ * startup ()
+ *
+ * various initialization tasks
+ * ------------------------------------------------------------------- 
+ */
+static int startup (struct dz_serial *info)
+{
+       unsigned long page, flags;
+       unsigned short tmp;
+
+       if (info->is_initialized)
+               return 0;
+  
+       save_and_cli(flags);
+
+       if (!info->port) {
+               if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags);
+               restore_flags(flags);
+               return -ENODEV;
+       }
+
+       if (!info->xmit_buf) {
+               page = get_zeroed_page(GFP_KERNEL);
+               if (!page) {
+                       restore_flags(flags);
+                       return -ENOMEM;
+               }
+               info->xmit_buf = (unsigned char *)page;
+       }
+
+       if (info->tty)
+               clear_bit(TTY_IO_ERROR, &info->tty->flags);
+
+       /* enable the interrupt and the scanning */
+       tmp = dz_in(info, DZ_CSR);
+       tmp |= (DZ_RIE | DZ_TIE | DZ_MSE);
+       dz_out(info, DZ_CSR, tmp);
+
+       info->xmit_cnt = info->xmit_head = info->xmit_tail = 0;
+
+       change_speed(info);                     /* set up the speed */
+
+       /*
+        * Clear the line transmitter buffer.  I can't figure out why I need
+        * to do this - but it's necessary - in order for the console portion
+        * and the interrupt portion to live happily side by side.
+        */
+
+       info->is_initialized = 1;
+
+       restore_flags(flags);
+
+       return 0;
+}
+
+/* 
+ * -------------------------------------------------------------------
+ * shutdown ()
+ *
+ * This routine will shutdown a serial port; interrupts are disabled, and
+ * DTR is dropped if the hangup on close termio flag is on.
+ * ------------------------------------------------------------------- 
+ */
+static void shutdown (struct dz_serial *info)
+{
+       unsigned long flags;
+       unsigned short tmp;
+
+       if (!info->is_initialized)
+               return;
+
+       save_and_cli(flags);
+
+       dz_stop (info->tty);
+
+       info->cflags &= ~DZ_CREAD;      /* turn off receive enable flag */
+       dz_out(info, DZ_LPR, info->cflags);
+
+       if (info->xmit_buf) {               /* free Tx buffer */
+               free_page((unsigned long)info->xmit_buf);
+               info->xmit_buf = 0;
+       }
+
+       if (!info->tty || (info->tty->termios->c_cflag & HUPCL)) {
+               tmp = dz_in(info, DZ_TCR);
+               if (tmp & DZ_MODEM_DTR) {
+                       tmp &= ~DZ_MODEM_DTR;
+                       dz_out(info, DZ_TCR, tmp);
+               }
+       }
+
+       if (info->tty)
+               set_bit (TTY_IO_ERROR, &info->tty->flags);
+
+       info->is_initialized = 0;
+
+       restore_flags (flags);
+}
+
+/* 
+ * -------------------------------------------------------------------
+ * change_speed ()
+ *
+ * set the baud rate.
+ * ------------------------------------------------------------------- 
+ */
+static void change_speed (struct dz_serial *info)
+{
+       unsigned long flags;
+       unsigned cflag;
+       int baud;
+
+       if (!info->tty || !info->tty->termios)
+               return;
+  
+       save_and_cli(flags);
+  
+       info->cflags = info->line;
+
+       cflag = info->tty->termios->c_cflag;
+
+       switch (cflag & CSIZE) {
+               case CS5:
+                       info->cflags |= DZ_CS5;
+                       break;
+               case CS6:
+                       info->cflags |= DZ_CS6;
+                       break;
+               case CS7:
+                       info->cflags |= DZ_CS7;
+                       break;
+               case CS8: 
+               default:
+                       info->cflags |= DZ_CS8;
+       }
+
+       if (cflag & CSTOPB)
+               info->cflags |= DZ_CSTOPB;
+       if (cflag & PARENB)
+               info->cflags |= DZ_PARENB;
+       if (cflag & PARODD)
+               info->cflags |= DZ_PARODD;
+  
+       baud = tty_get_baud_rate(info->tty);
+       switch (baud) {
+       case 50:
+               info->cflags |= DZ_B50;
+               break;
+       case 75:
+               info->cflags |= DZ_B75;
+               break;
+       case 110:
+               info->cflags |= DZ_B110;
+               break;
+       case 134:
+               info->cflags |= DZ_B134;
+               break; 
+       case 150:
+               info->cflags |= DZ_B150;
+               break;
+       case 300:
+               info->cflags |= DZ_B300;
+               break; 
+       case 600:
+               info->cflags |= DZ_B600;
+               break;
+       case 1200:
+               info->cflags |= DZ_B1200;
+               break; 
+       case 1800:
+               info->cflags |= DZ_B1800;
+               break;
+       case 2000:
+               info->cflags |= DZ_B2000;
+               break;
+       case 2400:
+               info->cflags |= DZ_B2400;
+               break;
+       case 3600:
+               info->cflags |= DZ_B3600;
+               break; 
+       case 4800:
+               info->cflags |= DZ_B4800;
+               break;
+       case 7200:
+               info->cflags |= DZ_B7200;
+               break; 
+       case 9600: 
+       default:
+               info->cflags |= DZ_B9600; 
+       }
+
+       info->cflags |= DZ_RXENAB;
+       dz_out(info, DZ_LPR, info->cflags);
+
+       /* setup accept flag */
+       info->read_status_mask = DZ_OERR;
+       if (I_INPCK(info->tty))
+               info->read_status_mask |= (DZ_FERR | DZ_PERR); 
+  
+       /* characters to ignore */
+       info->ignore_status_mask = 0;
+       if (I_IGNPAR(info->tty))
+               info->ignore_status_mask |= (DZ_FERR | DZ_PERR);
+
+       restore_flags(flags);
+}
+
+/* 
+ * -------------------------------------------------------------------
+ * dz_flush_chars ()
+ *
+ * Flush the buffer.
+ * ------------------------------------------------------------------- 
+ */
+static void dz_flush_chars (struct tty_struct *tty)
+{
+       struct dz_serial *info = (struct dz_serial *)tty->driver_data;
+       unsigned long flags;
+
+       if (info->xmit_cnt <= 0 || tty->stopped || tty->hw_stopped ||
+           !info->xmit_buf)
+               return;
+
+       save_and_cli(flags);
+       dz_start (info->tty);
+       restore_flags(flags);
+}
+
+
+/* 
+ * -------------------------------------------------------------------
+ * dz_write ()
+ *
+ * main output routine.
+ * ------------------------------------------------------------------- 
+ */
+static int dz_write (struct tty_struct *tty, int from_user,
+                     const unsigned char *buf, int count)
+{
+       struct dz_serial *info = (struct dz_serial *)tty->driver_data;
+       unsigned long flags;
+       int c, ret = 0;
+
+       if (!tty)
+               return ret;
+       if (!info->xmit_buf)
+               return ret;
+       if (!tmp_buf)
+               tmp_buf = tmp_buffer;
+
+       if (from_user) {
+               down (&tmp_buf_sem);
+               while (1) {
+                       c = MIN(count, MIN(DZ_XMIT_SIZE - info->xmit_cnt - 1,
+                                          DZ_XMIT_SIZE - info->xmit_head));
+                       if (c <= 0)
+                               break;
+
+                       c -= copy_from_user (tmp_buf, buf, c);
+                       if (!c) {
+                               if (!ret)
+                                       ret = -EFAULT;
+                               break;
+                       }
+
+                       save_and_cli(flags);
+
+                       c = MIN(c, MIN(DZ_XMIT_SIZE - info->xmit_cnt - 1,
+                                      DZ_XMIT_SIZE - info->xmit_head));
+                       memcpy(info->xmit_buf + info->xmit_head, tmp_buf, c);
+                       info->xmit_head = ((info->xmit_head + c) &
+                                          (DZ_XMIT_SIZE - 1));
+                       info->xmit_cnt += c;
+                       restore_flags(flags);
+
+                       buf += c;
+                       count -= c;
+                       ret += c;
+               }
+               up(&tmp_buf_sem);
+       } else {
+               while (1) {
+                       save_and_cli(flags);
+
+                       c = MIN(count, MIN(DZ_XMIT_SIZE - info->xmit_cnt - 1,
+                                          DZ_XMIT_SIZE - info->xmit_head));
+                       if (c <= 0) {
+                               restore_flags (flags);
+                               break;
+                       }
+                       memcpy(info->xmit_buf + info->xmit_head, buf, c);
+                       info->xmit_head = ((info->xmit_head + c) &
+                                          (DZ_XMIT_SIZE-1));
+                       info->xmit_cnt += c;
+                       restore_flags(flags);
+
+                       buf += c;
+                       count -= c;
+                       ret += c;
+               }
+       }
+
+       if (info->xmit_cnt && !tty->stopped && !tty->hw_stopped)
+               dz_start(info->tty);
+
+       return ret;
+}
+
+/* 
+ * -------------------------------------------------------------------
+ * dz_write_room ()
+ *
+ * compute the amount of space available for writing.
+ * ------------------------------------------------------------------- 
+ */
+static int dz_write_room (struct tty_struct *tty)
+{
+       struct dz_serial *info = (struct dz_serial *)tty->driver_data;
+       int ret;
+
+       ret = DZ_XMIT_SIZE - info->xmit_cnt - 1;
+       if (ret < 0)
+               ret = 0;
+
+       return ret;
+}
+
+/* 
+ * -------------------------------------------------------------------
+ * dz_chars_in_buffer ()
+ *
+ * compute the number of characters left to be transmitted
+ * ------------------------------------------------------------------- 
+ */
+static int dz_chars_in_buffer (struct tty_struct *tty)
+{
+       struct dz_serial *info = (struct dz_serial *)tty->driver_data;
+  
+       return info->xmit_cnt;
+}
+
+/* 
+ * -------------------------------------------------------------------
+ * dz_flush_buffer ()
+ *
+ * Empty the output buffer
+ * ------------------------------------------------------------------- 
+ */
+static void dz_flush_buffer (struct tty_struct *tty)
+{
+       struct dz_serial *info = (struct dz_serial *)tty->driver_data;
+                                
+       cli();
+       info->xmit_cnt = info->xmit_head = info->xmit_tail = 0;
+       sti();
+
+       wake_up_interruptible (&tty->write_wait);
+
+       if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
+            tty->ldisc.write_wakeup)
+               tty->ldisc.write_wakeup(tty);
+}
+
+/*
+ * ------------------------------------------------------------
+ * dz_throttle () and dz_unthrottle ()
+ * 
+ * These routines are called by the upper tty layer to signal that
+ * incoming characters should be throttled (or not).
+ * ------------------------------------------------------------
+ */
+static void dz_throttle (struct tty_struct *tty)
+{
+       struct dz_serial *info = (struct dz_serial *)tty->driver_data;  
+
+       if (I_IXOFF(tty))
+               info->x_char = STOP_CHAR(tty);
+}
+
+static void dz_unthrottle (struct tty_struct *tty)
+{
+       struct dz_serial *info = (struct dz_serial *)tty->driver_data;  
+
+       if (I_IXOFF(tty)) {
+               if (info->x_char)
+                       info->x_char = 0;
+               else
+                       info->x_char = START_CHAR(tty);
+       }
+}
+
+static void dz_send_xchar (struct tty_struct *tty, char ch)
+{
+       struct dz_serial *info = (struct dz_serial *)tty->driver_data;
+
+       info->x_char = ch;
+
+       if (ch)
+               dz_start(info->tty);
+}
+
+/*
+ * ------------------------------------------------------------
+ * rs_ioctl () and friends
+ * ------------------------------------------------------------
+ */
+static int get_serial_info(struct dz_serial *info,
+                           struct serial_struct *retinfo)
+{
+       struct serial_struct tmp;
+  
+       if (!retinfo)
+               return -EFAULT;
+
+       memset (&tmp, 0, sizeof(tmp));
+
+       tmp.type = info->type;
+       tmp.line = info->line;
+       tmp.port = info->port;
+       tmp.irq = SERIAL;
+       tmp.flags = info->flags;
+       tmp.baud_base = info->baud_base;
+       tmp.close_delay = info->close_delay;
+       tmp.closing_wait = info->closing_wait;
+
+       return copy_to_user(retinfo, &tmp, sizeof(*retinfo)) ? -EFAULT : 0;
+}
+
+static int set_serial_info (struct dz_serial *info,
+                            struct serial_struct *new_info)
+{
+       struct serial_struct new_serial;
+       struct dz_serial old_info;
+       int retval = 0;
+
+       if (!new_info)
+               return -EFAULT;
+
+       if (copy_from_user(&new_serial, new_info, sizeof(new_serial)))
+               return -EFAULT;
+
+       old_info = *info;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (info->count > 1)
+               return -EBUSY;
+
+       /*
+        * OK, past this point, all the error checking has been done.
+        * At this point, we start making changes.....
+        */
+
+       info->baud_base = new_serial.baud_base;
+       info->type = new_serial.type;
+       info->close_delay = new_serial.close_delay;
+       info->closing_wait = new_serial.closing_wait;
+
+       retval = startup(info);
+
+       return retval;
+}
+
+/*
+ * get_lsr_info - get line status register info
+ *
+ * Purpose: Let the user call ioctl() to get info when the UART is
+ *          physically emptied.  On bus types like RS485, the transmitter
+ *          must release the bus after transmitting.  This must be done
+ *          when the transmit shift register is empty, not when the
+ *          transmit holding register is empty.  This functionality
+ *          allows an RS485 driver to be written in user space. 
+ */
+static int get_lsr_info (struct dz_serial *info, unsigned int *value)
+{
+       unsigned short status = dz_in (info, DZ_LPR);
+
+       return put_user (status, value);
+}
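+
+/*
+ * User-space sketch (illustrative, not part of this driver): an RS485-style
+ * application can poll the value exported above through the TIOCSERGETLSR
+ * ioctl handled further down in dz_ioctl().  'fd' is an already-open tty
+ * file descriptor; error handling is omitted.
+ *
+ *        unsigned int lsr;
+ *
+ *        if (ioctl(fd, TIOCSERGETLSR, &lsr) == 0)
+ *                ... decide when to release the bus based on 'lsr' ...
+ */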
+
+/*
+ * This routine sends a break character out the serial port.
+ */
+static void send_break (struct dz_serial *info, int duration)
+{
+       unsigned long flags;
+       unsigned short tmp, mask;
+
+       if (!info->port)
+               return;
+
+       mask = 1 << info->line;
+       tmp = dz_in (info, DZ_TCR);
+       tmp |= mask;
+
+       current->state = TASK_INTERRUPTIBLE;
+
+       save_and_cli(flags);
+       dz_out(info, DZ_TCR, tmp);
+       schedule_timeout(duration);
+       tmp &= ~mask;
+       dz_out(info, DZ_TCR, tmp);
+       restore_flags(flags);
+}
+
+static int dz_ioctl(struct tty_struct *tty, struct file *file,
+                    unsigned int cmd, unsigned long arg)
+{
+       int error;
+       struct dz_serial * info = (struct dz_serial *)tty->driver_data;
+       int retval;
+
+       if (cmd != TIOCGSERIAL && cmd != TIOCSSERIAL &&
+           cmd != TIOCSERCONFIG && cmd != TIOCSERGWILD  &&
+           cmd != TIOCSERSWILD && cmd != TIOCSERGSTRUCT) {
+               if (tty->flags & (1 << TTY_IO_ERROR))
+                       return -EIO;
+       }
+
+       switch (cmd) {
+       case TCSBRK:            /* SVID version: non-zero arg --> no break */
+               retval = tty_check_change(tty);
+               if (retval)
+                       return retval;
+               tty_wait_until_sent(tty, 0);
+               if (!arg)
+                       send_break(info, HZ/4); /* 1/4 second */
+               return 0;
+
+       case TCSBRKP:           /* support for POSIX tcsendbreak() */
+               retval = tty_check_change(tty);
+               if (retval)
+                       return retval;
+               tty_wait_until_sent(tty, 0);
+               send_break(info, arg ? arg*(HZ/10) : HZ/4);
+               return 0;
+
+       case TIOCGSOFTCAR:
+               return put_user(C_CLOCAL(tty) ? 1 : 0, (unsigned long *)arg);
+
+       case TIOCSSOFTCAR:
+               if (get_user (arg, (unsigned long *)arg))
+                       return -EFAULT;
+
+               tty->termios->c_cflag = (tty->termios->c_cflag & ~CLOCAL) |
+                                       (arg ? CLOCAL : 0);
+               return 0;
+
+       case TIOCGSERIAL:
+               return get_serial_info(info, (struct serial_struct *)arg);
+
+       case TIOCSSERIAL:
+               return set_serial_info(info, (struct serial_struct *) arg);
+
+       case TIOCSERGETLSR:             /* Get line status register */
+               return get_lsr_info (info, (unsigned int *)arg);
+
+       case TIOCSERGSTRUCT:
+               return copy_to_user((struct dz_serial *)arg, info,
+                                   sizeof(struct dz_serial)) ? -EFAULT : 0;
+       default:
+               return -ENOIOCTLCMD;
+       }
+
+       return 0;
+}
+
+static void dz_set_termios (struct tty_struct *tty,
+                           struct termios *old_termios)
+{
+       struct dz_serial *info = (struct dz_serial *)tty->driver_data;
+
+       if (tty->termios->c_cflag == old_termios->c_cflag)
+               return;
+
+       change_speed (info);
+
+       if ((old_termios->c_cflag & CRTSCTS) &&
+           !(tty->termios->c_cflag & CRTSCTS)) {
+               tty->hw_stopped = 0;
+               dz_start(tty);
+       }
+}
+
+/*
+ * ------------------------------------------------------------
+ * dz_close()
+ * 
+ * This routine is called when the serial port gets closed.  First, we
+ * wait for the last remaining data to be sent.  Then, we turn off
+ * the transmit enable and receive enable flags.
+ * ------------------------------------------------------------
+ */
+static void dz_close(struct tty_struct *tty, struct file *filp)
+{
+       struct dz_serial * info = (struct dz_serial *)tty->driver_data;
+       unsigned long flags;
+
+       if (!info)
+               return;
+       save_and_cli(flags); 
+
+       if (tty_hung_up_p(filp)) {
+               restore_flags(flags);
+               return;
+       }
+
+       if ((tty->count == 1) && (info->count != 1)) {
+               /*
+                * Uh, oh.  tty->count is 1, which means that the tty structure
+                * will be freed.  Info->count should always be one in these
+                * conditions.  If it's greater than one, we've got real
+                * problems, since it means the serial port won't be shutdown.
+                */
+               printk("dz_close: bad serial port count; tty->count is 1, "
+                      "info->count is %d\n", info->count);
+               info->count = 1;
+       }
+
+       if (--info->count < 0) {
+               printk("ds_close: bad serial port count for ttyS%02d: %d\n",
+                      info->line, info->count);
+               info->count = 0;
+       }
+
+       if (info->count) {
+               restore_flags(flags);
+               return;
+       }
+       info->flags |= DZ_CLOSING;
+       /*
+        * Now we wait for the transmit buffer to clear; and we notify the line
+        * discipline to only process XON/XOFF characters.
+        */
+       tty->closing = 1;
+
+       if (info->closing_wait != DZ_CLOSING_WAIT_NONE)
+               tty_wait_until_sent(tty, info->closing_wait);
+
+       /*
+        * At this point we stop accepting input.  To do this, we disable the
+        * receive line status interrupts.
+        */
+       shutdown(info);
+
+       if (tty->driver->flush_buffer)
+               tty->driver->flush_buffer (tty);
+       if (tty->ldisc.flush_buffer)
+               tty->ldisc.flush_buffer (tty);
+       tty->closing = 0;
+       info->event = 0;
+       info->tty = 0;
+
+       if (tty->ldisc.num != ldiscs[N_TTY].num) {
+               if (tty->ldisc.close)
+                       tty->ldisc.close(tty);
+               tty->ldisc = ldiscs[N_TTY];
+               tty->termios->c_line = N_TTY;
+               if (tty->ldisc.open)
+                       tty->ldisc.open(tty);
+       }
+       if (info->blocked_open) {
+               if (info->close_delay) {
+                       current->state = TASK_INTERRUPTIBLE;
+                       schedule_timeout(info->close_delay);
+               }
+               wake_up_interruptible(&info->open_wait);
+       }
+
+       info->flags &= ~(DZ_NORMAL_ACTIVE | DZ_CLOSING);
+       wake_up_interruptible(&info->close_wait);
+
+       restore_flags(flags);
+}
+
+/*
+ * dz_hangup () --- called by tty_hangup() when a hangup is signaled.
+ */
+static void dz_hangup (struct tty_struct *tty)
+{
+       struct dz_serial *info = (struct dz_serial *) tty->driver_data;
+  
+       dz_flush_buffer(tty);
+       shutdown(info);
+       info->event = 0;
+       info->count = 0;
+       info->flags &= ~DZ_NORMAL_ACTIVE;
+       info->tty = 0;
+       wake_up_interruptible(&info->open_wait);
+}
+
+/*
+ * ------------------------------------------------------------
+ * rs_open() and friends
+ * ------------------------------------------------------------
+ */
+static int block_til_ready(struct tty_struct *tty, struct file *filp,
+                           struct dz_serial *info)
+{
+       DECLARE_WAITQUEUE(wait, current); 
+       int retval;
+       int do_clocal = 0;
+
+       /*
+        * If the device is in the middle of being closed, then block
+        * until it's done, and then try again.
+        */
+       if (info->flags & DZ_CLOSING) {
+               interruptible_sleep_on(&info->close_wait);
+               return -EAGAIN;
+       }
+
+       /*
+        * If non-blocking mode is set, or the port is not enabled, then make
+        * the check up front and then exit.
+        */
+       if ((filp->f_flags & O_NONBLOCK) ||
+           (tty->flags & (1 << TTY_IO_ERROR))) {
+               info->flags |= DZ_NORMAL_ACTIVE;
+
+               return 0;
+       }
+
+       if (tty->termios->c_cflag & CLOCAL)
+               do_clocal = 1;
+
+       /*
+        * Block waiting for the carrier detect and the line to become free
+        * (i.e., not in use by the callout).  While we are in this loop,
+        * info->count is dropped by one, so that dz_close() knows when to free
+        * things.  We restore it upon exit, either normal or abnormal.
+        */
+       retval = 0;
+       add_wait_queue(&info->open_wait, &wait);
+
+       info->count--;
+       info->blocked_open++;
+       while (1) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (tty_hung_up_p (filp) || !(info->is_initialized)) {
+                       retval = -EAGAIN;
+                       break;
+               }
+               if (!(info->flags & DZ_CLOSING) && do_clocal)
+                       break;
+               if (signal_pending(current)) {
+                       retval = -ERESTARTSYS;
+                       break;
+               }
+               schedule();
+       }
+               
+       current->state = TASK_RUNNING;
+       remove_wait_queue (&info->open_wait, &wait);
+       if (!tty_hung_up_p(filp))
+               info->count++;
+       info->blocked_open--;
+
+       if (retval)
+               return retval;
+       info->flags |= DZ_NORMAL_ACTIVE;
+       return 0;
+}
+
+/*
+ * This routine is called whenever a serial port is opened.  It
+ * enables interrupts for a serial port. It also performs the 
+ * serial-specific initialization for the tty structure.
+ */
+static int dz_open (struct tty_struct *tty, struct file *filp)
+{
+       struct dz_serial *info;
+       int retval, line;
+
+       line = tty->index;
+
+       /*
+        * The dz lines for the mouse/keyboard must be opened using their
+        * respective drivers.
+        */
+       if ((line < 0) || (line >= DZ_NB_PORT))
+               return -ENODEV;
+
+       if ((line == DZ_KEYBOARD) || (line == DZ_MOUSE))
+               return -ENODEV;
+
+       info = lines[line];
+       info->count++;
+
+       tty->driver_data = info;
+       info->tty = tty;
+
+       /*
+        * Start up serial port
+        */
+       retval = startup (info);
+       if (retval)
+               return retval;
+
+       retval = block_til_ready (tty, filp, info);
+       if (retval)
+               return retval;
+
+       return 0;
+}
+
+static void show_serial_version (void)
+{
+       printk("%s%s\n", dz_name, dz_version);
+}
+
+static struct tty_driver *serial_driver;
+
+static struct tty_operations serial_ops = {
+       .open = dz_open,
+       .close = dz_close,
+       .write = dz_write,
+       .flush_chars = dz_flush_chars,
+       .write_room = dz_write_room,
+       .chars_in_buffer = dz_chars_in_buffer,
+       .flush_buffer = dz_flush_buffer,
+       .ioctl = dz_ioctl,
+       .throttle = dz_throttle,
+       .unthrottle = dz_unthrottle,
+       .send_xchar = dz_send_xchar,
+       .set_termios = dz_set_termios,
+       .stop = dz_stop,
+       .start = dz_start,
+       .hangup = dz_hangup,
+};
+
+int __init dz_init(void)
+{
+       int i;
+       unsigned long flags;
+       struct dz_serial *info;
+
+       serial_driver = alloc_tty_driver(DZ_NB_PORT);
+       if (!serial_driver)
+               return -ENOMEM;
+
+       /* Setup base handler, and timer table. */
+       init_bh(SERIAL_BH, do_serial_bh);
+
+       show_serial_version();
+
+       serial_driver->owner = THIS_MODULE;
+       serial_driver->devfs_name = "tts/";
+       serial_driver->name = "ttyS";
+       serial_driver->major = TTY_MAJOR;
+       serial_driver->minor_start = 64;
+       serial_driver->type = TTY_DRIVER_TYPE_SERIAL;
+       serial_driver->subtype = SERIAL_TYPE_NORMAL;
+       serial_driver->init_termios = tty_std_termios;
+       serial_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL |
+                                            CLOCAL;
+       serial_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_NO_DEVFS;
+       tty_set_operations(serial_driver, &serial_ops);
+
+       if (tty_register_driver(serial_driver))
+               panic("Couldn't register serial driver\n");
+
+       save_flags(flags); cli();
+       for (i=0; i < DZ_NB_PORT;  i++) {
+               info = &multi[i]; 
+               lines[i] = info;
+               info->magic = SERIAL_MAGIC;
+
+               if ((mips_machtype == MACH_DS23100) ||
+                   (mips_machtype == MACH_DS5100)) 
+                       info->port = (unsigned long) KN01_DZ11_BASE;
+               else 
+                       info->port = (unsigned long) KN02_DZ11_BASE;
+
+               info->line = i;
+               info->tty = 0;
+               info->close_delay = 50;
+               info->closing_wait = 3000;
+               info->x_char = 0;
+               info->event = 0;
+               info->count = 0;
+               info->blocked_open = 0;
+               info->tqueue.routine = do_softint;
+               info->tqueue.data = info;
+               info->tqueue_hangup.routine = do_serial_hangup;
+               info->tqueue_hangup.data = info;
+               init_waitqueue_head(&info->open_wait); 
+               init_waitqueue_head(&info->close_wait); 
+
+               /*
+                * If the port address is zero then punt - setup.c has not
+                * set this line up correctly.
+                */
+               if (! info->port)
+                       return 0;
+
+               printk("ttyS%02d at 0x%08x (irq = %d)\n", info->line,
+                      info->port, SERIAL);
+
+               tty_register_device(serial_driver, info->line, NULL);
+       }
+
+       /* Reset the chip */
+#ifndef CONFIG_SERIAL_CONSOLE
+       {
+               int tmp;
+               dz_out(info, DZ_CSR, DZ_CLR);
+               while ((tmp = dz_in(info,DZ_CSR)) & DZ_CLR);
+               wbflush();
+  
+               /* Enable scanning */
+               dz_out(info, DZ_CSR, DZ_MSE); 
+       }
+#endif
+  
+       /*
+        * Order matters here... the interrupt state is updated by
+        * request_irq(), so immediately obliterating it with restore_flags()
+        * afterwards would be unwise.
+        */
+       restore_flags(flags);
+
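+       /*
+        * All four DZ lines share one interrupt; dz_interrupt() is expected
+        * to work out which line needs service, so lines[0] is passed as the
+        * shared cookie.
+        */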
+       if (request_irq(SERIAL, dz_interrupt, SA_INTERRUPT, "DZ", lines[0]))
+               panic("Unable to register DZ interrupt\n");
+       return 0;
+}
+
+#ifdef CONFIG_SERIAL_CONSOLE
+static void dz_console_put_char (unsigned char ch)
+{
+       unsigned long flags;
+       int  loops = 2500;
+       unsigned short tmp = ch;
+       /*
+        * Polled console output: busy-wait (bounded by 'loops') until the
+        * transmitter is ready, then send the character.
+        */
+
+       /* force the issue - point it at lines[3]*/
+       dz_console = &multi[CONSOLE_LINE];
+
+       save_and_cli(flags);
+
+       /* spin our wheels */
+       while (((dz_in(dz_console, DZ_CSR) & DZ_TRDY) != DZ_TRDY) &&  loops--)
+               ;
+  
+       /* Actually transmit the character. */
+       dz_out(dz_console, DZ_TDR, tmp);
+
+       restore_flags(flags); 
+}
+
+/* 
+ * -------------------------------------------------------------------
+ * dz_console_print ()
+ *
+ * dz_console_print is registered for printk.
+ * The console must be locked when we get here.
+ * ------------------------------------------------------------------- 
+ */
+static void dz_console_print (struct console *cons, 
+                             const char *str, 
+                             unsigned int count)
+{
+#ifdef DEBUG_DZ
+       prom_printf((char *)str);
+#endif
+       while (count--) {
+               if (*str == '\n')
+                       dz_console_put_char('\r');
+               dz_console_put_char(*str++);
+       }
+}
+
+static struct tty_driver *dz_console_device(struct console *c, int *index)
+{
+       *index = c->index;
+       return serial_driver;
+}
+
+static int __init dz_console_setup(struct console *co, char *options)
+{
+       int baud = 9600;
+       int bits = 8;
+       int parity = 'n';
+       int cflag = CREAD | HUPCL | CLOCAL;
+       char *s;
+       unsigned short mask,tmp;
+
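+       /* Console options follow the usual <baud><parity><bits> form, e.g. "9600n8". */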
+       if (options) {
+               baud = simple_strtoul(options, NULL, 10);
+               s = options;
+               while (*s >= '0' && *s <= '9')
+                       s++;
+               if (*s)
+                       parity = *s++;
+               if (*s)
+                       bits   = *s - '0';
+       }
+
+       /*
+        * Now construct a cflag setting.
+        */
+       switch (baud) {
+       case 1200:
+               cflag |= DZ_B1200;
+               break;
+       case 2400:
+               cflag |= DZ_B2400;
+               break;
+       case 4800:
+               cflag |= DZ_B4800;
+               break;
+       case 9600:
+       default:
+               cflag |= DZ_B9600;
+               break;
+       }
+       switch (bits) {
+       case 7:
+               cflag |= DZ_CS7;
+               break;
+       default:
+       case 8:
+               cflag |= DZ_CS8;
+               break;
+       }
+       switch (parity) {
+       case 'o':
+       case 'O':
+               cflag |= DZ_PARODD;
+               break;
+       case 'e':
+       case 'E':
+               cflag |= DZ_PARENB;
+               break;
+       }
+       co->cflag = cflag;
+
+       /* TOFIX: force to console line */
+       dz_console = &multi[CONSOLE_LINE];
+       if ((mips_machtype == MACH_DS23100) || (mips_machtype == MACH_DS5100)) 
+               dz_console->port = KN01_DZ11_BASE;
+       else 
+               dz_console->port = KN02_DZ11_BASE; 
+       dz_console->line = CONSOLE_LINE;
+
+       dz_out(dz_console, DZ_CSR, DZ_CLR);
+       while ((tmp = dz_in(dz_console,DZ_CSR)) & DZ_CLR)
+               ;
+
+       /* enable scanning */
+       dz_out(dz_console, DZ_CSR, DZ_MSE); 
+
+       /*  Set up flags... */
+       dz_console->cflags = 0;
+       dz_console->cflags |= DZ_B9600;
+       dz_console->cflags |= DZ_CS8;
+       dz_console->cflags |= DZ_PARENB;
+       dz_out(dz_console, DZ_LPR, dz_console->cflags);
+
+       mask = 1 << dz_console->line;
+       tmp = dz_in (dz_console, DZ_TCR);               /* read the TX flag */
+       if (!(tmp & mask)) {
+               tmp |= mask;                            /* set the TX flag */
+               dz_out (dz_console, DZ_TCR, tmp); 
+       }
+
+       return 0;
+}
+
+static struct console dz_sercons = {
+    .name      = "ttyS",
+    .write     = dz_console_print,
+    .device    = dz_console_device,
+    .setup     = dz_console_setup,
+    .flags     = CON_CONSDEV | CON_PRINTBUFFER,
+    .index     = CONSOLE_LINE,
+};
+
+void __init dz_serial_console_init(void)
+{
+       register_console(&dz_sercons);
+}
+
+#endif /* ifdef CONFIG_SERIAL_CONSOLE */
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/dz.h b/drivers/char/dz.h
new file mode 100644 (file)
index 0000000..989f927
--- /dev/null
@@ -0,0 +1,230 @@
+/*
+ * dz.h: Serial port driver for DECstations equipped
+ *       with the DZ chipset.
+ *
+ * Copyright (C) 1998 Olivier A. D. Lebaillif 
+ *             
+ * Email: olivier.lebaillif@ifrsys.com
+ *
+ */
+#ifndef DZ_SERIAL_H
+#define DZ_SERIAL_H
+
+/*
+ * Definitions for the Control and Status Received.
+ */
+#define DZ_TRDY        0x8000                 /* Transmitter empty */
+#define DZ_TIE         0x4000                 /* Transmitter Interrupt Enable */
+#define DZ_RDONE       0x0080                 /* Receiver data ready */
+#define DZ_RIE         0x0040                 /* Receive Interrupt Enable */
+#define DZ_MSE         0x0020                 /* Master Scan Enable */
+#define DZ_CLR         0x0010                 /* Master reset */
+#define DZ_MAINT       0x0008                 /* Loop Back Mode */
+
+/*
+ * Definitions for the Received buffer. 
+ */
+#define DZ_RBUF_MASK   0x00FF                 /* Data Mask in the Receive Buffer */
+#define DZ_LINE_MASK   0x0300                 /* Line Mask in the Receive Buffer */
+#define DZ_DVAL        0x8000                 /* Valid Data indicator */
+#define DZ_OERR        0x4000                 /* Overrun error indicator */
+#define DZ_FERR        0x2000                 /* Frame error indicator */
+#define DZ_PERR        0x1000                 /* Parity error indicator */
+
+#define LINE(x) (((x) & DZ_LINE_MASK) >> 8)   /* Get the line number from the input buffer */
+#define UCHAR(x) ((unsigned char)((x) & DZ_RBUF_MASK))
+
+/*
+ * Definitions for the Transmit Register.
+ */
+#define DZ_LINE_KEYBOARD 0x0001
+#define DZ_LINE_MOUSE    0x0002
+#define DZ_LINE_MODEM    0x0004
+#define DZ_LINE_PRINTER  0x0008
+
+#define DZ_MODEM_DTR     0x0400               /* DTR for the modem line (2) */
+
+/*
+ * Definitions for the Modem Status Register.
+ */
+#define DZ_MODEM_DSR     0x0200               /* DSR for the modem line (2) */
+
+/*
+ * Definitions for the Transmit Data Register.
+ */
+#define DZ_BRK0          0x0100               /* Break assertion for line 0 */
+#define DZ_BRK1          0x0200               /* Break assertion for line 1 */
+#define DZ_BRK2          0x0400               /* Break assertion for line 2 */
+#define DZ_BRK3          0x0800               /* Break assertion for line 3 */
+
+/*
+ * Definitions for the Line Parameter Register.
+ */
+#define DZ_KEYBOARD      0x0000               /* line 0 = keyboard */
+#define DZ_MOUSE         0x0001               /* line 1 = mouse */
+#define DZ_MODEM         0x0002               /* line 2 = modem */
+#define DZ_PRINTER       0x0003               /* line 3 = printer */
+
+#define DZ_CSIZE         0x0018               /* Number of bits per byte (mask) */
+#define DZ_CS5           0x0000               /* 5 bits per byte */
+#define DZ_CS6           0x0008               /* 6 bits per byte */
+#define DZ_CS7           0x0010               /* 7 bits per byte */
+#define DZ_CS8           0x0018               /* 8 bits per byte */
+
+#define DZ_CSTOPB        0x0020               /* 2 stop bits instead of one */ 
+
+#define DZ_PARENB        0x0040               /* Parity enable */
+#define DZ_PARODD        0x0080               /* Odd parity instead of even */
+
+#define DZ_CBAUD         0x0E00               /* Baud Rate (mask) */
+#define DZ_B50           0x0000
+#define DZ_B75           0x0100
+#define DZ_B110          0x0200
+#define DZ_B134          0x0300
+#define DZ_B150          0x0400
+#define DZ_B300          0x0500
+#define DZ_B600          0x0600
+#define DZ_B1200         0x0700 
+#define DZ_B1800         0x0800
+#define DZ_B2000         0x0900
+#define DZ_B2400         0x0A00
+#define DZ_B3600         0x0B00
+#define DZ_B4800         0x0C00
+#define DZ_B7200         0x0D00
+#define DZ_B9600         0x0E00
+
+#define DZ_CREAD         0x1000               /* Enable receiver */
+#define DZ_RXENAB        0x1000               /* enable receive char */
+/*
+ * Addresses for the DZ registers
+ */
+#define DZ_CSR       0x00            /* Control and Status Register */
+#define DZ_RBUF      0x08            /* Receive Buffer */
+#define DZ_LPR       0x08            /* Line Parameters Register */
+#define DZ_TCR       0x10            /* Transmitter Control Register */
+#define DZ_MSR       0x18            /* Modem Status Register */
+#define DZ_TDR       0x18            /* Transmit Data Register */
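+/*
+ * RBUF/LPR and MSR/TDR share an offset: reads hit the receive buffer and
+ * modem status registers, writes hit the line parameter and transmit data
+ * registers.
+ */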
+
+
+#define DZ_NB_PORT 4
+
+#define DZ_XMIT_SIZE   4096                 /* buffer size */
+#define WAKEUP_CHARS   (DZ_XMIT_SIZE / 4)
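+/* The transmit path conventionally schedules a write wakeup once the queue drains below this. */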
+
+#define DZ_EVENT_WRITE_WAKEUP   0
+
+#ifndef MIN
+#define MIN(a,b)        ((a) < (b) ? (a) : (b))
+#endif
+
+#define DZ_INITIALIZED       0x80000000 /* Serial port was initialized */
+#define DZ_CALLOUT_ACTIVE    0x40000000 /* Call out device is active */
+#define DZ_NORMAL_ACTIVE     0x20000000 /* Normal device is active */
+#define DZ_BOOT_AUTOCONF     0x10000000 /* Autoconfigure port on bootup */
+#define DZ_CLOSING           0x08000000 /* Serial port is closing */
+#define DZ_CTS_FLOW          0x04000000 /* Do CTS flow control */
+#define DZ_CHECK_CD          0x02000000 /* i.e., CLOCAL */
+
+#define DZ_CLOSING_WAIT_INF  0
+#define DZ_CLOSING_WAIT_NONE 65535
+
+#define DZ_SPLIT_TERMIOS   0x0008 /* Separate termios for dialin/callout */
+#define DZ_SESSION_LOCKOUT 0x0100 /* Lock out cua opens based on session */
+#define DZ_PGRP_LOCKOUT    0x0200 /* Lock out cua opens based on pgrp */
+
+struct dz_serial {
+  unsigned                port;                /* base address for the port */
+  int                     type;
+  int                     flags; 
+  int                     baud_base;
+  int                     blocked_open;
+  unsigned short          close_delay;
+  unsigned short          closing_wait;
+  unsigned short          line;                /* port/line number */
+  unsigned short          cflags;              /* line configuration flag */
+  unsigned short          x_char;              /* xon/xoff character */
+  unsigned short          read_status_mask;    /* mask for read condition */
+  unsigned short          ignore_status_mask;  /* mask for ignore condition */
+  unsigned long           event;               /* mask used in BH */
+  unsigned char           *xmit_buf;           /* Transmit buffer */
+  int                     xmit_head;           /* Position of the head */
+  int                     xmit_tail;           /* Position of the tail */
+  int                     xmit_cnt;            /* Count of the chars in the buffer */
+  int                     count;               /* indicates how many times it has been opened */
+  int                     magic;
+
+  struct async_icount     icount;              /* keep track of things ... */
+  struct tty_struct       *tty;                /* tty associated */
+  struct tq_struct        tqueue;              /* Queue for BH */
+  struct tq_struct        tqueue_hangup;
+  wait_queue_head_t       open_wait;
+  wait_queue_head_t       close_wait;
+
+  unsigned char           is_console;          /* flag indicating a serial console */
+  unsigned char           is_initialized;
+};
+
+static struct dz_serial multi[DZ_NB_PORT];    /* Four serial lines in the DZ chip */
+static struct dz_serial *dz_console;
+
+/*
+ * tmp_buf is used as a temporary buffer by serial_write.  We need to
+ * lock it in case the copy_from_user blocks while swapping in a page,
+ * and some other program tries to do a serial write at the same time.
+ * Since the lock will only come under contention when the system is
+ * swapping and available memory is low, it makes sense to share one
+ * buffer across all the serial ports, since it significantly saves
+ * memory if large numbers of serial ports are open.
+ */
+static unsigned char *tmp_buf;
+static DECLARE_MUTEX(tmp_buf_sem);
+
+static char *dz_name = "DECstation DZ serial driver version ";
+static char *dz_version = "1.02";
+
+static inline unsigned short dz_in (struct dz_serial *, unsigned);
+static inline void dz_out (struct dz_serial *, unsigned, unsigned short);
+
+static inline void dz_sched_event (struct dz_serial *, int);
+static inline void receive_chars (struct dz_serial *);
+static inline void transmit_chars (struct dz_serial *);
+static inline void check_modem_status (struct dz_serial *);
+
+static void dz_stop (struct tty_struct *);
+static void dz_start (struct tty_struct *);
+static void dz_interrupt (int, void *, struct pt_regs *);
+static void do_serial_bh (void);
+static void do_softint (void *);
+static void do_serial_hangup (void *);
+static void change_speed (struct dz_serial *);
+static void dz_flush_chars (struct tty_struct *);
+static void dz_console_print (struct console *, const char *, unsigned int);
+static void dz_flush_buffer (struct tty_struct *);
+static void dz_throttle (struct tty_struct *);
+static void dz_unthrottle (struct tty_struct *);
+static void dz_send_xchar (struct tty_struct *, char);
+static void shutdown (struct dz_serial *);
+static void send_break (struct dz_serial *, int);
+static void dz_set_termios (struct tty_struct *, struct termios *);
+static void dz_close (struct tty_struct *, struct file *);
+static void dz_hangup (struct tty_struct *);
+static void show_serial_version (void);
+
+static int dz_write (struct tty_struct *, int, const unsigned char *, int);
+static int dz_write_room (struct tty_struct *);
+static int dz_chars_in_buffer (struct tty_struct *);
+static int startup (struct dz_serial *);
+static int get_serial_info (struct dz_serial *, struct serial_struct *);
+static int set_serial_info (struct dz_serial *, struct serial_struct *);
+static int get_lsr_info (struct dz_serial *, unsigned int *);
+static int dz_ioctl (struct tty_struct *, struct file *, unsigned int, unsigned long);
+static int block_til_ready (struct tty_struct *, struct file *, struct dz_serial *);
+static int dz_open (struct tty_struct *, struct file *);
+
+#ifdef MODULE
+int init_module (void);
+void cleanup_module (void);
+#endif
+
+#endif /* DZ_SERIAL_H */
diff --git a/drivers/char/sh-sci.c b/drivers/char/sh-sci.c
new file mode 100644 (file)
index 0000000..d3894a6
--- /dev/null
@@ -0,0 +1,1646 @@
+/* $Id: sh-sci.c,v 1.16 2004/02/10 17:04:17 lethal Exp $
+ *
+ *  linux/drivers/char/sh-sci.c
+ *
+ *  SuperH on-chip serial module support.  (SCI with no FIFO / with FIFO)
+ *  Copyright (C) 1999, 2000  Niibe Yutaka
+ *  Copyright (C) 2000  Sugioka Toshinobu
+ *  Modified to support multiple serial ports. Stuart Menefy (May 2000).
+ *  Modified to support SH7760 SCIF. Paul Mundt (Oct 2003).
+ *  Modified to support H8/300 Series. Yoshinori Sato (Feb 2004).
+ *
+ * TTY code is based on sx.c (Specialix SX driver) by:
+ *
+ *   (C) 1998 R.E.Wolff@BitWizard.nl
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/interrupt.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/serial.h>
+#include <linux/major.h>
+#include <linux/string.h>
+#include <linux/fcntl.h>
+#include <linux/ptrace.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#if defined(CONFIG_SERIAL_CONSOLE) || defined(CONFIG_SH_KGDB_CONSOLE)
+#include <linux/console.h>
+#endif
+#ifdef CONFIG_CPU_FREQ
+#include <linux/notifier.h>
+#include <linux/cpufreq.h>
+#endif
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/bitops.h>
+
+#include <linux/generic_serial.h>
+
+#ifdef CONFIG_SH_STANDARD_BIOS
+#include <asm/sh_bios.h>
+#endif
+
+#include "sh-sci.h"
+
+#ifdef CONFIG_SH_KGDB
+#include <asm/kgdb.h>
+
+int kgdb_sci_setup(void);
+static int kgdb_get_char(struct sci_port *port);
+static void kgdb_put_char(struct sci_port *port, char c);
+static void kgdb_handle_error(struct sci_port *port);
+static struct sci_port *kgdb_sci_port;
+
+#ifdef CONFIG_SH_KGDB_CONSOLE
+static struct console kgdbcons;
+void __init kgdb_console_init(void);
+#endif /* CONFIG_SH_KGDB_CONSOLE */
+
+#endif /* CONFIG_SH_KGDB */
+
+#ifdef CONFIG_SERIAL_CONSOLE
+static struct console sercons;
+static struct sci_port* sercons_port=0;
+static int sercons_baud;
+#ifdef CONFIG_MAGIC_SYSRQ
+#include <linux/sysrq.h>
+static int break_pressed;
+#endif /* CONFIG_MAGIC_SYSRQ */
+#endif /* CONFIG_SERIAL_CONSOLE */
+
+/* Function prototypes */
+static void sci_init_pins_sci(struct sci_port* port, unsigned int cflag);
+#ifndef SCI_ONLY
+static void sci_init_pins_scif(struct sci_port* port, unsigned int cflag);
+#if defined(CONFIG_CPU_SH3)
+static void sci_init_pins_irda(struct sci_port* port, unsigned int cflag);
+#endif
+#endif
+static void sci_disable_tx_interrupts(void *ptr);
+static void sci_enable_tx_interrupts(void *ptr);
+static void sci_disable_rx_interrupts(void *ptr);
+static void sci_enable_rx_interrupts(void *ptr);
+static int  sci_get_CD(void *ptr);
+static void sci_shutdown_port(void *ptr);
+static int sci_set_real_termios(void *ptr);
+static void sci_hungup(void *ptr);
+static void sci_close(void *ptr);
+static int sci_chars_in_buffer(void *ptr);
+static int sci_request_irq(struct sci_port *port);
+static void sci_free_irq(struct sci_port *port);
+static int sci_init_drivers(void);
+
+static struct tty_driver *sci_driver;
+
+static struct sci_port sci_ports[SCI_NPORTS] = SCI_INIT;
+
+static int sci_debug = 0;
+
+#ifdef MODULE
+MODULE_PARM(sci_debug, "i");
+#endif
+
+#define dprintk(x...) do { if (sci_debug) printk(x); } while(0)
+
+#ifdef CONFIG_SERIAL_CONSOLE
+static void put_char(struct sci_port *port, char c)
+{
+       unsigned long flags;
+       unsigned short status;
+
+       local_irq_save(flags);
+
+       do
+               status = sci_in(port, SCxSR);
+       while (!(status & SCxSR_TDxE(port)));
+       
+       sci_out(port, SCxTDR, c);
+       sci_in(port, SCxSR);            /* Dummy read */
+       sci_out(port, SCxSR, SCxSR_TDxE_CLEAR(port));
+
+       local_irq_restore(flags);
+}
+#endif
+
+#if defined(CONFIG_SH_STANDARD_BIOS) || defined(CONFIG_SH_KGDB)
+
+static void handle_error(struct sci_port *port)
+{                              /* Clear error flags */
+       sci_out(port, SCxSR, SCxSR_ERROR_CLEAR(port));
+}
+
+static int get_char(struct sci_port *port)
+{
+       unsigned long flags;
+       unsigned short status;
+       int c;
+
+       local_irq_save(flags);
+        do {
+               status = sci_in(port, SCxSR);
+               if (status & SCxSR_ERRORS(port)) {
+                       handle_error(port);
+                       continue;
+               }
+       } while (!(status & SCxSR_RDxF(port)));
+       c = sci_in(port, SCxRDR);
+       sci_in(port, SCxSR);            /* Dummy read */
+       sci_out(port, SCxSR, SCxSR_RDxF_CLEAR(port));
+       local_irq_restore(flags);
+
+       return c;
+}
+
+/* Taken from sh-stub.c of GDB 4.18 */
+static const char hexchars[] = "0123456789abcdef";
+
+static __inline__ char highhex(int  x)
+{
+       return hexchars[(x >> 4) & 0xf];
+}
+
+static __inline__ char lowhex(int  x)
+{
+       return hexchars[x & 0xf];
+}
+
+#endif /* CONFIG_SH_STANDARD_BIOS || CONFIG_SH_KGDB */
+
+/*
+ * Send the packet in buffer.  The host gets one chance to read it.
+ * This routine does not wait for a positive acknowledge.
+ */
+
+#ifdef CONFIG_SERIAL_CONSOLE
+static void put_string(struct sci_port *port, const char *buffer, int count)
+{
+       int i;
+       const unsigned char *p = buffer;
+
+#if defined(CONFIG_SH_STANDARD_BIOS) || defined(CONFIG_SH_KGDB)
+       int checksum;
+       int usegdb=0;
+
+#ifdef CONFIG_SH_STANDARD_BIOS
+       /* This call only does a trap the first time it is
+        * called, and so is safe to do here unconditionally
+        */
+       usegdb |= sh_bios_in_gdb_mode();
+#endif
+#ifdef CONFIG_SH_KGDB
+       usegdb |= (kgdb_in_gdb_mode && (port == kgdb_sci_port));
+#endif
+
+       if (usegdb) {
+           /* Send "$O<hex payload>#<checksum>" and resend until GDB acks with '+'. */
+           do {
+               unsigned char c;
+               put_char(port, '$');
+               put_char(port, 'O'); /* 'O'utput to console */
+               checksum = 'O';
+
+               for (i=0; i<count; i++) { /* Don't use run length encoding */
+                       int h, l;
+
+                       c = *p++;
+                       h = highhex(c);
+                       l = lowhex(c);
+                       put_char(port, h);
+                       put_char(port, l);
+                       checksum += h + l;
+               }
+               put_char(port, '#');
+               put_char(port, highhex(checksum));
+               put_char(port, lowhex(checksum));
+           } while  (get_char(port) != '+');
+       } else
+#endif /* CONFIG_SH_STANDARD_BIOS || CONFIG_SH_KGDB */
+       for (i=0; i<count; i++) {
+               if (*p == 10)
+                       put_char(port, '\r');
+               put_char(port, *p++);
+       }
+}
+#endif /* CONFIG_SERIAL_CONSOLE */
+
+
+#ifdef CONFIG_SH_KGDB
+
+/* Is the SCI ready, ie is there a char waiting? */
+static int kgdb_is_char_ready(struct sci_port *port)
+{
+        unsigned short status = sci_in(port, SCxSR);
+
+        if (status & (SCxSR_ERRORS(port) | SCxSR_BRK(port)))
+                kgdb_handle_error(port);
+
+        return (status & SCxSR_RDxF(port));
+}
+
+/* Write a char */
+static void kgdb_put_char(struct sci_port *port, char c)
+{
+        unsigned short status;
+
+        do
+                status = sci_in(port, SCxSR);
+        while (!(status & SCxSR_TDxE(port)));
+
+        sci_out(port, SCxTDR, c);
+        sci_in(port, SCxSR);    /* Dummy read */
+        sci_out(port, SCxSR, SCxSR_TDxE_CLEAR(port));
+}
+
+/* Get a char if there is one, else ret -1 */
+static int kgdb_get_char(struct sci_port *port)
+{
+        int c;
+
+        if (kgdb_is_char_ready(port) == 0)
+                c = -1;
+        else {
+                c = sci_in(port, SCxRDR);
+                sci_in(port, SCxSR);    /* Dummy read */
+                sci_out(port, SCxSR, SCxSR_RDxF_CLEAR(port));
+        }
+
+        return c;
+}
+
+/* Called from kgdbstub.c to get a character, i.e. is blocking */
+static int kgdb_sci_getchar(void)
+{
+        volatile int c;
+
+        /* Keep trying to read a character, this could be neater */
+        while ((c = kgdb_get_char(kgdb_sci_port)) < 0);
+
+        return c;
+}
+
+/* Called from kgdbstub.c to put a character, just a wrapper */
+static void kgdb_sci_putchar(int c)
+{
+
+        kgdb_put_char(kgdb_sci_port, c);
+}
+
+/* Clear any errors on the SCI */
+static void kgdb_handle_error(struct sci_port *port)
+{
+        sci_out(port, SCxSR, SCxSR_ERROR_CLEAR(port));  /* Clear error flags */
+}
+
+/* Breakpoint if there's a break sent on the serial port */
+static void kgdb_break_interrupt(int irq, void *ptr, struct pt_regs *regs)
+{
+        struct sci_port *port = ptr;
+        unsigned short status = sci_in(port, SCxSR);
+
+        if (status & SCxSR_BRK(port)) {
+
+                /* Break into the debugger if a break is detected */
+                BREAKPOINT();
+
+                /* Clear */
+                sci_out(port, SCxSR, SCxSR_BREAK_CLEAR(port));
+                return;
+        }
+}
+
+#endif /* CONFIG_SH_KGDB */
+
+static struct real_driver sci_real_driver = {
+       sci_disable_tx_interrupts,
+       sci_enable_tx_interrupts,
+       sci_disable_rx_interrupts,
+       sci_enable_rx_interrupts,
+       sci_get_CD,
+       sci_shutdown_port,
+       sci_set_real_termios,
+       sci_chars_in_buffer,
+        sci_close,
+        sci_hungup,
+       NULL
+};
+
+#if !defined(__H8300H__) && !defined(__H8300S__)
+#if defined(SCI_ONLY) || defined(SCI_AND_SCIF)
+static void sci_init_pins_sci(struct sci_port* port, unsigned int cflag)
+{
+}
+#endif
+
+#if defined(SCIF_ONLY) || defined(SCI_AND_SCIF)
+#if defined(CONFIG_CPU_SH3)
+/* For SH7707, SH7709, SH7709A, SH7729 */
+static void sci_init_pins_scif(struct sci_port* port, unsigned int cflag)
+{
+       unsigned int fcr_val = 0;
+
+       {
+               unsigned short data;
+
+               /* We need to set SCPCR to enable RTS/CTS */
+               data = ctrl_inw(SCPCR);
+               /* Clear out SCP7MD1,0, SCP6MD1,0, SCP4MD1,0*/
+               ctrl_outw(data&0x0cff, SCPCR);
+       }
+       if (cflag & CRTSCTS)
+               fcr_val |= SCFCR_MCE;
+       else {
+               unsigned short data;
+
+               /* We need to set SCPCR to enable RTS/CTS */
+               data = ctrl_inw(SCPCR);
+               /* Clear out SCP7MD1,0, SCP4MD1,0,
+                  Set SCP6MD1,0 = {01} (output)  */
+               ctrl_outw((data&0x0cff)|0x1000, SCPCR);
+
+               data = ctrl_inb(SCPDR);
+               /* Set /RTS2 (bit6) = 0 */
+               ctrl_outb(data&0xbf, SCPDR);
+       }
+       sci_out(port, SCFCR, fcr_val);
+}
+
+static void sci_init_pins_irda(struct sci_port* port, unsigned int cflag)
+{
+       unsigned int fcr_val = 0;
+
+       if (cflag & CRTSCTS)
+               fcr_val |= SCFCR_MCE;
+
+       sci_out(port, SCFCR, fcr_val);
+}
+
+#else
+
+/* For SH7750 */
+static void sci_init_pins_scif(struct sci_port* port, unsigned int cflag)
+{
+       unsigned int fcr_val = 0;
+
+       if (cflag & CRTSCTS) {
+               fcr_val |= SCFCR_MCE;
+       } else {
+               ctrl_outw(0x0080, SCSPTR2); /* Set RTS = 1 */
+       }
+       sci_out(port, SCFCR, fcr_val);
+}
+
+#endif
+#endif /* SCIF_ONLY || SCI_AND_SCIF */
+#else /* !defined(__H8300H__) && !defined(__H8300S__) */
+static void sci_init_pins_sci(struct sci_port* port, unsigned int cflag)
+{
+       int ch = (port->base - SMR0) >> 3;
+       /* set DDR regs */
+       H8300_GPIO_DDR(h8300_sci_pins[ch].port,h8300_sci_pins[ch].rx,H8300_GPIO_INPUT);
+       H8300_GPIO_DDR(h8300_sci_pins[ch].port,h8300_sci_pins[ch].tx,H8300_GPIO_OUTPUT);
+       /* tx mark output*/
+       H8300_SCI_DR(ch) |= h8300_sci_pins[ch].tx;
+}
+
+#if defined(__H8300S__)
+enum {sci_disable,sci_enable};
+
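+/*
+ * On H8S parts each SCI channel is gated by a module-stop bit in MSTPCRL;
+ * this helper sets or clears that bit for the port's channel.
+ */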
+static void h8300_sci_enable(struct sci_port* port, unsigned int ctrl)
+{
+       volatile unsigned char *mstpcrl=(volatile unsigned char *)MSTPCRL;
+       int ch = (port->base  - SMR0) >> 3;
+       unsigned char mask = 1 << (ch+1);
+       if (ctrl == sci_disable)
+               *mstpcrl |= mask;
+       else
+               *mstpcrl &= ~mask;
+}
+#endif
+#endif
+
+static void sci_setsignals(struct sci_port *port, int dtr, int rts)
+{
+       /* This routine is used for setting signals of: DTR, DCD, CTS/RTS */
+       /* We use SCIF's hardware for CTS/RTS, so don't need any for that. */
+       /* If you have signals for DTR and DCD, please implement here. */
+       ;
+}
+
+static int sci_getsignals(struct sci_port *port)
+{
+       /* This routine is used for getting signals of: DTR, DCD, DSR, RI,
+          and CTS/RTS */
+
+       return TIOCM_DTR|TIOCM_RTS|TIOCM_DSR;
+/*
+       (((o_stat & OP_DTR)?TIOCM_DTR:0) |
+        ((o_stat & OP_RTS)?TIOCM_RTS:0) |
+        ((i_stat & IP_CTS)?TIOCM_CTS:0) |
+        ((i_stat & IP_DCD)?TIOCM_CAR:0) |
+        ((i_stat & IP_DSR)?TIOCM_DSR:0) |
+        ((i_stat & IP_RI) ?TIOCM_RNG:0)
+*/
+}
+
+static void sci_set_baud(struct sci_port *port, int baud)
+{
+       int t;
+
+       switch (baud) {
+       case 0:
+               t = -1;
+               break;
+       case 2400:
+               t = BPS_2400;
+               break;
+       case 4800:
+               t = BPS_4800;
+               break;
+       case 9600:
+               t = BPS_9600;
+               break;
+       case 19200:
+               t = BPS_19200;
+               break;
+       case 38400:
+               t = BPS_38400;
+               break;
+       case 57600:
+               t = BPS_57600;
+               break;
+       default:
+               printk(KERN_INFO "sci: unsupported baud rate: %d, using 115200 instead.\n", baud);
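+               /* fall through */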
+       case 115200:
+               t = BPS_115200;
+               break;
+       }
+
+       if (t > 0) {
+               sci_setsignals (port, 1, -1);
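+               /*
+                * Divisors that will not fit the 8-bit SCBRR get the /4 clock
+                * selected in SMR (low two bits = 01) and the divisor scaled
+                * down to match.
+                */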
+               if(t >= 256) {
+                       sci_out(port, SCSMR, (sci_in(port, SCSMR) & ~3) | 1);
+                       t >>= 2;
+               } else {
+                       sci_out(port, SCSMR, sci_in(port, SCSMR) & ~3);
+               }
+               sci_out(port, SCBRR, t);
+               udelay((1000000+(baud-1)) / baud); /* Wait one bit interval */
+       } else {
+               sci_setsignals (port, 0, -1);
+       }
+}
+
+static void sci_set_termios_cflag(struct sci_port *port, int cflag, int baud)
+{
+       unsigned int status;
+       unsigned int smr_val;
+
+       do
+               status = sci_in(port, SCxSR);
+       while (!(status & SCxSR_TEND(port)));
+
+       sci_out(port, SCSCR, 0x00);     /* TE=0, RE=0, CKE1=0 */
+
+#if !defined(SCI_ONLY)
+       if (port->type == PORT_SCIF) {
+               sci_out(port, SCFCR, SCFCR_RFRST | SCFCR_TFRST);
+       }
+#endif
+
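+       /*
+        * SMR: 0x40 selects 7-bit data, 0x20 enables parity, 0x10 selects odd
+        * parity, 0x08 selects two stop bits; the low two bits (clock select)
+        * are preserved.
+        */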
+       smr_val = sci_in(port, SCSMR) & 3;
+       if ((cflag & CSIZE) == CS7)
+               smr_val |= 0x40;
+       if (cflag & PARENB)
+               smr_val |= 0x20;
+       if (cflag & PARODD)
+               smr_val |= 0x30;
+       if (cflag & CSTOPB)
+               smr_val |= 0x08;
+       sci_out(port, SCSMR, smr_val);
+       sci_set_baud(port, baud);
+
+       port->init_pins(port, cflag);
+       sci_out(port, SCSCR, SCSCR_INIT(port));
+}
+
+static int sci_set_real_termios(void *ptr)
+{
+       struct sci_port *port = ptr;
+
+       if (port->old_cflag != port->gs.tty->termios->c_cflag) {
+               port->old_cflag = port->gs.tty->termios->c_cflag;
+               sci_set_termios_cflag(port, port->old_cflag, port->gs.baud);
+               sci_enable_rx_interrupts(port);
+       }
+
+       return 0;
+}
+
+/* ********************************************************************** *
+ *                   the interrupt related routines                       *
+ * ********************************************************************** */
+
+/*
+ * This routine is used by the interrupt handler to schedule
+ * processing in the software interrupt portion of the driver.
+ */
+static inline void sci_sched_event(struct sci_port *port, int event)
+{
+       port->event |= 1 << event;
+       schedule_work(&port->tqueue);
+}
+
+static void sci_transmit_chars(struct sci_port *port)
+{
+       int count, i;
+       int txroom;
+       unsigned long flags;
+       unsigned short status;
+       unsigned short ctrl;
+       unsigned char c;
+
+       status = sci_in(port, SCxSR);
+       if (!(status & SCxSR_TDxE(port))) {
+               local_irq_save(flags);
+               ctrl = sci_in(port, SCSCR);
+               if (port->gs.xmit_cnt == 0) {
+                       ctrl &= ~SCI_CTRL_FLAGS_TIE;
+                       port->gs.flags &= ~GS_TX_INTEN;
+               } else
+                       ctrl |= SCI_CTRL_FLAGS_TIE;
+               sci_out(port, SCSCR, ctrl);
+               local_irq_restore(flags);
+               return;
+       }
+
+       while (1) {
+               count = port->gs.xmit_cnt;
+#if !defined(SCI_ONLY)
+               if (port->type == PORT_SCIF) {
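+                       /* SCFDR[15:8] is the number of bytes pending in the 16-deep TX FIFO. */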
+                       txroom = 16 - (sci_in(port, SCFDR)>>8);
+               } else {
+                       txroom = (sci_in(port, SCxSR) & SCI_TDRE)?1:0;
+               }
+#else
+               txroom = (sci_in(port, SCxSR) & SCI_TDRE)?1:0;
+#endif
+               if (count > txroom)
+                       count = txroom;
+
+               /* Don't copy past the end of the source buffer */
+               if (count > SERIAL_XMIT_SIZE - port->gs.xmit_tail)
+                       count = SERIAL_XMIT_SIZE - port->gs.xmit_tail;
+
+               /* If for one reason or another, we can't copy more data, we're done! */
+               if (count == 0)
+                       break;
+
+               for (i=0; i<count; i++) {
+                       c = port->gs.xmit_buf[port->gs.xmit_tail + i];
+                       sci_out(port, SCxTDR, c);
+               }
+               sci_out(port, SCxSR, SCxSR_TDxE_CLEAR(port));
+
+               port->icount.tx += count;
+
+               /* Update the kernel buffer end */
+               port->gs.xmit_tail = (port->gs.xmit_tail + count) & (SERIAL_XMIT_SIZE-1);
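+               /* SERIAL_XMIT_SIZE is a power of two, so the tail wraps with a mask. */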
+
+               /* Update xmit_cnt last (this is essential): once it drops,
+                  others may start putting more data into the buffer. */
+               port->gs.xmit_cnt -= count;
+       }
+
+       if (port->gs.xmit_cnt <= port->gs.wakeup_chars)
+               sci_sched_event(port, SCI_EVENT_WRITE_WAKEUP);
+
+       local_irq_save(flags);
+       ctrl = sci_in(port, SCSCR);
+       if (port->gs.xmit_cnt == 0) {
+               ctrl &= ~SCI_CTRL_FLAGS_TIE;
+               port->gs.flags &= ~GS_TX_INTEN;
+       } else {
+#if !defined(SCI_ONLY)
+               if (port->type == PORT_SCIF) {
+                       sci_in(port, SCxSR); /* Dummy read */
+                       sci_out(port, SCxSR, SCxSR_TDxE_CLEAR(port));
+               }
+#endif
+               ctrl |= SCI_CTRL_FLAGS_TIE;
+       }
+       sci_out(port, SCSCR, ctrl);
+       local_irq_restore(flags);
+}
+
+/* On SH3, SCIF may read end-of-break as a space->mark char */
+#define STEPFN(c)  ({int __c=(c); (((__c-1)|(__c)) == -1); })
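+/*
+ * With the signed char values read below, STEPFN(c) is true for 0x00, 0x80,
+ * 0xc0, ... 0xff - the bit patterns produced while the line steps from space
+ * back to mark partway through a character frame.
+ */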
+
+static inline void sci_receive_chars(struct sci_port *port,
+                                    struct pt_regs *regs)
+{
+       int i, count;
+       struct tty_struct *tty;
+       int copied=0;
+       unsigned short status;
+
+       status = sci_in(port, SCxSR);
+       if (!(status & SCxSR_RDxF(port)))
+               return;
+
+       tty = port->gs.tty;
+       while (1) {
+#if !defined(SCI_ONLY)
+               if (port->type == PORT_SCIF) {
+                       count = sci_in(port, SCFDR)&0x001f;
+               } else {
+                       count = (sci_in(port, SCxSR)&SCxSR_RDxF(port))?1:0;
+               }
+#else
+               count = (sci_in(port, SCxSR)&SCxSR_RDxF(port))?1:0;
+#endif
+
+               /* Don't copy more bytes than there is room for in the buffer */
+               if (tty->flip.count + count > TTY_FLIPBUF_SIZE)
+                       count = TTY_FLIPBUF_SIZE - tty->flip.count;
+
+               /* If for any reason we can't copy more data, we're done! */
+               if (count == 0)
+                       break;
+
+               if (port->type == PORT_SCI) {
+                       tty->flip.char_buf_ptr[0] = sci_in(port, SCxRDR);
+                       tty->flip.flag_buf_ptr[0] = TTY_NORMAL;
+               } else {
+                       for (i=0; i<count; i++) {
+                               char c = sci_in(port, SCxRDR);
+                               status = sci_in(port, SCxSR);
+#if defined(__SH3__)
+                               /* Skip "chars" during break */
+                               if (port->break_flag) {
+                                       if ((c == 0) &&
+                                           (status & SCxSR_FER(port))) {
+                                               count--; i--;
+                                               continue;
+                                       }
+                                       /* Nonzero => end-of-break */
+                                       dprintk("scif: debounce<%02x>\n", c);
+                                       port->break_flag = 0;
+                                       if (STEPFN(c)) {
+                                               count--; i--;
+                                               continue;
+                                       }
+                               }
+#endif /* __SH3__ */
+#if defined(CONFIG_SERIAL_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
+                               if (break_pressed && (port == sercons_port)) {
+                                       if (c != 0 &&
+                                           time_before(jiffies,
+                                                       break_pressed + HZ*5)) {
+                                               handle_sysrq(c, regs, NULL);
+                                               break_pressed = 0;
+                                               count--; i--;
+                                               continue;
+                                       } else if (c != 0) {
+                                               break_pressed = 0;
+                                       }
+                               }
+#endif /* CONFIG_SERIAL_CONSOLE && CONFIG_MAGIC_SYSRQ */
+
+                               /* Store data and status */
+                               tty->flip.char_buf_ptr[i] = c;
+                               if (status&SCxSR_FER(port)) {
+                                       tty->flip.flag_buf_ptr[i] = TTY_FRAME;
+                                       dprintk("sci: frame error\n");
+                               } else if (status&SCxSR_PER(port)) {
+                                       tty->flip.flag_buf_ptr[i] = TTY_PARITY;
+                                       dprintk("sci: parity error\n");
+                               } else {
+                                       tty->flip.flag_buf_ptr[i] = TTY_NORMAL;
+                               }
+                       }
+               }
+
+               sci_in(port, SCxSR); /* dummy read */
+               sci_out(port, SCxSR, SCxSR_RDxF_CLEAR(port));
+
+               /* Update the kernel buffer end */
+               tty->flip.count += count;
+               tty->flip.char_buf_ptr += count;
+               tty->flip.flag_buf_ptr += count;
+
+               copied += count;
+               port->icount.rx += count;
+       }
+
+       if (copied)
+               /* Tell the rest of the system the news. New characters! */
+               tty_flip_buffer_push(tty);
+       else {
+               sci_in(port, SCxSR); /* dummy read */
+               sci_out(port, SCxSR, SCxSR_RDxF_CLEAR(port));
+       }
+}
+
+static inline int sci_handle_errors(struct sci_port *port)
+{
+       int copied = 0;
+       unsigned short status = sci_in(port, SCxSR);
+       struct tty_struct *tty = port->gs.tty;
+
+       if (status&SCxSR_ORER(port) && tty->flip.count<TTY_FLIPBUF_SIZE) {
+               /* overrun error */
+               copied++;
+               *tty->flip.flag_buf_ptr++ = TTY_OVERRUN;
+               dprintk("sci: overrun error\n");
+       }
+
+       if (status&SCxSR_FER(port) && tty->flip.count<TTY_FLIPBUF_SIZE) {
+               if (sci_rxd_in(port) == 0) {
+                       /* Notify of BREAK */
+                       copied++;
+                       *tty->flip.flag_buf_ptr++ = TTY_BREAK;
+                       dprintk("sci: BREAK detected\n");
+               }
+               else {
+                       /* frame error */
+                       copied++;
+                       *tty->flip.flag_buf_ptr++ = TTY_FRAME;
+                       dprintk("sci: frame error\n");
+               }
+       }
+
+       if (status&SCxSR_PER(port) && tty->flip.count<TTY_FLIPBUF_SIZE) {
+               /* parity error */
+               copied++;
+               *tty->flip.flag_buf_ptr++ = TTY_PARITY;
+               dprintk("sci: parity error\n");
+       }
+
+       if (copied) {
+               tty->flip.count += copied;
+               tty_flip_buffer_push(tty);
+       }
+
+       return copied;
+}
+
+static inline int sci_handle_breaks(struct sci_port *port)
+{
+       int copied = 0;
+       unsigned short status = sci_in(port, SCxSR);
+       struct tty_struct *tty = port->gs.tty;
+
+       if (status&SCxSR_BRK(port) && tty->flip.count<TTY_FLIPBUF_SIZE) {
+#if defined(__SH3__)
+               /* Debounce break */
+               if (port->break_flag)
+                       goto break_continue;
+               port->break_flag = 1;
+#endif
+#if defined(CONFIG_SERIAL_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
+               if (port == sercons_port) {
+                       if (break_pressed == 0) {
+                               break_pressed = jiffies;
+                               dprintk("sci: implied sysrq\n");
+                               goto break_continue;
+                       }
+                       /* Double break implies a real break */
+                       break_pressed = 0;
+               }
+#endif
+               /* Notify of BREAK */
+               copied++;
+               *tty->flip.flag_buf_ptr++ = TTY_BREAK;
+               dprintk("sci: BREAK detected\n");
+       }
+ break_continue:
+
+#if defined(CONFIG_CPU_SUBTYPE_SH7750) || defined(CONFIG_CPU_SUBTYPE_ST40STB1) || \
+    defined(CONFIG_CPU_SUBTYPE_SH7760)
+       /* XXX: Handle SCIF overrun error */
+       if (port->type == PORT_SCIF && (sci_in(port, SCLSR) & SCIF_ORER) != 0) {
+               sci_out(port, SCLSR, 0);
+               if(tty->flip.count<TTY_FLIPBUF_SIZE) {
+                       copied++;
+                       *tty->flip.flag_buf_ptr++ = TTY_OVERRUN;
+                       dprintk("sci: overrun error\n");
+               }
+       }
+#endif
+
+       if (copied) {
+               tty->flip.count += copied;
+               tty_flip_buffer_push(tty);
+       }
+
+       return copied;
+}
+
+static irqreturn_t sci_rx_interrupt(int irq, void *ptr, struct pt_regs *regs)
+{
+       struct sci_port *port = ptr;
+
+       if (port->gs.flags & GS_ACTIVE)
+               if (!(port->gs.flags & SCI_RX_THROTTLE)) {
+                       sci_receive_chars(port, regs);
+                       return IRQ_HANDLED;
+
+               }
+       sci_disable_rx_interrupts(port);
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t sci_tx_interrupt(int irq, void *ptr, struct pt_regs *regs)
+{
+       struct sci_port *port = ptr;
+
+       if (port->gs.flags & GS_ACTIVE)
+               sci_transmit_chars(port);
+       else {
+               sci_disable_tx_interrupts(port);
+       }
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t sci_er_interrupt(int irq, void *ptr, struct pt_regs *regs)
+{
+       struct sci_port *port = ptr;
+
+       /* Handle errors */
+       if (port->type == PORT_SCI) {
+               if(sci_handle_errors(port)) {
+                       /* discard character in rx buffer */
+                       sci_in(port, SCxSR);
+                       sci_out(port, SCxSR, SCxSR_RDxF_CLEAR(port));
+               }
+       }
+       else
+               sci_rx_interrupt(irq, ptr, regs);
+               
+       sci_out(port, SCxSR, SCxSR_ERROR_CLEAR(port));
+
+       /* Kick the transmission */
+       sci_tx_interrupt(irq, ptr, regs);
+
+       return IRQ_HANDLED;
+}
+
+#if !defined(SCI_ONLY)
+static irqreturn_t sci_br_interrupt(int irq, void *ptr, struct pt_regs *regs)
+{
+       struct sci_port *port = ptr;
+
+       /* Handle BREAKs */
+       sci_handle_breaks(port);
+       sci_out(port, SCxSR, SCxSR_BREAK_CLEAR(port));
+
+       return IRQ_HANDLED;
+}
+#endif
+
+static void do_softint(void *private_)
+{
+       struct sci_port *port = (struct sci_port *) private_;
+       struct tty_struct       *tty;
+       
+       tty = port->gs.tty;
+       if (!tty)
+               return;
+
+       if (test_and_clear_bit(SCI_EVENT_WRITE_WAKEUP, &port->event)) {
+               if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
+                   tty->ldisc.write_wakeup)
+                       (tty->ldisc.write_wakeup)(tty);
+               wake_up_interruptible(&tty->write_wait);
+       }
+}
+
+/* ********************************************************************** *
+ *                Here are the routines that actually                     *
+ *              interface with the generic_serial driver                  *
+ * ********************************************************************** */
+
+static void sci_disable_tx_interrupts(void *ptr)
+{
+       struct sci_port *port = ptr;
+       unsigned long flags;
+       unsigned short ctrl;
+
+       /* Clear TIE (Transmit Interrupt Enable) bit in SCSCR */
+       local_irq_save(flags);
+       ctrl = sci_in(port, SCSCR);
+       ctrl &= ~SCI_CTRL_FLAGS_TIE;
+       sci_out(port, SCSCR, ctrl);
+       local_irq_restore(flags);
+}
+
+static void sci_enable_tx_interrupts(void *ptr)
+{
+       struct sci_port *port = ptr; 
+
+       disable_irq(port->irqs[SCIx_TXI_IRQ]);
+       sci_transmit_chars(port);
+       enable_irq(port->irqs[SCIx_TXI_IRQ]);
+}
+
+static void sci_disable_rx_interrupts(void * ptr)
+{
+       struct sci_port *port = ptr;
+       unsigned long flags;
+       unsigned short ctrl;
+
+       /* Clear RIE (Receive Interrupt Enable) bit in SCSCR */
+       local_irq_save(flags);
+       ctrl = sci_in(port, SCSCR);
+       ctrl &= ~SCI_CTRL_FLAGS_RIE;
+       sci_out(port, SCSCR, ctrl);
+       local_irq_restore(flags);
+}
+
+static void sci_enable_rx_interrupts(void * ptr)
+{
+       struct sci_port *port = ptr;
+       unsigned long flags;
+       unsigned short ctrl;
+
+       /* Set RIE (Receive Interrupt Enable) bit in SCSCR */
+       local_irq_save(flags);
+       ctrl = sci_in(port, SCSCR);
+       ctrl |= SCI_CTRL_FLAGS_RIE;
+       sci_out(port, SCSCR, ctrl);
+       local_irq_restore(flags);
+}
+
+static int sci_get_CD(void * ptr)
+{
+       /* If you have signal for CD (Carrier Detect), please change here. */
+       return 1;
+}
+
+static int sci_chars_in_buffer(void * ptr)
+{
+       struct sci_port *port = ptr;
+
+#if !defined(SCI_ONLY)
+       if (port->type == PORT_SCIF) {
+               return (sci_in(port, SCFDR) >> 8) + ((sci_in(port, SCxSR) & SCxSR_TEND(port))? 0: 1);
+       } else {
+               return (sci_in(port, SCxSR) & SCxSR_TEND(port))? 0: 1;
+       }
+#else
+       return (sci_in(port, SCxSR) & SCxSR_TEND(port))? 0: 1;
+#endif
+}
+
+static void sci_shutdown_port(void * ptr)
+{
+       struct sci_port *port = ptr; 
+
+       port->gs.flags &= ~ GS_ACTIVE;
+       if (port->gs.tty && port->gs.tty->termios->c_cflag & HUPCL)
+               sci_setsignals(port, 0, 0);
+       sci_free_irq(port);
+#if defined(__H8300S__)
+       h8300_sci_enable(port,sci_disable);
+#endif
+}
+
+/* ********************************************************************** *
+ *                Here are the routines that actually                     *
+ *               interface with the rest of the system                    *
+ * ********************************************************************** */
+
+static int sci_open(struct tty_struct * tty, struct file * filp)
+{
+       struct sci_port *port;
+       int retval, line;
+
+       line = tty->index;
+
+       if ((line < 0) || (line >= SCI_NPORTS))
+               return -ENODEV;
+
+       port = &sci_ports[line];
+
+       tty->driver_data = port;
+       port->gs.tty = tty;
+       port->gs.count++;
+
+       port->event = 0;
+       INIT_WORK(&port->tqueue, do_softint, port);
+
+#if defined(__H8300S__)
+               h8300_sci_enable(port,sci_enable);
+#endif
+
+       /*
+        * Start up serial port
+        */
+       retval = gs_init_port(&port->gs);
+       if (retval) {
+               goto failed_1;
+       }
+
+       port->gs.flags |= GS_ACTIVE;
+       sci_setsignals(port, 1,1);
+
+       if (port->gs.count == 1) {
+               retval = sci_request_irq(port);
+       }
+
+       retval = gs_block_til_ready(port, filp);
+
+       if (retval) {
+               goto failed_3;
+       }
+
+#ifdef CONFIG_SERIAL_CONSOLE
+       if (sercons.cflag && sercons.index == line) {
+               tty->termios->c_cflag = sercons.cflag;
+               port->gs.baud = sercons_baud;
+               sercons.cflag = 0;
+               sci_set_real_termios(port);
+       }
+#endif
+
+#ifdef CONFIG_SH_KGDB_CONSOLE
+        if (kgdbcons.cflag && kgdbcons.index == line) {
+                tty->termios->c_cflag = kgdbcons.cflag;
+                port->gs.baud = kgdb_baud;
+                kgdbcons.cflag = 0;
+                sci_set_real_termios(port);
+        }
+#endif
+
+       sci_enable_rx_interrupts(port);
+
+       return 0;
+
+failed_3:
+       sci_free_irq(port);
+failed_1:
+       port->gs.count--;
+       return retval;
+}
+
+static void sci_hungup(void *ptr)
+{
+        return;
+}
+
+static void sci_close(void *ptr)
+{
+        return;
+}
+
+static int sci_tiocmget(struct tty_struct *tty, struct file *file)
+{
+       struct sci_port *port = tty->driver_data;
+       return sci_getsignals(port);
+}
+
+static int sci_tiocmset(struct tty_struct *tty, struct file *file,
+                       unsigned int set, unsigned int clear)
+{
+       struct sci_port *port = tty->driver_data;
+       int rts = -1, dtr = -1;
+
+       if (set & TIOCM_RTS)
+               rts = 1;
+       if (set & TIOCM_DTR)
+               dtr = 1;
+       if (clear & TIOCM_RTS)
+               rts = 0;
+       if (clear & TIOCM_DTR)
+               dtr = 0;
+
+       sci_setsignals(port, dtr, rts);
+       return 0;
+}
+
+static int sci_ioctl(struct tty_struct * tty, struct file * filp, 
+                     unsigned int cmd, unsigned long arg)
+{
+       int rc;
+       struct sci_port *port = tty->driver_data;
+       int ival;
+
+       rc = 0;
+       switch (cmd) {
+       case TIOCGSOFTCAR:
+               rc = put_user(((tty->termios->c_cflag & CLOCAL) ? 1 : 0),
+                             (unsigned int __user *) arg);
+               break;
+       case TIOCSSOFTCAR:
+               if ((rc = get_user(ival, (unsigned int __user *) arg)) == 0)
+                       tty->termios->c_cflag =
+                               (tty->termios->c_cflag & ~CLOCAL) |
+                               (ival ? CLOCAL : 0);
+               break;
+       case TIOCGSERIAL:
+               if ((rc = verify_area(VERIFY_WRITE, (void __user *) arg,
+                                     sizeof(struct serial_struct))) == 0)
+                       rc = gs_getserial(&port->gs, (struct serial_struct *) arg);
+               break;
+       case TIOCSSERIAL:
+               if ((rc = verify_area(VERIFY_READ, (void __user *) arg,
+                                     sizeof(struct serial_struct))) == 0)
+                       rc = gs_setserial(&port->gs,
+                                         (struct serial_struct *) arg);
+               break;
+       default:
+               rc = -ENOIOCTLCMD;
+               break;
+       }
+
+       return rc;
+}
+
+static void sci_throttle(struct tty_struct * tty)
+{
+       struct sci_port *port = (struct sci_port *)tty->driver_data;
+
+       /* If the port is using any type of input flow
+        * control then throttle the port.
+        */
+       if ((tty->termios->c_cflag & CRTSCTS) || (I_IXOFF(tty)) )
+               port->gs.flags |= SCI_RX_THROTTLE;
+}
+
+static void sci_unthrottle(struct tty_struct * tty)
+{
+       struct sci_port *port = (struct sci_port *)tty->driver_data;
+
+       /* Always unthrottle even if flow control is not enabled on
+        * this port in case we disabled flow control while the port
+        * was throttled
+        */
+       port->gs.flags &= ~SCI_RX_THROTTLE;
+       sci_enable_rx_interrupts(port);
+       return;
+}
+
+#ifdef CONFIG_PROC_FS
+static int sci_read_proc(char *page, char **start, off_t off, int count,
+                        int *eof, void *data)
+{
+       int i;
+       struct sci_port *port;
+       int len = 0;
+       
+        len += sprintf(page, "sciinfo:0.1\n");
+       for (i = 0; i < SCI_NPORTS && len < 4000; i++) {
+               port = &sci_ports[i];
+               len += sprintf(page+len, "%d: uart:%s address: %08x", i,
+                              (port->type == PORT_SCI) ? "SCI" : "SCIF",
+                              port->base);
+               len += sprintf(page+len, " baud:%d", port->gs.baud);
+               len += sprintf(page+len, " tx:%d rx:%d",
+                              port->icount.tx, port->icount.rx);
+
+               if (port->icount.frame)
+                       len += sprintf(page+len, " fe:%d", port->icount.frame);
+               if (port->icount.parity)
+                       len += sprintf(page+len, " pe:%d", port->icount.parity);
+               if (port->icount.brk)
+                       len += sprintf(page+len, " brk:%d", port->icount.brk);
+               if (port->icount.overrun)
+                       len += sprintf(page+len, " oe:%d", port->icount.overrun);
+               len += sprintf(page+len, "\n");
+       }
+       return len;
+}
+#endif
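+
+/*
+ * Illustrative /proc output only (the numbers are made up): with a single
+ * SCIF port the routine above emits something like
+ *
+ *   sciinfo:0.1
+ *   0: uart:SCIF address: ffe80000 baud:9600 tx:42 rx:17
+ *
+ * with the fe:/pe:/brk:/oe: error counters appended only when non-zero.
+ */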
+
+#ifdef CONFIG_CPU_FREQ
+/*
+ * Here we define a transition notifier so that we can update all of our
+ * ports' baud rate when the peripheral clock changes.
+ */
+
+static int sci_notifier(struct notifier_block *self, unsigned long phase, void *p)
+{
+       struct cpufreq_freqs *freqs = p;
+       int i;
+
+       if (phase == CPUFREQ_POSTCHANGE) {
+               for (i = 0; i < SCI_NPORTS; i++) {
+                       /*
+                        * This will force a baud rate change in hardware.
+                        */
+                       if (sci_ports[i].gs.tty != NULL) {
+                               sci_set_baud(&sci_ports[i], sci_ports[i].gs.baud);
+                       }
+               }
+               printk("%s: got a postchange notification for cpu %d (old %d, new %d)\n",
+                               __FUNCTION__, freqs->cpu, freqs->old, freqs->new);
+       }
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block sci_nb = { &sci_notifier, NULL, 0 };
+#endif /* CONFIG_CPU_FREQ */
+
+static struct tty_operations sci_ops = {
+       .open   = sci_open,
+       .close = gs_close,
+       .write = gs_write,
+       .put_char = gs_put_char,
+       .flush_chars = gs_flush_chars,
+       .write_room = gs_write_room,
+       .chars_in_buffer = gs_chars_in_buffer,
+       .flush_buffer = gs_flush_buffer,
+       .ioctl = sci_ioctl,
+       .throttle = sci_throttle,
+       .unthrottle = sci_unthrottle,
+       .set_termios = gs_set_termios,
+       .stop = gs_stop,
+       .start = gs_start,
+       .hangup = gs_hangup,
+#ifdef CONFIG_PROC_FS
+       .read_proc = sci_read_proc,
+#endif
+       .tiocmget = sci_tiocmget,
+       .tiocmset = sci_tiocmset,
+};
+
+/* ********************************************************************** *
+ *                    Here are the initialization routines.               *
+ * ********************************************************************** */
+
+static int sci_init_drivers(void)
+{
+       int error;
+       struct sci_port *port;
+       sci_driver = alloc_tty_driver(SCI_NPORTS);
+       if (!sci_driver)
+               return -ENOMEM;
+
+       sci_driver->owner = THIS_MODULE;
+       sci_driver->driver_name = "sci";
+       sci_driver->name = "ttySC";
+       sci_driver->devfs_name = "ttsc/";
+       sci_driver->major = SCI_MAJOR;
+       sci_driver->minor_start = SCI_MINOR_START;
+       sci_driver->type = TTY_DRIVER_TYPE_SERIAL;
+       sci_driver->subtype = SERIAL_TYPE_NORMAL;
+       sci_driver->init_termios = tty_std_termios;
+       sci_driver->init_termios.c_cflag =
+               B9600 | CS8 | CREAD | HUPCL | CLOCAL | CRTSCTS;
+       sci_driver->flags = TTY_DRIVER_REAL_RAW;
+       tty_set_operations(sci_driver, &sci_ops);
+       if ((error = tty_register_driver(sci_driver))) {
+               printk(KERN_ERR "sci: Couldn't register SCI driver, error = %d\n",
+                      error);
+               put_tty_driver(sci_driver);
+               return 1;
+       }
+
+       for (port = &sci_ports[0]; port < &sci_ports[SCI_NPORTS]; port++) {
+               port->gs.magic = SCI_MAGIC;
+               port->gs.close_delay = HZ/2;
+               port->gs.closing_wait = 30 * HZ;
+               port->gs.rd = &sci_real_driver;
+               init_waitqueue_head(&port->gs.open_wait);
+               init_waitqueue_head(&port->gs.close_wait);
+               port->old_cflag = 0;
+               port->icount.cts = port->icount.dsr = 
+                       port->icount.rng = port->icount.dcd = 0;
+               port->icount.rx = port->icount.tx = 0;
+               port->icount.frame = port->icount.parity = 0;
+               port->icount.overrun = port->icount.brk = 0;
+       }
+
+#ifdef CONFIG_CPU_FREQ
+       /* Setup transition notifier */
+       if (cpufreq_register_notifier(&sci_nb, CPUFREQ_TRANSITION_NOTIFIER) < 0) {
+               printk(KERN_ERR "sci: Unable to register CPU frequency notifier\n");
+               return 1;
+       }
+       printk("sci: CPU frequency notifier registered\n");
+#endif
+       return 0;
+}
+
+static int sci_request_irq(struct sci_port *port)
+{
+       int i;
+#if !defined(SCI_ONLY)
+       irqreturn_t (*handlers[4])(int irq, void *p, struct pt_regs *regs) = {
+               sci_er_interrupt, sci_rx_interrupt, sci_tx_interrupt,
+               sci_br_interrupt,
+       };
+#else
+       irqreturn_t (*handlers[3])(int irq, void *ptr, struct pt_regs *regs) = {
+               sci_er_interrupt, sci_rx_interrupt, sci_tx_interrupt,
+       };
+#endif
+       for (i=0; i<(sizeof(handlers)/sizeof(handlers[0])); i++) {
+               if (!port->irqs[i]) continue;
+               if (request_irq(port->irqs[i], handlers[i], SA_INTERRUPT,
+                               "sci", port)) {
+                       printk(KERN_ERR "sci: Cannot allocate irq.\n");
+                       return -ENODEV;
+               }
+       }
+       return 0;
+}
+
+static void sci_free_irq(struct sci_port *port)
+{
+       int i;
+
+       for (i=0; i<4; i++) {
+               if (!port->irqs[i]) continue;
+               free_irq(port->irqs[i], port);
+       }
+}
+
+static char banner[] __initdata =
+       KERN_INFO "SuperH SCI(F) driver initialized\n";
+
+int __init sci_init(void)
+{
+       struct sci_port *port;
+       int j;
+
+       printk("%s", banner);
+
+       for (j=0; j<SCI_NPORTS; j++) {
+               port = &sci_ports[j];
+               printk(KERN_INFO "ttySC%d at 0x%08x is a %s\n", j, port->base,
+                      (port->type == PORT_SCI) ? "SCI" : "SCIF");
+       }
+
+       sci_init_drivers();
+
+#ifdef CONFIG_SH_STANDARD_BIOS
+       sh_bios_gdb_detach();
+#endif
+       return 0;               /* Return -EIO when not detected */
+}
+
+module_init(sci_init);
+
+#ifdef MODULE
+#undef func_enter
+#undef func_exit
+
+void cleanup_module(void)
+{
+       tty_unregister_driver(sci_driver);
+       put_tty_driver(sci_driver);
+}
+
+#include "generic_serial.c"
+#endif
+
+#ifdef CONFIG_SERIAL_CONSOLE
+/*
+ *     Print a string to the serial port trying not to disturb
+ *     any possible real use of the port...
+ */
+static void serial_console_write(struct console *co, const char *s,
+                                unsigned count)
+{
+       put_string(sercons_port, s, count);
+}
+
+static struct tty_driver *serial_console_device(struct console *c, int *index)
+{
+       *index = c->index;
+       return sci_driver;
+}
+
+/*
+ *     Setup initial baud/bits/parity. We do two things here:
+ *     - construct a cflag setting for the first rs_open()
+ *     - initialize the serial port
+ *     Return non-zero if we didn't find a serial port.
+ */
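+/*
+ * For example, with the usual kernel console= syntax, booting with
+ * "console=ttySC1,115200n8" reaches this function with co->index == 1 and
+ * options == "115200n8", which the code below turns into 115200 baud,
+ * no parity, 8 data bits.
+ */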
+static int __init serial_console_setup(struct console *co, char *options)
+{
+       int     baud = 9600;
+       int     bits = 8;
+       int     parity = 'n';
+       int     cflag = CREAD | HUPCL | CLOCAL;
+       char    *s;
+
+       sercons_port = &sci_ports[co->index];
+
+       if (options) {
+               baud = simple_strtoul(options, NULL, 10);
+               s = options;
+               while(*s >= '0' && *s <= '9')
+                       s++;
+               if (*s) parity = *s++;
+               if (*s) bits   = *s - '0';
+       }
+
+       /*
+        *      Now construct a cflag setting.
+        */
+       switch (baud) {
+               case 19200:
+                       cflag |= B19200;
+                       break;
+               case 38400:
+                       cflag |= B38400;
+                       break;
+               case 57600:
+                       cflag |= B57600;
+                       break;
+               case 115200:
+                       cflag |= B115200;
+                       break;
+               case 9600:
+               default:
+                       cflag |= B9600;
+                       baud = 9600;
+                       break;
+       }
+       switch (bits) {
+               case 7:
+                       cflag |= CS7;
+                       break;
+               default:
+               case 8:
+                       cflag |= CS8;
+                       break;
+       }
+       switch (parity) {
+               case 'o': case 'O':
+                       cflag |= PARODD;
+                       break;
+               case 'e': case 'E':
+                       cflag |= PARENB;
+                       break;
+       }
+
+       co->cflag = cflag;
+       sercons_baud = baud;
+
+#if defined(__H8300S__)
+       h8300_sci_enable(sercons_port,sci_enable);
+#endif
+       sci_set_termios_cflag(sercons_port, cflag, baud);
+       sercons_port->old_cflag = cflag;
+
+       return 0;
+}
+
+static struct console sercons = {
+       .name           = "ttySC",
+       .write          = serial_console_write,
+       .device         = serial_console_device,
+       .setup          = serial_console_setup,
+       .flags          = CON_PRINTBUFFER,
+       .index          = -1,
+};
+
+/*
+ *     Register console.
+ */
+
+#ifdef CONFIG_SH_EARLY_PRINTK
+extern void sh_console_unregister (void);
+#endif
+
+static int __init sci_console_init(void)
+{
+       register_console(&sercons);
+#ifdef CONFIG_SH_EARLY_PRINTK
+       /* Now that the real console is available, unregister the one we
+        * used while first booting.
+        */
+       sh_console_unregister();
+#endif
+       return 0;
+}
+console_initcall(sci_console_init);
+
+#endif /* CONFIG_SERIAL_CONSOLE */
+
+
+#ifdef CONFIG_SH_KGDB
+
+/* Initialise the KGDB serial port */
+int kgdb_sci_setup(void)
+{
+       int cflag = CREAD | HUPCL | CLOCAL;
+
+       if ((kgdb_portnum < 0) || (kgdb_portnum >= SCI_NPORTS))
+               return -1;
+
+        kgdb_sci_port = &sci_ports[kgdb_portnum];
+
+       switch (kgdb_baud) {
+        case 115200:
+                cflag |= B115200;
+                break;
+       case 57600:
+                cflag |= B57600;
+                break;
+        case 38400:
+                cflag |= B38400;
+                break;
+        case 19200:
+                cflag |= B19200;
+                break;
+        case 9600:
+        default:
+                cflag |= B9600;
+                kgdb_baud = 9600;
+                break;
+        }
+
+       switch (kgdb_bits) {
+        case '7':
+                cflag |= CS7;
+                break;
+        default:
+        case '8':
+                cflag |= CS8;
+                break;
+        }
+
+        switch (kgdb_parity) {
+        case 'O':
+                cflag |= PARODD;
+                break;
+        case 'E':
+                cflag |= PARENB;
+                break;
+        }
+
+        kgdb_cflag = cflag;
+        sci_set_termios_cflag(kgdb_sci_port, kgdb_cflag, kgdb_baud);
+
+        /* Set up the interrupt for BREAK from GDB */
+       /* Commented out for now since it may not be possible yet...
+          request_irq(kgdb_sci_port->irqs[0], kgdb_break_interrupt,
+                      SA_INTERRUPT, "sci", kgdb_sci_port);
+          sci_enable_rx_interrupts(kgdb_sci_port);
+       */
+
+       /* Setup complete: initialize function pointers */
+       kgdb_getchar = kgdb_sci_getchar;
+       kgdb_putchar = kgdb_sci_putchar;
+
+        return 0;
+}
+
+#ifdef CONFIG_SH_KGDB_CONSOLE
+
+/* Create a console device */
+static kdev_t kgdb_console_device(struct console *c)
+{
+        return MKDEV(SCI_MAJOR, SCI_MINOR_START + c->index);
+}
+
+/* Set up the KGDB console */
+static int __init kgdb_console_setup(struct console *co, char *options)
+{
+        /* NB we ignore 'options' because we've already done the setup */
+        co->cflag = kgdb_cflag;
+
+        return 0;
+}
+
+/* Register the KGDB console so we get messages (d'oh!) */
+void __init kgdb_console_init(void)
+{
+        register_console(&kgdbcons);
+}
+
+/* The console structure for KGDB */
+static struct console kgdbcons = {
+        name:"ttySC",
+        write:kgdb_console_write,
+        device:kgdb_console_device,
+        wait_key:serial_console_wait_key,
+        setup:kgdb_console_setup,
+        flags:CON_PRINTBUFFER | CON_ENABLED,
+        index:-1,
+};
+
+#endif /* CONFIG_SH_KGDB_CONSOLE */
+
+#endif /* CONFIG_SH_KGDB */
diff --git a/drivers/char/sh-sci.h b/drivers/char/sh-sci.h
new file mode 100644 (file)
index 0000000..5d07cd1
--- /dev/null
@@ -0,0 +1,478 @@
+/* $Id: sh-sci.h,v 1.7 2004/02/10 17:04:17 lethal Exp $
+ *
+ *  linux/drivers/char/sh-sci.h
+ *
+ *  SuperH on-chip serial module support.  (SCI with no FIFO / with FIFO)
+ *  Copyright (C) 1999, 2000  Niibe Yutaka
+ *  Copyright (C) 2000  Greg Banks
+ *  Modified to support multiple serial ports. Stuart Menefy (May 2000).
+ *  Modified to support SH7760 SCIF. Paul Mundt (Oct 2003).
+ *  Modified to support H8/300 series. Yoshinori Sato (Feb 2004).
+ *
+ */
+#include <linux/config.h>
+
+#if defined(__H8300H__) || defined(__H8300S__)
+#include <asm/gpio.h>
+#if defined(CONFIG_H83007) || defined(CONFIG_H83068)
+#include <asm/regs306x.h>
+#endif
+#if defined(CONFIG_H8S2678)
+#include <asm/regs267x.h>
+#endif
+#endif
+
+/* Values for sci_port->type */
+#define PORT_SCI  0
+#define PORT_SCIF 1
+#define PORT_IRDA 1            /* XXX: temporary assignment */
+
+/* Offsets into the sci_port->irqs array */
+#define SCIx_ERI_IRQ 0
+#define SCIx_RXI_IRQ 1
+#define SCIx_TXI_IRQ 2
+
+/*                     ERI, RXI, TXI, BRI */
+#define SCI_IRQS      { 23,  24,  25,   0 }
+#define SH3_SCIF_IRQS { 56,  57,  59,  58 }
+#define SH3_IRDA_IRQS { 52,  53,  55,  54 }
+#define SH4_SCIF_IRQS { 40,  41,  43,  42 }
+#define STB1_SCIF1_IRQS {23, 24,  26,  25 }
+#define SH7760_SCIF0_IRQS { 52, 53, 55, 54 }
+#define SH7760_SCIF1_IRQS { 72, 73, 75, 74 }
+#define SH7760_SCIF2_IRQS { 76, 77, 79, 78 }
+#define H8300H_SCI_IRQS0 {52, 53, 54,   0 }
+#define H8300H_SCI_IRQS1 {56, 57, 58,   0 }
+#define H8300H_SCI_IRQS2 {60, 61, 62,   0 }
+#define H8S_SCI_IRQS0 {88, 89, 90,   0 }
+#define H8S_SCI_IRQS1 {92, 93, 94,   0 }
+#define H8S_SCI_IRQS2 {96, 97, 98,   0 }
+
+#if defined(CONFIG_CPU_SUBTYPE_SH7708)
+# define SCI_NPORTS 1
+# define SCI_INIT { \
+  { {}, PORT_SCI,  0xfffffe80, SCI_IRQS,      sci_init_pins_sci  } \
+}
+# define SCSPTR 0xffffff7c /* 8 bit */
+# define SCSCR_INIT(port)          0x30 /* TIE=0,RIE=0,TE=1,RE=1 */
+# define SCI_ONLY
+#elif defined(CONFIG_CPU_SUBTYPE_SH7707) || defined(CONFIG_CPU_SUBTYPE_SH7709)
+# define SCI_NPORTS 3
+# define SCI_INIT { \
+  { {}, PORT_SCI,  0xfffffe80, SCI_IRQS,      sci_init_pins_sci  }, \
+  { {}, PORT_SCIF, 0xA4000150, SH3_SCIF_IRQS, sci_init_pins_scif }, \
+  { {}, PORT_SCIF, 0xA4000140, SH3_IRDA_IRQS, sci_init_pins_irda }  \
+}
+# define SCPCR  0xA4000116 /* 16 bit SCI and SCIF */
+# define SCPDR  0xA4000136 /* 8  bit SCI and SCIF */
+# define SCSCR_INIT(port)          0x30 /* TIE=0,RIE=0,TE=1,RE=1 */
+# define SCI_AND_SCIF
+#elif defined(CONFIG_CPU_SUBTYPE_SH7750) || defined(CONFIG_CPU_SUBTYPE_SH7751)
+# define SCI_NPORTS 2
+# define SCI_INIT { \
+  { {}, PORT_SCI,  0xffe00000, SCI_IRQS,      sci_init_pins_sci  }, \
+  { {}, PORT_SCIF, 0xFFE80000, SH4_SCIF_IRQS, sci_init_pins_scif }  \
+}
+# define SCSPTR1 0xffe0001c /* 8  bit SCI */
+# define SCSPTR2 0xFFE80020 /* 16 bit SCIF */
+# define SCIF_ORER 0x0001   /* overrun error bit */
+# define SCSCR_INIT(port) (((port)->type == PORT_SCI) ? \
+       0x30 /* TIE=0,RIE=0,TE=1,RE=1 */ : \
+       0x38 /* TIE=0,RIE=0,TE=1,RE=1,REIE=1 */ )
+# define SCI_AND_SCIF
+#elif defined(CONFIG_CPU_SUBTYPE_SH7760)
+# define SCI_NPORTS 3
+# define SCI_INIT { \
+  { {}, PORT_SCIF, 0xfe600000, SH7760_SCIF0_IRQS, sci_init_pins_scif }, \
+  { {}, PORT_SCIF, 0xfe610000, SH7760_SCIF1_IRQS, sci_init_pins_scif }, \
+  { {}, PORT_SCIF, 0xfe620000, SH7760_SCIF2_IRQS, sci_init_pins_scif }  \
+}
+# define SCSPTR0 0xfe600024 /* 16 bit SCIF */
+# define SCSPTR1 0xfe610024 /* 16 bit SCIF */
+# define SCSPTR2 0xfe620024 /* 16 bit SCIF */
+# define SCIF_ORER 0x0001   /* overrun error bit */
+# define SCSCR_INIT(port)          0x38 /* TIE=0,RIE=0,TE=1,RE=1,REIE=1 */
+# define SCIF_ONLY
+#elif defined(CONFIG_CPU_SUBTYPE_ST40STB1)
+# define SCI_NPORTS 2
+# define SCI_INIT { \
+  { {}, PORT_SCIF, 0xffe00000, STB1_SCIF1_IRQS, sci_init_pins_scif }, \
+  { {}, PORT_SCIF, 0xffe80000, SH4_SCIF_IRQS,   sci_init_pins_scif }  \
+}
+# define SCSPTR1 0xffe00020 /* 16 bit SCIF */
+# define SCSPTR2 0xffe80020 /* 16 bit SCIF */
+# define SCIF_ORER 0x0001   /* overrun error bit */
+# define SCSCR_INIT(port)          0x38 /* TIE=0,RIE=0,TE=1,RE=1,REIE=1 */
+# define SCIF_ONLY
+#elif defined(CONFIG_H83007) || defined(CONFIG_H83068)
+# define SCI_NPORTS 3
+# define SCI_INIT { \
+  { {}, PORT_SCI,  0x00ffffb0, H8300H_SCI_IRQS0, sci_init_pins_sci }, \
+  { {}, PORT_SCI,  0x00ffffb8, H8300H_SCI_IRQS1, sci_init_pins_sci }, \
+  { {}, PORT_SCI,  0x00ffffc0, H8300H_SCI_IRQS2, sci_init_pins_sci }  \
+}
+# define SCSCR_INIT(port)          0x30 /* TIE=0,RIE=0,TE=1,RE=1 */
+# define SCI_ONLY
+# define H8300_SCI_DR(ch) *(volatile char *)(P1DR + h8300_sci_pins[ch].port)
+#elif defined(CONFIG_H8S2678)
+# define SCI_NPORTS 3
+# define SCI_INIT { \
+  { {}, PORT_SCI,  0x00ffff78, H8S_SCI_IRQS0, sci_init_pins_sci }, \
+  { {}, PORT_SCI,  0x00ffff80, H8S_SCI_IRQS1, sci_init_pins_sci }, \
+  { {}, PORT_SCI,  0x00ffff88, H8S_SCI_IRQS2, sci_init_pins_sci }  \
+}
+# define SCSCR_INIT(port)          0x30 /* TIE=0,RIE=0,TE=1,RE=1 */
+# define SCI_ONLY
+# define H8300_SCI_DR(ch) *(volatile char *)(P1DR + h8300_sci_pins[ch].port)
+#else
+# error CPU subtype not defined
+#endif
+
+/* SCSCR */
+#define SCI_CTRL_FLAGS_TIE  0x80 /* all */
+#define SCI_CTRL_FLAGS_RIE  0x40 /* all */
+#define SCI_CTRL_FLAGS_TE   0x20 /* all */
+#define SCI_CTRL_FLAGS_RE   0x10 /* all */
+/*      SCI_CTRL_FLAGS_REIE 0x08  * 7750 SCIF */
+/*      SCI_CTRL_FLAGS_MPIE 0x08  * 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */
+/*      SCI_CTRL_FLAGS_TEIE 0x04  * 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */
+/*      SCI_CTRL_FLAGS_CKE1 0x02  * all */
+/*      SCI_CTRL_FLAGS_CKE0 0x01  * 7707 SCI/SCIF, 7708 SCI, 7709 SCI/SCIF, 7750 SCI */
+
+/* SCxSR SCI */
+#define SCI_TDRE  0x80 /* 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */
+#define SCI_RDRF  0x40 /* 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */
+#define SCI_ORER  0x20 /* 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */
+#define SCI_FER   0x10 /* 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */
+#define SCI_PER   0x08 /* 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */
+#define SCI_TEND  0x04 /* 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */
+/*      SCI_MPB   0x02  * 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */
+/*      SCI_MPBT  0x01  * 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */
+
+#define SCI_ERRORS ( SCI_PER | SCI_FER | SCI_ORER)
+
+/* SCxSR SCIF */
+#define SCIF_ER    0x0080 /* 7707 SCIF, 7709 SCIF, 7750 SCIF */
+#define SCIF_TEND  0x0040 /* 7707 SCIF, 7709 SCIF, 7750 SCIF */
+#define SCIF_TDFE  0x0020 /* 7707 SCIF, 7709 SCIF, 7750 SCIF */
+#define SCIF_BRK   0x0010 /* 7707 SCIF, 7709 SCIF, 7750 SCIF */
+#define SCIF_FER   0x0008 /* 7707 SCIF, 7709 SCIF, 7750 SCIF */
+#define SCIF_PER   0x0004 /* 7707 SCIF, 7709 SCIF, 7750 SCIF */
+#define SCIF_RDF   0x0002 /* 7707 SCIF, 7709 SCIF, 7750 SCIF */
+#define SCIF_DR    0x0001 /* 7707 SCIF, 7709 SCIF, 7750 SCIF */
+
+#define SCIF_ERRORS ( SCIF_PER | SCIF_FER | SCIF_ER | SCIF_BRK)
+
+#if defined(SCI_ONLY)
+# define SCxSR_TEND(port)              SCI_TEND
+# define SCxSR_ERRORS(port)            SCI_ERRORS
+# define SCxSR_RDxF(port)               SCI_RDRF
+# define SCxSR_TDxE(port)               SCI_TDRE
+# define SCxSR_ORER(port)              SCI_ORER
+# define SCxSR_FER(port)               SCI_FER
+# define SCxSR_PER(port)               SCI_PER
+# define SCxSR_BRK(port)               0x00
+# define SCxSR_RDxF_CLEAR(port)                0xbc
+# define SCxSR_ERROR_CLEAR(port)       0xc4
+# define SCxSR_TDxE_CLEAR(port)                0x78
+# define SCxSR_BREAK_CLEAR(port)       0xc4
+#elif defined(SCIF_ONLY) 
+# define SCxSR_TEND(port)              SCIF_TEND
+# define SCxSR_ERRORS(port)            SCIF_ERRORS
+# define SCxSR_RDxF(port)               SCIF_RDF
+# define SCxSR_TDxE(port)               SCIF_TDFE
+# define SCxSR_ORER(port)              0x0000
+# define SCxSR_FER(port)               SCIF_FER
+# define SCxSR_PER(port)               SCIF_PER
+# define SCxSR_BRK(port)               SCIF_BRK
+# define SCxSR_RDxF_CLEAR(port)                0x00fc
+# define SCxSR_ERROR_CLEAR(port)       0x0073
+# define SCxSR_TDxE_CLEAR(port)                0x00df
+# define SCxSR_BREAK_CLEAR(port)       0x00e3
+#else
+# define SCxSR_TEND(port)       (((port)->type == PORT_SCI) ? SCI_TEND   : SCIF_TEND)
+# define SCxSR_ERRORS(port)     (((port)->type == PORT_SCI) ? SCI_ERRORS : SCIF_ERRORS)
+# define SCxSR_RDxF(port)        (((port)->type == PORT_SCI) ? SCI_RDRF   : SCIF_RDF)
+# define SCxSR_TDxE(port)        (((port)->type == PORT_SCI) ? SCI_TDRE   : SCIF_TDFE)
+# define SCxSR_ORER(port)        (((port)->type == PORT_SCI) ? SCI_ORER   : 0x0000)
+# define SCxSR_FER(port)         (((port)->type == PORT_SCI) ? SCI_FER    : SCIF_FER)
+# define SCxSR_PER(port)         (((port)->type == PORT_SCI) ? SCI_PER    : SCIF_PER)
+# define SCxSR_BRK(port)         (((port)->type == PORT_SCI) ? 0x00       : SCIF_BRK)
+# define SCxSR_RDxF_CLEAR(port)         (((port)->type == PORT_SCI) ? 0xbc : 0x00fc)
+# define SCxSR_ERROR_CLEAR(port) (((port)->type == PORT_SCI) ? 0xc4 : 0x0073)
+# define SCxSR_TDxE_CLEAR(port)  (((port)->type == PORT_SCI) ? 0x78 : 0x00df)
+# define SCxSR_BREAK_CLEAR(port) (((port)->type == PORT_SCI) ? 0xc4 : 0x00e3)
+#endif
+
+/* SCFCR */
+#define SCFCR_RFRST 0x0002
+#define SCFCR_TFRST 0x0004
+#define SCFCR_MCE   0x0008
+
+#define SCI_MAJOR              204
+#define SCI_MINOR_START                8
+
+/* Generic serial flags */
+#define SCI_RX_THROTTLE                0x0000001
+
+#define SCI_MAGIC 0xbabeface
+
+/*
+ * Events are used to schedule things to happen at timer-interrupt
+ * time, instead of at rs interrupt time.
+ */
+#define SCI_EVENT_WRITE_WAKEUP 0
+
+struct sci_port {
+       struct gs_port gs;
+       int type;
+       unsigned int base;
+       unsigned char irqs[4]; /* ERI, RXI, TXI, BRI */
+       void (*init_pins)(struct sci_port* port, unsigned int cflag);
+       unsigned int old_cflag;
+       struct async_icount icount;
+       struct work_struct tqueue;
+       unsigned long event;
+       int break_flag;
+};
+
+#define SCI_IN(size, offset)                                   \
+  unsigned int addr = port->base + (offset);                   \
+  if ((size) == 8) {                                           \
+    return ctrl_inb(addr);                                     \
+  } else {                                                     \
+    return ctrl_inw(addr);                                     \
+  }
+#define SCI_OUT(size, offset, value)                           \
+  unsigned int addr = port->base + (offset);                   \
+  if ((size) == 8) {                                           \
+    ctrl_outb(value, addr);                                    \
+  } else {                                                     \
+    ctrl_outw(value, addr);                                    \
+  }
+
+#define CPU_SCIx_FNS(name, sci_offset, sci_size, scif_offset, scif_size)\
+  static inline unsigned int sci_##name##_in(struct sci_port* port)    \
+  {                                                                    \
+    if (port->type == PORT_SCI) {                                      \
+      SCI_IN(sci_size, sci_offset)                                     \
+    } else {                                                           \
+      SCI_IN(scif_size, scif_offset);                                  \
+    }                                                                  \
+  }                                                                    \
+  static inline void sci_##name##_out(struct sci_port* port, unsigned int value) \
+  {                                                                    \
+    if (port->type == PORT_SCI) {                                      \
+      SCI_OUT(sci_size, sci_offset, value)                             \
+    } else {                                                           \
+      SCI_OUT(scif_size, scif_offset, value);                          \
+    }                                                                  \
+  }
+
+#define CPU_SCIF_FNS(name, scif_offset, scif_size)                             \
+  static inline unsigned int sci_##name##_in(struct sci_port* port)    \
+  {                                                                    \
+    SCI_IN(scif_size, scif_offset);                                    \
+  }                                                                    \
+  static inline void sci_##name##_out(struct sci_port* port, unsigned int value) \
+  {                                                                    \
+    SCI_OUT(scif_size, scif_offset, value);                            \
+  }
+
+#define CPU_SCI_FNS(name, sci_offset, sci_size)                                \
+  static inline unsigned int sci_##name##_in(struct sci_port* port)    \
+  {                                                                    \
+    SCI_IN(sci_size, sci_offset);                                      \
+  }                                                                    \
+  static inline void sci_##name##_out(struct sci_port* port, unsigned int value) \
+  {                                                                    \
+    SCI_OUT(sci_size, sci_offset, value);                              \
+  }
+
+#ifdef CONFIG_CPU_SH3
+#define SCIx_FNS(name, sh3_sci_offset, sh3_sci_size, sh4_sci_offset, sh4_sci_size, \
+                sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size, \
+                 h8_sci_offset, h8_sci_size) \
+  CPU_SCIx_FNS(name, sh3_sci_offset, sh3_sci_size, sh3_scif_offset, sh3_scif_size)
+#define SCIF_FNS(name, sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size) \
+  CPU_SCIF_FNS(name, sh3_scif_offset, sh3_scif_size)
+#elif defined(__H8300H__) || defined(__H8300S__)
+#define SCIx_FNS(name, sh3_sci_offset, sh3_sci_size, sh4_sci_offset, sh4_sci_size, \
+                sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size, \
+                 h8_sci_offset, h8_sci_size) \
+  CPU_SCI_FNS(name, h8_sci_offset, h8_sci_size)
+#define SCIF_FNS(name, sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size)
+#else
+#define SCIx_FNS(name, sh3_sci_offset, sh3_sci_size, sh4_sci_offset, sh4_sci_size, \
+                sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size, \
+                h8_sci_offset, h8_sci_size) \
+  CPU_SCIx_FNS(name, sh4_sci_offset, sh4_sci_size, sh4_scif_offset, sh4_scif_size)
+#define SCIF_FNS(name, sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size) \
+  CPU_SCIF_FNS(name, sh4_scif_offset, sh4_scif_size)
+#endif
+
+/*      reg      SCI/SH3   SCI/SH4  SCIF/SH3   SCIF/SH4  SCI/H8*/
+/*      name     off  sz   off  sz   off  sz   off  sz   off  sz*/
+SCIx_FNS(SCSMR,  0x00,  8, 0x00,  8, 0x00,  8, 0x00, 16, 0x00,  8)
+SCIx_FNS(SCBRR,  0x02,  8, 0x04,  8, 0x02,  8, 0x04,  8, 0x01,  8)
+SCIx_FNS(SCSCR,  0x04,  8, 0x08,  8, 0x04,  8, 0x08, 16, 0x02,  8)
+SCIx_FNS(SCxTDR, 0x06,  8, 0x0c,  8, 0x06,  8, 0x0C,  8, 0x03,  8)
+SCIx_FNS(SCxSR,  0x08,  8, 0x10,  8, 0x08, 16, 0x10, 16, 0x04,  8)
+SCIx_FNS(SCxRDR, 0x0a,  8, 0x14,  8, 0x0A,  8, 0x14,  8, 0x05,  8)
+SCIF_FNS(SCFCR,                      0x0c,  8, 0x18, 16)
+SCIF_FNS(SCFDR,                      0x0e, 16, 0x1C, 16)
+SCIF_FNS(SCLSR,                         0,  0, 0x24, 16)
+
+#define sci_in(port, reg) sci_##reg##_in(port)
+#define sci_out(port, reg, value) sci_##reg##_out(port, value)
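+
+/*
+ * Usage sketch: sci_in(port, SCxSR) expands to sci_SCxSR_in(port), one of
+ * the inline accessors generated above.  On SH-4, for instance, that reads
+ * 8 bits at port->base + 0x10 for a SCI port and 16 bits at the same
+ * offset for a SCIF port, so callers never hard-code register widths or
+ * offsets.
+ */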
+
+/* H8/300 series SCI pins assignment */
+#if defined(__H8300H__) || defined(__H8300S__)
+static const struct __attribute__((packed))
+{
+       int port;             /* GPIO port no */
+       unsigned short rx,tx; /* GPIO bit no */
+} h8300_sci_pins[] =
+{
+#if defined(CONFIG_H83007) || defined(CONFIG_H83068)
+       {    /* SCI0 */
+               .port = H8300_GPIO_P9,
+               .rx   = H8300_GPIO_B2,
+               .tx   = H8300_GPIO_B0,
+       },
+       {    /* SCI1 */
+               .port = H8300_GPIO_P9,
+               .rx   = H8300_GPIO_B3,
+               .tx   = H8300_GPIO_B1,
+       },
+       {    /* SCI2 */
+               .port = H8300_GPIO_PB,
+               .rx   = H8300_GPIO_B7,
+               .tx   = H8300_GPIO_B6,
+       }
+#elif defined(CONFIG_H8S2678)
+       {    /* SCI0 */
+               .port = H8300_GPIO_P3,
+               .rx   = H8300_GPIO_B2,
+               .tx   = H8300_GPIO_B0,
+       },
+       {    /* SCI1 */
+               .port = H8300_GPIO_P3,
+               .rx   = H8300_GPIO_B3,
+               .tx   = H8300_GPIO_B1,
+       },
+       {    /* SCI2 */
+               .port = H8300_GPIO_P5,
+               .rx   = H8300_GPIO_B1,
+               .tx   = H8300_GPIO_B0,
+       }
+#endif
+};
+#endif
+
+#if defined(CONFIG_CPU_SUBTYPE_SH7708)
+static inline int sci_rxd_in(struct sci_port *port)
+{
+       if (port->base == 0xfffffe80)
+               return ctrl_inb(SCSPTR)&0x01 ? 1 : 0; /* SCI */
+       return 1;
+}
+#elif defined(CONFIG_CPU_SUBTYPE_SH7707) || defined(CONFIG_CPU_SUBTYPE_SH7709)
+static inline int sci_rxd_in(struct sci_port *port)
+{
+       if (port->base == 0xfffffe80)
+               return ctrl_inb(SCPDR)&0x01 ? 1 : 0; /* SCI */
+       if (port->base == 0xa4000150)
+               return ctrl_inb(SCPDR)&0x10 ? 1 : 0; /* SCIF */
+       if (port->base == 0xa4000140)
+               return ctrl_inb(SCPDR)&0x04 ? 1 : 0; /* IRDA */
+       return 1;
+}
+#elif defined(CONFIG_CPU_SUBTYPE_SH7750) || defined(CONFIG_CPU_SUBTYPE_SH7751)
+static inline int sci_rxd_in(struct sci_port *port)
+{
+#ifndef SCIF_ONLY
+       if (port->base == 0xffe00000)
+               return ctrl_inb(SCSPTR1)&0x01 ? 1 : 0; /* SCI */
+#endif
+#ifndef SCI_ONLY
+       if (port->base == 0xffe80000)
+               return ctrl_inw(SCSPTR2)&0x0001 ? 1 : 0; /* SCIF */
+#endif
+       return 1;
+}
+#elif defined(CONFIG_CPU_SUBTYPE_SH7760)
+static inline int sci_rxd_in(struct sci_port *port)
+{
+       if (port->base == 0xfe600000)
+               return ctrl_inw(SCSPTR0) & 0x0001 ? 1 : 0; /* SCIF */
+       if (port->base == 0xfe610000)
+               return ctrl_inw(SCSPTR1) & 0x0001 ? 1 : 0; /* SCIF */
+       if (port->base == 0xfe620000)
+               return ctrl_inw(SCSPTR2) & 0x0001 ? 1 : 0; /* SCIF */
+}
+#elif defined(CONFIG_CPU_SUBTYPE_ST40STB1)
+static inline int sci_rxd_in(struct sci_port *port)
+{
+       if (port->base == 0xffe00000)
+               return ctrl_inw(SCSPTR1)&0x0001 ? 1 : 0; /* SCIF */
+       else
+               return ctrl_inw(SCSPTR2)&0x0001 ? 1 : 0; /* SCIF */
+
+}
+#elif defined(__H8300H__) || defined(__H8300S__)
+static inline int sci_rxd_in(struct sci_port *port)
+{
+       int ch = (port->base - SMR0) >> 3;
+       return (H8300_SCI_DR(ch) & h8300_sci_pins[ch].rx) ? 1 : 0;
+}
+#endif
+
+/*
+ * Values for the BitRate Register (SCBRR)
+ *
+ * The values are actually divisors for a frequency which can
+ * be internal to the SH3 (14.7456MHz) or derived from an external
+ * clock source.  This driver assumes the internal clock is used;
+ * to support using an external clock source, config options or
+ * possibly command-line options would need to be added.
+ *
+ * Also, to support speeds below 2400 (why?) the lower 2 bits of
+ * the SCSMR register would also need to be set to non-zero values.
+ *
+ * -- Greg Banks 27Feb2000
+ *
+ * Answer: The SCBRR register is only eight bits, and the value in
+ * it gets larger with lower baud rates. At around 2400 (depending on
+ * the peripheral module clock) you run out of bits. However the
+ * lower two bits of SCSMR allow the module clock to be divided down,
+ * scaling the value which is needed in SCBRR.
+ *
+ * -- Stuart Menefy - 23 May 2000
+ *
+ * I meant, why would anyone bother with bitrates below 2400.
+ *
+ * -- Greg Banks - 7Jul2000
+ *
+ * You "speedist"!  How will I use my 110bps ASR-33 teletype with paper
+ * tape reader as a console!
+ *
+ * -- Mitch Davis - 15 Jul 2000
+ */
+
+#define PCLK           (current_cpu_data.module_clock)
+
+#if !defined(__H8300H__) && !defined(__H8300S__)
+#define SCBRR_VALUE(bps) ((PCLK+16*bps)/(32*bps)-1)
+#else
+#define SCBRR_VALUE(bps) (((CONFIG_CPU_CLOCK*1000/32)/bps)-1)
+#endif
+#define BPS_2400       SCBRR_VALUE(2400)
+#define BPS_4800       SCBRR_VALUE(4800)
+#define BPS_9600       SCBRR_VALUE(9600)
+#define BPS_19200      SCBRR_VALUE(19200)
+#define BPS_38400      SCBRR_VALUE(38400)
+#define BPS_57600      SCBRR_VALUE(57600)
+#define BPS_115200     SCBRR_VALUE(115200)
+#define BPS_230400     SCBRR_VALUE(230400)
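+
+/*
+ * Worked example, assuming the internal 14.7456MHz module clock described
+ * above:
+ *
+ *   SCBRR_VALUE(9600) = (14745600 + 16*9600) / (32*9600) - 1
+ *                     = 14899200 / 307200 - 1
+ *                     = 48 - 1 = 47
+ *
+ * The "+16*bps" term rounds to the nearest divisor instead of truncating.
+ */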
+
diff --git a/drivers/i2c/busses/i2c-ixp42x.c b/drivers/i2c/busses/i2c-ixp42x.c
new file mode 100644 (file)
index 0000000..59fcb70
--- /dev/null
@@ -0,0 +1,176 @@
+/*
+ * drivers/i2c/i2c-adap-ixp42x.c
+ *
+ * Intel's IXP42x XScale NPU chipsets (IXP420, 421, 422, 425) do not have
+ * an on board I2C controller but provide 16 GPIO pins that are often
+ * used to create an I2C bus. This driver provides an i2c_adapter 
+ * interface that plugs in under algo_bit and drives the GPIO pins
+ * as instructed by the algorithm driver.
+ *
+ * Author: Deepak Saxena <dsaxena@plexity.net>
+ *
+ * Copyright (c) 2003-2004 MontaVista Software Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public 
+ * License version 2. This program is licensed "as is" without any 
+ * warranty of any kind, whether express or implied.
+ *
+ * NOTE: Since different platforms will use different GPIO pins for
+ *       I2C, this driver uses an IXP42x-specific platform_data
+ *       pointer to pass the GPIO numbers to the driver. This 
+ *       allows us to support all the different IXP42x platforms
+ *       w/o having to put #ifdefs in this driver.
+ *
+ *       See arch/arm/mach-ixp42x/ixdp425.c for an example of building a 
+ *       device list and filling in the ixp42x_i2c_pins data structure 
+ *       that is passed as the platform_data to this driver.
+ */
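+
+/*
+ * Rough sketch only (the real board code lives in the mach-ixp42x file
+ * named above, and the pin numbers here are invented for illustration):
+ *
+ *     static struct ixp42x_i2c_pins ixdp_i2c_gpio_pins = {
+ *             .sda_pin = 7,
+ *             .scl_pin = 6,
+ *     };
+ *
+ *     static struct platform_device ixdp_i2c_controller = {
+ *             .name   = "IXP42X-I2C",
+ *             .id     = 0,
+ *             .dev    = {
+ *                     .platform_data = &ixdp_i2c_gpio_pins,
+ *             },
+ *     };
+ *
+ * The .name string must match the driver name registered at the bottom of
+ * this file so the driver core binds the two together.
+ */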
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+
+#include <asm/hardware.h>      /* Pick up IXP42x-specific bits */
+
+static inline int ixp42x_scl_pin(void *data)
+{
+       return ((struct ixp42x_i2c_pins*)data)->scl_pin;
+}
+
+static inline int ixp42x_sda_pin(void *data)
+{
+       return ((struct ixp42x_i2c_pins*)data)->sda_pin;
+}
+
+static void ixp42x_bit_setscl(void *data, int val)
+{
+       gpio_line_set(ixp42x_scl_pin(data), 0);
+       gpio_line_config(ixp42x_scl_pin(data),
+               val ? IXP425_GPIO_IN : IXP425_GPIO_OUT );
+}
+
+static void ixp42x_bit_setsda(void *data, int val)
+{
+       gpio_line_set(ixp42x_sda_pin(data), 0);
+       gpio_line_config(ixp42x_sda_pin(data),
+               val ? IXP425_GPIO_IN : IXP425_GPIO_OUT );
+}
+
+static int ixp42x_bit_getscl(void *data)
+{
+       int scl;
+
+       gpio_line_config(ixp42x_scl_pin(data), IXP425_GPIO_IN );
+       gpio_line_get(ixp42x_scl_pin(data), &scl);
+
+       return scl;
+}      
+
+static int ixp42x_bit_getsda(void *data)
+{
+       int sda;
+
+       gpio_line_config(ixp42x_sda_pin(data), IXP425_GPIO_IN );
+       gpio_line_get(ixp42x_sda_pin(data), &sda);
+
+       return sda;
+}      
+
+struct ixp42x_i2c_data {
+       struct ixp42x_i2c_pins *gpio_pins;
+       struct i2c_adapter adapter;
+       struct i2c_algo_bit_data algo_data;
+};
+
+static int ixp42x_i2c_remove(struct device *dev)
+{
+       struct platform_device *plat_dev = to_platform_device(dev);
+       struct ixp42x_i2c_data *drv_data = dev_get_drvdata(&plat_dev->dev);
+
+       dev_set_drvdata(&plat_dev->dev, NULL);
+
+       i2c_bit_del_bus(&drv_data->adapter);
+
+       kfree(drv_data);
+
+       return 0;
+}
+
+static int ixp42x_i2c_probe(struct device *dev)
+{
+       int err;
+       struct platform_device *plat_dev = to_platform_device(dev);
+       struct ixp42x_i2c_pins *gpio = plat_dev->dev.platform_data;
+       struct ixp42x_i2c_data *drv_data = 
+               kmalloc(sizeof(struct ixp42x_i2c_data), GFP_KERNEL);
+
+       if(!drv_data)
+               return -ENOMEM;
+
+       memzero(drv_data, sizeof(struct ixp42x_i2c_data));
+       drv_data->gpio_pins = gpio;
+
+       /*
+        * We could make a lot of these structures static, but
+        * certain platforms may have multiple GPIO-based I2C
+        * buses for various device domains, so we need per-device
+        * algo_data->data. 
+        */
+       drv_data->algo_data.data = gpio;
+       drv_data->algo_data.setsda = ixp42x_bit_setsda;
+       drv_data->algo_data.setscl = ixp42x_bit_setscl;
+       drv_data->algo_data.getsda = ixp42x_bit_getsda;
+       drv_data->algo_data.getscl = ixp42x_bit_getscl;
+       drv_data->algo_data.udelay = 10;
+       drv_data->algo_data.mdelay = 10;
+       drv_data->algo_data.timeout = 100;
+
+       drv_data->adapter.id = I2C_HW_B_IXP425;
+       drv_data->adapter.algo_data = &drv_data->algo_data;
+
+       drv_data->adapter.dev.parent = &plat_dev->dev;
+
+       gpio_line_config(gpio->scl_pin, IXP425_GPIO_IN);
+       gpio_line_config(gpio->sda_pin, IXP425_GPIO_IN);
+       gpio_line_set(gpio->scl_pin, 0);
+       gpio_line_set(gpio->sda_pin, 0);
+
+       if ((err = i2c_bit_add_bus(&drv_data->adapter)) != 0) {
+               printk(KERN_ERR "ERROR: Could not install %s\n", dev->bus_id);
+
+               kfree(drv_data);
+               return err;
+       }
+
+       dev_set_drvdata(&plat_dev->dev, drv_data);
+
+       return 0;
+}
+
+static struct device_driver ixp42x_i2c_driver = {
+       .name           = "IXP42X-I2C",
+       .bus            = &platform_bus_type,
+       .probe          = ixp42x_i2c_probe,
+       .remove         = ixp42x_i2c_remove,
+};
+
+static int __init ixp42x_i2c_init(void)
+{
+       return driver_register(&ixp42x_i2c_driver);
+}
+
+static void __exit ixp42x_i2c_exit(void)
+{
+       driver_unregister(&ixp42x_i2c_driver);
+}
+
+module_init(ixp42x_i2c_init);
+module_exit(ixp42x_i2c_exit);
+
+MODULE_DESCRIPTION("GPIO-based I2C driver for IXP42x systems");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Deepak Saxena <dsaxena@plexity.net>");
+
diff --git a/drivers/ide/pci/cmd640.h b/drivers/ide/pci/cmd640.h
new file mode 100644 (file)
index 0000000..28b6e04
--- /dev/null
@@ -0,0 +1,32 @@
+#ifndef CMD640_H
+#define CMD640_H
+
+#include <linux/config.h>
+#include <linux/pci.h>
+#include <linux/ide.h>
+
+#define IDE_IGNORE      ((void *)-1)
+
+static ide_pci_device_t cmd640_chipsets[] __initdata = {
+       {
+               .vendor         = PCI_VENDOR_ID_CMD,
+               .device         = PCI_DEVICE_ID_CMD_640,
+               .name           = "CMD640",
+               .init_setup     = NULL,
+               .init_chipset   = NULL,
+               .init_iops      = NULL,
+               .init_hwif      = IDE_IGNORE,
+               .init_dma       = NULL,
+               .channels       = 2,
+               .autodma        = NODMA,
+               .enablebits     = {{0x00,0x00,0x00}, {0x00,0x00,0x00}},
+               .bootable       = ON_BOARD,
+               .extra          = 0
+       },{
+               .vendor         = 0,
+               .device         = 0,
+               .bootable       = EOL,
+       }
+};
+
+#endif /* CMD640_H */
diff --git a/drivers/ide/ppc/swarm.c b/drivers/ide/ppc/swarm.c
new file mode 100644 (file)
index 0000000..d54a555
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2001 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+/*  Derived loosely from ide-pmac.c, so:
+ *  
+ *  Copyright (C) 1998 Paul Mackerras.
+ *  Copyright (C) 1995-1998 Mark Lord
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/ide.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/sibyte/sb1250_int.h>
+
+#define __IDE_SWARM_C
+
+#include <asm/sibyte/swarm_ide.h>
+
+void __init swarm_ide_probe(void)
+{
+       int i;
+       ide_hwif_t *hwif;
+       /* 
+        * Find the first untaken slot in hwifs 
+        */
+       for (i = 0; i < MAX_HWIFS; i++) {
+               if (!ide_hwifs[i].io_ports[IDE_DATA_OFFSET]) {
+                       break;
+               }
+       }
+       if (i == MAX_HWIFS) {
+               printk("No space for SWARM onboard IDE driver in ide_hwifs[].  Not enabled.\n");
+               return;
+       }
+
+       /* Set up our stuff */
+       hwif = &ide_hwifs[i];
+       hwif->hw.io_ports[IDE_DATA_OFFSET]    = SWARM_IDE_REG(0x1f0);
+       hwif->hw.io_ports[IDE_ERROR_OFFSET]   = SWARM_IDE_REG(0x1f1);
+       hwif->hw.io_ports[IDE_NSECTOR_OFFSET] = SWARM_IDE_REG(0x1f2);
+       hwif->hw.io_ports[IDE_SECTOR_OFFSET]  = SWARM_IDE_REG(0x1f3);
+       hwif->hw.io_ports[IDE_LCYL_OFFSET]    = SWARM_IDE_REG(0x1f4);
+       hwif->hw.io_ports[IDE_HCYL_OFFSET]    = SWARM_IDE_REG(0x1f5);
+       hwif->hw.io_ports[IDE_SELECT_OFFSET]  = SWARM_IDE_REG(0x1f6);
+       hwif->hw.io_ports[IDE_STATUS_OFFSET]  = SWARM_IDE_REG(0x1f7);
+       hwif->hw.io_ports[IDE_CONTROL_OFFSET] = SWARM_IDE_REG(0x3f6);
+       hwif->hw.io_ports[IDE_IRQ_OFFSET]     = SWARM_IDE_REG(0x3f7);
+//     hwif->hw->ack_intr                    = swarm_ide_ack_intr;
+       hwif->hw.irq                          = SWARM_IDE_INT;
+#if 0
+       hwif->iops                            = swarm_iops;
+#else
+       hwif->OUTB      = hwif->OUTBP         = swarm_outb;
+       hwif->OUTW      = hwif->OUTWP         = swarm_outw;
+       hwif->OUTL      = hwif->OUTLP         = swarm_outl;
+       hwif->OUTSW     = hwif->OUTSWP        = swarm_outsw;
+       hwif->OUTSL     = hwif->OUTSLP        = swarm_outsl;
+       hwif->INB       = hwif->INBP          = swarm_inb;
+       hwif->INW       = hwif->INWP          = swarm_inw;
+       hwif->INL       = hwif->INLP          = swarm_inl;
+       hwif->INSW      = hwif->INSWP         = swarm_insw;
+       hwif->INSL      = hwif->INSLP         = swarm_insl;
+#endif
+#if 0
+       hwif->pioops                          = swarm_pio_ops;
+#else
+       hwif->ata_input_data                  = swarm_ata_input_data;
+       hwif->ata_output_data                 = swarm_ata_output_data;
+       hwif->atapi_input_bytes               = swarm_atapi_input_bytes;
+       hwif->atapi_output_bytes              = swarm_atapi_output_bytes;
+#endif
+       memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports));
+       hwif->irq                             = hwif->hw.irq;
+       printk("SWARM onboard IDE configured as device %i\n", i);
+
+#ifndef HWIF_PROBE_CLASSIC_METHOD
+       probe_hwif_init(hwif->index);
+#endif /* HWIF_PROBE_CLASSIC_METHOD */
+
+}
+
diff --git a/drivers/net/auto_irq.c b/drivers/net/auto_irq.c
new file mode 100644 (file)
index 0000000..96ddc77
--- /dev/null
@@ -0,0 +1,68 @@
+/* auto_irq.c: Auto-configure IRQ lines for linux. */
+/*
+    Written 1994 by Donald Becker.
+
+    The author may be reached as becker@scyld.com
+
+    This code is a general-purpose IRQ line detector for devices with
+    jumpered IRQ lines.  If you can make the device raise an IRQ (and
+    that IRQ line isn't already being used), these routines will tell
+    you what IRQ line it's using -- perfect for those oh-so-cool boot-time
+    device probes!
+
+    To use this, first call autoirq_setup(timeout). TIMEOUT is how many
+    'jiffies' (1/100 sec.) to detect other devices that have active IRQ lines,
+    and can usually be zero at boot.  'autoirq_setup()' returns the bit
+    vector of nominally-available IRQ lines (lines may be physically in-use,
+    but not yet registered to a device).
+    Next, set up your device to trigger an interrupt.
+    Finally call autoirq_report(TIMEOUT) to find out which IRQ line was
+    most recently active.  The TIMEOUT should usually be zero, but may
+    be set to the number of jiffies to wait for a slow device to raise an IRQ.
+
+    The idea of using the setup timeout to filter out bogus IRQs came from
+    the serial driver.
+*/
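+
+/*
+ * Usage sketch (illustrative only -- INT_ENABLE and the value written to
+ * it are invented, since each board has its own way of raising a test IRQ):
+ *
+ *     autoirq_setup(0);
+ *     outb(0x10, ioaddr + INT_ENABLE);        /- ask the card to interrupt -/
+ *     dev->irq = autoirq_report(HZ / 50);
+ *     if (dev->irq == 0)
+ *             printk(KERN_WARNING "%s: no IRQ detected\n", dev->name);
+ */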
+
+
+#ifdef version
+static const char *version=
+"auto_irq.c:v1.11 Donald Becker (becker@scyld.com)";
+#endif
+
+#include <linux/module.h>
+#include <linux/jiffies.h>
+#include <linux/delay.h>
+#include <asm/bitops.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <linux/netdevice.h>
+
+static unsigned long irqs;
+
+void autoirq_setup(int waittime)
+{
+       irqs = probe_irq_on();
+}
+
+#define BUSY_LOOP_UNTIL(j) while ((long)(jiffies-(j)) < 0) ;
+int autoirq_report(int waittime)
+{
+       unsigned long delay = jiffies + waittime;
+       BUSY_LOOP_UNTIL(delay)
+       return probe_irq_off(irqs);
+}
+
+EXPORT_SYMBOL(autoirq_setup);
+EXPORT_SYMBOL(autoirq_report);
+
+\f
+/*
+ * Local variables:
+ *  compile-command: "gcc -DKERNEL -Wall -O6 -fomit-frame-pointer -I/usr/src/linux/net/tcp -c auto_irq.c"
+ *  version-control: t
+ *  kept-new-versions: 5
+ *  c-indent-level: 4
+ *  tab-width: 4
+ * End:
+ */
diff --git a/drivers/net/rcif.h b/drivers/net/rcif.h
new file mode 100644 (file)
index 0000000..85ff861
--- /dev/null
@@ -0,0 +1,292 @@
+/*
+** *************************************************************************
+**
+**
+**     R C I F . H
+**
+**
+**  RedCreek InterFace include file.
+**
+**  ---------------------------------------------------------------------
+**  ---     Copyright (c) 1998-1999, RedCreek Communications Inc.     ---
+**  ---                   All rights reserved.                        ---
+**  ---------------------------------------------------------------------
+**
+** File Description:
+**
+** Header file private ioctl commands.
+**
+**
+**  This program is free software; you can redistribute it and/or modify
+**  it under the terms of the GNU General Public License as published by
+**  the Free Software Foundation; either version 2 of the License, or
+**  (at your option) any later version.
+
+**  This program is distributed in the hope that it will be useful,
+**  but WITHOUT ANY WARRANTY; without even the implied warranty of
+**  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+**  GNU General Public License for more details.
+
+**  You should have received a copy of the GNU General Public License
+**  along with this program; if not, write to the Free Software
+**  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+** *************************************************************************
+*/
+
+#ifndef RCIF_H
+#define RCIF_H
+
+/* The following protocol revision # should be incremented every time
+   a new protocol or new structures are used in this file. */
+int USER_PROTOCOL_REV = 2;     /* used to track different protocol revisions */
+
+/* define a single TCB & buffer */
+typedef struct {               /* a single buffer */
+       U32 context;            /* context */
+       U32 scount;             /* segment count */
+       U32 size;               /* segment size */
+       U32 addr;               /* segment physical address */
+} __attribute__ ((packed))
+    singleB, *psingleB;
+typedef struct {               /* a single TCB */
+       /*
+          **  +-----------------------+
+          **  |         1             |  one buffer in the TCB
+          **  +-----------------------+
+          **  |  <user's Context>     |  user's buffer reference
+          **  +-----------------------+
+          **  |         1             |  one segment buffer
+          **  +-----------------------+                            _
+          **  |    <buffer size>      |  size                       \ 
+          **  +-----------------------+                              \ segment descriptor
+          **  |  <physical address>   |  physical address of buffer  /
+          **  +-----------------------+                            _/
+        */
+       U32 bcount;             /* buffer count */
+       singleB b;              /* buffer */
+
+} __attribute__ ((packed))
+    singleTCB, *psingleTCB;
+
+/*
+   When adding new entries, please add all 5 related changes, since 
+   it helps keep everything consistent:
+      1) User structure entry
+      2) User data entry
+      3) Structure short-cut entry
+      4) Data short-cut entry
+      5) Command identifier entry
+
+   For Example ("GETSPEED"):
+      1) struct  RCgetspeed_tag { U32 LinkSpeedCode; } RCgetspeed;
+      2) struct  RCgetspeed_tag *getspeed;
+      3) #define RCUS_GETSPEED  data.RCgetspeed;
+      4) #define RCUD_GETSPEED  _RC_user_data.getspeed
+      5) #define RCUC_GETSPEED  0x02
+  
+   Notes for the "GETSPEED" entry, above:
+      1) RCgetspeed      - RC{name}
+         RCgetspeed_tag  - RC{name}_tag
+         LinkSpeedCode   - create any structure format desired (not too large,
+                           since memory will be unioned with all other entries)
+      2) RCgetspeed_tag  - RC{name}_tag chosen in #1
+         getspeed        - arbitrary name (ptr to structure in #1)
+      3) RCUS_GETSPEED   - RCUS_{NAME}   ("NAME" and "name" do not have to be the same)
+         data.RCgetspeed - data.RC{name}  ("RC{name}" from #1)
+      4) RCUD_GETSPEED   - _RC_user_data.getspeed  ("getspeed" from #2)
+      5) RCUC_GETSPEED   - unique hex identifier entry.
+*/
+
+typedef struct RC_user_tag RCuser_struct;
+
+/* 1) User structure entry */
+struct RC_user_tag {
+       int cmd;
+       union {
+               /* GETINFO structure */
+               struct RCgetinfo_tag {
+                       unsigned long int mem_start;
+                       unsigned long int mem_end;
+                       unsigned long int base_addr;
+                       unsigned char irq;
+                       unsigned char dma;
+                       unsigned char port;
+               } RCgetinfo;    /* <---- RCgetinfo */
+
+               /* GETSPEED structure */
+               struct RCgetspeed_tag {
+                       U32 LinkSpeedCode;
+               } RCgetspeed;   /* <---- RCgetspeed */
+
+               /* SETSPEED structure */
+               struct RCsetspeed_tag {
+                       U16 LinkSpeedCode;
+               } RCsetspeed;   /* <---- RCsetspeed */
+
+               /* GETPROM structure */
+               struct RCgetprom_tag {
+                       U32 PromMode;
+               } RCgetprom;    /* <---- RCgetprom */
+
+               /* SETPROM structure */
+               struct RCsetprom_tag {
+                       U16 PromMode;
+               } RCsetprom;    /* <---- RCsetprom */
+
+               /* GETBROADCAST structure */
+               struct RCgetbroadcast_tag {
+                       U32 BroadcastMode;
+               } RCgetbroadcast;       /* <---- RCgetbroadcast */
+
+               /* SETBROADCAST structure */
+               struct RCsetbroadcast_tag {
+                       U16 BroadcastMode;
+               } RCsetbroadcast;       /* <---- RCsetbroadcast */
+
+               /* GETFIRMWAREVER structure */
+#define FirmStringLen 80
+               struct RCgetfwver_tag {
+                       U8 FirmString[FirmStringLen];
+               } RCgetfwver;   /* <---- RCgetfwver */
+
+               /* GETIPANDMASK structure */
+               struct RCgetipnmask_tag {
+                       U32 IpAddr;
+                       U32 NetMask;
+               } RCgetipandmask;       /* <---- RCgetipandmask */
+
+               /* SETIPANDMASK structure */
+               struct RCsetipnmask_tag {
+                       U32 IpAddr;
+                       U32 NetMask;
+               } RCsetipandmask;       /* <---- RCsetipandmask */
+
+               /* GETMAC structure */
+#define MAC_SIZE 10
+               struct RCgetmac_tag {
+                       U8 mac[MAC_SIZE];
+               } RCgetmac;     /* <---- RCgetmac */
+
+               /* SETMAC structure */
+               struct RCsetmac_tag {
+                       U8 mac[MAC_SIZE];
+               } RCsetmac;     /* <---- RCsetmac */
+
+               /* GETLINKSTATUS structure */
+               struct RCgetlnkstatus_tag {
+                       U32 ReturnStatus;
+               } RCgetlnkstatus;       /* <---- RCgetlnkstatus */
+
+               /* GETLINKSTATISTICS structure */
+               struct RCgetlinkstats_tag {
+                       RCLINKSTATS StatsReturn;
+               } RCgetlinkstats;       /* <---- RCgetlinkstats */
+
+               /* DEFAULT structure (when no command was recognized) */
+               struct RCdefault_tag {
+                       int rc;
+               } RCdefault;    /* <---- RCdefault */
+
+       } data;
+
+};                             /* struct RC_user_tag { ... } */
+
+/* 2) User data entry */
+/* RCUD = RedCreek User Data */
+union RC_user_data_tag {       /* structure tags used are taken from RC_user_tag structure above */
+       struct RCgetinfo_tag *getinfo;
+       struct RCgetspeed_tag *getspeed;
+       struct RCgetprom_tag *getprom;
+       struct RCgetbroadcast_tag *getbroadcast;
+       struct RCgetfwver_tag *getfwver;
+       struct RCgetipnmask_tag *getipandmask;
+       struct RCgetmac_tag *getmac;
+       struct RCgetlnkstatus_tag *getlinkstatus;
+       struct RCgetlinkstats_tag *getlinkstatistics;
+       struct RCdefault_tag *rcdefault;
+       struct RCsetspeed_tag *setspeed;
+       struct RCsetprom_tag *setprom;
+       struct RCsetbroadcast_tag *setbroadcast;
+       struct RCsetipnmask_tag *setipandmask;
+       struct RCsetmac_tag *setmac;
+} _RC_user_data;               /* declare as a global, so the defines below will work */
+
+/* 3) Structure short-cut entry */
+/* define structure short-cuts; structure names are taken from RC_user_tag structure above */
+#define RCUS_GETINFO           data.RCgetinfo
+#define RCUS_GETSPEED          data.RCgetspeed
+#define RCUS_GETPROM           data.RCgetprom
+#define RCUS_GETBROADCAST      data.RCgetbroadcast
+#define RCUS_GETFWVER          data.RCgetfwver
+#define RCUS_GETIPANDMASK      data.RCgetipandmask
+#define RCUS_GETMAC            data.RCgetmac
+#define RCUS_GETLINKSTATUS     data.RCgetlnkstatus
+#define RCUS_GETLINKSTATISTICS data.RCgetlinkstats
+#define RCUS_DEFAULT           data.RCdefault
+#define RCUS_SETSPEED          data.RCsetspeed
+#define RCUS_SETPROM           data.RCsetprom
+#define RCUS_SETBROADCAST      data.RCsetbroadcast
+#define RCUS_SETIPANDMASK      data.RCsetipandmask
+#define RCUS_SETMAC            data.RCsetmac
+
+/* 4) Data short-cut entry */
+/* define data short-cuts; pointer names are from RC_user_data_tag union (just below RC_user_tag) */
+#define RCUD_GETINFO           _RC_user_data.getinfo
+#define RCUD_GETSPEED          _RC_user_data.getspeed
+#define RCUD_GETPROM           _RC_user_data.getprom
+#define RCUD_GETBROADCAST      _RC_user_data.getbroadcast
+#define RCUD_GETFWVER          _RC_user_data.getfwver
+#define RCUD_GETIPANDMASK      _RC_user_data.getipandmask
+#define RCUD_GETMAC            _RC_user_data.getmac
+#define RCUD_GETLINKSTATUS     _RC_user_data.getlinkstatus
+#define RCUD_GETLINKSTATISTICS _RC_user_data.getlinkstatistics
+#define RCUD_DEFAULT           _RC_user_data.rcdefault
+#define RCUD_SETSPEED          _RC_user_data.setspeed
+#define RCUD_SETPROM           _RC_user_data.setprom
+#define RCUD_SETBROADCAST      _RC_user_data.setbroadcast
+#define RCUD_SETIPANDMASK      _RC_user_data.setipandmask
+#define RCUD_SETMAC            _RC_user_data.setmac
+
+/* 5) Command identifier entry */
+/* define command identifiers */
+#define RCUC_GETINFO            0x01
+#define RCUC_GETSPEED           0x02
+#define RCUC_GETFWVER           0x03
+#define RCUC_GETIPANDMASK       0x04
+#define RCUC_GETMAC             0x05
+#define RCUC_GETLINKSTATUS      0x06
+#define RCUC_GETLINKSTATISTICS  0x07
+#define RCUC_GETPROM            0x14
+#define RCUC_GETBROADCAST       0x15
+#define RCUC_DEFAULT            0xff
+#define RCUC_SETSPEED           0x08
+#define RCUC_SETIPANDMASK       0x09
+#define RCUC_SETMAC             0x0a
+#define RCUC_SETPROM            0x16
+#define RCUC_SETBROADCAST       0x17
+
+/* define ioctl commands to use, when talking to RC 45/PCI driver */
+#define RCU_PROTOCOL_REV         SIOCDEVPRIVATE
+#define RCU_COMMAND              SIOCDEVPRIVATE+1
+
+/*
+   Intended use for the above defines is shown below (GETINFO, as this example):
+
+      RCuser_struct RCuser;           // declare RCuser structure
+      struct ifreq ifr;               // declare an interface request structure
+
+      RCuser.cmd = RCUC_GETINFO;           // set user command to GETINFO
+      strcpy(ifr.ifr_name, "eth0");        // name of the RC 45/PCI interface (illustrative)
+      ifr.ifr_data = (caddr_t) &RCuser;    // point ifr_data at the user structure
+
+      sock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);   // get a socket
+      ioctl(sock, RCU_COMMAND, &ifr);                  // do ioctl on socket
+
+      RCUD_GETINFO = &RCuser.RCUS_GETINFO;   // set data pointer for GETINFO
+
+      // print results
+      printf("memory 0x%lx-0x%lx, base address 0x%x, irq 0x%x\n",
+              RCUD_GETINFO->mem_start, RCUD_GETINFO->mem_end,
+              RCUD_GETINFO->base_addr, RCUD_GETINFO->irq);
+*/
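+
+/*
+   A minimal sketch, by analogy with the GETINFO example above, for one of the
+   SET commands (SETSPEED).  The interface name and speed code used here are
+   only illustrative, and assume the driver's private ioctl handler accepts the
+   codes documented for RCSetLinkSpeed() in rclanmtl.c:
+
+      RCuser_struct RCuser;
+      struct ifreq ifr;
+
+      RCuser.cmd = RCUC_SETSPEED;
+      RCuser.RCUS_SETSPEED.LinkSpeedCode = 1;        // 1 = Full Duplex 100BaseT
+      strcpy(ifr.ifr_name, "eth0");                  // illustrative interface name
+      ifr.ifr_data = (caddr_t) &RCuser;
+
+      sock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
+      ioctl(sock, RCU_COMMAND, &ifr);
+*/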
+
+#endif                         /* RCIF_H */
diff --git a/drivers/net/rclanmtl.c b/drivers/net/rclanmtl.c
new file mode 100644 (file)
index 0000000..14bd88a
--- /dev/null
@@ -0,0 +1,2029 @@
+/*
+** *************************************************************************
+**
+**
+**     R C L A N M T L . C             $Revision: 6 $
+**
+**
+**  RedCreek I2O LAN Message Transport Layer program module.
+**
+**  ---------------------------------------------------------------------
+**  ---     Copyright (c) 1997-1999, RedCreek Communications Inc.     ---
+**  ---                   All rights reserved.                        ---
+**  ---------------------------------------------------------------------
+**
+**  File Description:
+**
+**  Host side I2O (Intelligent I/O) LAN message transport layer.
+**
+**  This program is free software; you can redistribute it and/or modify
+**  it under the terms of the GNU General Public License as published by
+**  the Free Software Foundation; either version 2 of the License, or
+**  (at your option) any later version.
+
+**  This program is distributed in the hope that it will be useful,
+**  but WITHOUT ANY WARRANTY; without even the implied warranty of
+**  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+**  GNU General Public License for more details.
+
+**  You should have received a copy of the GNU General Public License
+**  along with this program; if not, write to the Free Software
+**  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+**
+** 1998-1999, LAN API was modified and enhanced by Alice Hennessy.
+**
+** Sometime in 1997, LAN API was written from scratch by Wendell Nichols.
+** *************************************************************************
+*/
+
+#define DEBUG 1
+
+#define RC_LINUX_MODULE
+#include "rclanmtl.h"
+
+ /* RedCreek LAN device Target ID */
+#define RC_LAN_TARGET_ID  0x10
+ /* RedCreek's OSM default LAN receive Initiator */
+#define DEFAULT_RECV_INIT_CONTEXT  0xA17
+
+/*
+** I2O message structures
+*/
+
+#define    I2O_TID_SZ                                  12
+#define    I2O_FUNCTION_SZ                             8
+
+/* Transaction Reply Lists (TRL) Control Word structure */
+
+#define    I2O_TRL_FLAGS_SINGLE_FIXED_LENGTH           0x00
+#define    I2O_TRL_FLAGS_SINGLE_VARIABLE_LENGTH        0x40
+#define    I2O_TRL_FLAGS_MULTIPLE_FIXED_LENGTH         0x80
+
+/* LAN Class specific functions */
+
+#define    I2O_LAN_PACKET_SEND                         0x3B
+#define    I2O_LAN_SDU_SEND                            0x3D
+#define    I2O_LAN_RECEIVE_POST                        0x3E
+#define    I2O_LAN_RESET                               0x35
+#define    I2O_LAN_SHUTDOWN                            0x37
+
+/* Private Class specific function */
+#define    I2O_PRIVATE                                 0xFF
+
+/*  I2O Executive Function Codes.  */
+
+#define    I2O_EXEC_ADAPTER_ASSIGN                     0xB3
+#define    I2O_EXEC_ADAPTER_READ                       0xB2
+#define    I2O_EXEC_ADAPTER_RELEASE                    0xB5
+#define    I2O_EXEC_BIOS_INFO_SET                      0xA5
+#define    I2O_EXEC_BOOT_DEVICE_SET                    0xA7
+#define    I2O_EXEC_CONFIG_VALIDATE                    0xBB
+#define    I2O_EXEC_CONN_SETUP                         0xCA
+#define    I2O_EXEC_DEVICE_ASSIGN                      0xB7
+#define    I2O_EXEC_DEVICE_RELEASE                     0xB9
+#define    I2O_EXEC_HRT_GET                            0xA8
+#define    I2O_EXEC_IOP_CLEAR                          0xBE
+#define    I2O_EXEC_IOP_CONNECT                        0xC9
+#define    I2O_EXEC_IOP_RESET                          0xBD
+#define    I2O_EXEC_LCT_NOTIFY                         0xA2
+#define    I2O_EXEC_OUTBOUND_INIT                      0xA1
+#define    I2O_EXEC_PATH_ENABLE                        0xD3
+#define    I2O_EXEC_PATH_QUIESCE                       0xC5
+#define    I2O_EXEC_PATH_RESET                         0xD7
+#define    I2O_EXEC_STATIC_MF_CREATE                   0xDD
+#define    I2O_EXEC_STATIC_MF_RELEASE                  0xDF
+#define    I2O_EXEC_STATUS_GET                         0xA0
+#define    I2O_EXEC_SW_DOWNLOAD                        0xA9
+#define    I2O_EXEC_SW_UPLOAD                          0xAB
+#define    I2O_EXEC_SW_REMOVE                          0xAD
+#define    I2O_EXEC_SYS_ENABLE                         0xD1
+#define    I2O_EXEC_SYS_MODIFY                         0xC1
+#define    I2O_EXEC_SYS_QUIESCE                        0xC3
+#define    I2O_EXEC_SYS_TAB_SET                        0xA3
+
+ /* Init Outbound Q status */
+#define    I2O_EXEC_OUTBOUND_INIT_IN_PROGRESS          0x01
+#define    I2O_EXEC_OUTBOUND_INIT_REJECTED             0x02
+#define    I2O_EXEC_OUTBOUND_INIT_FAILED               0x03
+#define    I2O_EXEC_OUTBOUND_INIT_COMPLETE             0x04
+
+#define    I2O_UTIL_NOP                                0x00
+
+/* I2O Get Status State values */
+
+#define    I2O_IOP_STATE_INITIALIZING                  0x01
+#define    I2O_IOP_STATE_RESET                         0x02
+#define    I2O_IOP_STATE_HOLD                          0x04
+#define    I2O_IOP_STATE_READY                         0x05
+#define    I2O_IOP_STATE_OPERATIONAL                   0x08
+#define    I2O_IOP_STATE_FAILED                        0x10
+#define    I2O_IOP_STATE_FAULTED                       0x11
+
+/* Defines for Request Status Codes:  Table 3-1 Reply Status Codes.  */
+
+#define    I2O_REPLY_STATUS_SUCCESS                    0x00
+#define    I2O_REPLY_STATUS_ABORT_DIRTY                0x01
+#define    I2O_REPLY_STATUS_ABORT_NO_DATA_TRANSFER     0x02
+#define    I2O_REPLY_STATUS_ABORT_PARTIAL_TRANSFER     0x03
+#define    I2O_REPLY_STATUS_ERROR_DIRTY                0x04
+#define    I2O_REPLY_STATUS_ERROR_NO_DATA_TRANSFER     0x05
+#define    I2O_REPLY_STATUS_ERROR_PARTIAL_TRANSFER     0x06
+#define    I2O_REPLY_STATUS_PROCESS_ABORT_DIRTY        0x07
+#define    I2O_REPLY_STATUS_PROCESS_ABORT_NO_DATA_TRANSFER   0x08
+#define    I2O_REPLY_STATUS_PROCESS_ABORT_PARTIAL_TRANSFER   0x09
+#define    I2O_REPLY_STATUS_TRANSACTION_ERROR          0x0A
+#define    I2O_REPLY_STATUS_PROGRESS_REPORT            0x80
+
+/* DetailedStatusCode defines for ALL messages: Table 3-2 Detailed Status Codes.*/
+
+#define    I2O_DETAIL_STATUS_SUCCESS                        0x0000
+#define    I2O_DETAIL_STATUS_BAD_KEY                        0x0001
+#define    I2O_DETAIL_STATUS_CHAIN_BUFFER_TOO_LARGE         0x0002
+#define    I2O_DETAIL_STATUS_DEVICE_BUSY                    0x0003
+#define    I2O_DETAIL_STATUS_DEVICE_LOCKED                  0x0004
+#define    I2O_DETAIL_STATUS_DEVICE_NOT_AVAILABLE           0x0005
+#define    I2O_DETAIL_STATUS_DEVICE_RESET                   0x0006
+#define    I2O_DETAIL_STATUS_INAPPROPRIATE_FUNCTION         0x0007
+#define    I2O_DETAIL_STATUS_INSUFFICIENT_RESOURCE_HARD     0x0008
+#define    I2O_DETAIL_STATUS_INSUFFICIENT_RESOURCE_SOFT     0x0009
+#define    I2O_DETAIL_STATUS_INVALID_INITIATOR_ADDRESS      0x000A
+#define    I2O_DETAIL_STATUS_INVALID_MESSAGE_FLAGS          0x000B
+#define    I2O_DETAIL_STATUS_INVALID_OFFSET                 0x000C
+#define    I2O_DETAIL_STATUS_INVALID_PARAMETER              0x000D
+#define    I2O_DETAIL_STATUS_INVALID_REQUEST                0x000E
+#define    I2O_DETAIL_STATUS_INVALID_TARGET_ADDRESS         0x000F
+#define    I2O_DETAIL_STATUS_MESSAGE_TOO_LARGE              0x0010
+#define    I2O_DETAIL_STATUS_MESSAGE_TOO_SMALL              0x0011
+#define    I2O_DETAIL_STATUS_MISSING_PARAMETER              0x0012
+#define    I2O_DETAIL_STATUS_NO_SUCH_PAGE                   0x0013
+#define    I2O_DETAIL_STATUS_REPLY_BUFFER_FULL              0x0014
+#define    I2O_DETAIL_STATUS_TCL_ERROR                      0x0015
+#define    I2O_DETAIL_STATUS_TIMEOUT                        0x0016
+#define    I2O_DETAIL_STATUS_UNKNOWN_ERROR                  0x0017
+#define    I2O_DETAIL_STATUS_UNKNOWN_FUNCTION               0x0018
+#define    I2O_DETAIL_STATUS_UNSUPPORTED_FUNCTION           0x0019
+#define    I2O_DETAIL_STATUS_UNSUPPORTED_VERSION            0x001A
+
+ /* I2O msg header defines for VersionOffset */
+#define I2OMSGVER_1_5   0x0001
+#define SGL_OFFSET_0    I2OMSGVER_1_5
+#define SGL_OFFSET_4    (0x0040 | I2OMSGVER_1_5)
+#define TRL_OFFSET_5    (0x0050 | I2OMSGVER_1_5)
+#define TRL_OFFSET_6    (0x0060 | I2OMSGVER_1_5)
+
+ /* I2O msg header defines for MsgFlags */
+#define MSG_STATIC      0x0100
+#define MSG_64BIT_CNTXT 0x0200
+#define MSG_MULTI_TRANS 0x1000
+#define MSG_FAIL        0x2000
+#define MSG_LAST        0x4000
+#define MSG_REPLY       0x8000
+
+  /* normal LAN request message MsgFlags and VersionOffset (0x1041) */
+#define LAN_MSG_REQST  (MSG_MULTI_TRANS | SGL_OFFSET_4)
+
+ /* message frame sizes (word count in the upper 16 bits of the first message dword) */
+#define THREE_WORD_MSG_SIZE 0x00030000
+#define FOUR_WORD_MSG_SIZE  0x00040000
+#define FIVE_WORD_MSG_SIZE  0x00050000
+#define SIX_WORD_MSG_SIZE   0x00060000
+#define SEVEN_WORD_MSG_SIZE 0x00070000
+#define EIGHT_WORD_MSG_SIZE 0x00080000
+#define NINE_WORD_MSG_SIZE  0x00090000
+
+/* Special TID Assignments */
+
+#define I2O_IOP_TID   0
+#define I2O_HOST_TID  0xB91
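+
+ /* Illustrative note: the request builders later in this file compose the
+    first two header dwords directly from the defines above, e.g.:
+
+        pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0;
+        pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+
+    i.e. word count and SGL offset in the first dword, and function code,
+    initiator TID and target TID packed into the second. */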
+
+ /* RedCreek I2O private message codes */
+#define RC_PRIVATE_GET_MAC_ADDR     0x0001 /* OBSOLETE */
+#define RC_PRIVATE_SET_MAC_ADDR     0x0002
+#define RC_PRIVATE_GET_NIC_STATS    0x0003
+#define RC_PRIVATE_GET_LINK_STATUS  0x0004
+#define RC_PRIVATE_SET_LINK_SPEED   0x0005
+#define RC_PRIVATE_SET_IP_AND_MASK  0x0006
+/* #define RC_PRIVATE_GET_IP_AND_MASK  0x0007 *//* OBSOLETE */
+#define RC_PRIVATE_GET_LINK_SPEED   0x0008
+#define RC_PRIVATE_GET_FIRMWARE_REV 0x0009
+/* #define RC_PRIVATE_GET_MAC_ADDR     0x000A */
+#define RC_PRIVATE_GET_IP_AND_MASK  0x000B
+#define RC_PRIVATE_DEBUG_MSG        0x000C
+#define RC_PRIVATE_REPORT_DRIVER_CAPABILITY  0x000D
+#define RC_PRIVATE_SET_PROMISCUOUS_MODE  0x000e
+#define RC_PRIVATE_GET_PROMISCUOUS_MODE  0x000f
+#define RC_PRIVATE_SET_BROADCAST_MODE    0x0010
+#define RC_PRIVATE_GET_BROADCAST_MODE    0x0011
+
+#define RC_PRIVATE_REBOOT           0x00FF
+
+/* I2O message header */
+typedef struct _I2O_MESSAGE_FRAME {
+       U8 VersionOffset;
+       U8 MsgFlags;
+       U16 MessageSize;
+       BF TargetAddress:I2O_TID_SZ;
+       BF InitiatorAddress:I2O_TID_SZ;
+       BF Function:I2O_FUNCTION_SZ;
+       U32 InitiatorContext;
+       /* SGL[] */
+} I2O_MESSAGE_FRAME, *PI2O_MESSAGE_FRAME;
+
+ /* assumes a 16K minus 256 byte space for outbound queue message frames */
+#define MSG_FRAME_SIZE  512
+#define NMBR_MSG_FRAMES 30
+
+ /*
+    ** The reserved space right after the PAB in host memory is the area
+    ** used for returning values from the card.
+  */
+
+/*
+** typedef NICSTAT
+**
+** Data structure for NIC statistics returned from the PCI card.  Data is copied
+** from here to the user-allocated RCLINKSTATS structure (see rclanmtl.h).
+*/
+typedef struct tag_NicStat {
+       unsigned long TX_good;
+       unsigned long TX_maxcol;
+       unsigned long TX_latecol;
+       unsigned long TX_urun;
+       unsigned long TX_crs;   /* lost carrier sense */
+       unsigned long TX_def;   /* transmit deferred */
+       unsigned long TX_singlecol;     /* single collisions */
+       unsigned long TX_multcol;
+       unsigned long TX_totcol;
+       unsigned long Rcv_good;
+       unsigned long Rcv_CRCerr;
+       unsigned long Rcv_alignerr;
+       unsigned long Rcv_reserr;       /* rnr'd pkts */
+       unsigned long Rcv_orun;
+       unsigned long Rcv_cdt;
+       unsigned long Rcv_runt;
+       unsigned long dump_status;      /* last field directly from the chip */
+} NICSTAT, *P_NICSTAT;
+
+#define DUMP_DONE   0x0000A005 /* completed statistical dump */
+#define DUMP_CLEAR  0x0000A007 /* completed stat dump and clear counters */
+
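+/* set by RCProcI2OMsgQ() when an RC_PRIVATE_DEBUG_MSG reply is received */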
+static volatile int msgFlag;
+
+/* local function prototypes */
+static void ProcessOutboundI2OMsg (PPAB pPab, U32 phyMsgAddr);
+static int FillI2OMsgSGLFromTCB (PU32 pMsg, PRCTCB pXmitCntrlBlock);
+static int GetI2OStatus (PPAB pPab);
+static int SendI2OOutboundQInitMsg (PPAB pPab);
+static int SendEnableSysMsg (PPAB pPab);
+
+/*
+** =========================================================================
+** RCInitI2OMsgLayer()
+**
+** Initialize the RedCreek I2O Module and adapter.
+**
+** Inputs:  dev - the device's net_device struct
+**          TransmitCallbackFunction - address of transmit callback function
+**          ReceiveCallbackFunction  - address of receive  callback function
+**
+** The private message block is allocated by the user and must be in locked
+** pages.  p_msgbuf and p_phymsgbuf point to the same location.  It must be a
+** contiguous memory block of at least 16K bytes and long word aligned.
+** =========================================================================
+*/
+RC_RETURN
+RCInitI2OMsgLayer (struct net_device *dev,
+                  PFNTXCALLBACK TransmitCallbackFunction,
+                  PFNRXCALLBACK ReceiveCallbackFunction,
+                  PFNCALLBACK RebootCallbackFunction)
+{
+       int result;
+       PPAB pPab;
+       U32 pciBaseAddr = dev->base_addr;
+       PDPA pDpa = dev->priv;
+       PU8 p_msgbuf = pDpa->msgbuf;
+       PU8 p_phymsgbuf = (PU8) pDpa->msgbuf_dma;
+
+       dprintk
+           ("InitI2O: Adapter:0x%04ux ATU:0x%08ulx msgbuf:%p phymsgbuf:0x%08ulx\n"
+            "TransmitCallbackFunction:0x%08ulx  ReceiveCallbackFunction:0x%08ulx\n",
+            pDpa->id, pciBaseAddr, p_msgbuf, (u32) p_phymsgbuf,
+            (u32) TransmitCallbackFunction, (u32) ReceiveCallbackFunction);
+
+       /* Check if this interface is already initialized - if so, shut it down */
+       if (pDpa->pPab != NULL) {
+               printk (KERN_WARNING
+                       "(rcpci45 driver:) pDpa->pPab [%d] != NULL\n",
+                       pDpa->id);
+/*          RCResetLANCard(pDpa->id, 0, (PU32)NULL, (PFNCALLBACK)NULL); */
+               pDpa->pPab = NULL;
+       }
+
+       /* store adapter instance values in the adapter block (PAB),
+        * which is allocated separately from the message buffer */
+
+       pPab = kmalloc (sizeof (*pPab), GFP_KERNEL);
+       if (!pPab) {
+               printk (KERN_ERR
+                       "(rcpci45 driver:) RCInitI2OMsgLayer: Could not allocate memory for PAB struct!\n");
+               result = RC_RTN_MALLOC_ERROR;
+               goto err_out;
+       }
+
+       memset (pPab, 0, sizeof (*pPab));
+       pDpa->pPab = pPab;
+       pPab->p_atu = (PATU) pciBaseAddr;
+       pPab->pPci45LinBaseAddr = (PU8) pciBaseAddr;
+
+       /* Set outbound message frame addr */
+       pPab->outMsgBlockPhyAddr = (U32) p_phymsgbuf;
+       pPab->pLinOutMsgBlock = (PU8) p_msgbuf;
+
+       /* store callback function addresses */
+       pPab->pTransCallbackFunc = TransmitCallbackFunction;
+       pPab->pRecvCallbackFunc = ReceiveCallbackFunction;
+       pPab->pRebootCallbackFunc = RebootCallbackFunction;
+       pPab->pCallbackFunc = (PFNCALLBACK) NULL;
+
+       /*
+          ** Initialize I2O IOP
+        */
+       result = GetI2OStatus (pPab);
+
+       if (result != RC_RTN_NO_ERROR)
+               goto err_out_dealloc;
+
+       if (pPab->IOPState == I2O_IOP_STATE_OPERATIONAL) {
+               printk (KERN_INFO
+                       "(rcpci45 driver:) pPab->IOPState == op: resetting adapter\n");
+               RCResetLANCard (dev, 0, (PU32) NULL, (PFNCALLBACK) NULL);
+       }
+
+       result = SendI2OOutboundQInitMsg (pPab);
+
+       if (result != RC_RTN_NO_ERROR)
+               goto err_out_dealloc;
+
+       result = SendEnableSysMsg (pPab);
+
+       if (result != RC_RTN_NO_ERROR)
+               goto err_out_dealloc;
+
+       return RC_RTN_NO_ERROR;
+
+      err_out_dealloc:
+       kfree (pPab);
+      err_out:
+       return result;
+}
+
+/*
+** =========================================================================
+** Disable and Enable I2O interrupts.  I2O interrupts are enabled at Init time
+** but can be disabled and re-enabled through these two function calls.
+** Packets will still be placed into any posted receive buffers, and packets
+** can still be sent through the RCI2OSendPacket() function.  Disabling I2O
+** interrupts will prevent hardware interrupts to the host even though the
+** outbound I2O msg queue is not empty.
+** =========================================================================
+*/
+#define i960_OUT_POST_Q_INT_BIT        0x0008  /* bit set masks interrupts */
+
+RC_RETURN
+RCDisableI2OInterrupts (struct net_device * dev)
+{
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+
+       if (pPab == NULL)
+               return RC_RTN_ADPTR_NOT_REGISTERED;
+
+       pPab->p_atu->OutIntMask |= i960_OUT_POST_Q_INT_BIT;
+
+       return RC_RTN_NO_ERROR;
+}
+
+RC_RETURN
+RCEnableI2OInterrupts (struct net_device * dev)
+{
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+
+       if (pPab == NULL)
+               return RC_RTN_ADPTR_NOT_REGISTERED;
+
+       pPab->p_atu->OutIntMask &= ~i960_OUT_POST_Q_INT_BIT;
+
+       return RC_RTN_NO_ERROR;
+
+}
+
+/*
+** =========================================================================
+** RCI2OSendPacket()
+** =========================================================================
+*/
+RC_RETURN
+RCI2OSendPacket (struct net_device * dev, U32 InitiatorContext,
+                PRCTCB pTransCtrlBlock)
+{
+       U32 msgOffset;
+       PU32 pMsg;
+       int size;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+
+       dprintk ("RCI2OSendPacket()...\n");
+
+       if (pPab == NULL)
+               return RC_RTN_ADPTR_NOT_REGISTERED;
+
+       /* get Inbound free Q entry - reading from In Q gets free Q entry */
+       /* offset to Msg Frame in PCI msg block */
+
+       msgOffset = pPab->p_atu->InQueue;
+
+       if (msgOffset == 0xFFFFFFFF) {
+               dprintk ("RCI2OSendPacket(): Inbound Free Q empty!\n");
+               return RC_RTN_FREE_Q_EMPTY;
+       }
+
+       /* calc virtual address of msg - virtual already mapped to physical */
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset);
+
+       size = FillI2OMsgSGLFromTCB (pMsg + 4, pTransCtrlBlock);
+
+       if (size == -1) {       /* error processing TCB - send NOP msg */
+               dprintk ("RCI2OSendPacket(): Error Processing TCB!\n");
+               pMsg[0] = THREE_WORD_MSG_SIZE | SGL_OFFSET_0;
+               pMsg[1] =
+                   I2O_UTIL_NOP << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+               /* post the NOP so the message frame is returned to the inbound free Q */
+               pPab->p_atu->InQueue = msgOffset;
+               return RC_RTN_TCB_ERROR;
+       } else {                /* send over msg header */
+
+               pMsg[0] = (size + 4) << 16 | LAN_MSG_REQST;     /* send over message size and flags */
+               pMsg[1] =
+                   I2O_LAN_PACKET_SEND << 24 | I2O_HOST_TID << 12 |
+                   RC_LAN_TARGET_ID;
+               pMsg[2] = InitiatorContext;
+               pMsg[3] = 0;    /* batch reply */
+               /* post to Inbound Post Q */
+               pPab->p_atu->InQueue = msgOffset;
+               return RC_RTN_NO_ERROR;
+       }
+}
+
+/*
+** =========================================================================
+** RCPostRecvBuffers()
+**
+** inputs:  pTransCtrlBlock - pointer to a transaction control block describing
+**                            the receive buffers to post
+**
+** returns RC_RTN_NO_ERROR if the message was sent, else an RC_RETURN error code.
+** =========================================================================
+*/
+RC_RETURN
+RCPostRecvBuffers (struct net_device * dev, PRCTCB pTransCtrlBlock)
+{
+       U32 msgOffset;
+       PU32 pMsg;
+       int size;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+
+       dprintk ("RCPostRecvBuffers()...\n");
+
+       /* search for DeviceHandle */
+
+       if (pPab == NULL)
+               return RC_RTN_ADPTR_NOT_REGISTERED;
+
+       /* get Inbound free Q entry - reading from In Q gets free Q entry */
+       /* offset to Msg Frame in PCI msg block */
+       msgOffset = pPab->p_atu->InQueue;
+
+       if (msgOffset == 0xFFFFFFFF) {
+               dprintk ("RCPostRecvBuffers(): Inbound Free Q empty!\n");
+               return RC_RTN_FREE_Q_EMPTY;
+       }
+       /* calc virtual address of msg - virtual already mapped to physical */
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset);
+
+       size = FillI2OMsgSGLFromTCB (pMsg + 4, pTransCtrlBlock);
+
+       if (size == -1) {       /* error processing TCB - send 3 DWORD NOP msg */
+               dprintk
+                   ("RCPostRecvBuffers(): Error Processing TCB! size = %d\n",
+                    size);
+               pMsg[0] = THREE_WORD_MSG_SIZE | SGL_OFFSET_0;
+               pMsg[1] =
+                   I2O_UTIL_NOP << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+               /* post to Post Q */
+               pPab->p_atu->InQueue = msgOffset;
+               return RC_RTN_TCB_ERROR;
+       } else {                /* send over size msg header */
+
+               pMsg[0] = (size + 4) << 16 | LAN_MSG_REQST;     /* send over message size and flags */
+               pMsg[1] =
+                   I2O_LAN_RECEIVE_POST << 24 | I2O_HOST_TID << 12 |
+                   RC_LAN_TARGET_ID;
+               pMsg[2] = DEFAULT_RECV_INIT_CONTEXT;
+               pMsg[3] = *(PU32) pTransCtrlBlock;      /* number of packet buffers */
+               /* post to Post Q */
+               pPab->p_atu->InQueue = msgOffset;
+               return RC_RTN_NO_ERROR;
+       }
+}
+
+/*
+** =========================================================================
+** RCProcI2OMsgQ()
+**
+** Process I2O outbound message queue until empty.
+** =========================================================================
+*/
+irqreturn_t
+RCProcI2OMsgQ (struct net_device *dev)
+{
+       U32 phyAddrMsg;
+       PU8 p8Msg;
+       PU32 p32;
+       U16 count;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+       unsigned char debug_msg[20];
+
+       if (pPab == NULL)
+               return IRQ_NONE;
+
+       phyAddrMsg = pPab->p_atu->OutQueue;
+
+       while (phyAddrMsg != 0xFFFFFFFF) {
+               p8Msg =
+                   pPab->pLinOutMsgBlock + (phyAddrMsg -
+                                            pPab->outMsgBlockPhyAddr);
+               p32 = (PU32) p8Msg;
+
+               dprintk ("msg: 0x%x  0x%x \n", p8Msg[7], p32[5]);
+
+               /* Send Packet Reply Msg */
+               if (I2O_LAN_PACKET_SEND == p8Msg[7]) {  /* function code byte */
+                       count = *(PU16) (p8Msg + 2);
+                       count -= p8Msg[0] >> 4;
+                       /* status, count, context[], adapter */
+                       (*pPab->pTransCallbackFunc) (p8Msg[19], count, p32 + 5,
+                                                    dev);
+               } else if (I2O_LAN_RECEIVE_POST == p8Msg[7]) {  /* Receive Packet Reply Msg */
+                       dprintk
+                           ("I2O_RECV_REPLY pPab:0x%08ulx p8Msg:0x%08ulx p32:0x%08ulx\n",
+                            (u32) pPab, (u32) p8Msg, (u32) p32);
+                       dprintk ("msg: 0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n",
+                                p32[0], p32[1], p32[2], p32[3]);
+                       dprintk ("     0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n",
+                                p32[4], p32[5], p32[6], p32[7]);
+                       dprintk ("     0x%08ulx:0X%08ulx:0x%08ulx:0x%08ulx\n",
+                                p32[8], p32[9], p32[10], p32[11]);
+                       /*  status, count, buckets remaining, packetParmBlock, adapter */
+                       (*pPab->pRecvCallbackFunc) (p8Msg[19], p8Msg[12],
+                                                   p32[5], p32 + 6, dev);
+               } else if (I2O_LAN_RESET == p8Msg[7]
+                          || I2O_LAN_SHUTDOWN == p8Msg[7])
+                       if (pPab->pCallbackFunc)
+                               (*pPab->pCallbackFunc) (p8Msg[19], 0, 0, dev);
+                       else
+                               pPab->pCallbackFunc = (PFNCALLBACK) 1;
+               else if (I2O_PRIVATE == p8Msg[7]) {
+                       dprintk ("i2o private 0x%x, 0x%x \n", p8Msg[7], p32[5]);
+                       switch (p32[5]) {
+                       case RC_PRIVATE_DEBUG_MSG:
+                               msgFlag = 1;
+                               dprintk ("Received I2O_PRIVATE msg\n");
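+                               /* reassemble the 16-char debug string from p32[6..9]:
+                                  each dword is little-endian, and p32[9] carries the
+                                  first four characters */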
+                               debug_msg[15] = (p32[6] & 0xff000000) >> 24;
+                               debug_msg[14] = (p32[6] & 0x00ff0000) >> 16;
+                               debug_msg[13] = (p32[6] & 0x0000ff00) >> 8;
+                               debug_msg[12] = (p32[6] & 0x000000ff);
+
+                               debug_msg[11] = (p32[7] & 0xff000000) >> 24;
+                               debug_msg[10] = (p32[7] & 0x00ff0000) >> 16;
+                               debug_msg[9] = (p32[7] & 0x0000ff00) >> 8;
+                               debug_msg[8] = (p32[7] & 0x000000ff);
+
+                               debug_msg[7] = (p32[8] & 0xff000000) >> 24;
+                               debug_msg[6] = (p32[8] & 0x00ff0000) >> 16;
+                               debug_msg[5] = (p32[8] & 0x0000ff00) >> 8;
+                               debug_msg[4] = (p32[8] & 0x000000ff);
+
+                               debug_msg[3] = (p32[9] & 0xff000000) >> 24;
+                               debug_msg[2] = (p32[9] & 0x00ff0000) >> 16;
+                               debug_msg[1] = (p32[9] & 0x0000ff00) >> 8;
+                               debug_msg[0] = (p32[9] & 0x000000ff);
+
+                               debug_msg[16] = '\0';
+                               dprintk ("%s", debug_msg);
+                               break;
+                       case RC_PRIVATE_REBOOT:
+                               dprintk ("Adapter reboot initiated...\n");
+                               if (pPab->pRebootCallbackFunc)
+                                       (*pPab->pRebootCallbackFunc) (0, 0, 0,
+                                                                     dev);
+                               break;
+                       default:
+                               printk (KERN_WARNING
+                                       "(rcpci45 driver:) Unknown private I2O msg received: 0x%x\n",
+                                       p32[5]);
+                               break;
+                       }
+               }
+
+               /* 
+                  ** Process other Msg's
+                */
+               else
+                       ProcessOutboundI2OMsg (pPab, phyAddrMsg);
+
+               /* return MFA to outbound free Q */
+               pPab->p_atu->OutQueue = phyAddrMsg;
+
+               /* any more msgs? */
+               phyAddrMsg = pPab->p_atu->OutQueue;
+       }
+
+       return IRQ_HANDLED;
+}
+
+/*
+** =========================================================================
+**  Returns LAN interface statistical counters to space provided by caller at
+**  StatsReturnAddr.  Returns 0 if success, else RC_RETURN code.
+**  This function will call the WaitCallback function provided by
+**  user while waiting for card to respond.
+** =========================================================================
+*/
+RC_RETURN
+RCGetLinkStatistics (struct net_device *dev,
+                    P_RCLINKSTATS StatsReturnAddr,
+                    PFNWAITCALLBACK WaitCallback)
+{
+       U32 msgOffset;
+       volatile U32 timeout;
+       volatile PU32 pMsg;
+       volatile PU32 p32, pReturnAddr;
+       P_NICSTAT pStats;
+       int i;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+
+/*dprintk("Get82558Stats() StatsReturnAddr:0x%08ulx\n", StatsReturnAddr); */
+
+       if (pPab == NULL)
+               return RC_RTN_ADPTR_NOT_REGISTERED;
+
+       msgOffset = pPab->p_atu->InQueue;
+
+       if (msgOffset == 0xFFFFFFFF) {
+               dprintk ("Get8255XStats(): Inbound Free Q empty!\n");
+               return RC_RTN_FREE_Q_EMPTY;
+       }
+
+       /* calc virtual address of msg - virtual already mapped to physical */
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset);
+
+/*dprintk("Get82558Stats - pMsg = 0x%08ulx, InQ msgOffset = 0x%08ulx\n", pMsg, msgOffset);*/
+/*dprintk("Get82558Stats - pMsg = 0x%08X, InQ msgOffset = 0x%08X\n", pMsg, msgOffset);*/
+
+       pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+       pMsg[2] = DEFAULT_RECV_INIT_CONTEXT;
+       pMsg[3] = 0x112;        /* transaction context */
+       pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_GET_NIC_STATS;
+       pMsg[5] = pPab->outMsgBlockPhyAddr;
+
+       p32 = (PU32) pPab->outMsgBlockPhyAddr;
+       pStats = (P_NICSTAT) pPab->pLinOutMsgBlock;
+       pStats->dump_status = 0xFFFFFFFF;
+
+       /* post to Inbound Post Q */
+       pPab->p_atu->InQueue = msgOffset;
+
+       timeout = 100000;
+       while (1) {
+               if (WaitCallback)
+                       (*WaitCallback) ();
+
+               udelay (10);
+
+               if (pStats->dump_status != 0xFFFFFFFF)
+                       break;
+
+               if (!timeout--) {
+                       dprintk
+                           ("RCGet82558Stats() Timeout waiting for NIC statistics\n");
+                       return RC_RTN_MSG_REPLY_TIMEOUT;
+               }
+       }
+
+       pReturnAddr = (PU32) StatsReturnAddr;
+
+       /* copy Nic stats to user's structure */
+       for (i = 0; i < (int) sizeof (RCLINKSTATS) / 4; i++)
+               pReturnAddr[i] = p32[i];
+
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** RCGetLinkStatus()
+** =========================================================================
+*/
+RC_RETURN
+RCGetLinkStatus (struct net_device * dev, PU32 ReturnAddr,
+                PFNWAITCALLBACK WaitCallback)
+{
+       U32 msgOffset;
+       volatile U32 timeout;
+       volatile PU32 pMsg;
+       volatile PU32 p32;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+
+       dprintk ("Get82558LinkStatus() ReturnPhysAddr:0x%08ulx\n",
+                (u32) ReturnAddr);
+
+       if (pPab == NULL)
+               return RC_RTN_ADPTR_NOT_REGISTERED;
+
+       msgOffset = pPab->p_atu->InQueue;
+
+       if (msgOffset == 0xFFFFFFFF) {
+               dprintk ("Get82558LinkStatus(): Inbound Free Q empty!\n");
+               return RC_RTN_FREE_Q_EMPTY;
+       }
+
+       /* calc virtual address of msg - virtual already mapped to physical */
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset);
+/*dprintk("Get82558LinkStatus - pMsg = 0x%08ulx, InQ msgOffset = 0x%08ulx\n", pMsg, msgOffset);*/
+/*dprintk("Get82558LinkStatus - pMsg = 0x%08X, InQ msgOffset = 0x%08X\n", pMsg, msgOffset);*/
+
+       pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+       pMsg[2] = DEFAULT_RECV_INIT_CONTEXT;
+       pMsg[3] = 0x112;        /* transaction context */
+       pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_GET_LINK_STATUS;
+       pMsg[5] = pPab->outMsgBlockPhyAddr;
+
+       p32 = (PU32) pPab->pLinOutMsgBlock;
+       *p32 = 0xFFFFFFFF;
+
+       /* post to Inbound Post Q */
+       pPab->p_atu->InQueue = msgOffset;
+
+       timeout = 100000;
+       while (1) {
+               if (WaitCallback)
+                       (*WaitCallback) ();
+
+               udelay (10);
+
+               if (*p32 != 0xFFFFFFFF)
+                       break;
+
+               if (!timeout--) {
+                       dprintk ("Timeout waiting for link status\n");
+                       return RC_RTN_MSG_REPLY_TIMEOUT;
+               }
+       }
+
+       *ReturnAddr = *p32;     /* 1 = up 0 = down */
+
+       return RC_RTN_NO_ERROR;
+
+}
+
+/*
+** =========================================================================
+** RCGetMAC()
+**
+** get the MAC address the adapter is listening for in non-promiscuous mode.
+** MAC address is in media format.
+** =========================================================================
+*/
+RC_RETURN
+RCGetMAC (struct net_device * dev, PFNWAITCALLBACK WaitCallback)
+{
+       unsigned timeout;
+       U32 off;
+       PU8 mac = dev->dev_addr;
+       PU32 p;
+       U32 temp[2];
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+       PATU p_atu;
+
+       if (pPab == NULL)
+               return RC_RTN_ADPTR_NOT_REGISTERED;
+
+       p_atu = pPab->p_atu;
+
+       p_atu->EtherMacLow = 0; /* first zero return data */
+       p_atu->EtherMacHi = 0;
+
+       off = p_atu->InQueue;   /* get address of message */
+
+       if (0xFFFFFFFF == off)
+               return RC_RTN_FREE_Q_EMPTY;
+
+       p = (PU32) (pPab->pPci45LinBaseAddr + off);
+
+       dprintk ("RCGetMAC: p_atu 0x%08x, off 0x%08x, p 0x%08x\n",
+                (uint) p_atu, (uint) off, (uint) p);
+       /* setup private message */
+       p[0] = FIVE_WORD_MSG_SIZE | SGL_OFFSET_0;
+       p[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+       p[2] = 0;               /* initiator context */
+       p[3] = 0x218;           /* transaction context */
+       p[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_GET_MAC_ADDR;
+
+       p_atu->InQueue = off;   /* send it to the I2O device */
+       dprintk ("RCGetMAC: p_atu 0x%08x, off 0x%08x, p 0x%08x\n",
+                (uint) p_atu, (uint) off, (uint) p);
+
+       /* wait for the rcpci45 board to update the info */
+       timeout = 1000000;
+       while (0 == p_atu->EtherMacLow) {
+               if (WaitCallback)
+                       (*WaitCallback) ();
+
+               udelay (10);
+
+               if (!timeout--) {
+                       printk ("rc_getmac: Timeout\n");
+                       return RC_RTN_MSG_REPLY_TIMEOUT;
+               }
+       }
+
+       /* read the mac address  */
+       temp[0] = p_atu->EtherMacLow;
+       temp[1] = p_atu->EtherMacHi;
+       memcpy ((char *) mac, (char *) temp, 6);
+
+       dprintk ("rc_getmac: 0x%x\n", (u32) mac);
+
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** RCSetMAC()
+**
+** set the MAC address the adapter is listening for in non-promiscuous mode.
+** MAC address is in media format.
+** =========================================================================
+*/
+RC_RETURN
+RCSetMAC (struct net_device * dev, PU8 mac)
+{
+       U32 off;
+       PU32 pMsg;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+
+       if (pPab == NULL)
+               return RC_RTN_ADPTR_NOT_REGISTERED;
+
+       off = pPab->p_atu->InQueue;     /* get address of message */
+
+       if (0xFFFFFFFF == off)
+               return RC_RTN_FREE_Q_EMPTY;
+
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + off);
+
+       /* setup private message */
+       pMsg[0] = SEVEN_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+       pMsg[2] = 0;            /* initiator context */
+       pMsg[3] = 0x219;        /* transaction context */
+       pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_SET_MAC_ADDR;
+       pMsg[5] = *(unsigned *) mac;    /* first four bytes */
+       pMsg[6] = *(unsigned *) (mac + 4);      /* last two bytes */
+
+       pPab->p_atu->InQueue = off;     /* send it to the I2O device */
+
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** RCSetLinkSpeed()
+**
+** set ethernet link speed. 
+** input: LinkSpeedCode - determines action to take as follows
+**          0 = reset and auto-negotiate (NWay)
+**          1 = Full Duplex 100BaseT
+**          2 = Half duplex 100BaseT
+**          3 = Full Duplex  10BaseT
+**          4 = Half duplex  10BaseT
+**          all other values are ignored (do nothing)
+** =========================================================================
+*/
+RC_RETURN
+RCSetLinkSpeed (struct net_device * dev, U16 LinkSpeedCode)
+{
+       U32 off;
+       PU32 pMsg;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+
+       if (pPab == NULL)
+               return RC_RTN_ADPTR_NOT_REGISTERED;
+
+       off = pPab->p_atu->InQueue;     /* get address of message */
+
+       if (0xFFFFFFFF == off)
+               return RC_RTN_FREE_Q_EMPTY;
+
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + off);
+
+       /* setup private message */
+       pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+       pMsg[2] = 0;            /* initiator context */
+       pMsg[3] = 0x219;        /* transaction context */
+       pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_SET_LINK_SPEED;
+       pMsg[5] = LinkSpeedCode;        /* link speed code */
+
+       pPab->p_atu->InQueue = off;     /* send it to the I2O device */
+
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** RCSetPromiscuousMode()
+**
+** Defined values for Mode:
+**  0 - turn off promiscuous mode
+**  1 - turn on  promiscuous mode
+**
+** =========================================================================
+*/
+RC_RETURN
+RCSetPromiscuousMode (struct net_device * dev, U16 Mode)
+{
+       U32 off;
+       PU32 pMsg;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+
+       if (pPab == NULL)
+               return RC_RTN_ADPTR_NOT_REGISTERED;
+
+       off = pPab->p_atu->InQueue;     /* get address of message */
+
+       if (0xFFFFFFFF == off)
+               return RC_RTN_FREE_Q_EMPTY;
+
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + off);
+
+       /* setup private message */
+       pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+       pMsg[2] = 0;            /* initiator context */
+       pMsg[3] = 0x219;        /* transaction context */
+       pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_SET_PROMISCUOUS_MODE;
+       pMsg[5] = Mode;         /* promiscuous mode setting */
+
+       pPab->p_atu->InQueue = off;     /* send it to the device */
+
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** RCGetPromiscuousMode()
+**
+** get promiscuous mode setting
+**
+** Possible return values placed in pMode:
+**  0 = promiscuous mode not set
+**  1 = promiscuous mode is set
+**
+** =========================================================================
+*/
+RC_RETURN
+RCGetPromiscuousMode (struct net_device * dev, PU32 pMode,
+                     PFNWAITCALLBACK WaitCallback)
+{
+       U32 msgOffset, timeout;
+       PU32 pMsg;
+       volatile PU32 p32;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+
+       msgOffset = pPab->p_atu->InQueue;
+
+       if (msgOffset == 0xFFFFFFFF) {
+               printk (KERN_WARNING
+                       "(rcpci45 driver:) RCGetPromiscuousMode(): Inbound Free Q empty!\n");
+               return RC_RTN_FREE_Q_EMPTY;
+       }
+
+       /* calc virtual address of msg - virtual already mapped to physical */
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset);
+
+       /* virtual pointer to return buffer - preset first dword as a completion flag */
+       p32 = (volatile PU32) pPab->pLinOutMsgBlock;
+       p32[0] = 0xff;
+
+       /* setup private message */
+       pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+       pMsg[2] = 0;            /* initiator context */
+       pMsg[3] = 0x219;        /* transaction context */
+       pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_GET_PROMISCUOUS_MODE;
+       /* phys address to return status - area right after PAB */
+       pMsg[5] = pPab->outMsgBlockPhyAddr;
+
+       /* post to Inbound Post Q */
+
+       pPab->p_atu->InQueue = msgOffset;
+
+       /* wait for response */
+       timeout = 1000000;
+       while (1) {
+               if (WaitCallback)
+                       (*WaitCallback) ();
+
+               udelay (10);    /* please don't hog the bus!!! */
+
+               if (p32[0] != 0xff)
+                       break;
+
+               if (!timeout--) {
+                       dprintk
+                           ("Timeout waiting for promiscuous mode from adapter\n");
+                       dprintk ("0x%8x\n", p32[0]);
+                       return RC_RTN_NO_LINK_SPEED;
+               }
+       }
+
+       /* get mode */
+       *pMode = (U8) ((volatile PU8) p32)[0] & 0x0f;
+
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** RCSetBroadcastMode()
+**
+** Defined values for Mode:
+**  0 - turn off broadcast mode
+**  1 - turn on  broadcast mode
+**
+** =========================================================================
+*/
+RC_RETURN
+RCSetBroadcastMode (struct net_device * dev, U16 Mode)
+{
+       U32 off;
+       PU32 pMsg;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+
+       if (pPab == NULL)
+               return RC_RTN_ADPTR_NOT_REGISTERED;
+
+       off = pPab->p_atu->InQueue;     /* get address of message */
+
+       if (0xFFFFFFFF == off)
+               return RC_RTN_FREE_Q_EMPTY;
+
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + off);
+
+       /* setup private message */
+       pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+       pMsg[2] = 0;            /* initiator context */
+       pMsg[3] = 0x219;        /* transaction context */
+       pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_SET_BROADCAST_MODE;
+       pMsg[5] = Mode;         /* broadcast mode setting */
+
+       pPab->p_atu->InQueue = off;     /* send it to the device */
+
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** RCGetBroadcastMode()
+**
+** get broadcast mode setting
+**
+** Possible return values placed in pMode:
+**  0 = broadcast mode not set
+**  1 = broadcast mode is set
+**
+** =========================================================================
+*/
+RC_RETURN
+RCGetBroadcastMode (struct net_device * dev, PU32 pMode,
+                   PFNWAITCALLBACK WaitCallback)
+{
+       U32 msgOffset, timeout;
+       PU32 pMsg;
+       volatile PU32 p32;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+
+       msgOffset = pPab->p_atu->InQueue;
+
+       if (msgOffset == 0xFFFFFFFF) {
+               printk (KERN_WARNING
+                       "(rcpci45 driver:) RCGetBroadcastMode(): Inbound Free Q empty!\n");
+               return RC_RTN_FREE_Q_EMPTY;
+       }
+
+       /* calc virtual address of msg - virtual already mapped to physical */
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset);
+
+       /* virtual pointer to return buffer - preset first dword as a completion flag */
+       p32 = (volatile PU32) pPab->pLinOutMsgBlock;
+       p32[0] = 0xff;
+
+       /* setup private message */
+       pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+       pMsg[2] = 0;            /* initiator context */
+       pMsg[3] = 0x219;        /* transaction context */
+       pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_GET_BROADCAST_MODE;
+       /* phys address to return status - area right after PAB */
+       pMsg[5] = pPab->outMsgBlockPhyAddr;
+
+       /* post to Inbound Post Q */
+
+       pPab->p_atu->InQueue = msgOffset;
+
+       /* wait for response */
+       timeout = 1000000;
+       while (1) {
+               if (WaitCallback)
+                       (*WaitCallback) ();
+
+               udelay (10);    /* please don't hog the bus!!! */
+
+               if (p32[0] != 0xff)
+                       break;
+
+               if (!timeout--) {
+                       printk (KERN_WARNING
+                               "(rcpci45 driver:) Timeout waiting for broadcast mode from adapter\n");
+                       printk (KERN_WARNING "(rcpci45 driver:) 0x%8x\n",
+                               p32[0]);
+                       return RC_RTN_NO_LINK_SPEED;
+               }
+       }
+
+       /* get mode */
+       *pMode = (U8) ((volatile PU8) p32)[0] & 0x0f;
+
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** RCGetLinkSpeed()
+**
+** get ethernet link speed. 
+**
+** 0 = Unknown
+** 1 = Full Duplex 100BaseT
+** 2 = Half duplex 100BaseT
+** 3 = Full Duplex  10BaseT
+** 4 = Half duplex  10BaseT
+**
+** =========================================================================
+*/
+RC_RETURN
+RCGetLinkSpeed (struct net_device * dev, PU32 pLinkSpeedCode,
+               PFNWAITCALLBACK WaitCallback)
+{
+       U32 msgOffset, timeout;
+       PU32 pMsg;
+       volatile PU32 p32;
+       U8 IOPLinkSpeed;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+
+       msgOffset = pPab->p_atu->InQueue;
+
+       if (msgOffset == 0xFFFFFFFF) {
+               printk (KERN_WARNING
+                       "(rcpci45 driver:) RCGetLinkSpeed(): Inbound Free Q empty!\n");
+               return RC_RTN_FREE_Q_EMPTY;
+       }
+
+       /* calc virtual address of msg - virtual already mapped to physical */
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset);
+
+       /* virtual pointer to return buffer - preset first dword as a completion flag */
+       p32 = (volatile PU32) pPab->pLinOutMsgBlock;
+       p32[0] = 0xff;
+
+       /* setup private message */
+       pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+       pMsg[2] = 0;            /* initiator context */
+       pMsg[3] = 0x219;        /* transaction context */
+       pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_GET_LINK_SPEED;
+       /* phys address to return status - area right after PAB */
+       pMsg[5] = pPab->outMsgBlockPhyAddr;
+
+       /* post to Inbound Post Q */
+
+       pPab->p_atu->InQueue = msgOffset;
+
+       /* wait for response */
+       timeout = 1000000;
+       while (1) {
+               if (WaitCallback)
+                       (*WaitCallback) ();
+
+               udelay (10);    /* please don't hog the bus!!! */
+
+               if (p32[0] != 0xff)
+                       break;
+
+               if (!timeout--) {
+                       dprintk ("Timeout waiting for link speed from IOP\n");
+                       dprintk ("0x%8x\n", p32[0]);
+                       return RC_RTN_NO_LINK_SPEED;
+               }
+       }
+
+       /* get Link speed */
+       IOPLinkSpeed = (U8) ((volatile PU8) p32)[0] & 0x0f;
+
+       *pLinkSpeedCode = IOPLinkSpeed;
+
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** RCReportDriverCapability(struct net_device *dev, U32 capability)
+**
+** Currently defined bits:
+** WARM_REBOOT_CAPABLE   0x01
+**
+** =========================================================================
+*/
+RC_RETURN
+RCReportDriverCapability (struct net_device * dev, U32 capability)
+{
+       U32 off;
+       PU32 pMsg;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+
+       if (pPab == NULL)
+               return RC_RTN_ADPTR_NOT_REGISTERED;
+
+       off = pPab->p_atu->InQueue;     /* get address of message */
+
+       if (0xFFFFFFFF == off)
+               return RC_RTN_FREE_Q_EMPTY;
+
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + off);
+
+       /* setup private message */
+       pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+       pMsg[2] = 0;            /* initiator context */
+       pMsg[3] = 0x219;        /* transaction context */
+       pMsg[4] =
+           RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_REPORT_DRIVER_CAPABILITY;
+       pMsg[5] = capability;
+
+       pPab->p_atu->InQueue = off;     /* send it to the I2O device */
+
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** RCGetFirmwareVer()
+**
+** Return firmware version in the form "SoftwareVersion : Bt BootVersion"
+**
+** =========================================================================
+*/
+RC_RETURN
+RCGetFirmwareVer (struct net_device * dev, PU8 pFirmString,
+                 PFNWAITCALLBACK WaitCallback)
+{
+       U32 msgOffset, timeout;
+       PU32 pMsg;
+       volatile PU32 p32;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+
+       msgOffset = pPab->p_atu->InQueue;
+       if (msgOffset == 0xFFFFFFFF) {
+               dprintk ("RCGetFirmwareVer(): Inbound Free Q empty!\n");
+               return RC_RTN_FREE_Q_EMPTY;
+       }
+
+       /* calc virtual address of msg - virtual already mapped to physical */
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset);
+
+       /* virtual pointer to return buffer - preset first dword as a completion flag */
+       p32 = (volatile PU32) pPab->pLinOutMsgBlock;
+       p32[0] = 0xff;
+
+       /* setup private message */
+       pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+       pMsg[2] = 0;            /* initiator context */
+       pMsg[3] = 0x219;        /* transaction context */
+       pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_GET_FIRMWARE_REV;
+       /* phys address to return status - area right after PAB */
+       pMsg[5] = pPab->outMsgBlockPhyAddr;
+
+       /* post to Inbound Post Q */
+
+       pPab->p_atu->InQueue = msgOffset;
+
+       /* wait for response */
+       timeout = 1000000;
+       while (1) {
+               if (WaitCallback)
+                       (*WaitCallback) ();
+
+               udelay (10);    /* please don't hog the bus!!! */
+
+               if (p32[0] != 0xff)
+                       break;
+
+               if (!timeout--) {
+                       dprintk ("Timeout waiting for firmware version from IOP\n");
+                       return RC_RTN_NO_FIRM_VER;
+               }
+       }
+
+       strcpy (pFirmString, (PU8) p32);
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** RCResetLANCard()
+**
+** ResourceFlags indicates whether to return buffer resource explicitly
+** to host or keep and reuse.
+** CallbackFunction (if not NULL) is the function to be called when 
+** reset is complete.
+** If CallbackFunction is NULL, ReturnAddr will have a 1 placed in it when
+** reset is done (if not NULL).
+**
+** =========================================================================
+*/
+RC_RETURN
+RCResetLANCard (struct net_device * dev, U16 ResourceFlags, PU32 ReturnAddr,
+               PFNCALLBACK CallbackFunction)
+{
+       unsigned long off;
+       PU32 pMsg;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+       long timeout = 0;
+
+       if (pPab == NULL)
+               return RC_RTN_ADPTR_NOT_REGISTERED;
+
+       off = pPab->p_atu->InQueue;     /* get address of message */
+
+       if (0xFFFFFFFF == off)
+               return RC_RTN_FREE_Q_EMPTY;
+
+       pPab->pCallbackFunc = CallbackFunction;
+
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + off);
+
+       /* setup message */
+       pMsg[0] = FOUR_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_LAN_RESET << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+       pMsg[2] = DEFAULT_RECV_INIT_CONTEXT;
+       pMsg[3] = ResourceFlags << 16;  /* resource flags */
+
+       pPab->p_atu->InQueue = off;     /* send it to the I2O device */
+
+       if (CallbackFunction == (PFNCALLBACK) NULL) {
+               /* call RCProcI2OMsgQ() until something in pPab->pCallbackFunc
+                  or until timer goes off */
+               while (pPab->pCallbackFunc == (PFNCALLBACK) NULL) {
+                       RCProcI2OMsgQ (dev);
+                       udelay (1000);  /* please don't hog the bus!!! */
+                       timeout++;
+                       if (timeout > 10000) {
+                               break;
+                       }
+               }
+               if (ReturnAddr != (PU32) NULL)
+                       *ReturnAddr = (U32) pPab->pCallbackFunc;
+       }
+
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** RCResetIOP()
+**
+** Send StatusGet Msg, wait for results return directly to buffer.
+**
+** =========================================================================
+*/
+RC_RETURN
+RCResetIOP (struct net_device * dev)
+{
+       U32 msgOffset, timeout;
+       PU32 pMsg;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+       volatile PU32 p32;
+
+       msgOffset = pPab->p_atu->InQueue;
+
+       if (msgOffset == 0xFFFFFFFF) {
+               return RC_RTN_FREE_Q_EMPTY;
+       }
+
+       /* calc virtual address of msg - virtual already mapped to physical */
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset);
+
+       pMsg[0] = NINE_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_EXEC_IOP_RESET << 24 | I2O_HOST_TID << 12 | I2O_IOP_TID;
+       pMsg[2] = 0;            /* universal context */
+       pMsg[3] = 0;            /* universal context */
+       pMsg[4] = 0;            /* universal context */
+       pMsg[5] = 0;            /* universal context */
+       /* phys address to return status - area right after PAB */
+       pMsg[6] = pPab->outMsgBlockPhyAddr;
+       pMsg[7] = 0;
+       pMsg[8] = 1;            /*  return 1 byte */
+
+       /* virtual pointer to return buffer - clear first two dwords */
+       p32 = (volatile PU32) pPab->pLinOutMsgBlock;
+       p32[0] = 0;
+       p32[1] = 0;
+
+       /* post to Inbound Post Q */
+
+       pPab->p_atu->InQueue = msgOffset;
+
+       /* wait for response */
+       timeout = 1000000;
+       while (1) {
+               udelay (10);    /* please don't hog the bus!!! */
+
+               if (p32[0] || p32[1])
+                       break;
+
+               if (!timeout--) {
+                       dprintk ("RCResetIOP timeout\n");
+                       return RC_RTN_MSG_REPLY_TIMEOUT;
+               }
+       }
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** RCShutdownLANCard()
+**
+** ResourceFlags indicates whether to return buffer resource explicitly
+** to host or keep and reuse.
+** CallbackFunction (if not NULL) is the function to be called when 
+** shutdown is complete.
+** If CallbackFunction is NULL, a 1 is placed in ReturnAddr (if ReturnAddr is
+** not NULL) when the shutdown is done.
+**
+** =========================================================================
+*/
+RC_RETURN
+RCShutdownLANCard (struct net_device * dev, U16 ResourceFlags,
+                  PU32 ReturnAddr, PFNCALLBACK CallbackFunction)
+{
+       volatile PU32 pMsg;
+       U32 off;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+       long timeout = 0;
+
+       if (pPab == NULL)
+               return RC_RTN_ADPTR_NOT_REGISTERED;
+
+       off = pPab->p_atu->InQueue;     /* get address of message */
+
+       if (0xFFFFFFFF == off)
+               return RC_RTN_FREE_Q_EMPTY;
+
+       pPab->pCallbackFunc = CallbackFunction;
+
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + off);
+
+       /* setup message */
+       pMsg[0] = FOUR_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] =
+           I2O_LAN_SHUTDOWN << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+       pMsg[2] = DEFAULT_RECV_INIT_CONTEXT;
+       pMsg[3] = ResourceFlags << 16;  /* resource flags */
+
+       pPab->p_atu->InQueue = off;     /* send it to the I2O device */
+
+       if (CallbackFunction == (PFNCALLBACK) NULL) {
+               /* call RCProcI2OMsgQ() until something in pPab->pCallbackFunc
+                  or until timer goes off */
+               while (pPab->pCallbackFunc == (PFNCALLBACK) NULL) {
+                       RCProcI2OMsgQ (dev);
+                       udelay (1000);  /* please don't hog the bus!!! */
+                       timeout++;
+                       if (timeout > 10000) {
+                               printk (KERN_WARNING
+                                       "(rcpci45 driver:) RCShutdownLANCard(): timeout\n");
+                               break;
+                       }
+               }
+               if (ReturnAddr != (PU32) NULL)
+                       *ReturnAddr = (U32) pPab->pCallbackFunc;
+       }
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** RCSetRavlinIPandMask()
+**
+** Set the Ravlin 45/PCI card's IP address and network mask.
+**
+** IP address and mask must be in network byte order.
+** For example, IP address 1.2.3.4 and mask 255.255.255.0 would be
+** 0x04030201 and 0x00FFFFFF on a little endian machine.
+**
+** =========================================================================
+*/
+RC_RETURN
+RCSetRavlinIPandMask (struct net_device * dev, U32 ipAddr, U32 netMask)
+{
+       volatile PU32 pMsg;
+       U32 off;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+
+       if (pPab == NULL)
+               return RC_RTN_ADPTR_NOT_REGISTERED;
+
+       off = pPab->p_atu->InQueue;     /* get address of message */
+
+       if (0xFFFFFFFF == off)
+               return RC_RTN_FREE_Q_EMPTY;
+
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + off);
+
+       /* setup private message */
+       pMsg[0] = SEVEN_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+       pMsg[2] = 0;            /* initiator context */
+       pMsg[3] = 0x219;        /* transaction context */
+       pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_SET_IP_AND_MASK;
+       pMsg[5] = ipAddr;
+       pMsg[6] = netMask;
+
+       pPab->p_atu->InQueue = off;     /* send it to the I2O device */
+       return RC_RTN_NO_ERROR;
+
+}
+
+/*
+** =========================================================================
+** RCGetRavlinIPandMask()
+**
+** Get the IP address and network mask from the card.
+** 
+** =========================================================================
+*/
+RC_RETURN
+RCGetRavlinIPandMask (struct net_device * dev, PU32 pIpAddr, PU32 pNetMask,
+                     PFNWAITCALLBACK WaitCallback)
+{
+       unsigned timeout;
+       U32 off;
+       PU32 pMsg, p32;
+       PPAB pPab = ((PDPA) dev->priv)->pPab;
+       PATU p_atu;
+
+       dprintk
+           ("RCGetRavlinIPandMask: pIpAddr is 0x%08ulx, *IpAddr is 0x%08ulx\n",
+            (u32) pIpAddr, *pIpAddr);
+
+       if (pPab == NULL)
+               return RC_RTN_ADPTR_NOT_REGISTERED;
+
+       p_atu = pPab->p_atu;
+       off = p_atu->InQueue;   /* get address of message */
+
+       if (0xFFFFFFFF == off)
+               return RC_RTN_FREE_Q_EMPTY;
+
+       p32 = (volatile PU32) pPab->pLinOutMsgBlock;
+       *p32 = 0xFFFFFFFF;
+
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + off);
+
+       dprintk
+           ("RCGetRavlinIPandMask: p_atu 0x%08ulx, off 0x%08ulx, p32 0x%08ulx\n",
+            (u32) p_atu, off, (u32) p32);
+       /* setup private message */
+       pMsg[0] = FIVE_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID;
+       pMsg[2] = 0;            /* initiator context */
+       pMsg[3] = 0x218;        /* transaction context */
+       pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_GET_IP_AND_MASK;
+       pMsg[5] = pPab->outMsgBlockPhyAddr;
+
+       p_atu->InQueue = off;   /* send it to the I2O device */
+       dprintk
+           ("RCGetRavlinIPandMask: p_atu 0x%08ulx, off 0x%08ulx, p32 0x%08ulx\n",
+            (u32) p_atu, off, (u32) p32);
+
+       /* wait for the rcpci45 board to update the info */
+       timeout = 100000;
+       while (0xffffffff == *p32) {
+               if (WaitCallback)
+                       (*WaitCallback) ();
+
+               udelay (10);
+
+               if (!timeout--) {
+                       dprintk ("RCGetRavlinIPandMask: Timeout\n");
+                       return RC_RTN_MSG_REPLY_TIMEOUT;
+               }
+       }
+
+       dprintk
+           ("RCGetRavlinIPandMask: after time out\np32[0] (IpAddr) 0x%08ulx, p32[1] (IPmask) 0x%08ulx\n",
+            p32[0], p32[1]);
+
+       /* send IP and mask to user's space  */
+       *pIpAddr = p32[0];
+       *pNetMask = p32[1];
+
+       dprintk
+           ("RCGetRavlinIPandMask: pIpAddr is 0x%08ulx, *IpAddr is 0x%08ulx\n",
+            (u32) pIpAddr, *pIpAddr);
+
+       return RC_RTN_NO_ERROR;
+}
+
+/* 
+** /////////////////////////////////////////////////////////////////////////
+** /////////////////////////////////////////////////////////////////////////
+**
+**                        local functions
+**
+** /////////////////////////////////////////////////////////////////////////
+** /////////////////////////////////////////////////////////////////////////
+*/
+
+/*
+** =========================================================================
+** SendI2OOutboundQInitMsg()
+**
+** =========================================================================
+*/
+static int
+SendI2OOutboundQInitMsg (PPAB pPab)
+{
+       U32 msgOffset, timeout, phyOutQFrames, i;
+       volatile PU32 pMsg;
+       volatile PU32 p32;
+
+       msgOffset = pPab->p_atu->InQueue;
+
+       if (msgOffset == 0xFFFFFFFF) {
+               dprintk ("SendI2OOutboundQInitMsg(): Inbound Free Q empty!\n");
+               return RC_RTN_FREE_Q_EMPTY;
+       }
+
+       /* calc virtual address of msg - virtual already mapped to physical */
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset);
+
+       dprintk
+           ("SendI2OOutboundQInitMsg - pMsg = 0x%08ulx, InQ msgOffset = 0x%08ulx\n",
+            (u32) pMsg, msgOffset);
+
+       pMsg[0] = EIGHT_WORD_MSG_SIZE | TRL_OFFSET_6;
+       pMsg[1] =
+           I2O_EXEC_OUTBOUND_INIT << 24 | I2O_HOST_TID << 12 | I2O_IOP_TID;
+       pMsg[2] = DEFAULT_RECV_INIT_CONTEXT;
+       pMsg[3] = 0x106;        /* transaction context */
+       pMsg[4] = 4096;         /* Host page frame size */
+       pMsg[5] = MSG_FRAME_SIZE << 16 | 0x80;  /* outbound msg frame size and Initcode */
+       pMsg[6] = 0xD0000004;   /* simple sgl element LE, EOB */
+       /* phys address to return status - area right after PAB */
+       pMsg[7] = pPab->outMsgBlockPhyAddr;
+
+       /* virtual pointer to return buffer - clear first two dwords */
+       p32 = (PU32) pPab->pLinOutMsgBlock;
+       p32[0] = 0;
+
+       /* post to Inbound Post Q */
+       pPab->p_atu->InQueue = msgOffset;
+
+       /* wait for response */
+       timeout = 100000;
+       while (1) {
+               udelay (10);    /* please don't hog the bus!!! */
+
+               if (p32[0])
+                       break;
+
+               if (!timeout--) {
+                       dprintk
+                           ("Timeout waiting for InitOutQ InProgress status from IOP\n");
+                       return RC_RTN_NO_I2O_STATUS;
+               }
+       }
+
+       timeout = 100000;
+       while (1) {
+               udelay (10);    /* please don't hog the bus!!! */
+
+               if (p32[0] == I2O_EXEC_OUTBOUND_INIT_COMPLETE)
+                       break;
+
+               if (!timeout--) {
+                       dprintk
+                           ("Timeout waiting for InitOutQ Complete status from IOP\n");
+                       return RC_RTN_NO_I2O_STATUS;
+               }
+       }
+
+       /* load PCI outbound free Q with MF physical addresses */
+       phyOutQFrames = pPab->outMsgBlockPhyAddr;
+
+       for (i = 0; i < NMBR_MSG_FRAMES; i++) {
+               pPab->p_atu->OutQueue = phyOutQFrames;
+               phyOutQFrames += MSG_FRAME_SIZE;
+       }
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** GetI2OStatus()
+**
+** Send StatusGet Msg, wait for results return directly to buffer.
+**
+** =========================================================================
+*/
+static int
+GetI2OStatus (PPAB pPab)
+{
+       U32 msgOffset, timeout;
+       PU32 pMsg;
+       volatile PU32 p32;
+
+       msgOffset = pPab->p_atu->InQueue;
+       dprintk ("GetI2OStatus: msg offset = 0x%x\n", msgOffset);
+       if (msgOffset == 0xFFFFFFFF) {
+               dprintk ("GetI2OStatus(): Inbound Free Q empty!\n");
+               return RC_RTN_FREE_Q_EMPTY;
+       }
+
+       /* calc virtual address of msg - virtual already mapped to physical */
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset);
+
+       pMsg[0] = NINE_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_EXEC_STATUS_GET << 24 | I2O_HOST_TID << 12 | I2O_IOP_TID;
+       pMsg[2] = 0;            /* universal context */
+       pMsg[3] = 0;            /* universal context */
+       pMsg[4] = 0;            /* universal context */
+       pMsg[5] = 0;            /* universal context */
+       /* phys address to return status - area right after PAB */
+       pMsg[6] = pPab->outMsgBlockPhyAddr;
+       pMsg[7] = 0;
+       pMsg[8] = 88;           /*  return 88 bytes */
+
+       /* virtual pointer to return buffer - clear first two dwords */
+       p32 = (volatile PU32) pPab->pLinOutMsgBlock;
+       p32[0] = 0;
+       p32[1] = 0;
+
+       dprintk
+           ("GetI2OStatus - pMsg:0x%08ulx, msgOffset:0x%08ulx, [1]:0x%08ulx, [6]:0x%08ulx\n",
+            (u32) pMsg, msgOffset, pMsg[1], pMsg[6]);
+
+       /* post to Inbound Post Q */
+       pPab->p_atu->InQueue = msgOffset;
+
+       dprintk ("Return status to p32 = 0x%08ulx\n", (u32) p32);
+
+       /* wait for response */
+       timeout = 1000000;
+       while (1) {
+               udelay (10);    /* please don't hog the bus!!! */
+
+               if (p32[0] && p32[1])
+                       break;
+
+               if (!timeout--) {
+                       dprintk ("Timeout waiting for status from IOP\n");
+                       dprintk ("0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n",
+                                p32[0], p32[1], p32[2], p32[3]);
+                       dprintk ("0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n",
+                                p32[4], p32[5], p32[6], p32[7]);
+                       dprintk ("0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n",
+                                p32[8], p32[9], p32[10], p32[11]);
+                       return RC_RTN_NO_I2O_STATUS;
+               }
+       }
+
+       dprintk ("0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n", p32[0], p32[1],
+                p32[2], p32[3]);
+       dprintk ("0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n", p32[4], p32[5],
+                p32[6], p32[7]);
+       dprintk ("0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n", p32[8], p32[9],
+                p32[10], p32[11]);
+       /* get IOP state */
+       pPab->IOPState = ((volatile PU8) p32)[10];
+       pPab->InboundMFrameSize = ((volatile PU16) p32)[6];
+
+       dprintk ("IOP state 0x%02x InFrameSize = 0x%04x\n",
+                pPab->IOPState, pPab->InboundMFrameSize);
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** SendEnableSysMsg()
+**
+**
+** =========================================================================
+*/
+static int
+SendEnableSysMsg (PPAB pPab)
+{
+       U32 msgOffset;
+       volatile PU32 pMsg;
+
+       msgOffset = pPab->p_atu->InQueue;
+
+       if (msgOffset == 0xFFFFFFFF) {
+               dprintk ("SendEnableSysMsg(): Inbound Free Q empty!\n");
+               return RC_RTN_FREE_Q_EMPTY;
+       }
+
+       /* calc virtual address of msg - virtual already mapped to physical */
+       pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset);
+
+       dprintk
+           ("SendEnableSysMsg - pMsg = 0x%08ulx, InQ msgOffset = 0x%08ulx\n",
+            (u32) pMsg, msgOffset);
+
+       pMsg[0] = FOUR_WORD_MSG_SIZE | SGL_OFFSET_0;
+       pMsg[1] = I2O_EXEC_SYS_ENABLE << 24 | I2O_HOST_TID << 12 | I2O_IOP_TID;
+       pMsg[2] = DEFAULT_RECV_INIT_CONTEXT;
+       pMsg[3] = 0x110;        /* transaction context */
+       pMsg[4] = 0x50657465;   /*  RedCreek Private */
+
+       /* post to Inbound Post Q */
+       pPab->p_atu->InQueue = msgOffset;
+
+       return RC_RTN_NO_ERROR;
+}
+
+/*
+** =========================================================================
+** FillI2OMsgSGLFromTCB()
+**
+** inputs   pMsgU32 - virtual pointer (mapped to physical) of message frame
+**          pXmitCntrlBlock - pointer to caller buffer control block.
+**
+** fills in LAN SGL after Transaction Control Word or Bucket Count.
+** =========================================================================
+*/
+static int
+FillI2OMsgSGLFromTCB (PU32 pMsgFrame, PRCTCB pTransCtrlBlock)
+{
+       unsigned int nmbrBuffers, nmbrSeg, nmbrDwords, context, flags;
+       PU32 pTCB, pMsg;
+
+       /* SGL element flags */
+#define EOB        0x40000000
+#define LE         0x80000000
+#define SIMPLE_SGL 0x10000000
+#define BC_PRESENT 0x01000000
+
+       pTCB = (PU32) pTransCtrlBlock;
+       pMsg = pMsgFrame;
+       nmbrDwords = 0;
+
+       dprintk ("FillI2OMsgSGLFromTCB\n");
+       dprintk ("TCB  0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n",
+                pTCB[0], pTCB[1], pTCB[2], pTCB[3], pTCB[4]);
+       dprintk ("pTCB 0x%08ulx, pMsg 0x%08ulx\n", (u32) pTCB, (u32) pMsg);
+
+       nmbrBuffers = *pTCB++;
+
+       if (!nmbrBuffers) {
+               return -1;
+       }
+
+       do {
+               context = *pTCB++;      /* buffer tag (context) */
+               nmbrSeg = *pTCB++;      /* number of segments */
+
+               if (!nmbrSeg) {
+                       return -1;
+               }
+
+               flags = SIMPLE_SGL | BC_PRESENT;
+
+               if (1 == nmbrSeg) {
+                       flags |= EOB;
+
+                       if (1 == nmbrBuffers)
+                               flags |= LE;
+               }
+
+               /* 1st SGL buffer element has context */
+               pMsg[0] = pTCB[0] | flags;      /* send over count (segment size) */
+               pMsg[1] = context;
+               pMsg[2] = pTCB[1];      /* send buffer segment physical address */
+               nmbrDwords += 3;
+               pMsg += 3;
+               pTCB += 2;
+
+               if (--nmbrSeg) {
+                       do {
+                               flags = SIMPLE_SGL;
+
+                               if (1 == nmbrSeg) {
+                                       flags |= EOB;
+
+                                       if (1 == nmbrBuffers)
+                                               flags |= LE;
+                               }
+
+                               pMsg[0] = pTCB[0] | flags;      /* send over count */
+                               pMsg[1] = pTCB[1];      /* send buffer segment physical address */
+                               nmbrDwords += 2;
+                               pTCB += 2;
+                               pMsg += 2;
+
+                       } while (--nmbrSeg);
+               }
+
+       } while (--nmbrBuffers);
+
+       return nmbrDwords;
+}
+
+/*
+** =========================================================================
+** ProcessOutboundI2OMsg()
+**
+** process I2O reply message
+** * change to msg structure *
+** =========================================================================
+*/
+static void
+ProcessOutboundI2OMsg (PPAB pPab, U32 phyAddrMsg)
+{
+       PU8 p8Msg;
+       PU32 p32;
+/*      U16 count; */
+
+       p8Msg = pPab->pLinOutMsgBlock + (phyAddrMsg - pPab->outMsgBlockPhyAddr);
+       p32 = (PU32) p8Msg;
+
+       dprintk
+           ("VXD: ProcessOutboundI2OMsg - pPab 0x%08ulx, phyAdr 0x%08ulx, linAdr 0x%08ulx\n",
+            (u32) pPab, phyAddrMsg, (u32) p8Msg);
+       dprintk ("msg :0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n", p32[0], p32[1],
+                p32[2], p32[3]);
+       dprintk ("msg :0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n", p32[4], p32[5],
+                p32[6], p32[7]);
+
+       if (p32[4] >> 24 != I2O_REPLY_STATUS_SUCCESS) {
+               dprintk ("Message reply status not success\n");
+               return;
+       }
+
+       switch (p8Msg[7]) {     /* function code byte */
+       case I2O_EXEC_SYS_TAB_SET:
+               msgFlag = 1;
+               dprintk ("Received I2O_EXEC_SYS_TAB_SET reply\n");
+               break;
+
+       case I2O_EXEC_HRT_GET:
+               msgFlag = 1;
+               dprintk ("Received I2O_EXEC_HRT_GET reply\n");
+               break;
+
+       case I2O_EXEC_LCT_NOTIFY:
+               msgFlag = 1;
+               dprintk ("Received I2O_EXEC_LCT_NOTIFY reply\n");
+               break;
+
+       case I2O_EXEC_SYS_ENABLE:
+               msgFlag = 1;
+               dprintk ("Received I2O_EXEC_SYS_ENABLE reply\n");
+               break;
+
+       default:
+               dprintk ("Received UNKNOWN reply\n");
+               break;
+       }
+}
diff --git a/drivers/net/rclanmtl.h b/drivers/net/rclanmtl.h
new file mode 100644 (file)
index 0000000..9488c0f
--- /dev/null
@@ -0,0 +1,701 @@
+/*
+** *************************************************************************
+**
+**
+**     R C L A N M T L . H             $Revision: 6 $
+**
+**
+**  RedCreek I2O LAN Message Transport Layer header file.
+**
+**  ---------------------------------------------------------------------
+**  ---     Copyright (c) 1997-1999, RedCreek Communications Inc.     ---
+**  ---                   All rights reserved.                        ---
+**  ---------------------------------------------------------------------
+**
+**  File Description:
+**
+**  Header file for host I2O (Intelligent I/O) LAN message transport layer 
+**  API and data types.
+**
+**  This program is free software; you can redistribute it and/or modify
+**  it under the terms of the GNU General Public License as published by
+**  the Free Software Foundation; either version 2 of the License, or
+**  (at your option) any later version.
+
+**  This program is distributed in the hope that it will be useful,
+**  but WITHOUT ANY WARRANTY; without even the implied warranty of
+**  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+**  GNU General Public License for more details.
+
+**  You should have received a copy of the GNU General Public License
+**  along with this program; if not, write to the Free Software
+**  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+**
+** *************************************************************************
+*/
+
+#ifndef RCLANMTL_H
+#define RCLANMTL_H
+
+/* Linux specific includes */
+#include <asm/types.h>
+#ifdef RC_LINUX_MODULE         /* linux modules need non-library version of string functions */
+#include <linux/string.h>
+#else
+#include <string.h>
+#endif
+#include <linux/delay.h>       /* for udelay() */
+
+#include <linux/netdevice.h>
+#include <linux/if_ether.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+
+#include <asm/io.h>
+
+/* Debug stuff. Define for debug output */
+#undef RCDEBUG
+
+#ifdef RCDEBUG
+#define dprintk(args...) printk(KERN_DEBUG "rc: " args)
+#else
+#define dprintk(args...) { }
+#endif
+
+/* Typedefs */
+
+ /* scalar data types */
+typedef __u8 U8;
+typedef __u16 U16;
+typedef __u32 U32;
+typedef __u8 *PU8;
+typedef __u16 *PU16;
+typedef __u32 *PU32;
+typedef unsigned long BF;
+typedef int RC_RETURN;
+
+ /* 
+    ** type PFNWAITCALLBACK
+    **
+    ** pointer to void function - type used for WaitCallback in some functions 
+  */
+typedef void (*PFNWAITCALLBACK) (void);        /* void argument avoids compiler complaint */
+
+ /*
+    ** type PFNTXCALLBACK 
+    **
+    ** Pointer to user's transmit callback function.  This user function is
+    ** called from RCProcI2OMsgQ() when packets have been transmitted from buffers
+    ** given in the RCI2OSendPacket() function.  BufferContext is a pointer to
+    ** an array of 32 bit context values.  These are the values the user assigned
+    ** and passed in the TCB to the RCI2OSendPacket() function.  PcktCount
+    ** indicates the number of buffer context values in the BufferContext[] array.
+    ** The User's TransmitCallbackFunction should recover (put back in free queue)
+    ** the packet buffers associated with the buffer context values.
+  */
+typedef void (*PFNTXCALLBACK) (U32 Status,
+                              U16 PcktCount,
+                              PU32 BufferContext, struct net_device *);
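+
+/*
+** Usage sketch (illustrative only; the rc_example_* names below are not
+** part of this API): a minimal transmit callback.  It assumes the caller
+** chose sk_buff pointers as its buffer context values - what a context
+** maps to is entirely up to the user of this layer.
+*/
+static inline void
+rc_example_tx_callback (U32 Status, U16 PcktCount, PU32 BufferContext,
+                       struct net_device *dev)
+{
+       while (PcktCount--) {
+               struct sk_buff *skb;
+
+               /* recover the buffer named by the context and free it */
+               skb = (struct sk_buff *) (unsigned long) *BufferContext++;
+               dev_kfree_skb_any (skb);
+       }
+}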
+
+ /* 
+    ** type PFNRXCALLBACK 
+    **
+    ** Pointer to user's receive callback function.  This user function
+    ** is called from RCProcI2OMsgQ() when packets have been received into
+    ** previously posted packet buffers through the RCPostRecvBuffers() function.
+    ** The receive callback function should process the Packet Description Block
+    ** pointed to by PacketDescBlock. See Packet Description Block below.
+  */
+typedef void (*PFNRXCALLBACK) (U32 Status,
+                              U8 PktCount,
+                              U32 BucketsRemain,
+                              PU32 PacketDescBlock, struct net_device *);
+
+ /* 
+    ** type PFNCALLBACK 
+    **
+    ** Pointer to user's generic callback function.  This user function
+    ** can be passed to LANReset or LANShutdown and is called when
+    ** the reset or shutdown is complete.
+    ** Param1 and Param2 are invalid for LANReset and LANShutdown.
+  */
+typedef void (*PFNCALLBACK) (U32 Status,
+                            U32 Param1, U32 Param2, struct net_device * dev);
+
+/*
+**  Message Unit CSR definitions for RedCreek PCI45 board
+*/
+typedef struct tag_rcatu {
+       volatile unsigned long APICRegSel;      /* APIC Register Select */
+       volatile unsigned long reserved0;
+       volatile unsigned long APICWinReg;      /* APIC Window Register */
+       volatile unsigned long reserved1;
+       volatile unsigned long InMsgReg0;       /* inbound message register 0 */
+       volatile unsigned long InMsgReg1;       /* inbound message register 1 */
+       volatile unsigned long OutMsgReg0;      /* outbound message register 0 */
+       volatile unsigned long OutMsgReg1;      /* outbound message register 1 */
+       volatile unsigned long InDoorReg;       /* inbound doorbell register */
+       volatile unsigned long InIntStat;       /* inbound interrupt status register */
+       volatile unsigned long InIntMask;       /* inbound interrupt mask register */
+       volatile unsigned long OutDoorReg;      /* outbound doorbell register */
+       volatile unsigned long OutIntStat;      /* outbound interrupt status register */
+       volatile unsigned long OutIntMask;      /* outbound interrupt mask register */
+       volatile unsigned long reserved2;
+       volatile unsigned long reserved3;
+       volatile unsigned long InQueue; /* inbound queue port */
+       volatile unsigned long OutQueue;        /* outbound queue port */
+       volatile unsigned long reserved4;
+       volatile unsigned long reserved5;
+       /* RedCreek extension */
+       volatile unsigned long EtherMacLow;
+       volatile unsigned long EtherMacHi;
+       volatile unsigned long IPaddr;
+       volatile unsigned long IPmask;
+} *PATU;
+
+ /* 
+    ** typedef PAB
+    **
+    ** PCI Adapter Block - holds instance specific information.
+  */
+typedef struct {
+       PATU p_atu;             /* ptr to  ATU register block */
+       PU8 pPci45LinBaseAddr;
+       PU8 pLinOutMsgBlock;
+       U32 outMsgBlockPhyAddr;
+       PFNTXCALLBACK pTransCallbackFunc;
+       PFNRXCALLBACK pRecvCallbackFunc;
+       PFNCALLBACK pRebootCallbackFunc;
+       PFNCALLBACK pCallbackFunc;
+       U16 IOPState;
+       U16 InboundMFrameSize;
+} *PPAB;
+
+/*
+ * Driver Private Area, DPA.
+ */
+typedef struct {
+       U8 id;                  /* the AdapterID */
+
+       /* These two fields are basically for the RCioctl function.
+        * I could not determine if they could be avoided. (RAA)*/
+       U32 pci_addr;           /* the pci address of the adapter */
+       U32 pci_addr_len;
+
+       struct pci_dev *pci_dev;
+       struct timer_list timer;        /*  timer */
+       struct net_device_stats stats;  /* the statistics structure */
+       unsigned long numOutRcvBuffers; /* number of outstanding receive buffers */
+       unsigned char shutdown;
+       unsigned char reboot;
+       unsigned char nexus;
+       PU8 msgbuf;             /* Pointer to Lan Api Private Area */
+       dma_addr_t msgbuf_dma;
+       PPAB pPab;              /* Pointer to the PCI Adapter Block */
+} *PDPA;
+
+/* PCI/45 Configuration space values */
+#define RC_PCI45_VENDOR_ID  0x4916
+#define RC_PCI45_DEVICE_ID  0x1960
+
+ /* RedCreek API function return values */
+#define RC_RTN_NO_ERROR             0
+#define RC_RTN_I2O_NOT_INIT         1
+#define RC_RTN_FREE_Q_EMPTY         2
+#define RC_RTN_TCB_ERROR            3
+#define RC_RTN_TRANSACTION_ERROR    4
+#define RC_RTN_ADAPTER_ALREADY_INIT 5
+#define RC_RTN_MALLOC_ERROR         6
+#define RC_RTN_ADPTR_NOT_REGISTERED 7
+#define RC_RTN_MSG_REPLY_TIMEOUT    8
+#define RC_RTN_NO_I2O_STATUS        9
+#define RC_RTN_NO_FIRM_VER         10
+#define RC_RTN_NO_LINK_SPEED       11
+
+/* Driver capability flags */
+#define WARM_REBOOT_CAPABLE      0x01
+
+/*
+** Status - Transmit and Receive callback status word 
+**
+** A 32 bit Status is returned to the TX and RX callback functions.  This value
+** contains both the reply status and the detailed status as follows:
+**
+**  32    24     16            0
+**  +------+------+------------+
+**  | Reply|      |  Detailed  |
+**  |Status|   0  |   Status   |
+**  +------+------+------------+
+**
+** Reply Status and Detailed Status of zero indicates No Errors.
+*/
+ /* reply message status defines */
+#define    I2O_REPLY_STATUS_SUCCESS                    0x00
+#define    I2O_REPLY_STATUS_ABORT_NO_DATA_TRANSFER     0x02
+#define    I2O_REPLY_STATUS_TRANSACTION_ERROR          0x0A
+
+/* DetailedStatusCode defines */
+#define    I2O_LAN_DSC_SUCCESS                         0x0000
+#define    I2O_LAN_DSC_DEVICE_FAILURE                  0x0001
+#define    I2O_LAN_DSC_DESTINATION_NOT_FOUND           0x0002
+#define    I2O_LAN_DSC_TRANSMIT_ERROR                  0x0003
+#define    I2O_LAN_DSC_TRANSMIT_ABORTED                0x0004
+#define    I2O_LAN_DSC_RECEIVE_ERROR                   0x0005
+#define    I2O_LAN_DSC_RECEIVE_ABORTED                 0x0006
+#define    I2O_LAN_DSC_DMA_ERROR                       0x0007
+#define    I2O_LAN_DSC_BAD_PACKET_DETECTED             0x0008
+#define    I2O_LAN_DSC_OUT_OF_MEMORY                   0x0009
+#define    I2O_LAN_DSC_BUCKET_OVERRUN                  0x000A
+#define    I2O_LAN_DSC_IOP_INTERNAL_ERROR              0x000B
+#define    I2O_LAN_DSC_CANCELED                        0x000C
+#define    I2O_LAN_DSC_INVALID_TRANSACTION_CONTEXT     0x000D
+#define    I2O_LAN_DSC_DESTINATION_ADDRESS_DETECTED    0x000E
+#define    I2O_LAN_DSC_DESTINATION_ADDRESS_OMITTED     0x000F
+#define    I2O_LAN_DSC_PARTIAL_PACKET_RETURNED         0x0010
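+
+/*
+** Usage sketch (illustrative only; not part of this API): splitting the
+** 32-bit callback Status word into its Reply Status and Detailed Status
+** fields as drawn above.
+*/
+static inline int
+rc_example_status_ok (U32 Status)
+{
+       U8 reply = (U8) (Status >> 24);         /* Reply Status, bits 31..24 */
+       U16 detail = (U16) (Status & 0xFFFF);   /* Detailed Status, bits 15..0 */
+
+       return reply == I2O_REPLY_STATUS_SUCCESS && detail == I2O_LAN_DSC_SUCCESS;
+}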
+
+/*
+** Packet Description Block   (Received packets)
+**
+** A pointer to this block structure is returned to the ReceiveCallback 
+** function.  It contains the list of packet buffers which have either been
+** filled with a packet or returned to host due to a LANReset function. 
+** Currently there will only be one packet per receive bucket (buffer) posted. 
+**
+**   32   24               0     
+**  +-----------------------+  -\
+**  |   Buffer 1 Context    |    \
+**  +-----------------------+     \
+**  |      0xC0000000       |     / First Bucket Descriptor
+**  +-----+-----------------+    /
+**  |  0  | packet 1 length |   / 
+**  +-----------------------+  -\
+**  |   Buffer 2 Context    |    \
+**  +-----------------------+     \
+**  |      0xC0000000       |     / Second Bucket Descriptor
+**  +-----+-----------------+    /
+**  |  0  | packet 2 length |   / 
+**  +-----+-----------------+  -
+**  |         ...           |  ----- more bucket descriptors
+**  +-----------------------+  -\
+**  |   Buffer n Context    |    \
+**  +-----------------------+     \
+**  |      0xC0000000       |     / Last Bucket Descriptor
+**  +-----+-----------------+    /
+**  |  0  | packet n length |   / 
+**  +-----+-----------------+  -
+**
+** Buffer Context values are those given to adapter in the TCB on calls to
+** RCPostRecvBuffers().
+**  
+*/
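+
+/*
+** Usage sketch (illustrative only; not part of this API): walking the
+** Packet Description Block handed to the receive callback.  Each bucket
+** descriptor is three dwords as drawn above; the upper byte of the last
+** dword is zero, so it can be read directly as the packet length.
+*/
+static inline void
+rc_example_walk_pdb (U8 PktCount, PU32 PacketDescBlock)
+{
+       unsigned int i;
+
+       for (i = 0; i < PktCount; i++) {
+               U32 context = PacketDescBlock[0];       /* Buffer Context */
+               U32 length = PacketDescBlock[2];        /* packet length */
+
+               dprintk ("bucket %u: context 0x%08x, %u bytes\n",
+                        i, context, length);
+               PacketDescBlock += 3;   /* next bucket descriptor */
+       }
+}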
+
+/*
+** Transaction Control Block (TCB) structure
+**
+** A structure like this is filled in by the user and passed by reference to 
+** RCI2OSendPacket() and RCPostRecvBuffers() functions.  Minimum size is five
+** 32-bit words for one buffer with one segment descriptor.  
+** MAX_NMBR_POST_BUFFERS_PER_MSG defines the maximum single segment buffers
+** that can be described in a given TCB.
+**
+**   32                    0
+**  +-----------------------+
+**  |   Buffer Count        |  Number of buffers in the TCB
+**  +-----------------------+
+**  |   Buffer 1 Context    |  first buffer reference
+**  +-----------------------+
+**  |   Buffer 1 Seg Count  |  number of segments in buffer
+**  +-----------------------+
+**  |   Buffer 1 Seg Desc 1 |  first segment descriptor (size, physical address)
+**  +-----------------------+
+**  |         ...           |  more segment descriptors (size, physical address)
+**  +-----------------------+
+**  |   Buffer 1 Seg Desc n |  last segment descriptor (size, physical address)
+**  +-----------------------+
+**  |   Buffer 2 Context    |  second buffer reference
+**  +-----------------------+
+**  |   Buffer 2 Seg Count  |  number of segments in buffer
+**  +-----------------------+
+**  |   Buffer 2 Seg Desc 1 |  segment descriptor (size, physical address)
+**  +-----------------------+
+**  |         ...           |  more segment descriptors (size, physical address)
+**  +-----------------------+
+**  |   Buffer 2 Seg Desc n |
+**  +-----------------------+
+**  |         ...           |  more buffer descriptor blocks ...
+**  +-----------------------+
+**  |   Buffer n Context    |
+**  +-----------------------+
+**  |   Buffer n Seg Count  |
+**  +-----------------------+
+**  |   Buffer n Seg Desc 1 |
+**  +-----------------------+
+**  |         ...           |
+**  +-----------------------+
+**  |   Buffer n Seg Desc n |
+**  +-----------------------+
+**
+**
+** A TCB for one contiguous packet buffer would look like the following:
+**
+**   32                    0
+**  +-----------------------+
+**  |         1             |  one buffer in the TCB
+**  +-----------------------+
+**  |  <user's Context>     |  user's buffer reference
+**  +-----------------------+
+**  |         1             |  one segment buffer
+**  +-----------------------+                            _
+**  |    <buffer size>      |  size                       \ 
+**  +-----------------------+                              \ segment descriptor
+**  |  <physical address>   |  physical address of buffer  /
+**  +-----------------------+                            _/
+**
+*/
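+
+/*
+** Usage sketch (illustrative only; not part of this API): filling the
+** five-word, one-buffer / one-segment TCB drawn above.  The context,
+** size and physical address are assumed to come from the caller.
+*/
+static inline int
+rc_example_fill_one_buffer_tcb (PU32 tcb, U32 context, U32 size, U32 physAddr)
+{
+       tcb[0] = 1;             /* one buffer in the TCB */
+       tcb[1] = context;       /* user's buffer reference */
+       tcb[2] = 1;             /* one segment in this buffer */
+       tcb[3] = size;          /* segment size in bytes */
+       tcb[4] = physAddr;      /* segment physical address */
+
+       return 5;               /* 32-bit words consumed */
+}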
+
+ /* Buffer Segment Descriptor */
+typedef struct {
+       U32 size;
+       U32 phyAddress;
+} BSD, *PBSD;
+
+typedef PU32 PRCTCB;
+/*
+** -------------------------------------------------------------------------
+** Exported functions comprising the API to the LAN I2O message transport layer
+** -------------------------------------------------------------------------
+*/
+
+ /*
+    ** RCInitI2OMsgLayer()
+    ** 
+    ** Called once prior to using the I2O LAN message transport layer.  User 
+    ** provides both the physical and virtual address of a locked page buffer 
+    ** that is used as a private buffer for the RedCreek I2O message
+    ** transport layer.  This buffer must be a contiguous memory block of a 
+    ** minimum of 16K bytes and long word aligned.  The user also must provide
+    ** the base address of the RedCreek PCI adapter assigned by BIOS or operating
+    ** system.  
+    **
+    ** Inputs:  dev - the net_device struct for the device.
+    **          TransmitCallbackFunction - address of user's TX callback function
+    **          ReceiveCallbackFunction  - address of user's RX callback function
+    **          RebootCallbackFunction  - address of user's reboot callback function
+    **
+  */
+RC_RETURN RCInitI2OMsgLayer (struct net_device *dev,
+                            PFNTXCALLBACK TransmitCallbackFunction,
+                            PFNRXCALLBACK ReceiveCallbackFunction,
+                            PFNCALLBACK RebootCallbackFunction);
+
+ /*
+    ** RCSetRavlinIPandMask()
+    **
+    ** Set the Ravlin 45/PCI card's IP address and network mask.
+    **
+    ** IP address and mask must be in network byte order.
+    ** For example, IP address 1.2.3.4 and mask 255.255.255.0 would be
+    ** 0x04030201 and 0x00FFFFFF on a little endian machine.
+    **
+  */
+RC_RETURN RCSetRavlinIPandMask (struct net_device *dev, U32 ipAddr,
+                               U32 netMask);
+
+/*
+** =========================================================================
+** RCGetRavlinIPandMask()
+**
+** Get the IP address and network mask from the card.
+** 
+** =========================================================================
+*/
+RC_RETURN
+RCGetRavlinIPandMask (struct net_device *dev, PU32 pIpAddr, PU32 pNetMask,
+                     PFNWAITCALLBACK WaitCallback);
+
+ /* 
+    ** RCProcI2OMsgQ()
+    ** 
+    ** Called from user's polling loop or Interrupt Service Routine for a PCI 
+    ** interrupt from the RedCreek PCI adapter.  User responsible for determining
+    ** and hooking the PCI interrupt. This function will call the registered
+    ** callback functions, TransmitCallbackFunction or ReceiveCallbackFunction,
+    ** if a TX or RX transaction has completed.
+  */
+irqreturn_t RCProcI2OMsgQ (struct net_device *dev);
+
+ /*
+    ** Disable and Enable I2O interrupts.  I2O interrupts are enabled at Init time
+    ** but can be disabled and re-enabled through these two function calls.
+    ** Packets will still be put into any posted receive buffers and packets will
+    ** be sent through the RCI2OSendPacket() function.  Disabling I2O interrupts
+    ** will prevent a hardware interrupt to the host even though the outbound I2O
+    ** msg queue is not empty.
+  */
+RC_RETURN RCEnableI2OInterrupts (struct net_device *dev);
+RC_RETURN RCDisableI2OInterrupts (struct net_device *dev);
+
+ /* 
+    ** RCPostRecvBuffers()
+    ** 
+    ** Post user's page locked buffers for use by the PCI adapter to
+    ** return ethernet packets received from the LAN.  Transaction Control Block,
+    ** provided by user, contains buffer descriptor(s) which includes a buffer
+    ** context number along with buffer size and physical address.  See TCB above.
+    ** The buffer context and actual packet length are returned to the 
+    ** ReceiveCallbackFunction when packets have been received.  Buffers posted
+    ** to the RedCreek adapter are considered owned by the adapter until the
+    ** context is returned to the user through the ReceiveCallbackFunction.
+  */
+RC_RETURN RCPostRecvBuffers (struct net_device *dev,
+                            PRCTCB pTransactionCtrlBlock);
+#define MAX_NMBR_POST_BUFFERS_PER_MSG 32
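+
+/*
+** Usage sketch (illustrative only; not part of this API): posting a small
+** batch of single-segment receive buckets through one TCB.  The bucket
+** contexts and physical addresses are assumed to come from the caller's
+** own buffer management; a real driver could size the TCB storage for up
+** to MAX_NMBR_POST_BUFFERS_PER_MSG entries.
+*/
+#define RC_EXAMPLE_BUCKETS 8
+
+static inline RC_RETURN
+rc_example_post_buckets (struct net_device *dev, unsigned int count,
+                        PU32 contexts, PU32 physAddrs, U32 bucketLen)
+{
+       U32 tcb[1 + RC_EXAMPLE_BUCKETS * 4];
+       PU32 p = tcb;
+       unsigned int i;
+
+       if (count > RC_EXAMPLE_BUCKETS)
+               count = RC_EXAMPLE_BUCKETS;
+
+       *p++ = count;                   /* buffer count */
+       for (i = 0; i < count; i++) {
+               *p++ = contexts[i];     /* buffer context */
+               *p++ = 1;               /* one segment per bucket */
+               *p++ = bucketLen;       /* segment size */
+               *p++ = physAddrs[i];    /* segment physical address */
+       }
+       return RCPostRecvBuffers (dev, (PRCTCB) tcb);
+}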
+
+ /*
+    ** RCI2OSendPacket()
+    ** 
+    ** Send user's ethernet packet from a locked page buffer.  
+    ** Packet must have full MAC header, however without a CRC.  
+    ** Initiator context is a user provided value that is returned 
+    ** to the TransmitCallbackFunction when packet buffer is free.
+    ** Transmit buffers are considered owned by the adapter until the context is
+    ** returned to the user through the TransmitCallbackFunction.
+  */
+RC_RETURN RCI2OSendPacket (struct net_device *dev,
+                          U32 context, PRCTCB pTransactionCtrlBlock);
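+
+/*
+** Usage sketch (illustrative only; not part of this API): transmitting one
+** contiguous frame.  DMA mapping of the frame and the choice of context
+** value (handed back to the transmit callback) are left to the caller.
+*/
+static inline RC_RETURN
+rc_example_send_one (struct net_device *dev, U32 context,
+                    U32 pktPhysAddr, U32 pktLen)
+{
+       U32 tcb[5];
+
+       tcb[0] = 1;             /* one buffer */
+       tcb[1] = context;       /* returned via the transmit callback */
+       tcb[2] = 1;             /* one segment */
+       tcb[3] = pktLen;        /* full MAC frame length, no CRC */
+       tcb[4] = pktPhysAddr;   /* physical address of the frame */
+
+       return RCI2OSendPacket (dev, context, (PRCTCB) tcb);
+}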
+
+ /* Ethernet Link Statistics structure */
+typedef struct tag_RC_link_stats {
+       U32 TX_good;            /* good transmit frames */
+       U32 TX_maxcol;          /* frames not TX due to MAX collisions */
+       U32 TX_latecol;         /* frames not TX due to late collisions */
+       U32 TX_urun;            /* frames not TX due to DMA underrun */
+       U32 TX_crs;             /* frames TX with lost carrier sense */
+       U32 TX_def;             /* frames deferred due to activity on link */
+       U32 TX_singlecol;       /* frames TX with one and only one collision */
+       U32 TX_multcol;         /* frames TX with more than one collision */
+       U32 TX_totcol;          /* total collisions detected during TX */
+       U32 Rcv_good;           /* good frames received */
+       U32 Rcv_CRCerr;         /* frames RX and discarded with CRC errors */
+       U32 Rcv_alignerr;       /* frames RX with alignment and CRC errors */
+       U32 Rcv_reserr;         /* good frames discarded due to no RX buffer */
+       U32 Rcv_orun;           /* RX frames lost due to FIFO overrun */
+       U32 Rcv_cdt;            /* RX frames with collision during RX */
+       U32 Rcv_runt;           /* RX frames shorter than 64 bytes */
+} RCLINKSTATS, *P_RCLINKSTATS;
+
+ /*
+    ** RCGetLinkStatistics()
+    **
+    ** Returns link statistics in user's structure at address StatsReturnAddr
+    ** If WaitCallback is given (not NULL), it is called during the wait
+    ** loop while waiting for the adapter to respond.
+  */
+RC_RETURN RCGetLinkStatistics (struct net_device *dev,
+                              P_RCLINKSTATS StatsReturnAddr,
+                              PFNWAITCALLBACK WaitCallback);
+
+ /*
+    ** RCGetLinkStatus()
+    **
+    ** Return link status, up or down, to user's location addressed by ReturnAddr.
+    ** If WaitCallback is given (not NULL), it is called during the wait
+    ** loop while waiting for the adapter to respond.
+  */
+RC_RETURN RCGetLinkStatus (struct net_device *dev,
+                          PU32 pReturnStatus, PFNWAITCALLBACK WaitCallback);
+
+ /* Link Status defines - value returned in pReturnStatus */
+#define RC_LAN_LINK_STATUS_DOWN     0
+#define RC_LAN_LINK_STATUS_UP       1
+
+ /*
+    ** RCGetMAC()
+    **
+    ** Get the current MAC address assigned to user.  RedCreek Ravlin 45/PCI 
+    ** has two MAC addresses.  One which is private to the PCI Card, and 
+    ** another MAC which is given to the user as its link layer MAC address. The
+    ** adapter runs in promiscuous mode because of the dual address requirement.
+    ** The MAC address is returned to the unsigned char array pointed to by mac.
+  */
+RC_RETURN RCGetMAC (struct net_device *dev, PFNWAITCALLBACK WaitCallback);
+
+ /*
+    ** RCSetMAC()
+    **
+    ** Set a new user port MAC address.  This address will be returned on
+    ** subsequent RCGetMAC() calls.
+  */
+RC_RETURN RCSetMAC (struct net_device *dev, PU8 mac);
+
+ /*
+    ** RCSetLinkSpeed()
+    **
+    ** Set the adapter's link speed based on the given input code.
+  */
+RC_RETURN RCSetLinkSpeed (struct net_device *dev, U16 LinkSpeedCode);
+ /* Set link speed codes */
+#define LNK_SPD_AUTO_NEG_NWAY   0
+#define LNK_SPD_100MB_FULL      1
+#define LNK_SPD_100MB_HALF      2
+#define LNK_SPD_10MB_FULL       3
+#define LNK_SPD_10MB_HALF       4
+
+ /*
+    ** RCGetLinkSpeed()
+    **
+    ** Return link speed code.
+  */
+ /* Return link speed codes */
+#define LNK_SPD_UNKNOWN         0
+#define LNK_SPD_100MB_FULL      1
+#define LNK_SPD_100MB_HALF      2
+#define LNK_SPD_10MB_FULL       3
+#define LNK_SPD_10MB_HALF       4
+
+RC_RETURN
+RCGetLinkSpeed (struct net_device *dev, PU32 pLinkSpeedCode,
+               PFNWAITCALLBACK WaitCallback);
+/*
+** =========================================================================
+** RCSetPromiscuousMode(struct net_device *dev, U16 Mode)
+**
+** Defined values for Mode:
+**  0 - turn off promiscuous mode
+**  1 - turn on  promiscuous mode
+**
+** =========================================================================
+*/
+#define PROMISCUOUS_MODE_OFF 0
+#define PROMISCUOUS_MODE_ON  1
+RC_RETURN RCSetPromiscuousMode (struct net_device *dev, U16 Mode);
+/*
+** =========================================================================
+** RCGetPromiscuousMode(struct net_device *dev, PU32 pMode, PFNWAITCALLBACK WaitCallback)
+**
+** get promiscuous mode setting
+**
+** Possible return values placed in pMode:
+**  0 = promiscuous mode not set
+**  1 = promiscuous mode is set
+**
+** =========================================================================
+*/
+RC_RETURN
+RCGetPromiscuousMode (struct net_device *dev, PU32 pMode,
+                     PFNWAITCALLBACK WaitCallback);
+
+/*
+** =========================================================================
+** RCSetBroadcastMode(struct net_device *dev, U16 Mode)
+**
+** Defined values for Mode:
+**  0 - turn off broadcast mode
+**  1 - turn on  broadcast mode
+**
+** =========================================================================
+*/
+#define BROADCAST_MODE_OFF 0
+#define BROADCAST_MODE_ON  1
+RC_RETURN RCSetBroadcastMode (struct net_device *dev, U16 Mode);
+/*
+** =========================================================================
+** RCGetBroadcastMode(struct net_device *dev, PU32 pMode, PFNWAITCALLBACK WaitCallback)
+**
+** get broadcast mode setting
+**
+** Possible return values placed in pMode:
+**  0 = broadcast mode not set
+**  1 = broadcast mode is set
+**
+** =========================================================================
+*/
+RC_RETURN
+RCGetBroadcastMode (struct net_device *dev, PU32 pMode,
+                   PFNWAITCALLBACK WaitCallback);
+/*
+** =========================================================================
+** RCReportDriverCapability(struct net_device *dev, U32 capability)
+**
+** Currently defined bits:
+** WARM_REBOOT_CAPABLE   0x01
+**
+** =========================================================================
+*/
+RC_RETURN RCReportDriverCapability (struct net_device *dev, U32 capability);
+
+/*
+** RCGetFirmwareVer()
+**
+** Return firmware version in the form "SoftwareVersion : Bt BootVersion"
+**
+** WARNING: user's space pointed to by pFirmString should be at least 60 bytes.
+*/
+RC_RETURN
+RCGetFirmwareVer (struct net_device *dev, PU8 pFirmString,
+                 PFNWAITCALLBACK WaitCallback);
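+
+/*
+** Usage sketch (illustrative only; not part of this API): fetching the
+** firmware string into a buffer that honors the 60-byte warning above.
+*/
+static inline void
+rc_example_print_fw (struct net_device *dev)
+{
+       U8 fw[64];              /* at least 60 bytes, per the warning above */
+
+       if (RCGetFirmwareVer (dev, fw, NULL) == RC_RTN_NO_ERROR)
+               dprintk ("firmware: %s\n", fw);
+}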
+
+/*
+** ----------------------------------------------
+** LAN adapter Reset and Shutdown functions
+** ----------------------------------------------
+*/
+ /* resource flag bit assignments for RCResetLANCard() & RCShutdownLANCard() */
+#define RC_RESOURCE_RETURN_POSTED_RX_BUCKETS  0x0001
+#define RC_RESOURCE_RETURN_PEND_TX_BUFFERS    0x0002
+
+ /*
+    ** RCResetLANCard()
+    **
+    ** Reset LAN card operation.  Causes a software reset of the ethernet
+    ** controller and restarts the command and receive units. Depending on 
+    ** the ResourceFlags given, the buffers are either returned to the
+    ** host with reply status of I2O_REPLY_STATUS_ABORT_NO_DATA_TRANSFER and
+    ** detailed status of I2O_LAN_DSC_CANCELED (new receive buffers must be
+    ** posted after issuing this) OR the buffers are kept and reused by
+    ** the ethernet controller. If CallbackFunction is not NULL, the function
+    ** will be called when the reset is complete.  If the CallbackFunction is
+    ** NULL, a 1 will be put into the ReturnAddr after waiting for the reset 
+    ** to complete (please disable I2O interrupts during this method).
+    ** Any outstanding transmit or receive buffers that are complete will be
+    ** returned via the normal reply messages before the requested resource
+    ** buffers are returned.
+    ** A call to RCPostRecvBuffers() is needed to return the ethernet to full
+    ** operation if the receive buffers were returned during LANReset.
+    ** Note: The IOP status is not affected by a LAN reset.
+  */
+RC_RETURN RCResetLANCard (struct net_device *dev, U16 ResourceFlags,
+                         PU32 ReturnAddr, PFNCALLBACK CallbackFunction);
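+
+/*
+** Usage sketch (illustrative only; not part of this API): the blocking
+** form of a LAN reset - no callback, completion signalled through
+** ReturnAddr.  I2O interrupts are assumed to be disabled by the caller,
+** as requested above.
+*/
+static inline RC_RETURN
+rc_example_blocking_reset (struct net_device *dev)
+{
+       U32 done = 0;           /* set to 1 by the layer when the reset is done */
+
+       return RCResetLANCard (dev, RC_RESOURCE_RETURN_POSTED_RX_BUCKETS,
+                              &done, NULL);
+}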
+
+ /*
+    ** RCShutdownLANCard()
+    **
+    ** Shutdown LAN card operation and put into an idle (suspended) state.
+    ** The LAN card is restarted with RCResetLANCard() function.
+    ** Depending on the ResourceFlags given, the buffers are either returned 
+    ** to the host with reply status of I2O_REPLY_STATUS_ABORT_NO_DATA_TRANSFER 
+    ** and detailed status of I2O_LAN_DSC_CANCELED (new receive buffers must be
+    ** posted after issuing this) OR the buffers are kept and reused by
+    ** the ethernet controller. If CallbackFunction is not NULL, the function
+    ** will be called when the reset is complete.  If the CallbackFunction is
+    ** NULL, a 1 will be put into the ReturnAddr after waiting for the reset 
+    ** to complete (please disable I2O interrupts during this method).
+    ** Any outstanding transmit or receive buffers that are complete will be
+    ** returned via the normal reply messages before the requested resource
+    ** buffers are returned.
+    ** Note: The IOP status is not affected by a LAN shutdown.
+  */
+RC_RETURN
+RCShutdownLANCard (struct net_device *dev, U16 ResourceFlags, PU32 ReturnAddr,
+                  PFNCALLBACK CallbackFunction);
+
+ /*
+    ** RCResetIOP();
+    **     Initializes IOPState to I2O_IOP_STATE_RESET.
+    **     Stops access to outbound message Q.
+    **     Discards any outstanding transmit or posted receive buffers.
+    **     Clears outbound message Q. 
+  */
+RC_RETURN RCResetIOP (struct net_device *dev);
+
+#endif                         /* RCLANMTL_H */
diff --git a/drivers/net/rcpci45.c b/drivers/net/rcpci45.c
new file mode 100644 (file)
index 0000000..76b63f3
--- /dev/null
@@ -0,0 +1,1049 @@
+/* 
+**
+**  RCpci45.c  
+**
+**
+**
+**  ---------------------------------------------------------------------
+**  ---     Copyright (c) 1998, 1999, RedCreek Communications Inc.    ---
+**  ---                   All rights reserved.                        ---
+**  ---------------------------------------------------------------------
+**
+** Written by Pete Popov and Brian Moyle.
+**
+** Known Problems
+** 
+** None known at this time.
+**
+**  This program is free software; you can redistribute it and/or modify
+**  it under the terms of the GNU General Public License as published by
+**  the Free Software Foundation; either version 2 of the License, or
+**  (at your option) any later version.
+
+**  This program is distributed in the hope that it will be useful,
+**  but WITHOUT ANY WARRANTY; without even the implied warranty of
+**  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+**  GNU General Public License for more details.
+
+**  You should have received a copy of the GNU General Public License
+**  along with this program; if not, write to the Free Software
+**  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+**
+**  Francois Romieu, Apr 2003: Converted to pci DMA mapping API.
+**
+**  Pete Popov, Oct 2001: Fixed a few bugs to make the driver functional
+**  again. Note that this card is not supported or manufactured by 
+**  RedCreek anymore.
+**   
+**  Rasmus Andersen, December 2000: Converted to new PCI API and general
+**  cleanup.
+**
+**  Pete Popov, January 11,99: Fixed a couple of 2.1.x problems 
+**  (virt_to_bus() not called), tested it under 2.2pre5 (as a module), and 
+**  added a #define(s) to enable the use of the same file for both, the 2.0.x 
+**  kernels as well as the 2.1.x.
+**
+**  Ported to 2.1.x by Alan Cox 1998/12/9. 
+**
+**  Sometime in mid 1998, written by Pete Popov and Brian Moyle.
+**
+***************************************************************************/
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/timer.h>
+
+#include <asm/irq.h>           /* For NR_IRQS only. */
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+
+static char version[] __initdata =
+    "RedCreek Communications PCI linux driver version 2.21\n";
+
+#define RC_LINUX_MODULE
+#include "rclanmtl.h"
+#include "rcif.h"
+
+#define RUN_AT(x) (jiffies + (x))
+
+#define NEW_MULTICAST
+
+#define MAX_ETHER_SIZE        1520
+#define MAX_NMBR_RCV_BUFFERS    96
+#define RC_POSTED_BUFFERS_LOW_MARK MAX_NMBR_RCV_BUFFERS-16
+#define BD_SIZE 3              /* Bucket Descriptor size */
+#define BD_LEN_OFFSET 2                /* Bucket Descriptor offset to length field */
+
+/* RedCreek LAN device Target ID */
+#define RC_LAN_TARGET_ID  0x10
+/* RedCreek's OSM default LAN receive Initiator */
+#define DEFAULT_RECV_INIT_CONTEXT  0xA17
+
+/* minimum msg buffer size needed by the card 
+ * Note that the size of this buffer is hard coded in the
+ * ipsec card's firmware. Thus, the size MUST be a minimum
+ * of 16K. Otherwise the card will end up using memory
+ * that does not belong to it.
+ */
+#define MSG_BUF_SIZE  16384
+
+/* 2003/04/20: I don't know about the hardware ability but the driver won't
+ * play safe with 64 bit addressing, and DAC without NETIF_F_HIGHDMA doesn't
+ * really make sense anyway. Let's play safe - romieu.
+ */
+#define RCPCI45_DMA_MASK       ((u64) 0xffffffff)
+
+static U32 DriverControlWord;
+
+static void rc_timer (unsigned long);
+
+static int RCopen (struct net_device *);
+static int RC_xmit_packet (struct sk_buff *, struct net_device *);
+static irqreturn_t RCinterrupt (int, void *, struct pt_regs *);
+static int RCclose (struct net_device *dev);
+static struct net_device_stats *RCget_stats (struct net_device *);
+static int RCioctl (struct net_device *, struct ifreq *, int);
+static int RCconfig (struct net_device *, struct ifmap *);
+static void RCxmit_callback (U32, U16, PU32, struct net_device *);
+static void RCrecv_callback (U32, U8, U32, PU32, struct net_device *);
+static void RCreset_callback (U32, U32, U32, struct net_device *);
+static void RCreboot_callback (U32, U32, U32, struct net_device *);
+static int RC_allocate_and_post_buffers (struct net_device *, int);
+
+static struct pci_device_id rcpci45_pci_table[] = {
+       { PCI_VENDOR_ID_REDCREEK, PCI_DEVICE_ID_RC45, PCI_ANY_ID, PCI_ANY_ID,},
+       {}
+};
+MODULE_DEVICE_TABLE (pci, rcpci45_pci_table);
+MODULE_LICENSE("GPL");
+
+static void __devexit
+rcpci45_remove_one (struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata (pdev);
+       PDPA pDpa = dev->priv;
+
+       RCResetIOP (dev);
+       unregister_netdev (dev);
+       free_irq (dev->irq, dev);
+       iounmap ((void *) dev->base_addr);
+       pci_release_regions (pdev);
+       pci_free_consistent (pdev, MSG_BUF_SIZE, pDpa->msgbuf,
+                            pDpa->msgbuf_dma);
+       if (pDpa->pPab)
+               kfree (pDpa->pPab);
+       free_netdev (dev);
+       pci_set_drvdata (pdev, NULL);
+}
+
+static int
+rcpci45_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+       unsigned long *vaddr;
+       PDPA pDpa;
+       int error;
+       static int card_idx = -1;
+       struct net_device *dev;
+       unsigned long pci_start, pci_len;
+
+       card_idx++;
+
+       /*
+        * Allocate and fill a new device structure.
+        * alloc_etherdev() provides room for struct net_device plus the
+        * DPA (driver private area).  The LAN API private area, which
+        * requires a minimum of 16KB, is allocated separately below with
+        * pci_alloc_consistent().
+        */
+
+       dev = alloc_etherdev(sizeof(*pDpa));
+       if (!dev) {
+               printk (KERN_ERR
+                       "(rcpci45 driver:) alloc_etherdev alloc failed\n");
+               error = -ENOMEM;
+               goto err_out;
+       }
+
+       SET_MODULE_OWNER(dev);
+       SET_NETDEV_DEV(dev, &pdev->dev);
+
+       error = pci_enable_device (pdev);
+       if (error) {
+               printk (KERN_ERR
+                       "(rcpci45 driver:) %d: pci enable device error\n",
+                       card_idx);
+               goto err_out;
+       }
+       pci_start = pci_resource_start (pdev, 0);
+       pci_len = pci_resource_len (pdev, 0);
+       printk("pci_start %lx pci_len %lx\n", pci_start, pci_len);
+
+       pci_set_drvdata (pdev, dev);
+
+       pDpa = dev->priv;
+       pDpa->id = card_idx;
+       pDpa->pci_dev = pdev;
+       pDpa->pci_addr = pci_start;
+
+       if (!pci_start || !(pci_resource_flags (pdev, 0) & IORESOURCE_MEM)) {
+               printk (KERN_ERR
+                       "(rcpci45 driver:) No PCI mem resources! Aborting\n");
+               error = -EBUSY;
+               goto err_out_free_dev;
+       }
+
+       /*
+        * pDpa->msgbuf is where the card will dma the I2O 
+        * messages. Thus, we need contiguous physical pages of memory.
+        * 2003/04/20:  pci_alloc_consistent() provides well over the needed
+        * alignment on a 256-byte boundary for the LAN API private area.
+        * Thus it isn't needed anymore to align it by hand.
+         */
+       pDpa->msgbuf = pci_alloc_consistent (pdev, MSG_BUF_SIZE,
+                                            &pDpa->msgbuf_dma);
+       if (!pDpa->msgbuf) {
+               printk (KERN_ERR "(rcpci45 driver:) Could not allocate "
+                       "%d byte memory for the private msgbuf!\n",
+                       MSG_BUF_SIZE);
+               error = -ENOMEM;
+               goto err_out_free_dev;
+       }
+
+       /* The adapter is accessible through memory-access read/write, not
+        * I/O read/write.  Thus, we need to map it to some virtual address
+        * area in order to access the registers as normal memory.
+        */
+       error = pci_request_regions (pdev, dev->name);
+       if (error)
+               goto err_out_free_msgbuf;
+
+       error = pci_set_dma_mask (pdev, RCPCI45_DMA_MASK);
+       if (error) {
+               printk (KERN_ERR
+                       "(rcpci45 driver:) pci_set_dma_mask failed!\n");
+               goto err_out_free_region;
+       }
+
+       vaddr = (ulong *) ioremap (pci_start, pci_len);
+       if (!vaddr) {
+               printk (KERN_ERR
+                       "(rcpci45 driver:) Unable to remap address range "
+                       "from %lu to %lu\n",
+                       pci_start, pci_start + pci_len);
+               error = -EIO;
+               goto err_out_free_region;
+       }
+
+       dev->base_addr = (unsigned long) vaddr;
+       dev->irq = pdev->irq;
+       dev->open = &RCopen;
+       dev->hard_start_xmit = &RC_xmit_packet;
+       dev->stop = &RCclose;
+       dev->get_stats = &RCget_stats;
+       dev->do_ioctl = &RCioctl;
+       dev->set_config = &RCconfig;
+
+       if ((error = register_netdev(dev)))
+               goto err_out_iounmap;
+
+       return 0;               /* success */
+
+err_out_iounmap:
+       iounmap((void *) dev->base_addr);
+err_out_free_region:
+       pci_release_regions (pdev);
+err_out_free_msgbuf:
+       pci_free_consistent (pdev, MSG_BUF_SIZE, pDpa->msgbuf,
+                            pDpa->msgbuf_dma);
+err_out_free_dev:
+       free_netdev (dev);
+err_out:
+       card_idx--;
+       return error;
+}
+
+static struct pci_driver rcpci45_driver = {
+       .name           = "rcpci45",
+       .id_table       = rcpci45_pci_table,
+       .probe          = rcpci45_init_one,
+       .remove         = __devexit_p(rcpci45_remove_one),
+};
+
+static int __init
+rcpci_init_module (void)
+{
+       int rc = pci_module_init (&rcpci45_driver);
+       if (!rc)
+               printk (KERN_INFO "%s", version);
+       return rc;
+}
+
+static int
+RCopen (struct net_device *dev)
+{
+       int post_buffers = MAX_NMBR_RCV_BUFFERS;
+       PDPA pDpa = dev->priv;
+       int count = 0;
+       int requested = 0;
+       int error;
+
+       if (pDpa->nexus) {
+               /* This is not the first time RCopen is called.  Thus,
+                * the interface was previously opened and later closed
+                * by RCclose().  RCclose() does a Shutdown; to wake up
+                * the adapter, a reset is mandatory before we can post
+                * receive buffers.  However, if the adapter initiated 
+                * a reboot while the interface was closed -- and interrupts
+        * were turned off -- we will need to reinitialize
+                * the adapter, rather than simply waking it up.  
+                */
+               printk (KERN_INFO "Waking up adapter...\n");
+               RCResetLANCard (dev, 0, 0, 0);
+       } else {
+               pDpa->nexus = 1;
+               /* 
+                * RCInitI2OMsgLayer is done only once, unless the
+                * adapter was sent a warm reboot
+                */
+               error = RCInitI2OMsgLayer (dev, (PFNTXCALLBACK) RCxmit_callback,
+                                          (PFNRXCALLBACK) RCrecv_callback,
+                                          (PFNCALLBACK) RCreboot_callback);
+               if (error) {
+                       printk (KERN_ERR "%s: Unable to init msg layer (%x)\n",
+                                       dev->name, error);
+                       goto err_out;
+               }
+               if ((error = RCGetMAC (dev, NULL))) {
+                       printk (KERN_ERR "%s: Unable to get adapter MAC\n",
+                                       dev->name);
+                       goto err_out;
+               }
+       }
+
+       /* Request a shared interrupt line. */
+       error = request_irq (dev->irq, RCinterrupt, SA_SHIRQ, dev->name, dev);
+       if (error) {
+               printk (KERN_ERR "%s: unable to get IRQ %d\n", 
+                               dev->name, dev->irq);
+               goto err_out;
+       }
+
+       DriverControlWord |= WARM_REBOOT_CAPABLE;
+       RCReportDriverCapability (dev, DriverControlWord);
+
+       printk (KERN_INFO "%s: RedCreek Communications IPSEC VPN adapter\n",
+               dev->name);
+
+       RCEnableI2OInterrupts (dev);
+
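+       /*
+        * Post receive buffers in batches: the adapter accepts at most
+        * MAX_NMBR_POST_BUFFERS_PER_MSG buffers per message, so loop until
+        * all MAX_NMBR_RCV_BUFFERS have been handed to the card (or until
+        * allocation falls short).
+        */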
+       while (post_buffers) {
+               if (post_buffers > MAX_NMBR_POST_BUFFERS_PER_MSG)
+                       requested = MAX_NMBR_POST_BUFFERS_PER_MSG;
+               else
+                       requested = post_buffers;
+               count = RC_allocate_and_post_buffers (dev, requested);
+
+               if (count < requested) {
+                       /*
+                        * Check to see if we were able to post 
+                        * any buffers at all.
+                        */
+                       if (post_buffers == MAX_NMBR_RCV_BUFFERS) {
+                               printk (KERN_ERR
+                                       "%s: unable to allocate any buffers\n",
+                                       dev->name);
+                               goto err_out_free_irq;
+                       }
+                       printk (KERN_WARNING
+                               "%s: unable to allocate all requested buffers\n",
+                               dev->name);
+                       break;  /* we'll try to post more buffers later */
+               } else
+                       post_buffers -= count;
+       }
+       pDpa->numOutRcvBuffers = MAX_NMBR_RCV_BUFFERS - post_buffers;
+       pDpa->shutdown = 0;     /* just in case */
+       netif_start_queue (dev);
+       return 0;
+
+err_out_free_irq:
+       free_irq (dev->irq, dev);
+err_out:
+       return error;
+}
+
+static int
+RC_xmit_packet (struct sk_buff *skb, struct net_device *dev)
+{
+
+       PDPA pDpa = dev->priv;
+       singleTCB tcb;
+       psingleTCB ptcb = &tcb;
+       RC_RETURN status = 0;
+
+       netif_stop_queue (dev);
+
+       if (pDpa->shutdown || pDpa->reboot) {
+               printk (KERN_WARNING "%s: RC_xmit_packet: tbusy!\n",
+                               dev->name);
+               return 1;
+       }
+
+       /*
+        * The user is free to reuse the TCB after RCI2OSendPacket() 
+        * returns, since the function copies the necessary info into its 
+        * own private space.  Thus, our TCB can be a local structure.  
+        * The skb, on the other hand, will be freed up in our interrupt 
+        * handler.
+        */
+
+       ptcb->bcount = 1;
+
+       /* 
+        * we'll get the context when the adapter interrupts us to tell us that
+        * the transmission is done. At that time, we can free skb.
+        */
+       ptcb->b.context = (U32) skb;
+       ptcb->b.scount = 1;
+       ptcb->b.size = skb->len;
+       ptcb->b.addr = pci_map_single(pDpa->pci_dev, skb->data, skb->len,
+                                     PCI_DMA_TODEVICE);
+
+       if ((status = RCI2OSendPacket (dev, (U32) NULL, (PRCTCB) ptcb))
+           != RC_RTN_NO_ERROR) {
+               printk (KERN_ERR "%s: send error 0x%x\n", dev->name, (uint) status);
+               return 1;
+       } else {
+               dev->trans_start = jiffies;
+               netif_wake_queue (dev);
+       }
+       /*
+        * That's it!
+        */
+       return 0;
+}
+
+/*
+ * RCxmit_callback()
+ *
+ * The transmit callback routine. It's called by RCProcI2OMsgQ()
+ * because the adapter is done with one or more transmit buffers and
+ * it's returning them to us, or we asked the adapter to return the
+ * outstanding transmit buffers by calling RCResetLANCard() with the
+ * RC_RESOURCE_RETURN_PEND_TX_BUFFERS flag.
+ * All we need to do is free the buffers.
+ */
+static void
+RCxmit_callback (U32 Status,
+                U16 PcktCount, PU32 BufferContext, struct net_device *dev)
+{
+       struct sk_buff *skb;
+       PDPA pDpa = dev->priv;
+
+       if (!pDpa) {
+               printk (KERN_ERR "%s: Fatal Error in xmit callback, !pDpa\n",
+                               dev->name);
+               return;
+       }
+
+       if (Status != I2O_REPLY_STATUS_SUCCESS)
+               printk (KERN_INFO "%s: xmit_callback: Status = 0x%x\n", 
+                               dev->name, (uint) Status);
+       if (pDpa->shutdown || pDpa->reboot)
+               printk (KERN_INFO "%s: xmit callback: shutdown||reboot\n",
+                               dev->name);
+
+       while (PcktCount--) {
+               skb = (struct sk_buff *) (BufferContext[0]);
+               BufferContext++;
+               pci_unmap_single(pDpa->pci_dev, BufferContext[1], skb->len,
+                                PCI_DMA_TODEVICE);
+               dev_kfree_skb_irq (skb);
+       }
+       netif_wake_queue (dev);
+}
+
+static void
+RCreset_callback (U32 Status, U32 p1, U32 p2, struct net_device *dev)
+{
+       PDPA pDpa = dev->priv;
+
+       printk (KERN_DEBUG "RCreset_callback Status 0x%x\n", (uint) Status);
+       /*
+        * Check to see why we were called.
+        */
+       if (pDpa->shutdown) {
+               printk (KERN_INFO "%s: shutting down interface\n",
+                               dev->name);
+               pDpa->shutdown = 0;
+               pDpa->reboot = 0;
+       } else if (pDpa->reboot) {
+               printk (KERN_INFO "%s: reboot, shutdown adapter\n",
+                               dev->name);
+               /*
+                * We don't set any of the flags in RCShutdownLANCard()
+                * and we don't pass a callback routine to it.
+                * The adapter will have already initiated the reboot by
+                * the time the function returns.
+                */
+               RCDisableI2OInterrupts (dev);
+               RCShutdownLANCard (dev, 0, 0, 0);
+               printk (KERN_INFO "%s: scheduling timer...\n", dev->name);
+               init_timer (&pDpa->timer);
+               pDpa->timer.expires = RUN_AT ((40 * HZ) / 10);  /* 4 sec. */
+               pDpa->timer.data = (unsigned long) dev;
+               pDpa->timer.function = &rc_timer;       /* timer handler */
+               add_timer (&pDpa->timer);
+       }
+}
+
+static void
+RCreboot_callback (U32 Status, U32 p1, U32 p2, struct net_device *dev)
+{
+       PDPA pDpa = dev->priv;
+
+       printk (KERN_INFO "%s: reboot: rcv buffers outstanding = %d\n",
+                dev->name, (uint) pDpa->numOutRcvBuffers);
+
+       if (pDpa->shutdown) {
+               printk (KERN_INFO "%s: skip reboot, shutdown initiated\n",
+                               dev->name);
+               return;
+       }
+       pDpa->reboot = 1;
+       /*
+        * OK, we reset the adapter and ask it to return all
+        * outstanding transmit buffers as well as the posted
+        * receive buffers.  When the adapter is done returning
+        * those buffers, it will call our RCreset_callback() 
+        * routine.  In that routine, we'll call RCShutdownLANCard()
+        * to tell the adapter that it's OK to start the reboot and
+        * to tell the adapter that it's OK to start the reboot and
+        * schedule a timer callback routine to execute 4 seconds
+        * later; this routine will reinitialize the adapter at that time.
+        */
+       RCResetLANCard (dev, RC_RESOURCE_RETURN_POSTED_RX_BUCKETS |
+                       RC_RESOURCE_RETURN_PEND_TX_BUFFERS, 0,
+                       (PFNCALLBACK) RCreset_callback);
+}
+
+/*
+ * RCrecv_callback()
+ * 
+ * The receive packet callback routine.  This is called by
+ * RCProcI2OMsgQ() after the adapter posts buffers which have been
+ * filled (one ethernet packet per buffer).
+ */
+static void
+RCrecv_callback (U32 Status,
+                U8 PktCount,
+                U32 BucketsRemain,
+                PU32 PacketDescBlock, struct net_device *dev)
+{
+
+       U32 len, count;
+       PDPA pDpa = dev->priv;
+       struct sk_buff *skb;
+       singleTCB tcb;
+       psingleTCB ptcb = &tcb;
+
+       ptcb->bcount = 1;
+
+       if ((pDpa->shutdown || pDpa->reboot) && !Status)
+               printk (KERN_INFO "%s: shutdown||reboot && !Status (%d)\n",
+                               dev->name, PktCount);
+
+       if ((Status != I2O_REPLY_STATUS_SUCCESS) || pDpa->shutdown) {
+               /*
+                * Free whatever buffers the adapter returned, but don't
+                * pass them to the kernel.
+                */
+
+               if (!pDpa->shutdown && !pDpa->reboot)
+                       printk (KERN_INFO "%s: recv error status = 0x%x\n",
+                                       dev->name, (uint) Status);
+               else
+                       printk (KERN_DEBUG "%s: Returning %d buffs stat 0x%x\n",
+                                       dev->name, PktCount, (uint) Status);
+               /*
+                * TO DO: check the nature of the failure and put the 
+                * adapter in failed mode if it's a hard failure.  
+                * Send a reset to the adapter and free all outstanding memory.
+                */
+               if (PacketDescBlock) {
+                       while (PktCount--) {
+                               skb = (struct sk_buff *) PacketDescBlock[0];
+                               dev_kfree_skb (skb);
+                               pDpa->numOutRcvBuffers--;
+                               /* point to next context field */
+                               PacketDescBlock += BD_SIZE;
+                       }
+               }
+               return;
+       } else {
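+               /*
+                * Each packet descriptor is BD_SIZE words long: word 0 holds
+                * the skb context we posted, word 2 the received length.
+                */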
+               while (PktCount--) {
+                       skb = (struct sk_buff *) PacketDescBlock[0];
+                       len = PacketDescBlock[2];
+                       skb->dev = dev;
+                       skb_put (skb, len);     /* adjust length and tail */
+                       skb->protocol = eth_type_trans (skb, dev);
+                       netif_rx (skb); /* send the packet to the kernel */
+                       dev->last_rx = jiffies;
+                       pDpa->numOutRcvBuffers--;       
+                       /* point to next context field */
+                       PacketDescBlock += BD_SIZE;
+               }
+       }
+
+       /*
+        * Replenish the posted receive buffers. 
+        * DO NOT replenish buffers if the driver has already
+        * initiated a reboot or shutdown!
+        */
+
+       if (!pDpa->shutdown && !pDpa->reboot) {
+               count = RC_allocate_and_post_buffers (dev,
+                                                     MAX_NMBR_RCV_BUFFERS -
+                                                     pDpa->numOutRcvBuffers);
+               pDpa->numOutRcvBuffers += count;
+       }
+
+}
+
+/*
+ * RCinterrupt()
+ * 
+ * Interrupt handler. 
+ * This routine sets up a couple of pointers and calls
+ * RCProcI2OMsgQ(), which in turn processes the message and
+ * calls one of our callback functions.
+ */
+static irqreturn_t
+RCinterrupt (int irq, void *dev_id, struct pt_regs *regs)
+{
+
+       PDPA pDpa;
+       struct net_device *dev = dev_id;
+
+       pDpa = dev->priv;
+
+       if (pDpa->shutdown)
+               printk (KERN_DEBUG "%s: shutdown, service irq\n",
+                               dev->name);
+
+       return RCProcI2OMsgQ (dev);
+}
+
+#define REBOOT_REINIT_RETRY_LIMIT 4
+static void
+rc_timer (unsigned long data)
+{
+       struct net_device *dev = (struct net_device *) data;
+       PDPA pDpa = dev->priv;
+       int init_status;
+       static int retry;
+       int post_buffers = MAX_NMBR_RCV_BUFFERS;
+       int count = 0;
+       int requested = 0;
+
+       if (pDpa->reboot) {
+               init_status =
+                   RCInitI2OMsgLayer (dev, (PFNTXCALLBACK) RCxmit_callback,
+                                      (PFNRXCALLBACK) RCrecv_callback,
+                                      (PFNCALLBACK) RCreboot_callback);
+
+               switch (init_status) {
+               case RC_RTN_NO_ERROR:
+
+                       pDpa->reboot = 0;
+                       pDpa->shutdown = 0;     /* just in case */
+                       RCReportDriverCapability (dev, DriverControlWord);
+                       RCEnableI2OInterrupts (dev);
+
+
+                       if (!(dev->flags & IFF_UP)) {
+                               retry = 0;
+                               return;
+                       }
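+                       /*
+                        * Repost receive buffers in the same batched fashion
+                        * as RCopen() before restarting the queue.
+                        */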
+                       while (post_buffers) {
+                               if (post_buffers > 
+                                               MAX_NMBR_POST_BUFFERS_PER_MSG)
+                                       requested = 
+                                               MAX_NMBR_POST_BUFFERS_PER_MSG;
+                               else
+                                       requested = post_buffers;
+                               count =
+                                   RC_allocate_and_post_buffers (dev,
+                                                                 requested);
+                               post_buffers -= count;
+                               if (count < requested)
+                                       break;
+                       }
+                       pDpa->numOutRcvBuffers =
+                           MAX_NMBR_RCV_BUFFERS - post_buffers;
+                       printk (KERN_INFO "%s: Initialization done.\n",
+                                       dev->name);
+                       netif_wake_queue (dev);
+                       retry = 0;
+                       return;
+               case RC_RTN_FREE_Q_EMPTY:
+                       retry++;
+                       printk (KERN_WARNING "%s inbound free q empty\n",
+                                       dev->name);
+                       break;
+               default:
+                       retry++;
+                       printk (KERN_WARNING "%s bad stat after reboot: %d\n",
+                                       dev->name, init_status);
+                       break;
+               }
+
+               if (retry > REBOOT_REINIT_RETRY_LIMIT) {
+                       printk (KERN_WARNING "%s unable to reinitialize adapter after reboot\n", dev->name);
+                       printk (KERN_WARNING "%s shutting down interface\n", dev->name);
+                       RCDisableI2OInterrupts (dev);
+                       dev->flags &= ~IFF_UP;
+               } else {
+                       printk (KERN_INFO "%s: rescheduling timer...\n",
+                                       dev->name);
+                       init_timer (&pDpa->timer);
+                       pDpa->timer.expires = RUN_AT ((40 * HZ) / 10);
+                       pDpa->timer.data = (unsigned long) dev;
+                       pDpa->timer.function = &rc_timer;
+                       add_timer (&pDpa->timer);
+               }
+       } else
+               printk (KERN_WARNING "%s: unexpected timer irq\n", dev->name);
+}
+
+static int
+RCclose (struct net_device *dev)
+{
+       PDPA pDpa = dev->priv;
+
+       printk (KERN_DEBUG "%s: RCclose\n", dev->name);
+       netif_stop_queue (dev);
+
+       if (pDpa->reboot) {
+               printk (KERN_INFO "%s skipping reset -- adapter already in reboot mode\n", dev->name);
+               dev->flags &= ~IFF_UP;
+               pDpa->shutdown = 1;
+               return 0;
+       }
+
+       pDpa->shutdown = 1;
+
+       /*
+        * We can't allow the driver to be unloaded until the adapter returns
+        * all posted receive buffers.  It doesn't hurt to tell the adapter
+        * to return all posted receive buffers and outstanding xmit buffers,
+        * even if there are none.
+        */
+
+       RCShutdownLANCard (dev, RC_RESOURCE_RETURN_POSTED_RX_BUCKETS |
+                          RC_RESOURCE_RETURN_PEND_TX_BUFFERS, 0,
+                          (PFNCALLBACK) RCreset_callback);
+
+       dev->flags &= ~IFF_UP;
+       return 0;
+}
+
+static struct net_device_stats *
+RCget_stats (struct net_device *dev)
+{
+       RCLINKSTATS RCstats;
+
+       PDPA pDpa = dev->priv;
+
+       if (!pDpa) {
+               return 0;
+       } else if (!(dev->flags & IFF_UP)) {
+               return 0;
+       }
+
+       memset (&RCstats, 0, sizeof (RCLINKSTATS));
+       if ((RCGetLinkStatistics (dev, &RCstats, (void *) 0)) ==
+           RC_RTN_NO_ERROR) {
+
+               /* total packets received    */
+               pDpa->stats.rx_packets = RCstats.Rcv_good;
+               /* total packets transmitted */
+               pDpa->stats.tx_packets = RCstats.TX_good;
+
+               pDpa->stats.rx_errors = RCstats.Rcv_CRCerr + 
+                       RCstats.Rcv_alignerr + RCstats.Rcv_reserr + 
+                       RCstats.Rcv_orun + RCstats.Rcv_cdt + RCstats.Rcv_runt;
+
+               pDpa->stats.tx_errors = RCstats.TX_urun + RCstats.TX_crs + 
+                       RCstats.TX_def + RCstats.TX_totcol;
+
+               /*
+                * This needs improvement.
+                */
+               pDpa->stats.rx_dropped = 0; /* no space in linux buffers   */
+               pDpa->stats.tx_dropped = 0; /* no space available in linux */
+               pDpa->stats.multicast = 0;  /* multicast packets received  */
+               pDpa->stats.collisions = RCstats.TX_totcol;
+
+               /* detailed rx_errors: */
+               pDpa->stats.rx_length_errors = 0;
+               pDpa->stats.rx_over_errors = RCstats.Rcv_orun;
+               pDpa->stats.rx_crc_errors = RCstats.Rcv_CRCerr;
+               pDpa->stats.rx_frame_errors = 0;
+               pDpa->stats.rx_fifo_errors = 0; 
+               pDpa->stats.rx_missed_errors = 0;
+
+               /* detailed tx_errors */
+               pDpa->stats.tx_aborted_errors = 0;
+               pDpa->stats.tx_carrier_errors = 0;
+               pDpa->stats.tx_fifo_errors = 0;
+               pDpa->stats.tx_heartbeat_errors = 0;
+               pDpa->stats.tx_window_errors = 0;
+
+               return ((struct net_device_stats *) &(pDpa->stats));
+       }
+       return 0;
+}
+
+static int
+RCioctl (struct net_device *dev, struct ifreq *rq, int cmd)
+{
+       RCuser_struct RCuser;
+       PDPA pDpa = dev->priv;
+
+       if (!capable (CAP_NET_ADMIN))
+               return -EPERM;
+
+       switch (cmd) {
+
+       case RCU_PROTOCOL_REV:
+                * Assign the user protocol revision, to tell the user-level
+                * controller program whether or not it is in sync.
+                * controller program whether or not it's in sync.
+                */
+               rq->ifr_ifru.ifru_data = (caddr_t) USER_PROTOCOL_REV;
+               break;
+
+       case RCU_COMMAND:
+               {
+                       if (copy_from_user
+                           (&RCuser, rq->ifr_data, sizeof (RCuser)))
+                               return -EFAULT;
+
+                       dprintk ("RCioctl: RCuser_cmd = 0x%x\n", RCuser.cmd);
+
+                       switch (RCuser.cmd) {
+                       case RCUC_GETFWVER:
+                               RCUD_GETFWVER = &RCuser.RCUS_GETFWVER;
+                               RCGetFirmwareVer (dev,
+                                                 (PU8) & RCUD_GETFWVER->
+                                                 FirmString, NULL);
+                               break;
+                       case RCUC_GETINFO:
+                               RCUD_GETINFO = &RCuser.RCUS_GETINFO;
+                               RCUD_GETINFO->mem_start = dev->base_addr;
+                               RCUD_GETINFO->mem_end =
+                                   dev->base_addr + pDpa->pci_addr_len;
+                               RCUD_GETINFO->base_addr = pDpa->pci_addr;
+                               RCUD_GETINFO->irq = dev->irq;
+                               break;
+                       case RCUC_GETIPANDMASK:
+                               RCUD_GETIPANDMASK = &RCuser.RCUS_GETIPANDMASK;
+                               RCGetRavlinIPandMask (dev,
+                                                     (PU32) &
+                                                     RCUD_GETIPANDMASK->IpAddr,
+                                                     (PU32) &
+                                                     RCUD_GETIPANDMASK->
+                                                     NetMask, NULL);
+                               break;
+                       case RCUC_GETLINKSTATISTICS:
+                               RCUD_GETLINKSTATISTICS =
+                                   &RCuser.RCUS_GETLINKSTATISTICS;
+                               RCGetLinkStatistics (dev,
+                                                    (P_RCLINKSTATS) &
+                                                    RCUD_GETLINKSTATISTICS->
+                                                    StatsReturn, NULL);
+                               break;
+                       case RCUC_GETLINKSTATUS:
+                               RCUD_GETLINKSTATUS = &RCuser.RCUS_GETLINKSTATUS;
+                               RCGetLinkStatus (dev,
+                                                (PU32) & RCUD_GETLINKSTATUS->
+                                                ReturnStatus, NULL);
+                               break;
+                       case RCUC_GETMAC:
+                               RCUD_GETMAC = &RCuser.RCUS_GETMAC;
+                               RCGetMAC (dev, NULL);
+                               memcpy(RCUD_GETMAC, dev->dev_addr, 8);
+                               break;
+                       case RCUC_GETPROM:
+                               RCUD_GETPROM = &RCuser.RCUS_GETPROM;
+                               RCGetPromiscuousMode (dev,
+                                                     (PU32) & RCUD_GETPROM->
+                                                     PromMode, NULL);
+                               break;
+                       case RCUC_GETBROADCAST:
+                               RCUD_GETBROADCAST = &RCuser.RCUS_GETBROADCAST;
+                               RCGetBroadcastMode (dev,
+                                                   (PU32) & RCUD_GETBROADCAST->
+                                                   BroadcastMode, NULL);
+                               break;
+                       case RCUC_GETSPEED:
+                               if (!(dev->flags & IFF_UP)) {
+                                       return -ENODATA;
+                               }
+                               RCUD_GETSPEED = &RCuser.RCUS_GETSPEED;
+                               RCGetLinkSpeed (dev,
+                                               (PU32) & RCUD_GETSPEED->
+                                               LinkSpeedCode, NULL);
+                               break;
+                       case RCUC_SETIPANDMASK:
+                               RCUD_SETIPANDMASK = &RCuser.RCUS_SETIPANDMASK;
+                               RCSetRavlinIPandMask (dev,
+                                                     (U32) RCUD_SETIPANDMASK->
+                                                     IpAddr,
+                                                     (U32) RCUD_SETIPANDMASK->
+                                                     NetMask);
+                               break;
+                       case RCUC_SETMAC:
+                               RCUD_SETMAC = &RCuser.RCUS_SETMAC;
+                               RCSetMAC (dev, (PU8) & RCUD_SETMAC->mac);
+                               break;
+                       case RCUC_SETSPEED:
+                               RCUD_SETSPEED = &RCuser.RCUS_SETSPEED;
+                               RCSetLinkSpeed (dev,
+                                               (U16) RCUD_SETSPEED->
+                                               LinkSpeedCode);
+                               break;
+                       case RCUC_SETPROM:
+                               RCUD_SETPROM = &RCuser.RCUS_SETPROM;
+                               RCSetPromiscuousMode (dev,
+                                                     (U16) RCUD_SETPROM->
+                                                     PromMode);
+                               break;
+                       case RCUC_SETBROADCAST:
+                               RCUD_SETBROADCAST = &RCuser.RCUS_SETBROADCAST;
+                               RCSetBroadcastMode (dev,
+                                                   (U16) RCUD_SETBROADCAST->
+                                                   BroadcastMode);
+                               break;
+                       default:
+                               RCUD_DEFAULT = &RCuser.RCUS_DEFAULT;
+                               RCUD_DEFAULT->rc = 0x11223344;
+                               break;
+                       }
+                       if (copy_to_user (rq->ifr_data, &RCuser, 
+                                               sizeof (RCuser)))
+                               return -EFAULT;
+                       break;
+               }               /* RCU_COMMAND */
+
+       default:
+               rq->ifr_ifru.ifru_data = (caddr_t) 0x12345678;
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int
+RCconfig (struct net_device *dev, struct ifmap *map)
+{
+       /*
+        * To be completed ...
+        */
+       return 0;
+       if (dev->flags & IFF_UP)        /* can't act on a running interface */
+               return -EBUSY;
+
+       /* Don't allow changing the I/O address */
+       if (map->base_addr != dev->base_addr) {
+               printk (KERN_WARNING "%s Change I/O address not implemented\n",
+                               dev->name);
+               return -EOPNOTSUPP;
+       }
+       return 0;
+}
+
+static void __exit
+rcpci_cleanup_module (void)
+{
+       pci_unregister_driver (&rcpci45_driver);
+}
+
+module_init (rcpci_init_module);
+module_exit (rcpci_cleanup_module);
+
+static int
+RC_allocate_and_post_buffers (struct net_device *dev, int numBuffers)
+{
+
+       int i;
+       PU32 p;
+       psingleB pB;
+       struct sk_buff *skb;
+       PDPA pDpa = dev->priv;
+       RC_RETURN status;
+       U32 res = 0;
+
+       if (!numBuffers)
+               return 0;
+       else if (numBuffers > MAX_NMBR_POST_BUFFERS_PER_MSG) {
+               printk (KERN_ERR "%s: Too many buffers requested!\n",
+                               dev->name);
+               numBuffers = MAX_NMBR_POST_BUFFERS_PER_MSG;
+       }
+
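+       /*
+        * Build the transfer control block handed to RCPostRecvBuffers():
+        * a U32 buffer count followed by one singleB descriptor per buffer.
+        */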
+       p = (PU32) kmalloc (sizeof (U32) + numBuffers * sizeof (singleB),
+                           GFP_DMA | GFP_ATOMIC);
+
+       if (!p) {
+               printk (KERN_WARNING "%s unable to allocate TCB\n",
+                               dev->name);
+               goto out;
+       }
+
+       p[0] = 0;               /* Buffer Count */
+       pB = (psingleB) ((U32) p + sizeof (U32));/* point to the first buffer */
+
+       for (i = 0; i < numBuffers; i++) {
+               skb = dev_alloc_skb (MAX_ETHER_SIZE + 2);
+               if (!skb) {
+                       printk (KERN_WARNING 
+                                       "%s: unable to allocate enough skbs!\n",
+                                       dev->name);
+                       goto err_out_unmap;
+               }
+               skb_reserve (skb, 2);   /* Align IP on 16 byte boundaries */
+               pB->context = (U32) skb;
+               pB->scount = 1; /* segment count */
+               pB->size = MAX_ETHER_SIZE;
+               pB->addr = pci_map_single(pDpa->pci_dev, skb->data, 
+                                         MAX_ETHER_SIZE, PCI_DMA_FROMDEVICE);
+               p[0]++;
+               pB++;
+       }
+
+       if ((status = RCPostRecvBuffers (dev, (PRCTCB) p)) != RC_RTN_NO_ERROR) {
+               printk (KERN_WARNING "%s: Post buffer failed, error 0x%x\n",
+                               dev->name, status);
+               goto err_out_unmap;
+       }
+out_free:
+       res = p[0];
+       kfree (p);
+out:
+       return (res);           /* return the number of posted buffers */
+
+err_out_unmap:
+       for (; p[0] > 0; p[0]--) {
+               --pB;
+               skb = (struct sk_buff *) pB->context;
+               pci_unmap_single(pDpa->pci_dev, pB->addr, MAX_ETHER_SIZE,
+                                PCI_DMA_FROMDEVICE);
+               dev_kfree_skb (skb);
+       }
+       goto out_free;
+}
diff --git a/drivers/net/wan/comx-hw-comx.c b/drivers/net/wan/comx-hw-comx.c
new file mode 100644 (file)
index 0000000..a62fe55
--- /dev/null
@@ -0,0 +1,1450 @@
+/*
+ * Hardware-level driver for the COMX and HICOMX cards
+ * for Linux kernel 2.2.X
+ *
+ * Original authors:  Arpad Bakay <bakay.arpad@synergon.hu>,
+ *                    Peter Bajan <bajan.peter@synergon.hu>,
+ * Rewritten by: Tivadar Szemethy <tiv@itc.hu>
+ * Currently maintained by: Gergely Madarasz <gorgo@itc.hu>
+ *
+ * Copyright (C) 1995-2000 ITConsult-Pro Co. <info@itc.hu>
+ *
+ * Contributors:
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br> - 0.86
+ * Daniele Bellucci         <bellucda@tiscali.it>   - 0.87
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Version 0.80 (99/06/11):
+ *             - port back to kernel, add support for builtin driver
+ *             - cleaned up the source code a bit
+ *
+ * Version 0.81 (99/06/22):
+ *             - cleaned up the board load functions, no more long reset
+ *               timeouts
+ *             - lower modem lines on close
+ *             - some interrupt handling fixes
+ *
+ * Version 0.82 (99/08/24):
+ *             - fix multiple board support
+ *
+ * Version 0.83 (99/11/30):
+ *             - interrupt handling and locking fixes during initialization
+ *             - really fix multiple board support
+ * 
+ * Version 0.84 (99/12/02):
+ *             - some workarounds for problematic hardware/firmware
+ *
+ * Version 0.85 (00/01/14):
+ *             - some additional workarounds :/
+ *             - printk cleanups
+ * Version 0.86 (00/08/15):
+ *             - resource release on failure at COMX_init
+ *
+ * Version 0.87 (03/07/09)
+ *              - audit copy_from_user in comxhw_write_proc
+ */
+
+#define VERSION "0.87"
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#include "comx.h"
+#include "comxhw.h"
+
+MODULE_AUTHOR("Gergely Madarasz <gorgo@itc.hu>, Tivadar Szemethy <tiv@itc.hu>, Arpad Bakay");
+MODULE_DESCRIPTION("Hardware-level driver for the COMX and HICOMX adapters");
+MODULE_LICENSE("GPL");
+
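+/*
+ * Register access helpers: each channel's control words live in the board's
+ * shared memory window, spaced COMX_CHANNEL_OFFSET apart per channel.
+ */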
+#define        COMX_readw(dev, offset) (readw(dev->mem_start + offset + \
+       (unsigned int)(((struct comx_privdata *)\
+       ((struct comx_channel *)dev->priv)->HW_privdata)->channel) \
+       * COMX_CHANNEL_OFFSET))
+
+#define COMX_WRITE(dev, offset, value) (writew(value, dev->mem_start + offset \
+       + (unsigned int)(((struct comx_privdata *) \
+       ((struct comx_channel *)dev->priv)->HW_privdata)->channel) \
+       * COMX_CHANNEL_OFFSET))
+
+#define COMX_CMD(dev, cmd)     (COMX_WRITE(dev, OFF_A_L2_CMD, cmd))
+
+struct comx_firmware {
+       int     len;
+       unsigned char *data;
+};
+
+struct comx_privdata {
+       struct comx_firmware *firmware;
+       u16     clock;
+       char    channel;                /* channel number */
+       int     memory_size;
+       short   io_extent;
+       u_long  histogram[5];
+};
+
+static struct net_device *memory_used[(COMX_MEM_MAX - COMX_MEM_MIN) / 0x10000];
+extern struct comx_hardware hicomx_hw;
+extern struct comx_hardware comx_hw;
+extern struct comx_hardware cmx_hw;
+
+static irqreturn_t COMX_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+
+static void COMX_board_on(struct net_device *dev)
+{
+       outb_p( (byte) (((dev->mem_start & 0xf0000) >> 16) | 
+           COMX_ENABLE_BOARD_IT | COMX_ENABLE_BOARD_MEM), dev->base_addr);
+}
+
+static void COMX_board_off(struct net_device *dev)
+{
+       outb_p( (byte) (((dev->mem_start & 0xf0000) >> 16) | 
+          COMX_ENABLE_BOARD_IT), dev->base_addr);
+}
+
+static void HICOMX_board_on(struct net_device *dev)
+{
+       outb_p( (byte) (((dev->mem_start & 0xf0000) >> 12) | 
+          HICOMX_ENABLE_BOARD_MEM), dev->base_addr);
+}
+
+static void HICOMX_board_off(struct net_device *dev)
+{
+       outb_p( (byte) (((dev->mem_start & 0xf0000) >> 12) | 
+          HICOMX_DISABLE_BOARD_MEM), dev->base_addr);
+}
+
+static void COMX_set_clock(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct comx_privdata *hw = ch->HW_privdata;
+
+       COMX_WRITE(dev, OFF_A_L1_CLKINI, hw->clock);
+}
+
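+/*
+ * Only one board can own a given 64K memory window at a time; memory_used[]
+ * records the current owner so COMX_access_board()/COMX_release_board() can
+ * switch boards sharing a window off and on around each access.
+ */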
+static struct net_device *COMX_access_board(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct net_device *ret;
+       int mempos = (dev->mem_start - COMX_MEM_MIN) >> 16;
+       unsigned long flags;
+
+
+       save_flags(flags); cli();
+       
+       ret = memory_used[mempos];
+
+       if(ret == dev) {
+               goto out;
+       }
+
+       memory_used[mempos] = dev;
+
+       if (!ch->twin || ret != ch->twin) {
+               if (ret) ((struct comx_channel *)ret->priv)->HW_board_off(ret);
+               ch->HW_board_on(dev);
+       }
+out:
+       restore_flags(flags);
+       return ret;
+}
+
+static void COMX_release_board(struct net_device *dev, struct net_device *savep)
+{
+       unsigned long flags;
+       int mempos = (dev->mem_start - COMX_MEM_MIN) >> 16;
+       struct comx_channel *ch = dev->priv;
+
+       save_flags(flags); cli();
+
+       if (memory_used[mempos] == savep) {
+               goto out;
+       }
+
+       memory_used[mempos] = savep;
+       if (!ch->twin || ch->twin != savep) {
+               ch->HW_board_off(dev);
+               if (savep) ((struct comx_channel*)savep->priv)->HW_board_on(savep);
+       }
+out:
+       restore_flags(flags);
+}
+
+static int COMX_txe(struct net_device *dev) 
+{
+       struct net_device *savep;
+       struct comx_channel *ch = dev->priv;
+       int rc = 0;
+
+       savep = ch->HW_access_board(dev);
+       if (COMX_readw(dev,OFF_A_L2_LINKUP) == LINKUP_READY) {
+               rc = COMX_readw(dev,OFF_A_L2_TxEMPTY);
+       } 
+       ch->HW_release_board(dev,savep);
+       if(rc==0xffff) {
+               printk(KERN_ERR "%s: OFF_A_L2_TxEMPTY is %d\n", dev->name, rc);
+       }
+       return rc;
+}
+
+static int COMX_send_packet(struct net_device *dev, struct sk_buff *skb)
+{
+       struct net_device *savep;
+       struct comx_channel *ch = dev->priv;
+       struct comx_privdata *hw = ch->HW_privdata;
+       int ret = FRAME_DROPPED;
+       word tmp;
+
+       savep = ch->HW_access_board(dev);       
+
+       if (ch->debug_flags & DEBUG_HW_TX) {
+               comx_debug_bytes(dev, skb->data, skb->len,"COMX_send packet");
+       }
+
+       if (skb->len > COMX_MAX_TX_SIZE) {
+               ret=FRAME_DROPPED;
+               goto out;
+       }
+
+       tmp=COMX_readw(dev, OFF_A_L2_TxEMPTY);
+       if ((ch->line_status & LINE_UP) && tmp==1) {
+               int lensave = skb->len;
+               int dest = COMX_readw(dev, OFF_A_L2_TxBUFP);
+               word *data = (word *)skb->data;
+
+               if(dest==0xffff) {
+                       printk(KERN_ERR "%s: OFF_A_L2_TxBUFP is %d\n", dev->name, dest);
+                       ret=FRAME_DROPPED;
+                       goto out;
+               }
+                                       
+               writew((unsigned short)skb->len, dev->mem_start + dest);
+               dest += 2;
+               while (skb->len > 1) {
+                       writew(*data++, dev->mem_start + dest);
+                       dest += 2; skb->len -= 2;
+               }
+               if (skb->len == 1) {
+                       writew(*((byte *)data), dev->mem_start + dest);
+               }
+               writew(0, dev->mem_start + (int)hw->channel * 
+                  COMX_CHANNEL_OFFSET + OFF_A_L2_TxEMPTY);
+               ch->stats.tx_packets++; 
+               ch->stats.tx_bytes += lensave; 
+               ret = FRAME_ACCEPTED;
+       } else {
+               ch->stats.tx_dropped++;
+               printk(KERN_INFO "%s: frame dropped\n",dev->name);
+               if(tmp) {
+                       printk(KERN_ERR "%s: OFF_A_L2_TxEMPTY is %d\n",dev->name,tmp);
+               }
+       }
+       
+out:
+       ch->HW_release_board(dev, savep);
+       dev_kfree_skb(skb);
+       return ret;
+}
+
+static inline int comx_read_buffer(struct net_device *dev) 
+{
+       struct comx_channel *ch = dev->priv;
+       word rbuf_offs;
+       struct sk_buff *skb;
+       word len;
+       int i=0;
+       word *writeptr;
+
+       i = 0;
+       rbuf_offs = COMX_readw(dev, OFF_A_L2_RxBUFP);
+       if(rbuf_offs == 0xffff) {
+               printk(KERN_ERR "%s: OFF_A_L2_RxBUFP is %d\n",dev->name,rbuf_offs);
+               return 0;
+       }
+       len = readw(dev->mem_start + rbuf_offs);
+       if(len > COMX_MAX_RX_SIZE) {
+               printk(KERN_ERR "%s: packet length is %d\n",dev->name,len);
+               return 0;
+       }
+       if ((skb = dev_alloc_skb(len + 16)) == NULL) {
+               ch->stats.rx_dropped++;
+               COMX_WRITE(dev, OFF_A_L2_DAV, 0);
+               return 0;
+       }
+       rbuf_offs += 2;
+       skb_reserve(skb, 16);
+       skb_put(skb, len);
+       skb->dev = dev;
+       writeptr = (word *)skb->data;
+       while (i < len) {
+               *writeptr++ = readw(dev->mem_start + rbuf_offs);
+               rbuf_offs += 2; 
+               i += 2;
+       }
+       COMX_WRITE(dev, OFF_A_L2_DAV, 0);
+       ch->stats.rx_packets++;
+       ch->stats.rx_bytes += len;
+       if (ch->debug_flags & DEBUG_HW_RX) {
+               comx_debug_skb(dev, skb, "COMX_interrupt receiving");
+       }
+       ch->LINE_rx(dev, skb);
+       return 1;
+}
+
+static inline char comx_line_change(struct net_device *dev, char linestat)
+{
+       struct comx_channel *ch=dev->priv;
+       char idle=1;
+       
+       
+       if (linestat & LINE_UP) { /* Line came up */
+               if (ch->lineup_delay) {
+                       if (!test_and_set_bit(0, &ch->lineup_pending)) {
+                               ch->lineup_timer.function = comx_lineup_func;
+                               ch->lineup_timer.data = (unsigned long)dev;
+                               ch->lineup_timer.expires = jiffies +
+                                       HZ*ch->lineup_delay;
+                               add_timer(&ch->lineup_timer);
+                               idle=0;
+                       }
+               } else {
+                       idle=0;
+                       ch->LINE_status(dev, ch->line_status |= LINE_UP);
+               }
+       } else { /* Line went down */
+               idle=0;
+               if (test_and_clear_bit(0, &ch->lineup_pending)) {
+                       del_timer(&ch->lineup_timer);
+               } else {
+                       ch->line_status &= ~LINE_UP;
+                       if (ch->LINE_status) {
+                               ch->LINE_status(dev, ch->line_status);
+                       }
+               }
+       }
+       return idle;
+}
+
+
+
+static irqreturn_t COMX_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+       struct net_device *dev = dev_id;
+       struct comx_channel *ch = dev->priv;
+       struct comx_privdata *hw = ch->HW_privdata;
+       struct net_device *interrupted;
+       unsigned long jiffs;
+       char idle = 0;
+       int count = 0;
+       word tmp;
+
+       if (dev == NULL) {
+               printk(KERN_ERR "COMX_interrupt: irq %d for unknown device\n", irq);
+               return IRQ_NONE;
+       }
+
+       jiffs = jiffies;
+
+       interrupted = ch->HW_access_board(dev);
+
+       while (!idle && count < 5000) {
+               char channel = 0;
+               idle = 1;
+
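+               /*
+                * The first pass services this channel, the second pass the
+                * twin channel sharing the same board (if configured).
+                */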
+               while (channel < 2) {
+                       char linestat = 0;
+                       char buffers_emptied = 0;
+
+                       if (channel == 1) {
+                               if (ch->twin) {
+                                       dev = ch->twin;
+                                       ch = dev->priv;
+                                       hw = ch->HW_privdata;
+                               } else {
+                                       break;
+                               }
+                       } else {
+                               COMX_WRITE(dev, OFF_A_L1_REPENA, 
+                                   COMX_readw(dev, OFF_A_L1_REPENA) & 0xFF00);
+                       }
+                       channel++;
+
+                       if ((ch->init_status & (HW_OPEN | LINE_OPEN)) != 
+                          (HW_OPEN | LINE_OPEN)) {
+                               continue;
+                       }
+       
+                       /* Collect stats */
+                       tmp = COMX_readw(dev, OFF_A_L1_ABOREC);
+                       COMX_WRITE(dev, OFF_A_L1_ABOREC, 0);
+                       if(tmp==0xffff) {
+                               printk(KERN_ERR "%s: OFF_A_L1_ABOREC is %d\n",dev->name,tmp);
+                               break;
+                       } else {
+                               ch->stats.rx_missed_errors += (tmp >> 8) & 0xff;
+                               ch->stats.rx_over_errors += tmp & 0xff;
+                       }
+                       tmp = COMX_readw(dev, OFF_A_L1_CRCREC);
+                       COMX_WRITE(dev, OFF_A_L1_CRCREC, 0);
+                       if(tmp==0xffff) {
+                               printk(KERN_ERR "%s: OFF_A_L1_CRCREC is %d\n",dev->name,tmp);
+                               break;
+                       } else {
+                               ch->stats.rx_crc_errors += (tmp >> 8) & 0xff;
+                               ch->stats.rx_missed_errors += tmp & 0xff;
+                       }
+                       
+                       if ((ch->line_status & LINE_UP) && ch->LINE_rx) {
+                               tmp=COMX_readw(dev, OFF_A_L2_DAV); 
+                               while (tmp==1) {
+                                       idle=0;
+                                       buffers_emptied+=comx_read_buffer(dev);
+                                       tmp=COMX_readw(dev, OFF_A_L2_DAV); 
+                               }
+                               if(tmp) {
+                                       printk(KERN_ERR "%s: OFF_A_L2_DAV is %d\n", dev->name, tmp);
+                                       break;
+                               }
+                       }
+
+                       tmp=COMX_readw(dev, OFF_A_L2_TxEMPTY);
+                       if (tmp==1 && ch->LINE_tx) {
+                               ch->LINE_tx(dev);
+                       } 
+                       if(tmp==0xffff) {
+                               printk(KERN_ERR "%s: OFF_A_L2_TxEMPTY is %d\n", dev->name, tmp);
+                               break;
+                       }
+
+                       if (COMX_readw(dev, OFF_A_L1_PBUFOVR) >> 8) {
+                               linestat &= ~LINE_UP;
+                       } else {
+                               linestat |= LINE_UP;
+                       }
+
+                       if ((linestat & LINE_UP) != (ch->line_status & LINE_UP)) {
+                               ch->stats.tx_carrier_errors++;
+                               idle &= comx_line_change(dev,linestat);
+                       }
+                               
+                       hw->histogram[(int)buffers_emptied]++;
+               }
+               count++;
+       }
+
+       if(count==5000) {
+               printk(KERN_WARNING "%s: interrupt stuck\n",dev->name);
+       }
+
+       ch->HW_release_board(dev, interrupted);
+       return IRQ_HANDLED;
+}
+
+static int COMX_open(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct comx_privdata *hw = ch->HW_privdata;
+       struct proc_dir_entry *procfile = ch->procdir->subdir;
+       unsigned long jiffs;
+       int twin_open=0;
+       int retval;
+       struct net_device *savep;
+
+       if (!dev->base_addr || !dev->irq || !dev->mem_start) {
+               return -ENODEV;
+       }
+
+       if (ch->twin && (((struct comx_channel *)(ch->twin->priv))->init_status & HW_OPEN)) {
+               twin_open=1;
+       }
+
+       if (!twin_open) {
+               if (!request_region(dev->base_addr, hw->io_extent, dev->name)) {
+                       return -EAGAIN;
+               }
+               if (request_irq(dev->irq, COMX_interrupt, 0, dev->name, 
+                  (void *)dev)) {
+                       printk(KERN_ERR "comx-hw-comx: unable to obtain irq %d\n", dev->irq);
+                       release_region(dev->base_addr, hw->io_extent);
+                       return -EAGAIN;
+               }
+               ch->init_status |= IRQ_ALLOCATED;
+               if (!ch->HW_load_board || ch->HW_load_board(dev)) {
+                       ch->init_status &= ~IRQ_ALLOCATED;
+                       retval=-ENODEV;
+                       goto error;
+               }
+       }
+
+       savep = ch->HW_access_board(dev);
+       COMX_WRITE(dev, OFF_A_L2_LINKUP, 0);
+
+       if (ch->HW_set_clock) {
+               ch->HW_set_clock(dev);
+       }
+
+       COMX_CMD(dev, COMX_CMD_INIT); 
+       jiffs = jiffies;
+       while (COMX_readw(dev, OFF_A_L2_LINKUP) != 1 && time_before(jiffies, jiffs + HZ)) {
+               schedule_timeout(1);
+       }
+       
+       if (time_after_eq(jiffies, jiffs + HZ)) {
+               printk(KERN_ERR "%s: board timeout on INIT command\n", dev->name);
+               ch->HW_release_board(dev, savep);
+               retval=-EIO;
+               goto error;
+       }
+       udelay(1000);
+
+       COMX_CMD(dev, COMX_CMD_OPEN);
+
+       jiffs = jiffies;
+       while (COMX_readw(dev, OFF_A_L2_LINKUP) != 3 && time_before(jiffies, jiffs + HZ)) {
+               schedule_timeout(1);
+       }
+       
+       if (time_after_eq(jiffies, jiffs + HZ)) {
+               printk(KERN_ERR "%s: board timeout on OPEN command\n", dev->name);
+               ch->HW_release_board(dev, savep);
+               retval=-EIO;
+               goto error;
+       }
+       
+       ch->init_status |= HW_OPEN;
+       
+       /* This is rather awkward, but that's how the system works */
+       if (COMX_readw(dev, OFF_A_L1_PBUFOVR) >> 8) {
+               ch->line_status &= ~LINE_UP;
+       } else {
+               ch->line_status |= LINE_UP;
+       }
+       
+       if (ch->LINE_status) {
+               ch->LINE_status(dev, ch->line_status);
+       }
+
+       ch->HW_release_board(dev, savep);
+
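+       /*
+        * While the interface is up, make the hardware setup entries in
+        * /proc read-only so they cannot be changed under a running board.
+        */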
+       for ( ; procfile ; procfile = procfile->next) {
+               if (strcmp(procfile->name, FILENAME_IRQ) == 0 
+                   || strcmp(procfile->name, FILENAME_IO) == 0
+                   || strcmp(procfile->name, FILENAME_MEMADDR) == 0
+                   || strcmp(procfile->name, FILENAME_CHANNEL) == 0
+                   || strcmp(procfile->name, FILENAME_FIRMWARE) == 0
+                   || strcmp(procfile->name, FILENAME_CLOCK) == 0) {
+                       procfile->mode = S_IFREG | 0444;
+               
+               }
+       }       
+       
+       return 0;       
+
+error:
+       if(!twin_open) {
+               release_region(dev->base_addr, hw->io_extent);
+               free_irq(dev->irq, (void *)dev);
+       }
+       return retval;
+
+}
+
+static int COMX_close(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct proc_dir_entry *procfile = ch->procdir->subdir;
+       struct comx_privdata *hw = ch->HW_privdata;
+       struct comx_channel *twin_ch;
+       struct net_device *savep;
+
+       savep = ch->HW_access_board(dev);
+
+       COMX_CMD(dev, COMX_CMD_CLOSE);
+       udelay(1000);
+       COMX_CMD(dev, COMX_CMD_EXIT);
+
+       ch->HW_release_board(dev, savep);
+
+       if (ch->init_status & IRQ_ALLOCATED) {
+               free_irq(dev->irq, (void *)dev);
+               ch->init_status &= ~IRQ_ALLOCATED;
+       }
+       release_region(dev->base_addr, hw->io_extent);
+
+       if (ch->twin && (twin_ch = ch->twin->priv) && 
+           (twin_ch->init_status & HW_OPEN)) {
+               /* Pass the irq to the twin */
+               if (request_irq(dev->irq, COMX_interrupt, 0, ch->twin->name, 
+                  (void *)ch->twin) == 0) {
+                       twin_ch->init_status |= IRQ_ALLOCATED;
+               }
+       }
+
+       for ( ; procfile ; procfile = procfile->next) {
+               if (strcmp(procfile->name, FILENAME_IRQ) == 0 
+                   || strcmp(procfile->name, FILENAME_IO) == 0
+                   || strcmp(procfile->name, FILENAME_MEMADDR) == 0
+                   || strcmp(procfile->name, FILENAME_CHANNEL) == 0
+                   || strcmp(procfile->name, FILENAME_FIRMWARE) == 0
+                   || strcmp(procfile->name, FILENAME_CLOCK) == 0) {
+                       procfile->mode = S_IFREG | 0644;
+               }
+       }
+       
+       ch->init_status &= ~HW_OPEN;
+       return 0;
+}
+
+static int COMX_statistics(struct net_device *dev, char *page)
+{
+       struct comx_channel *ch = dev->priv;
+       struct comx_privdata *hw = ch->HW_privdata;
+       struct net_device *savep;
+       int len = 0;
+
+       savep = ch->HW_access_board(dev);
+
+       len += sprintf(page + len, "Board data: %s %s %s %s\nPBUFOVR: %02x, "
+               "MODSTAT: %02x, LINKUP: %02x, DAV: %02x\nRxBUFP: %02x, "
+               "TxEMPTY: %02x, TxBUFP: %02x\n",
+               (ch->init_status & HW_OPEN) ? "HW_OPEN" : "",
+               (ch->init_status & LINE_OPEN) ? "LINE_OPEN" : "",
+               (ch->init_status & FW_LOADED) ? "FW_LOADED" : "",
+               (ch->init_status & IRQ_ALLOCATED) ? "IRQ_ALLOCATED" : "",
+               COMX_readw(dev, OFF_A_L1_PBUFOVR) & 0xff,
+               (COMX_readw(dev, OFF_A_L1_PBUFOVR) >> 8) & 0xff,
+               COMX_readw(dev, OFF_A_L2_LINKUP) & 0xff,
+               COMX_readw(dev, OFF_A_L2_DAV) & 0xff,
+               COMX_readw(dev, OFF_A_L2_RxBUFP) & 0xff,
+               COMX_readw(dev, OFF_A_L2_TxEMPTY) & 0xff,
+               COMX_readw(dev, OFF_A_L2_TxBUFP) & 0xff);
+
+       len += sprintf(page + len, "hist[0]: %8lu hist[1]: %8lu hist[2]: %8lu\n"
+               "hist[3]: %8lu hist[4]: %8lu\n",hw->histogram[0],hw->histogram[1],
+               hw->histogram[2],hw->histogram[3],hw->histogram[4]);
+
+       ch->HW_release_board(dev, savep);
+
+       return len;
+}
+
+static int COMX_load_board(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct comx_privdata *hw = ch->HW_privdata;
+       struct comx_firmware *fw = hw->firmware;
+       word board_segment = dev->mem_start >> 16;
+       int mempos = (dev->mem_start - COMX_MEM_MIN) >> 16;
+       unsigned long flags;
+       unsigned char id1, id2;
+       struct net_device *saved;
+       int retval;
+       int loopcount;
+       int len;
+       byte *COMX_address;
+
+       if (!fw || !fw->len) {
+               struct comx_channel *twin_ch = ch->twin ? ch->twin->priv : NULL;
+               struct comx_privdata *twin_hw;
+
+               if (!twin_ch || !(twin_hw = twin_ch->HW_privdata)) {
+                       return -EAGAIN;
+               }
+
+               if (!(fw = twin_hw->firmware) || !fw->len) {
+                       return -EAGAIN;
+               }
+       }
+
+       id1 = fw->data[OFF_FW_L1_ID]; 
+       id2 = fw->data[OFF_FW_L1_ID + 1];
+
+       if (id1 != FW_L1_ID_1 || id2 != FW_L1_ID_2_COMX) {
+               printk(KERN_ERR "%s: incorrect firmware, load aborted\n", 
+                       dev->name);
+               return -EAGAIN;
+       }
+
+       printk(KERN_INFO "%s: Loading COMX Layer 1 firmware %s\n", dev->name, 
+               (char *)(fw->data + OFF_FW_L1_ID + 2));
+
+       id1 = fw->data[OFF_FW_L2_ID]; 
+       id2 = fw->data[OFF_FW_L2_ID + 1];
+       if (id1 == FW_L2_ID_1 && (id2 == 0xc0 || id2 == 0xc1 || id2 == 0xc2)) {
+               printk(KERN_INFO "with Layer 2 code %s\n", 
+                       (char *)(fw->data + OFF_FW_L2_ID + 2));
+       }
+
+       outb_p(board_segment | COMX_BOARD_RESET, dev->base_addr);
+       /* 10 usec should be enough, but wait 100 to be on the safe side */
+       udelay(100);
+
+       save_flags(flags); cli();
+       saved=memory_used[mempos];
+       if(saved) {
+               ((struct comx_channel *)saved->priv)->HW_board_off(saved);
+       }
+       memory_used[mempos]=dev;
+
+       outb_p(board_segment | COMX_ENABLE_BOARD_MEM, dev->base_addr);
+
+       writeb(0, dev->mem_start + COMX_JAIL_OFFSET);   
+
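+       /*
+        * The on-board code is expected to write COMX_JAIL_VALUE here once the
+        * reset has finished; poll for it for up to about a second
+        * (10000 x 100 usec).
+        */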
+       loopcount=0;
+       while(loopcount++ < 10000 && 
+           readb(dev->mem_start + COMX_JAIL_OFFSET) != COMX_JAIL_VALUE) {
+               udelay(100);
+       }       
+       
+       if (readb(dev->mem_start + COMX_JAIL_OFFSET) != COMX_JAIL_VALUE) {
+               printk(KERN_ERR "%s: Can't reset board, JAIL value is %02x\n",
+                       dev->name, readb(dev->mem_start + COMX_JAIL_OFFSET));
+               retval=-ENODEV;
+               goto out;
+       }
+
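+       /*
+        * Second handshake: write a test pattern at offset 0x18ff and wait for
+        * the board to clear it, again for up to about a second.
+        */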
+       writeb(0x55, dev->mem_start + 0x18ff);
+       
+       loopcount=0;
+       while(loopcount++ < 10000 && readb(dev->mem_start + 0x18ff) != 0) {
+               udelay(100);
+       }
+
+       if(readb(dev->mem_start + 0x18ff) != 0) {
+               printk(KERN_ERR "%s: Can't reset board, reset timeout\n",
+                       dev->name);
+               retval=-ENODEV;
+               goto out;
+       }               
+
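+       /*
+        * Copy the firmware image into board memory, then read it back;
+        * 'len' stops at the first mismatching byte.
+        */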
+       len = 0;
+       COMX_address = (byte *)dev->mem_start;
+       while (fw->len > len) {
+               writeb(fw->data[len++], COMX_address++);
+       }
+
+       len = 0;
+       COMX_address = (byte *)dev->mem_start;
+       while (len != fw->len && readb(COMX_address++) == fw->data[len]) {
+               len++;
+       }
+
+       if (len != fw->len) {
+               printk(KERN_ERR "%s: error loading firmware: [%d] is 0x%02x "
+                       "instead of 0x%02x\n", dev->name, len, 
+                       readb(COMX_address - 1), fw->data[len]);
+               retval=-EAGAIN;
+               goto out;
+       }
+
+       writeb(0, dev->mem_start + COMX_JAIL_OFFSET);
+
+       loopcount = 0;
+       while ( loopcount++ < 10000 && COMX_readw(dev, OFF_A_L2_LINKUP) != 1 ) {
+               udelay(100);
+       }
+
+       if (COMX_readw(dev, OFF_A_L2_LINKUP) != 1) {
+               printk(KERN_ERR "%s: error starting firmware, linkup word is %04x\n",
+                       dev->name, COMX_readw(dev, OFF_A_L2_LINKUP));
+               retval=-EAGAIN;
+               goto out;
+       }
+
+
+       ch->init_status |= FW_LOADED;
+       retval=0;
+
+out: 
+       outb_p(board_segment | COMX_DISABLE_ALL, dev->base_addr);
+       if(saved) {
+               ((struct comx_channel *)saved->priv)->HW_board_on(saved);
+       }
+       memory_used[mempos]=saved;
+       restore_flags(flags);
+       return retval;
+}
+
+static int CMX_load_board(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct comx_privdata *hw = ch->HW_privdata;
+       struct comx_firmware *fw = hw->firmware;
+       word board_segment = dev->mem_start >> 16;
+       int mempos = (dev->mem_start - COMX_MEM_MIN) >> 16;
+       #if 0
+       unsigned char id1, id2;
+       #endif
+       struct net_device *saved;
+       unsigned long flags;
+       int retval;
+       int loopcount;
+       int len;
+       byte *COMX_address;
+
+       if (!fw || !fw->len) {
+               struct comx_channel *twin_ch = ch->twin ? ch->twin->priv : NULL;
+               struct comx_privdata *twin_hw;
+
+               if (!twin_ch || !(twin_hw = twin_ch->HW_privdata)) {
+                       return -EAGAIN;
+               }
+
+               if (!(fw = twin_hw->firmware) || !fw->len) {
+                       return -EAGAIN;
+               }
+       }
+
+       /* TODO: the firmware ID ought to be verified here as well */
+
+       if (inb_p(dev->base_addr) != CMX_ID_BYTE) {
+               printk(KERN_ERR "%s: CMX id byte is invalid(%02x)\n", dev->name,
+                       inb_p(dev->base_addr));
+               return -ENODEV;
+       }
+
+       printk(KERN_INFO "%s: Loading CMX Layer 1 firmware %s\n", dev->name, 
+               (char *)(fw->data + OFF_FW_L1_ID + 2));
+
+       save_flags(flags); cli();
+       saved=memory_used[mempos];
+       if(saved) {
+               ((struct comx_channel *)saved->priv)->HW_board_off(saved);
+       }
+       memory_used[mempos]=dev;
+       
+       outb_p(board_segment | COMX_ENABLE_BOARD_MEM | COMX_BOARD_RESET, 
+               dev->base_addr);
+
+       len = 0;
+       COMX_address = (byte *)dev->mem_start;
+       while (fw->len > len) {
+               writeb(fw->data[len++], COMX_address++);
+       }
+
+       len = 0;
+       COMX_address = (byte *)dev->mem_start;
+       while (len != fw->len && readb(COMX_address++) == fw->data[len]) {
+               len++;
+       }
+
+       outb_p(board_segment | COMX_ENABLE_BOARD_MEM, dev->base_addr);
+
+       if (len != fw->len) {
+               printk(KERN_ERR "%s: error loading firmware: [%d] is 0x%02x "
+                       "instead of 0x%02x\n", dev->name, len, 
+                       readb(COMX_address - 1), fw->data[len]);
+               retval=-EAGAIN;
+               goto out;
+       }
+
+       loopcount=0;
+       while( loopcount++ < 10000 && COMX_readw(dev, OFF_A_L2_LINKUP) != 1 ) {
+               udelay(100);
+       }
+
+       if (COMX_readw(dev, OFF_A_L2_LINKUP) != 1) {
+               printk(KERN_ERR "%s: error starting firmware, linkup word is %04x\n",
+                       dev->name, COMX_readw(dev, OFF_A_L2_LINKUP));
+               retval=-EAGAIN;
+               goto out;
+       }
+
+       ch->init_status |= FW_LOADED;
+       retval=0;
+
+out: 
+       outb_p(board_segment | COMX_DISABLE_ALL, dev->base_addr);
+       if(saved) {
+               ((struct comx_channel *)saved->priv)->HW_board_on(saved);
+       }
+       memory_used[mempos]=saved;
+       restore_flags(flags);
+       return retval;
+}
+
+static int HICOMX_load_board(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct comx_privdata *hw = ch->HW_privdata;
+       struct comx_firmware *fw = hw->firmware;
+       word board_segment = dev->mem_start >> 12;
+       int mempos = (dev->mem_start - COMX_MEM_MIN) >> 16;
+       struct net_device *saved;
+       unsigned char id1, id2;
+       unsigned long flags;
+       int retval;
+       int loopcount;
+       int len;
+       word *HICOMX_address;
+       char id = 1;
+
+       if (!fw || !fw->len) {
+               struct comx_channel *twin_ch = ch->twin ? ch->twin->priv : NULL;
+               struct comx_privdata *twin_hw;
+
+               if (!twin_ch || !(twin_hw = twin_ch->HW_privdata)) {
+                       return -EAGAIN;
+               }
+
+               if (!(fw = twin_hw->firmware) || !fw->len) {
+                       return -EAGAIN;
+               }
+       }
+
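+       /* The board identifies itself with HICOMX_ID_BYTE at I/O offsets 1..3 */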
+       while (id != 4) {
+               if (inb_p(dev->base_addr + id++) != HICOMX_ID_BYTE) {
+                       break;
+               }
+       }
+
+       if (id != 4) {
+               printk(KERN_ERR "%s: can't find HICOMX at 0x%04x, id[%d] = %02x\n",
+                       dev->name, (unsigned int)dev->base_addr, id - 1,
+                       inb_p(dev->base_addr + id - 1));
+               return -ENODEV;
+       }
+
+       id1 = fw->data[OFF_FW_L1_ID]; 
+       id2 = fw->data[OFF_FW_L1_ID + 1];
+       if (id1 != FW_L1_ID_1 || id2 != FW_L1_ID_2_HICOMX) {
+               printk(KERN_ERR "%s: incorrect firmware, load aborted\n", dev->name);
+               return -EAGAIN;
+       }
+
+       printk(KERN_INFO "%s: Loading HICOMX Layer 1 firmware %s\n", dev->name, 
+               (char *)(fw->data + OFF_FW_L1_ID + 2));
+
+       id1 = fw->data[OFF_FW_L2_ID]; 
+       id2 = fw->data[OFF_FW_L2_ID + 1];
+       if (id1 == FW_L2_ID_1 && (id2 == 0xc0 || id2 == 0xc1 || id2 == 0xc2)) {
+               printk(KERN_INFO "with Layer 2 code %s\n", 
+                       (char *)(fw->data + OFF_FW_L2_ID + 2));
+       }
+
+       outb_p(board_segment | HICOMX_BOARD_RESET, dev->base_addr);
+       udelay(10);     
+
+       save_flags(flags); cli();
+       saved=memory_used[mempos];
+       if(saved) {
+               ((struct comx_channel *)saved->priv)->HW_board_off(saved);
+       }
+       memory_used[mempos]=dev;
+
+       outb_p(board_segment | HICOMX_ENABLE_BOARD_MEM, dev->base_addr);
+       outb_p(HICOMX_PRG_MEM, dev->base_addr + 1);
+
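+       /*
+        * Board memory is walked word by word here, so every firmware byte
+        * ends up in the low byte of a 16-bit word (hence the word pointer
+        * used with writeb/readw).
+        */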
+       len = 0;
+       HICOMX_address = (word *)dev->mem_start;
+       while (fw->len > len) {
+               writeb(fw->data[len++], HICOMX_address++);
+       }
+
+       len = 0;
+       HICOMX_address = (word *)dev->mem_start;
+       while (len != fw->len && (readw(HICOMX_address++) & 0xff) == fw->data[len]) {
+               len++;
+       }
+
+       if (len != fw->len) {
+               printk(KERN_ERR "%s: error loading firmware: [%d] is 0x%02x "
+                       "instead of 0x%02x\n", dev->name, len, 
+                       readw(HICOMX_address - 1) & 0xff, fw->data[len]);
+               retval=-EAGAIN;
+               goto out;
+       }
+
+       outb_p(board_segment | HICOMX_BOARD_RESET, dev->base_addr);
+       outb_p(HICOMX_DATA_MEM, dev->base_addr + 1);
+
+       outb_p(board_segment | HICOMX_ENABLE_BOARD_MEM, dev->base_addr);
+
+       loopcount=0;
+       while(loopcount++ < 10000 && COMX_readw(dev, OFF_A_L2_LINKUP) != 1) {
+               udelay(100);
+       }
+
+       if ( COMX_readw(dev, OFF_A_L2_LINKUP) != 1 ) {
+               printk(KERN_ERR "%s: error starting firmware, linkup word is %04x\n",
+                       dev->name, COMX_readw(dev, OFF_A_L2_LINKUP));
+               retval=-EAGAIN;
+               goto out;
+       }
+
+       ch->init_status |= FW_LOADED;
+       retval=0;
+
+out:
+       outb_p(board_segment | HICOMX_DISABLE_ALL, dev->base_addr);
+       outb_p(HICOMX_DATA_MEM, dev->base_addr + 1);
+
+       if(saved) {
+               ((struct comx_channel *)saved->priv)->HW_board_on(saved);
+       }
+       memory_used[mempos]=saved;
+       restore_flags(flags);
+       return retval;
+}
+
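+/*
+ * Find the "twin" of this interface: another device registered under the same
+ * parent proc directory that shares our irq, io and memory settings but uses
+ * the other channel of the same board type.
+ */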
+static struct net_device *comx_twin_check(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct proc_dir_entry *procfile = ch->procdir->parent->subdir;
+       struct comx_privdata *hw = ch->HW_privdata;
+
+       struct net_device *twin;
+       struct comx_channel *ch_twin;
+       struct comx_privdata *hw_twin;
+
+
+       for ( ; procfile ; procfile = procfile->next) {
+       
+               if(!S_ISDIR(procfile->mode)) {
+                       continue;
+               }
+       
+               twin=procfile->data;
+               ch_twin=twin->priv;
+               hw_twin=ch_twin->HW_privdata;
+
+
+               if (twin != dev && dev->irq && dev->base_addr && dev->mem_start &&
+                  dev->irq == twin->irq && dev->base_addr == twin->base_addr &&
+                  dev->mem_start == twin->mem_start &&
+                  hw->channel == (1 - hw_twin->channel) &&
+                  ch->hardware == ch_twin->hardware) {
+                       return twin;
+               }
+       }
+       return NULL;
+}
+
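+/*
+ * proc write handler: the firmware file is written as raw data (possibly in
+ * several chunks, tracked through f_pos), while the other files carry a
+ * single text value that is parsed below.
+ */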
+static int comxhw_write_proc(struct file *file, const char *buffer, 
+       u_long count, void *data)
+{
+       struct proc_dir_entry *entry = (struct proc_dir_entry *)data;
+       struct net_device *dev = entry->parent->data;
+       struct comx_channel *ch = dev->priv;
+       struct comx_privdata *hw = ch->HW_privdata;
+       char *page;
+
+
+       if(ch->init_status & HW_OPEN) {
+               return -EAGAIN; 
+       }
+       
+       if (strcmp(FILENAME_FIRMWARE, entry->name) != 0) {
+               if (!(page = (char *)__get_free_page(GFP_KERNEL))) {
+                       return -ENOMEM;
+               }
+               if(copy_from_user(page, buffer, count = (min_t(int, count, PAGE_SIZE))))
+               {
+                       count = -EFAULT;
+                       goto out;
+               }
+               if (page[count-1] == '\n') {
+                       page[count-1] = '\0';
+               } else if (count < PAGE_SIZE) {
+                       page[count] = '\0';
+               } else {
+                       /* a full page with no trailing newline cannot be terminated */
+                       count = -EINVAL;
+                       goto out;
+               }
+       } else {
+               byte *tmp;
+
+               if (!hw->firmware) {
+                       if ((hw->firmware = kmalloc(sizeof(struct comx_firmware), 
+                           GFP_KERNEL)) == NULL) {
+                               return -ENOMEM;
+                       }
+                       hw->firmware->len = 0;
+                       hw->firmware->data = NULL;
+               }
+               
+               if ((tmp = kmalloc(count + file->f_pos, GFP_KERNEL)) == NULL) {
+                       return -ENOMEM;
+               }
+               
+               /* If f_pos is not 0, we are appending to an existing image. Crude trick. */
+               if (hw->firmware && hw->firmware->len && file->f_pos 
+                   && hw->firmware->len < count + file->f_pos) {
+                       memcpy(tmp, hw->firmware->data, hw->firmware->len);
+               }
+               if (copy_from_user(tmp + file->f_pos, buffer, count)) {
+                       kfree(tmp);
+                       return -EFAULT;
+               }
+               if (hw->firmware->data) {
+                       kfree(hw->firmware->data);
+               }
+               hw->firmware->len = entry->size = file->f_pos + count;
+               hw->firmware->data = tmp;
+               file->f_pos += count;
+               return count;
+       }
+
+       if (strcmp(entry->name, FILENAME_CHANNEL) == 0) {
+               hw->channel = simple_strtoul(page, NULL, 0);
+               if (hw->channel >= MAX_CHANNELNO) {
+                       printk(KERN_ERR "Invalid channel number\n");
+                       hw->channel = 0;
+               }
+               if ((ch->twin = comx_twin_check(dev)) != NULL) {
+                       struct comx_channel *twin_ch = ch->twin->priv;
+                       twin_ch->twin = dev;
+               }
+       } else if (strcmp(entry->name, FILENAME_IRQ) == 0) {
+               dev->irq = simple_strtoul(page, NULL, 0);
+               if (dev->irq == 2) {
+                       dev->irq = 9;
+               }
+               if (dev->irq < 3 || dev->irq > 15) {
+                       printk(KERN_ERR "comxhw: Invalid irq number\n");
+                       dev->irq = 0;
+               }
+               if ((ch->twin = comx_twin_check(dev)) != NULL) {
+                       struct comx_channel *twin_ch = ch->twin->priv;
+                       twin_ch->twin = dev;
+               }
+       } else if (strcmp(entry->name, FILENAME_IO) == 0) {
+               dev->base_addr = simple_strtoul(page, NULL, 0);
+               if ((dev->base_addr & 3) != 0 || dev->base_addr < 0x300 
+                  || dev->base_addr > 0x3fc) {
+                       printk(KERN_ERR "Invalid io value\n");
+                       dev->base_addr = 0;
+               }
+               if ((ch->twin = comx_twin_check(dev)) != NULL) {
+                       struct comx_channel *twin_ch = ch->twin->priv;
+
+                       twin_ch->twin = dev;
+               }
+       } else if (strcmp(entry->name, FILENAME_MEMADDR) == 0) {
+               dev->mem_start = simple_strtoul(page, NULL, 0);
+               if (dev->mem_start <= 0xf000 && dev->mem_start >= 0xa000) {
+                       dev->mem_start *= 16;
+               }
+               if ((dev->mem_start & 0xfff) != 0 || dev->mem_start < COMX_MEM_MIN
+                   || dev->mem_start + hw->memory_size > COMX_MEM_MAX) {
+                       printk(KERN_ERR "Invalid memory page\n");
+                       dev->mem_start = 0;
+               }
+               dev->mem_end = dev->mem_start + hw->memory_size;
+               if ((ch->twin = comx_twin_check(dev)) != NULL) {
+                       struct comx_channel *twin_ch = ch->twin->priv;
+
+                       twin_ch->twin = dev;
+               }
+       } else if (strcmp(entry->name, FILENAME_CLOCK) == 0) {
+               if (strncmp("ext", page, 3) == 0) {
+                       hw->clock = 0;
+               } else {
+                       int kbps;
+
+                       kbps = simple_strtoul(page, NULL, 0);
+                       hw->clock = kbps ? COMX_CLOCK_CONST/kbps : 0;
+               }
+       }
+out:
+       free_page((unsigned long)page);
+       return count;
+}
+
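+/*
+ * proc read handler: for the firmware file *start points straight into the
+ * stored image; for the other files the value is formatted into 'page'.
+ */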
+static int comxhw_read_proc(char *page, char **start, off_t off, int count,
+       int *eof, void *data)
+{
+       struct proc_dir_entry *file = (struct proc_dir_entry *)data;
+       struct net_device *dev = file->parent->data;
+       struct comx_channel *ch = dev->priv;
+       struct comx_privdata *hw = ch->HW_privdata;
+       int len = 0;
+
+
+       if (strcmp(file->name, FILENAME_IO) == 0) {
+               len = sprintf(page, "0x%03x\n", (unsigned int)dev->base_addr);
+       } else if (strcmp(file->name, FILENAME_IRQ) == 0) {
+               len = sprintf(page, "0x%02x\n", dev->irq == 9 ? 2 : dev->irq);
+       } else if (strcmp(file->name, FILENAME_CHANNEL) == 0) {
+               len = sprintf(page, "%01d\n", hw->channel);
+       } else if (strcmp(file->name, FILENAME_MEMADDR) == 0) {
+               len = sprintf(page, "0x%05x\n", (unsigned int)dev->mem_start);
+       } else if (strcmp(file->name, FILENAME_TWIN) == 0) {
+               len = sprintf(page, "%s\n", ch->twin ? ch->twin->name : "none");
+       } else if (strcmp(file->name, FILENAME_CLOCK) == 0) {
+               if (hw->clock) {
+                       len = sprintf(page, "%-8d\n", COMX_CLOCK_CONST/hw->clock);
+               } else {
+                       len = sprintf(page, "external\n");
+               }
+       } else if (strcmp(file->name, FILENAME_FIRMWARE) == 0) {
+               len = min_t(int, FILE_PAGESIZE,
+                         min_t(int, count, 
+                             hw->firmware ?
+                             (hw->firmware->len - off) : 0));
+               if (len < 0) {
+                       len = 0;
+               }
+               *start = hw->firmware ? (hw->firmware->data + off) : NULL;
+               if (off + len >= (hw->firmware ? hw->firmware->len : 0) || len == 0) {
+                       *eof = 1;
+               }
+               return len;
+       }       
+
+       if (off >= len) {
+               *eof = 1;
+               return 0;
+       }
+
+       *start = page + off;
+       if (count >= len - off) {
+               *eof = 1;
+       }
+       return min_t(int, count, len - off);
+}
+
+/* Called on echo comx >boardtype */
+static int COMX_init(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct comx_privdata *hw;
+       struct proc_dir_entry *new_file;
+
+       if ((ch->HW_privdata = kmalloc(sizeof(struct comx_privdata), 
+           GFP_KERNEL)) == NULL) {
+               return -ENOMEM;
+       }
+       memset(hw = ch->HW_privdata, 0, sizeof(struct comx_privdata));
+
+       if (ch->hardware == &comx_hw || ch->hardware == &cmx_hw) {
+               hw->memory_size = COMX_MEMORY_SIZE;
+               hw->io_extent = COMX_IO_EXTENT;
+               dev->base_addr = COMX_DEFAULT_IO;
+               dev->irq = COMX_DEFAULT_IRQ;
+               dev->mem_start = COMX_DEFAULT_MEMADDR;
+               dev->mem_end = COMX_DEFAULT_MEMADDR + COMX_MEMORY_SIZE;
+       } else if (ch->hardware == &hicomx_hw) {
+               hw->memory_size = HICOMX_MEMORY_SIZE;
+               hw->io_extent = HICOMX_IO_EXTENT;
+               dev->base_addr = HICOMX_DEFAULT_IO;
+               dev->irq = HICOMX_DEFAULT_IRQ;
+               dev->mem_start = HICOMX_DEFAULT_MEMADDR;
+               dev->mem_end = HICOMX_DEFAULT_MEMADDR + HICOMX_MEMORY_SIZE;
+       } else {
+               printk(KERN_ERR "SERIOUS INTERNAL ERROR in %s, line %d\n", __FILE__, __LINE__);
+       }
+
+       if ((new_file = create_proc_entry(FILENAME_IO, S_IFREG | 0644, ch->procdir))
+           == NULL) {
+           goto cleanup_HW_privdata;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &comxhw_read_proc;
+       new_file->write_proc = &comxhw_write_proc;
+       new_file->size = 6;
+       new_file->nlink = 1;
+
+       if ((new_file = create_proc_entry(FILENAME_IRQ, S_IFREG | 0644, ch->procdir))
+           == NULL) {
+           goto cleanup_filename_io;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &comxhw_read_proc;
+       new_file->write_proc = &comxhw_write_proc;
+       new_file->size = 5;
+       new_file->nlink = 1;
+
+       if ((new_file = create_proc_entry(FILENAME_CHANNEL, S_IFREG | 0644, 
+           ch->procdir)) == NULL) {
+           goto cleanup_filename_irq;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &comxhw_read_proc;
+       new_file->write_proc = &comxhw_write_proc;
+       new_file->size = 2;             // we know this one: a single digit plus newline
+       new_file->nlink = 1;
+
+       if (ch->hardware == &hicomx_hw || ch->hardware == &cmx_hw) {
+               if ((new_file = create_proc_entry(FILENAME_CLOCK, S_IFREG | 0644, 
+                  ch->procdir)) == NULL) {
+                   goto cleanup_filename_channel;
+               }
+               new_file->data = (void *)new_file;
+               new_file->read_proc = &comxhw_read_proc;
+               new_file->write_proc = &comxhw_write_proc;
+               new_file->size = 9;
+               new_file->nlink = 1;
+       }
+
+       if ((new_file = create_proc_entry(FILENAME_MEMADDR, S_IFREG | 0644, 
+           ch->procdir)) == NULL) {
+                   goto cleanup_filename_clock;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &comxhw_read_proc;
+       new_file->write_proc = &comxhw_write_proc;
+       new_file->size = 8;
+       new_file->nlink = 1;
+
+       if ((new_file = create_proc_entry(FILENAME_TWIN, S_IFREG | 0444, 
+           ch->procdir)) == NULL) {
+                   goto cleanup_filename_memaddr;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &comxhw_read_proc;
+       new_file->write_proc = NULL;
+       new_file->nlink = 1;
+
+       if ((new_file = create_proc_entry(FILENAME_FIRMWARE, S_IFREG | 0644, 
+           ch->procdir)) == NULL) {
+                   goto cleanup_filename_twin;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &comxhw_read_proc;
+       new_file->write_proc = &comxhw_write_proc;
+       new_file->nlink = 1;
+
+       if (ch->hardware == &comx_hw) {
+               ch->HW_board_on = COMX_board_on;
+               ch->HW_board_off = COMX_board_off;
+               ch->HW_load_board = COMX_load_board;
+       } else if (ch->hardware == &cmx_hw) {
+               ch->HW_board_on = COMX_board_on;
+               ch->HW_board_off = COMX_board_off;
+               ch->HW_load_board = CMX_load_board;
+               ch->HW_set_clock = COMX_set_clock;
+       } else if (ch->hardware == &hicomx_hw) {
+               ch->HW_board_on = HICOMX_board_on;
+               ch->HW_board_off = HICOMX_board_off;
+               ch->HW_load_board = HICOMX_load_board;
+               ch->HW_set_clock = COMX_set_clock;
+       } else {
+               printk(KERN_ERR "SERIOUS INTERNAL ERROR in %s, line %d\n", __FILE__, __LINE__);
+       }
+
+       ch->HW_access_board = COMX_access_board;
+       ch->HW_release_board = COMX_release_board;
+       ch->HW_txe = COMX_txe;
+       ch->HW_open = COMX_open;
+       ch->HW_close = COMX_close;
+       ch->HW_send_packet = COMX_send_packet;
+       ch->HW_statistics = COMX_statistics;
+
+       if ((ch->twin = comx_twin_check(dev)) != NULL) {
+               struct comx_channel *twin_ch = ch->twin->priv;
+
+               twin_ch->twin = dev;
+       }
+
+       MOD_INC_USE_COUNT;
+       return 0;
+
+cleanup_filename_twin:
+       remove_proc_entry(FILENAME_TWIN, ch->procdir);
+cleanup_filename_memaddr:
+       remove_proc_entry(FILENAME_MEMADDR, ch->procdir);
+cleanup_filename_clock:
+       if (ch->hardware == &hicomx_hw || ch->hardware == &cmx_hw)
+               remove_proc_entry(FILENAME_CLOCK, ch->procdir);
+cleanup_filename_channel:
+       remove_proc_entry(FILENAME_CHANNEL, ch->procdir);
+cleanup_filename_irq:
+       remove_proc_entry(FILENAME_IRQ, ch->procdir);
+cleanup_filename_io:
+       remove_proc_entry(FILENAME_IO, ch->procdir);
+cleanup_HW_privdata:
+       kfree(ch->HW_privdata);
+       return -EIO;
+}
+
+/* Called on echo <something else> >boardtype */
+static int COMX_exit(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct comx_privdata *hw = ch->HW_privdata;
+
+       if (hw->firmware) {
+               if (hw->firmware->data) kfree(hw->firmware->data);
+               kfree(hw->firmware);
+       }
+       if (ch->twin) {
+               struct comx_channel *twin_ch = ch->twin->priv;
+
+               twin_ch->twin = NULL;
+       }
+       
+       kfree(ch->HW_privdata);
+       remove_proc_entry(FILENAME_IO, ch->procdir);
+       remove_proc_entry(FILENAME_IRQ, ch->procdir);
+       remove_proc_entry(FILENAME_CHANNEL, ch->procdir);
+       remove_proc_entry(FILENAME_MEMADDR, ch->procdir);
+       remove_proc_entry(FILENAME_FIRMWARE, ch->procdir);
+       remove_proc_entry(FILENAME_TWIN, ch->procdir);
+       if (ch->hardware == &hicomx_hw || ch->hardware == &cmx_hw) {
+               remove_proc_entry(FILENAME_CLOCK, ch->procdir);
+       }
+
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+static int COMX_dump(struct net_device *dev)
+{
+       printk(KERN_INFO "%s: COMX_dump called, why ?\n", dev->name);
+       return 0;
+}
+
+static struct comx_hardware comx_hw = {
+       "comx",
+       VERSION,
+       COMX_init,
+       COMX_exit,
+       COMX_dump,
+       NULL
+};
+
+static struct comx_hardware cmx_hw = {
+       "cmx",
+       VERSION,
+       COMX_init,
+       COMX_exit,
+       COMX_dump,
+       NULL
+};
+
+static struct comx_hardware hicomx_hw = {
+       "hicomx",
+       VERSION,
+       COMX_init,
+       COMX_exit,
+       COMX_dump,
+       NULL
+};
+
+static int __init comx_hw_comx_init(void)
+{
+       comx_register_hardware(&comx_hw);
+       comx_register_hardware(&cmx_hw);
+       comx_register_hardware(&hicomx_hw);
+       return 0;
+}
+
+static void __exit comx_hw_comx_exit(void)
+{
+       comx_unregister_hardware("comx");
+       comx_unregister_hardware("cmx");
+       comx_unregister_hardware("hicomx");
+}
+
+module_init(comx_hw_comx_init);
+module_exit(comx_hw_comx_exit);
diff --git a/drivers/net/wan/comx-hw-locomx.c b/drivers/net/wan/comx-hw-locomx.c
new file mode 100644
index 0000000..5246016
--- /dev/null
@@ -0,0 +1,496 @@
+/*
+ * Hardware driver for the LoCOMX card, using the generic z85230
+ * functions
+ *
+ * Author: Gergely Madarasz <gorgo@itc.hu>
+ *
+ * Based on skeleton code and old LoCOMX driver by Tivadar Szemethy <tiv@itc.hu> 
+ * and the hostess_sv11 driver
+ *
+ * Contributors:
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br> (0.14)
+ *
+ * Copyright (C) 1999 ITConsult-Pro Co. <info@itc.hu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Version 0.10 (99/06/17):
+ *             - rewritten for the z85230 layer
+ *
+ * Version 0.11 (99/06/21):
+ *             - some printk's fixed
+ *             - get rid of a memory leak (it was impossible though :))
+ * 
+ * Version 0.12 (99/07/07):
+ *             - check CTS for modem lines, not DCD (which is always high
+ *               in case of this board)
+ * Version 0.13 (99/07/08):
+ *             - Fix the transmitter status check
+ *             - Handle the net device statistics better
+ * Version 0.14 (00/08/15):
+ *             - resource release on failure at LOCOMX_init
+ */
+
+#define VERSION "0.14"
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+
+#include <asm/types.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/dma.h>
+
+#include "comx.h"
+#include "z85230.h"
+
+MODULE_AUTHOR("Gergely Madarasz <gorgo@itc.hu>");
+MODULE_DESCRIPTION("Hardware driver for the LoCOMX board");
+MODULE_LICENSE("GPL");
+
+#define RX_DMA 3
+#define TX_DMA 1
+#define LOCOMX_ID 0x33
+#define LOCOMX_IO_EXTENT 8
+#define LOCOMX_DEFAULT_IO 0x368
+#define LOCOMX_DEFAULT_IRQ 7
+
+u8 z8530_locomx[] = {
+       11,     TCRTxCP,
+       14,     DTRREQ,
+       255
+};
+
+struct locomx_data {
+       int     io_extent;
+       struct  z8530_dev board;
+       struct timer_list status_timer;
+};
+
+static int LOCOMX_txe(struct net_device *dev)
+{
+       struct comx_channel *ch = netdev_priv(dev);
+       struct locomx_data *hw = ch->HW_privdata;
+
+       return (!hw->board.chanA.tx_next_skb);
+}
+
+
+static void locomx_rx(struct z8530_channel *c, struct sk_buff *skb)
+{
+       struct net_device *dev = c->netdevice;
+       struct comx_channel *ch = netdev_priv(dev);
+       
+       if (ch->debug_flags & DEBUG_HW_RX) {
+               comx_debug_skb(dev, skb, "locomx_rx receiving");
+       }
+       ch->LINE_rx(dev,skb);
+}
+
+static int LOCOMX_send_packet(struct net_device *dev, struct sk_buff *skb) 
+{
+       struct comx_channel *ch = netdev_priv(dev);
+       struct locomx_data *hw = ch->HW_privdata;
+
+       if (ch->debug_flags & DEBUG_HW_TX) {
+               comx_debug_bytes(dev, skb->data, skb->len, "LOCOMX_send_packet");
+       }
+
+       if (!(ch->line_status & LINE_UP)) {
+               return FRAME_DROPPED;
+       }
+
+       if(z8530_queue_xmit(&hw->board.chanA,skb)) {
+               printk(KERN_WARNING "%s: FRAME_DROPPED\n",dev->name);
+               return FRAME_DROPPED;
+       }
+
+       if (ch->debug_flags & DEBUG_HW_TX) {
+               comx_debug(dev, "%s: LOCOMX_send_packet was successful\n\n", dev->name);
+       }
+
+       if(!hw->board.chanA.tx_next_skb) {
+               return FRAME_QUEUED;
+       } else {
+               return FRAME_ACCEPTED;
+       }
+}
+
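+/*
+ * Poll the modem status periodically: on this board the line state is taken
+ * from CTS rather than DCD (see the changelog above).
+ */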
+static void locomx_status_timerfun(unsigned long d)
+{
+       struct net_device *dev = (struct net_device *)d;
+       struct comx_channel *ch = netdev_priv(dev);
+       struct locomx_data *hw = ch->HW_privdata;
+
+       if(!(ch->line_status & LINE_UP) &&
+           (hw->board.chanA.status & CTS)) {
+               ch->LINE_status(dev, ch->line_status | LINE_UP);
+       }
+       if((ch->line_status & LINE_UP) &&
+           !(hw->board.chanA.status & CTS)) {
+               ch->LINE_status(dev, ch->line_status & ~LINE_UP);
+       }
+       mod_timer(&hw->status_timer,jiffies + ch->lineup_delay * HZ);
+}
+
+
+static int LOCOMX_open(struct net_device *dev)
+{
+       struct comx_channel *ch = netdev_priv(dev);
+       struct locomx_data *hw = ch->HW_privdata;
+       struct proc_dir_entry *procfile = ch->procdir->subdir;
+       unsigned long flags;
+       int ret;
+
+       if (!dev->base_addr || !dev->irq) {
+               return -ENODEV;
+       }
+
+       if (!request_region(dev->base_addr, hw->io_extent, dev->name)) {
+               return -EAGAIN;
+       }
+
+       hw->board.chanA.ctrlio=dev->base_addr + 5;
+       hw->board.chanA.dataio=dev->base_addr + 7;
+       
+       hw->board.irq=dev->irq;
+       hw->board.chanA.netdevice=dev;
+       hw->board.chanA.dev=&hw->board;
+       hw->board.name=dev->name;
+       hw->board.chanA.txdma=TX_DMA;
+       hw->board.chanA.rxdma=RX_DMA;
+       hw->board.chanA.irqs=&z8530_nop;
+       hw->board.chanB.irqs=&z8530_nop;
+
+       if(request_irq(dev->irq, z8530_interrupt, SA_INTERRUPT, 
+           dev->name, &hw->board)) {
+               printk(KERN_ERR "%s: unable to obtain irq %d\n", dev->name, 
+                       dev->irq);
+               ret=-EAGAIN;
+               goto irq_fail;
+       }
+       if(request_dma(TX_DMA,"LoCOMX (TX)")) {
+               printk(KERN_ERR "%s: unable to obtain TX DMA (DMA channel %d)\n", 
+                       dev->name, TX_DMA);
+               ret=-EAGAIN;
+               goto dma1_fail;
+       }
+
+       if(request_dma(RX_DMA,"LoCOMX (RX)")) {
+               printk(KERN_ERR "%s: unable to obtain RX DMA (DMA channel %d)\n", 
+                       dev->name, RX_DMA);
+               ret=-EAGAIN;
+               goto dma2_fail;
+       }
+       
+       save_flags(flags); 
+       cli();
+
+       if(z8530_init(&hw->board)!=0)
+       {
+               printk(KERN_ERR "%s: Z8530 device not found.\n",dev->name);
+               ret=-ENODEV;
+               goto z8530_fail;
+       }
+
+       hw->board.chanA.dcdcheck=CTS;
+
+       z8530_channel_load(&hw->board.chanA, z8530_hdlc_kilostream_85230);
+       z8530_channel_load(&hw->board.chanA, z8530_locomx);
+       z8530_channel_load(&hw->board.chanB, z8530_dead_port);
+
+       z8530_describe(&hw->board, "I/O", dev->base_addr);
+
+       if((ret=z8530_sync_dma_open(dev, &hw->board.chanA))!=0) {
+               goto z8530_fail;
+       }
+
+       restore_flags(flags);
+
+
+       hw->board.active=1;
+       hw->board.chanA.rx_function=locomx_rx;
+
+       ch->init_status |= HW_OPEN;
+       if (hw->board.chanA.status & DCD) {
+               ch->line_status |= LINE_UP;
+       } else {
+               ch->line_status &= ~LINE_UP;
+       }
+
+       comx_status(dev, ch->line_status);
+
+       init_timer(&hw->status_timer);
+       hw->status_timer.function=locomx_status_timerfun;
+       hw->status_timer.data=(unsigned long)dev;
+       hw->status_timer.expires=jiffies + ch->lineup_delay * HZ;
+       add_timer(&hw->status_timer);
+
+       for (; procfile ; procfile = procfile->next) {
+               if (strcmp(procfile->name, FILENAME_IO) == 0 ||
+                    strcmp(procfile->name, FILENAME_IRQ) == 0) {
+                       procfile->mode = S_IFREG |  0444;
+               }
+       }
+       return 0;
+
+z8530_fail:
+       restore_flags(flags);
+       free_dma(RX_DMA);
+dma2_fail:
+       free_dma(TX_DMA);
+dma1_fail:
+       free_irq(dev->irq, &hw->board);
+irq_fail:
+       release_region(dev->base_addr, hw->io_extent);
+       return ret;
+}
+
+static int LOCOMX_close(struct net_device *dev)
+{
+       struct comx_channel *ch = netdev_priv(dev);
+       struct locomx_data *hw = ch->HW_privdata;
+       struct proc_dir_entry *procfile = ch->procdir->subdir;
+
+       hw->board.chanA.rx_function=z8530_null_rx;
+       netif_stop_queue(dev);
+       z8530_sync_dma_close(dev, &hw->board.chanA);
+
+       z8530_shutdown(&hw->board);
+
+       del_timer(&hw->status_timer);
+       free_dma(RX_DMA);
+       free_dma(TX_DMA);
+       free_irq(dev->irq,&hw->board);
+       release_region(dev->base_addr, hw->io_extent);
+
+       for (; procfile ; procfile = procfile->next) {
+               if (strcmp(procfile->name, FILENAME_IO) == 0 ||
+                   strcmp(procfile->name, FILENAME_IRQ) == 0) {
+                       procfile->mode = S_IFREG |  0644;
+               }
+       }
+
+       ch->init_status &= ~HW_OPEN;
+       return 0;
+}
+
+static int LOCOMX_statistics(struct net_device *dev,char *page)
+{
+       int len = 0;
+
+       len += sprintf(page + len, "No statistics are available for the LoCOMX board\n");
+
+       return len;
+}
+
+static int LOCOMX_dump(struct net_device *dev) {
+       printk(KERN_INFO "LOCOMX_dump called\n");
+       return(-1);
+}
+
+static int locomx_read_proc(char *page, char **start, off_t off, int count,
+       int *eof, void *data)
+{
+       struct proc_dir_entry *file = (struct proc_dir_entry *)data;
+       struct net_device *dev = file->parent->data;
+       int len = 0;
+
+       if (strcmp(file->name, FILENAME_IO) == 0) {
+               len = sprintf(page, "0x%x\n", (unsigned int)dev->base_addr);
+       } else if (strcmp(file->name, FILENAME_IRQ) == 0) {
+               len = sprintf(page, "%d\n", (unsigned int)dev->irq);
+       } else {
+               printk(KERN_ERR "hw_read_proc: internal error, filename %s\n", 
+                       file->name);
+               return -EBADF;
+       }
+
+       if (off >= len) {
+               *eof = 1;
+               return 0;
+       }
+
+       *start = page + off;
+       if (count >= len - off) {
+               *eof = 1;
+       }
+       return min_t(int, count, len - off);
+}
+
+static int locomx_write_proc(struct file *file, const char *buffer,
+       u_long count, void *data)
+{
+       struct proc_dir_entry *entry = (struct proc_dir_entry *)data;
+       struct net_device *dev = (struct net_device *)entry->parent->data;
+       int val;
+       char *page;
+
+       if (!(page = (char *)__get_free_page(GFP_KERNEL))) {
+               return -ENOMEM;
+       }
+
+       if (copy_from_user(page, buffer, count = min_t(unsigned long, count, PAGE_SIZE))) {
+               free_page((unsigned long)page);
+               return -EFAULT;
+       }
+       if (*(page + count - 1) == '\n') {
+               *(page + count - 1) = 0;
+       }
+
+       if (strcmp(entry->name, FILENAME_IO) == 0) {
+               val = simple_strtoul(page, NULL, 0);
+               if (val != 0x360 && val != 0x368 && val != 0x370 && 
+                  val != 0x378) {
+                       printk(KERN_ERR "LoCOMX: incorrect io address!\n");     
+               } else {
+                       dev->base_addr = val;
+               }
+       } else if (strcmp(entry->name, FILENAME_IRQ) == 0) {
+               val = simple_strtoul(page, NULL, 0);
+               if (val != 3 && val != 4 && val != 5 && val != 6 && val != 7) {
+                       printk(KERN_ERR "LoCOMX: incorrect irq value!\n");
+               } else {
+                       dev->irq = val;
+               }       
+       } else {
+               printk(KERN_ERR "locomx_write_proc: internal error, filename %s\n", 
+                       entry->name);
+               free_page((unsigned long)page);
+               return -EBADF;
+       }
+
+       free_page((unsigned long)page);
+       return count;
+}
+
+
+
+static int LOCOMX_init(struct net_device *dev) 
+{
+       struct comx_channel *ch = netdev_priv(dev);
+       struct locomx_data *hw;
+       struct proc_dir_entry *new_file;
+
+       /* Alloc data for private structure */
+       if ((ch->HW_privdata = kmalloc(sizeof(struct locomx_data), 
+          GFP_KERNEL)) == NULL) {
+               return -ENOMEM;
+       }
+
+       memset(hw = ch->HW_privdata, 0, sizeof(struct locomx_data));
+       hw->io_extent = LOCOMX_IO_EXTENT;
+
+       /* Register /proc files */
+       if ((new_file = create_proc_entry(FILENAME_IO, S_IFREG | 0644, 
+           ch->procdir)) == NULL) {
+               goto cleanup_HW_privdata;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &locomx_read_proc;
+       new_file->write_proc = &locomx_write_proc;
+       new_file->nlink = 1;
+
+       if ((new_file = create_proc_entry(FILENAME_IRQ, S_IFREG | 0644, 
+           ch->procdir)) == NULL)  {
+               goto cleanup_filename_io;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &locomx_read_proc;
+       new_file->write_proc = &locomx_write_proc;
+       new_file->nlink = 1;
+
+/*     No clock yet */
+/*
+       if ((new_file = create_proc_entry(FILENAME_CLOCK, S_IFREG | 0644, 
+           ch->procdir)) == NULL) {
+               return -EIO;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &locomx_read_proc;
+       new_file->write_proc = &locomx_write_proc;
+       new_file->nlink = 1;
+*/
+
+       ch->HW_access_board = NULL;
+       ch->HW_release_board = NULL;
+       ch->HW_txe = LOCOMX_txe;
+       ch->HW_open = LOCOMX_open;
+       ch->HW_close = LOCOMX_close;
+       ch->HW_send_packet = LOCOMX_send_packet;
+       ch->HW_statistics = LOCOMX_statistics;
+       ch->HW_set_clock = NULL;
+
+       ch->current_stats = &hw->board.chanA.stats;
+       memcpy(ch->current_stats, &ch->stats, sizeof(struct net_device_stats));
+
+       dev->base_addr = LOCOMX_DEFAULT_IO;
+       dev->irq = LOCOMX_DEFAULT_IRQ;
+       
+       
+       /* O.K. Count one more user on this module */
+       MOD_INC_USE_COUNT;
+       return 0;
+cleanup_filename_io:
+       remove_proc_entry(FILENAME_IO, ch->procdir);
+cleanup_HW_privdata:
+       kfree(ch->HW_privdata);
+       return -EIO;
+}
+
+
+static int LOCOMX_exit(struct net_device *dev)
+{
+       struct comx_channel *ch = netdev_priv(dev);
+
+       ch->HW_access_board = NULL;
+       ch->HW_release_board = NULL;
+       ch->HW_txe = NULL;
+       ch->HW_open = NULL;
+       ch->HW_close = NULL;
+       ch->HW_send_packet = NULL;
+       ch->HW_statistics = NULL;
+       ch->HW_set_clock = NULL;
+       memcpy(&ch->stats, ch->current_stats, sizeof(struct net_device_stats));
+       ch->current_stats = &ch->stats;
+
+       kfree(ch->HW_privdata);
+
+       remove_proc_entry(FILENAME_IO, ch->procdir);
+       remove_proc_entry(FILENAME_IRQ, ch->procdir);
+//     remove_proc_entry(FILENAME_CLOCK, ch->procdir);
+
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+static struct comx_hardware locomx_hw = {
+       "locomx",
+       VERSION,
+       LOCOMX_init, 
+       LOCOMX_exit,
+       LOCOMX_dump,
+       NULL
+};
+       
+static int __init comx_hw_locomx_init(void)
+{
+       comx_register_hardware(&locomx_hw);
+       return 0;
+}
+
+static void __exit comx_hw_locomx_exit(void)
+{
+       comx_unregister_hardware("locomx");
+}
+
+module_init(comx_hw_locomx_init);
+module_exit(comx_hw_locomx_exit);
diff --git a/drivers/net/wan/comx-hw-mixcom.c b/drivers/net/wan/comx-hw-mixcom.c
new file mode 100644
index 0000000..c6fb9ac
--- /dev/null
@@ -0,0 +1,960 @@
+/* 
+ * Hardware driver for the MixCom synchronous serial board 
+ *
+ * Author: Gergely Madarasz <gorgo@itc.hu>
+ *
+ * based on skeleton driver code and a preliminary hscx driver by 
+ * Tivadar Szemethy <tiv@itc.hu>
+ *
+ * Copyright (C) 1998-1999 ITConsult-Pro Co. <info@itc.hu>
+ *
+ * Contributors:
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br> (0.65)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Version 0.60 (99/06/11):
+ *             - ported to the kernel, now works as builtin code
+ *
+ * Version 0.61 (99/06/11):
+ *             - recognize the one-channel MixCOM card (id byte = 0x13)
+ *             - printk fixes
+ * 
+ * Version 0.62 (99/07/15):
+ *             - fixes according to the new hw docs 
+ *             - report line status when open
+ *
+ * Version 0.63 (99/09/21):
+ *             - line status report fixes
+ *
+ * Version 0.64 (99/12/01):
+ *             - some more cosmetical fixes
+ *
+ * Version 0.65 (00/08/15)
+ *             - resource release on failure at MIXCOM_init
+ */
+
+#define VERSION "0.65"
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+
+#include <asm/types.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#include "comx.h"
+#include "mixcom.h"
+#include "hscx.h"
+
+MODULE_AUTHOR("Gergely Madarasz <gorgo@itc.hu>");
+MODULE_DESCRIPTION("Hardware-level driver for the serial port of the MixCom board");
+MODULE_LICENSE("GPL");
+
+#define MIXCOM_DATA(d) ((struct mixcom_privdata *)(COMX_CHANNEL(d)-> \
+       HW_privdata))
+
+#define MIXCOM_BOARD_BASE(d) (d->base_addr - MIXCOM_SERIAL_OFFSET - \
+       (1 - MIXCOM_DATA(d)->channel) * MIXCOM_CHANNEL_OFFSET)
+
+#define MIXCOM_DEV_BASE(port,channel) (port + MIXCOM_SERIAL_OFFSET + \
+       (1 - channel) * MIXCOM_CHANNEL_OFFSET)
+
+/* Values used to set the IRQ line; 0xFF marks IRQs the board cannot use */
+static unsigned char mixcom_set_irq[]={0xFF, 0xFF, 0xFF, 0x0, 0xFF, 0x2, 0x4, 0x6, 0xFF, 0xFF, 0x8, 0xA, 0xC, 0xFF, 0xE, 0xFF};
+
+static unsigned char* hscx_versions[]={"A1", NULL, "A2", NULL, "A3", "2.1"};
+
+struct mixcom_privdata {
+       u16     clock;
+       char    channel;
+       long    txbusy;
+       struct sk_buff *sending;
+       unsigned tx_ptr;
+       struct sk_buff *recving;
+       unsigned rx_ptr;
+       unsigned char status;
+       char    card_has_status;
+};
+
+static inline void wr_hscx(struct net_device *dev, int reg, unsigned char val) 
+{
+       outb(val, dev->base_addr + reg);
+}
+
+static inline unsigned char rd_hscx(struct net_device *dev, int reg)
+{
+       return inb(dev->base_addr + reg);
+}
+
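+/*
+ * Issue an HSCX command: wait for the previous command to complete (CEC bit
+ * clear in STAR) before writing CMDR; a stuck CEC usually means no clock.
+ */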
+static inline void hscx_cmd(struct net_device *dev, int cmd)
+{
+       unsigned long jiffs = jiffies;
+       unsigned char cec;
+       unsigned delay = 0;
+
+       while ((cec = ((rd_hscx(dev, HSCX_STAR) & HSCX_CEC) != 0)) &&
+           time_before(jiffies, jiffs + HZ)) {
+               udelay(1);
+               if (++delay > (100000 / HZ)) break;
+       }
+       if (cec) {
+               printk(KERN_WARNING "%s: CEC stuck, probably no clock!\n",dev->name);
+       } else {
+               wr_hscx(dev, HSCX_CMDR, cmd);
+       }
+}
+
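+/*
+ * Feed the transmit FIFO in chunks of at most 32 bytes; XME is set together
+ * with XTF on the final chunk to close the frame.
+ */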
+static inline void hscx_fill_fifo(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct mixcom_privdata *hw = ch->HW_privdata;
+       register word to_send = hw->sending->len - hw->tx_ptr;
+
+
+       outsb(dev->base_addr + HSCX_FIFO,
+               &(hw->sending->data[hw->tx_ptr]), min_t(unsigned int, to_send, 32));
+       if (to_send <= 32) {
+               hscx_cmd(dev, HSCX_XTF | HSCX_XME);
+               kfree_skb(hw->sending);
+               hw->sending = NULL; 
+               hw->tx_ptr = 0;
+        } else {
+               hscx_cmd(dev, HSCX_XTF);
+               hw->tx_ptr += 32;
+        }
+}
+
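+/*
+ * Pull 'cnt' bytes from the receive FIFO into the skb being assembled
+ * (allocating it on the first chunk) and acknowledge with RMC.
+ */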
+static inline void hscx_empty_fifo(struct net_device *dev, int cnt)
+{
+       struct comx_channel *ch = dev->priv;
+       struct mixcom_privdata *hw = ch->HW_privdata;
+
+       if (hw->recving == NULL) {
+               if (!(hw->recving = dev_alloc_skb(HSCX_MTU + 16))) {
+                       ch->stats.rx_dropped++;
+                       hscx_cmd(dev, HSCX_RHR);
+                } else {
+                       skb_reserve(hw->recving, 16);
+                       skb_put(hw->recving, HSCX_MTU);
+                }
+               hw->rx_ptr = 0;
+        }
+       if (cnt > 32 || !cnt || hw->recving == NULL) {
+               printk(KERN_ERR "hscx_empty_fifo: cnt is %d, hw->recving %p\n",
+                       cnt, (void *)hw->recving);
+               return;
+        }
+        
+       insb(dev->base_addr + HSCX_FIFO, &(hw->recving->data[hw->rx_ptr]),cnt);
+       hw->rx_ptr += cnt;
+       hscx_cmd(dev, HSCX_RMC);
+}
+
+
+static int MIXCOM_txe(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct mixcom_privdata *hw = ch->HW_privdata;
+
+       return !test_bit(0, &hw->txbusy);
+}
+
+static int mixcom_probe(struct net_device *dev)
+{
+       unsigned long flags;
+       int id, vstr, ret=0;
+
+       save_flags(flags); cli();
+
+       id=inb_p(MIXCOM_BOARD_BASE(dev) + MIXCOM_ID_OFFSET) & 0x7f;
+
+       if (id != MIXCOM_ID ) {
+               ret=-ENODEV;
+               printk(KERN_WARNING "%s: no MixCOM board found at 0x%04lx\n",dev->name, dev->base_addr);
+               goto out;
+       }
+
+       vstr=inb_p(dev->base_addr + HSCX_VSTR) & 0x0f;
+       if(vstr>=sizeof(hscx_versions)/sizeof(char*) || 
+           hscx_versions[vstr]==NULL) {
+               printk(KERN_WARNING "%s: board found but no HSCX chip detected at 0x%4lx (vstr = 0x%1x)\n",dev->name,dev->base_addr,vstr);
+               ret = -ENODEV;
+       } else {
+               printk(KERN_INFO "%s: HSCX chip version %s\n",dev->name,hscx_versions[vstr]);
+               ret = 0;
+       }
+
+out:
+
+       restore_flags(flags);
+       return ret;
+}
+
+#if 0
+static void MIXCOM_set_clock(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct mixcom_privdata *hw = ch->HW_privdata;
+
+       if (hw->clock) {
+               ;
+       } else {
+               ;
+       }
+}
+#endif
+
+static void mixcom_board_on(struct net_device *dev)
+{
+       outb_p(MIXCOM_OFF , MIXCOM_BOARD_BASE(dev) + MIXCOM_IT_OFFSET);
+       udelay(1000);
+       outb_p(mixcom_set_irq[dev->irq] | MIXCOM_ON, 
+               MIXCOM_BOARD_BASE(dev) + MIXCOM_IT_OFFSET);
+       udelay(1000);
+}
+
+static void mixcom_board_off(struct net_device *dev)
+{
+       outb_p(MIXCOM_OFF , MIXCOM_BOARD_BASE(dev) + MIXCOM_IT_OFFSET);
+       udelay(1000);
+}
+
+static void mixcom_off(struct net_device *dev)
+{
+       wr_hscx(dev, HSCX_CCR1, 0x0);
+}
+
+static void mixcom_on(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+
+       wr_hscx(dev, HSCX_CCR1, HSCX_PU | HSCX_ODS | HSCX_ITF); // power up, push-pull
+       wr_hscx(dev, HSCX_CCR2, HSCX_CIE /* | HSCX_RIE */ );
+       wr_hscx(dev, HSCX_MODE, HSCX_TRANS | HSCX_ADM8 | HSCX_RAC | HSCX_RTS );
+       wr_hscx(dev, HSCX_RLCR, HSCX_RC | 47); // 1504 bytes
+       wr_hscx(dev, HSCX_MASK, HSCX_RSC | HSCX_TIN );
+       hscx_cmd(dev, HSCX_XRES | HSCX_RHR);
+
+       if (ch->HW_set_clock) ch->HW_set_clock(dev);
+
+}
+
+static int MIXCOM_send_packet(struct net_device *dev, struct sk_buff *skb) 
+{
+       struct comx_channel *ch = dev->priv;
+       struct mixcom_privdata *hw = ch->HW_privdata;
+       unsigned long flags;
+
+       if (ch->debug_flags & DEBUG_HW_TX) {
+               comx_debug_bytes(dev, skb->data, skb->len, "MIXCOM_send_packet");
+       }
+
+       if (!(ch->line_status & LINE_UP)) {
+               return FRAME_DROPPED;
+       }
+
+       if (skb->len > HSCX_MTU) {
+               ch->stats.tx_errors++;  
+               return FRAME_ERROR;
+       }
+
+       save_flags(flags); cli();
+
+       if (test_and_set_bit(0, &hw->txbusy)) {
+               printk(KERN_ERR "%s: transmitter called while busy... dropping frame (length %d)\n", dev->name, skb->len);
+               restore_flags(flags);
+               return FRAME_DROPPED;
+       }
+
+
+       hw->sending = skb;
+       hw->tx_ptr = 0;
+       hw->txbusy = 1;
+//     atomic_inc(&skb->users);        // save it
+       hscx_fill_fifo(dev);
+       restore_flags(flags);
+
+       ch->stats.tx_packets++;
+       ch->stats.tx_bytes += skb->len; 
+
+       if (ch->debug_flags & DEBUG_HW_TX) {
+               comx_debug(dev, "MIXCOM_send_packet was successful\n\n");
+       }
+
+       return FRAME_ACCEPTED;
+}
+
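+/*
+ * A complete frame has arrived: read its status (RSTA) and length
+ * (RBCH/RBCL), drain the remaining bytes, update the error counters and
+ * hand frames with VFR set and a good CRC up to the line protocol.
+ */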
+static inline void mixcom_receive_frame(struct net_device *dev) 
+{
+       struct comx_channel *ch=dev->priv;
+       struct mixcom_privdata *hw=ch->HW_privdata;
+       register byte rsta;
+       register word length;
+
+       rsta = rd_hscx(dev, HSCX_RSTA) & (HSCX_VFR | HSCX_RDO | 
+               HSCX_CRC | HSCX_RAB);
+       length = ((rd_hscx(dev, HSCX_RBCH) & 0x0f) << 8) | 
+               rd_hscx(dev, HSCX_RBCL);
+
+       if ( length > hw->rx_ptr ) {
+               hscx_empty_fifo(dev, length - hw->rx_ptr);
+       }
+       
+       if (!(rsta & HSCX_VFR)) {
+               ch->stats.rx_length_errors++;
+       }
+       if (rsta & HSCX_RDO) {
+               ch->stats.rx_over_errors++;
+       }
+       if (!(rsta & HSCX_CRC)) {
+               ch->stats.rx_crc_errors++;
+       }
+       if (rsta & HSCX_RAB) {
+               ch->stats.rx_frame_errors++;
+       }
+       ch->stats.rx_packets++; 
+       ch->stats.rx_bytes += length;
+
+       if (rsta == (HSCX_VFR | HSCX_CRC) && hw->recving) {
+               skb_trim(hw->recving, hw->rx_ptr - 1);
+               if (ch->debug_flags & DEBUG_HW_RX) {
+                       comx_debug_skb(dev, hw->recving,
+                               "MIXCOM_interrupt receiving");
+               }
+               hw->recving->dev = dev;
+               if (ch->LINE_rx) {
+                       ch->LINE_rx(dev, hw->recving);
+               }
+       }
+       else if(hw->recving) {
+               kfree_skb(hw->recving);
+       }
+       hw->recving = NULL; 
+       hw->rx_ptr = 0;
+}
+
+
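+/*
+ * Extended (EXIR) events: receive overflow (RFO), transmit underrun (XDU)
+ * and CTS changes (CSC), the last one driving the line up/down handling.
+ */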
+static inline void mixcom_extended_interrupt(struct net_device *dev) 
+{
+       struct comx_channel *ch=dev->priv;
+       struct mixcom_privdata *hw=ch->HW_privdata;
+       register byte exir;
+
+       exir = rd_hscx(dev, HSCX_EXIR) & (HSCX_XDU | HSCX_RFO | HSCX_CSC );
+
+       if (exir & HSCX_RFO) {
+               ch->stats.rx_over_errors++;
+               if (hw->rx_ptr) {
+                       kfree_skb(hw->recving);
+                       hw->recving = NULL; hw->rx_ptr = 0;
+               }
+               printk(KERN_ERR "MIXCOM: rx overrun\n");
+               hscx_cmd(dev, HSCX_RHR);
+       }
+
+       if (exir & HSCX_XDU) { // xmit underrun
+               ch->stats.tx_errors++;
+               ch->stats.tx_aborted_errors++;
+               if (hw->tx_ptr) {
+                       kfree_skb(hw->sending);
+                       hw->sending = NULL; 
+                       hw->tx_ptr = 0;
+               }
+               hscx_cmd(dev, HSCX_XRES);
+               clear_bit(0, &hw->txbusy);
+               if (ch->LINE_tx) {
+                       ch->LINE_tx(dev);
+               }
+               printk(KERN_ERR "MIXCOM: tx underrun\n");
+       }
+
+       if (exir & HSCX_CSC) {        
+               ch->stats.tx_carrier_errors++;
+               if ((rd_hscx(dev, HSCX_STAR) & HSCX_CTS) == 0) { // line went down
+                       if (test_and_clear_bit(0, &ch->lineup_pending)) {
+                                       del_timer(&ch->lineup_timer);
+                       } else if (ch->line_status & LINE_UP) {
+                               ch->line_status &= ~LINE_UP;
+                               if (ch->LINE_status) {
+                                       ch->LINE_status(dev,ch->line_status);
+                               }
+                       }
+               }
+               if (!(ch->line_status & LINE_UP) && (rd_hscx(dev, HSCX_STAR) & 
+                   HSCX_CTS)) { // line came up
+                       if (!test_and_set_bit(0,&ch->lineup_pending)) {
+                               ch->lineup_timer.function = comx_lineup_func;
+                               ch->lineup_timer.data = (unsigned long)dev;
+                               ch->lineup_timer.expires = jiffies + HZ * 
+                                       ch->lineup_delay;
+                               add_timer(&ch->lineup_timer);
+                               hscx_cmd(dev, HSCX_XRES);
+                               clear_bit(0, &hw->txbusy);
+                               if (hw->sending) {
+                                       kfree_skb(hw->sending);
+                               }
+                               hw->sending=NULL;
+                               hw->tx_ptr = 0;
+                       }
+               }
+       }
+}
+
+
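+/*
+ * Interrupt handler: events for this device are handled directly, while the
+ * ICA and EXA bits in ISTA indicate that the other HSCX channel (the twin
+ * device) needs service as well.
+ */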
+static irqreturn_t MIXCOM_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+       unsigned long flags;
+       struct net_device *dev = (struct net_device *)dev_id;
+       struct comx_channel *ch, *twin_ch;
+       struct mixcom_privdata *hw, *twin_hw;
+       register unsigned char ista;
+
+       if (dev==NULL) {
+               printk(KERN_ERR "comx_interrupt: irq %d for unknown device\n",irq);
+               return IRQ_NONE;
+       }
+
+       ch = dev->priv; 
+       hw = ch->HW_privdata;
+
+       save_flags(flags); cli(); 
+
+       while((ista = (rd_hscx(dev, HSCX_ISTA) & (HSCX_RME | HSCX_RPF | 
+           HSCX_XPR | HSCX_EXB | HSCX_EXA | HSCX_ICA)))) {
+               register byte ista2 = 0;
+
+               if (ista & HSCX_RME) {
+                       mixcom_receive_frame(dev);
+               }
+               if (ista & HSCX_RPF) {
+                       hscx_empty_fifo(dev, 32);
+               }
+               if (ista & HSCX_XPR) {
+                       if (hw->tx_ptr) {
+                               hscx_fill_fifo(dev);
+                       } else {
+                               clear_bit(0, &hw->txbusy);
+                               ch->LINE_tx(dev);
+                       }
+               }
+               
+               if (ista & HSCX_EXB) {
+                       mixcom_extended_interrupt(dev);
+               }
+               
+               if ((ista & HSCX_EXA) && ch->twin)  {
+                       mixcom_extended_interrupt(ch->twin);
+               }
+       
+               if ((ista & HSCX_ICA) && ch->twin &&
+                   (ista2 = rd_hscx(ch->twin, HSCX_ISTA) &
+                   (HSCX_RME | HSCX_RPF | HSCX_XPR ))) {
+                       if (ista2 & HSCX_RME) {
+                               mixcom_receive_frame(ch->twin);
+                       }
+                       if (ista2 & HSCX_RPF) {
+                               hscx_empty_fifo(ch->twin, 32);
+                       }
+                       if (ista2 & HSCX_XPR) {
+                               twin_ch=ch->twin->priv;
+                               twin_hw=twin_ch->HW_privdata;
+                               if (twin_hw->tx_ptr) {
+                                       hscx_fill_fifo(ch->twin);
+                               } else {
+                                       clear_bit(0, &twin_hw->txbusy);
+                                       ch->LINE_tx(ch->twin);
+                               }
+                       }
+               }
+       }
+
+       restore_flags(flags);
+       return IRQ_HANDLED;
+}
+
+static int MIXCOM_open(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct mixcom_privdata *hw = ch->HW_privdata;
+       struct proc_dir_entry *procfile = ch->procdir->subdir;
+       unsigned long flags; 
+       int ret = -ENODEV;
+
+       if (!dev->base_addr || !dev->irq)
+               goto err_ret;
+
+
+       if(hw->channel==1) {
+               if(!TWIN(dev) || !(COMX_CHANNEL(TWIN(dev))->init_status & 
+                   IRQ_ALLOCATED)) {
+                       printk(KERN_ERR "%s: channel 0 not yet initialized\n",dev->name);
+                       ret = -EAGAIN;
+                       goto err_ret;
+               }
+       }
+
+
+       /* Is our hardware present at all? Skip the check for channel 0 if it is
+          already open */
+       if(hw->channel!=0 || !(ch->init_status & IRQ_ALLOCATED)) {
+               if (!request_region(dev->base_addr, MIXCOM_IO_EXTENT, dev->name)) {
+                       ret = -EAGAIN;
+                       goto err_ret;
+               }
+               if (mixcom_probe(dev)) {
+                       ret = -ENODEV;
+                       goto err_release_region;
+               }
+       }
+
+       if(hw->channel==0 && !(ch->init_status & IRQ_ALLOCATED)) {
+               if (request_irq(dev->irq, MIXCOM_interrupt, 0, 
+                   dev->name, (void *)dev)) {
+                       printk(KERN_ERR "MIXCOM: unable to obtain irq %d\n", dev->irq);
+                       ret = -EAGAIN;
+                       goto err_release_region;
+               }
+       }
+
+       save_flags(flags); cli();
+
+       if(hw->channel==0 && !(ch->init_status & IRQ_ALLOCATED)) {
+               ch->init_status|=IRQ_ALLOCATED;
+               mixcom_board_on(dev);
+       }
+
+       mixcom_on(dev);
+
+
+       hw->status=inb(MIXCOM_BOARD_BASE(dev) + MIXCOM_STATUS_OFFSET);
+       if(hw->status != 0xff) {
+               printk(KERN_DEBUG "%s: board has status register, good\n", dev->name);
+               hw->card_has_status=1;
+       }
+
+       hw->txbusy = 0;
+       ch->init_status |= HW_OPEN;
+       
+       if (rd_hscx(dev, HSCX_STAR) & HSCX_CTS) {
+               ch->line_status |= LINE_UP;
+       } else {
+               ch->line_status &= ~LINE_UP;
+       }
+
+       restore_flags(flags);
+
+       ch->LINE_status(dev, ch->line_status);
+
+       for (; procfile ; procfile = procfile->next) {
+               if (strcmp(procfile->name, FILENAME_IO) == 0 ||
+                   strcmp(procfile->name, FILENAME_CHANNEL) == 0 ||
+                   strcmp(procfile->name, FILENAME_CLOCK) == 0 ||
+                   strcmp(procfile->name, FILENAME_IRQ) == 0) {
+                       procfile->mode = S_IFREG |  0444;
+               }
+       }
+
+       return 0;
+       
+err_release_region:
+       release_region(dev->base_addr, MIXCOM_IO_EXTENT);
+err_ret:
+       return ret;
+}
+
+static int MIXCOM_close(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct mixcom_privdata *hw = ch->HW_privdata;
+       struct proc_dir_entry *procfile = ch->procdir->subdir;
+       unsigned long flags;
+
+
+       save_flags(flags); cli();
+
+       mixcom_off(dev);
+
+       /* This is channel 0 and the twin (if any) is not open: we can safely turn off everything */
+       if(hw->channel==0 && (!(TWIN(dev)) || 
+           !(COMX_CHANNEL(TWIN(dev))->init_status & HW_OPEN))) {
+               mixcom_board_off(dev);
+               free_irq(dev->irq, dev);
+               release_region(dev->base_addr, MIXCOM_IO_EXTENT);
+               ch->init_status &= ~IRQ_ALLOCATED;
+       }
+
+       /* This is channel 1 and channel 0 has already been shut down, so we can
+          release this one too */
+       if(hw->channel==1 && !(COMX_CHANNEL(TWIN(dev))->init_status & HW_OPEN)) {
+               if(COMX_CHANNEL(TWIN(dev))->init_status & IRQ_ALLOCATED) {
+                       mixcom_board_off(TWIN(dev));
+                       free_irq(TWIN(dev)->irq, TWIN(dev));
+                       release_region(TWIN(dev)->base_addr, MIXCOM_IO_EXTENT);
+                       COMX_CHANNEL(TWIN(dev))->init_status &= ~IRQ_ALLOCATED;
+               }
+       }
+
+       /* the ioports for channel 1 can be safely released */
+       if(hw->channel==1) {
+               release_region(dev->base_addr, MIXCOM_IO_EXTENT);
+       }
+
+       restore_flags(flags);
+
+       /* If we don't hold any hardware open */
+       if(!(ch->init_status & IRQ_ALLOCATED)) {
+               for (; procfile ; procfile = procfile->next) {
+                       if (strcmp(procfile->name, FILENAME_IO) == 0 ||
+                           strcmp(procfile->name, FILENAME_CHANNEL) == 0 ||
+                           strcmp(procfile->name, FILENAME_CLOCK) == 0 ||
+                           strcmp(procfile->name, FILENAME_IRQ) == 0) {
+                               procfile->mode = S_IFREG |  0644;
+                       }
+               }
+       }
+
+       /* channel 0 was only waiting for us to close channel 1;
+          close it completely */
+   
+       if(hw->channel==1 && !(COMX_CHANNEL(TWIN(dev))->init_status & HW_OPEN)) {
+               for (procfile=COMX_CHANNEL(TWIN(dev))->procdir->subdir; 
+                   procfile ; procfile = procfile->next) {
+                       if (strcmp(procfile->name, FILENAME_IO) == 0 ||
+                           strcmp(procfile->name, FILENAME_CHANNEL) == 0 ||
+                           strcmp(procfile->name, FILENAME_CLOCK) == 0 ||
+                           strcmp(procfile->name, FILENAME_IRQ) == 0) {
+                               procfile->mode = S_IFREG |  0644;
+                       }
+               }
+       }
+       
+       ch->init_status &= ~HW_OPEN;
+       return 0;
+}
+
+static int MIXCOM_statistics(struct net_device *dev,char *page)
+{
+       struct comx_channel *ch = dev->priv;
+       // struct mixcom_privdata *hw = ch->HW_privdata;
+       int len = 0;
+
+       if (ch->init_status & IRQ_ALLOCATED) {
+               len += sprintf(page + len, "Mixcom board: hardware open\n");
+       }
+
+       return len;
+}
+
+static int MIXCOM_dump(struct net_device *dev) {
+       return 0;
+}
+
+static int mixcom_read_proc(char *page, char **start, off_t off, int count,
+       int *eof, void *data)
+{
+       struct proc_dir_entry *file = (struct proc_dir_entry *)data;
+       struct net_device *dev = file->parent->data;
+       struct comx_channel *ch = dev->priv;
+       struct mixcom_privdata *hw = ch->HW_privdata;
+       int len = 0;
+
+       if (strcmp(file->name, FILENAME_IO) == 0) {
+               len = sprintf(page, "0x%x\n", 
+                       (unsigned int)MIXCOM_BOARD_BASE(dev));
+       } else if (strcmp(file->name, FILENAME_IRQ) == 0) {
+               len = sprintf(page, "%d\n", (unsigned int)dev->irq);
+       } else if (strcmp(file->name, FILENAME_CLOCK) == 0) {
+               if (hw->clock)
+                       len = sprintf(page, "%d\n", hw->clock);
+               else
+                       len = sprintf(page, "external\n");
+       } else if (strcmp(file->name, FILENAME_CHANNEL) == 0) {
+               len = sprintf(page, "%01d\n", hw->channel);
+       } else if (strcmp(file->name, FILENAME_TWIN) == 0) {
+               if (ch->twin) {
+                       len = sprintf(page, "%s\n",ch->twin->name);
+               } else {
+                       len = sprintf(page, "none\n");
+               }
+       } else {
+               printk(KERN_ERR "mixcom_read_proc: internal error, filename %s\n", file->name);
+               return -EBADF;
+       }
+
+       if (off >= len) {
+               *eof = 1;
+               return 0;
+       }
+       *start = page + off;
+       if (count >= len - off) *eof = 1;
+       return min_t(int, count, len - off);
+}
+
+
+static struct net_device *mixcom_twin_check(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct proc_dir_entry *procfile = ch->procdir->parent->subdir;
+       struct mixcom_privdata *hw = ch->HW_privdata;
+
+       struct net_device *twin;
+       struct comx_channel *ch_twin;
+       struct mixcom_privdata *hw_twin;
+
+
+       for ( ; procfile ; procfile = procfile->next) {
+               if(!S_ISDIR(procfile->mode)) continue;
+                
+               twin = procfile->data;
+               ch_twin = twin->priv;
+               hw_twin = ch_twin->HW_privdata;
+
+
+               if (twin != dev && dev->irq && dev->base_addr && 
+                   dev->irq == twin->irq && 
+                   ch->hardware == ch_twin->hardware &&
+                   dev->base_addr == twin->base_addr + 
+                   (1-2*hw->channel)*MIXCOM_CHANNEL_OFFSET &&
+                   hw->channel == (1 - hw_twin->channel)) {
+                       if  (!TWIN(twin) || TWIN(twin)==dev) {
+                               return twin;
+                       }
+               }
+        }
+       return NULL;
+}
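+
+/*
+ * Worked example of the base-address test above (added illustration, not
+ * original driver text): the twin condition
+ *     dev->base_addr == twin->base_addr + (1 - 2*hw->channel) * MIXCOM_CHANNEL_OFFSET
+ * means that for a channel-0 device the candidate twin must sit one
+ * MIXCOM_CHANNEL_OFFSET below it, and for a channel-1 device one offset
+ * above it, i.e. the two devices describe the two channels of one board.
+ */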
+
+
+static void setup_twin(struct net_device* dev) 
+{
+
+       if(TWIN(dev) && TWIN(TWIN(dev))) {
+               TWIN(TWIN(dev))=NULL;
+       }
+       if ((TWIN(dev) = mixcom_twin_check(dev)) != NULL) {
+               if (TWIN(TWIN(dev)) && TWIN(TWIN(dev)) != dev) {
+                       TWIN(dev)=NULL;
+               } else {
+                       TWIN(TWIN(dev))=dev;
+               }
+       }       
+}
+
+static int mixcom_write_proc(struct file *file, const char *buffer,
+       u_long count, void *data)
+{
+       struct proc_dir_entry *entry = (struct proc_dir_entry *)data;
+       struct net_device *dev = (struct net_device *)entry->parent->data;
+       struct comx_channel *ch = dev->priv;
+       struct mixcom_privdata *hw = ch->HW_privdata;
+       char *page;
+       int value;
+
+       if (!(page = (char *)__get_free_page(GFP_KERNEL))) {
+               return -ENOMEM;
+       }
+
+       if (copy_from_user(page, buffer, count = min_t(unsigned long, count, PAGE_SIZE))) {
+               free_page((unsigned long)page);
+               return -EFAULT;
+       }
+       if (*(page + count - 1) == '\n') {
+               *(page + count - 1) = 0;
+       }
+
+       if (strcmp(entry->name, FILENAME_IO) == 0) {
+               value = simple_strtoul(page, NULL, 0);
+               if (value != 0x180 && value != 0x280 && value != 0x380) {
+                       printk(KERN_ERR "MIXCOM: incorrect io address!\n");
+               } else {
+                       dev->base_addr = MIXCOM_DEV_BASE(value,hw->channel);
+               }
+       } else if (strcmp(entry->name, FILENAME_IRQ) == 0) {
+               value = simple_strtoul(page, NULL, 0); 
+               if (value < 0 || value > 15 || mixcom_set_irq[value]==0xFF) {
+                       printk(KERN_ERR "MIXCOM: incorrect irq value!\n");
+               } else {
+                       dev->irq = value;       
+               }
+       } else if (strcmp(entry->name, FILENAME_CLOCK) == 0) {
+               if (strncmp("ext", page, 3) == 0) {
+                       hw->clock = 0;
+               } else {
+                       int kbps;
+
+                       kbps = simple_strtoul(page, NULL, 0);
+                       if (!kbps) {
+                               hw->clock = 0;
+                       } else {
+                               hw->clock = kbps;
+                       }
+                       if (hw->clock < 32 || hw->clock > 2000) {
+                               hw->clock = 0;
+                               printk(KERN_ERR "MIXCOM: invalid clock rate!\n");
+                       }
+               }
+               if (ch->init_status & HW_OPEN && ch->HW_set_clock) {
+                       ch->HW_set_clock(dev);
+               }
+       } else if (strcmp(entry->name, FILENAME_CHANNEL) == 0) {
+               value = simple_strtoul(page, NULL, 0);
+               if (value > 2) {
+                       printk(KERN_ERR "Invalid channel number\n");
+               } else {
+                       dev->base_addr+=(hw->channel - value) * MIXCOM_CHANNEL_OFFSET;
+                       hw->channel = value;
+               }               
+       } else {
+               printk(KERN_ERR "mixcom_write_proc: internal error, filename %s\n",
+                       entry->name);
+               free_page((unsigned long)page);
+               return -EBADF;
+       }
+
+       setup_twin(dev);
+
+       free_page((unsigned long)page);
+       return count;
+}
+
+static int MIXCOM_init(struct net_device *dev) {
+       struct comx_channel *ch = dev->priv;
+       struct mixcom_privdata *hw;
+       struct proc_dir_entry *new_file;
+
+       if ((ch->HW_privdata = kmalloc(sizeof(struct mixcom_privdata), 
+           GFP_KERNEL)) == NULL) {
+               return -ENOMEM;
+       }
+
+       memset(hw = ch->HW_privdata, 0, sizeof(struct mixcom_privdata));
+
+       if ((new_file = create_proc_entry(FILENAME_IO, S_IFREG | 0644, 
+           ch->procdir)) == NULL) {
+               goto cleanup_HW_privdata;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &mixcom_read_proc;
+       new_file->write_proc = &mixcom_write_proc;
+       new_file->nlink = 1;
+
+       if ((new_file = create_proc_entry(FILENAME_IRQ, S_IFREG | 0644, 
+           ch->procdir)) == NULL) {
+               goto cleanup_filename_io;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &mixcom_read_proc;
+       new_file->write_proc = &mixcom_write_proc;
+       new_file->nlink = 1;
+
+#if 0
+       if ((new_file = create_proc_entry(FILENAME_CLOCK, S_IFREG | 0644, 
+           ch->procdir)) == NULL) {
+               return -EIO;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &mixcom_read_proc;
+       new_file->write_proc = &mixcom_write_proc;
+       new_file->nlink = 1;
+#endif
+
+       if ((new_file = create_proc_entry(FILENAME_CHANNEL, S_IFREG | 0644, 
+           ch->procdir)) == NULL) {
+               goto cleanup_filename_irq;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &mixcom_read_proc;
+       new_file->write_proc = &mixcom_write_proc;
+       new_file->nlink = 1;
+
+       if ((new_file = create_proc_entry(FILENAME_TWIN, S_IFREG | 0444, 
+           ch->procdir)) == NULL) {
+               goto cleanup_filename_channel;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &mixcom_read_proc;
+       new_file->write_proc = &mixcom_write_proc;
+       new_file->nlink = 1;
+
+       setup_twin(dev);
+
+       /* Fill in ch_struct hw specific pointers */
+       ch->HW_access_board = NULL;
+       ch->HW_release_board = NULL;
+       ch->HW_txe = MIXCOM_txe;
+       ch->HW_open = MIXCOM_open;
+       ch->HW_close = MIXCOM_close;
+       ch->HW_send_packet = MIXCOM_send_packet;
+       ch->HW_statistics = MIXCOM_statistics;
+       ch->HW_set_clock = NULL;
+
+       dev->base_addr = MIXCOM_DEV_BASE(MIXCOM_DEFAULT_IO,0);
+       dev->irq = MIXCOM_DEFAULT_IRQ;
+
+       MOD_INC_USE_COUNT;
+       return 0;
+cleanup_filename_channel:
+       remove_proc_entry(FILENAME_CHANNEL, ch->procdir);
+cleanup_filename_irq:
+       remove_proc_entry(FILENAME_IRQ, ch->procdir);
+cleanup_filename_io:
+       remove_proc_entry(FILENAME_IO, ch->procdir);
+cleanup_HW_privdata:
+       kfree(ch->HW_privdata);
+       return -EIO;
+}
+
+static int MIXCOM_exit(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct mixcom_privdata *hw = ch->HW_privdata;
+
+       if(hw->channel==0 && TWIN(dev)) {
+               return -EBUSY;
+       }
+
+       if(hw->channel==1 && TWIN(dev)) {
+               TWIN(TWIN(dev))=NULL;
+       }
+
+       kfree(ch->HW_privdata);
+       remove_proc_entry(FILENAME_IO, ch->procdir);
+       remove_proc_entry(FILENAME_IRQ, ch->procdir);
+#if 0
+       remove_proc_entry(FILENAME_CLOCK, ch->procdir);
+#endif
+       remove_proc_entry(FILENAME_CHANNEL, ch->procdir);
+       remove_proc_entry(FILENAME_TWIN, ch->procdir);
+
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+static struct comx_hardware mixcomhw = {
+       "mixcom",
+       VERSION,
+       MIXCOM_init, 
+       MIXCOM_exit,
+       MIXCOM_dump,
+       NULL
+};
+       
+static int __init comx_hw_mixcom_init(void)
+{
+       return comx_register_hardware(&mixcomhw);
+}
+
+static void __exit comx_hw_mixcom_exit(void)
+{
+       comx_unregister_hardware("mixcom");
+}
+
+module_init(comx_hw_mixcom_init);
+module_exit(comx_hw_mixcom_exit);
diff --git a/drivers/net/wan/comx-hw-munich.c b/drivers/net/wan/comx-hw-munich.c
new file mode 100644 (file)
index 0000000..195bc2d
--- /dev/null
@@ -0,0 +1,2854 @@
+/*
+ * Hardware-level driver for the SliceCOM board for Linux kernels 2.4.X
+ *
+ * Current maintainer / latest changes: Pasztor Szilard <don@itc.hu>
+ *
+ * Original author: Bartok Istvan <bartoki@itc.hu>
+ * Based on skeleton by Tivadar Szemethy <tiv@itc.hu>
+ *
+ * 0.51:
+ *      - port for 2.4.x
+ *     - clean up some code, make it more portable
+ *     - busted direct hardware access through mapped memory
+ *     - fix a possible race
+ *     - prevent procfs buffer overflow
+ *
+ * 0.50:
+ *     - support for the pcicom board, lots of rearrangements
+ *     - handle modem status lines
+ *
+ * 0.50a:
+ *     - fix for falc version 1.0
+ *
+ * 0.50b: T&t
+ *     - fix for bad localbus
+ */
+
+#define VERSION                "0.51"
+#define VERSIONSTR     "SliceCOM v" VERSION ", 2002/01/07\n"
+
+#include <linux/config.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+
+#include <asm/delay.h>
+#include <asm/types.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#define COMX_NEW
+
+#ifndef COMX_NEW
+#include "../include/comx.h"
+#include "../include/munich32x.h"
+#include "../include/falc-lh.h"
+#else
+#include "comx.h"
+#include "munich32x.h"
+#include "falc-lh.h"
+#endif
+
+MODULE_AUTHOR("Bartok Istvan <bartoki@itc.hu>, Gergely Madarasz <gorgo@itc.hu>, Szilard Pasztor <don@itc.hu>");
+MODULE_DESCRIPTION("Hardware-level driver for the SliceCOM and PciCOM (WelCOM) adapters");
+MODULE_LICENSE("GPL");
+/*
+ *     TODO: things like these usually live in comxhw.h, they should move there eventually:
+ */
+
+#define FILENAME_BOARDNUM      "boardnum"      /* /proc/comx/comx0.1/boardnum          */
+#define FILENAME_TIMESLOTS     "timeslots"     /* /proc/comx/comx0.1/timeslots         */
+#define FILENAME_FRAMING       "framing"       /* /proc/comx/comx0.1/framing           */
+#define FILENAME_LINECODE      "linecode"      /* /proc/comx/comx0.1/linecode          */
+#define FILENAME_CLOCK_SOURCE  "clock_source"  /* /proc/comx/comx0.1/clock_source      */
+#define FILENAME_LOOPBACK      "loopback"      /* /proc/comx/comx0.1/loopback          */
+#define FILENAME_REG           "reg"           /* /proc/comx/comx0.1/reg               */
+#define FILENAME_LBIREG                "lbireg"        /* /proc/comx/comx0.1/lbireg            */
+
+#define SLICECOM_BOARDNUM_DEFAULT      0
+
+#define SLICECOM_FRAMING_CRC4          1
+#define SLICECOM_FRAMING_NO_CRC4       2
+#define SLICECOM_FRAMING_DEFAULT       SLICECOM_FRAMING_CRC4
+
+#define SLICECOM_LINECODE_HDB3         1
+#define SLICECOM_LINECODE_AMI          2
+#define SLICECOM_LINECODE_DEFAULT      SLICECOM_LINECODE_HDB3
+
+#define SLICECOM_CLOCK_SOURCE_LINE     1
+#define SLICECOM_CLOCK_SOURCE_INTERNAL 2
+#define SLICECOM_CLOCK_SOURCE_DEFAULT  SLICECOM_CLOCK_SOURCE_LINE
+
+#define SLICECOM_LOOPBACK_NONE         1
+#define SLICECOM_LOOPBACK_LOCAL                2
+#define SLICECOM_LOOPBACK_REMOTE       3
+#define SLICECOM_LOOPBACK_DEFAULT      SLICECOM_LOOPBACK_NONE
+
+#define MUNICH_VIRT(addr) (void *)(&bar1[addr])
+
+struct slicecom_stringtable
+{
+    char *name;
+    int value;
+};
+
+/* A convention: keep "default" the last not NULL when reading from /proc,
+   "error" is an indication that something went wrong, we have an undefined value */
+
+struct slicecom_stringtable slicecom_framings[] =
+{
+    {"crc4", SLICECOM_FRAMING_CRC4},
+    {"no-crc4", SLICECOM_FRAMING_NO_CRC4},
+    {"default", SLICECOM_FRAMING_DEFAULT},
+    {"error", 0}
+};
+
+struct slicecom_stringtable slicecom_linecodes[] =
+{
+    {"hdb3", SLICECOM_LINECODE_HDB3},
+    {"ami", SLICECOM_LINECODE_AMI},
+    {"default", SLICECOM_LINECODE_DEFAULT},
+    {"error", 0}
+};
+
+struct slicecom_stringtable slicecom_clock_sources[] =
+{
+    {"line", SLICECOM_CLOCK_SOURCE_LINE},
+    {"internal", SLICECOM_CLOCK_SOURCE_INTERNAL},
+    {"default", SLICECOM_CLOCK_SOURCE_DEFAULT},
+    {"error", 0}
+};
+
+struct slicecom_stringtable slicecom_loopbacks[] =
+{
+    {"none", SLICECOM_LOOPBACK_NONE},
+    {"local", SLICECOM_LOOPBACK_LOCAL},
+    {"remote", SLICECOM_LOOPBACK_REMOTE},
+    {"default", SLICECOM_LOOPBACK_DEFAULT},
+    {"error", 0}
+};
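+
+/*
+ * Illustrative sketch only (an assumption, not code from the original
+ * driver; the helper name is made up): one way such a table could be walked
+ * when turning a stored value back into its /proc name, relying on the
+ * convention noted above the tables - the walk ends either on a matching
+ * entry or on the "error" sentinel, whose value is 0.
+ */
+#if 0
+static const char *slicecom_table_name(struct slicecom_stringtable *table,
+                                       int value)
+{
+    int i = 0;
+
+    /* stop at the first entry with a matching value or at the sentinel */
+    while (table[i].value && table[i].value != value)
+       i++;
+    return table[i].name;
+}
+#endif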
+
+/*
+ *     Some tunable values...
+ *
+ *     Note: when tuning values which change the length of text in
+ *     /proc/comx/comx[n]/status, keep in mind that it must be shorter then
+ *     /proc/comx/comx[n]/status, keep in mind that it must be shorter than
+ */
+
+#define MAX_BOARDS     4       /* allows up to 4 cards in the machine: 0..3    */
+#define RX_DESC_MAX    8       /* Rx ring size, must be >= 4                   */
+#define TX_DESC_MAX    4       /* Tx ring size, must be >= 2                   */
+                               /* a much longer Tx ring could already hurt the efficiency of  */
+                               /* non-FIFO packet schedulers (fair queueing, etc.).           */
+#define MAX_WORK       10      /* TOD: update the info - at most this many minus one events are handled per interrupt call */
+
+/*
+ *     These are tunable too, but don't touch them without fully understanding what is happening
+ */
+
+#define UDELAY         20      /* We wait UDELAY usecs with disabled interrupts before and     */
+                               /* after each command to avoid writing into each other's        */
+                               /* ccb->action_spec. _send_packet does not wait, because       */
+                               /* LINE_tx() may call it from _interrupt() as well.            */
+
+/*
+ *     Just to avoid warnings about implicit declarations:
+ */
+
+static int MUNICH_close(struct net_device *dev);
+static struct comx_hardware slicecomhw;
+static struct comx_hardware pcicomhw;
+
+static unsigned long flags;
+static spinlock_t mister_lock = SPIN_LOCK_UNLOCKED;
+
+typedef volatile struct                /* Time Slot Assignment */
+{
+    u32 rxfillmask:8,          // ----------------------------+------+
+                               //                             |      |
+      rxchannel:5,             // ----------------------+---+ |      |
+      rti:1,                   // ---------------------+|   | |      |
+      res2:2,                  // -------------------++||   | |      |
+                               //                    ||||   | |      |
+      txfillmask:8,            // ----------+------+ ||||   | |      |
+                               //           |      | ||||   | |      |
+      txchannel:5,             // ----+---+ |      | ||||   | |      |
+      tti:1,                   // ---+|   | |      | ||||   | |      |
+      res1:2;                  // -++||   | |      | ||||   | |      |
+                               //   3          2          1
+                               //  10987654 32109876 54321098 76543210
+} timeslot_spec_t;
+
+typedef volatile struct                /* Receive Descriptor */
+{
+    u32 zero1:16, no:13, hi:1, hold:1, zero2:1;
+
+    u32 next;
+    u32 data;
+
+    u32 zero3:8, status:8, bno:13, zero4:1, c:1, fe:1;
+} rx_desc_t;
+
+typedef volatile struct                /* Transmit Descriptor */
+{
+    u32 fnum:11, csm:1, no13:1, zero1:2, v110:1, no:13, hi:1, hold:1, fe:1;
+
+    u32 next;
+    u32 data;
+
+} tx_desc_t;
+
+typedef volatile struct                /* Channel Specification */
+{
+    u32 iftf:1, mode:2, fa:1, trv:2, crc:1, inv:1, cs:1, tflag:7, ra:1, ro:1,
+       th:1, ta:1, to:1, ti:1, ri:1, nitbs:1, fit:1, fir:1, re:1, te:1, ch:1,
+       ifc:1, sfe:1, fe2:1;
+
+    u32 frda;
+    u32 ftda;
+
+    u32 itbs:6, zero1:26;
+
+} channel_spec_t;
+
+typedef volatile struct                /* Configuration Control Block */
+{
+    u32 action_spec;
+    u32 reserved1;
+    u32 reserved2;
+    timeslot_spec_t timeslot_spec[32];
+    channel_spec_t channel_spec[32];
+    u32 current_rx_desc[32];
+    u32 current_tx_desc[32];
+    u32 csa;                   /* Control Start Address. CSA = *CCBA; CCB = *CSA */
+                               /* MUNICH does it like: CCB = *( *CCBA )          */
+} munich_ccb_t;
+
+typedef volatile struct                /* Entry in the interrupt queue */
+{
+    u32 all;
+} munich_intq_t;
+
+#define MUNICH_INTQLEN 63      /* Rx/Tx Interrupt Queue Length
+                                  (not the real len, but the TIQL/RIQL value)  */
+#define MUNICH_INTQMAX ( 16*(MUNICH_INTQLEN+1) )       /* Rx/Tx/Periph Interrupt Queue size in munich_intq_t's */
+#define MUNICH_INTQSIZE        ( 4*MUNICH_INTQMAX )    /* Rx/Tx/Periph Interrupt Queue size in bytes           */
+
+#define MUNICH_PIQLEN  4       /* Peripheral Interrupt Queue Length. Unlike the RIQL/TIQL, */
+#define MUNICH_PIQMAX  ( 4*MUNICH_PIQLEN )     /* PIQL register needs it like this                     */
+#define MUNICH_PIQSIZE ( 4*MUNICH_PIQMAX )
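+
+/*
+ * Worked out with the values above (added illustration): MUNICH_INTQLEN = 63
+ * gives MUNICH_INTQMAX = 16 * 64 = 1024 queue entries and MUNICH_INTQSIZE =
+ * 4 * 1024 = 4096 bytes per Rx/Tx queue; the peripheral queue is
+ * 4 * 4 = 16 entries, i.e. 64 bytes.
+ */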
+
+typedef volatile u32 vol_u32;  /* TOD: these go away once rewritten to readw()/writew() - done */
+typedef volatile u8 vol_u8;
+
+typedef volatile struct                /* counters of E1-errors and errored seconds, see rfc2495 */
+{
+    /* use here only unsigned ints, we depend on it when calculating the sum for the last N intervals */
+
+    unsigned line_code_violations,     /* AMI: bipolar violations, HDB3: hdb3 violations                       */
+      path_code_violations,    /* FAS errors and CRC4 errors                                                   */
+      e_bit_errors,            /* E-Bit Errors (the remote side received from us with CRC4-error) */
+      slip_secs,               /* number of seconds with (receive) Controlled Slip(s)          */
+      fr_loss_secs,            /* number of seconds an Out Of Frame defect was detected                */
+      line_err_secs,           /* number of seconds with one or more Line Code Violations              */
+      degraded_mins,           /* Degraded Minute - the estimated error rate is >1E-6, but <1E-3       */
+      errored_secs,            /* Errored Second - at least one of these happened:
+                                  - Path Code Violation
+                                  - Out Of Frame defect
+                                  - Slip
+                                  - receiving AIS
+                                  - not incremented during an Unavailable Second                       */
+      bursty_err_secs,         /* Bursty Errored Second: (rfc2495 says it does not apply to E1)
+                                  - Path Code Violations >1, but <320
+                                  - not a Severely Errored Second
+                                  - no AIS
+                                  - not incremented during an Unavailable Second                       */
+      severely_err_secs,       /* Severely Errored Second:
+                                  - CRC4: >=832 Path Code Violations || >0 Out Of Frame defects
+                                  - noCRC4: >=2048 Line Code Violations
+                                  - not incremented during an Unavailable Second                       */
+      unavail_secs;            /* number of Unavailable Seconds. Unavailable state is said after:
+                                  - 10 contiguous Severely Errored Seconds
+                                  - or RAI || AIS || LOF || LOS 
+                                  - (any) loopback has been set                                                */
+
+    /*
+     * we do not strictly comply to the rfc: we do not retroactively reduce errored_secs,
+     * bursty_err_secs, severely_err_secs when 'unavailable state' is reached
+     */
+
+} e1_stats_t;
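+
+/*
+ * Illustrative sketch only (an assumption, not code from the original
+ * driver; the helper name is made up): summing one of the counters above
+ * over the last n closed 15-minute intervals of a ring of ring_size
+ * e1_stats_t entries, the kind of calculation the "sum for the last N
+ * intervals" note above refers to.
+ */
+#if 0
+static unsigned e1_sum_errored_secs(const e1_stats_t *intervals,
+                                   unsigned ring_size,
+                                   unsigned current_interval, unsigned n)
+{
+    unsigned sum = 0;
+    unsigned i;
+
+    /* n should be at most ring_size - 1; walk back from the newest closed interval */
+    for (i = 1; i <= n; i++)
+       sum += intervals[(current_interval + ring_size - i) % ring_size].errored_secs;
+    return sum;
+}
+#endif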
+
+typedef volatile struct                /* per-board data, these must not live in slicecom_privdata    */
+{
+    int use_count;             /* num. of interfaces using the board                           */
+    int irq;                   /* the card's irq. We also copy it into the dev->irq fields, but only   */
+    /* to make the ifconfig output nicer.                          */
+    /* If != 0, it means that this irq is currently successfully   */
+    /* allocated by us.                                            */
+    struct pci_dev *pci;       /* the card's PCI structure. NULL if there is no card           */
+    u32 *bar1;                 /* pci->base_address[0] ioremap()-ed by munich_probe(),         */
+    /* on x86 can be used both as a bus or virtual address.         */
+    /* These are the Munich's registers                             */
+    u8 *lbi;                   /* pci->base_address[1] ioremap()-ed by munich_probe(),         */
+    /* this is a 256-byte range, the start of the LBI on the board  */
+    munich_ccb_t *ccb;         /* virtual address of CCB                                       */
+    munich_intq_t *tiq;                /* Tx Interrupt Queue                                           */
+    munich_intq_t *riq;                /* Rx Interrupt Queue                                           */
+    munich_intq_t *piq;                /* Peripheral Interrupt Queue (FALC interrupts arrive here)     */
+    int tiq_ptr,               /* The 'current' positions within tiq/riq/piq.                  */
+      riq_ptr,                 /* Once the interrupts have been processed, they point to the   */
+      piq_ptr;                 /* first empty interrupt_information word.                      */
+    struct net_device *twins[32];      /* MUNICH channel -> network interface assignment       */
+
+    unsigned long lastcheck;   /* When were the Rx rings last checked. Time in jiffies         */
+
+    struct timer_list modemline_timer;
+    char isx21;
+    char lineup;
+    char framing;              /* storage for the settings                             */
+    char linecode;
+    char clock_source;
+    char loopback;
+
+    char devname[30];          /* what to show in /proc/interrupts                     */
+    unsigned histogram[MAX_WORK];      /* number of processed events in the interrupt loop     */
+    unsigned stat_pri_races;   /* number of special events, we try to handle them      */
+    unsigned stat_pti_races;
+    unsigned stat_pri_races_missed;    /* when it can not be handled, because of MAX_WORK      */
+    unsigned stat_pti_races_missed;
+
+#define SLICECOM_BOARD_INTERVALS_SIZE  97
+    e1_stats_t intervals[SLICECOM_BOARD_INTERVALS_SIZE];       /* E1 line statistics           */
+    unsigned current_interval; /* pointer to the current interval                      */
+    unsigned elapsed_seconds;  /* elapsed seconds from the start of the current interval */
+    unsigned ses_seconds;      /* counter of contiguous Severely Errored Seconds       */
+    unsigned is_unavailable;   /* set to 1 after 10 contiguous Severely Errored Seconds */
+    unsigned no_ses_seconds;   /* contiguous seconds free of Severely Errored Seconds in unavailable state */
+
+    unsigned deg_elapsed_seconds;      /* for counting the 'Degraded Mins'                     */
+    unsigned deg_cumulated_errors;
+
+    struct module *owner;      /* pointer to our module to avoid module load races */
+} munich_board_t;
+
+struct slicecom_privdata
+{
+    int busy;                  /* transmitter busy - number of packets in the Tx ring  */
+    int channel;               /* Munich logical channel ('channel-group' in Cisco)    */
+    unsigned boardnum;
+    u32 timeslots;             /* i-th bit means i-th timeslot is our                  */
+
+    int tx_ring_hist[TX_DESC_MAX];     /* histogram: number of packets in Tx ring when _send_packet is called  */
+
+    tx_desc_t tx_desc[TX_DESC_MAX];    /* the ring of Tx descriptors                           */
+    u8 tx_data[TX_DESC_MAX][TXBUFFER_SIZE];    /* buffers for data to transmit                 */
+    int tx_desc_ptr;           /* index of the Tx descriptor we are writing;   */
+    /* it points to the one we wrote last                  */
+
+    rx_desc_t rx_desc[RX_DESC_MAX];    /* the ring of Rx descriptors                           */
+    u8 rx_data[RX_DESC_MAX][RXBUFFER_SIZE];    /* buffers for received data                            */
+    int rx_desc_ptr;           /* index of the Rx descriptor we are reading    */
+
+    int rafutott;
+};
+
+static u32 reg, reg_ertek;     /* why static: don't write stack trash into regs if strtoul() fails */
+static u32 lbireg;
+static u8 lbireg_ertek;                /* why static: don't write stack trash into regs if strtoul() fails */
+
+static munich_board_t slicecom_boards[MAX_BOARDS];
+static munich_board_t pcicom_boards[MAX_BOARDS];
+
+/*
+ * Reprogram Idle Channel Registers in the FALC - send special code in not used channels
+ * Should be called from the open and close, when the timeslot assignment changes
+ */
+
+void rework_idle_channels(struct net_device *dev)
+{
+    struct comx_channel *ch = netdev_priv(dev);
+    struct slicecom_privdata *hw = ch->HW_privdata;
+    munich_board_t *board = slicecom_boards + hw->boardnum;
+    munich_ccb_t *ccb = board->ccb;
+
+    u8 *lbi = board->lbi;
+    int i, j, tmp;
+
+
+    spin_lock_irqsave(&mister_lock, flags);
+
+    for (i = 0; i < 4; i++)
+    {
+       tmp = 0xFF;
+       for (j = 0; j < 8; j++)
+           if (ccb->timeslot_spec[8 * i + j].tti == 0) tmp ^= (0x80 >> j);
+       writeb(tmp, lbi + 0x30 + i);
+    }
+
+    spin_unlock_irqrestore(&mister_lock, flags);
+}
+
+/*
+ * Set PCM framing - /proc/comx/comx0/framing
+ */
+
+void slicecom_set_framing(int boardnum, int value)
+{
+    u8 *lbi = slicecom_boards[boardnum].lbi;
+
+    spin_lock_irqsave(&mister_lock, flags);
+
+    slicecom_boards[boardnum].framing = value;
+    switch (value)
+    {
+       case SLICECOM_FRAMING_CRC4:
+           writeb(readb(lbi + FMR1) | 8, lbi + FMR1);
+           writeb((readb(lbi + FMR2) & 0x3f) | 0x80, lbi + FMR2);
+           break;
+       case SLICECOM_FRAMING_NO_CRC4:
+           writeb(readb(lbi + FMR1) & 0xf7, lbi + FMR1);
+           writeb(readb(lbi + FMR2) & 0x3f, lbi + FMR2);
+           break;
+       default:
+           printk("slicecom: board %d: unhandled " FILENAME_FRAMING
+                  " value %d\n", boardnum, value);
+    }
+
+    spin_unlock_irqrestore(&mister_lock, flags);
+}
+
+/*
+ * Set PCM linecode - /proc/comx/comx0/linecode
+ */
+
+void slicecom_set_linecode(int boardnum, int value)
+{
+    u8 *lbi = slicecom_boards[boardnum].lbi;
+
+    spin_lock_irqsave(&mister_lock, flags);
+
+    slicecom_boards[boardnum].linecode = value;
+    switch (value)
+    {
+       case SLICECOM_LINECODE_HDB3:
+           writeb(readb(lbi + FMR0) | 0xf0, lbi + FMR0);
+           break;
+       case SLICECOM_LINECODE_AMI:
+           writeb((readb(lbi + FMR0) & 0x0f) | 0xa0, lbi + FMR0);
+           break;
+       default:
+           printk("slicecom: board %d: unhandled " FILENAME_LINECODE
+                  " value %d\n", boardnum, value);
+    }
+    spin_unlock_irqrestore(&mister_lock, flags);
+}
+
+/*
+ * Set PCM clock source - /proc/comx/comx0/clock_source
+ */
+
+void slicecom_set_clock_source(int boardnum, int value)
+{
+    u8 *lbi = slicecom_boards[boardnum].lbi;
+
+    spin_lock_irqsave(&mister_lock, flags);
+
+    slicecom_boards[boardnum].clock_source = value;
+    switch (value)
+    {
+       case SLICECOM_CLOCK_SOURCE_LINE:
+           writeb(readb(lbi + LIM0) & ~1, lbi + LIM0);
+           break;
+       case SLICECOM_CLOCK_SOURCE_INTERNAL:
+           writeb(readb(lbi + LIM0) | 1, lbi + LIM0);
+           break;
+       default:
+           printk("slicecom: board %d: unhandled " FILENAME_CLOCK_SOURCE
+                  " value %d\n", boardnum, value);
+    }
+    spin_unlock_irqrestore(&mister_lock, flags);
+}
+
+/*
+ * Set loopbacks - /proc/comx/comx0/loopback
+ */
+
+void slicecom_set_loopback(int boardnum, int value)
+{
+    u8 *lbi = slicecom_boards[boardnum].lbi;
+
+    spin_lock_irqsave(&mister_lock, flags);
+
+    slicecom_boards[boardnum].loopback = value;
+    switch (value)
+    {
+       case SLICECOM_LOOPBACK_NONE:
+           writeb(readb(lbi + LIM0) & ~2, lbi + LIM0); /* Local Loop OFF  */
+           writeb(readb(lbi + LIM1) & ~2, lbi + LIM1); /* Remote Loop OFF */
+           break;
+       case SLICECOM_LOOPBACK_LOCAL:
+           writeb(readb(lbi + LIM1) & ~2, lbi + LIM1); /* Remote Loop OFF */
+           writeb(readb(lbi + LIM0) | 2, lbi + LIM0);  /* Local Loop ON   */
+           break;
+       case SLICECOM_LOOPBACK_REMOTE:
+           writeb(readb(lbi + LIM0) & ~2, lbi + LIM0); /* Local Loop OFF  */
+           writeb(readb(lbi + LIM1) | 2, lbi + LIM1);  /* Remote Loop ON  */
+           break;
+       default:
+           printk("slicecom: board %d: unhandled " FILENAME_LOOPBACK
+                  " value %d\n", boardnum, value);
+    }
+    spin_unlock_irqrestore(&mister_lock, flags);
+}
+
+/*
+ * Update E1 line status LEDs on the adapter
+ */
+
+void slicecom_update_leds(munich_board_t * board)
+{
+    u32 *bar1 = board->bar1;
+    u8 *lbi = board->lbi;
+    u8 frs0;
+    u32 leds;
+    int i;
+
+    spin_lock_irqsave(&mister_lock, flags);
+
+    leds = 0;
+    frs0 = readb(lbi + FRS0);  /* FRS0 bits described on page 137 */
+
+    if (!(frs0 & 0xa0))
+    {
+       leds |= 0x2000;         /* Green LED: Input signal seems to be OK, no LOS, no LFA       */
+       if (frs0 & 0x10)
+           leds |= 0x8000;     /* Red LED: Receiving Remote Alarm                                      */
+    }
+    writel(leds, MUNICH_VIRT(GPDATA));
+
+    if (leds == 0x2000 && !board->lineup)
+    {                          /* line up */
+       board->lineup = 1;
+       for (i = 0; i < 32; i++)
+       {
+           if (board->twins[i] && (board->twins[i]->flags & IFF_RUNNING))
+           {
+               struct comx_channel *ch = board->twins[i]->priv;
+
+               if (!test_and_set_bit(0, &ch->lineup_pending))
+               {
+                   ch->lineup_timer.function = comx_lineup_func;
+                   ch->lineup_timer.data = (unsigned long)board->twins[i];
+                   ch->lineup_timer.expires = jiffies + HZ * ch->lineup_delay;
+                   add_timer(&ch->lineup_timer);
+               }
+           }
+       }
+    }
+    else if (leds != 0x2000 && board->lineup)
+    {                          /* line down */
+       board->lineup = 0;
+       for (i = 0; i < 32; i++)
+           if (board->twins[i] && (board->twins[i]->flags & IFF_RUNNING))
+           {
+               struct comx_channel *ch = board->twins[i]->priv;
+
+               if (test_and_clear_bit(0, &ch->lineup_pending))
+                   del_timer(&ch->lineup_timer);
+               else if (ch->line_status & LINE_UP)
+               {
+                   ch->line_status &= ~LINE_UP;
+                   if (ch->LINE_status)
+                       ch->LINE_status(board->twins[i], ch->line_status);
+               }
+           }
+    }
+    spin_unlock_irqrestore(&mister_lock, flags);
+}
+
+/*
+ * This function gets called every second when the FALC issues the interrupt.
+ * Hardware counters contain error counts for last 1-second time interval.
+ * We add them to the global counters here.
+ * Read rfc2495 to understand this.
+ */
+
+void slicecom_update_line_counters(munich_board_t * board)
+{
+    e1_stats_t *curr_int = &board->intervals[board->current_interval];
+
+    u8 *lbi = board->lbi;
+
+    unsigned framing_errors, code_violations, path_code_violations, crc4_errors,
+       e_bit_errors;
+    unsigned slip_detected,    /* this one has logical value, not the number of slips! */
+      out_of_frame_defect,     /* logical value        */
+      ais_defect,              /* logical value        */
+      errored_sec, bursty_err_sec, severely_err_sec = 0, failure_sec;
+    u8 isr2, isr3, isr5, frs0;
+
+    spin_lock_irqsave(&mister_lock, flags);
+
+    isr2 = readb(lbi + ISR2);  /* ISR0-5 described on page 156     */
+    isr3 = readb(lbi + ISR3);
+    isr5 = readb(lbi + ISR5);
+    frs0 = readb(lbi + FRS0);  /* FRS0 described on page 137       */
+
+    /* Error Events: */
+
+    code_violations = readb(lbi + CVCL) + (readb(lbi + CVCH) << 8);
+    framing_errors = readb(lbi + FECL) + (readb(lbi + FECH) << 8);
+    crc4_errors = readb(lbi + CEC1L) + (readb(lbi + CEC1H) << 8);
+    e_bit_errors = readb(lbi + EBCL) + (readb(lbi + EBCH) << 8);
+    slip_detected = isr3 & (ISR3_RSN | ISR3_RSP);
+
+    path_code_violations = framing_errors + crc4_errors;
+
+    curr_int->line_code_violations += code_violations;
+    curr_int->path_code_violations += path_code_violations;
+    curr_int->e_bit_errors += e_bit_errors;
+
+    /* Performance Defects: */
+
+    /* there was an LFA in the last second, but maybe disappeared: */
+    out_of_frame_defect = (isr2 & ISR2_LFA) || (frs0 & FRS0_LFA);
+
+    /* there was an AIS in the last second, but maybe disappeared: */
+    ais_defect = (isr2 & ISR2_AIS) || (frs0 & FRS0_AIS);
+
+    /* Performance Parameters: */
+
+    if (out_of_frame_defect)
+       curr_int->fr_loss_secs++;
+    if (code_violations)
+       curr_int->line_err_secs++;
+
+    errored_sec = ((board->framing == SLICECOM_FRAMING_NO_CRC4) &&
+                  (code_violations)) || path_code_violations ||
+       out_of_frame_defect || slip_detected || ais_defect;
+
+    bursty_err_sec = !out_of_frame_defect && !ais_defect &&
+       (path_code_violations > 1) && (path_code_violations < 320);
+
+    switch (board->framing)
+    {
+       case SLICECOM_FRAMING_CRC4:
+           severely_err_sec = out_of_frame_defect ||
+               (path_code_violations >= 832);
+           break;
+       case SLICECOM_FRAMING_NO_CRC4:
+           severely_err_sec = (code_violations >= 2048);
+           break;
+    }
+
+    /*
+     * failure_sec: true if there was a condition leading to a failure
+     * (and leading to unavailable state) in this second:
+     */
+
+    failure_sec = (isr2 & ISR2_RA) || (frs0 & FRS0_RRA)        /* Remote/Far End/Distant Alarm Failure */
+       || ais_defect || out_of_frame_defect    /* AIS or LOF Failure                           */
+       || (isr2 & ISR2_LOS) || (frs0 & FRS0_LOS)       /* Loss Of Signal Failure                       */
+       || (board->loopback != SLICECOM_LOOPBACK_NONE); /* Loopback has been set                        */
+
+    if (board->is_unavailable)
+    {
+       if (severely_err_sec)
+           board->no_ses_seconds = 0;
+       else
+           board->no_ses_seconds++;
+
+       if ((board->no_ses_seconds >= 10) && !failure_sec)
+       {
+           board->is_unavailable = 0;
+           board->ses_seconds = 0;
+           board->no_ses_seconds = 0;
+       }
+    }
+    else
+    {
+       if (severely_err_sec)
+           board->ses_seconds++;
+       else
+           board->ses_seconds = 0;
+
+       if ((board->ses_seconds >= 10) || failure_sec)
+       {
+           board->is_unavailable = 1;
+           board->ses_seconds = 0;
+           board->no_ses_seconds = 0;
+       }
+    }
+
+    if (board->is_unavailable)
+       curr_int->unavail_secs++;
+    else
+    {
+       if (slip_detected)
+           curr_int->slip_secs++;
+       curr_int->errored_secs += errored_sec;
+       curr_int->bursty_err_secs += bursty_err_sec;
+       curr_int->severely_err_secs += severely_err_sec;
+    }
+
+    /* the RFC does not say clearly which errors to count here, we try to count bit errors */
+
+    if (!board->is_unavailable && !severely_err_sec)
+    {
+       board->deg_cumulated_errors += code_violations;
+       board->deg_elapsed_seconds++;
+       if (board->deg_elapsed_seconds >= 60)
+       {
+           if (board->deg_cumulated_errors >= 123)
+               curr_int->degraded_mins++;
+           board->deg_cumulated_errors = 0;
+           board->deg_elapsed_seconds = 0;
+       }
+
+    }
+
+    board->elapsed_seconds++;
+    if (board->elapsed_seconds >= 900)
+    {
+       board->current_interval =
+           (board->current_interval + 1) % SLICECOM_BOARD_INTERVALS_SIZE;
+       memset((void *)&board->intervals[board->current_interval], 0,
+              sizeof(e1_stats_t));
+       board->elapsed_seconds = 0;
+    }
+
+    spin_unlock_irqrestore(&mister_lock, flags);
+}
+
+static void pcicom_modemline(unsigned long b)
+{
+    munich_board_t *board = (munich_board_t *) b;
+    struct net_device *dev = board->twins[0];
+    struct comx_channel *ch = netdev_priv(dev);
+    unsigned long regs;
+
+    regs = readl((void *)(&board->bar1[GPDATA]));
+    if ((ch->line_status & LINE_UP) && (regs & 0x0800))
+    {
+       ch->line_status &= ~LINE_UP;
+       board->lineup = 0;
+       if (ch->LINE_status)
+       {
+           ch->LINE_status(dev, ch->line_status);
+       }
+    }
+
+    if (!(ch->line_status & LINE_UP) && !(regs & 0x0800))
+    {
+       ch->line_status |= LINE_UP;
+       board->lineup = 1;
+       if (ch->LINE_status)
+       {
+           ch->LINE_status(dev, ch->line_status);
+       }
+    }
+
+    mod_timer((struct timer_list *)&board->modemline_timer, jiffies + HZ);
+}
+
+/* 
+ * Is it possible to transmit ?
+ * Called (may be called) by the protocol layer 
+ */
+
+static int MUNICH_txe(struct net_device *dev)
+{
+    struct comx_channel *ch = netdev_priv(dev);
+    struct slicecom_privdata *hw = ch->HW_privdata;
+
+    return (hw->busy < TX_DESC_MAX - 1);
+}
+
+/* 
+ * Hw probe function. Detects all the boards in the system,
+ * and fills up slicecom_boards[] and pcicom_boards[]
+ * Returns 0 on success.
+ * We do not disable interrupts!
+ */
+static int munich_probe(void)
+{
+    struct pci_dev *pci;
+    int boardnum;
+    int slicecom_boardnum;
+    int pcicom_boardnum;
+    u32 *bar1;
+    u8 *lbi;
+    munich_board_t *board;
+
+    for (boardnum = 0; boardnum < MAX_BOARDS; boardnum++)
+    {
+       pcicom_boards[boardnum].pci = 0;
+       pcicom_boards[boardnum].bar1 = 0;
+       pcicom_boards[boardnum].lbi = 0;
+       slicecom_boards[boardnum].pci = 0;
+       slicecom_boards[boardnum].bar1 = 0;
+       slicecom_boards[boardnum].lbi = 0;
+    }
+
+    pci = NULL;
+    board = NULL;
+    slicecom_boardnum = 0;
+    pcicom_boardnum = 0;
+
+    for (boardnum = 0;
+       boardnum < MAX_BOARDS && (pci = pci_find_device(PCI_VENDOR_ID_SIEMENS,
+       PCI_DEVICE_ID_SIEMENS_MUNICH32X, pci)); boardnum++)
+    {
+       if (pci_enable_device(pci))
+           continue;
+
+       printk("munich_probe: munich chip found, IRQ %d\n", pci->irq);
+
+       bar1 = ioremap_nocache(pci->resource[0].start, 0x100);
+       lbi = ioremap_nocache(pci->resource[1].start, 0x100);
+
+       if (bar1 && lbi)
+       {
+           pci_write_config_dword(pci, MUNICH_PCI_PCIRES, 0xe0000);
+           set_current_state(TASK_UNINTERRUPTIBLE);
+           schedule_timeout(1);
+           pci_write_config_dword(pci, MUNICH_PCI_PCIRES, 0);
+           set_current_state(TASK_UNINTERRUPTIBLE);
+           schedule_timeout(1);
+           /* check the type of the card */
+           writel(LREG0_MAGIC, MUNICH_VIRT(LREG0));
+           writel(LREG1_MAGIC, MUNICH_VIRT(LREG1));
+           writel(LREG2_MAGIC, MUNICH_VIRT(LREG2));
+           writel(LREG3_MAGIC, MUNICH_VIRT(LREG3));
+           writel(LREG4_MAGIC, MUNICH_VIRT(LREG4));
+           writel(LREG5_MAGIC, MUNICH_VIRT(LREG5));
+           writel(LCONF_MAGIC2,MUNICH_VIRT(LCONF));    /* enable the DMSM */
+
+           if ((readb(lbi + VSTR) == 0x13) || (readb(lbi + VSTR) == 0x10))
+           {
+               board = slicecom_boards + slicecom_boardnum;
+               sprintf((char *)board->devname, "slicecom%d",
+                       slicecom_boardnum);
+               board->isx21 = 0;
+               slicecom_boardnum++;
+           }
+           else if ((readb(lbi + VSTR) == 0x6) || (readb(lbi + GIS) == 0x6))
+           {
+               board = pcicom_boards + pcicom_boardnum;
+               sprintf((char *)board->devname, "pcicom%d", pcicom_boardnum);
+               board->isx21 = 1;
+               pcicom_boardnum++;
+           }
+           if (board)
+           {
+               printk("munich_probe: %s board found\n", board->devname);
+               writel(LCONF_MAGIC1, MUNICH_VIRT(LCONF));       /* reset the DMSM */
+               board->pci = pci;
+               board->bar1 = bar1;
+               board->lbi = lbi;
+               board->framing = SLICECOM_FRAMING_DEFAULT;
+               board->linecode = SLICECOM_LINECODE_DEFAULT;
+               board->clock_source = SLICECOM_CLOCK_SOURCE_DEFAULT;
+               board->loopback = SLICECOM_LOOPBACK_DEFAULT;
+               board->owner = THIS_MODULE;
+           }
+           else
+           {
+               printk("munich_probe: Board error, VSTR: %02X\n",
+                      readb(lbi + VSTR));
+               iounmap((void *)bar1);
+               iounmap((void *)lbi);
+           }
+       }
+       else
+       {
+           printk("munich_probe: ioremap() failed, not enabling this board!\n");
+           /* .pci = NULL, so the MUNICH_open will not try to open it            */
+           if (bar1) iounmap((void *)bar1);
+           if (lbi) iounmap((void *)lbi);
+       }
+    }
+
+    if (!pci && !boardnum)
+    {
+       printk("munich_probe: no PCI present!\n");
+       return -ENODEV;
+    }
+
+    if (pcicom_boardnum + slicecom_boardnum == 0)
+    {
+       printk
+           ("munich_probe: Couldn't find any munich board: vendor:device %x:%x not found\n",
+            PCI_VENDOR_ID_SIEMENS, PCI_DEVICE_ID_SIEMENS_MUNICH32X);
+       return -ENODEV;
+    }
+
+    /* Found some */
+    if (pcicom_boardnum)
+       printk("%d pcicom board(s) found.\n", pcicom_boardnum);
+    if (slicecom_boardnum)
+       printk("%d slicecom board(s) found.\n", slicecom_boardnum);
+
+    return 0;
+}
+
+/* 
+ * Reset the hardware. Get called only from within this module if needed.
+ */
+#if 0
+static int slicecom_reset(struct net_device *dev)
+{
+    struct comx_channel *ch = netdev_priv(dev);
+
+    printk("slicecom_reset: resetting the hardware\n");
+
+    /* Begin to reset the hardware */
+
+    if (ch->HW_set_clock)
+       ch->HW_set_clock(dev);
+
+    /* And finish it */
+
+    return 0;
+}
+#endif
+
+/* 
+ * Transmit a packet. 
+ * Called by the protocol layer
+ * Return values:      
+ *     FRAME_ACCEPTED: frame is being transmitted, transmitter is busy
+ *     FRAME_QUEUED:   frame is being transmitted, there's more room in
+ *                             the transmitter for additional packet(s)
+ *     FRAME_ERROR:
+ *     FRAME_DROPPED:  there was some error
+ */
+
+static int MUNICH_send_packet(struct net_device *dev, struct sk_buff *skb)
+{
+    struct comx_channel *ch = netdev_priv(dev);
+    struct slicecom_privdata *hw = ch->HW_privdata;
+
+    /* Send it to the debug facility too if needed: */
+
+    if (ch->debug_flags & DEBUG_HW_TX)
+       comx_debug_bytes(dev, skb->data, skb->len, "MUNICH_send_packet");
+
+    /* If the line is inactive, don't accept: */
+
+    /* TODO: think over what should really happen here */
+    /* if (!(ch->line_status & LINE_UP)) return FRAME_DROPPED; */
+
+    /* More check, to be sure: */
+
+    if (skb->len > TXBUFFER_SIZE)
+    {
+       ch->stats.tx_errors++;
+       kfree_skb(skb);
+       return FRAME_ERROR;
+    }
+
+    /* Maybe you have to disable irq's while programming the hw: */
+
+    spin_lock_irqsave(&mister_lock, flags);
+
+    /* And more check: */
+
+    if (hw->busy >= TX_DESC_MAX - 1)
+    {
+       printk(KERN_ERR
+              "%s: Transmitter called while busy... dropping frame, busy = %d\n",
+              dev->name, hw->busy);
+       spin_unlock_irqrestore(&mister_lock, flags);
+       kfree_skb(skb);
+       return FRAME_DROPPED;
+    }
+
+    if (hw->busy >= 0)
+       hw->tx_ring_hist[hw->busy]++;
+    /* DELL: */
+    else
+       printk("slicecom: %s: FATAL: busy = %d\n", dev->name, hw->busy);
+
+//              /* DEL: */
+//      printk("slicecom: %s: _send_packet called, busy = %d\n", dev->name, hw->busy );
+
+    /* Packet can go, update stats: */
+
+    ch->stats.tx_packets++;
+    ch->stats.tx_bytes += skb->len;
+
+    /* Pass the packet to the HW:                   */
+    /* Step forward with the transmit descriptors:  */
+
+    hw->tx_desc_ptr = (hw->tx_desc_ptr + 1) % TX_DESC_MAX;
+
+    memcpy(&(hw->tx_data[hw->tx_desc_ptr][0]), skb->data, skb->len);
+    hw->tx_desc[hw->tx_desc_ptr].no = skb->len;
+
+    /* We don't issue any command, just step with the HOLD bit      */
+
+    hw->tx_desc[hw->tx_desc_ptr].hold = 1;
+    hw->tx_desc[(hw->tx_desc_ptr + TX_DESC_MAX - 1) % TX_DESC_MAX].hold = 0;
+
+#ifdef COMX_NEW
+    dev_kfree_skb(skb);
+#endif
+    /* a packet has been placed into the Tx ring: */
+
+    hw->busy++;
+
+    /* Report it: */
+
+    if (ch->debug_flags & DEBUG_HW_TX)
+       comx_debug(dev, "%s: MUNICH_send_packet was successful\n\n", dev->name);
+
+    if (hw->busy >= TX_DESC_MAX - 1)
+    {
+       spin_unlock_irqrestore(&mister_lock, flags);
+       return FRAME_ACCEPTED;
+    }
+
+    spin_unlock_irqrestore(&mister_lock, flags);
+
+    /* All done */
+
+    return FRAME_QUEUED;
+}
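+
+/*
+ * Illustration of the return-value contract documented above (assumed
+ * caller behaviour, not code from this driver): a protocol layer typically
+ * keeps submitting frames while it gets FRAME_QUEUED, stops and waits for
+ * the LINE_tx() callback after FRAME_ACCEPTED, and accounts FRAME_ERROR /
+ * FRAME_DROPPED as transmit errors; in every case the skb has already been
+ * consumed by this function.
+ */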
+
+/*
+ * Interrupt handler routine.
+ * Called by the Linux kernel.
+ * BEWARE! The interrupts are enabled on the call!
+ */
+static irqreturn_t MUNICH_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+    struct sk_buff *skb;
+    int length;
+    int rx_status;
+    int work;                  /* how many events we have already handled                      */
+    u32 *bar1;
+    u8 *lbi;
+    u32 stat,                  /* the events that still have to be handled in this loop iteration     */
+      race_stat = 0,           /* on a race this is where I note for myself what still needs handling */
+      ack;                     /* this gets written to STAT at the end; a bit is removed from it when */
+
+    /* the event in question must not be acked because there was work to do with it, */
+    /* and it is best to come back here once more                                    */
+    munich_intq_t int_info;
+
+    struct net_device *dev;
+    struct comx_channel *ch;
+    struct slicecom_privdata *hw;
+    munich_board_t *board = (munich_board_t *) dev_id;
+    int channel;
+
+    //      , boardnum = (int)dev_id;
+
+    // board = munich_boards + boardnum;
+    bar1 = board->bar1;
+    lbi = board->lbi;
+
+    //      Do not uncomment this under heavy load! :->
+    //      printk("MUNICH_interrupt: masked STAT=0x%08x, tiq=0x%08x, riq=0x%08x, piq=0x%08x\n", stat, board->tiq[0].all, board->riq[0].all, board->piq[0].all );
+
+    for (work = 0; (stat = (race_stat | (readl(MUNICH_VIRT(STAT)) & ~STAT_NOT_HANDLED_BY_INTERRUPT))) && (work < MAX_WORK - 1); work++)
+    {
+       ack = stat & (STAT_PRI | STAT_PTI | STAT_LBII);
+
+       /* Handle the interrupt information in the Rx queue. We don't really trust      */
+       /* info from this queue, because it can be overflowed, so later check           */
+       /* every Rx ring for received packets. But there are some errors which can't    */
+       /* be counted from the Rx rings, so we parse it.                                        */
+
+       int_info = board->riq[board->riq_ptr];
+       if (int_info.all & 0xF0000000)  /* if this is not 0, there is interrupt_info here               */
+       {
+           ack &= ~STAT_PRI;   /* don't ack the interrupt, we had some work to do              */
+
+           channel = PCM_INT_CHANNEL(int_info.all);
+           dev = board->twins[channel];
+
+           if (dev == NULL)
+           {
+               printk
+                   ("MUNICH_interrupt: got an Rx interrupt info for NULL device "
+                    "%s.twins[%d], int_info = 0x%08x\n", board->devname,
+                    channel, int_info.all);
+               goto go_for_next_interrupt;
+           }
+
+           ch = netdev_priv(dev);
+           hw = (struct slicecom_privdata *)ch->HW_privdata;
+
+           //      printk("Rx STAT=0x%08x int_info=0x%08x rx_desc_ptr=%d rx_desc.status=0x%01x\n",
+           //              stat, int_info.all, hw->rx_desc_ptr, hw->rx_desc[ hw->rx_desc_ptr ].status );
+
+           if (int_info.all & PCM_INT_HI)
+               printk("SliceCOM: %s: Host Initiated interrupt\n", dev->name);
+           if (int_info.all & PCM_INT_IFC)
+               printk("SliceCOM: %s: Idle/Flag Change\n", dev->name);
+           /* TOD: is this Idle/Flag Change good for anything? - it shows immediately when the other side starts transmitting */
+           /* TOD: this kind of interrupt does not even arrive now, because it is masked out; is it really needed? */
+
+           if (int_info.all & PCM_INT_FO)
+               /* Internal buffer (RB) overrun */
+               ch->stats.rx_over_errors++;     /* TOD: This means the internal RB was not accessible, so something was skipped. It need not have been a packet; it could have been an event etc. as well, see page 247. This follows the 'cat status' counters, but according to netdevice.h it is not obvious that it belongs here. Shouldn't it be rx_missed? */
+               /* But we do not goto anywhere; in principle this is fine as it is */
+               /* we come back to FO later, if it was a packet-FO. Search for "packet-FO". */
+           if (int_info.all & PCM_INT_FI)      /* frame received, but we do not trust the int_info queue       */
+               if (int_info.all & PCM_INT_SF)
+               {               /* Short Frame: shorter than the CRC */
+                   /* the "shorter than CRC+2 bytes" check is done at "CRC+2" */
+                   ch->stats.rx_length_errors++;       /* TOD: should this be incremented or not? */
+                   goto go_for_next_interrupt;
+               }
+
+           go_for_next_interrupt:      /* One step in the interrupt queue */
+           board->riq[board->riq_ptr].all = 0; /* mark that the hw has not visited this slot yet */
+           board->riq_ptr = (board->riq_ptr + 1) % MUNICH_INTQMAX;
+
+       }
+
+       /* Check every Rx ring for incoming packets: */
+
+       for (channel = 0; channel < 32; channel++)
+       {
+           dev = board->twins[channel];
+
+           if (dev != NULL)
+           {
+               ch = netdev_priv(dev);
+               hw = (struct slicecom_privdata *)ch->HW_privdata;
+
+               rx_status = hw->rx_desc[hw->rx_desc_ptr].status;
+
+               if (!(rx_status & 0x80))        /* the hardware has already been here */
+               {
+                   ack &= ~STAT_PRI;   /* Don't ack, we had some work          */
+
+                   /* This is a bit messy now, because the int_info is no longer visible here */
+                   if (rx_status & RX_STATUS_ROF)
+                       ch->stats.rx_over_errors++;     /* TOD: following the 'cat status' counters */
+
+                   if (rx_status & RX_STATUS_RA)
+                       /* Abort received or issued on channel  */
+                       ch->stats.rx_frame_errors++;    /* or HOLD bit in the descriptor                */
+                       /* TOD: following the 'cat status' counters */
+
+                   if (rx_status & RX_STATUS_LFD)
+                   {           /* Long Frame (longer than MFL in the MODE1) */
+                       ch->stats.rx_length_errors++;
+                       goto go_for_next_frame;
+                   }
+
+                   if (rx_status & RX_STATUS_NOB)
+                   {           /* Not n*8 bits long frame - frame alignment */
+                       ch->stats.rx_frame_errors++;    /* this one, however, does not follow 'cat status' */
+                       goto go_for_next_frame;
+                   }
+
+                   if (rx_status & RX_STATUS_CRCO)
+                   {           /* CRC error */
+                       ch->stats.rx_crc_errors++;
+                       goto go_for_next_frame;
+                   }
+
+                   if (rx_status & RX_STATUS_SF)
+                   {           /* Short Frame: shorter than CRC+2 bytes */
+                       ch->stats.rx_errors++;  /* The HW does not set PCI_INT_ERR bit for this one, see page 246 */
+                       ch->stats.rx_length_errors++;
+                       goto go_for_next_frame;
+                   }
+
+                   if (rx_status != 0)
+                   {
+                       printk("SliceCOM: %s: unhandled rx_status: 0x%02x\n",
+                              dev->name, rx_status);
+                       goto go_for_next_frame;
+                   }
+
+                   /* frame received without errors: */
+
+                   length = hw->rx_desc[hw->rx_desc_ptr].bno;
+                   ch->stats.rx_packets++;     /* Count only 'good' packets */
+                   ch->stats.rx_bytes += length;
+
+                   /* Allocate a larger skb and reserve the heading for efficiency: */
+
+                   if ((skb = dev_alloc_skb(length + 16)) == NULL)
+                   {
+                       ch->stats.rx_dropped++;
+                       goto go_for_next_frame;
+                   }
+
+                   /* Do bookkeeping: */
+
+                   skb_reserve(skb, 16);
+                   skb_put(skb, length);
+                   skb->dev = dev;
+
+                   /* Now copy the data into the buffer: */
+
+                   memcpy(skb->data, &(hw->rx_data[hw->rx_desc_ptr][0]), length);
+
+                   /* DEL: UGLY HACK!!!! */
+                   if (*((int *)skb->data) == 0x02000000 &&
+                       *(((int *)skb->data) + 1) == 0x3580008f)
+                   {
+                       printk("%s: swapping hack\n", dev->name);
+                       *((int *)skb->data) = 0x3580008f;
+                       *(((int *)skb->data) + 1) = 0x02000000;
+                   }
+
+                   if (ch->debug_flags & DEBUG_HW_RX)
+                       comx_debug_skb(dev, skb, "MUNICH_interrupt receiving");
+
+                   /* Pass it to the protocol entity: */
+
+                   ch->LINE_rx(dev, skb);
+
+                   go_for_next_frame:
+                   /* DEL: detect whether the hw has run onto the HOLD bit */
+                   {
+                       if( ((rx_desc_t*)phys_to_virt(board->ccb->current_rx_desc[channel]))->hold
+                           && ((rx_desc_t*)phys_to_virt(board->ccb->current_rx_desc[channel]))->status != 0xff)
+                           hw->rafutott++;     /* rafutott: how many times the current descriptor had its HOLD bit set while the hw had already finished writing (i.e. the hw ran onto the HOLD bit) */
+                   }
+
+                   //      if( jiffies % 2 )               /* DELL: cause a little Rx ring slip :) */
+                   //      {
+                   /* Step forward with the receive descriptors: */
+                   /* if you change this, change the copy of it below too! Search for: "RxSlip" */
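+                   /*
+                    * Sketch of the Rx ring bookkeeping done below (inferred from this
+                    * code): the descriptor with HOLD set acts as a fence the chip will
+                    * not cross; it is kept two slots behind the software read pointer
+                    * (set on the slot just behind the one being freed, cleared on the
+                    * one behind that), while status = 0xFF marks a slot handed back to
+                    * the hardware. Each consumed descriptor moves the fence forward by
+                    * one, so the chip always has most of the ring available to fill.
+                    */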
+                   hw->rx_desc[(hw->rx_desc_ptr + RX_DESC_MAX - 1) % RX_DESC_MAX].hold = 1;
+                   hw->rx_desc[hw->rx_desc_ptr].status = 0xFF; /* mark that the hw has not visited this slot yet */
+                   hw->rx_desc[(hw->rx_desc_ptr + RX_DESC_MAX - 2) % RX_DESC_MAX].hold = 0;
+                   hw->rx_desc_ptr = (hw->rx_desc_ptr + 1) % RX_DESC_MAX;
+                   //      }
+               }
+           }
+       }
+
+       stat &= ~STAT_PRI;
+
+//      }
+
+//      if( stat & STAT_PTI )   /* TOD: crude implementation: only one event is handled at a time, */
+       /* and STAT is not cleared, so the system comes back here again. When an        */
+       /* interrupt arrives but there is nothing left to process, STAT is cleared.     */
+       /* 'needs a rewrite', but it will do as a first solution                        */
+//              {
+       int_info = board->tiq[board->tiq_ptr];
+       if (int_info.all & 0xF0000000)  /* if this is not 0, there is interrupt_info here       */
+       {
+           ack &= ~STAT_PTI;   /* don't ack the interrupt, we had some work to do      */
+
+           channel = PCM_INT_CHANNEL(int_info.all);
+           dev = board->twins[channel];
+
+           if (dev == NULL)
+           {
+               printk("MUNICH_interrupt: got a Tx interrupt for NULL device "
+                      "%s.twins[%d], int_info = 0x%08x\n",
+                      board->isx21 ? "pcicom" : "slicecom", channel, int_info.all);
+               goto go_for_next_tx_interrupt;
+           }
+
+           ch = netdev_priv(dev);
+           hw = (struct slicecom_privdata *)ch->HW_privdata;
+
+           //      printk("Tx STAT=0x%08x int_info=0x%08x tiq_ptr=%d\n", stat, int_info.all, board->tiq_ptr );
+
+           if (int_info.all & PCM_INT_FE2)
+           {                   /* "Tx available"                               */
+               /* do nothing */
+           }
+           else if (int_info.all & PCM_INT_FO)
+           {                   /* Internal buffer (RB) overrun */
+               ch->stats.rx_over_errors++;
+           }
+           else
+           {
+               printk("slicecom: %s: unhandled Tx int_info: 0x%08x\n",
+                      dev->name, int_info.all);
+           }
+
+           go_for_next_tx_interrupt:
+           board->tiq[board->tiq_ptr].all = 0;
+           board->tiq_ptr = (board->tiq_ptr + 1) % MUNICH_INTQMAX;
+       }
+
+       /* Check every Tx ring for completed transmissions: */
+
+       for (channel = 0; channel < 32; channel++)
+       {
+           dev = board->twins[channel];
+
+           if (dev != NULL)
+           {
+               int newbusy;
+
+               ch = netdev_priv(dev);
+               hw = (struct slicecom_privdata *)ch->HW_privdata;
+
+               /* We don't trust the "Tx available" info from the TIQ, but check        */
+               /* every ring to see whether some room has been freed                    */
+
+               if (ch->init_status && netif_running(dev))
+               {
+                   newbusy = ( TX_DESC_MAX + (& hw->tx_desc[ hw->tx_desc_ptr ]) -
+                       (tx_desc_t*)phys_to_virt(board->ccb->current_tx_desc[ hw->channel ]) ) % TX_DESC_MAX;
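+
+                   /*
+                    * newbusy is the number of descriptors still owned by the chip:
+                    * the distance from the hardware's current_tx_desc (read back via
+                    * phys_to_virt) up to our software write pointer, taken modulo
+                    * TX_DESC_MAX. For example, if we last wrote descriptor 5 and the
+                    * chip is still transmitting descriptor 3, newbusy is 2. (This is
+                    * a reading of the arithmetic above, not of the chip manual.)
+                    */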
+
+                   if(newbusy < 0)
+                   {
+                       printk("slicecom: %s: FATAL: fresly computed busy = %d, HW: 0x%p, SW: 0x%p\n",
+                       dev->name, newbusy,
+                       phys_to_virt(board->ccb->current_tx_desc[hw->channel]),
+                       & hw->tx_desc[hw->tx_desc_ptr]);
+                   }
+
+                   /* Has anything been consumed from the Tx ring? */
+
+                   if (newbusy < hw->busy)
+                   {
+                       // ack &= ~STAT_PTI;                            /* Don't ack, we had some work  */
+                       hw->busy = newbusy;
+                       if (ch->LINE_tx)
+                           ch->LINE_tx(dev);   /* Report it to protocol driver */
+                   }
+                   else if (newbusy > hw->busy)
+                       printk("slicecom: %s: newbusy > hw->busy, this should not happen!\n", dev->name);
+               }
+           }
+       }
+       stat &= ~STAT_PTI;
+
+       int_info = board->piq[board->piq_ptr];
+       if (int_info.all & 0xF0000000)  /* if this is not 0, there is interrupt_info here       */
+       {
+           ack &= ~STAT_LBII;  /* don't ack the interrupt, we had some work to do      */
+
+           /* We do not really use (yet) the interrupt info from this queue, */
+
+           // printk("slicecom: %s: LBI Interrupt event: %08x\n", board->devname, int_info.all);
+
+           if (!board->isx21)
+           {
+               slicecom_update_leds(board);
+               slicecom_update_line_counters(board);
+           }
+
+           goto go_for_next_lbi_interrupt;     /* To avoid warning about unused label  */
+
+           go_for_next_lbi_interrupt:  /* One step in the interrupt queue */
+           board->piq[board->piq_ptr].all = 0; /* mark that the hw has not visited this slot yet */
+           board->piq_ptr = (board->piq_ptr + 1) % MUNICH_PIQMAX;
+       }
+       stat &= ~STAT_LBII;
+
+       writel(ack, MUNICH_VIRT(STAT));
+
+       if (stat & STAT_TSPA)
+       {
+           //      printk("slicecom: %s: PCM TSP Asynchronous\n", board->devname);
+           writel(STAT_TSPA, MUNICH_VIRT(STAT));
+           stat &= ~STAT_TSPA;
+       }
+
+       if (stat & STAT_RSPA)
+       {
+           //      printk("slicecom: %s: PCM RSP Asynchronous\n", board->devname);
+           writel(STAT_RSPA, MUNICH_VIRT(STAT));
+           stat &= ~STAT_RSPA;
+       }
+       if (stat)
+       {
+           printk("MUNICH_interrupt: unhandled interrupt, STAT=0x%08x\n",
+                  stat);
+           writel(stat, MUNICH_VIRT(STAT));    /* ha valamit megsem kezeltunk le, azert ack-ot kuldunk neki */
+       }
+
+    }
+    board->histogram[work]++;
+
+    /* We can miss these if we reach the MAX_WORK   */
+    /* Count it to see how often it happens         */
+
+    if (race_stat & STAT_PRI)
+       board->stat_pri_races_missed++;
+    if (race_stat & STAT_PTI)
+       board->stat_pti_races_missed++;
+    return IRQ_HANDLED;
+}
+
+/* 
+ * Hardware open routine.
+ * Called by comx (upper) layer when the user wants to bring up the interface
+ * with ifconfig.
+ * Initializes hardware, allocates resources etc.
+ * Returns 0 on OK, or standard error value on error.
+ */
+
+static int MUNICH_open(struct net_device *dev)
+{
+    struct comx_channel *ch = netdev_priv(dev);
+    struct slicecom_privdata *hw = ch->HW_privdata;
+    struct proc_dir_entry *procfile = ch->procdir->subdir;
+    munich_board_t *board;
+    munich_ccb_t *ccb;
+
+    u32 *bar1;
+    u8 *lbi;
+    u32 stat;
+    unsigned long flags, jiffs;
+
+    int i, channel;
+    u32 timeslots = hw->timeslots;
+
+    board = hw->boardnum + (ch->hardware == &pcicomhw ? pcicom_boards : slicecom_boards);
+
+    bar1 = board->bar1;
+    lbi = board->lbi;
+
+    /* TODO: the timeslots should be checked here... or should they? they are already
+       checked to some extent in write_proc.
+       if (!dev->io || !dev->irq) return -ENODEV;
+     */
+
+    if (!board->pci)
+    {
+       printk("MUNICH_open: no %s board with boardnum = %d\n",
+              ch->hardware->name, hw->boardnum);
+       return -ENODEV;
+    }
+
+    spin_lock_irqsave(&mister_lock, flags);
+    /* lock the section to avoid race with multiple opens and make sure
+       that no interrupts get called while this lock is active */
+
+    if (board->use_count == 0) /* bring up the board if it was unused                  */
+       /* if it fails, frees the allocated resources and returns.      */
+       /* TOD: is it safe? shouldn't the card be reset here?           */
+    {
+       printk("MUNICH_open: %s: bringing up board\n", board->devname);
+
+       /* Clean up the board's static struct if messed: */
+
+       for (i = 0; i < 32; i++)
+           board->twins[i] = NULL;
+       for (i = 0; i < MAX_WORK; i++)
+           board->histogram[i] = 0;
+
+       board->lineup = 0;
+
+       /* Allocate CCB: */
+        board->ccb = kmalloc(sizeof(munich_ccb_t), GFP_KERNEL);
+       if (board->ccb == NULL)
+       {
+           spin_unlock_irqrestore(&mister_lock, flags);
+           return -ENOMEM;
+       }
+       memset((void *)board->ccb, 0, sizeof(munich_ccb_t));
+       board->ccb->csa = virt_to_phys(board->ccb);
+       ccb = board->ccb;
+       for (i = 0; i < 32; i++)
+       {
+           ccb->timeslot_spec[i].tti = 1;
+           ccb->timeslot_spec[i].rti = 1;
+       }
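+
+       /*
+        * tti/rti = 1 appears to mark a timeslot as unassigned: later in
+        * MUNICH_open() these bits are cleared and txchannel/rxchannel filled
+        * in when an interface claims the slot, and the "already used" check
+        * below tests for tti == 0. This is inferred from the driver itself,
+        * not from the data sheet.
+        */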
+
+       /* Interrupt queues: */
+
+       board->tiq = kmalloc(MUNICH_INTQSIZE, GFP_KERNEL);
+       if (board->tiq == NULL)
+       {
+           spin_unlock_irqrestore(&mister_lock, flags);
+           return -ENOMEM;
+       }
+       memset((void *)board->tiq, 0, MUNICH_INTQSIZE);
+
+       board->riq = kmalloc(MUNICH_INTQSIZE, GFP_KERNEL);
+       if (board->riq == NULL)
+       {
+           spin_unlock_irqrestore(&mister_lock, flags);
+           return -ENOMEM;
+       }
+       memset((void *)board->riq, 0, MUNICH_INTQSIZE);
+
+       board->piq = kmalloc(MUNICH_PIQSIZE, GFP_KERNEL);
+       if (board->piq == NULL)
+       {
+           spin_unlock_irqrestore(&mister_lock, flags);
+           return -ENOMEM;
+       }
+       memset((void *)board->piq, 0, MUNICH_PIQSIZE);
+
+       board->tiq_ptr = 0;
+       board->riq_ptr = 0;
+       board->piq_ptr = 0;
+
+       /* Request irq: */
+
+       board->irq = 0;
+
+       /* (char*) cast to avoid warning about discarding volatile:             */
+       if (request_irq(board->pci->irq, MUNICH_interrupt, 0,
+           (char *)board->devname, (void *)board))
+       {
+           printk("MUNICH_open: %s: unable to obtain irq %d\n", board->devname,
+                  board->pci->irq);
+           /* TOD: free the other resources (the many allocations above)           */
+           spin_unlock_irqrestore(&mister_lock, flags);
+           return -EAGAIN;
+       }
+       board->irq = board->pci->irq;   /* let it be != 0 only if it is really allocated to us */
+
+       /* Programming device: */
+
+       /* Reset the board like a power-on: */
+       /* TOD:
+          - It is not a real power-on: if a DMA transaction fails with master abort, the board
+          stays in half-dead state.
+          - It doesn't reset the FALC line driver */
+
+       pci_write_config_dword(board->pci, MUNICH_PCI_PCIRES, 0xe0000);
+       set_current_state(TASK_UNINTERRUPTIBLE);
+       schedule_timeout(1);
+       pci_write_config_dword(board->pci, MUNICH_PCI_PCIRES, 0);
+       set_current_state(TASK_UNINTERRUPTIBLE);
+       schedule_timeout(1);
+
+        writel(virt_to_phys(&ccb->csa), MUNICH_VIRT(CCBA));
+        writel(virt_to_phys( board->tiq ), MUNICH_VIRT(TIQBA));
+        writel(MUNICH_INTQLEN, MUNICH_VIRT(TIQL));
+        writel(virt_to_phys( board->riq ), MUNICH_VIRT(RIQBA));
+        writel(MUNICH_INTQLEN, MUNICH_VIRT(RIQL));
+        writel(virt_to_phys( board->piq ), MUNICH_VIRT(PIQBA));
+        writel(MUNICH_PIQLEN, MUNICH_VIRT(PIQL));
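+
+       /*
+        * The writes above hand the chip the physical addresses of the shared
+        * structures it will DMA into (register roles as read from this code):
+        * CCBA gets the address of the CCB's csa word, TIQBA/RIQBA/PIQBA the
+        * transmit, receive and peripheral (LBI) interrupt queues, and the
+        * *QL registers their lengths. These queues are the circular buffers
+        * that MUNICH_interrupt() drains via tiq_ptr, riq_ptr and piq_ptr.
+        */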
+        
+       /* Put the magic values into the registers: */
+
+       writel(MODE1_MAGIC, MUNICH_VIRT(MODE1));
+       writel(MODE2_MAGIC, MUNICH_VIRT(MODE2));
+
+       writel(LREG0_MAGIC, MUNICH_VIRT(LREG0));
+       writel(LREG1_MAGIC, MUNICH_VIRT(LREG1));
+       writel(LREG2_MAGIC, MUNICH_VIRT(LREG2));
+       writel(LREG3_MAGIC, MUNICH_VIRT(LREG3));
+       writel(LREG4_MAGIC, MUNICH_VIRT(LREG4));
+       writel(LREG5_MAGIC, MUNICH_VIRT(LREG5));
+
+       writel(LCONF_MAGIC1, MUNICH_VIRT(LCONF));       /* reset the DMSM */
+       writel(LCONF_MAGIC2, MUNICH_VIRT(LCONF));       /* enable the DMSM */
+
+       writel(~0, MUNICH_VIRT(TXPOLL));
+       writel(board->isx21 ? 0x1400 : 0xa000, MUNICH_VIRT(GPDIR));
+
+       if (readl(MUNICH_VIRT(STAT))) writel(readl(MUNICH_VIRT(STAT)), MUNICH_VIRT(STAT));
+
+       ccb->action_spec = CCB_ACTIONSPEC_RES | CCB_ACTIONSPEC_IA;
+       writel(CMD_ARPCM, MUNICH_VIRT(CMD));    /* Start the PCM core reset */
+       set_current_state(TASK_UNINTERRUPTIBLE);
+       schedule_timeout(1);
+
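+       /*
+        * Command/poll pattern used throughout this driver: the action request
+        * is described in ccb->action_spec, CMD_ARPCM is written to CMD, and
+        * then STAT is polled for up to one second. STAT_PCMA appears to signal
+        * that the action completed and STAT_PCMF that it failed; whichever bit
+        * is seen is written back to STAT to acknowledge it.
+        */
+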
+       stat = 0;               /* Wait for the action to complete max. 1 second */
+       jiffs = jiffies;
+       while (!((stat = readl(MUNICH_VIRT(STAT))) & (STAT_PCMA | STAT_PCMF)) && time_before(jiffies, jiffs + HZ))
+       {
+           set_current_state(TASK_UNINTERRUPTIBLE);
+           schedule_timeout(1);
+       }
+
+       if (stat & STAT_PCMF)
+       {
+           printk(KERN_ERR
+                  "MUNICH_open: %s: Initial ARPCM failed. STAT=0x%08x\n",
+                  board->devname, stat);
+           writel(readl(MUNICH_VIRT(STAT)) & STAT_PCMF, MUNICH_VIRT(STAT));
+           free_irq(board->irq, (void *)board);        /* TOD: free other resources too *//* maybe shut down hw? */
+           board->irq = 0;
+           spin_unlock_irqrestore(&mister_lock, flags);
+           return -EAGAIN;
+       }
+       else if (!(stat & STAT_PCMA))
+       {
+           printk(KERN_ERR
+                  "MUNICH_open: %s: Initial ARPCM timeout. STAT=0x%08x\n",
+                  board->devname, stat);
+           free_irq(board->irq, (void *)board);        /* TOD: free other resources too *//* maybe shut off the hw? */
+           board->irq = 0;
+           spin_unlock_irqrestore(&mister_lock, flags);
+           return -EIO;
+       }
+
+       writel(readl(MUNICH_VIRT(STAT)) & STAT_PCMA, MUNICH_VIRT(STAT));        /* Acknowledge */
+
+       if (board->isx21) writel(0, MUNICH_VIRT(GPDATA));
+
+       printk("MUNICH_open: %s: succesful HW-open took %ld jiffies\n",
+              board->devname, jiffies - jiffs);
+
+       /* Set up the FALC hanging on the Local Bus: */
+
+       if (!board->isx21)
+       {
+           writeb(0x0e, lbi + FMR1);
+           writeb(0, lbi + LIM0);
+           writeb(0xb0, lbi + LIM1);   /* TODO: input threshold */
+           writeb(0xf7, lbi + XPM0);
+           writeb(0x02, lbi + XPM1);
+           writeb(0x00, lbi + XPM2);
+           writeb(0xf0, lbi + FMR0);
+           writeb(0x80, lbi + PCD);
+           writeb(0x80, lbi + PCR);
+           writeb(0x00, lbi + LIM2);
+           writeb(0x07, lbi + XC0);
+           writeb(0x3d, lbi + XC1);
+           writeb(0x05, lbi + RC0);
+           writeb(0x00, lbi + RC1);
+           writeb(0x83, lbi + FMR2);
+           writeb(0x9f, lbi + XSW);
+           writeb(0x0f, lbi + XSP);
+           writeb(0x00, lbi + TSWM);
+           writeb(0xe0, lbi + MODE);
+           writeb(0xff, lbi + IDLE);   /* Idle Code to send in unused timeslots        */
+           writeb(0x83, lbi + IPC);    /* interrupt query line mode: Push/pull output, active high     */
+           writeb(0xbf, lbi + IMR3);   /* send an interrupt every second               */
+
+           slicecom_set_framing(hw->boardnum, board->framing);
+           slicecom_set_linecode(hw->boardnum, board->linecode);
+           slicecom_set_clock_source(hw->boardnum, board->clock_source);
+           slicecom_set_loopback(hw->boardnum, board->loopback);
+
+           memset((void *)board->intervals, 0, sizeof(board->intervals));
+           board->current_interval = 0;
+           board->elapsed_seconds = 0;
+           board->ses_seconds = 0;
+           board->is_unavailable = 0;
+           board->no_ses_seconds = 0;
+           board->deg_elapsed_seconds = 0;
+           board->deg_cumulated_errors = 0;
+       }
+
+       /* Enable the interrupts last                                                   */
+       /* These interrupts will be enabled. We do not need the others. */
+
+       writel(readl(MUNICH_VIRT(IMASK)) & ~(STAT_PTI | STAT_PRI | STAT_LBII | STAT_TSPA | STAT_RSPA), MUNICH_VIRT(IMASK));
+    }
+
+    spin_unlock_irqrestore(&mister_lock, flags);
+
+    dev->irq = board->irq;     /* so that the ifconfig output looks nice */
+    ccb = board->ccb;          /* TODO: a bit ugly that this is loaded both inside and outside the if :( */
+
+    spin_lock_irqsave(&mister_lock, flags);
+
+    set_current_state(TASK_UNINTERRUPTIBLE);
+    schedule_timeout(1);
+
+    /* Check if the selected timeslots aren't used already */
+
+    for (i = 0; i < 32; i++)
+       if (((1 << i) & timeslots) && !ccb->timeslot_spec[i].tti)
+       {
+           printk("MUNICH_open: %s: timeslot %d already used by %s\n",
+                  dev->name, i, board->twins[ccb->timeslot_spec[i].txchannel]->name);
+           spin_unlock_irqrestore(&mister_lock, flags);
+           return -EBUSY;      /* TODO: maybe some other errno would be more appropriate? */
+       }
+
+    /* find a free channel: */
+    /* TODO: ugly, rewrite it  */
+
+    for (channel = 0; channel <= 32; channel++)
+    {
+       if (channel == 32)
+       {                       /* not found a free one */
+           printk
+               ("MUNICH_open: %s: FATAL: can not find a free channel - this should not happen!\n",
+                dev->name);
+           spin_unlock_irqrestore(&mister_lock, flags);
+           return -ENODEV;
+       }
+       if (board->twins[channel] == NULL)
+           break;              /* found the first free one */
+    }
+
+    board->lastcheck = jiffies;        /* avoid checking uninitialized hardware channel */
+
+    /* Open the channel. If it fails, MUNICH_close() is called to properly free resources and stop the HW */
+
+    hw->channel = channel;
+    board->twins[channel] = dev;
+
+    board->use_count++;                /* the channel is not open yet, but a slot in twins has
+                                  already been taken, and this is needed if we want to call _close. */
+    for (i = 0; i < 32; i++)
+       if ((1 << i) & timeslots)
+       {
+           ccb->timeslot_spec[i].tti = 0;
+           ccb->timeslot_spec[i].txchannel = channel;
+           ccb->timeslot_spec[i].txfillmask = ~0;
+
+           ccb->timeslot_spec[i].rti = 0;
+           ccb->timeslot_spec[i].rxchannel = channel;
+           ccb->timeslot_spec[i].rxfillmask = ~0;
+       }
+
+    if (!board->isx21) rework_idle_channels(dev);
+
+    memset((void *)&(hw->tx_desc), 0, TX_DESC_MAX * sizeof(tx_desc_t));
+    memset((void *)&(hw->rx_desc), 0, RX_DESC_MAX * sizeof(rx_desc_t));
+
+    for (i = 0; i < TX_DESC_MAX; i++)
+    {
+       hw->tx_desc[i].fe = 1;
+       hw->tx_desc[i].fnum = 2;
+                hw->tx_desc[i].data     = virt_to_phys( & (hw->tx_data[i][0]) );
+                hw->tx_desc[i].next     = virt_to_phys( & (hw->tx_desc[ (i+1) % TX_DESC_MAX ]) );
+
+    }
+    hw->tx_desc_ptr = 0;       /* we will send an initial packet, so it is correct: "this is where we wrote last" */
+    hw->busy = 0;
+    hw->tx_desc[hw->tx_desc_ptr].hold = 1;
+    hw->tx_desc[hw->tx_desc_ptr].no = 1;       /* TOD: should we rather send a zero-length frame at init? */
+
+    for (i = 0; i < RX_DESC_MAX; i++)
+    {
+       hw->rx_desc[i].no = RXBUFFER_SIZE;
+       hw->rx_desc[i].data = virt_to_phys(&(hw->rx_data[i][0]));
+       hw->rx_desc[i].next = virt_to_phys(&(hw->rx_desc[(i+1) % RX_DESC_MAX]));
+       hw->rx_desc[i].status = 0xFF;
+    }
+    hw->rx_desc_ptr = 0;
+
+    hw->rx_desc[(hw->rx_desc_ptr + RX_DESC_MAX - 2) % RX_DESC_MAX].hold = 1;
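+
+    /*
+     * Initial HOLD placement: with rx_desc_ptr = 0, setting HOLD two slots
+     * behind it establishes the same "fence trails the read pointer by two"
+     * invariant that the interrupt handler maintains while stepping through
+     * the ring (see the "RxSlip" block in MUNICH_interrupt).
+     */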
+
+    memset((void *)&ccb->channel_spec[channel], 0, sizeof(channel_spec_t));
+
+    ccb->channel_spec[channel].ti = 0; /* Transmit off */
+    ccb->channel_spec[channel].to = 1;
+    ccb->channel_spec[channel].ta = 0;
+
+    ccb->channel_spec[channel].th = 1; /* Transmit hold        */
+
+    ccb->channel_spec[channel].ri = 0; /* Receive off  */
+    ccb->channel_spec[channel].ro = 1;
+    ccb->channel_spec[channel].ra = 0;
+
+    ccb->channel_spec[channel].mode = 3;       /* HDLC */
+
+    ccb->action_spec = CCB_ACTIONSPEC_IN | (channel << 8);
+    writel(CMD_ARPCM, MUNICH_VIRT(CMD));
+    set_current_state(TASK_UNINTERRUPTIBLE);
+    schedule_timeout(1);
+
+    spin_unlock_irqrestore(&mister_lock, flags);
+
+    stat = 0;
+    jiffs = jiffies;
+    while (!((stat = readl(MUNICH_VIRT(STAT))) & (STAT_PCMA | STAT_PCMF)) && time_before(jiffies, jiffs + HZ))
+    {
+       set_current_state(TASK_UNINTERRUPTIBLE);
+       schedule_timeout(1);
+    }
+
+    if (stat & STAT_PCMF)
+    {
+       printk(KERN_ERR "MUNICH_open: %s: %s channel %d off failed\n",
+              dev->name, board->devname, channel);
+       writel(readl(MUNICH_VIRT(STAT)) & STAT_PCMF, MUNICH_VIRT(STAT));
+       MUNICH_close(dev);
+       return -EAGAIN;
+    }
+    else if (!(stat & STAT_PCMA))
+    {
+       printk(KERN_ERR "MUNICH_open: %s: %s channel %d off timeout\n",
+              dev->name, board->devname, channel);
+       MUNICH_close(dev);
+       return -EIO;
+    }
+
+    writel(readl(MUNICH_VIRT(STAT)) & STAT_PCMA, MUNICH_VIRT(STAT));
+    //      printk("MUNICH_open: %s: succesful channel off took %ld jiffies\n", board->devname, jiffies-jiffs);
+
+    spin_lock_irqsave(&mister_lock, flags);
+
+    set_current_state(TASK_UNINTERRUPTIBLE);
+    schedule_timeout(1);
+
+    ccb->channel_spec[channel].ifc = 1;        /* 1 .. 'Idle/Flag change' interrupt disabled   */
+    ccb->channel_spec[channel].fit = 1;
+    ccb->channel_spec[channel].nitbs = 1;
+    ccb->channel_spec[channel].itbs = 2;
+
+    /* TODOO: it might be better this way, but it should be checked whether it causes fragmentation */
+    //      ccb->channel_spec[channel].itbs = 2 * number_of_timeslots;
+    //      printk("open: %s: number_of_timeslots: %d\n", dev->name, number_of_timeslots);
+
+    ccb->channel_spec[channel].mode = 3;       /* HDLC */
+    ccb->channel_spec[channel].ftda = virt_to_phys(&(hw->tx_desc));
+    ccb->channel_spec[channel].frda = virt_to_phys(&(hw->rx_desc[0]));
+
+    ccb->channel_spec[channel].ti = 1; /* Transmit init        */
+    ccb->channel_spec[channel].to = 0;
+    ccb->channel_spec[channel].ta = 1;
+
+    ccb->channel_spec[channel].th = 0;
+
+    ccb->channel_spec[channel].ri = 1; /* Receive init */
+    ccb->channel_spec[channel].ro = 0;
+    ccb->channel_spec[channel].ra = 1;
+
+    ccb->action_spec = CCB_ACTIONSPEC_ICO | (channel << 8);
+    writel(CMD_ARPCM, MUNICH_VIRT(CMD));       /* Start the channel init */
+    set_current_state(TASK_UNINTERRUPTIBLE);
+    schedule_timeout(1);
+
+    spin_unlock_irqrestore(&mister_lock, flags);
+
+    stat = 0;                  /* Wait for the action to complete max. 1 second */
+    jiffs = jiffies;
+    while (!((stat = readl(MUNICH_VIRT(STAT))) & (STAT_PCMA | STAT_PCMF)) && time_before(jiffies, jiffs + HZ))
+    {
+       set_current_state(TASK_UNINTERRUPTIBLE);
+        schedule_timeout(1);
+    }
+
+    if (stat & STAT_PCMF)
+    {
+       printk(KERN_ERR "MUNICH_open: %s: channel open ARPCM failed\n",
+              board->devname);
+       writel(readl(MUNICH_VIRT(STAT)) & STAT_PCMF, MUNICH_VIRT(STAT));
+       MUNICH_close(dev);
+       return -EAGAIN;
+    }
+    else if (!(stat & STAT_PCMA))
+    {
+       printk(KERN_ERR "MUNICH_open: %s: channel open ARPCM timeout\n",
+              board->devname);
+       MUNICH_close(dev);
+       return -EIO;
+    }
+
+    writel(readl(MUNICH_VIRT(STAT)) & STAT_PCMA, MUNICH_VIRT(STAT));
+    //      printk("MUNICH_open: %s: succesful channel open took %ld jiffies\n", board->devname, jiffies-jiffs);
+
+    spin_lock_irqsave(&mister_lock, flags);
+
+    ccb->channel_spec[channel].nitbs = 0;      /* once ITBS defined, these must be 0   */
+    ccb->channel_spec[channel].itbs = 0;
+
+    if (board->isx21)
+    {
+       init_timer(&board->modemline_timer);
+       board->modemline_timer.data = (unsigned long)board;
+       board->modemline_timer.function = pcicom_modemline;
+       board->modemline_timer.expires = jiffies + HZ;
+       add_timer((struct timer_list *)&board->modemline_timer);
+    }
+
+    /* It is done. Declare that we're open: */
+    hw->busy = 0;              /* It may be 1 if the frame at Tx init already ended, but it is not     */
+    /* a real problem: we compute hw->busy on every interrupt                       */
+    hw->rafutott = 0;
+    ch->init_status |= HW_OPEN;
+
+    /* Initialize line state: */
+    if (board->lineup)
+       ch->line_status |= LINE_UP;
+    else
+       ch->line_status &= ~LINE_UP;
+
+    /* Remove w attribute from /proc files associated to hw parameters:
+       no write when the device is open */
+
+    for (; procfile; procfile = procfile->next)
+       if (strcmp(procfile->name, FILENAME_BOARDNUM) == 0 ||
+           strcmp(procfile->name, FILENAME_TIMESLOTS) == 0)
+           procfile->mode = S_IFREG | 0444;
+
+    spin_unlock_irqrestore(&mister_lock, flags);
+
+    return 0;
+}
+
+/*
+ * Hardware close routine.
+ * Called by comx (upper) layer when the user wants to bring down the interface
+ * with ifconfig.
+ * We also call it from MUNICH_open, if the open fails.
+ * Brings down hardware, frees resources, stops receiver
+ * Returns 0 on OK, or standard error value on error.
+ */
+
+static int MUNICH_close(struct net_device *dev)
+{
+    struct comx_channel *ch = netdev_priv(dev);
+    struct slicecom_privdata *hw = ch->HW_privdata;
+    struct proc_dir_entry *procfile = ch->procdir->subdir;
+    munich_board_t *board;
+    munich_ccb_t *ccb;
+
+    u32 *bar1;
+    u32 timeslots = hw->timeslots;
+    int stat, i, channel = hw->channel;
+    unsigned long jiffs;
+
+    board = hw->boardnum + (ch->hardware == &pcicomhw ? pcicom_boards : slicecom_boards);
+
+    ccb = board->ccb;
+    bar1 = board->bar1;
+
+    if (board->isx21)
+       del_timer((struct timer_list *)&board->modemline_timer);
+
+    spin_lock_irqsave(&mister_lock, flags);
+
+    set_current_state(TASK_UNINTERRUPTIBLE);
+    schedule_timeout(1);
+
+    /* Disable receiver for the channel: */
+
+    for (i = 0; i < 32; i++)
+       if ((1 << i) & timeslots)
+       {
+           ccb->timeslot_spec[i].tti = 1;
+           ccb->timeslot_spec[i].txfillmask = 0;       /* just to be double-sure :) */
+
+           ccb->timeslot_spec[i].rti = 1;
+           ccb->timeslot_spec[i].rxfillmask = 0;
+       }
+
+    if (!board->isx21) rework_idle_channels(dev);
+
+    ccb->channel_spec[channel].ti = 0; /* Receive off, Transmit off */
+    ccb->channel_spec[channel].to = 1;
+    ccb->channel_spec[channel].ta = 0;
+    ccb->channel_spec[channel].th = 1;
+
+    ccb->channel_spec[channel].ri = 0;
+    ccb->channel_spec[channel].ro = 1;
+    ccb->channel_spec[channel].ra = 0;
+
+    board->twins[channel] = NULL;
+
+    ccb->action_spec = CCB_ACTIONSPEC_IN | (channel << 8);
+    writel(CMD_ARPCM, MUNICH_VIRT(CMD));
+    set_current_state(TASK_UNINTERRUPTIBLE);
+    schedule_timeout(1);
+
+    spin_unlock_irqrestore(&mister_lock, flags);
+
+    stat = 0;
+    jiffs = jiffies;
+    while (!((stat = readl(MUNICH_VIRT(STAT))) & (STAT_PCMA | STAT_PCMF)) && time_before(jiffies, jiffs + HZ))
+    {
+       set_current_state(TASK_UNINTERRUPTIBLE);
+       schedule_timeout(1);
+    }
+
+    if (stat & STAT_PCMF)
+    {
+       printk(KERN_ERR
+              "MUNICH_close: %s: FATAL: channel off ARPCM failed, not closing!\n",
+              dev->name);
+       writel(readl(MUNICH_VIRT(STAT)) & STAT_PCMF, MUNICH_VIRT(STAT));
+       /* If we return success, the privdata (and the descriptor list) will be freed */
+       return -EIO;
+    }
+    else if (!(stat & STAT_PCMA))
+       printk(KERN_ERR "MUNICH_close: %s: channel off ARPCM timeout\n",
+              board->devname);
+
+    writel(readl(MUNICH_VIRT(STAT)) & STAT_PCMA, MUNICH_VIRT(STAT));
+    //      printk("MUNICH_close: %s: channel off took %ld jiffies\n", board->devname, jiffies-jiffs);
+
+    spin_lock_irqsave(&mister_lock, flags);
+
+    if (board->use_count) board->use_count--;
+
+    if (!board->use_count)     /* we were the last user of the board */
+    {
+       printk("MUNICH_close: bringing down board %s\n", board->devname);
+
+       /* program down the board: */
+
+       writel(0x0000FF7F, MUNICH_VIRT(IMASK)); /* do not send any interrupts */
+       writel(0, MUNICH_VIRT(CMD));    /* stop the timer if someone started it */
+       writel(~0U, MUNICH_VIRT(STAT)); /* if an interrupt came between the cli()-sti(), quiet it */
+       if (ch->hardware == &pcicomhw)
+           writel(0x1400, MUNICH_VIRT(GPDATA));
+
+       /* Put the board into 'reset' state: */
+       pci_write_config_dword(board->pci, MUNICH_PCI_PCIRES, 0xe0000);
+
+       /* Free irq and other resources: */
+       if (board->irq)
+           free_irq(board->irq, (void *)board);        /* If it has not initialized itself, there is no irq yet */
+       board->irq = 0;
+
+       /* Free CCB and the interrupt queues */
+       if (board->ccb) kfree((void *)board->ccb);
+       if (board->tiq) kfree((void *)board->tiq);
+       if (board->riq) kfree((void *)board->riq);
+       if (board->piq) kfree((void *)board->piq);
+       board->ccb = NULL;
+       board->tiq = board->riq = board->piq = NULL;
+    }
+
+    /* Enable setting of hw parameters */
+    for (; procfile; procfile = procfile->next)
+       if (strcmp(procfile->name, FILENAME_BOARDNUM) == 0 ||
+           strcmp(procfile->name, FILENAME_TIMESLOTS) == 0)
+           procfile->mode = S_IFREG | 0644;
+
+    /* We're not open anymore */
+    ch->init_status &= ~HW_OPEN;
+
+    spin_unlock_irqrestore(&mister_lock, flags);
+
+    return 0;
+}
+
+/* 
+ * Give (textual) status information.
+ * The text it returns will be a part of what appears when the user does a
+ * cat /proc/comx/comx[n]/status 
+ * Don't write more than PAGESIZE.
+ * Return value: number of bytes written (length of the string, incl. 0)
+ */
+
+static int MUNICH_minden(struct net_device *dev, char *page)
+{
+    struct comx_channel *ch = netdev_priv(dev);
+    struct slicecom_privdata *hw = ch->HW_privdata;
+    munich_board_t *board;
+    struct net_device *devp;
+
+    u8 *lbi;
+    e1_stats_t *curr_int, *prev_int;
+    e1_stats_t last4, last96;  /* sum of last 4, resp. last 96 intervals               */
+    unsigned *sump,            /* running pointer for the sum data                     */
+     *p;                       /* running pointer for the interval data                */
+
+    int len = 0;
+    u8 frs0, frs1;
+    u8 fmr2;
+    int i, j;
+    u32 timeslots;
+
+    board = hw->boardnum + (ch->hardware == &pcicomhw ? pcicom_boards : slicecom_boards);
+
+    lbi = board->lbi;
+    curr_int = &board->intervals[board->current_interval];
+    prev_int =
+       &board->
+       intervals[(board->current_interval + SLICECOM_BOARD_INTERVALS_SIZE -
+                  1) % SLICECOM_BOARD_INTERVALS_SIZE];
+
+    if (!board->isx21)
+    {
+       frs0 = readb(lbi + FRS0);
+       fmr2 = readb(lbi + FMR2);
+       len += scnprintf(page + len, PAGE_SIZE - len, "Controller status:\n");
+       if (frs0 == 0)
+           len += scnprintf(page + len, PAGE_SIZE - len, "\tNo alarms\n");
+       else
+       {
+           if (frs0 & FRS0_LOS)
+                   len += scnprintf(page + len, PAGE_SIZE - len, "\tLoss Of Signal\n");
+           else
+           {
+               if (frs0 & FRS0_AIS)
+                   len += scnprintf(page + len, PAGE_SIZE - len,
+                                "\tAlarm Indication Signal\n");
+               else
+               {
+                   if (frs0 & FRS0_AUXP)
+                       len += scnprintf(page + len, PAGE_SIZE - len,
+                                    "\tAuxiliary Pattern Indication\n");
+                   if (frs0 & FRS0_LFA)
+                       len += scnprintf(page + len, PAGE_SIZE - len,
+                                    "\tLoss of Frame Alignment\n");
+                   else
+                   {
+                       if (frs0 & FRS0_RRA)
+                           len += scnprintf(page + len, PAGE_SIZE - len,
+                                        "\tReceive Remote Alarm\n");
+
+                       /* You can't set this framing with the /proc interface, but it  */
+                       /* may be good to have here this alarm if you set it by hand:   */
+
+                       if ((board->framing == SLICECOM_FRAMING_CRC4) &&
+                           (frs0 & FRS0_LMFA))
+                           len += scnprintf(page + len, PAGE_SIZE - len,
+                                        "\tLoss of CRC4 Multiframe Alignment\n");
+
+                       if (((fmr2 & 0xc0) == 0xc0) && (frs0 & FRS0_NMF))
+                           len += scnprintf(page + len, PAGE_SIZE - len,
+                                "\tNo CRC4 Multiframe alignment Found after 400 msec\n");
+                   }
+               }
+           }
+       }
+
+       frs1 = readb(lbi + FRS1);
+       if (FRS1_XLS & frs1)
+           len += scnprintf(page + len, PAGE_SIZE - len,
+                "\tTransmit Line Short\n");
+
+       /* debug Rx ring: DEL: - or keep it, but then make it a bit more general */
+    }
+
+    len += scnprintf(page + len, PAGE_SIZE - len, "Rx ring:\n");
+    len += scnprintf(page + len, PAGE_SIZE - len, "\trafutott: %d\n", hw->rafutott);
+    len += scnprintf(page + len, PAGE_SIZE - len,
+                "\tlastcheck: %ld, jiffies: %ld\n", board->lastcheck, jiffies);
+    len += scnprintf(page + len, PAGE_SIZE - len, "\tbase: %08x\n",
+       (u32) virt_to_phys(&hw->rx_desc[0]));
+    len += scnprintf(page + len, PAGE_SIZE - len, "\trx_desc_ptr: %d\n",
+                hw->rx_desc_ptr);
+    len += scnprintf(page + len, PAGE_SIZE - len, "\trx_desc_ptr: %08x\n",
+       (u32) virt_to_phys(&hw->rx_desc[hw->rx_desc_ptr]));
+    len += scnprintf(page + len, PAGE_SIZE - len, "\thw_curr_ptr: %08x\n",
+                board->ccb->current_rx_desc[hw->channel]);
+
+    for (i = 0; i < RX_DESC_MAX; i++)
+       len += scnprintf(page + len, PAGE_SIZE - len, "\t%08x %08x %08x %08x\n",
+                    *((u32 *) & hw->rx_desc[i] + 0),
+                    *((u32 *) & hw->rx_desc[i] + 1),
+                    *((u32 *) & hw->rx_desc[i] + 2),
+                    *((u32 *) & hw->rx_desc[i] + 3));
+
+    if (!board->isx21)
+    {
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "Interfaces using this board: (channel-group, interface, timeslots)\n");
+       for (i = 0; i < 32; i++)
+       {
+           devp = board->twins[i];
+           if (devp != NULL)
+           {
+               timeslots =
+                   ((struct slicecom_privdata *)((struct comx_channel *)devp->
+                                                 priv)->HW_privdata)->
+                   timeslots;
+               len += scnprintf(page + len, PAGE_SIZE - len, "\t%2d %s: ", i,
+                            devp->name);
+               for (j = 0; j < 32; j++)
+                   if ((1 << j) & timeslots)
+                       len += scnprintf(page + len, PAGE_SIZE - len, "%d ", j);
+               len += scnprintf(page + len, PAGE_SIZE - len, "\n");
+           }
+       }
+    }
+
+    len += scnprintf(page + len, PAGE_SIZE - len, "Interrupt work histogram:\n");
+    for (i = 0; i < MAX_WORK; i++)
+       len += scnprintf(page + len, PAGE_SIZE - len, "hist[%2d]: %8u%c", i,
+                    board->histogram[i], (i &&
+                                          ((i + 1) % 4 == 0 ||
+                                           i == MAX_WORK - 1)) ? '\n' : ' ');
+
+    len += scnprintf(page + len, PAGE_SIZE - len, "Tx ring histogram:\n");
+    for (i = 0; i < TX_DESC_MAX; i++)
+       len += scnprintf(page + len, PAGE_SIZE - len, "hist[%2d]: %8u%c", i,
+                    hw->tx_ring_hist[i], (i &&
+                                          ((i + 1) % 4 == 0 ||
+                                           i ==
+                                           TX_DESC_MAX - 1)) ? '\n' : ' ');
+
+    if (!board->isx21)
+    {
+
+       memset((void *)&last4, 0, sizeof(last4));
+       memset((void *)&last96, 0, sizeof(last96));
+
+       /* Calculate the sum of last 4 intervals: */
+
+       for (i = 1; i <= 4; i++)
+       {
+           p = (unsigned *)&board->intervals[(board->current_interval +
+                          SLICECOM_BOARD_INTERVALS_SIZE -
+                          i) % SLICECOM_BOARD_INTERVALS_SIZE];
+           sump = (unsigned *)&last4;
+           for (j = 0; j < (sizeof(e1_stats_t) / sizeof(unsigned)); j++)
+               sump[j] += p[j];
+       }
+
+       /* Calculate the sum of last 96 intervals: */
+
+       for (i = 1; i <= 96; i++)
+       {
+           p = (unsigned *)&board->intervals[(board->current_interval +
+                          SLICECOM_BOARD_INTERVALS_SIZE -
+                          i) % SLICECOM_BOARD_INTERVALS_SIZE];
+           sump = (unsigned *)&last96;
+           for (j = 0; j < (sizeof(e1_stats_t) / sizeof(unsigned)); j++)
+               sump[j] += p[j];
+       }
+
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "Data in current interval (%d seconds elapsed):\n",
+                    board->elapsed_seconds);
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "   %d Line Code Violations, %d Path Code Violations, %d E-Bit Errors\n",
+                    curr_int->line_code_violations,
+                    curr_int->path_code_violations, curr_int->e_bit_errors);
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "   %d Slip Secs, %d Fr Loss Secs, %d Line Err Secs, %d Degraded Mins\n",
+                    curr_int->slip_secs, curr_int->fr_loss_secs,
+                    curr_int->line_err_secs, curr_int->degraded_mins);
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "   %d Errored Secs, %d Bursty Err Secs, %d Severely Err Secs, %d Unavail Secs\n",
+                    curr_int->errored_secs, curr_int->bursty_err_secs,
+                    curr_int->severely_err_secs, curr_int->unavail_secs);
+
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "Data in Interval 1 (15 minutes):\n");
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "   %d Line Code Violations, %d Path Code Violations, %d E-Bit Errors\n",
+                    prev_int->line_code_violations,
+                    prev_int->path_code_violations, prev_int->e_bit_errors);
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "   %d Slip Secs, %d Fr Loss Secs, %d Line Err Secs, %d Degraded Mins\n",
+                    prev_int->slip_secs, prev_int->fr_loss_secs,
+                    prev_int->line_err_secs, prev_int->degraded_mins);
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "   %d Errored Secs, %d Bursty Err Secs, %d Severely Err Secs, %d Unavail Secs\n",
+                    prev_int->errored_secs, prev_int->bursty_err_secs,
+                    prev_int->severely_err_secs, prev_int->unavail_secs);
+
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "Data in last 4 intervals (1 hour):\n");
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "   %d Line Code Violations, %d Path Code Violations, %d E-Bit Errors\n",
+                    last4.line_code_violations, last4.path_code_violations,
+                    last4.e_bit_errors);
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "   %d Slip Secs, %d Fr Loss Secs, %d Line Err Secs, %d Degraded Mins\n",
+                    last4.slip_secs, last4.fr_loss_secs, last4.line_err_secs,
+                    last4.degraded_mins);
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "   %d Errored Secs, %d Bursty Err Secs, %d Severely Err Secs, %d Unavail Secs\n",
+                    last4.errored_secs, last4.bursty_err_secs,
+                    last4.severely_err_secs, last4.unavail_secs);
+
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "Data in last 96 intervals (24 hours):\n");
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "   %d Line Code Violations, %d Path Code Violations, %d E-Bit Errors\n",
+                    last96.line_code_violations, last96.path_code_violations,
+                    last96.e_bit_errors);
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "   %d Slip Secs, %d Fr Loss Secs, %d Line Err Secs, %d Degraded Mins\n",
+                    last96.slip_secs, last96.fr_loss_secs,
+                    last96.line_err_secs, last96.degraded_mins);
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "   %d Errored Secs, %d Bursty Err Secs, %d Severely Err Secs, %d Unavail Secs\n",
+                    last96.errored_secs, last96.bursty_err_secs,
+                    last96.severely_err_secs, last96.unavail_secs);
+
+    }
+
+//      len +=scnprintf( page + len, PAGE_SIZE - len, "Special events:\n" );
+//      len +=scnprintf( page + len, PAGE_SIZE - len, "\tstat_pri/missed: %u / %u\n", board->stat_pri_races, board->stat_pri_races_missed );
+//      len +=scnprintf( page + len, PAGE_SIZE - len, "\tstat_pti/missed: %u / %u\n", board->stat_pti_races, board->stat_pti_races_missed );
+    return len;
+}
+
+/*
+ * Memory dump function. Not used currently.
+ */
+static int BOARD_dump(struct net_device *dev)
+{
+    printk
+       ("BOARD_dump() requested. It is unimplemented, it should not be called\n");
+    return (-1);
+}
+
+/* 
+ * /proc file read function for the files registered by this module.
+ * This function is called by the procfs implementation when a user
+ * wants to read from a file registered by this module.
+ * page is the workspace, start should point to the real start of data,
+ * off is the file offset, data points to the file's proc_dir_entry
+ * structure.
+ * Returns the number of bytes copied to the request buffer.
+ */
+
+static int munich_read_proc(char *page, char **start, off_t off, int count,
+                           int *eof, void *data)
+{
+    struct proc_dir_entry *file = (struct proc_dir_entry *)data;
+    struct net_device *dev = file->parent->data;
+    struct comx_channel *ch = netdev_priv(dev);
+    struct slicecom_privdata *hw = ch->HW_privdata;
+    munich_board_t *board;
+
+    int len = 0, i;
+    u32 timeslots = hw->timeslots;
+
+    board = hw->boardnum + (ch->hardware == &pcicomhw ? pcicom_boards : slicecom_boards);
+
+    if (!strcmp(file->name, FILENAME_BOARDNUM))
+       len = sprintf(page, "%d\n", hw->boardnum);
+    else if (!strcmp(file->name, FILENAME_TIMESLOTS))
+    {
+       for (i = 0; i < 32; i++)
+           if ((1 << i) & timeslots)
+               len += scnprintf(page + len, PAGE_SIZE - len, "%d ", i);
+       len += scnprintf(page + len, PAGE_SIZE - len, "\n");
+    }
+    else if (!strcmp(file->name, FILENAME_FRAMING))
+    {
+       i = 0;
+       while (slicecom_framings[i].value &&
+              slicecom_framings[i].value != board->framing)
+           i++;
+       len += scnprintf(page + len, PAGE_SIZE - len, "%s\n",
+                    slicecom_framings[i].name);
+    }
+    else if (!strcmp(file->name, FILENAME_LINECODE))
+    {
+       i = 0;
+       while (slicecom_linecodes[i].value &&
+              slicecom_linecodes[i].value != board->linecode)
+           i++;
+       len += scnprintf(page + len, PAGE_SIZE - len, "%s\n",
+                    slicecom_linecodes[i].name);
+    }
+    else if (!strcmp(file->name, FILENAME_CLOCK_SOURCE))
+    {
+       i = 0;
+       while (slicecom_clock_sources[i].value &&
+              slicecom_clock_sources[i].value != board->clock_source)
+           i++;
+       len +=
+           scnprintf(page + len, PAGE_SIZE - len, "%s\n",
+                    slicecom_clock_sources[i].name);
+    }
+    else if (!strcmp(file->name, FILENAME_LOOPBACK))
+    {
+       i = 0;
+       while (slicecom_loopbacks[i].value &&
+              slicecom_loopbacks[i].value != board->loopback)
+           i++;
+       len += scnprintf(page + len, PAGE_SIZE - len, "%s\n",
+                    slicecom_loopbacks[i].name);
+    }
+    /* We set permissions to write-only for REG and LBIREG, but root can read them anyway: */
+    else if (!strcmp(file->name, FILENAME_REG))
+    {
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "%s: " FILENAME_REG ": write-only file\n", dev->name);
+    }
+    else if (!strcmp(file->name, FILENAME_LBIREG))
+    {
+       len += scnprintf(page + len, PAGE_SIZE - len,
+                    "%s: " FILENAME_LBIREG ": write-only file\n", dev->name);
+    }
+    else
+    {
+       printk("slicecom_read_proc: internal error, filename %s\n", file->name);
+       return -EBADF;
+    }
+    /* file handling administration: count eof status, offset, start address
+       and count: */
+
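+    /* As this driver uses the old read_proc interface: the full output is
+       regenerated into 'page' on every call, *start points into it at the
+       requested offset, *eof is set once everything at or beyond 'off' has
+       been produced, and the return value is the number of valid bytes from
+       'off', capped at 'count'.                                               */
+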
+    if (off >= len)
+    {
+       *eof = 1;
+       return 0;
+    }
+
+    *start = page + off;
+    if (count >= len - off)
+       *eof = 1;
+    return min((off_t) count, (off_t) len - off);
+}
+
+/* 
+ * Write function for /proc files registered by us.
+ * See the comment on read function above.
+ * Beware! buffer is in userspace!!!
+ * Returns the number of bytes written
+ */
+
+static int munich_write_proc(struct file *file, const char *buffer,
+                            u_long count, void *data)
+{
+    struct proc_dir_entry *entry = (struct proc_dir_entry *)data;
+    struct net_device *dev = (struct net_device *)entry->parent->data;
+    struct comx_channel *ch = netdev_priv(dev);
+    struct slicecom_privdata *hw = ch->HW_privdata;
+    munich_board_t *board;
+
+    unsigned long ts, tmp_boardnum;
+
+    u32 tmp_timeslots = 0;
+    char *page, *p;
+    int i;
+
+    board = hw->boardnum + (ch->hardware == &pcicomhw ? pcicom_boards : slicecom_boards);
+
+    /* Paranoia checking: */
+
+    if (PDE(file->f_dentry->d_inode) != entry)
+    {
+       printk(KERN_ERR "munich_write_proc: file <-> data internal error\n");
+       return -EIO;
+    }
+
+    /* Request tmp buffer */
+    if (!(page = (char *)__get_free_page(GFP_KERNEL)))
+       return -ENOMEM;
+
+    /* Copy user data and cut trailing \n */
+    if (copy_from_user(page, buffer, count = min(count, PAGE_SIZE))) {
+           free_page((unsigned long)page);
+           return -EFAULT;
+    }
+    if (*(page + count - 1) == '\n')
+       *(page + count - 1) = 0;
+    *(page + PAGE_SIZE - 1) = 0;
+
+    if (!strcmp(entry->name, FILENAME_BOARDNUM))
+    {
+       tmp_boardnum = simple_strtoul(page, NULL, 0);
+       if (0 <= tmp_boardnum && tmp_boardnum < MAX_BOARDS)
+           hw->boardnum = tmp_boardnum;
+       else
+       {
+           printk("%s: " FILENAME_BOARDNUM " range is 0...%d\n", dev->name,
+                  MAX_BOARDS - 1);
+           free_page((unsigned long)page);
+           return -EINVAL;
+       }
+    }
+    else if (!strcmp(entry->name, FILENAME_TIMESLOTS))
+    {
+       p = page;
+       while (*p)
+       {
+           if (isspace(*p))
+               p++;
+           else
+           {
+               ts = simple_strtoul(p, &p, 10); /* base = 10: Don't read 09 as an octal number */
+               /* ts = 0 if the string could not be parsed; we rely on that a bit here: */
+               if (0 <= ts && ts < 32)
+               {
+                   tmp_timeslots |= (1 << ts);
+               }
+               else
+               {
+                   printk("%s: " FILENAME_TIMESLOTS " range is 1...31\n",
+                          dev->name);
+                   free_page((unsigned long)page);
+                   return -EINVAL;
+               }
+           }
+       }
+       hw->timeslots = tmp_timeslots;
+    }
+    else if (!strcmp(entry->name, FILENAME_FRAMING))
+    {
+       i = 0;
+       while (slicecom_framings[i].value &&
+              strncmp(slicecom_framings[i].name, page,
+                      strlen(slicecom_framings[i].name)))
+           i++;
+       if (!slicecom_framings[i].value)
+       {
+           printk("slicecom: %s: Invalid " FILENAME_FRAMING " '%s'\n",
+                  dev->name, page);
+           free_page((unsigned long)page);
+           return -EINVAL;
+       }
+       else
+       {                       /*
+                                * If somebody says:
+                                *      echo >boardnum  0
+                                *      echo >framing   no-crc4
+                                *      echo >boardnum  1
+                                * - when the framing was set, hw->boardnum was 0, so it would set the framing for board 0
+                                * Workaround: allow to set it only if interface is administrative UP
+                                */
+           if (netif_running(dev))
+               slicecom_set_framing(hw->boardnum, slicecom_framings[i].value);
+           else
+           {
+               printk("%s: " FILENAME_FRAMING
+                      " can not be set while the interface is DOWN\n",
+                      dev->name);
+               free_page((unsigned long)page);
+               return -EINVAL;
+           }
+       }
+    }
+    else if (!strcmp(entry->name, FILENAME_LINECODE))
+    {
+       i = 0;
+       while (slicecom_linecodes[i].value &&
+              strncmp(slicecom_linecodes[i].name, page,
+                      strlen(slicecom_linecodes[i].name)))
+           i++;
+       if (!slicecom_linecodes[i].value)
+       {
+           printk("slicecom: %s: Invalid " FILENAME_LINECODE " '%s'\n",
+                  dev->name, page);
+           free_page((unsigned long)page);
+           return -EINVAL;
+       }
+       else
+       {                       /*
+                                * Allow setting it only while the interface is administratively UP,
+                                * for the same reason as FILENAME_FRAMING
+                                */
+           if (netif_running(dev))
+               slicecom_set_linecode(hw->boardnum,
+                                     slicecom_linecodes[i].value);
+           else
+           {
+               printk("%s: " FILENAME_LINECODE
+                      " can not be set while the interface is DOWN\n",
+                      dev->name);
+               free_page((unsigned long)page);
+               return -EINVAL;
+           }
+       }
+    }
+    else if (!strcmp(entry->name, FILENAME_CLOCK_SOURCE))
+    {
+       i = 0;
+       while (slicecom_clock_sources[i].value &&
+              strncmp(slicecom_clock_sources[i].name, page,
+                      strlen(slicecom_clock_sources[i].name)))
+           i++;
+       if (!slicecom_clock_sources[i].value)
+       {
+           printk("%s: Invalid " FILENAME_CLOCK_SOURCE " '%s'\n", dev->name,
+                  page);
+           free_page((unsigned long)page);
+           return -EINVAL;
+       }
+       else
+       {                       /*
+                                * Allow setting it only while the interface is administratively UP,
+                                * for the same reason as FILENAME_FRAMING
+                                */
+           if (netif_running(dev))
+               slicecom_set_clock_source(hw->boardnum,
+                                         slicecom_clock_sources[i].value);
+           else
+           {
+               printk("%s: " FILENAME_CLOCK_SOURCE
+                      " can not be set while the interface is DOWN\n",
+                      dev->name);
+               free_page((unsigned long)page);
+               return -EINVAL;
+           }
+       }
+    }
+    else if (!strcmp(entry->name, FILENAME_LOOPBACK))
+    {
+       i = 0;
+       while (slicecom_loopbacks[i].value &&
+              strncmp(slicecom_loopbacks[i].name, page,
+                      strlen(slicecom_loopbacks[i].name)))
+           i++;
+       if (!slicecom_loopbacks[i].value)
+       {
+           printk("%s: Invalid " FILENAME_LOOPBACK " '%s'\n", dev->name, page);
+           free_page((unsigned long)page);
+           return -EINVAL;
+       }
+       else
+       {                       /*
+                                * Allow setting it only while the interface is administratively UP,
+                                * for the same reason as FILENAME_FRAMING
+                                */
+           if (netif_running(dev))
+               slicecom_set_loopback(hw->boardnum,
+                                     slicecom_loopbacks[i].value);
+           else
+           {
+               printk("%s: " FILENAME_LOOPBACK
+                      " can not be set while the interface is DOWN\n",
+                      dev->name);
+               free_page((unsigned long)page);
+               return -EINVAL;
+           }
+       }
+    }
+    else if (!strcmp(entry->name, FILENAME_REG))
+    {                          /* DEL: 'reg' is only temporary */
+       char *p;
+       u32 *bar1 = board->bar1;
+
+       reg = simple_strtoul(page, &p, 0);
+       reg_ertek = simple_strtoul(p + 1, NULL, 0);
+
+       if (reg < 0x100)
+       {
+           printk("reg(0x%02x) := 0x%08x  jiff: %lu\n", reg, reg_ertek, jiffies);
+           writel(reg_ertek, MUNICH_VIRT(reg >> 2));
+       }
+       else
+       {
+           printk("reg(0x%02x) is 0x%08x  jiff: %lu\n", reg - 0x100,
+                  readl(MUNICH_VIRT((reg - 0x100) >> 2)), jiffies);
+       }
+    }
+    else if (!strcmp(entry->name, FILENAME_LBIREG))
+    {                          /* DEL: 'lbireg' is only temporary */
+       char *p;
+       u8 *lbi = board->lbi;
+
+       lbireg = simple_strtoul(page, &p, 0);
+       lbireg_ertek = simple_strtoul(p + 1, NULL, 0);
+
+       if (lbireg < 0x100)
+       {
+           printk("lbireg(0x%02x) := 0x%02x  jiff: %lu\n", lbireg,
+                  lbireg_ertek, jiffies);
+           writeb(lbireg_ertek, lbi + lbireg);
+       }
+       else
+           printk("lbireg(0x%02x) is 0x%02x  jiff: %lu\n", lbireg - 0x100,
+                  readb(lbi + lbireg - 0x100), jiffies);
+    }
+    else
+    {
+       printk(KERN_ERR "munich_write_proc: internal error, filename %s\n",
+              entry->name);
+       free_page((unsigned long)page);
+       return -EBADF;
+    }
+
+    /* Don't forget to free the workspace */
+    free_page((unsigned long)page);
+    return count;
+}
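
The FILENAME_TIMESLOTS branch above folds a space-separated list of timeslot
numbers into a 32-bit mask, one bit per E1 timeslot. A minimal user-space
sketch of the same parsing idea; the function name and the strict "end == s"
rejection of unparsable input are illustrative additions, not part of the
driver (which deliberately tolerates a parse failure as ts == 0):

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

/* Fold a list such as "1 2 17 31" into a bitmask, one bit per timeslot. */
static unsigned int parse_timeslots(const char *s)
{
    unsigned int mask = 0;

    while (*s) {
        if (isspace((unsigned char)*s)) {
            s++;
        } else {
            char *end;
            unsigned long ts = strtoul(s, &end, 10);

            if (end == s || ts > 31)
                return 0;               /* reject malformed input */
            mask |= 1u << ts;
            s = end;
        }
    }
    return mask;
}

int main(void)
{
    printf("0x%08x\n", parse_timeslots("1 2 17 31"));  /* prints 0x80020006 */
    return 0;
}
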
+
+/*
+ * Boardtype init function.
+ * Called by the comx (upper) layer when the boardtype is set.
+ * Allocates the resources needed to use a munich board with this device,
+ * initializes the ch_struct pointers, etc.
+ * Returns 0 on success and standard error codes on error.
+ */
+
+static int init_escape(struct comx_channel *ch)
+{
+    kfree(ch->HW_privdata);
+    return -EIO;
+}
+
+static int BOARD_init(struct net_device *dev)
+{
+    struct comx_channel *ch = netdev_priv(dev);
+    struct slicecom_privdata *hw;
+    struct proc_dir_entry *new_file;
+
+    /* Alloc data for private structure */
+    if ((ch->HW_privdata =
+       kmalloc(sizeof(struct slicecom_privdata), GFP_KERNEL)) == NULL)
+        return -ENOMEM;
+        
+    memset(hw = ch->HW_privdata, 0, sizeof(struct slicecom_privdata));
+
+    /* Register /proc files */
+    if ((new_file = create_proc_entry(FILENAME_BOARDNUM, S_IFREG | 0644,
+                          ch->procdir)) == NULL)
+       return init_escape(ch);
+    new_file->data = (void *)new_file;
+    new_file->read_proc = &munich_read_proc;
+    new_file->write_proc = &munich_write_proc;
+//      new_file->proc_iops = &comx_normal_inode_ops;
+    new_file->nlink = 1;
+
+    if (ch->hardware == &slicecomhw)
+    {
+       if ((new_file = create_proc_entry(FILENAME_TIMESLOTS, S_IFREG | 0644,
+                              ch->procdir)) == NULL)
+           return init_escape(ch);
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &munich_read_proc;
+       new_file->write_proc = &munich_write_proc;
+//              new_file->proc_iops = &comx_normal_inode_ops;
+       new_file->nlink = 1;
+
+       if ((new_file = create_proc_entry(FILENAME_FRAMING, S_IFREG | 0644,
+                              ch->procdir)) == NULL)
+           return init_escape(ch);
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &munich_read_proc;
+       new_file->write_proc = &munich_write_proc;
+//              new_file->proc_iops = &comx_normal_inode_ops;
+       new_file->nlink = 1;
+
+       if ((new_file = create_proc_entry(FILENAME_LINECODE, S_IFREG | 0644,
+                              ch->procdir)) == NULL)
+           return init_escape(ch);
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &munich_read_proc;
+       new_file->write_proc = &munich_write_proc;
+//              new_file->proc_iops = &comx_normal_inode_ops;
+       new_file->nlink = 1;
+
+       if ((new_file = create_proc_entry(FILENAME_CLOCK_SOURCE, S_IFREG | 0644,
+                              ch->procdir)) == NULL)
+           return init_escape(ch);
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &munich_read_proc;
+       new_file->write_proc = &munich_write_proc;
+//              new_file->proc_iops = &comx_normal_inode_ops;
+       new_file->nlink = 1;
+
+       if ((new_file = create_proc_entry(FILENAME_LOOPBACK, S_IFREG | 0644,
+                              ch->procdir)) == NULL)
+           return init_escape(ch);
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &munich_read_proc;
+       new_file->write_proc = &munich_write_proc;
+//              new_file->proc_iops = &comx_normal_inode_ops;
+       new_file->nlink = 1;
+    }
+
+    /* DEL: this is here for development purposes only!! */
+    if ((new_file = create_proc_entry(FILENAME_REG, S_IFREG | 0200, ch->procdir)) == NULL)
+       return init_escape(ch);
+    new_file->data = (void *)new_file;
+    new_file->read_proc = &munich_read_proc;
+    new_file->write_proc = &munich_write_proc;
+//      new_file->proc_iops = &comx_normal_inode_ops;
+    new_file->nlink = 1;
+
+    /* DEL: this is here for development purposes only!! */
+    if ((new_file = create_proc_entry(FILENAME_LBIREG, S_IFREG | 0200,
+                          ch->procdir)) == NULL)
+       return init_escape(ch);
+    new_file->data = (void *)new_file;
+    new_file->read_proc = &munich_read_proc;
+    new_file->write_proc = &munich_write_proc;
+//      new_file->proc_iops = &comx_normal_inode_ops;
+    new_file->nlink = 1;
+
+    /* Fill in ch_struct hw specific pointers: */
+
+    ch->HW_txe = MUNICH_txe;
+    ch->HW_open = MUNICH_open;
+    ch->HW_close = MUNICH_close;
+    ch->HW_send_packet = MUNICH_send_packet;
+#ifndef COMX_NEW
+    ch->HW_minden = MUNICH_minden;
+#else
+    ch->HW_statistics = MUNICH_minden;
+#endif
+
+    hw->boardnum = SLICECOM_BOARDNUM_DEFAULT;
+    hw->timeslots = ch->hardware == &pcicomhw ?  0xffffffff : 2;
+
+    /* O.K. Count one more user on this module */
+    MOD_INC_USE_COUNT;
+    return 0;
+}
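
The six create_proc_entry() blocks above all register the same pair of
handlers. A hypothetical helper, sketched against the same 2.4-era procfs API
and this file's own munich_read_proc/munich_write_proc, shows the shared
pattern (munich_proc_file is not a function in this driver):

/* Hypothetical helper: register one proc file the way BOARD_init() does. */
static struct proc_dir_entry *munich_proc_file(const char *name, mode_t mode,
                                               struct proc_dir_entry *dir)
{
    struct proc_dir_entry *pde = create_proc_entry(name, S_IFREG | mode, dir);

    if (pde == NULL)
        return NULL;
    pde->data = (void *)pde;            /* handlers find their entry via data */
    pde->read_proc = &munich_read_proc;
    pde->write_proc = &munich_write_proc;
    pde->nlink = 1;
    return pde;
}

With such a helper, each registration in BOARD_init() would reduce to a single
call plus the init_escape(ch) error path.
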
+
+/*
+ * Boardtype exit function.
+ * Called by the comx (upper) layer when the boardtype is cleared from munich.
+ * Frees the resources associated with using a munich board with this device,
+ * resets the ch_struct pointers, etc.
+ */
+static int BOARD_exit(struct net_device *dev)
+{
+    struct comx_channel *ch = netdev_priv(dev);
+
+    /* Free private data area */
+//    board = hw->boardnum + (ch->hardware == &pcicomhw ? pcicom_boards : slicecom_boards);
+
+    kfree(ch->HW_privdata);
+    /* Remove /proc files */
+    remove_proc_entry(FILENAME_BOARDNUM, ch->procdir);
+    if (ch->hardware == &slicecomhw)
+    {
+       remove_proc_entry(FILENAME_TIMESLOTS, ch->procdir);
+       remove_proc_entry(FILENAME_FRAMING, ch->procdir);
+       remove_proc_entry(FILENAME_LINECODE, ch->procdir);
+       remove_proc_entry(FILENAME_CLOCK_SOURCE, ch->procdir);
+       remove_proc_entry(FILENAME_LOOPBACK, ch->procdir);
+    }
+    remove_proc_entry(FILENAME_REG, ch->procdir);
+    remove_proc_entry(FILENAME_LBIREG, ch->procdir);
+
+    /* Minus one user for the module accounting */
+    MOD_DEC_USE_COUNT;
+    return 0;
+}
+
+static struct comx_hardware slicecomhw =
+{
+    "slicecom",
+#ifdef COMX_NEW
+    VERSION,
+#endif
+    BOARD_init,
+    BOARD_exit,
+    BOARD_dump,
+    NULL
+};
+
+static struct comx_hardware pcicomhw =
+{
+    "pcicom",
+#ifdef COMX_NEW
+    VERSION,
+#endif
+    BOARD_init,
+    BOARD_exit,
+    BOARD_dump,
+    NULL
+};
+
+/* Module management */
+
+static int __init init_mister(void)
+{
+    printk(VERSIONSTR);
+    comx_register_hardware(&slicecomhw);
+    comx_register_hardware(&pcicomhw);
+    return munich_probe();
+}
+
+static void __exit cleanup_mister(void)
+{
+    int i;
+
+    comx_unregister_hardware("slicecom");
+    comx_unregister_hardware("pcicom");
+
+    for (i = 0; i < MAX_BOARDS; i++)
+    {
+       if (slicecom_boards[i].bar1)
+           iounmap((void *)slicecom_boards[i].bar1);
+       if (slicecom_boards[i].lbi)
+           iounmap((void *)slicecom_boards[i].lbi);
+       if (pcicom_boards[i].bar1)
+           iounmap((void *)pcicom_boards[i].bar1);
+       if (pcicom_boards[i].lbi)
+           iounmap((void *)pcicom_boards[i].lbi);
+    }
+}
+
+module_init(init_mister);
+module_exit(cleanup_mister);
diff --git a/drivers/net/wan/comx-proto-fr.c b/drivers/net/wan/comx-proto-fr.c
new file mode 100644 (file)
index 0000000..c955136
--- /dev/null
@@ -0,0 +1,1014 @@
+/*
+ * Frame-relay protocol module for the COMX driver 
+ * for Linux 2.2.X
+ *
+ * Original author: Tivadar Szemethy <tiv@itc.hu>
+ * Maintainer: Gergely Madarasz <gorgo@itc.hu>
+ *
+ * Copyright (C) 1998-1999 ITConsult-Pro Co. <info@itc.hu>
+ * 
+ * Contributors:
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br> (0.73)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Version 0.70 (99/06/14):
+ *             - cleaned up the source code a bit
+ *             - ported back to kernel, now works as builtin code 
+ *
+ * Version 0.71 (99/06/25):
+ *             - use skb priorities and queues for sending keepalive
+ *             - use device queues for slave->master data transmit
+ *             - set IFF_RUNNING only line protocol up
+ *             - fixes on slave device flags
+ * 
+ * Version 0.72 (99/07/09):
+ *             - handle slave tbusy with master tbusy (should be fixed)
+ *             - fix the keepalive timer addition/deletion
+ *
+ * Version 0.73 (00/08/15)
+ *             - resource release on failure at fr_master_init and
+ *               fr_slave_init                   
+ */
+
+#define VERSION "0.73"
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/jiffies.h>
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/if_arp.h>
+#include <linux/inetdevice.h>
+#include <linux/pkt_sched.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+
+#include "comx.h"
+#include "comxhw.h"
+
+MODULE_AUTHOR("Author: Tivadar Szemethy <tiv@itc.hu>");
+MODULE_DESCRIPTION("Frame Relay protocol implementation for the COMX drivers "
+       "for Linux kernel 2.4.X");
+MODULE_LICENSE("GPL");
+
+#define        FRAD_UI         0x03
+#define        NLPID_IP        0xcc
+#define        NLPID_Q933_LMI  0x08
+#define        NLPID_CISCO_LMI 0x09    
+#define Q933_ENQ       0x75
+#define        Q933_LINESTAT   0x51
+#define        Q933_COUNTERS   0x53
+
+#define        MAXALIVECNT     3               /* No. of failures */
+
+struct fr_data {
+       u16     dlci;
+       struct  net_device *master;
+       char    keepa_pend;
+       char    keepa_freq;
+       char    keepalivecnt, keeploopcnt;
+       struct  timer_list keepa_timer;
+       u8      local_cnt, remote_cnt;
+};
+
+static struct comx_protocol fr_master_protocol;
+static struct comx_protocol fr_slave_protocol;
+static struct comx_hardware fr_dlci;
+
+static void fr_keepalive_send(struct net_device *dev) 
+{
+       struct comx_channel *ch = dev->priv;
+       struct fr_data *fr = ch->LINE_privdata;
+       struct sk_buff *skb;
+       u8 *fr_packet;
+       
+       skb=alloc_skb(dev->hard_header_len + 13, GFP_ATOMIC);
+       
+       if(skb==NULL)
+               return;
+               
+        skb_reserve(skb, dev->hard_header_len);
+        
+        fr_packet=(u8*)skb_put(skb, 13);
+                 
+       fr_packet[0] = (fr->dlci & (1024 - 15)) >> 2;
+       fr_packet[1] = (fr->dlci & 15) << 4 | 1;        // EA bit 1
+       fr_packet[2] = FRAD_UI;
+       fr_packet[3] = NLPID_Q933_LMI;
+       fr_packet[4] = 0;
+       fr_packet[5] = Q933_ENQ;
+       fr_packet[6] = Q933_LINESTAT;
+       fr_packet[7] = 0x01;
+       fr_packet[8] = 0x01;
+       fr_packet[9] = Q933_COUNTERS;
+       fr_packet[10] = 0x02;
+       fr_packet[11] = ++fr->local_cnt;
+       fr_packet[12] = fr->remote_cnt;
+
+       skb->dev = dev;
+       skb->priority = TC_PRIO_CONTROL;
+       dev_queue_xmit(skb);
+}
+
+static void fr_keepalive_timerfun(unsigned long d) 
+{
+       struct net_device *dev = (struct net_device *)d;
+       struct comx_channel *ch = dev->priv;
+       struct fr_data *fr = ch->LINE_privdata;
+       struct proc_dir_entry *dir = ch->procdir->parent->subdir;
+       struct comx_channel *sch;
+       struct fr_data *sfr;
+       struct net_device *sdev;
+
+       if (ch->init_status & LINE_OPEN) {
+               if (fr->keepalivecnt == MAXALIVECNT) {
+                       comx_status(dev, ch->line_status & ~PROTO_UP);
+                       dev->flags &= ~IFF_RUNNING;
+                       for (; dir ; dir = dir->next) {
+                               if(!S_ISDIR(dir->mode)) {
+                                   continue;
+                               }
+       
+                               if ((sdev = dir->data) && (sch = sdev->priv) && 
+                                   (sdev->type == ARPHRD_DLCI) && 
+                                   (sfr = sch->LINE_privdata) 
+                                   && (sfr->master == dev) && 
+                                   (sdev->flags & IFF_UP)) {
+                                       sdev->flags &= ~IFF_RUNNING;
+                                       comx_status(sdev, 
+                                               sch->line_status & ~PROTO_UP);
+                               }
+                       }
+               }
+               if (fr->keepalivecnt <= MAXALIVECNT) {
+                       ++fr->keepalivecnt;
+               }
+               fr_keepalive_send(dev);
+       }
+       mod_timer(&fr->keepa_timer, jiffies + HZ * fr->keepa_freq);
+}
+
+static void fr_rx_lmi(struct net_device *dev, struct sk_buff *skb, 
+       u16 dlci, u8 nlpid) 
+{
+       struct comx_channel *ch = dev->priv;
+       struct fr_data *fr = ch->LINE_privdata;
+       struct proc_dir_entry *dir = ch->procdir->parent->subdir;
+       struct comx_channel *sch;
+       struct fr_data *sfr;
+       struct net_device *sdev;
+
+       if (dlci != fr->dlci || nlpid != NLPID_Q933_LMI || !fr->keepa_freq) {
+               return;
+       }
+
+       fr->remote_cnt = skb->data[7];
+       if (skb->data[8] == fr->local_cnt) { // keepalive UP!
+               fr->keepalivecnt = 0;
+               if ((ch->line_status & LINE_UP) && 
+                   !(ch->line_status & PROTO_UP)) {
+                       comx_status(dev, ch->line_status |= PROTO_UP);
+                       dev->flags |= IFF_RUNNING;
+                       for (; dir ; dir = dir->next) {
+                               if(!S_ISDIR(dir->mode)) {
+                                   continue;
+                               }
+       
+                               if ((sdev = dir->data) && (sch = sdev->priv) && 
+                                   (sdev->type == ARPHRD_DLCI) && 
+                                   (sfr = sch->LINE_privdata) 
+                                   && (sfr->master == dev) && 
+                                   (sdev->flags & IFF_UP)) {
+                                       sdev->flags |= IFF_RUNNING;
+                                       comx_status(sdev, 
+                                               sch->line_status | PROTO_UP);
+                               }
+                       }
+               }
+       }
+}
+
+static void fr_set_keepalive(struct net_device *dev, int keepa) 
+{
+       struct comx_channel *ch = dev->priv;
+       struct fr_data *fr = ch->LINE_privdata;
+
+       if (!keepa && fr->keepa_freq) { // switch off
+               fr->keepa_freq = 0;
+               if (ch->line_status & LINE_UP) {
+                       comx_status(dev, ch->line_status | PROTO_UP);
+                       dev->flags |= IFF_RUNNING;
+                       del_timer(&fr->keepa_timer);
+               }
+               return;
+       }
+
+       if (keepa) { // switch on
+               if(fr->keepa_freq && (ch->line_status & LINE_UP)) {
+                       del_timer(&fr->keepa_timer);
+               }
+               fr->keepa_freq = keepa;
+               fr->local_cnt = fr->remote_cnt = 0;
+               init_timer(&fr->keepa_timer);
+               fr->keepa_timer.expires = jiffies + HZ;
+               fr->keepa_timer.function = fr_keepalive_timerfun;
+               fr->keepa_timer.data = (unsigned long)dev;
+               ch->line_status &= ~(PROTO_UP | PROTO_LOOP);
+               dev->flags &= ~IFF_RUNNING;
+               comx_status(dev, ch->line_status);
+               if(ch->line_status & LINE_UP) {
+                       add_timer(&fr->keepa_timer);
+               }
+       }
+}
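
fr_set_keepalive(), fr_status(), fr_open() and fr_close() share one timer
lifecycle built on the 2.4-era kernel timer API. A condensed sketch of that
pattern; keepalive_start, my_keepalive_fn and the static variables are
placeholder names, not the driver's:

#include <linux/timer.h>
#include <linux/jiffies.h>
#include <linux/netdevice.h>

static struct timer_list keepa_timer;
static int keepa_freq;                          /* seconds between keepalives */

static void my_keepalive_fn(unsigned long data)
{
    /* ... send the LMI status enquiry for (struct net_device *)data ... */
    mod_timer(&keepa_timer, jiffies + HZ * keepa_freq);  /* re-arm */
}

static void keepalive_start(struct net_device *dev, int freq)
{
    keepa_freq = freq;
    init_timer(&keepa_timer);                   /* prepare the timer_list */
    keepa_timer.function = my_keepalive_fn;     /* callback(unsigned long) */
    keepa_timer.data = (unsigned long)dev;      /* argument for the callback */
    keepa_timer.expires = jiffies + HZ * freq;  /* first expiry in freq seconds */
    add_timer(&keepa_timer);                    /* arm it */
}

static void keepalive_stop(void)
{
    del_timer(&keepa_timer);                    /* disarm when the line drops */
}
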
+
+static void fr_rx(struct net_device *dev, struct sk_buff *skb) 
+{
+       struct comx_channel *ch = dev->priv;
+       struct proc_dir_entry *dir = ch->procdir->parent->subdir;
+       struct net_device *sdev = dev;
+       struct comx_channel *sch;
+       struct fr_data *sfr;
+       u16 dlci;
+       u8 nlpid;
+
+       if(skb->len <= 4 || skb->data[2] != FRAD_UI) {
+               kfree_skb(skb);
+               return;
+       }
+
+       /* Figure out which slave should receive this packet */
+       dlci = ((skb->data[0] & 0xfc) << 2) | ((skb->data[1] & 0xf0) >> 4);
+       if ((nlpid = skb->data[3]) == 0) { // Optional padding 
+               nlpid = skb->data[4];
+               skb_pull(skb, 1);
+       }
+       skb_pull(skb, 4);       /* strip the DLCI address and header bytes */
+
+       if (ch->debug_flags & DEBUG_COMX_DLCI) {
+               comx_debug(dev, "Frame received, DLCI: %d, NLPID: 0x%02x\n", 
+                       dlci, nlpid);
+               comx_debug_skb(dev, skb, "Contents");
+       }
+
+       /* Find the slave it belongs to */
+       for (; dir ; dir = dir->next) {
+               if(!S_ISDIR(dir->mode)) {
+                       continue;
+               }
+               if ((sdev = dir->data) && (sch = sdev->priv) && 
+                   (sdev->type == ARPHRD_DLCI) && (sfr = sch->LINE_privdata) &&
+                   (sfr->master == dev) && (sfr->dlci == dlci)) {
+                       skb->dev = sdev;        
+                       if (ch->debug_flags & DEBUG_COMX_DLCI) {
+                               comx_debug(dev, "Passing it to %s\n",sdev->name);
+                       }
+                       if (dev != sdev) {
+                               sch->stats.rx_packets++;
+                               sch->stats.rx_bytes += skb->len;
+                       }
+                       break;
+               }
+       }
+       switch(nlpid) {
+               case NLPID_IP:
+                       skb->protocol = htons(ETH_P_IP);
+                       skb->mac.raw = skb->data;
+                       comx_rx(sdev, skb);
+                       break;
+               case NLPID_Q933_LMI:
+                       fr_rx_lmi(dev, skb, dlci, nlpid);
+                       /* fall through - the skb is freed below */
+               default:
+                       kfree_skb(skb);
+                       break;
+       }
+}
+
+static int fr_tx(struct net_device *dev) 
+{
+       struct comx_channel *ch = dev->priv;
+       struct proc_dir_entry *dir = ch->procdir->parent->subdir;
+       struct net_device *sdev;
+       struct comx_channel *sch;
+       struct fr_data *sfr;
+       int cnt = 1;
+
+       /* If everything is right, tbusy will be set in two places: at the
+          master and at the slave that has just transmitted.
+          It is set in only one place when the master itself transmitted.
+          This will become useful again once we want to do scheduling. */
+          
+       /* This should be fixed, the slave tbusy should be set when 
+          the masters queue is full and reset when not */
+
+       for (; dir ; dir = dir->next) {
+               if(!S_ISDIR(dir->mode)) {
+                   continue;
+               }
+               if ((sdev = dir->data) && (sch = sdev->priv) && 
+                   (sdev->type == ARPHRD_DLCI) && (sfr = sch->LINE_privdata) &&
+                   (sfr->master == dev) && (netif_queue_stopped(sdev))) {
+                       netif_wake_queue(sdev);
+                       cnt++;
+               }
+       }
+
+       netif_wake_queue(dev);
+       return 0;
+}
+
+static void fr_status(struct net_device *dev, unsigned short status)
+{
+       struct comx_channel *ch = dev->priv;
+       struct fr_data *fr = ch->LINE_privdata;
+       struct proc_dir_entry *dir = ch->procdir->parent->subdir;
+       struct net_device *sdev;
+       struct comx_channel *sch;
+       struct fr_data *sfr;
+
+       if (status & LINE_UP) {
+               if (!fr->keepa_freq) {
+                       status |= PROTO_UP;
+               }
+       } else {
+               status &= ~(PROTO_UP | PROTO_LOOP);
+       }
+
+       if (dev == fr->master && fr->keepa_freq) {
+               if (status & LINE_UP) {
+                       fr->keepa_timer.expires = jiffies + HZ;
+                       add_timer(&fr->keepa_timer);
+                       fr->keepalivecnt = MAXALIVECNT + 1;
+                       fr->keeploopcnt = 0;
+               } else {
+                       del_timer(&fr->keepa_timer);
+               }
+       }
+               
+       /* Propagate the status change to all of the slaves */
+       for (; dir ; dir = dir->next) {
+               if(!S_ISDIR(dir->mode)) {
+                   continue;
+               }
+       
+               if ((sdev = dir->data) && (sch = sdev->priv) && 
+                   (sdev->type == ARPHRD_FRAD || sdev->type == ARPHRD_DLCI) && 
+                   (sfr = sch->LINE_privdata) && (sfr->master == dev)) {
+                       if(status & LINE_UP) {
+                               netif_wake_queue(sdev);
+                       }
+                       comx_status(sdev, status);
+                       if(status & (PROTO_UP | PROTO_LOOP)) {
+                               dev->flags |= IFF_RUNNING;
+                       } else {
+                               dev->flags &= ~IFF_RUNNING;
+                       }
+               }
+       }
+}
+
+static int fr_open(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct fr_data *fr = ch->LINE_privdata;
+       struct proc_dir_entry *comxdir = ch->procdir;
+       struct comx_channel *mch;
+
+       if (!(ch->init_status & HW_OPEN)) {
+               return -ENODEV;
+       }
+
+       if ((ch->hardware == &fr_dlci && ch->protocol != &fr_slave_protocol) ||
+           (ch->protocol == &fr_slave_protocol && ch->hardware != &fr_dlci)) {
+               printk(KERN_ERR "Trying to open an improperly configured FR interface, giving up\n");
+               return -EINVAL;
+       }
+
+       if (!fr->master) {
+               return -ENODEV;
+       }
+       mch = fr->master->priv;
+       if (fr->master != dev && (!(mch->init_status & LINE_OPEN) 
+          || (mch->protocol != &fr_master_protocol))) {
+               printk(KERN_ERR "Master %s is inactive, or incorrectly set up, "
+                       "unable to open %s\n", fr->master->name, dev->name);
+               return -ENODEV;
+       }
+
+       ch->init_status |= LINE_OPEN;
+       ch->line_status &= ~(PROTO_UP | PROTO_LOOP);
+       dev->flags &= ~IFF_RUNNING;
+
+       if (fr->master == dev) {
+               if (fr->keepa_freq) {
+                       fr->keepa_timer.function = fr_keepalive_timerfun;
+                       fr->keepa_timer.data = (unsigned long)dev;
+                       add_timer(&fr->keepa_timer);
+               } else {
+                       if (ch->line_status & LINE_UP) {
+                               ch->line_status |= PROTO_UP;
+                               dev->flags |= IFF_RUNNING;
+                       }
+               }
+       } else {
+               ch->line_status = mch->line_status;
+               if(fr->master->flags & IFF_RUNNING) {
+                       dev->flags |= IFF_RUNNING;
+               }
+       }
+
+       for (; comxdir ; comxdir = comxdir->next) {
+               if (strcmp(comxdir->name, FILENAME_DLCI) == 0 ||
+                  strcmp(comxdir->name, FILENAME_MASTER) == 0 ||
+                  strcmp(comxdir->name, FILENAME_KEEPALIVE) == 0) {
+                       comxdir->mode = S_IFREG | 0444;
+               }
+       }
+//     comx_status(dev, ch->line_status);
+       return 0;
+}
+
+static int fr_close(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct fr_data *fr = ch->LINE_privdata;
+       struct proc_dir_entry *comxdir = ch->procdir;
+
+       if (fr->master == dev) { // if we are the master
+               struct proc_dir_entry *dir = ch->procdir->parent->subdir;
+               struct net_device *sdev = dev;
+               struct comx_channel *sch;
+               struct fr_data *sfr;
+
+               if (!(ch->init_status & HW_OPEN)) {
+                       return -ENODEV;
+               }
+
+               if (fr->keepa_freq) {
+                       del_timer(&fr->keepa_timer);
+               }
+               
+               for (; dir ; dir = dir->next) {
+                       if(!S_ISDIR(dir->mode)) {
+                               continue;
+                       }
+                       if ((sdev = dir->data) && (sch = sdev->priv) && 
+                           (sdev->type == ARPHRD_DLCI) && 
+                           (sfr = sch->LINE_privdata) &&
+                           (sfr->master == dev) && 
+                           (sch->init_status & LINE_OPEN)) {
+                               dev_close(sdev);
+                       }
+               }
+       }
+
+       ch->init_status &= ~LINE_OPEN;
+       ch->line_status &= ~(PROTO_UP | PROTO_LOOP);
+       dev->flags &= ~IFF_RUNNING;
+
+       for (; comxdir ; comxdir = comxdir->next) {
+               if (strcmp(comxdir->name, FILENAME_DLCI) == 0 ||
+                   strcmp(comxdir->name, FILENAME_MASTER) == 0 ||
+                   strcmp(comxdir->name, FILENAME_KEEPALIVE) == 0) {
+                       comxdir->mode = S_IFREG | 0444;
+               }
+       }
+
+       return 0;
+}
+
+static int fr_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct comx_channel *sch, *mch;
+       struct fr_data *fr = ch->LINE_privdata;
+       struct fr_data *sfr;
+       struct net_device *sdev;
+       struct proc_dir_entry *dir = ch->procdir->parent->subdir;
+
+       if (!fr->master) {
+               printk(KERN_ERR "BUG: fr_xmit without a master!!! dev: %s\n", dev->name);
+               return 0;
+       }
+
+       mch = fr->master->priv;
+
+       /* This will only really matter once slave scheduling is implemented */
+       if (ch->debug_flags & DEBUG_COMX_DLCI) {
+               comx_debug_skb(dev, skb, "Sending frame");
+       }
+
+       if (dev != fr->master) {
+               struct sk_buff *newskb=skb_clone(skb, GFP_ATOMIC);
+               if (!newskb)
+                       return -ENOMEM;
+               newskb->dev=fr->master;
+               dev_queue_xmit(newskb);
+               ch->stats.tx_bytes += skb->len;
+               ch->stats.tx_packets++;
+               dev_kfree_skb(skb);
+       } else {
+               netif_stop_queue(dev);
+               for (; dir ; dir = dir->next) {
+                       if(!S_ISDIR(dir->mode)) {
+                           continue;
+                       }
+                       if ((sdev = dir->data) && (sch = sdev->priv) && 
+                           (sdev->type == ARPHRD_DLCI) && (sfr = sch->LINE_privdata) &&
+                           (sfr->master == dev) && (netif_queue_stopped(sdev))) {
+                               netif_stop_queue(sdev);
+                       }
+               }
+                       
+               switch(mch->HW_send_packet(dev, skb)) {
+                       case FRAME_QUEUED:
+                               netif_wake_queue(dev);
+                               break;
+                       case FRAME_ACCEPTED:
+                       case FRAME_DROPPED:
+                               break;
+                       case FRAME_ERROR:
+                               printk(KERN_ERR "%s: Transmit frame error (len %d)\n", 
+                                       dev->name, skb->len);
+                               break;
+               }
+       }
+       return 0;
+}
+
+static int fr_header(struct sk_buff *skb, struct net_device *dev, 
+       unsigned short type, void *daddr, void *saddr, unsigned len) 
+{
+       struct comx_channel *ch = dev->priv;
+       struct fr_data *fr = ch->LINE_privdata;
+
+       skb_push(skb, dev->hard_header_len);      
+       /* Put in DLCI */
+       skb->data[0] = (fr->dlci & (1024 - 15)) >> 2;
+       skb->data[1] = (fr->dlci & 15) << 4 | 1;        // EA bit 1
+       skb->data[2] = FRAD_UI;
+       skb->data[3] = NLPID_IP;
+
+       return dev->hard_header_len;  
+}
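
fr_header() above and fr_keepalive_send() earlier build the two-byte Q.922
address from the DLCI, and fr_rx() reverses it. A stand-alone sketch of that
encode/decode pair; the names are illustrative, and the 0x3f0 mask yields the
same byte as the driver's (1024 - 15) expression once shifted:

#include <stdint.h>
#include <stdio.h>

/* Upper 6 DLCI bits go into byte 0 (bits 7..2), lower 4 bits into byte 1
 * (bits 7..4), with the EA bit set in byte 1. */
static void fr_addr_encode(uint16_t dlci, uint8_t hdr[2])
{
    hdr[0] = (dlci & 0x3f0) >> 2;
    hdr[1] = (dlci & 0x00f) << 4 | 1;
}

/* The inverse, as done at the top of fr_rx(). */
static uint16_t fr_addr_decode(const uint8_t hdr[2])
{
    return ((hdr[0] & 0xfc) << 2) | ((hdr[1] & 0xf0) >> 4);
}

int main(void)
{
    uint8_t hdr[2];

    fr_addr_encode(999, hdr);
    printf("dlci 999 -> %02x %02x -> %u\n", hdr[0], hdr[1],
           fr_addr_decode(hdr));                /* round-trips back to 999 */
    return 0;
}
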
+
+static int fr_statistics(struct net_device *dev, char *page) 
+{
+       struct comx_channel *ch = dev->priv;
+       struct fr_data *fr = ch->LINE_privdata;
+       int len = 0;
+
+       if (fr->master == dev) {
+               struct proc_dir_entry *dir = ch->procdir->parent->subdir;
+               struct net_device *sdev;
+               struct comx_channel *sch;
+               struct fr_data *sfr;
+               int slaves = 0;
+
+               len += sprintf(page + len, 
+                       "This is a Frame Relay master device\nSlaves: ");
+               for (; dir ; dir = dir->next) {
+                       if(!S_ISDIR(dir->mode)) {
+                               continue;
+                       }
+                       if ((sdev = dir->data) && (sch = sdev->priv) && 
+                           (sdev->type == ARPHRD_DLCI) &&
+                           (sfr = sch->LINE_privdata) && 
+                           (sfr->master == dev) && (sdev != dev)) {
+                               slaves++;
+                               len += sprintf(page + len, "%s ", sdev->name);
+                       }
+               }
+               len += sprintf(page + len, "%s\n", slaves ? "" : "(none)");
+               if (fr->keepa_freq) {
+                       len += sprintf(page + len, "Line keepalive (value %d) "
+                               "status %s [%d]\n", fr->keepa_freq, 
+                               ch->line_status & PROTO_LOOP ? "LOOP" :
+                               ch->line_status & PROTO_UP ? "UP" : "DOWN", 
+                               fr->keepalivecnt);
+               } else {
+                       len += sprintf(page + len, "Line keepalive protocol "
+                               "is not set\n");
+               }
+       } else {                // if slave
+               len += sprintf(page + len, 
+                       "This is a Frame Relay slave device, master: %s\n",
+                       fr->master ? fr->master->name : "(not set)");
+       }
+       return len;
+}
+
+static int fr_read_proc(char *page, char **start, off_t off, int count,
+       int *eof, void *data)
+{
+       struct proc_dir_entry *file = (struct proc_dir_entry *)data;
+       struct net_device *dev = file->parent->data;
+       struct comx_channel *ch = dev->priv;
+       struct fr_data *fr = NULL;
+       int len = 0;
+
+       if (ch) {
+               fr = ch->LINE_privdata;
+       }
+
+       if (strcmp(file->name, FILENAME_DLCI) == 0) {
+               len = sprintf(page, "%04d\n", fr->dlci);
+       } else if (strcmp(file->name, FILENAME_MASTER) == 0) {
+               len = sprintf(page, "%-9s\n", fr->master ? fr->master->name :
+                       "(none)");
+       } else if (strcmp(file->name, FILENAME_KEEPALIVE) == 0) {
+               len = fr->keepa_freq ? sprintf(page, "% 3d\n", fr->keepa_freq) 
+                       : sprintf(page, "off\n");
+       } else {
+               printk(KERN_ERR "comxfr: internal error, filename %s\n", file->name);
+               return -EBADF;
+       }
+
+       if (off >= len) {
+               *eof = 1;
+               return 0;
+       }
+
+       *start = page + off;
+       if (count >= len - off) *eof = 1;
+       return min_t(int, count, len - off);
+}
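
fr_read_proc() ends with the usual 2.4-era read_proc windowing: the handler
formats the whole value into the page, returns only the off/count slice, and
sets *eof once the caller has seen everything. A plain-C sketch of just that
arithmetic (proc_window is a made-up name):

#include <stdio.h>

/* Return the [off, off+count) slice of a len-byte formatted buffer,
 * flagging EOF the way the read_proc handlers in this file do. */
static int proc_window(char *page, char **start, long off, int count,
                       int *eof, int len)
{
    if (off >= len) {
        *eof = 1;
        return 0;
    }
    *start = page + off;
    if (count >= len - off)
        *eof = 1;
    return count < len - off ? count : len - off;
}

int main(void)
{
    char page[] = "0042\n";
    char *start;
    int eof = 0;

    /* Ask for 3 bytes starting at offset 2 of the 5-byte value. */
    printf("%d bytes, eof=%d\n", proc_window(page, &start, 2, 3, &eof, 5), eof);
    return 0;
}
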
+
+static int fr_write_proc(struct file *file, const char *buffer, 
+       u_long count, void *data)
+{
+       struct proc_dir_entry *entry = (struct proc_dir_entry *)data;
+       struct net_device *dev = entry->parent->data;
+       struct comx_channel *ch = dev->priv;
+       struct fr_data *fr = NULL; 
+       char *page;
+
+       if (ch) {
+               fr = ch->LINE_privdata;
+       }
+
+       if (!(page = (char *)__get_free_page(GFP_KERNEL))) {
+               return -ENOMEM;
+       }
+
+       if (count > PAGE_SIZE - 1)
+               count = PAGE_SIZE - 1;
+       if (copy_from_user(page, buffer, count)) {
+               free_page((unsigned long)page);
+               return -EFAULT;
+       }
+       /* NUL-terminate and strip a trailing newline */
+       *(page + count) = 0;
+       if (count && *(page + count - 1) == '\n') {
+               *(page + count - 1) = 0;
+       }
+
+       if (strcmp(entry->name, FILENAME_DLCI) == 0) {
+               u16 dlci_new = simple_strtoul(page, NULL, 10);
+
+               if (dlci_new > 1023) {
+                       printk(KERN_ERR "Invalid DLCI value\n");
+               } else {
+                       fr->dlci = dlci_new;
+               }
+       } else if (strcmp(entry->name, FILENAME_MASTER) == 0) {
+               struct net_device *new_master = dev_get_by_name(page);
+
+               if (new_master && new_master->type == ARPHRD_FRAD) {
+                       struct comx_channel *sch = new_master->priv;
+                       struct fr_data *sfr = sch->LINE_privdata;
+
+                       if (sfr && sfr->master == new_master) {
+                               if(fr->master)
+                                       dev_put(fr->master);
+                               fr->master = new_master;
+                               /* Inherit the master's status */
+                               ch->line_status = sch->line_status;
+                       }
+               }
+       } else if (strcmp(entry->name, FILENAME_KEEPALIVE) == 0) {
+               int keepa_new = -1;
+
+               if (strcmp(page, KEEPALIVE_OFF) == 0) {
+                       keepa_new = 0;
+               } else {
+                       keepa_new = simple_strtoul(page, NULL, 10);
+               }
+
+               if (keepa_new < 0 || keepa_new > 100) {
+                       printk(KERN_ERR "invalid keepalive\n");
+               } else {
+                       if (fr->keepa_freq && keepa_new != fr->keepa_freq) {
+                               fr_set_keepalive(dev, 0);
+                       }
+                       if (keepa_new) {
+                               fr_set_keepalive(dev, keepa_new);
+                       }
+               }
+       } else {
+               printk(KERN_ERR "comxfr_write_proc: internal error, filename %s\n", 
+                       entry->name);
+               count = -EBADF;
+       }
+
+       free_page((unsigned long)page);
+       return count;
+}
+
+static int fr_exit(struct net_device *dev) 
+{
+       struct comx_channel *ch = dev->priv;
+       struct fr_data *fr = ch->LINE_privdata;
+       struct net_device *sdev = dev;
+       struct comx_channel *sch;
+       struct fr_data *sfr;
+       struct proc_dir_entry *dir = ch->procdir->parent->subdir;
+
+       /* When a master is shut down, its slaves must be taken down too */
+       if (fr->master && fr->master == dev) {
+               for (; dir ; dir = dir->next) {
+                       if(!S_ISDIR(dir->mode)) {
+                               continue;
+                       }
+                       if ((sdev = dir->data) && (sch = sdev->priv) && 
+                           (sdev->type == ARPHRD_DLCI) && 
+                           (sfr = sch->LINE_privdata) && (sfr->master == dev)) {
+                               dev_close(sdev);
+                               sfr->master = NULL;
+                       }
+               }
+       }
+       dev->flags              = 0;
+       dev->type               = 0;
+       dev->mtu                = 0;
+       dev->hard_header_len    = 0;
+
+       ch->LINE_rx     = NULL;
+       ch->LINE_tx     = NULL;
+       ch->LINE_status = NULL;
+       ch->LINE_open   = NULL;
+       ch->LINE_close  = NULL;
+       ch->LINE_xmit   = NULL;
+       ch->LINE_header = NULL;
+       ch->LINE_rebuild_header = NULL;
+       ch->LINE_statistics = NULL;
+
+       ch->LINE_status = 0;
+
+       if (fr->master != dev) { // if not master, remove dlci
+               if(fr->master)
+                       dev_put(fr->master);
+               remove_proc_entry(FILENAME_DLCI, ch->procdir);
+               remove_proc_entry(FILENAME_MASTER, ch->procdir);
+       } else {
+               if (fr->keepa_freq) {
+                       fr_set_keepalive(dev, 0);
+               }
+               remove_proc_entry(FILENAME_KEEPALIVE, ch->procdir);
+               remove_proc_entry(FILENAME_DLCI, ch->procdir);
+       }
+
+       kfree(fr);
+       ch->LINE_privdata = NULL;
+
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+static int fr_master_init(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct fr_data *fr;
+       struct proc_dir_entry *new_file;
+
+       if ((fr = ch->LINE_privdata = kmalloc(sizeof(struct fr_data), 
+           GFP_KERNEL)) == NULL) {
+               return -ENOMEM;
+       }
+       memset(fr, 0, sizeof(struct fr_data));
+       fr->master = dev;       // this means master
+       fr->dlci = 0;           // let's say default
+
+       dev->flags      = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+       dev->type       = ARPHRD_FRAD;
+       dev->mtu        = 1500;
+       dev->hard_header_len    = 4;            
+       dev->addr_len   = 0;
+
+       ch->LINE_rx     = fr_rx;
+       ch->LINE_tx     = fr_tx;
+       ch->LINE_status = fr_status;
+       ch->LINE_open   = fr_open;
+       ch->LINE_close  = fr_close;
+       ch->LINE_xmit   = fr_xmit;
+       ch->LINE_header = fr_header;
+       ch->LINE_rebuild_header = NULL;
+       ch->LINE_statistics = fr_statistics;
+
+       if ((new_file = create_proc_entry(FILENAME_DLCI, S_IFREG | 0644, 
+           ch->procdir)) == NULL) {
+               goto cleanup_LINE_privdata;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &fr_read_proc;
+       new_file->write_proc = &fr_write_proc;
+       new_file->size = 5;
+       new_file->nlink = 1;
+
+       if ((new_file = create_proc_entry(FILENAME_KEEPALIVE, S_IFREG | 0644, 
+           ch->procdir)) == NULL) {
+               goto cleanup_filename_dlci;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &fr_read_proc;
+       new_file->write_proc = &fr_write_proc;
+       new_file->size = 4;
+       new_file->nlink = 1;
+
+       fr_set_keepalive(dev, 0);
+
+       MOD_INC_USE_COUNT;
+       return 0;
+cleanup_filename_dlci:
+        remove_proc_entry(FILENAME_DLCI, ch->procdir);
+cleanup_LINE_privdata:
+       kfree(fr);
+       return -EIO;
+}
+
+static int fr_slave_init(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct fr_data *fr;
+       struct proc_dir_entry *new_file;
+
+       if ((fr = ch->LINE_privdata = kmalloc(sizeof(struct fr_data), 
+           GFP_KERNEL)) == NULL) {
+               return -ENOMEM;
+       }
+       memset(fr, 0, sizeof(struct fr_data));
+
+       dev->flags      = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+       dev->type       = ARPHRD_DLCI;
+       dev->mtu        = 1500;
+       dev->hard_header_len    = 4;            
+       dev->addr_len   = 0;
+
+       ch->LINE_rx     = fr_rx;
+       ch->LINE_tx     = fr_tx;
+       ch->LINE_status = fr_status;
+       ch->LINE_open   = fr_open;
+       ch->LINE_close  = fr_close;
+       ch->LINE_xmit   = fr_xmit;
+       ch->LINE_header = fr_header;
+       ch->LINE_rebuild_header = NULL;
+       ch->LINE_statistics = fr_statistics;
+
+       if ((new_file = create_proc_entry(FILENAME_DLCI, S_IFREG | 0644, 
+           ch->procdir)) == NULL) {
+               goto cleanup_LINE_privdata;
+       }
+       
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &fr_read_proc;
+       new_file->write_proc = &fr_write_proc;
+       new_file->size = 5;
+       new_file->nlink = 1;
+
+       if ((new_file = create_proc_entry(FILENAME_MASTER, S_IFREG | 0644, 
+           ch->procdir)) == NULL) {
+               goto cleanup_filename_dlci;
+       }
+       new_file->data = (void *)new_file;
+       new_file->read_proc = &fr_read_proc;
+       new_file->write_proc = &fr_write_proc;
+       new_file->size = 10;
+       new_file->nlink = 1;
+       MOD_INC_USE_COUNT;
+       return 0;
+cleanup_filename_dlci:
+         remove_proc_entry(FILENAME_DLCI, ch->procdir);
+cleanup_LINE_privdata:
+       kfree(fr);
+       return -EIO;
+}
+
+static int dlci_open(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+
+       ch->init_status |= HW_OPEN;
+
+       MOD_INC_USE_COUNT;
+       return 0;
+}
+
+static int dlci_close(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+
+       ch->init_status &= ~HW_OPEN;
+
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+static int dlci_txe(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct fr_data *fr = ch->LINE_privdata;
+
+       if (!fr->master) {
+               return 0;
+       }
+
+       ch = fr->master->priv;
+       fr = ch->LINE_privdata;
+       return ch->HW_txe(fr->master);
+}
+
+static int dlci_statistics(struct net_device *dev, char *page) 
+{
+       return 0;
+}
+
+static int dlci_init(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+
+       ch->HW_open = dlci_open;
+       ch->HW_close = dlci_close;
+       ch->HW_txe = dlci_txe;
+       ch->HW_statistics = dlci_statistics;
+
+       /* No other hw info is needed; everything is derived from fr->master anyway */
+
+       MOD_INC_USE_COUNT;
+       return 0;
+}
+
+static int dlci_exit(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+
+       ch->HW_open = NULL;
+       ch->HW_close = NULL;
+       ch->HW_txe = NULL;
+       ch->HW_statistics = NULL;
+
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+static int dlci_dump(struct net_device *dev)
+{
+       printk(KERN_INFO "dlci_dump %s, WHAT ???\n", dev->name);
+       return -1;
+}
+
+static struct comx_protocol fr_master_protocol = {
+       .name           = "frad", 
+       .version        = VERSION,
+       .encap_type     = ARPHRD_FRAD, 
+       .line_init      = fr_master_init, 
+       .line_exit      = fr_exit, 
+};
+
+static struct comx_protocol fr_slave_protocol = {
+       .name           = "ietf-ip", 
+       .version        = VERSION,
+       .encap_type     = ARPHRD_DLCI, 
+       .line_init      = fr_slave_init, 
+       .line_exit      = fr_exit, 
+};
+
+static struct comx_hardware fr_dlci = { 
+       .name           = "dlci", 
+       .version        = VERSION,
+       .hw_init        = dlci_init, 
+       .hw_exit        = dlci_exit, 
+       .hw_dump        = dlci_dump, 
+};
+
+static int __init comx_proto_fr_init(void)
+{
+       int ret; 
+
+       if ((ret = comx_register_hardware(&fr_dlci))) {
+               return ret;
+       }
+       if ((ret = comx_register_protocol(&fr_master_protocol))) {
+               return ret;
+       }
+       return comx_register_protocol(&fr_slave_protocol);
+}
+
+static void __exit comx_proto_fr_exit(void)
+{
+       comx_unregister_hardware(fr_dlci.name);
+       comx_unregister_protocol(fr_master_protocol.name);
+       comx_unregister_protocol(fr_slave_protocol.name);
+}
+
+module_init(comx_proto_fr_init);
+module_exit(comx_proto_fr_exit);
diff --git a/drivers/net/wan/comx-proto-lapb.c b/drivers/net/wan/comx-proto-lapb.c
new file mode 100644 (file)
index 0000000..b203ff6
--- /dev/null
@@ -0,0 +1,551 @@
+/*
+ * LAPB protocol module for the COMX driver 
+ * for Linux kernel 2.2.X
+ *
+ * Original author: Tivadar Szemethy <tiv@itc.hu>
+ * Maintainer: Gergely Madarasz <gorgo@itc.hu>
+ *
+ * Copyright (C) 1997-1999 (C) ITConsult-Pro Co. <info@itc.hu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Version 0.80 (99/06/14):
+ *             - cleaned up the source code a bit
+ *             - ported back to kernel, now works as non-module
+ *
+ * Changed      (00/10/29, Henner Eisen):
+ *             - comx_rx() / comxlapb_data_indication() return status.
+ * 
+ */
+
+#define VERSION "0.80"
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/if_arp.h>
+#include <linux/inetdevice.h>
+#include <asm/uaccess.h>
+#include <linux/lapb.h>
+#include <linux/init.h>
+
+#include       "comx.h"
+#include       "comxhw.h"
+
+static struct proc_dir_entry *create_comxlapb_proc_entry(char *name, int mode,
+       int size, struct proc_dir_entry *dir);
+
+static void comxlapb_rx(struct net_device *dev, struct sk_buff *skb) 
+{
+       if (!dev || !dev->priv) {
+               dev_kfree_skb(skb);
+       } else {
+               lapb_data_received(dev, skb);
+       }
+}
+
+static int comxlapb_tx(struct net_device *dev) 
+{
+       netif_wake_queue(dev);
+       return 0;
+}
+
+static int comxlapb_header(struct sk_buff *skb, struct net_device *dev, 
+       unsigned short type, void *daddr, void *saddr, unsigned len) 
+{
+       return dev->hard_header_len;  
+}
+
+static void comxlapb_status(struct net_device *dev, unsigned short status)
+{
+       struct comx_channel *ch;
+
+       if (!dev || !(ch = dev->priv)) {
+               return;
+       }
+       if (status & LINE_UP) {
+               netif_wake_queue(dev);
+       }
+       comx_status(dev, status);
+}
+
+static int comxlapb_open(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       int err = 0;
+
+       if (!(ch->init_status & HW_OPEN)) {
+               return -ENODEV;
+       }
+
+       err = lapb_connect_request(dev);
+
+       if (ch->debug_flags & DEBUG_COMX_LAPB) {
+               comx_debug(dev, "%s: lapb opened, error code: %d\n", 
+                       dev->name, err);
+       }
+
+       if (!err) {
+               ch->init_status |= LINE_OPEN;
+               MOD_INC_USE_COUNT;
+       }
+       return err;
+}
+
+static int comxlapb_close(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+
+       if (!(ch->init_status & HW_OPEN)) {
+               return -ENODEV;
+       }
+
+       if (ch->debug_flags & DEBUG_COMX_LAPB) {
+               comx_debug(dev, "%s: lapb closed\n", dev->name);
+       }
+
+       lapb_disconnect_request(dev);
+
+       ch->init_status &= ~LINE_OPEN;
+       ch->line_status &= ~PROTO_UP;
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+static int comxlapb_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct sk_buff *skb2;
+
+       if (!dev || !(ch = dev->priv) || !(dev->flags & (IFF_UP | IFF_RUNNING))) {
+               return -ENODEV;
+       }
+
+       if (dev->type == ARPHRD_X25) { // first byte tells what to do 
+               switch(skb->data[0]) {
+                       case 0x00:      
+                               break;  // transmit
+                       case 0x01:      
+                               lapb_connect_request(dev);
+                               kfree_skb(skb);
+                               return 0;
+                       case 0x02:      
+                               lapb_disconnect_request(dev);
+                       default:
+                               kfree_skb(skb);
+                               return 0;
+               }
+               skb_pull(skb,1);
+       }
+
+       netif_stop_queue(dev);
+       
+       if ((skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
+               lapb_data_request(dev, skb2);
+       }
+
+       return FRAME_ACCEPTED;
+}
+
+static int comxlapb_statistics(struct net_device *dev, char *page) 
+{
+       struct lapb_parms_struct parms;
+       int len = 0;
+
+       len += sprintf(page + len, "Line status: ");
+       if (lapb_getparms(dev, &parms) != LAPB_OK) {
+               len += sprintf(page + len, "not initialized\n");
+               return len;
+       }
+       len += sprintf(page + len, "%s (%s), T1: %d/%d, T2: %d/%d, N2: %d/%d, "
+               "window: %d\n", parms.mode & LAPB_DCE ? "DCE" : "DTE", 
+               parms.mode & LAPB_EXTENDED ? "EXTENDED" : "STANDARD",
+               parms.t1timer, parms.t1, parms.t2timer, parms.t2, 
+               parms.n2count, parms.n2, parms.window);
+
+       return len;
+}
+
+static int comxlapb_read_proc(char *page, char **start, off_t off, int count,
+       int *eof, void *data)
+{
+       struct proc_dir_entry *file = (struct proc_dir_entry *)data;
+       struct net_device *dev = file->parent->data;
+       struct lapb_parms_struct parms;
+       int len = 0;
+
+       if (lapb_getparms(dev, &parms)) {
+               return -ENODEV;
+       }
+
+       if (strcmp(file->name, FILENAME_T1) == 0) {
+               len += sprintf(page + len, "%02u / %02u\n", 
+                       parms.t1timer, parms.t1);
+       } else if (strcmp(file->name, FILENAME_T2) == 0) {
+               len += sprintf(page + len, "%02u / %02u\n", 
+                       parms.t2timer, parms.t2);
+       } else if (strcmp(file->name, FILENAME_N2) == 0) {
+               len += sprintf(page + len, "%02u / %02u\n", 
+                       parms.n2count, parms.n2);
+       } else if (strcmp(file->name, FILENAME_WINDOW) == 0) {
+               len += sprintf(page + len, "%u\n", parms.window);
+       } else if (strcmp(file->name, FILENAME_MODE) == 0) {
+               len += sprintf(page + len, "%s, %s\n", 
+                       parms.mode & LAPB_DCE ? "DCE" : "DTE",
+                       parms.mode & LAPB_EXTENDED ? "EXTENDED" : "STANDARD");
+       } else {
+               printk(KERN_ERR "comxlapb: internal error, filename %s\n", file->name);
+               return -EBADF;
+       }
+
+       if (off >= len) {
+               *eof = 1;
+               return 0;
+       }
+
+       *start = page + off;
+       if (count >= len - off) {
+               *eof = 1;
+       }
+       return min_t(int, count, len - off);
+}
+
+static int comxlapb_write_proc(struct file *file, const char *buffer, 
+       u_long count, void *data)
+{
+       struct proc_dir_entry *entry = (struct proc_dir_entry *)data;
+       struct net_device *dev = entry->parent->data;
+       struct lapb_parms_struct parms;
+       unsigned long parm;
+       char *page;
+
+       if (lapb_getparms(dev, &parms)) {
+               return -ENODEV;
+       }
+
+       if (!(page = (char *)__get_free_page(GFP_KERNEL))) {
+               return -ENOMEM;
+       }
+
+       if (count > PAGE_SIZE - 1)
+               count = PAGE_SIZE - 1;
+       if (copy_from_user(page, buffer, count)) {
+               free_page((unsigned long)page);
+               return -EFAULT;
+       }
+       /* NUL-terminate and strip a trailing newline */
+       *(page + count) = 0;
+       if (count && *(page + count - 1) == '\n') {
+               *(page + count - 1) = 0;
+       }
+
+       if (strcmp(entry->name, FILENAME_T1) == 0) {
+               parm=simple_strtoul(page,NULL,10);
+               if (parm > 0 && parm < 100) {
+                       parms.t1=parm;
+                       lapb_setparms(dev, &parms);
+               }
+       } else if (strcmp(entry->name, FILENAME_T2) == 0) {
+               parm=simple_strtoul(page, NULL, 10);
+               if (parm > 0 && parm < 100) {
+                       parms.t2=parm;
+                       lapb_setparms(dev, &parms);
+               }
+       } else if (strcmp(entry->name, FILENAME_N2) == 0) {
+               parm=simple_strtoul(page, NULL, 10);
+               if (parm > 0 && parm < 100) {
+                       parms.n2=parm;
+                       lapb_setparms(dev, &parms);
+               }
+       } else if (strcmp(entry->name, FILENAME_WINDOW) == 0) {
+               parms.window = simple_strtoul(page, NULL, 10);
+               lapb_setparms(dev, &parms);
+       } else if (strcmp(entry->name, FILENAME_MODE) == 0) {
+               if (comx_strcasecmp(page, "dte") == 0) {
+                       parms.mode &= ~(LAPB_DCE | LAPB_DTE); 
+                       parms.mode |= LAPB_DTE;
+               } else if (comx_strcasecmp(page, "dce") == 0) {
+                       parms.mode &= ~(LAPB_DTE | LAPB_DCE); 
+                       parms.mode |= LAPB_DCE;
+               } else if (comx_strcasecmp(page, "std") == 0 || 
+                   comx_strcasecmp(page, "standard") == 0) {
+                       parms.mode &= ~LAPB_EXTENDED; 
+                       parms.mode |= LAPB_STANDARD;
+               } else if (comx_strcasecmp(page, "ext") == 0 || 
+                   comx_strcasecmp(page, "extended") == 0) {
+                       parms.mode &= ~LAPB_STANDARD; 
+                       parms.mode |= LAPB_EXTENDED;
+               }
+               lapb_setparms(dev, &parms);
+       } else {
+               printk(KERN_ERR "comxlapb_write_proc: internal error, filename %s\n", 
+                       entry->name);
+               free_page((unsigned long)page);
+               return -EBADF;
+       }
+
+       free_page((unsigned long)page);
+       return count;
+}
+
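+/*
+ * Called by the LAPB layer on connect confirmation/indication.  For X.25
+ * interfaces a one-byte 0x01 marker is passed up so the X.25 layer sees the
+ * link come up; the "mode" proc file is switched to read-only while the
+ * protocol is up.
+ */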
+static void comxlapb_connected(struct net_device *dev, int reason)
+{
+       struct comx_channel *ch = dev->priv; 
+       struct proc_dir_entry *comxdir = ch->procdir->subdir;
+
+       if (ch->debug_flags & DEBUG_COMX_LAPB) {
+               comx_debug(ch->dev, "%s: lapb connected, reason: %d\n", 
+                       ch->dev->name, reason);
+       }
+
+       if (ch->dev->type == ARPHRD_X25) {
+               unsigned char *p;
+               struct sk_buff *skb;
+
+               if ((skb = dev_alloc_skb(1)) == NULL) {
+                       printk(KERN_ERR "comxlapb: out of memory!\n");
+                       return;
+               }
+               p = skb_put(skb,1);
+               *p = 0x01;              // link established
+               skb->dev = ch->dev;
+               skb->protocol = htons(ETH_P_X25);
+               skb->mac.raw = skb->data;
+               skb->pkt_type = PACKET_HOST;
+
+               netif_rx(skb);
+               ch->dev->last_rx = jiffies;
+       }
+
+       for (; comxdir; comxdir = comxdir->next) {
+               if (strcmp(comxdir->name, FILENAME_MODE) == 0) {
+                       comxdir->mode = S_IFREG | 0444;
+               }
+       }
+
+
+       ch->line_status |= PROTO_UP;
+       comx_status(ch->dev, ch->line_status);
+}
+
+static void comxlapb_disconnected(struct net_device *dev, int reason)
+{
+       struct comx_channel *ch = dev->priv; 
+       struct proc_dir_entry *comxdir = ch->procdir->subdir;
+
+       if (ch->debug_flags & DEBUG_COMX_LAPB) {
+               comx_debug(ch->dev, "%s: lapb disconnected, reason: %d\n", 
+                       ch->dev->name, reason);
+       }
+
+       if (ch->dev->type == ARPHRD_X25) {
+               unsigned char *p;
+               struct sk_buff *skb;
+
+               if ((skb = dev_alloc_skb(1)) == NULL) {
+                       printk(KERN_ERR "comxlapb: out of memory!\n");
+                       return;
+               }
+               p = skb_put(skb,1);
+               *p = 0x02;              // link disconnected
+               skb->dev = ch->dev;
+               skb->protocol = htons(ETH_P_X25);
+               skb->mac.raw = skb->data;
+               skb->pkt_type = PACKET_HOST;
+
+               netif_rx(skb);
+               ch->dev->last_rx = jiffies;
+       }
+
+       for (; comxdir; comxdir = comxdir->next) {
+               if (strcmp(comxdir->name, FILENAME_MODE) == 0) {
+                       comxdir->mode = S_IFREG | 0644;
+               }
+       }
+       
+       ch->line_status &= ~PROTO_UP;
+       comx_status(ch->dev, ch->line_status);
+}
+
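+/*
+ * Data from the LAPB layer: X.25 interfaces get a leading 0x00 marker byte
+ * and ETH_P_X25, everything else is handed up as plain IP.
+ */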
+static int comxlapb_data_indication(struct net_device *dev, struct sk_buff *skb)
+{
+       struct comx_channel *ch = dev->priv; 
+
+       if (ch->dev->type == ARPHRD_X25) {
+               skb_push(skb, 1);
+
+               if (skb_cow(skb, 1))
+                       return NET_RX_DROP;
+
+               skb->data[0] = 0;       // indicate data for X25
+               skb->protocol = htons(ETH_P_X25);
+       } else {
+               skb->protocol = htons(ETH_P_IP);
+       }
+
+       skb->dev = ch->dev;
+       skb->mac.raw = skb->data;
+       return comx_rx(ch->dev, skb);
+}
+
+static void comxlapb_data_transmit(struct net_device *dev, struct sk_buff *skb)
+{
+       struct comx_channel *ch = dev->priv; 
+
+       if (ch->HW_send_packet) {
+               ch->HW_send_packet(ch->dev, skb);
+       }
+}
+
+static int comxlapb_exit(struct net_device *dev) 
+{
+       struct comx_channel *ch = dev->priv;
+
+       dev->flags              = 0;
+       dev->type               = 0;
+       dev->mtu                = 0;
+       dev->hard_header_len    = 0;
+
+       ch->LINE_rx     = NULL;
+       ch->LINE_tx     = NULL;
+       ch->LINE_status = NULL;
+       ch->LINE_open   = NULL;
+       ch->LINE_close  = NULL;
+       ch->LINE_xmit   = NULL;
+       ch->LINE_header = NULL;
+       ch->LINE_statistics = NULL;
+
+       if (ch->debug_flags & DEBUG_COMX_LAPB) {
+               comx_debug(dev, "%s: unregistering lapb\n", dev->name);
+       }
+       lapb_unregister(dev);
+
+       remove_proc_entry(FILENAME_T1, ch->procdir);
+       remove_proc_entry(FILENAME_T2, ch->procdir);
+       remove_proc_entry(FILENAME_N2, ch->procdir);
+       remove_proc_entry(FILENAME_MODE, ch->procdir);
+       remove_proc_entry(FILENAME_WINDOW, ch->procdir);
+
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+static int comxlapb_init(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct lapb_register_struct lapbreg;
+
+       dev->mtu                = 1500;
+       dev->hard_header_len    = 4;
+       dev->addr_len           = 0;
+
+       ch->LINE_rx     = comxlapb_rx;
+       ch->LINE_tx     = comxlapb_tx;
+       ch->LINE_status = comxlapb_status;
+       ch->LINE_open   = comxlapb_open;
+       ch->LINE_close  = comxlapb_close;
+       ch->LINE_xmit   = comxlapb_xmit;
+       ch->LINE_header = comxlapb_header;
+       ch->LINE_statistics = comxlapb_statistics;
+
+       lapbreg.connect_confirmation = comxlapb_connected;
+       lapbreg.connect_indication = comxlapb_connected;
+       lapbreg.disconnect_confirmation = comxlapb_disconnected;
+       lapbreg.disconnect_indication = comxlapb_disconnected;
+       lapbreg.data_indication = comxlapb_data_indication;
+       lapbreg.data_transmit = comxlapb_data_transmit;
+       if (lapb_register(dev, &lapbreg)) {
+               return -ENOMEM;
+       }
+       if (ch->debug_flags & DEBUG_COMX_LAPB) {
+               comx_debug(dev, "%s: lapb registered\n", dev->name);
+       }
+
+       if (!create_comxlapb_proc_entry(FILENAME_T1, 0644, 8, ch->procdir)) {
+               return -ENOMEM;
+       }
+       if (!create_comxlapb_proc_entry(FILENAME_T2, 0644, 8, ch->procdir)) {
+               return -ENOMEM;
+       }
+       if (!create_comxlapb_proc_entry(FILENAME_N2, 0644, 8, ch->procdir)) {
+               return -ENOMEM;
+       }
+       if (!create_comxlapb_proc_entry(FILENAME_MODE, 0644, 14, ch->procdir)) {
+               return -ENOMEM;
+       }
+       if (!create_comxlapb_proc_entry(FILENAME_WINDOW, 0644, 0, ch->procdir)) {
+               return -ENOMEM;
+       }
+
+       MOD_INC_USE_COUNT;
+       return 0;
+}
+
+static int comxlapb_init_lapb(struct net_device *dev) 
+{
+       dev->flags      = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+       dev->type       = ARPHRD_LAPB;
+
+       return(comxlapb_init(dev));
+}
+
+static int comxlapb_init_x25(struct net_device *dev)
+{
+       dev->flags              = IFF_NOARP;
+       dev->type               = ARPHRD_X25;
+
+       return(comxlapb_init(dev));
+}
+
+static struct proc_dir_entry *create_comxlapb_proc_entry(char *name, int mode,
+       int size, struct proc_dir_entry *dir)
+{
+       struct proc_dir_entry *new_file;
+
+       if ((new_file = create_proc_entry(name, S_IFREG | mode, dir)) != NULL) {
+               new_file->data = (void *)new_file;
+               new_file->read_proc = &comxlapb_read_proc;
+               new_file->write_proc = &comxlapb_write_proc;
+               new_file->size = size;
+               new_file->nlink = 1;
+       }
+       return(new_file);
+}
+
+static struct comx_protocol comxlapb_protocol = {
+       "lapb", 
+       VERSION,
+       ARPHRD_LAPB, 
+       comxlapb_init_lapb, 
+       comxlapb_exit, 
+       NULL 
+};
+
+static struct comx_protocol comx25_protocol = {
+       "x25", 
+       VERSION,
+       ARPHRD_X25, 
+       comxlapb_init_x25, 
+       comxlapb_exit, 
+       NULL 
+};
+
+static int __init comx_proto_lapb_init(void)
+{
+       int ret;
+
+       if ((ret = comx_register_protocol(&comxlapb_protocol)) != 0) {
+               return ret;
+       }
+       return comx_register_protocol(&comx25_protocol);
+}
+
+static void __exit comx_proto_lapb_exit(void)
+{
+       comx_unregister_protocol(comxlapb_protocol.name);
+       comx_unregister_protocol(comx25_protocol.name);
+}
+
+module_init(comx_proto_lapb_init);
+module_exit(comx_proto_lapb_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/wan/comx-proto-ppp.c b/drivers/net/wan/comx-proto-ppp.c
new file mode 100644 (file)
index 0000000..3f45010
--- /dev/null
@@ -0,0 +1,269 @@
+/*
+ * Synchronous PPP / Cisco-HDLC driver for the COMX boards
+ *
+ * Author: Gergely Madarasz <gorgo@itc.hu>
+ *
+ * based on skeleton code by Tivadar Szemethy <tiv@itc.hu>
+ *
+ * Copyright (C) 1999 ITConsult-Pro Co. <info@itc.hu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * Version 0.10 (99/06/10):
+ *             - written the first code :)
+ *
+ * Version 0.20 (99/06/16):
+ *             - added hdlc protocol 
+ *             - protocol up is IFF_RUNNING
+ *
+ * Version 0.21 (99/07/15):
+ *             - some small fixes with the line status
+ *
+ * Version 0.22 (99/08/05):
+ *             - don't test IFF_RUNNING but the pp_link_state of the sppp
+ * 
+ * Version 0.23 (99/12/02):
+ *             - tbusy fixes
+ *
+ */
+
+#define VERSION "0.23"
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/jiffies.h>
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/if_arp.h>
+#include <linux/inetdevice.h>
+#include <asm/uaccess.h>
+#include <linux/init.h>
+
+#include <net/syncppp.h>
+#include       "comx.h"
+
+MODULE_AUTHOR("Author: Gergely Madarasz <gorgo@itc.hu>");
+MODULE_DESCRIPTION("Cisco-HDLC / Synchronous PPP driver for the COMX sync serial boards");
+MODULE_LICENSE("GPL");
+
+static struct comx_protocol syncppp_protocol;
+static struct comx_protocol hdlc_protocol;
+
+struct syncppp_data {
+       struct timer_list status_timer;
+};
+
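+/*
+ * Poll the sppp link state every 3 seconds and mirror it into the PROTO_UP
+ * bit of the comx line status.
+ */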
+static void syncppp_status_timerfun(unsigned long d)
+{
+       struct net_device *dev=(struct net_device *)d;
+       struct comx_channel *ch=dev->priv;
+       struct syncppp_data *spch=ch->LINE_privdata;
+       struct sppp *sp = (struct sppp *)sppp_of(dev);
+        
+       if(!(ch->line_status & PROTO_UP) && 
+           (sp->pp_link_state==SPPP_LINK_UP)) {
+               comx_status(dev, ch->line_status | PROTO_UP);
+       }
+       if((ch->line_status & PROTO_UP) &&
+           (sp->pp_link_state==SPPP_LINK_DOWN)) {
+               comx_status(dev, ch->line_status & ~PROTO_UP);
+       }
+       mod_timer(&spch->status_timer,jiffies + HZ*3);
+}
+
+static int syncppp_tx(struct net_device *dev) 
+{
+       struct comx_channel *ch=dev->priv;
+       
+       if(ch->line_status & LINE_UP) {
+               netif_wake_queue(dev);
+       }
+       return 0;
+}
+
+static void syncppp_status(struct net_device *dev, unsigned short status)
+{
+       status &= ~(PROTO_UP | PROTO_LOOP);
+       if(status & LINE_UP) {
+               netif_wake_queue(dev);
+               sppp_open(dev);
+       } else  {
+               /* Line went down */
+               netif_stop_queue(dev);
+               sppp_close(dev);
+       }
+       comx_status(dev, status);
+}
+
+static int syncppp_open(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct syncppp_data *spch = ch->LINE_privdata;
+
+       if (!(ch->init_status & HW_OPEN)) return -ENODEV;
+
+       ch->init_status |= LINE_OPEN;
+       ch->line_status &= ~(PROTO_UP | PROTO_LOOP);
+
+       if(ch->line_status & LINE_UP) {
+               sppp_open(dev);
+       }
+
+       init_timer(&spch->status_timer);
+       spch->status_timer.function=syncppp_status_timerfun;
+       spch->status_timer.data=(unsigned long)dev;
+       spch->status_timer.expires=jiffies + HZ*3;
+       add_timer(&spch->status_timer);
+       
+       return 0;
+}
+
+static int syncppp_close(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct syncppp_data *spch = ch->LINE_privdata;
+
+       if (!(ch->init_status & HW_OPEN)) return -ENODEV;
+       del_timer(&spch->status_timer);
+       
+       sppp_close(dev);
+
+       ch->init_status &= ~LINE_OPEN;
+       ch->line_status &= ~(PROTO_UP | PROTO_LOOP);
+
+       return 0;
+}
+
+static int syncppp_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+
+       netif_stop_queue(dev);
+       switch(ch->HW_send_packet(dev, skb)) {
+               case FRAME_QUEUED:
+                       netif_wake_queue(dev);
+                       break;
+               case FRAME_ACCEPTED:
+               case FRAME_DROPPED:
+                       break;
+               case FRAME_ERROR:
+                       printk(KERN_ERR "%s: Transmit frame error (len %d)\n", 
+                               dev->name, skb->len);
+               break;
+       }
+       return 0;
+}
+
+
+static int syncppp_statistics(struct net_device *dev, char *page) 
+{
+       int len = 0;
+
+       len += sprintf(page + len, " ");
+       return len;
+}
+
+
+static int syncppp_exit(struct net_device *dev) 
+{
+       struct comx_channel *ch = dev->priv;
+
+       sppp_detach(dev);
+
+       dev->flags = 0;
+       dev->type = 0;
+       dev->mtu = 0;
+
+       ch->LINE_rx = NULL;
+       ch->LINE_tx = NULL;
+       ch->LINE_status = NULL;
+       ch->LINE_open = NULL;
+       ch->LINE_close = NULL;
+       ch->LINE_xmit = NULL;
+       ch->LINE_header = NULL;
+       ch->LINE_rebuild_header = NULL;
+       ch->LINE_statistics = NULL;
+
+       kfree(ch->LINE_privdata);
+       ch->LINE_privdata = NULL;
+
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+static int syncppp_init(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct ppp_device *pppdev = (struct ppp_device *)ch->if_ptr;
+
+       ch->LINE_privdata = kmalloc(sizeof(struct syncppp_data), GFP_KERNEL);
+       if (!ch->LINE_privdata)
+               return -ENOMEM;
+
+       pppdev->dev = dev;
+       sppp_attach(pppdev);
+
+       if(ch->protocol == &hdlc_protocol) {
+               pppdev->sppp.pp_flags |= PP_CISCO;
+               dev->type = ARPHRD_HDLC;
+       } else {
+               pppdev->sppp.pp_flags &= ~PP_CISCO;
+               dev->type = ARPHRD_PPP;
+       }
+
+       ch->LINE_rx = sppp_input;
+       ch->LINE_tx = syncppp_tx;
+       ch->LINE_status = syncppp_status;
+       ch->LINE_open = syncppp_open;
+       ch->LINE_close = syncppp_close;
+       ch->LINE_xmit = syncppp_xmit;
+       ch->LINE_header = NULL;
+       ch->LINE_statistics = syncppp_statistics;
+
+
+       MOD_INC_USE_COUNT;
+       return 0;
+}
+
+static struct comx_protocol syncppp_protocol = {
+       "ppp", 
+       VERSION,
+       ARPHRD_PPP, 
+       syncppp_init, 
+       syncppp_exit, 
+       NULL 
+};
+
+static struct comx_protocol hdlc_protocol = {
+       "hdlc", 
+       VERSION,
+       ARPHRD_PPP, 
+       syncppp_init, 
+       syncppp_exit, 
+       NULL 
+};
+
+static int __init comx_proto_ppp_init(void)
+{
+       int ret;
+
+       ret = comx_register_protocol(&hdlc_protocol);
+       if (!ret) {
+               ret = comx_register_protocol(&syncppp_protocol);
+               if (ret)
+                       comx_unregister_protocol(hdlc_protocol.name);
+       }
+       return ret;
+}
+
+static void __exit comx_proto_ppp_exit(void)
+{
+       comx_unregister_protocol(syncppp_protocol.name);
+       comx_unregister_protocol(hdlc_protocol.name);
+}
+
+module_init(comx_proto_ppp_init);
+module_exit(comx_proto_ppp_exit);
diff --git a/drivers/net/wan/comx.c b/drivers/net/wan/comx.c
new file mode 100644 (file)
index 0000000..6c0e3fc
--- /dev/null
@@ -0,0 +1,1128 @@
+/*
+ * Device driver framework for the COMX line of synchronous serial boards
+ * 
+ * for Linux kernel 2.2.X / 2.4.X
+ *
+ * Original authors:  Arpad Bakay <bakay.arpad@synergon.hu>,
+ *                    Peter Bajan <bajan.peter@synergon.hu>,
+ * Previous maintainer: Tivadar Szemethy <tiv@itc.hu>
+ * Current maintainer: Gergely Madarasz <gorgo@itc.hu>
+ *
+ * Copyright (C) 1995-1999 ITConsult-Pro Co.
+ *
+ * Contributors:
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br> (0.85)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Version 0.80 (99/06/11):
+ *             - clean up source code (playing a bit of indent)
+ *             - port back to kernel, add support for non-module versions
+ *             - add support for board resets when channel protocol is down
+ *             - reset the device structure after protocol exit
+ *               the syncppp driver needs it
+ *             - add support for /proc/comx/protocols and 
+ *               /proc/comx/boardtypes
+ *
+ * Version 0.81 (99/06/21):
+ *             - comment out the board reset support code, the locomx
+ *               driver seems not buggy now
+ *             - printk() levels fixed
+ *
+ * Version 0.82 (99/07/08):
+ *             - Handle stats correctly if the lowlevel driver
+ *               is not a comx one (locomx - z85230)
+ *
+ * Version 0.83 (99/07/15):
+ *             - reset line_status when interface is down
+ *
+ * Version 0.84 (99/12/01):
+ *             - comx_status should not check for IFF_UP (to report
+ *               line status from dev->open())
+ *
+ * Version 0.85 (00/08/15):
+ *             - resource release on failure in comx_mkdir
+ *             - fix return value on failure at comx_write_proc
+ *
+ * Changed      (00/10/29, Henner Eisen):
+ *             - comx_rx() / comxlapb_data_indication() return status.
+ */
+
+#define VERSION "0.85"
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/jiffies.h>
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#include <asm/uaccess.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/smp_lock.h>
+
+#ifdef CONFIG_KMOD
+#include <linux/kmod.h>
+#endif
+
+#ifndef CONFIG_PROC_FS
+#error For now, COMX really needs the /proc filesystem
+#endif
+
+#include <net/syncppp.h>
+#include "comx.h"
+
+MODULE_AUTHOR("Gergely Madarasz <gorgo@itc.hu>");
+MODULE_DESCRIPTION("Common code for the COMX synchronous serial adapters");
+MODULE_LICENSE("GPL");
+
+static struct comx_hardware *comx_channels = NULL;
+static struct comx_protocol *comx_lines = NULL;
+
+static int comx_mkdir(struct inode *, struct dentry *, int);
+static int comx_rmdir(struct inode *, struct dentry *);
+static struct dentry *comx_lookup(struct inode *, struct dentry *, struct nameidata *);
+
+static struct inode_operations comx_root_inode_ops = {
+       .lookup = comx_lookup,
+       .mkdir = comx_mkdir,
+       .rmdir = comx_rmdir,
+};
+
+static int comx_delete_dentry(struct dentry *dentry);
+static struct proc_dir_entry *create_comx_proc_entry(char *name, int mode,
+       int size, struct proc_dir_entry *dir);
+
+static struct dentry_operations comx_dentry_operations = {
+       .d_delete       = comx_delete_dentry,
+};
+
+
+static struct proc_dir_entry * comx_root_dir;
+
+struct comx_debugflags_struct  comx_debugflags[] = {
+       { "comx_rx",            DEBUG_COMX_RX           },
+       { "comx_tx",            DEBUG_COMX_TX           },
+       { "hw_tx",              DEBUG_HW_TX             },
+       { "hw_rx",              DEBUG_HW_RX             },
+       { "hdlc_keepalive",     DEBUG_HDLC_KEEPALIVE    },
+       { "comxppp",            DEBUG_COMX_PPP          },
+       { "comxlapb",           DEBUG_COMX_LAPB         },
+       { "dlci",               DEBUG_COMX_DLCI         },
+       { NULL,                 0                       } 
+};
+
+
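+/*
+ * Append a formatted message to the per-channel circular debug buffer,
+ * which is exported through the "debug" proc file; old data is pushed out
+ * when the buffer wraps.
+ */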
+int comx_debug(struct net_device *dev, char *fmt, ...)
+{
+       struct comx_channel *ch = dev->priv;
+       char *page,*str;
+       va_list args;
+       int len;
+
+       if (!ch->debug_area) return 0;
+
+       if (!(page = (char *)__get_free_page(GFP_ATOMIC))) return -ENOMEM;
+
+       va_start(args, fmt);
+       len = vsnprintf(str = page, PAGE_SIZE, fmt, args);
+       va_end(args);
+
+       if (len >= PAGE_SIZE) {
+               printk(KERN_ERR "comx_debug: message too long (len = %d)\n", len);
+               free_page((unsigned long)page);
+               return -EINVAL;
+       }
+
+       while (len) {
+               int to_copy;
+               int free = (ch->debug_start - ch->debug_end + ch->debug_size) 
+                       % ch->debug_size;
+
+               to_copy = min_t(int, free ? free : ch->debug_size, 
+                             min_t(int, ch->debug_size - ch->debug_end, len));
+               memcpy(ch->debug_area + ch->debug_end, str, to_copy);
+               str += to_copy;
+               len -= to_copy;
+               ch->debug_end = (ch->debug_end + to_copy) % ch->debug_size;
+               if (ch->debug_start == ch->debug_end) // Full ? push start away
+                       ch->debug_start = (ch->debug_start + len + 1) % 
+                                       ch->debug_size;
+               ch->debug_file->size = (ch->debug_end - ch->debug_start +
+                                       ch->debug_size) % ch->debug_size;
+       } 
+
+       free_page((unsigned long)page);
+       return 0;
+}
+
+int comx_debug_skb(struct net_device *dev, struct sk_buff *skb, char *msg)
+{
+       struct comx_channel *ch = dev->priv;
+
+       if (!ch->debug_area) return 0;
+       if (!skb) comx_debug(dev, "%s: %s NULL skb\n\n", dev->name, msg);
+       if (!skb->len) comx_debug(dev, "%s: %s empty skb\n\n", dev->name, msg);
+
+       return comx_debug_bytes(dev, skb->data, skb->len, msg);
+}
+
+int comx_debug_bytes(struct net_device *dev, unsigned char *bytes, int len, 
+               char *msg)
+{
+       int pos = 0;
+       struct comx_channel *ch = dev->priv;
+
+       if (!ch->debug_area) return 0;
+
+       comx_debug(dev, "%s: %s len %d\n", dev->name, msg, len);
+
+       while (pos != len) {
+               char line[80];
+               int i = 0;
+
+               memset(line, 0, 80);
+               sprintf(line,"%04d ", pos);
+               do {
+                       sprintf(line + 5 + (pos % 16) * 3, "%02x", bytes[pos]);
+                       sprintf(line + 60 + (pos % 16), "%c", 
+                               isprint(bytes[pos]) ? bytes[pos] : '.');
+                       pos++;
+               } while (pos != len && pos % 16);
+
+               while ( i++ != 78 ) if (line[i] == 0) line[i] = ' ';
+               line[77] = '\n';
+               line[78] = 0;
+       
+               comx_debug(dev, "%s", line);
+       }
+       comx_debug(dev, "\n");
+       return 0;
+}
+
+static void comx_loadavg_timerfun(unsigned long d)
+{
+       struct net_device *dev = (struct net_device *)d;
+       struct comx_channel *ch = dev->priv;
+
+       ch->avg_bytes[ch->loadavg_counter] = ch->current_stats->rx_bytes;
+       ch->avg_bytes[ch->loadavg_counter + ch->loadavg_size] = 
+               ch->current_stats->tx_bytes;
+
+       ch->loadavg_counter = (ch->loadavg_counter + 1) % ch->loadavg_size;
+
+       mod_timer(&ch->loadavg_timer,jiffies + HZ * ch->loadavg[0]);
+}
+
+#if 0
+static void comx_reset_timerfun(unsigned long d)
+{ 
+       struct net_device *dev = (struct net_device *)d;
+       struct comx_channel *ch = dev->priv;
+
+       if(!(ch->line_status & (PROTO_LOOP | PROTO_UP))) {
+               if(test_and_set_bit(0,&ch->reset_pending) && ch->HW_reset) {
+                       ch->HW_reset(dev);
+               }
+       }
+
+       mod_timer(&ch->reset_timer, jiffies + HZ * ch->reset_timeout);
+}
+#endif                                            
+
+static int comx_open(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct proc_dir_entry *comxdir = ch->procdir->subdir;
+       int ret=0;
+
+       if (!ch->protocol || !ch->hardware) return -ENODEV;
+
+       if ((ret = ch->HW_open(dev))) return ret;
+       if ((ret = ch->LINE_open(dev))) { 
+               ch->HW_close(dev); 
+               return ret; 
+       }
+
+       for (; comxdir ; comxdir = comxdir->next) {
+               if (strcmp(comxdir->name, FILENAME_HARDWARE) == 0 ||
+                  strcmp(comxdir->name, FILENAME_PROTOCOL) == 0)
+                       comxdir->mode = S_IFREG | 0444;
+       }
+
+#if 0
+       ch->reset_pending = 1;
+       ch->reset_timeout = 30;
+       ch->reset_timer.function = comx_reset_timerfun;
+       ch->reset_timer.data = (unsigned long)dev;
+       ch->reset_timer.expires = jiffies + HZ * ch->reset_timeout;
+       add_timer(&ch->reset_timer);
+#endif
+
+       return 0;
+}
+
+static int comx_close(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       struct proc_dir_entry *comxdir = ch->procdir->subdir;
+       int ret = -ENODEV;
+
+       if (test_and_clear_bit(0, &ch->lineup_pending)) {
+               del_timer(&ch->lineup_timer);
+       }
+
+#if 0  
+       del_timer(&ch->reset_timer);
+#endif
+
+       if (ch->init_status & LINE_OPEN && ch->protocol && ch->LINE_close) {
+               ret = ch->LINE_close(dev);
+       }
+
+       if (ret) return ret;
+
+       if (ch->init_status & HW_OPEN && ch->hardware && ch->HW_close) {
+               ret = ch->HW_close(dev);
+       }
+       
+       ch->line_status=0;
+
+       for (; comxdir ; comxdir = comxdir->next) {
+               if (strcmp(comxdir->name, FILENAME_HARDWARE) == 0 ||
+                   strcmp(comxdir->name, FILENAME_PROTOCOL) == 0)
+                       comxdir->mode = S_IFREG | 0644;
+       }
+
+       return ret;
+}
+
+void comx_status(struct net_device *dev, int status)
+{
+       struct comx_channel *ch = dev->priv;
+
+#if 0
+       if(status & (PROTO_UP | PROTO_LOOP)) {
+               clear_bit(0,&ch->reset_pending);
+       }
+#endif
+
+       printk(KERN_NOTICE "Interface %s: modem status %s, line protocol %s\n",
+                   dev->name, status & LINE_UP ? "UP" : "DOWN", 
+                   status & PROTO_LOOP ? "LOOP" : status & PROTO_UP ? 
+                   "UP" : "DOWN");
+       
+       ch->line_status = status;
+}
+
+static int comx_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+       int rc;
+
+       if (skb->len > dev->mtu + dev->hard_header_len) {
+               printk(KERN_ERR "comx_xmit: %s: skb->len %d > dev->mtu %d\n", dev->name,
+               (int)skb->len, dev->mtu);
+       }
+       
+       if (ch->debug_flags & DEBUG_COMX_TX) {
+               comx_debug_skb(dev, skb, "comx_xmit skb");
+       }
+       
+       rc=ch->LINE_xmit(skb, dev);
+//     if (!rc) dev_kfree_skb(skb);
+
+       return rc;
+}
+
+static int comx_header(struct sk_buff *skb, struct net_device *dev, 
+       unsigned short type, void *daddr, void *saddr, unsigned len) 
+{
+       struct comx_channel *ch = dev->priv;
+
+       if (ch->LINE_header) {
+               return (ch->LINE_header(skb, dev, type, daddr, saddr, len));
+       } else {
+               return 0;
+       }
+}
+
+static int comx_rebuild_header(struct sk_buff *skb) 
+{
+       struct net_device *dev = skb->dev;
+       struct comx_channel *ch = dev->priv;
+
+       if (ch->LINE_rebuild_header) {
+               return(ch->LINE_rebuild_header(skb));
+       } else {
+               return 0;
+       }
+}
+
+int comx_rx(struct net_device *dev, struct sk_buff *skb)
+{
+       struct comx_channel *ch = dev->priv;
+
+       if (ch->debug_flags & DEBUG_COMX_RX) {
+               comx_debug_skb(dev, skb, "comx_rx skb");
+       }
+       if (skb) {
+               netif_rx(skb);
+               dev->last_rx = jiffies;
+       }
+       return 0;
+}
+
+static struct net_device_stats *comx_stats(struct net_device *dev)
+{
+       struct comx_channel *ch = dev->priv;
+
+       return ch->current_stats;
+}
+
+void comx_lineup_func(unsigned long d)
+{
+       struct net_device *dev = (struct net_device *)d;
+       struct comx_channel *ch = dev->priv;
+
+       del_timer(&ch->lineup_timer);
+       clear_bit(0, &ch->lineup_pending);
+
+       if (ch->LINE_status) {
+               ch->LINE_status(dev, ch->line_status |= LINE_UP);
+       }
+}
+
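+/*
+ * Byte counters are sampled into a circular buffer every loadavg[0] seconds
+ * by comx_loadavg_timerfun(); LOADAVG(avg, off) converts the difference over
+ * the loadavg[avg]-second window into bits/s, with off selecting the rx (0)
+ * or tx (loadavg_size) half of the buffer.
+ */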
+#define LOADAVG(avg, off) (int) \
+       ((ch->avg_bytes[(ch->loadavg_counter - 1 + ch->loadavg_size * 2) \
+       % ch->loadavg_size + off] -  ch->avg_bytes[(ch->loadavg_counter - 1 \
+               - ch->loadavg[avg] / ch->loadavg[0] + ch->loadavg_size * 2) \
+               % ch->loadavg_size + off]) / ch->loadavg[avg] * 8)
+
+static int comx_statistics(struct net_device *dev, char *page)
+{
+       struct comx_channel *ch = dev->priv;
+       int len = 0;
+       int tmp;
+       int i = 0;
+       char tmpstr[20];
+       int tmpstrlen = 0;
+
+       len += sprintf(page + len, "Interface administrative status is %s, "
+               "modem status is %s, protocol is %s\n", 
+               dev->flags & IFF_UP ? "UP" : "DOWN",
+               ch->line_status & LINE_UP ? "UP" : "DOWN",
+               ch->line_status & PROTO_LOOP ? "LOOP" : 
+               ch->line_status & PROTO_UP ? "UP" : "DOWN");
+       len += sprintf(page + len, "Modem status changes: %lu, Transmitter status "
+               "is %s, tbusy: %d\n", ch->current_stats->tx_carrier_errors, ch->HW_txe ? 
+               ch->HW_txe(dev) ? "IDLE" : "BUSY" : "NOT READY", netif_running(dev));
+       len += sprintf(page + len, "Interface load (input): %d / %d / %d bits/s (",
+               LOADAVG(0,0), LOADAVG(1, 0), LOADAVG(2, 0));
+       tmpstr[0] = 0;
+       for (i=0; i != 3; i++) {
+               char tf;
+
+               tf = ch->loadavg[i] % 60 == 0 && 
+                       ch->loadavg[i] / 60 > 0 ? 'm' : 's';
+               tmpstrlen += sprintf(tmpstr + tmpstrlen, "%d%c%s", 
+                       ch->loadavg[i] / (tf == 'm' ? 60 : 1), tf, 
+                       i == 2 ? ")\n" : "/");
+       }
+       len += sprintf(page + len, 
+               "%s              (output): %d / %d / %d bits/s (%s", tmpstr, 
+               LOADAVG(0,ch->loadavg_size), LOADAVG(1, ch->loadavg_size), 
+               LOADAVG(2, ch->loadavg_size), tmpstr);
+
+       len += sprintf(page + len, "Debug flags: ");
+       tmp = len; i = 0;
+       while (comx_debugflags[i].name) {
+               if (ch->debug_flags & comx_debugflags[i].value) 
+                       len += sprintf(page + len, "%s ", 
+                               comx_debugflags[i].name);
+               i++;
+       }
+       len += sprintf(page + len, "%s\n", tmp == len ? "none" : "");
+
+       len += sprintf(page + len, "RX errors: len: %lu, overrun: %lu, crc: %lu, "
+               "aborts: %lu\n           buffer overrun: %lu, pbuffer overrun: %lu\n"
+               "TX errors: underrun: %lu\n",
+               ch->current_stats->rx_length_errors, ch->current_stats->rx_over_errors, 
+               ch->current_stats->rx_crc_errors, ch->current_stats->rx_frame_errors, 
+               ch->current_stats->rx_missed_errors, ch->current_stats->rx_fifo_errors,
+               ch->current_stats->tx_fifo_errors);
+
+       if (ch->LINE_statistics && (ch->init_status & LINE_OPEN)) {
+               len += ch->LINE_statistics(dev, page + len);
+       } else {
+               len += sprintf(page+len, "Line status: driver not initialized\n");
+       }
+       if (ch->HW_statistics && (ch->init_status & HW_OPEN)) {
+               len += ch->HW_statistics(dev, page + len);
+       } else {
+               len += sprintf(page+len, "Board status: driver not initialized\n");
+       }
+
+       return len;
+}
+
+static int comx_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+       struct comx_channel *ch = dev->priv;
+
+       if (ch->LINE_ioctl) {
+               return(ch->LINE_ioctl(dev, ifr, cmd));
+       }
+       return -EINVAL;
+}
+
+static void comx_reset_dev(struct net_device *dev)
+{
+       dev->open = comx_open;
+       dev->stop = comx_close;
+       dev->hard_start_xmit = comx_xmit;
+       dev->hard_header = comx_header;
+       dev->rebuild_header = comx_rebuild_header;
+       dev->get_stats = comx_stats;
+       dev->do_ioctl = comx_ioctl;
+       dev->change_mtu = NULL;
+       dev->tx_queue_len = 20;
+       dev->flags = IFF_NOARP;
+}
+
+static int comx_init_dev(struct net_device *dev)
+{
+       struct comx_channel *ch;
+
+       if ((ch = kmalloc(sizeof(struct comx_channel), GFP_KERNEL)) == NULL) {
+               return -ENOMEM;
+       }
+       memset(ch, 0, sizeof(struct comx_channel));
+
+       ch->loadavg[0] = 5;
+       ch->loadavg[1] = 300;
+       ch->loadavg[2] = 900;
+       ch->loadavg_size = ch->loadavg[2] / ch->loadavg[0] + 1; 
+       if ((ch->avg_bytes = kmalloc(ch->loadavg_size * 
+               sizeof(unsigned long) * 2, GFP_KERNEL)) == NULL) {
+               kfree(ch);
+               return -ENOMEM;
+       }
+
+       memset(ch->avg_bytes, 0, ch->loadavg_size * sizeof(unsigned long) * 2);
+       ch->loadavg_counter = 0;
+       ch->loadavg_timer.function = comx_loadavg_timerfun;
+       ch->loadavg_timer.data = (unsigned long)dev;
+       ch->loadavg_timer.expires = jiffies + HZ * ch->loadavg[0];
+       add_timer(&ch->loadavg_timer);
+
+       dev->priv = (void *)ch;
+       ch->dev = dev;
+       ch->line_status &= ~LINE_UP;
+
+       ch->current_stats = &ch->stats;
+
+       comx_reset_dev(dev);
+       return 0;
+}
+
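+/*
+ * Per-channel proc reads: "status" renders the full statistics page, while
+ * "boardtype", "protocol" and "lineup_delay" just echo the current setting.
+ */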
+static int comx_read_proc(char *page, char **start, off_t off, int count, 
+       int *eof, void *data)
+{
+       struct proc_dir_entry *file = (struct proc_dir_entry *)data;
+       struct net_device *dev = file->parent->data;
+       struct comx_channel *ch = dev->priv;
+       int len = 0;
+
+       if (strcmp(file->name, FILENAME_STATUS) == 0) {
+               len = comx_statistics(dev, page);
+       } else if (strcmp(file->name, FILENAME_HARDWARE) == 0) {
+               len = sprintf(page, "%s\n", ch->hardware ? 
+                       ch->hardware->name : HWNAME_NONE);
+       } else if (strcmp(file->name, FILENAME_PROTOCOL) == 0) {
+               len = sprintf(page, "%s\n", ch->protocol ? 
+                       ch->protocol->name : PROTONAME_NONE);
+       } else if (strcmp(file->name, FILENAME_LINEUPDELAY) == 0) {
+               len = sprintf(page, "%01d\n", ch->lineup_delay);
+       }
+
+       if (off >= len) {
+               *eof = 1;
+               return 0;
+       }
+
+       *start = page + off;
+       if (count >= len - off) {
+               *eof = 1;
+       }
+       return min_t(int, count, len - off);
+}
+
+
+static int comx_root_read_proc(char *page, char **start, off_t off, int count, 
+       int *eof, void *data)
+{
+       struct proc_dir_entry *file = (struct proc_dir_entry *)data;
+       struct comx_hardware *hw;
+       struct comx_protocol *line;
+
+       int len = 0;
+
+       if (strcmp(file->name, FILENAME_HARDWARELIST) == 0) {
+               for(hw=comx_channels;hw;hw=hw->next) 
+                       len+=sprintf(page+len, "%s\n", hw->name);
+       } else if (strcmp(file->name, FILENAME_PROTOCOLLIST) == 0) {
+               for(line=comx_lines;line;line=line->next)
+                       len+=sprintf(page+len, "%s\n", line->name);
+       }
+
+       if (off >= len) {
+               *eof = 1;
+               return 0;
+       }
+
+       *start = page + off;
+       if (count >= len - off) {
+               *eof = 1;
+       }
+       return min_t(int, count, len - off);
+}
+
+
+
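+/*
+ * Per-channel proc writes: "debug" resizes the debug buffer or toggles the
+ * named debug flags (+flag / -flag), "boardtype" and "protocol" select the
+ * hardware and protocol drivers (auto-loading comx-hw-* / comx-proto-*
+ * modules via kmod when configured), and "lineup_delay" sets the line-up
+ * delay (1-9 seconds).
+ */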
+static int comx_write_proc(struct file *file, const char *buffer, u_long count,
+       void *data)
+{
+       struct proc_dir_entry *entry = (struct proc_dir_entry *)data;
+       struct net_device *dev = (struct net_device *)entry->parent->data;
+       struct comx_channel *ch = dev->priv;
+       char *page;
+       struct comx_hardware *hw = comx_channels;
+       struct comx_protocol *line = comx_lines;
+       int ret=0;
+
+       if (count > PAGE_SIZE) {
+               printk(KERN_ERR "count is %lu > %d!!!\n", count, (int)PAGE_SIZE);
+               return -ENOSPC;
+       }
+
+       if (!(page = (char *)__get_free_page(GFP_KERNEL))) return -ENOMEM;
+
+       if(copy_from_user(page, buffer, count))
+       {
+               count = -EFAULT;
+               goto out;
+       }
+
+       if (page[count-1] == '\n')
+               page[count-1] = '\0';
+       else if (count < PAGE_SIZE)
+               page[count] = '\0';
+       else {
+               /* a full page with no trailing newline cannot be terminated */
+               count = -EINVAL;
+               goto out;
+       }
+
+       if (strcmp(entry->name, FILENAME_DEBUG) == 0) {
+               int i;
+               int ret = 0;
+
+               if ((i = simple_strtoul(page, NULL, 10)) != 0) {
+                       unsigned long flags;
+
+                       save_flags(flags); cli();
+                       if (ch->debug_area) kfree(ch->debug_area);
+                       if ((ch->debug_area = kmalloc(ch->debug_size = i, 
+                               GFP_KERNEL)) == NULL) {
+                               ret = -ENOMEM;
+                       }
+                       ch->debug_start = ch->debug_end = 0;
+                       restore_flags(flags);
+                       free_page((unsigned long)page);
+                       return ret ? ret : count;
+               }
+               
+               if (*page != '+' && *page != '-') {
+                       free_page((unsigned long)page);
+                       return -EINVAL;
+               }
+               while (comx_debugflags[i].value && 
+                       strncmp(comx_debugflags[i].name, page + 1, 
+                       strlen(comx_debugflags[i].name))) {
+                       i++;
+               }
+       
+               if (comx_debugflags[i].value == 0) {
+                       printk(KERN_ERR "Invalid debug option\n");
+                       free_page((unsigned long)page);
+                       return -EINVAL;
+               }
+               if (*page == '+') {
+                       ch->debug_flags |= comx_debugflags[i].value;
+               } else {
+                       ch->debug_flags &= ~comx_debugflags[i].value;
+               }
+       } else if (strcmp(entry->name, FILENAME_HARDWARE) == 0) {
+               if(strlen(page)>10) {
+                       free_page((unsigned long)page);
+                       return -EINVAL;
+               }
+               while (hw) { 
+                       if (strcmp(hw->name, page) == 0) {
+                               break;
+                       } else {
+                               hw = hw->next;
+                       }
+               }
+#ifdef CONFIG_KMOD
+               if(!hw && comx_strcasecmp(HWNAME_NONE,page) != 0){
+                       request_module("comx-hw-%s",page);
+               }               
+               hw=comx_channels;
+               while (hw) {
+                       if (comx_strcasecmp(hw->name, page) == 0) {
+                               break;
+                       } else {
+                               hw = hw->next;
+                       }
+               }
+#endif
+
+               if (comx_strcasecmp(HWNAME_NONE, page) != 0 && !hw)  {
+                       free_page((unsigned long)page);
+                       return -ENODEV;
+               }
+               if (ch->init_status & HW_OPEN) {
+                       free_page((unsigned long)page);
+                       return -EBUSY;
+               }
+               if (ch->hardware && ch->hardware->hw_exit && 
+                  (ret=ch->hardware->hw_exit(dev))) {
+                       free_page((unsigned long)page);
+                       return ret;
+               }
+               ch->hardware = hw;
+               entry->size = strlen(page) + 1;
+               if (hw && hw->hw_init) hw->hw_init(dev);
+       } else if (strcmp(entry->name, FILENAME_PROTOCOL) == 0) {
+               if(strlen(page)>10) {
+                       free_page((unsigned long)page);
+                       return -EINVAL;
+               }
+               while (line) {
+                       if (comx_strcasecmp(line->name, page) == 0) {
+                               break;
+                       } else {
+                               line = line->next;
+                       }
+               }
+#ifdef CONFIG_KMOD
+               if(!line && comx_strcasecmp(PROTONAME_NONE, page) != 0) {
+                       request_module("comx-proto-%s",page);
+               }               
+               line=comx_lines;
+               while (line) {
+                       if (comx_strcasecmp(line->name, page) == 0) {
+                               break;
+                       } else {
+                               line = line->next;
+                       }
+               }
+#endif
+               
+               if (comx_strcasecmp(PROTONAME_NONE, page) != 0 && !line) {
+                       free_page((unsigned long)page);
+                       return -ENODEV;
+               }
+               
+               if (ch->init_status & LINE_OPEN) {
+                       free_page((unsigned long)page);
+                       return -EBUSY;
+               }
+               
+               if (ch->protocol && ch->protocol->line_exit && 
+                   (ret=ch->protocol->line_exit(dev))) {
+                       free_page((unsigned long)page);
+                       return ret;
+               }
+               ch->protocol = line;
+               entry->size = strlen(page) + 1;
+               comx_reset_dev(dev);
+               if (line && line->line_init) line->line_init(dev);
+       } else if (strcmp(entry->name, FILENAME_LINEUPDELAY) == 0) {
+               int i;
+
+               if ((i = simple_strtoul(page, NULL, 10)) != 0) {
+                       if (i >=0 && i < 10) { 
+                               ch->lineup_delay = i; 
+                       } else {
+                               printk(KERN_ERR "comx: invalid lineup_delay value\n");
+                       }
+               }
+       }
+out:
+       free_page((unsigned long)page);
+       return count;
+}
+
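+/*
+ * mkdir in /proc/comx creates a new channel: a net_device is allocated and
+ * registered, and the per-channel proc files (boardtype, protocol, status,
+ * lineup_delay, debug) are created in the new directory.
+ */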
+static int comx_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+       struct proc_dir_entry *new_dir, *debug_file;
+       struct net_device *dev;
+       struct comx_channel *ch;
+       int ret = -EIO;
+
+       if ((dev = kmalloc(sizeof(struct net_device), GFP_KERNEL)) == NULL) {
+               return -ENOMEM;
+       }
+       memset(dev, 0, sizeof(struct net_device));
+
+       lock_kernel();
+       if ((new_dir = create_proc_entry(dentry->d_name.name, mode | S_IFDIR, 
+               comx_root_dir)) == NULL) {
+               goto cleanup_dev;
+       }
+
+       new_dir->nlink = 2;
+       new_dir->data = NULL; // the struct net_device pointer is filled in later
+
+       /* These entries are mandatory */
+       if (!create_comx_proc_entry(FILENAME_HARDWARE, 0644, 
+           strlen(HWNAME_NONE) + 1, new_dir)) {
+               goto cleanup_new_dir;
+       }
+       if (!create_comx_proc_entry(FILENAME_PROTOCOL, 0644, 
+           strlen(PROTONAME_NONE) + 1, new_dir)) {
+               goto cleanup_filename_hardware;
+       }
+       if (!create_comx_proc_entry(FILENAME_STATUS, 0444, 0, new_dir)) {
+               goto cleanup_filename_protocol;
+       }
+       if (!create_comx_proc_entry(FILENAME_LINEUPDELAY, 0644, 2, new_dir)) {
+               goto cleanup_filename_status;
+       }
+
+       if ((debug_file = create_proc_entry(FILENAME_DEBUG, 
+           S_IFREG | 0644, new_dir)) == NULL) {
+               goto cleanup_filename_lineupdelay;
+       }
+       debug_file->data = (void *)debug_file; 
+       debug_file->read_proc = NULL; // see below
+       debug_file->write_proc = &comx_write_proc;
+       debug_file->nlink = 1;
+
+       strcpy(dev->name, (char *)new_dir->name);
+       dev->init = comx_init_dev;
+
+       if (register_netdevice(dev)) {
+               goto cleanup_filename_debug;
+       }
+       ch = dev->priv;
+       if((ch->if_ptr = (void *)kmalloc(sizeof(struct ppp_device), 
+                                GFP_KERNEL)) == NULL) {
+               goto cleanup_register;
+       }
+       memset(ch->if_ptr, 0, sizeof(struct ppp_device));
+       ch->debug_file = debug_file; 
+       ch->procdir = new_dir;
+       new_dir->data = dev;
+
+       ch->debug_start = ch->debug_end = 0;
+       if ((ch->debug_area = kmalloc(ch->debug_size = DEFAULT_DEBUG_SIZE, 
+           GFP_KERNEL)) == NULL) {
+               ret = -ENOMEM;
+               goto cleanup_if_ptr;
+       }
+
+       ch->lineup_delay = DEFAULT_LINEUP_DELAY;
+
+       MOD_INC_USE_COUNT;
+       unlock_kernel();
+       return 0;
+cleanup_if_ptr:
+       kfree(ch->if_ptr);
+cleanup_register:
+       unregister_netdevice(dev);
+cleanup_filename_debug:
+       remove_proc_entry(FILENAME_DEBUG, new_dir);
+cleanup_filename_lineupdelay:
+       remove_proc_entry(FILENAME_LINEUPDELAY, new_dir);
+cleanup_filename_status:
+       remove_proc_entry(FILENAME_STATUS, new_dir);
+cleanup_filename_protocol:
+       remove_proc_entry(FILENAME_PROTOCOL, new_dir);
+cleanup_filename_hardware:
+       remove_proc_entry(FILENAME_HARDWARE, new_dir);
+cleanup_new_dir:
+       remove_proc_entry(dentry->d_name.name, comx_root_dir);
+cleanup_dev:
+       kfree(dev);
+       unlock_kernel();
+       return ret;
+}
+
+static int comx_rmdir(struct inode *dir, struct dentry *dentry)
+{
+       struct proc_dir_entry *entry = PDE(dentry->d_inode);
+       struct net_device *dev;
+       struct comx_channel *ch;
+       int ret;
+
+       lock_kernel();
+       dev = entry->data;
+       ch = dev->priv;
+       if (dev->flags & IFF_UP) {
+               printk(KERN_ERR "%s: down interface before removing it\n", dev->name);
+               unlock_kernel();
+               return -EBUSY;
+       }
+
+       if (ch->protocol && ch->protocol->line_exit && 
+           (ret=ch->protocol->line_exit(dev))) {
+               unlock_kernel();
+               return ret;
+       }
+       if (ch->hardware && ch->hardware->hw_exit && 
+          (ret=ch->hardware->hw_exit(dev))) { 
+               if(ch->protocol && ch->protocol->line_init) {
+                       ch->protocol->line_init(dev);
+               }
+               unlock_kernel();
+               return ret;
+       }
+       ch->protocol = NULL;
+       ch->hardware = NULL;
+
+       del_timer(&ch->loadavg_timer);
+       kfree(ch->avg_bytes);
+
+       unregister_netdev(dev);
+       if (ch->debug_area) {
+               kfree(ch->debug_area);
+       }
+       if (dev->priv) {
+               kfree(dev->priv);
+       }
+       free_netdev(dev);
+
+       remove_proc_entry(FILENAME_DEBUG, entry);
+       remove_proc_entry(FILENAME_LINEUPDELAY, entry);
+       remove_proc_entry(FILENAME_STATUS, entry);
+       remove_proc_entry(FILENAME_HARDWARE, entry);
+       remove_proc_entry(FILENAME_PROTOCOL, entry);
+       remove_proc_entry(dentry->d_name.name, comx_root_dir);
+
+       MOD_DEC_USE_COUNT;
+       unlock_kernel();
+       return 0;
+}
+
+static struct dentry *comx_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+{
+       struct proc_dir_entry *de;
+       struct inode *inode = NULL;
+
+       lock_kernel();
+       if ((de = PDE(dir)) != NULL) {
+               for (de = de->subdir ; de ; de = de->next) {
+                       if ((de->namelen == dentry->d_name.len) &&
+                           (memcmp(dentry->d_name.name, de->name, 
+                           de->namelen) == 0)) {
+                               if ((inode = proc_get_inode(dir->i_sb, 
+                                   de->low_ino, de)) == NULL) { 
+                                       printk(KERN_ERR "COMX: lookup error\n"); 
+                                       unlock_kernel();
+                                       return ERR_PTR(-EINVAL); 
+                               }
+                               break;
+                       }
+               }
+       }
+       unlock_kernel();
+       dentry->d_op = &comx_dentry_operations;
+       d_add(dentry, inode);
+       return NULL;
+}
+
+int comx_strcasecmp(const char *cs, const char *ct)
+{
+       register signed char __res;
+
+       while (1) {
+               if ((__res = toupper(*cs) - toupper(*ct++)) != 0 || !*cs++) {
+                       break;
+               }
+       }
+       return __res;
+}
+
+static int comx_delete_dentry(struct dentry *dentry)
+{
+       return 1;
+}
+
+static struct proc_dir_entry *create_comx_proc_entry(char *name, int mode,
+       int size, struct proc_dir_entry *dir)
+{
+       struct proc_dir_entry *new_file;
+
+       if ((new_file = create_proc_entry(name, S_IFREG | mode, dir)) != NULL) {
+               new_file->data = (void *)new_file;
+               new_file->read_proc = &comx_read_proc;
+               new_file->write_proc = &comx_write_proc;
+               new_file->size = size;
+               new_file->nlink = 1;
+       }
+       return(new_file);
+}
+
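+/*
+ * Hardware and protocol drivers register themselves on simple singly linked
+ * lists keyed by name; registering an already known name fails with -1.
+ */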
+int comx_register_hardware(struct comx_hardware *comx_hw)
+{
+       struct comx_hardware *hw = comx_channels;
+
+       if (!hw) {
+               comx_channels = comx_hw;
+       } else {
+               while (hw->next != NULL && strcmp(comx_hw->name, hw->name) != 0) {
+                       hw = hw->next;
+               }
+               if (strcmp(comx_hw->name, hw->name) == 0) {
+                       return -1;
+               }
+               hw->next = comx_hw;
+       }
+
+       printk(KERN_INFO "COMX: driver for hardware type %s, version %s\n", comx_hw->name, comx_hw->version);
+       return 0;
+}
+
+int comx_unregister_hardware(char *name)
+{
+       struct comx_hardware *hw = comx_channels;
+
+       if (!hw) {
+               return -1;
+       }
+
+       if (strcmp(hw->name, name) == 0) {
+               comx_channels = comx_channels->next;
+               return 0;
+       }
+
+       while (hw->next != NULL && strcmp(hw->next->name,name) != 0) {
+               hw = hw->next;
+       }
+
+       if (hw->next != NULL && strcmp(hw->next->name, name) == 0) {
+               hw->next = hw->next->next;
+               return 0;
+       }
+       return -1;
+}
+
+int comx_register_protocol(struct comx_protocol *comx_line)
+{
+       struct comx_protocol *pr = comx_lines;
+
+       if (!pr) {
+               comx_lines = comx_line;
+       } else {
+               while (pr->next != NULL && strcmp(comx_line->name, pr->name) !=0) {
+                       pr = pr->next;
+               }
+               if (strcmp(comx_line->name, pr->name) == 0) {
+                       return -1;
+               }
+               pr->next = comx_line;
+       }
+
+       printk(KERN_INFO "COMX: driver for protocol type %s, version %s\n", comx_line->name, comx_line->version);
+       return 0;
+}
+
+int comx_unregister_protocol(char *name)
+{
+       struct comx_protocol *pr = comx_lines;
+
+       if (!pr) {
+               return -1;
+       }
+
+       if (strcmp(pr->name, name) == 0) {
+               comx_lines = comx_lines->next;
+               return 0;
+       }
+
+       while (pr->next != NULL && strcmp(pr->next->name,name) != 0) {
+               pr = pr->next;
+       }
+
+       if (pr->next != NULL && strcmp(pr->next->name, name) == 0) {
+               pr->next = pr->next->next;
+               return 0;
+       }
+       return -1;
+}
+
+static int __init comx_init(void)
+{
+       struct proc_dir_entry *new_file;
+
+       comx_root_dir = create_proc_entry("comx", 
+               S_IFDIR | S_IWUSR | S_IRUGO | S_IXUGO, &proc_root);
+       if (!comx_root_dir)
+               return -ENOMEM;
+       comx_root_dir->proc_iops = &comx_root_inode_ops;
+
+       if ((new_file = create_proc_entry(FILENAME_HARDWARELIST, 
+          S_IFREG | 0444, comx_root_dir)) == NULL) {
+               return -ENOMEM;
+       }
+       
+       new_file->data = new_file;
+       new_file->read_proc = &comx_root_read_proc;
+       new_file->write_proc = NULL;
+       new_file->nlink = 1;
+
+       if ((new_file = create_proc_entry(FILENAME_PROTOCOLLIST, 
+          S_IFREG | 0444, comx_root_dir)) == NULL) {
+               return -ENOMEM;
+       }
+       
+       new_file->data = new_file;
+       new_file->read_proc = &comx_root_read_proc;
+       new_file->write_proc = NULL;
+       new_file->nlink = 1;
+
+
+       printk(KERN_INFO "COMX: driver version %s (C) 1995-1999 ITConsult-Pro Co. <info@itc.hu>\n", 
+               VERSION);
+       return 0;
+}
+
+static void __exit comx_exit(void)
+{
+       remove_proc_entry(FILENAME_HARDWARELIST, comx_root_dir);
+       remove_proc_entry(FILENAME_PROTOCOLLIST, comx_root_dir);
+       remove_proc_entry(comx_root_dir->name, &proc_root);
+}
+
+module_init(comx_init);
+module_exit(comx_exit);
+
+EXPORT_SYMBOL(comx_register_hardware);
+EXPORT_SYMBOL(comx_unregister_hardware);
+EXPORT_SYMBOL(comx_register_protocol);
+EXPORT_SYMBOL(comx_unregister_protocol);
+EXPORT_SYMBOL(comx_debug_skb);
+EXPORT_SYMBOL(comx_debug_bytes);
+EXPORT_SYMBOL(comx_debug);
+EXPORT_SYMBOL(comx_lineup_func);
+EXPORT_SYMBOL(comx_status);
+EXPORT_SYMBOL(comx_rx);
+EXPORT_SYMBOL(comx_strcasecmp);
+EXPORT_SYMBOL(comx_root_dir);
diff --git a/drivers/net/wan/comx.h b/drivers/net/wan/comx.h
new file mode 100644 (file)
index 0000000..0f7404f
--- /dev/null
@@ -0,0 +1,232 @@
+/*
+ * General definitions for the COMX driver 
+ * 
+ * Original authors:  Arpad Bakay <bakay.arpad@synergon.hu>,
+ *                    Peter Bajan <bajan.peter@synergon.hu>,
+ * Previous maintainer: Tivadar Szemethy <tiv@itc.hu>
+ * Currently maintained by: Gergely Madarasz <gorgo@itc.hu>
+ *
+ * Copyright (C) 1995-1999 ITConsult-Pro Co. <info@itc.hu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * net_device_stats:
+ *     rx_length_errors        rec_len < 4 || rec_len > 2000
+ *     rx_over_errors          receive overrun (OVR)
+ *     rx_crc_errors           rx crc error
+ *     rx_frame_errors         aborts rec'd (ABO)
+ *     rx_fifo_errors          status fifo overrun (PBUFOVR)
+ *     rx_missed_errors        receive buffer overrun (BUFOVR)
+ *     tx_aborted_errors       ?
+ *     tx_carrier_errors       modem line status changes
+ *     tx_fifo_errors          tx underrun (locomx)
+ */
+#include <linux/config.h>
+
+struct comx_protocol {
+       char    *name;
+       char    *version;
+       unsigned short encap_type;
+       int     (*line_init)(struct net_device *dev);
+       int     (*line_exit)(struct net_device *dev);
+       struct comx_protocol *next;
+       };
+
+struct comx_hardware {
+       char *name; 
+       char *version;
+       int     (*hw_init)(struct net_device *dev);
+       int     (*hw_exit)(struct net_device *dev);
+       int     (*hw_dump)(struct net_device *dev);
+       struct comx_hardware *next;
+       };
+
+struct comx_channel {
+       void            *if_ptr;        // General purpose pointer
+       struct net_device       *dev;           // Where we belong to
+       struct net_device       *twin;          // On dual-port cards
+       struct proc_dir_entry *procdir; // the directory
+
+       unsigned char   init_status;
+       unsigned char   line_status;
+
+       struct timer_list lineup_timer; // against line jitter
+       long int        lineup_pending;
+       unsigned char   lineup_delay;
+
+#if 0
+       struct timer_list reset_timer; // for board resetting
+       long            reset_pending;
+       int             reset_timeout;
+#endif
+
+       struct net_device_stats stats;  
+       struct net_device_stats *current_stats;
+#if 0
+       unsigned long   board_resets;
+#endif
+       unsigned long   *avg_bytes;
+       int             loadavg_counter, loadavg_size;
+       int             loadavg[3];
+       struct timer_list loadavg_timer;
+       int             debug_flags;
+       char            *debug_area;
+       int             debug_start, debug_end, debug_size;
+       struct proc_dir_entry *debug_file;
+#ifdef CONFIG_COMX_DEBUG_RAW
+       char            *raw;
+       int             raw_len;
+#endif
+       // LINE specific        
+       struct comx_protocol *protocol;
+       void            (*LINE_rx)(struct net_device *dev, struct sk_buff *skb);
+       int             (*LINE_tx)(struct net_device *dev);
+       void            (*LINE_status)(struct net_device *dev, u_short status);
+       int             (*LINE_open)(struct net_device *dev);
+       int             (*LINE_close)(struct net_device *dev);
+       int             (*LINE_xmit)(struct sk_buff *skb, struct net_device *dev);
+       int             (*LINE_header)(struct sk_buff *skb, struct net_device *dev,
+                               u_short type,void *daddr, void *saddr, 
+                               unsigned len);
+       int             (*LINE_rebuild_header)(struct sk_buff *skb);
+       int             (*LINE_statistics)(struct net_device *dev, char *page);
+       int             (*LINE_parameter_check)(struct net_device *dev);
+       int             (*LINE_ioctl)(struct net_device *dev, struct ifreq *ifr,
+                               int cmd);
+       void            (*LINE_mod_use)(int);
+       void *          LINE_privdata;
+
+       // HW specific
+
+       struct comx_hardware *hardware;
+       void    (*HW_board_on)(struct net_device *dev);
+       void    (*HW_board_off)(struct net_device *dev);
+       struct net_device *(*HW_access_board)(struct net_device *dev);
+       void    (*HW_release_board)(struct net_device *dev, struct net_device *savep);
+       int     (*HW_txe)(struct net_device *dev);
+       int     (*HW_open)(struct net_device *dev);
+       int     (*HW_close)(struct net_device *dev);
+       int     (*HW_send_packet)(struct net_device *dev,struct sk_buff *skb);
+       int     (*HW_statistics)(struct net_device *dev, char *page);
+#if 0
+       int     (*HW_reset)(struct net_device *dev, char *page);
+#endif
+       int     (*HW_load_board)(struct net_device *dev);
+       void    (*HW_set_clock)(struct net_device *dev);
+       void    *HW_privdata;
+       };
+
+struct comx_debugflags_struct {
+       char *name;
+       int  value;
+       };
+
+#define        COMX_ROOT_DIR_NAME      "comx"
+
+#define        FILENAME_HARDWARE       "boardtype"
+#define FILENAME_HARDWARELIST  "boardtypes"
+#define FILENAME_PROTOCOL      "protocol"
+#define FILENAME_PROTOCOLLIST  "protocols"
+#define FILENAME_DEBUG         "debug"
+#define FILENAME_CLOCK         "clock"
+#define        FILENAME_STATUS         "status"
+#define        FILENAME_IO             "io"
+#define FILENAME_IRQ           "irq"
+#define        FILENAME_KEEPALIVE      "keepalive"
+#define FILENAME_LINEUPDELAY   "lineup_delay"
+#define FILENAME_CHANNEL       "channel"
+#define FILENAME_FIRMWARE      "firmware"
+#define FILENAME_MEMADDR       "memaddr"
+#define        FILENAME_TWIN           "twin"
+#define FILENAME_T1            "t1"
+#define FILENAME_T2            "t2"
+#define FILENAME_N2            "n2"
+#define FILENAME_WINDOW                "window"
+#define FILENAME_MODE          "mode"
+#define        FILENAME_DLCI           "dlci"
+#define        FILENAME_MASTER         "master"
+#ifdef CONFIG_COMX_DEBUG_RAW
+#define        FILENAME_RAW            "raw"
+#endif
+
+#define PROTONAME_NONE         "none"
+#define HWNAME_NONE            "none"
+#define KEEPALIVE_OFF          "off"
+
+#define FRAME_ACCEPTED         0               /* sending and xmitter busy */
+#define FRAME_DROPPED          1
+#define FRAME_ERROR            2               /* xmitter error */
+#define        FRAME_QUEUED            3               /* sending but more can come */
+
+#define        LINE_UP                 1               /* Modem UP */
+#define PROTO_UP               2
+#define PROTO_LOOP             4
+
+#define        HW_OPEN                 1
+#define        LINE_OPEN               2
+#define FW_LOADED              4
+#define IRQ_ALLOCATED          8
+
+#define DEBUG_COMX_RX          2
+#define        DEBUG_COMX_TX           4
+#define        DEBUG_HW_TX             16
+#define        DEBUG_HW_RX             32
+#define        DEBUG_HDLC_KEEPALIVE    64
+#define        DEBUG_COMX_PPP          128
+#define DEBUG_COMX_LAPB                256
+#define        DEBUG_COMX_DLCI         512
+
+#define        DEBUG_PAGESIZE          3072
+#define DEFAULT_DEBUG_SIZE     4096
+#define        DEFAULT_LINEUP_DELAY    1
+#define        FILE_PAGESIZE           3072
+
+#ifndef        COMX_PPP_MAJOR
+#define        COMX_PPP_MAJOR          88
+#endif
+
+
+#define COMX_CHANNEL(dev) ((struct comx_channel*)dev->priv)
+
+#define TWIN(dev) (COMX_CHANNEL(dev)->twin)
+
+
+#ifndef byte
+typedef u8     byte;
+#endif
+#ifndef word
+typedef u16    word;
+#endif
+
+#ifndef        SEEK_SET
+#define        SEEK_SET        0
+#endif
+#ifndef        SEEK_CUR
+#define        SEEK_CUR        1
+#endif
+#ifndef        SEEK_END
+#define        SEEK_END        2
+#endif
+
+extern struct proc_dir_entry * comx_root_dir;
+
+extern int     comx_register_hardware(struct comx_hardware *comx_hw);
+extern int     comx_unregister_hardware(char *name);
+extern int     comx_register_protocol(struct comx_protocol *comx_line);
+extern int     comx_unregister_protocol(char *name);
+
+extern int     comx_rx(struct net_device *dev, struct sk_buff *skb);
+extern void    comx_status(struct net_device *dev, int status);
+extern void    comx_lineup_func(unsigned long d);
+
+extern int     comx_debug(struct net_device *dev, char *fmt, ...);
+extern int     comx_debug_skb(struct net_device *dev, struct sk_buff *skb, char *msg);
+extern int     comx_debug_bytes(struct net_device *dev, unsigned char *bytes, int len,
+               char *msg);
+extern int     comx_strcasecmp(const char *cs, const char *ct);
+
+extern struct inode_operations comx_normal_inode_ops;
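
For illustration only (the module name, version string and callback bodies below are hypothetical and not part of this patch), a protocol module plugs into the registration interface declared above roughly like this:

    #include <linux/module.h>
    #include <linux/init.h>
    #include <linux/netdevice.h>
    #include <linux/if_arp.h>
    #include "comx.h"

    /* Hypothetical protocol module: every name here is illustrative. */
    static int dummyproto_line_init(struct net_device *dev)
    {
            struct comx_channel *ch = COMX_CHANNEL(dev);

            /* a real protocol would hook LINE_rx/LINE_xmit/... here */
            ch->LINE_privdata = NULL;
            return 0;
    }

    static int dummyproto_line_exit(struct net_device *dev)
    {
            return 0;
    }

    static struct comx_protocol dummyproto = {
            .name       = "dummy",
            .version    = "0.01",
            .encap_type = ARPHRD_PPP,       /* any suitable ARPHRD_* value */
            .line_init  = dummyproto_line_init,
            .line_exit  = dummyproto_line_exit,
    };

    static int __init dummyproto_init(void)
    {
            return comx_register_protocol(&dummyproto);
    }

    static void __exit dummyproto_cleanup(void)
    {
            comx_unregister_protocol(dummyproto.name);
    }

    module_init(dummyproto_init);
    module_exit(dummyproto_cleanup);
    MODULE_LICENSE("GPL");

The matching comx_unregister_protocol() call on module exit keeps the core's protocol list consistent; hardware modules follow the same pattern through comx_register_hardware() and comx_unregister_hardware().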
diff --git a/drivers/net/wan/comxhw.h b/drivers/net/wan/comxhw.h
new file mode 100644
index 0000000..15230dc
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * Defines for comxhw.c
+ *
+ * Original authors:  Arpad Bakay <bakay.arpad@synergon.hu>,
+ *                    Peter Bajan <bajan.peter@synergon.hu>,
+ * Previous maintainer: Tivadar Szemethy <tiv@itc.hu>
+ * Current maintainer: Gergely Madarasz <gorgo@itc.hu>
+ *
+ * Copyright (C) 1995-1999 ITConsult-Pro Co. <info@itc.hu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define        LOCOMX_IO_EXTENT        8
+#define COMX_IO_EXTENT         4
+#define        HICOMX_IO_EXTENT        16
+
+#define COMX_MAX_TX_SIZE       1600
+#define COMX_MAX_RX_SIZE       2048
+
+#define COMX_JAIL_OFFSET       0xffff
+#define COMX_JAIL_VALUE                0xfe
+#define        COMX_MEMORY_SIZE        65536
+#define HICOMX_MEMORY_SIZE     16384
+#define COMX_MEM_MIN           0xa0000
+#define COMX_MEM_MAX           0xf0000
+
+#define        COMX_DEFAULT_IO         0x360
+#define        COMX_DEFAULT_IRQ        10
+#define        COMX_DEFAULT_MEMADDR    0xd0000
+#define        HICOMX_DEFAULT_IO       0x320
+#define        HICOMX_DEFAULT_IRQ      10
+#define        HICOMX_DEFAULT_MEMADDR  0xd0000
+#define        LOCOMX_DEFAULT_IO       0x368
+#define        LOCOMX_DEFAULT_IRQ      7
+
+#define MAX_CHANNELNO          2
+
+#define        COMX_CHANNEL_OFFSET     0x2000
+
+#define COMX_ENABLE_BOARD_IT    0x40
+#define COMX_BOARD_RESET               0x20
+#define COMX_ENABLE_BOARD_MEM   0x10
+#define COMX_DISABLE_BOARD_MEM  0
+#define COMX_DISABLE_ALL       0x00
+
+#define HICOMX_DISABLE_ALL     0x00
+#define HICOMX_ENABLE_BOARD_MEM        0x02
+#define HICOMX_DISABLE_BOARD_MEM 0x0
+#define HICOMX_BOARD_RESET     0x01
+#define HICOMX_PRG_MEM         4
+#define HICOMX_DATA_MEM                0
+#define HICOMX_ID_BYTE         0x55
+
+#define CMX_ID_BYTE            0x31
+#define COMX_CLOCK_CONST       8000
+
+#define        LINKUP_READY            3
+
+#define        OFF_FW_L1_ID    0x01e    /* ID bytes */
+#define OFF_FW_L2_ID   0x1006
+#define        FW_L1_ID_1      0xab
+#define FW_L1_ID_2_COMX                0xc0
+#define FW_L1_ID_2_HICOMX      0xc1
+#define        FW_L2_ID_1      0xab
+
+#define OFF_A_L2_CMD     0x130   /* command register for L2 */
+#define OFF_A_L2_CMDPAR  0x131   /* command parameter byte */
+#define OFF_A_L1_STATB   0x122   /* stat. block for L1 */
+#define OFF_A_L1_ABOREC  0x122   /* receive ABORT counter */
+#define OFF_A_L1_OVERRUN 0x123   /* receive overrun counter */
+#define OFF_A_L1_CRCREC  0x124   /* CRC error counter */
+#define OFF_A_L1_BUFFOVR 0x125   /* buffer overrun counter */
+#define OFF_A_L1_PBUFOVR 0x126   /* priority buffer overrun counter */
+#define OFF_A_L1_MODSTAT 0x127   /* current state of modem ctrl lines */
+#define OFF_A_L1_STATE   0x127   /* end of stat. block for L1 */
+#define OFF_A_L1_TXPC    0x128   /* Tx counter for the PC */
+#define OFF_A_L1_TXZ80   0x129   /* Tx counter for the Z80 */
+#define OFF_A_L1_RXPC    0x12a   /* Rx counter for the PC */
+#define OFF_A_L1_RXZ80   0x12b   /* Rx counter for the Z80 */
+#define OFF_A_L1_REPENA  0x12c   /* IT rep disable */
+#define OFF_A_L1_CHNR    0x12d   /* L1 channel logical number */
+#define OFF_A_L1_CLKINI  0x12e   /* Timer Const */
+#define OFF_A_L2_LINKUP         0x132   /* Linkup byte */
+#define OFF_A_L2_DAV    0x134   /* Rx DAV */
+#define OFF_A_L2_RxBUFP  0x136  /* Rx buff relative to membase */
+#define OFF_A_L2_TxEMPTY 0x138   /* Tx Empty */
+#define OFF_A_L2_TxBUFP  0x13a   /* Tx Buf */
+#define OFF_A_L2_NBUFFS         0x144   /* Number of buffers to fetch */
+
+#define OFF_A_L2_SABMREC 0x164  /* LAPB no. of SABMs received */
+#define OFF_A_L2_SABMSENT 0x165         /* LAPB no. of SABMs sent */
+#define OFF_A_L2_REJREC  0x166  /* LAPB no. of REJs received */
+#define OFF_A_L2_REJSENT 0x167  /* LAPB no. of REJs sent */
+#define OFF_A_L2_FRMRREC 0x168  /* LAPB no. of FRMRs received */
+#define OFF_A_L2_FRMRSENT 0x169         /* LAPB no. of FRMRs sent */
+#define OFF_A_L2_PROTERR 0x16A  /* LAPB no. of protocol errors rec'd */
+#define OFF_A_L2_LONGREC 0x16B  /* LAPB no. of long frames */
+#define OFF_A_L2_INVNR   0x16C  /* LAPB no. of invalid N(R)s rec'd */
+#define OFF_A_L2_UNDEFFR 0x16D  /* LAPB no. of invalid frames */
+
+#define        OFF_A_L2_T1     0x174    /* T1 timer */
+#define        OFF_A_L2_ADDR   0x176    /* DCE = 1, DTE = 3 */
+
+#define        COMX_CMD_INIT   1
+#define COMX_CMD_EXIT  2
+#define COMX_CMD_OPEN  16
+#define COMX_CMD_CLOSE 17
+
diff --git a/drivers/net/wan/falc-lh.h b/drivers/net/wan/falc-lh.h
new file mode 100644
index 0000000..e30726c
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ *     Defines for comx-hw-slicecom.c - FALC-LH specific
+ *
+ *     Author:         Bartok Istvan <bartoki@itc.hu>
+ *     Last modified:  Mon Feb  7 20:00:38 CET 2000
+ *
+ *     :set tabstop=6
+ */
+
+/*
+ *     Control register offsets on the LBI (page 90)
+ *     use it like:
+ *     lbi[ MODE ] = 0x34;
+ */
+
+#define MODE   0x03
+#define IPC            0x08
+#define IMR0   0x14    /* Interrupt Mask Register 0    */
+#define IMR1   0x15
+#define IMR2   0x16
+#define IMR3   0x17
+#define IMR4   0x18
+#define IMR5   0x19
+#define FMR0   0x1a    /* Framer Mode Register 0       */
+#define FMR1   0x1b
+#define FMR2   0x1c
+#define XSW            0x1e
+#define XSP            0x1f
+#define XC0            0x20
+#define XC1            0x21
+#define RC0            0x22
+#define RC1            0x23
+#define XPM0   0x24
+#define XPM1   0x25
+#define XPM2   0x26
+#define TSWM   0x27
+#define IDLE   0x29    /* Idle Code    */
+#define LIM0   0x34
+#define LIM1   0x35
+#define PCD            0x36
+#define PCR            0x37
+#define LIM2   0x38
+
+/*
+ *     Status registers on the LBI (page 134)
+ *     these are read-only, use it like:
+ *     if( lbi[ FRS0 ] ) ...
+ */
+
+#define FRS0   0x4c    /* Framer Receive Status register 0     */
+#define FRS1   0x4d    /* Framer Receive Status register 1     */
+#define FECL   0x50    /* Framing Error Counter low byte       */ /* Counts FAS word receive errors            */
+#define FECH   0x51    /*                       high byte      */
+#define CVCL   0x52    /* Code Violation Counter low byte      */ /* Counts bipolar and HDB3 code violations   */
+#define CVCH   0x53    /*                        high byte     */
+#define CEC1L  0x54    /* CRC4 Error Counter 1 low byte        */ /* Counts CRC4 errors in the incoming stream */
+#define CEC1H  0x55    /*                      high byte       */
+#define EBCL   0x56    /* E Bit error Counter low byte */ /* E-bits: the remote end sends them, when   */
+#define EBCH   0x57    /*                     high byte        */ /* it detected a CRC4-error                  */
+#define ISR0   0x68    /* Interrupt Status Register 0  */
+#define ISR1   0x69    /* Interrupt Status Register 1  */
+#define ISR2   0x6a    /* Interrupt Status Register 2  */
+#define ISR3   0x6b    /* Interrupt Status Register 3  */
+#define ISR5   0x6c    /* Interrupt Status Register 5  */
+#define GIS    0x6e    /* Global Interrupt Status Register     */
+#define VSTR   0x6f    /* version information */
+
+/*
+ *     Bit fields
+ */
+
+#define FRS0_LOS               (1 << 7)
+#define FRS0_AIS               (1 << 6)
+#define FRS0_LFA               (1 << 5)
+#define FRS0_RRA               (1 << 4)
+#define FRS0_AUXP              (1 << 3)
+#define FRS0_NMF               (1 << 2)
+#define FRS0_LMFA              (1 << 1)
+
+#define FRS1_XLS               (1 << 1)
+#define FRS1_XLO               (1)
+
+#define ISR2_FAR               (1 << 7)
+#define ISR2_LFA               (1 << 6)
+#define ISR2_MFAR              (1 << 5)
+#define ISR2_T400MS    (1 << 4)
+#define ISR2_AIS               (1 << 3)
+#define ISR2_LOS               (1 << 2)
+#define ISR2_RAR               (1 << 1)
+#define ISR2_RA                (1)
+
+#define ISR3_ES                (1 << 7)
+#define ISR3_SEC               (1 << 6)
+#define ISR3_LMFA16    (1 << 5)
+#define ISR3_AIS16     (1 << 4)
+#define ISR3_RA16              (1 << 3)
+#define ISR3_API               (1 << 2)
+#define ISR3_RSN               (1 << 1)
+#define ISR3_RSP               (1)
+
+#define ISR5_XSP               (1 << 7)
+#define ISR5_XSN               (1 << 6)
diff --git a/drivers/net/wan/hscx.h b/drivers/net/wan/hscx.h
new file mode 100644
index 0000000..675b7b1
--- /dev/null
@@ -0,0 +1,103 @@
+#define        HSCX_MTU        1600
+
+#define        HSCX_ISTA       0x00
+#define HSCX_MASK      0x00
+#define HSCX_STAR      0x01
+#define HSCX_CMDR      0x01
+#define HSCX_MODE      0x02
+#define HSCX_TIMR      0x03
+#define HSCX_EXIR      0x04
+#define HSCX_XAD1      0x04
+#define HSCX_RBCL      0x05
+#define HSCX_SAD2      0x05
+#define HSCX_RAH1      0x06
+#define HSCX_RSTA      0x07
+#define HSCX_RAH2      0x07
+#define HSCX_RAL1      0x08
+#define HSCX_RCHR      0x09
+#define HSCX_RAL2      0x09
+#define HSCX_XBCL      0x0a
+#define HSCX_BGR       0x0b
+#define HSCX_CCR2      0x0c
+#define HSCX_RBCH      0x0d
+#define HSCX_XBCH      0x0d
+#define HSCX_VSTR      0x0e
+#define HSCX_RLCR      0x0e
+#define HSCX_CCR1      0x0f
+#define HSCX_FIFO      0x1e
+
+#define HSCX_HSCX_CHOFFS       0x400
+#define HSCX_SEROFFS   0x1000
+
+#define HSCX_RME       0x80
+#define HSCX_RPF       0x40
+#define HSCX_RSC       0x20
+#define HSCX_XPR       0x10
+#define HSCX_TIN       0x08
+#define HSCX_ICA       0x04
+#define HSCX_EXA       0x02
+#define HSCX_EXB       0x01
+
+#define HSCX_XMR       0x80
+#define HSCX_XDU       0x40
+#define HSCX_EXE       0x40
+#define HSCX_PCE       0x20
+#define HSCX_RFO       0x10
+#define HSCX_CSC       0x08
+#define HSCX_RFS       0x04
+
+#define HSCX_XDOV      0x80
+#define HSCX_XFW       0x40
+#define HSCX_XRNR      0x20
+#define HSCX_RRNR      0x10
+#define HSCX_RLI       0x08
+#define HSCX_CEC       0x04
+#define HSCX_CTS       0x02
+#define HSCX_WFA       0x01
+
+#define HSCX_RMC       0x80
+#define HSCX_RHR       0x40
+#define HSCX_RNR       0x20
+#define HSCX_XREP      0x20
+#define HSCX_STI       0x10
+#define HSCX_XTF       0x08
+#define HSCX_XIF       0x04
+#define HSCX_XME       0x02
+#define HSCX_XRES      0x01
+
+#define HSCX_AUTO      0x00
+#define HSCX_NONAUTO   0x40
+#define HSCX_TRANS     0x80
+#define HSCX_XTRANS    0xc0
+#define HSCX_ADM16     0x20
+#define HSCX_ADM8      0x00
+#define HSCX_TMD_EXT   0x00
+#define HSCX_TMD_INT   0x10
+#define HSCX_RAC       0x08
+#define HSCX_RTS       0x04
+#define HSCX_TLP       0x01
+
+#define HSCX_VFR       0x80
+#define HSCX_RDO       0x40
+#define HSCX_CRC       0x20
+#define HSCX_RAB       0x10
+
+#define HSCX_CIE       0x04
+#define HSCX_RIE       0x02
+
+#define HSCX_DMA       0x80
+#define HSCX_NRM       0x40
+#define HSCX_CAS       0x20
+#define HSCX_XC        0x10
+
+#define HSCX_OV        0x10
+
+#define HSCX_CD        0x80
+
+#define HSCX_RC        0x80
+
+#define HSCX_PU        0x80
+#define HSCX_NRZ       0x00
+#define HSCX_NRZI      0x40
+#define HSCX_ODS       0x10
+#define HSCX_ITF       0x08
diff --git a/drivers/net/wan/mixcom.h b/drivers/net/wan/mixcom.h
new file mode 100644
index 0000000..1815eef
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Defines for the mixcom board
+ *
+ * Author: Gergely Madarasz <gorgo@itc.hu>
+ *
+ * Copyright (C) 1999 ITConsult-Pro Co. <info@itc.hu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define        MIXCOM_IO_EXTENT        0x20
+
+#define        MIXCOM_DEFAULT_IO       0x180
+#define        MIXCOM_DEFAULT_IRQ      5
+
+#define MIXCOM_ID              0x11
+#define MIXCOM_SERIAL_OFFSET   0x1000
+#define MIXCOM_CHANNEL_OFFSET  0x400
+#define MIXCOM_IT_OFFSET       0xc14
+#define MIXCOM_STATUS_OFFSET   0xc14
+#define MIXCOM_ID_OFFSET       0xc10
+#define MIXCOM_ON              0x1
+#define MIXCOM_OFF             0x0
+
+/* Status register bits */
+
+#define MIXCOM_CTSB            0x1
+#define MIXCOM_CTSA            0x2
+#define MIXCOM_CHANNELNO       0x20
+#define MIXCOM_POWERFAIL       0x40
+#define MIXCOM_BOOT            0x80
diff --git a/drivers/net/wan/munich32x.h b/drivers/net/wan/munich32x.h
new file mode 100644
index 0000000..8f151f2
--- /dev/null
@@ -0,0 +1,191 @@
+/*
+ *     Defines for comx-hw-slicecom.c - MUNICH32X specific
+ *
+ *     Author:        Bartok Istvan <bartoki@itc.hu>
+ *     Last modified: Tue Jan 11 14:27:36 CET 2000
+ *
+ *     :set tabstop=6
+ */
+
+#define TXBUFFER_SIZE  1536                    /* Maximum the card hardware can transfer                               */
+#define RXBUFFER_SIZE  (TXBUFFER_SIZE+4)       /* For Rx reasons it must be a multiple of 4, and =>4 (page 265)        */
+                                                       /* +4 .. see page 265, bit FE                                                   */
+                                                       /* TODO: MODE1 actually gets TXBUFFER_SIZE, not this value; maybe it is not needed at all? */
+
+//#define PCI_VENDOR_ID_SIEMENS                        0x110a
+#define PCI_DEVICE_ID_SIEMENS_MUNICH32X        0x2101
+
+/*
+ *     PCI config space registers (page 120)
+ */
+
+#define MUNICH_PCI_PCIRES      0x4c            /* 0xe0000 resets       the chip        */
+
+
+/*
+ *     MUNICH slave register offsets relative to base_address[0] (PCI BAR1) (page 181):
+ *     offsets are in bytes, registers are u32's, so we need a >>2 for indexing
+ *     the int[] by byte offsets. Use it like:
+ *
+ *     bar1[ STAT ] = ~0L;  or
+ *     x = bar1[ STAT ];
+ */
+
+#define CONF   (0x00 >> 2)
+#define CMD            (0x04 >> 2)
+#define STAT   (0x08 >> 2)
+#define STACK  (0x08 >> 2)
+#define IMASK  (0x0c >> 2)
+#define PIQBA  (0x14 >> 2)
+#define PIQL   (0x18 >> 2)
+#define MODE1  (0x20 >> 2)
+#define MODE2  (0x24 >> 2)
+#define CCBA   (0x28 >> 2)
+#define TXPOLL (0x2c >> 2)
+#define TIQBA  (0x30 >> 2)
+#define TIQL   (0x34 >> 2)
+#define RIQBA  (0x38 >> 2)
+#define RIQL   (0x3c >> 2)
+#define LCONF  (0x40 >> 2)             /* LBI Configuration Register           */
+#define LCCBA  (0x44 >> 2)             /* LBI Configuration Control Block      */      /* DE: maybe not needed at all? */
+#define LTIQBA (0x50 >> 2)             /* DE: maybe not needed at all? page 210: LBI DMA Controller int queue - we do not use DMA.. */
+#define LTIQL  (0x54 >> 2)             /* DE: maybe not needed at all? */
+#define LRIQBA (0x58 >> 2)             /* DE: maybe not needed at all? */
+#define LRIQL  (0x5c >> 2)             /* DE: maybe not needed at all? */
+#define LREG0  (0x60 >> 2)             /* LBI Indirect External Configuration register 0       */
+#define LREG1  (0x64 >> 2)
+#define LREG2  (0x68 >> 2)
+#define LREG3  (0x6c >> 2)
+#define LREG4  (0x70 >> 2)
+#define LREG5  (0x74 >> 2)
+#define LREG6  (0x78 >> 2)             /* LBI Indirect External Configuration register 6               */
+#define LSTAT  (0x7c >> 2)             /* LBI Status Register                                                  */
+#define GPDIR  (0x80 >> 2)             /* General Purpose Bus DIRection - 0..input, 1..output  */
+#define GPDATA (0x84 >> 2)             /* General Purpose Bus DATA                                             */
+
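
As the comment above says, the offsets are pre-shifted by >>2, so they index a u32 view of the slave register window directly. A minimal sketch of that access pattern follows; the function name, the mapped length and the use of base_address[0] via pci_resource_start() are assumptions for illustration, not taken from this patch:

    #include <linux/pci.h>
    #include <asm/io.h>

    /* Illustrative only: map the MUNICH32X slave register window
     * (base_address[0]) and poke it through the pre-shifted offsets above. */
    static volatile u32 *munich_map_regs(struct pci_dev *pdev)
    {
            volatile u32 *bar1 = ioremap(pci_resource_start(pdev, 0), 0x100);

            if (bar1) {
                    bar1[IMASK] = ~0U;      /* mask every interrupt source     */
                    bar1[STAT]  = ~0U;      /* acknowledge pending status bits */
            }
            return bar1;
    }
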
+
+/*
+ *     MUNICH commands: (they go into register CMD)
+ */
+
+#define CMD_ARPCM      0x01                    /* Action Request Serial PCM Core       */
+#define CMD_ARLBI      0x02                    /* Action Request LBI                   */
+
+
+/*
+ *     MUNICH event bits in the STAT, STACK, IMASK registers (page 188,189)
+ */
+
+#define STAT_PTI       (1 << 15)
+#define STAT_PRI       (1 << 14)
+#define STAT_LTI       (1 << 13)
+#define STAT_LRI       (1 << 12)
+#define STAT_IOMI      (1 << 11)
+#define STAT_SSCI      (1 << 10)
+#define STAT_LBII      (1 << 9)
+#define STAT_MBI       (1 << 8)
+
+#define STAT_TI        (1 << 6)
+#define STAT_TSPA      (1 << 5)
+#define STAT_RSPA      (1 << 4)
+#define STAT_LBIF      (1 << 3)
+#define STAT_LBIA      (1 << 2)
+#define STAT_PCMF      (1 << 1)
+#define STAT_PCMA      (1) 
+
+/*
+ *     We do not handle these (and do not touch their STAT bits) in the interrupt loop
+ */
+
+#define STAT_NOT_HANDLED_BY_INTERRUPT  (STAT_PCMF | STAT_PCMA)
+
+
+/*
+ *     MUNICH MODE1/MODE2 slave register fields (page 193,196)
+ *     these are not all masks, MODE1_XX_YY are my magic values!
+ */
+
+#define MODE1_PCM_E1   (1 << 31)               /* E1, 2.048 Mbit/sec           */
+#define MODE1_TBS_4    (1 << 24)               /* TBS = 4 .. no Tx bit shift   */
+#define MODE1_RBS_4    (1 << 18)               /* RBS = 4 .. no Rx bit shift   */
+#define MODE1_REN              (1 << 15)               /* Rx Enable                    */
+#define MODE1_MFL_MY   TXBUFFER_SIZE   /* Maximum Frame Length         */
+#define MODE1_MAGIC    (MODE1_PCM_E1 | MODE1_TBS_4 | MODE1_RBS_4 | MODE1_REN | MODE1_MFL_MY)
+
+#define MODE2_HPOLL    (1 << 8)                /* Hold Poll                    */
+#define MODE2_SPOLL    (1 << 7)                /* Slow Poll                    */
+#define MODE2_TSF              (1)                     /* real magic - discovered by probing :)        */
+// #define MODE2_MAGIC (MODE2_TSF)
+#define MODE2_MAGIC    (MODE2_SPOLL | MODE2_TSF)
+
+
+/*
+ *     LCONF bits (page 205)
+ *     these are not all masks, LCONF_XX_YY are my magic values!
+ */
+
+#define LCONF_IPA                      (1 << 31)       /* Interrupt Pass. Use 1 for FALC54                                                     */
+#define LCONF_DCA                      (1 << 30)       /* Disregard the int's for Channel A - DMSM does not try to handle them */
+#define LCONF_DCB                      (1 << 29)       /* Disregard the int's for Channel B                                            */
+#define LCONF_EBCRES           (1 << 22)       /* Reset LBI External Bus Controller, 0..reset, 1..normal operation     */
+#define LCONF_LBIRES           (1 << 21)       /* Reset LBI DMSM, 0..reset, 1..normal operation                                */
+#define LCONF_BTYP_16DEMUX     (1 << 7)        /* 16-bit demultiplexed bus     */
+#define LCONF_ABM                      (1 << 4)        /* Arbitration Master           */
+
+/* writing LCONF_MAGIC1 followed by LCONF_MAGIC2 into LCONF resets the EBC and DMSM: */
+
+#define LCONF_MAGIC1           (LCONF_BTYP_16DEMUX | LCONF_ABM | LCONF_IPA | LCONF_DCA | LCONF_DCB)
+#define LCONF_MAGIC2           (LCONF_MAGIC1 | LCONF_EBCRES | LCONF_LBIRES)
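
Per the note above, the EBC and DMSM reset is just two back-to-back writes; a two-line sketch, assuming the mapped register array ('bar1') from the earlier example:

    bar1[LCONF] = LCONF_MAGIC1;     /* EBCRES/LBIRES low: hold EBC and DMSM in reset */
    bar1[LCONF] = LCONF_MAGIC2;     /* EBCRES/LBIRES high: resume normal operation   */
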
+
+
+/*
+ *     LREGx magic values if a FALC54 is on the LBI (page 217)
+ */
+
+#define LREG0_MAGIC    0x00000264
+#define LREG1_MAGIC    0x6e6a6b66
+#define LREG2_MAGIC    0x00000264
+#define LREG3_MAGIC    0x6e686966
+#define LREG4_MAGIC    0x00000000
+#define LREG5_MAGIC    ( (7<<27) | (3<<24) | (1<<21) | (7<<3) | (2<<9) )
+
+
+/*
+ *     PCM Action Specification fields (munich_ccb_t.action_spec)
+ */
+
+#define CCB_ACTIONSPEC_IN                      (1 << 15)       /* init                         */
+#define CCB_ACTIONSPEC_ICO                     (1 << 14)       /* init only this channel       */
+#define CCB_ACTIONSPEC_RES                     (1 << 6)        /* reset all channels           */
+#define CCB_ACTIONSPEC_LOC                     (1 << 5)
+#define CCB_ACTIONSPEC_LOOP                    (1 << 4)
+#define CCB_ACTIONSPEC_LOOPI                   (1 << 3)
+#define CCB_ACTIONSPEC_IA                      (1 << 2)
+
+
+/*
+ *     Interrupt Information bits in the TIQ, RIQ
+ */
+
+#define PCM_INT_HI     (1 << 12)
+#define PCM_INT_FI     (1 << 11)
+#define PCM_INT_IFC    (1 << 10)
+#define PCM_INT_SF     (1 << 9)
+#define PCM_INT_ERR    (1 << 8)
+#define PCM_INT_FO     (1 << 7)
+#define PCM_INT_FE2    (1 << 6)
+
+#define PCM_INT_CHANNEL( info )        (info & 0x1F)
+
+
+/*
+ *     Rx status info in the rx_desc_t.status
+ */
+
+#define RX_STATUS_SF   (1 << 6)
+#define RX_STATUS_LOSS (1 << 5)
+#define RX_STATUS_CRCO (1 << 4)
+#define RX_STATUS_NOB  (1 << 3)
+#define RX_STATUS_LFD  (1 << 2)
+#define RX_STATUS_RA   (1 << 1)
+#define RX_STATUS_ROF  1 
diff --git a/drivers/pcmcia/sa1100.h b/drivers/pcmcia/sa1100.h
new file mode 100644
index 0000000..d2defe5
--- /dev/null
@@ -0,0 +1,164 @@
+/*======================================================================
+
+    Device driver for the PCMCIA control functionality of StrongARM
+    SA-1100 microprocessors.
+
+    The contents of this file are subject to the Mozilla Public
+    License Version 1.1 (the "License"); you may not use this file
+    except in compliance with the License. You may obtain a copy of
+    the License at http://www.mozilla.org/MPL/
+
+    Software distributed under the License is distributed on an "AS
+    IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+    implied. See the License for the specific language governing
+    rights and limitations under the License.
+
+    The initial developer of the original code is John G. Dorsey
+    <john+@cs.cmu.edu>.  Portions created by John G. Dorsey are
+    Copyright (C) 1999 John G. Dorsey.  All Rights Reserved.
+
+    Alternatively, the contents of this file may be used under the
+    terms of the GNU Public License version 2 (the "GPL"), in which
+    case the provisions of the GPL are applicable instead of the
+    above.  If you wish to allow the use of your version of this file
+    only under the terms of the GPL and not to allow others to use
+    your version of this file under the MPL, indicate your decision
+    by deleting the provisions above and replace them with the notice
+    and other provisions required by the GPL.  If you do not delete
+    the provisions above, a recipient may use your version of this
+    file under either the MPL or the GPL.
+    
+======================================================================*/
+
+#if !defined(_PCMCIA_SA1100_H)
+# define _PCMCIA_SA1100_H
+
+#include <pcmcia/cs_types.h>
+#include <pcmcia/ss.h>
+#include <pcmcia/bulkmem.h>
+#include <pcmcia/cistpl.h>
+#include "cs_internal.h"
+#include "sa1100_generic.h"
+
+/* MECR: Expansion Memory Configuration Register
+ * (SA-1100 Developers Manual, p.10-13; SA-1110 Developers Manual, p.10-24)
+ *
+ * MECR layout is:  
+ *
+ *   FAST1 BSM1<4:0> BSA1<4:0> BSIO1<4:0> FAST0 BSM0<4:0> BSA0<4:0> BSIO0<4:0>
+ *
+ * (This layout is actually true only for the SA-1110; the FASTn bits are
+ * reserved on the SA-1100.)
+ */
+
+#define MECR_SOCKET_0_SHIFT (0)
+#define MECR_SOCKET_1_SHIFT (16)
+
+#define MECR_BS_MASK        (0x1f)
+#define MECR_FAST_MODE_MASK (0x01)
+
+#define MECR_BSIO_SHIFT (0)
+#define MECR_BSA_SHIFT  (5)
+#define MECR_BSM_SHIFT  (10)
+#define MECR_FAST_SHIFT (15)
+
+#define MECR_SET(mecr, sock, shift, mask, bs) \
+((mecr)=((mecr)&~(((mask)<<(shift))<<\
+                  ((sock)==0?MECR_SOCKET_0_SHIFT:MECR_SOCKET_1_SHIFT)))|\
+        (((bs)<<(shift))<<((sock)==0?MECR_SOCKET_0_SHIFT:MECR_SOCKET_1_SHIFT)))
+
+#define MECR_GET(mecr, sock, shift, mask) \
+((((mecr)>>(((sock)==0)?MECR_SOCKET_0_SHIFT:MECR_SOCKET_1_SHIFT))>>\
+ (shift))&(mask))
+
+#define MECR_BSIO_SET(mecr, sock, bs) \
+MECR_SET((mecr), (sock), MECR_BSIO_SHIFT, MECR_BS_MASK, (bs))
+
+#define MECR_BSIO_GET(mecr, sock) \
+MECR_GET((mecr), (sock), MECR_BSIO_SHIFT, MECR_BS_MASK)
+
+#define MECR_BSA_SET(mecr, sock, bs) \
+MECR_SET((mecr), (sock), MECR_BSA_SHIFT, MECR_BS_MASK, (bs))
+
+#define MECR_BSA_GET(mecr, sock) \
+MECR_GET((mecr), (sock), MECR_BSA_SHIFT, MECR_BS_MASK)
+
+#define MECR_BSM_SET(mecr, sock, bs) \
+MECR_SET((mecr), (sock), MECR_BSM_SHIFT, MECR_BS_MASK, (bs))
+
+#define MECR_BSM_GET(mecr, sock) \
+MECR_GET((mecr), (sock), MECR_BSM_SHIFT, MECR_BS_MASK)
+
+#define MECR_FAST_SET(mecr, sock, fast) \
+MECR_SET((mecr), (sock), MECR_FAST_SHIFT, MECR_FAST_MODE_MASK, (fast))
+
+#define MECR_FAST_GET(mecr, sock) \
+MECR_GET((mecr), (sock), MECR_FAST_SHIFT, MECR_FAST_MODE_MASK)
+
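
A short sketch of how these accessors compose; the socket number and BS values are arbitrary placeholders, and the read-modify-write pattern is the one sa1100_pcmcia_set_mecr() in sa11xx_core.c uses:

    /* Illustrative values only: retime socket 1 in one read-modify-write pass. */
    unsigned int mecr = MECR;

    MECR_BSIO_SET(mecr, 1, 0x0c);   /* I/O window timing                */
    MECR_BSA_SET(mecr, 1, 0x0a);    /* attribute space timing           */
    MECR_BSM_SET(mecr, 1, 0x0a);    /* common memory timing             */
    MECR_FAST_SET(mecr, 1, 0);      /* FAST mode off (SA-1110-only bit) */
    MECR = mecr;
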
+
+/* This function implements the BS value calculation for setting the MECR
+ * using integer arithmetic:
+ */
+static inline unsigned int sa1100_pcmcia_mecr_bs(unsigned int pcmcia_cycle_ns,
+                                                unsigned int cpu_clock_khz){
+  unsigned int t = ((pcmcia_cycle_ns * cpu_clock_khz) / 6) - 1000000;
+  return (t / 1000000) + (((t % 1000000) == 0) ? 0 : 1);
+}
+
+/* This function returns the (approximate) command assertion period, in
+ * nanoseconds, for a given CPU clock frequency and MECR BS value:
+ */
+static inline unsigned int sa1100_pcmcia_cmd_time(unsigned int cpu_clock_khz,
+                                                 unsigned int pcmcia_mecr_bs){
+  return (((10000000 * 2) / cpu_clock_khz) * (3 * (pcmcia_mecr_bs + 1))) / 10;
+}
+
+
+/* SA-1100 PCMCIA Memory and I/O timing
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * The SA-1110 Developer's Manual, section 10.2.5, says the following:
+ *
+ *  "To calculate the recommended BS_xx value for each address space:
+ *   divide the command width time (the greater of twIOWR and twIORD,
+ *   or the greater of twWE and twOE) by processor cycle time; divide
+ *   by 2; divide again by 3 (number of BCLK's per command assertion);
+ *   round up to the next whole number; and subtract 1."
+ *
+ * The PC Card Standard, Release 7, section 4.13.4, says that twIORD
+ * has a minimum value of 165ns. Section 4.13.5 says that twIOWR has
+ * a minimum value of 165ns, as well. Section 4.7.2 (describing
+ * common and attribute memory write timing) says that twWE has a
+ * minimum value of 150ns for a 250ns cycle time (for 5V operation;
+ * see section 4.7.4), or 300ns for a 600ns cycle time (for 3.3V
+ * operation, also section 4.7.4). Section 4.7.3 says that taOE
+ * has a maximum value of 150ns for a 300ns cycle time (for 5V
+ * operation), or 300ns for a 600ns cycle time (for 3.3V operation).
+ *
+ * When configuring memory maps, Card Services appears to adopt the policy
+ * that a memory access time of "0" means "use the default." The default
+ * PCMCIA I/O command width time is 165ns. The default PCMCIA 5V attribute
+ * and memory command width time is 150ns; the PCMCIA 3.3V attribute and
+ * memory command width time is 300ns.
+ */
+#define SA1100_PCMCIA_IO_ACCESS      (165)
+#define SA1100_PCMCIA_5V_MEM_ACCESS  (150)
+#define SA1100_PCMCIA_3V_MEM_ACCESS  (300)
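
A worked example of the rule quoted above, using the 300 ns 3.3 V memory timing and a 206400 kHz core clock (a common SA-1110 speed, chosen only for illustration): 300 ns is 61.92 processor cycles, halved gives 30.96, divided by 3 gives 10.32, rounded up to 11, minus 1 gives BS = 10. The integer helpers above reproduce this:

    unsigned int bs = sa1100_pcmcia_mecr_bs(SA1100_PCMCIA_3V_MEM_ACCESS, 206400);
    /* bs == 10 */
    unsigned int ns = sa1100_pcmcia_cmd_time(206400, bs);
    /* ns == 316, i.e. slightly above the 300 ns requested */
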
+
+
+/* The socket driver actually works nicely in interrupt-driven form,
+ * so the (relatively infrequent) polling is "just to be sure."
+ */
+#define SA1100_PCMCIA_POLL_PERIOD    (2*HZ)
+
+struct pcmcia_low_level;
+
+/* I/O pins replacing memory pins
+ * (PCMCIA System Architecture, 2nd ed., by Don Anderson, p.75)
+ *
+ * These signals change meaning when going from memory-only to 
+ * memory-or-I/O interface:
+ */
+#define iostschg bvd1
+#define iospkr   bvd2
+
+#endif  /* !defined(_PCMCIA_SA1100_H) */
diff --git a/drivers/pcmcia/sa11xx_core.c b/drivers/pcmcia/sa11xx_core.c
new file mode 100644
index 0000000..d7249c0
--- /dev/null
@@ -0,0 +1,971 @@
+/*======================================================================
+
+    Device driver for the PCMCIA control functionality of StrongARM
+    SA-1100 microprocessors.
+
+    The contents of this file are subject to the Mozilla Public
+    License Version 1.1 (the "License"); you may not use this file
+    except in compliance with the License. You may obtain a copy of
+    the License at http://www.mozilla.org/MPL/
+
+    Software distributed under the License is distributed on an "AS
+    IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+    implied. See the License for the specific language governing
+    rights and limitations under the License.
+
+    The initial developer of the original code is John G. Dorsey
+    <john+@cs.cmu.edu>.  Portions created by John G. Dorsey are
+    Copyright (C) 1999 John G. Dorsey.  All Rights Reserved.
+
+    Alternatively, the contents of this file may be used under the
+    terms of the GNU Public License version 2 (the "GPL"), in which
+    case the provisions of the GPL are applicable instead of the
+    above.  If you wish to allow the use of your version of this file
+    only under the terms of the GPL and not to allow others to use
+    your version of this file under the MPL, indicate your decision
+    by deleting the provisions above and replace them with the notice
+    and other provisions required by the GPL.  If you do not delete
+    the provisions above, a recipient may use your version of this
+    file under either the MPL or the GPL.
+    
+======================================================================*/
+/*
+ * Please see linux/Documentation/arm/SA1100/PCMCIA for more information
+ * on the low-level kernel interface.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/config.h>
+#include <linux/cpufreq.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+
+#include <asm/hardware.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/system.h>
+
+#include "sa11xx_core.h"
+#include "sa1100.h"
+
+#ifdef DEBUG
+static int pc_debug;
+
+module_param(pc_debug, int, 0644);
+
+#define debug(skt, lvl, fmt, arg...) do {                      \
+       if (pc_debug > (lvl))                                   \
+               printk(KERN_DEBUG "skt%u: %s: " fmt,            \
+                      (skt)->nr, __func__ , ## arg);           \
+} while (0)
+
+#else
+#define debug(skt, lvl, fmt, arg...) do { } while (0)
+#endif
+
+#define to_sa1100_socket(x)    container_of(x, struct sa1100_pcmcia_socket, socket)
+
+/*
+ * sa1100_pcmcia_default_mecr_timing
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ *
+ * Calculate MECR clock wait states for given CPU clock
+ * speed and command wait state. This function can be
+ * overridden by a board-specific version.
+ *
+ * The default is to simply calculate the BS values as specified in
+ * the INTEL SA1100 development manual
+ * "Expansion Memory (PCMCIA) Configuration Register (MECR)"
+ * that's section 10.2.5 in _my_ version of the manual ;)
+ */
+static unsigned int
+sa1100_pcmcia_default_mecr_timing(struct sa1100_pcmcia_socket *skt,
+                                 unsigned int cpu_speed,
+                                 unsigned int cmd_time)
+{
+       return sa1100_pcmcia_mecr_bs(cmd_time, cpu_speed);
+}
+
+static unsigned short
+calc_speed(unsigned short *spds, int num, unsigned short dflt)
+{
+       unsigned short speed = 0;
+       int i;
+
+       for (i = 0; i < num; i++)
+               if (speed < spds[i])
+                       speed = spds[i];
+       if (speed == 0)
+               speed = dflt;
+
+       return speed;
+}
+
+/* sa1100_pcmcia_set_mecr()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ *
+ * Set the MECR value for socket <sock> based on this socket's
+ * io, mem and attribute space access speed.
+ * Call the board-specific BS value calculation to allow boards
+ * to tweak the BS values.
+ */
+static int
+sa1100_pcmcia_set_mecr(struct sa1100_pcmcia_socket *skt, unsigned int cpu_clock)
+{
+       u32 mecr, old_mecr;
+       unsigned long flags;
+       unsigned short speed;
+       unsigned int bs_io, bs_mem, bs_attr;
+
+       speed = calc_speed(skt->spd_io, MAX_IO_WIN, SA1100_PCMCIA_IO_ACCESS);
+       bs_io = skt->ops->socket_get_timing(skt, cpu_clock, speed);
+
+       speed = calc_speed(skt->spd_mem, MAX_WIN, SA1100_PCMCIA_3V_MEM_ACCESS);
+       bs_mem = skt->ops->socket_get_timing(skt, cpu_clock, speed);
+
+       speed = calc_speed(skt->spd_attr, MAX_WIN, SA1100_PCMCIA_3V_MEM_ACCESS);
+       bs_attr = skt->ops->socket_get_timing(skt, cpu_clock, speed);
+
+       local_irq_save(flags);
+
+       old_mecr = mecr = MECR;
+       MECR_FAST_SET(mecr, skt->nr, 0);
+       MECR_BSIO_SET(mecr, skt->nr, bs_io);
+       MECR_BSA_SET(mecr, skt->nr, bs_attr);
+       MECR_BSM_SET(mecr, skt->nr, bs_mem);
+       if (old_mecr != mecr)
+               MECR = mecr;
+
+       local_irq_restore(flags);
+
+       debug(skt, 2, "FAST %X  BSM %X  BSA %X  BSIO %X\n",
+             MECR_FAST_GET(mecr, skt->nr),
+             MECR_BSM_GET(mecr, skt->nr), MECR_BSA_GET(mecr, skt->nr),
+             MECR_BSIO_GET(mecr, skt->nr));
+
+       return 0;
+}
+
+static unsigned int sa1100_pcmcia_skt_state(struct sa1100_pcmcia_socket *skt)
+{
+       struct pcmcia_state state;
+       unsigned int stat;
+
+       memset(&state, 0, sizeof(struct pcmcia_state));
+
+       skt->ops->socket_state(skt, &state);
+
+       stat = state.detect  ? SS_DETECT : 0;
+       stat |= state.ready  ? SS_READY  : 0;
+       stat |= state.wrprot ? SS_WRPROT : 0;
+       stat |= state.vs_3v  ? SS_3VCARD : 0;
+       stat |= state.vs_Xv  ? SS_XVCARD : 0;
+
+       /* The power status of individual sockets is not available
+        * explicitly from the hardware, so we just remember the state
+        * and regurgitate it upon request:
+        */
+       stat |= skt->cs_state.Vcc ? SS_POWERON : 0;
+
+       if (skt->cs_state.flags & SS_IOCARD)
+               stat |= state.bvd1 ? SS_STSCHG : 0;
+       else {
+               if (state.bvd1 == 0)
+                       stat |= SS_BATDEAD;
+               else if (state.bvd2 == 0)
+                       stat |= SS_BATWARN;
+       }
+       return stat;
+}
+
+/*
+ * sa1100_pcmcia_config_skt
+ * ^^^^^^^^^^^^^^^^^^^^^^^^
+ *
+ * Convert PCMCIA socket state to our socket configuration structure.
+ */
+static int
+sa1100_pcmcia_config_skt(struct sa1100_pcmcia_socket *skt, socket_state_t *state)
+{
+       int ret;
+
+       ret = skt->ops->configure_socket(skt, state);
+       if (ret == 0) {
+               /*
+                * This really needs a better solution.  The IRQ
+                * may or may not be claimed by the driver.
+                */
+               if (skt->irq_state != 1 && state->io_irq) {
+                       skt->irq_state = 1;
+                       set_irq_type(skt->irq, IRQT_FALLING);
+               } else if (skt->irq_state == 1 && state->io_irq == 0) {
+                       skt->irq_state = 0;
+                       set_irq_type(skt->irq, IRQT_NOEDGE);
+               }
+
+               skt->cs_state = *state;
+       }
+
+       if (ret < 0)
+               printk(KERN_ERR "sa1100_pcmcia: unable to configure "
+                      "socket %d\n", skt->nr);
+
+       return ret;
+}
+
+/* sa1100_pcmcia_sock_init()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^
+ *
+ * (Re-)Initialise the socket, turning on status interrupts
+ * and PCMCIA bus.  This must wait for power to stabilise
+ * so that the card status signals report correctly.
+ *
+ * Returns: 0
+ */
+static int sa1100_pcmcia_sock_init(struct pcmcia_socket *sock)
+{
+       struct sa1100_pcmcia_socket *skt = to_sa1100_socket(sock);
+
+       debug(skt, 2, "initializing socket\n");
+
+       skt->ops->socket_init(skt);
+       return 0;
+}
+
+
+/*
+ * sa1100_pcmcia_suspend()
+ * ^^^^^^^^^^^^^^^^^^^^^^^
+ *
+ * Remove power from the socket and disable IRQs from the card.
+ * Turn off status interrupts, and disable the PCMCIA bus.
+ *
+ * Returns: 0
+ */
+static int sa1100_pcmcia_suspend(struct pcmcia_socket *sock)
+{
+       struct sa1100_pcmcia_socket *skt = to_sa1100_socket(sock);
+       int ret;
+
+       debug(skt, 2, "suspending socket\n");
+
+       ret = sa1100_pcmcia_config_skt(skt, &dead_socket);
+       if (ret == 0)
+               skt->ops->socket_suspend(skt);
+
+       return ret;
+}
+
+static spinlock_t status_lock = SPIN_LOCK_UNLOCKED;
+
+/* sa1100_check_status()
+ * ^^^^^^^^^^^^^^^^^^^^^
+ */
+static void sa1100_check_status(struct sa1100_pcmcia_socket *skt)
+{
+       unsigned int events;
+
+       debug(skt, 4, "entering PCMCIA monitoring thread\n");
+
+       do {
+               unsigned int status;
+               unsigned long flags;
+
+               status = sa1100_pcmcia_skt_state(skt);
+
+               spin_lock_irqsave(&status_lock, flags);
+               events = (status ^ skt->status) & skt->cs_state.csc_mask;
+               skt->status = status;
+               spin_unlock_irqrestore(&status_lock, flags);
+
+               debug(skt, 4, "events: %s%s%s%s%s%s\n",
+                       events == 0         ? "<NONE>"   : "",
+                       events & SS_DETECT  ? "DETECT "  : "",
+                       events & SS_READY   ? "READY "   : "",
+                       events & SS_BATDEAD ? "BATDEAD " : "",
+                       events & SS_BATWARN ? "BATWARN " : "",
+                       events & SS_STSCHG  ? "STSCHG "  : "");
+
+               if (events)
+                       pcmcia_parse_events(&skt->socket, events);
+       } while (events);
+}
+
+/* sa1100_pcmcia_poll_event()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Let's poll for events in addition to IRQs, since IRQs alone are unreliable...
+ */
+static void sa1100_pcmcia_poll_event(unsigned long dummy)
+{
+       struct sa1100_pcmcia_socket *skt = (struct sa1100_pcmcia_socket *)dummy;
+       debug(skt, 4, "polling for events\n");
+
+       mod_timer(&skt->poll_timer, jiffies + SA1100_PCMCIA_POLL_PERIOD);
+
+       sa1100_check_status(skt);
+}
+
+
+/* sa1100_pcmcia_interrupt()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Service routine for socket driver interrupts (requested by the
+ * low-level PCMCIA init() operation via sa1100_pcmcia_thread()).
+ * The actual interrupt-servicing work is performed by
+ * sa1100_pcmcia_thread(), largely because the Card Services event-
+ * handling code performs scheduling operations which cannot be
+ * executed from within an interrupt context.
+ */
+static irqreturn_t sa1100_pcmcia_interrupt(int irq, void *dev, struct pt_regs *regs)
+{
+       struct sa1100_pcmcia_socket *skt = dev;
+
+       debug(skt, 3, "servicing IRQ %d\n", irq);
+
+       sa1100_check_status(skt);
+
+       return IRQ_HANDLED;
+}
+
+
+/* sa1100_pcmcia_get_status()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Implements the get_status() operation for the in-kernel PCMCIA
+ * service (formerly SS_GetStatus in Card Services). Essentially just
+ * fills in bits in `status' according to internal driver state or
+ * the value of the voltage detect chipselect register.
+ *
+ * As a debugging note, during card startup, the PCMCIA core issues
+ * three set_socket() commands in a row: the first with RESET deasserted,
+ * the second with RESET asserted, and the last with RESET deasserted
+ * again. Following the third set_socket(), a get_status() command will
+ * be issued. The kernel is looking for the SS_READY flag (see
+ * setup_socket(), reset_socket(), and unreset_socket() in cs.c).
+ *
+ * Returns: 0
+ */
+static int
+sa1100_pcmcia_get_status(struct pcmcia_socket *sock, unsigned int *status)
+{
+       struct sa1100_pcmcia_socket *skt = to_sa1100_socket(sock);
+
+       skt->status = sa1100_pcmcia_skt_state(skt);
+       *status = skt->status;
+
+       return 0;
+}
+
+
+/* sa1100_pcmcia_get_socket()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Implements the get_socket() operation for the in-kernel PCMCIA
+ * service (formerly SS_GetSocket in Card Services). Not a very 
+ * exciting routine.
+ *
+ * Returns: 0
+ */
+static int
+sa1100_pcmcia_get_socket(struct pcmcia_socket *sock, socket_state_t *state)
+{
+  struct sa1100_pcmcia_socket *skt = to_sa1100_socket(sock);
+
+  debug(skt, 2, "\n");
+
+  *state = skt->cs_state;
+
+  return 0;
+}
+
+/* sa1100_pcmcia_set_socket()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Implements the set_socket() operation for the in-kernel PCMCIA
+ * service (formerly SS_SetSocket in Card Services). We more or
+ * less punt all of this work and let the kernel handle the details
+ * of power configuration, reset, &c. We also record the value of
+ * `state' in order to regurgitate it to the PCMCIA core later.
+ *
+ * Returns: 0
+ */
+static int
+sa1100_pcmcia_set_socket(struct pcmcia_socket *sock, socket_state_t *state)
+{
+  struct sa1100_pcmcia_socket *skt = to_sa1100_socket(sock);
+
+  debug(skt, 2, "mask: %s%s%s%s%s%sflags: %s%s%s%s%s%sVcc %d Vpp %d irq %d\n",
+       (state->csc_mask==0)?"<NONE> ":"",
+       (state->csc_mask&SS_DETECT)?"DETECT ":"",
+       (state->csc_mask&SS_READY)?"READY ":"",
+       (state->csc_mask&SS_BATDEAD)?"BATDEAD ":"",
+       (state->csc_mask&SS_BATWARN)?"BATWARN ":"",
+       (state->csc_mask&SS_STSCHG)?"STSCHG ":"",
+       (state->flags==0)?"<NONE> ":"",
+       (state->flags&SS_PWR_AUTO)?"PWR_AUTO ":"",
+       (state->flags&SS_IOCARD)?"IOCARD ":"",
+       (state->flags&SS_RESET)?"RESET ":"",
+       (state->flags&SS_SPKR_ENA)?"SPKR_ENA ":"",
+       (state->flags&SS_OUTPUT_ENA)?"OUTPUT_ENA ":"",
+       state->Vcc, state->Vpp, state->io_irq);
+
+  return sa1100_pcmcia_config_skt(skt, state);
+}  /* sa1100_pcmcia_set_socket() */
+
+
+/* sa1100_pcmcia_set_io_map()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Implements the set_io_map() operation for the in-kernel PCMCIA
+ * service (formerly SS_SetIOMap in Card Services). We configure
+ * the map speed as requested, but override the address ranges
+ * supplied by Card Services.
+ *
+ * Returns: 0 on success, -1 on error
+ */
+static int
+sa1100_pcmcia_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *map)
+{
+       struct sa1100_pcmcia_socket *skt = to_sa1100_socket(sock);
+       unsigned short speed = map->speed;
+
+       debug(skt, 2, "map %u  speed %u start 0x%08x stop 0x%08x\n",
+               map->map, map->speed, map->start, map->stop);
+       debug(skt, 2, "flags: %s%s%s%s%s%s%s%s\n",
+               (map->flags==0)?"<NONE>":"",
+               (map->flags&MAP_ACTIVE)?"ACTIVE ":"",
+               (map->flags&MAP_16BIT)?"16BIT ":"",
+               (map->flags&MAP_AUTOSZ)?"AUTOSZ ":"",
+               (map->flags&MAP_0WS)?"0WS ":"",
+               (map->flags&MAP_WRPROT)?"WRPROT ":"",
+               (map->flags&MAP_USE_WAIT)?"USE_WAIT ":"",
+               (map->flags&MAP_PREFETCH)?"PREFETCH ":"");
+
+       if (map->map >= MAX_IO_WIN) {
+               printk(KERN_ERR "%s(): map (%d) out of range\n", __FUNCTION__,
+                      map->map);
+               return -1;
+       }
+
+       if (map->flags & MAP_ACTIVE) {
+               if (speed == 0)
+                       speed = SA1100_PCMCIA_IO_ACCESS;
+       } else {
+               speed = 0;
+       }
+
+       skt->spd_io[map->map] = speed;
+       sa1100_pcmcia_set_mecr(skt, cpufreq_get(0));
+
+       if (map->stop == 1)
+               map->stop = PAGE_SIZE-1;
+
+       map->stop -= map->start;
+       map->stop += (unsigned long)skt->virt_io;
+       map->start = (unsigned long)skt->virt_io;
+
+       return 0;
+}  /* sa1100_pcmcia_set_io_map() */
+
+
+/* sa1100_pcmcia_set_mem_map()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Implements the set_mem_map() operation for the in-kernel PCMCIA
+ * service (formerly SS_SetMemMap in Card Services). We configure
+ * the map speed as requested, but override the address ranges
+ * supplied by Card Services.
+ *
+ * Returns: 0 on success, -EINVAL on error
+ */
+static int
+sa1100_pcmcia_set_mem_map(struct pcmcia_socket *sock, struct pccard_mem_map *map)
+{
+       struct sa1100_pcmcia_socket *skt = to_sa1100_socket(sock);
+       struct resource *res;
+       unsigned short speed = map->speed;
+
+       debug(skt, 2, "map %u speed %u card_start %08x\n",
+               map->map, map->speed, map->card_start);
+       debug(skt, 2, "flags: %s%s%s%s%s%s%s%s\n",
+               (map->flags==0)?"<NONE>":"",
+               (map->flags&MAP_ACTIVE)?"ACTIVE ":"",
+               (map->flags&MAP_16BIT)?"16BIT ":"",
+               (map->flags&MAP_AUTOSZ)?"AUTOSZ ":"",
+               (map->flags&MAP_0WS)?"0WS ":"",
+               (map->flags&MAP_WRPROT)?"WRPROT ":"",
+               (map->flags&MAP_ATTRIB)?"ATTRIB ":"",
+               (map->flags&MAP_USE_WAIT)?"USE_WAIT ":"");
+
+       if (map->map >= MAX_WIN)
+               return -EINVAL;
+
+       if (map->flags & MAP_ACTIVE) {
+               if (speed == 0)
+                       speed = 300;
+       } else {
+               speed = 0;
+       }
+
+       if (map->flags & MAP_ATTRIB) {
+               res = &skt->res_attr;
+               skt->spd_attr[map->map] = speed;
+               skt->spd_mem[map->map] = 0;
+       } else {
+               res = &skt->res_mem;
+               skt->spd_attr[map->map] = 0;
+               skt->spd_mem[map->map] = speed;
+       }
+
+       sa1100_pcmcia_set_mecr(skt, cpufreq_get(0));
+
+       map->sys_stop -= map->sys_start;
+       map->sys_stop += res->start + map->card_start;
+       map->sys_start = res->start + map->card_start;
+
+       return 0;
+}
+
+struct bittbl {
+       unsigned int mask;
+       const char *name;
+};
+
+static struct bittbl status_bits[] = {
+       { SS_WRPROT,            "SS_WRPROT"     },
+       { SS_BATDEAD,           "SS_BATDEAD"    },
+       { SS_BATWARN,           "SS_BATWARN"    },
+       { SS_READY,             "SS_READY"      },
+       { SS_DETECT,            "SS_DETECT"     },
+       { SS_POWERON,           "SS_POWERON"    },
+       { SS_STSCHG,            "SS_STSCHG"     },
+       { SS_3VCARD,            "SS_3VCARD"     },
+       { SS_XVCARD,            "SS_XVCARD"     },
+};
+
+static struct bittbl conf_bits[] = {
+       { SS_PWR_AUTO,          "SS_PWR_AUTO"   },
+       { SS_IOCARD,            "SS_IOCARD"     },
+       { SS_RESET,             "SS_RESET"      },
+       { SS_DMA_MODE,          "SS_DMA_MODE"   },
+       { SS_SPKR_ENA,          "SS_SPKR_ENA"   },
+       { SS_OUTPUT_ENA,        "SS_OUTPUT_ENA" },
+};
+
+static void
+dump_bits(char **p, const char *prefix, unsigned int val, struct bittbl *bits, int sz)
+{
+       char *b = *p;
+       int i;
+
+       b += sprintf(b, "%-9s:", prefix);
+       for (i = 0; i < sz; i++)
+               if (val & bits[i].mask)
+                       b += sprintf(b, " %s", bits[i].name);
+       *b++ = '\n';
+       *p = b;
+}
+
+/* show_status()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * Implements the /sys/class/pcmcia_socket/??/status file.
+ *
+ * Returns: the number of characters added to the buffer
+ */
+static ssize_t show_status(struct class_device *class_dev, char *buf)
+{
+       struct sa1100_pcmcia_socket *skt = container_of(class_dev, 
+                               struct sa1100_pcmcia_socket, socket.dev);
+       unsigned int clock = cpufreq_get(0);
+       unsigned long mecr = MECR;
+       char *p = buf;
+
+       p+=sprintf(p, "slot     : %d\n", skt->nr);
+
+       dump_bits(&p, "status", skt->status,
+                 status_bits, ARRAY_SIZE(status_bits));
+       dump_bits(&p, "csc_mask", skt->cs_state.csc_mask,
+                 status_bits, ARRAY_SIZE(status_bits));
+       dump_bits(&p, "cs_flags", skt->cs_state.flags,
+                 conf_bits, ARRAY_SIZE(conf_bits));
+
+       p+=sprintf(p, "Vcc      : %d\n", skt->cs_state.Vcc);
+       p+=sprintf(p, "Vpp      : %d\n", skt->cs_state.Vpp);
+       p+=sprintf(p, "IRQ      : %d (%d)\n", skt->cs_state.io_irq, skt->irq);
+
+       p+=sprintf(p, "I/O      : %u (%u)\n",
+               calc_speed(skt->spd_io, MAX_IO_WIN, SA1100_PCMCIA_IO_ACCESS),
+               sa1100_pcmcia_cmd_time(clock, MECR_BSIO_GET(mecr, skt->nr)));
+
+       p+=sprintf(p, "attribute: %u (%u)\n",
+               calc_speed(skt->spd_attr, MAX_WIN, SA1100_PCMCIA_3V_MEM_ACCESS),
+               sa1100_pcmcia_cmd_time(clock, MECR_BSA_GET(mecr, skt->nr)));
+
+       p+=sprintf(p, "common   : %u (%u)\n",
+               calc_speed(skt->spd_mem, MAX_WIN, SA1100_PCMCIA_3V_MEM_ACCESS),
+               sa1100_pcmcia_cmd_time(clock, MECR_BSM_GET(mecr, skt->nr)));
+
+       return p-buf;
+}
+static CLASS_DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
+
+
+static struct pccard_operations sa11xx_pcmcia_operations = {
+       .init                   = sa1100_pcmcia_sock_init,
+       .suspend                = sa1100_pcmcia_suspend,
+       .get_status             = sa1100_pcmcia_get_status,
+       .get_socket             = sa1100_pcmcia_get_socket,
+       .set_socket             = sa1100_pcmcia_set_socket,
+       .set_io_map             = sa1100_pcmcia_set_io_map,
+       .set_mem_map            = sa1100_pcmcia_set_mem_map,
+};
+
+int sa11xx_request_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr)
+{
+       int i, res = 0;
+
+       for (i = 0; i < nr; i++) {
+               if (irqs[i].sock != skt->nr)
+                       continue;
+               res = request_irq(irqs[i].irq, sa1100_pcmcia_interrupt,
+                                 SA_INTERRUPT, irqs[i].str, skt);
+               if (res)
+                       break;
+               set_irq_type(irqs[i].irq, IRQT_NOEDGE);
+       }
+
+       if (res) {
+               printk(KERN_ERR "PCMCIA: request for IRQ%d failed (%d)\n",
+                       irqs[i].irq, res);
+
+               while (i--)
+                       if (irqs[i].sock == skt->nr)
+                               free_irq(irqs[i].irq, skt);
+       }
+       return res;
+}
+EXPORT_SYMBOL(sa11xx_request_irqs);
+
+void sa11xx_free_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr)
+{
+       int i;
+
+       for (i = 0; i < nr; i++)
+               if (irqs[i].sock == skt->nr)
+                       free_irq(irqs[i].irq, skt);
+}
+EXPORT_SYMBOL(sa11xx_free_irqs);
+
+void sa11xx_disable_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr)
+{
+       int i;
+
+       for (i = 0; i < nr; i++)
+               if (irqs[i].sock == skt->nr)
+                       set_irq_type(irqs[i].irq, IRQT_NOEDGE);
+}
+EXPORT_SYMBOL(sa11xx_disable_irqs);
+
+void sa11xx_enable_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr)
+{
+       int i;
+
+       for (i = 0; i < nr; i++)
+               if (irqs[i].sock == skt->nr) {
+                       set_irq_type(irqs[i].irq, IRQT_RISING);
+                       set_irq_type(irqs[i].irq, IRQT_BOTHEDGE);
+               }
+}
+EXPORT_SYMBOL(sa11xx_enable_irqs);
+
+static LIST_HEAD(sa1100_sockets);
+static DECLARE_MUTEX(sa1100_sockets_lock);
+
+static const char *skt_names[] = {
+       "PCMCIA socket 0",
+       "PCMCIA socket 1",
+};
+
+struct skt_dev_info {
+       int nskt;
+       struct sa1100_pcmcia_socket skt[0];
+};
+
+#define SKT_DEV_INFO_SIZE(n) \
+       (sizeof(struct skt_dev_info) + (n)*sizeof(struct sa1100_pcmcia_socket))
+
+int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, int first, int nr)
+{
+       struct skt_dev_info *sinfo;
+       unsigned int cpu_clock;
+       int ret, i;
+
+       /*
+        * set default MECR calculation if the board specific
+        * code did not specify one...
+        */
+       if (!ops->socket_get_timing)
+               ops->socket_get_timing = sa1100_pcmcia_default_mecr_timing;
+
+       down(&sa1100_sockets_lock);
+
+       sinfo = kmalloc(SKT_DEV_INFO_SIZE(nr), GFP_KERNEL);
+       if (!sinfo) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       memset(sinfo, 0, SKT_DEV_INFO_SIZE(nr));
+       sinfo->nskt = nr;
+
+       cpu_clock = cpufreq_get(0);
+
+       /*
+        * Initialise the per-socket structure.
+        */
+       for (i = 0; i < nr; i++) {
+               struct sa1100_pcmcia_socket *skt = &sinfo->skt[i];
+
+               skt->socket.ops = &sa11xx_pcmcia_operations;
+               skt->socket.owner = ops->owner;
+               skt->socket.dev.dev = dev;
+
+               init_timer(&skt->poll_timer);
+               skt->poll_timer.function = sa1100_pcmcia_poll_event;
+               skt->poll_timer.data = (unsigned long)skt;
+               skt->poll_timer.expires = jiffies + SA1100_PCMCIA_POLL_PERIOD;
+
+               skt->nr         = first + i;
+               skt->irq        = NO_IRQ;
+               skt->dev        = dev;
+               skt->ops        = ops;
+
+               skt->res_skt.start      = _PCMCIA(skt->nr);
+               skt->res_skt.end        = _PCMCIA(skt->nr) + PCMCIASp - 1;
+               skt->res_skt.name       = skt_names[skt->nr];
+               skt->res_skt.flags      = IORESOURCE_MEM;
+
+               ret = request_resource(&iomem_resource, &skt->res_skt);
+               if (ret)
+                       goto out_err_1;
+
+               skt->res_io.start       = _PCMCIAIO(skt->nr);
+               skt->res_io.end         = _PCMCIAIO(skt->nr) + PCMCIAIOSp - 1;
+               skt->res_io.name        = "io";
+               skt->res_io.flags       = IORESOURCE_MEM | IORESOURCE_BUSY;
+
+               ret = request_resource(&skt->res_skt, &skt->res_io);
+               if (ret)
+                       goto out_err_2;
+
+               skt->res_mem.start      = _PCMCIAMem(skt->nr);
+               skt->res_mem.end        = _PCMCIAMem(skt->nr) + PCMCIAMemSp - 1;
+               skt->res_mem.name       = "memory";
+               skt->res_mem.flags      = IORESOURCE_MEM;
+
+               ret = request_resource(&skt->res_skt, &skt->res_mem);
+               if (ret)
+                       goto out_err_3;
+
+               skt->res_attr.start     = _PCMCIAAttr(skt->nr);
+               skt->res_attr.end       = _PCMCIAAttr(skt->nr) + PCMCIAAttrSp - 1;
+               skt->res_attr.name      = "attribute";
+               skt->res_attr.flags     = IORESOURCE_MEM;
+               
+               ret = request_resource(&skt->res_skt, &skt->res_attr);
+               if (ret)
+                       goto out_err_4;
+
+               skt->virt_io = ioremap(skt->res_io.start, 0x10000);
+               if (skt->virt_io == NULL) {
+                       ret = -ENOMEM;
+                       goto out_err_5;
+               }
+
+               list_add(&skt->node, &sa1100_sockets);
+
+               /*
+                * We initialize the MECR to default values here, because
+                * we are not guaranteed to see a SetIOMap operation at
+                * runtime.
+                */
+               sa1100_pcmcia_set_mecr(skt, cpu_clock);
+
+               ret = ops->hw_init(skt);
+               if (ret)
+                       goto out_err_6;
+
+               skt->socket.features = SS_CAP_STATIC_MAP|SS_CAP_PCCARD;
+               skt->socket.irq_mask = 0;
+               skt->socket.map_size = PAGE_SIZE;
+               skt->socket.pci_irq = skt->irq;
+               skt->socket.io_offset = (unsigned long)skt->virt_io;
+
+               skt->status = sa1100_pcmcia_skt_state(skt);
+
+               ret = pcmcia_register_socket(&skt->socket);
+               if (ret)
+                       goto out_err_7;
+
+               WARN_ON(skt->socket.sock != i);
+
+               add_timer(&skt->poll_timer);
+
+               class_device_create_file(&skt->socket.dev, &class_device_attr_status);
+       }
+
+       dev_set_drvdata(dev, sinfo);
+       ret = 0;
+       goto out;
+
+       do {
+               struct sa1100_pcmcia_socket *skt = &sinfo->skt[i];
+
+               del_timer_sync(&skt->poll_timer);
+               pcmcia_unregister_socket(&skt->socket);
+
+ out_err_7:
+               flush_scheduled_work();
+
+               ops->hw_shutdown(skt);
+ out_err_6:
+               list_del(&skt->node);
+               iounmap(skt->virt_io);
+ out_err_5:
+               release_resource(&skt->res_attr);
+ out_err_4:
+               release_resource(&skt->res_mem);
+ out_err_3:
+               release_resource(&skt->res_io);
+ out_err_2:
+               release_resource(&skt->res_skt);
+ out_err_1:
+               i--;
+       } while (i >= 0);
+
+       kfree(sinfo);
+
+ out:
+       up(&sa1100_sockets_lock);
+       return ret;
+}
+EXPORT_SYMBOL(sa11xx_drv_pcmcia_probe);
+
+int sa11xx_drv_pcmcia_remove(struct device *dev)
+{
+       struct skt_dev_info *sinfo = dev_get_drvdata(dev);
+       int i;
+
+       dev_set_drvdata(dev, NULL);
+
+       down(&sa1100_sockets_lock);
+       for (i = 0; i < sinfo->nskt; i++) {
+               struct sa1100_pcmcia_socket *skt = &sinfo->skt[i];
+
+               del_timer_sync(&skt->poll_timer);
+
+               pcmcia_unregister_socket(&skt->socket);
+
+               flush_scheduled_work();
+
+               skt->ops->hw_shutdown(skt);
+
+               sa1100_pcmcia_config_skt(skt, &dead_socket);
+
+               list_del(&skt->node);
+               iounmap(skt->virt_io);
+               skt->virt_io = NULL;
+               release_resource(&skt->res_attr);
+               release_resource(&skt->res_mem);
+               release_resource(&skt->res_io);
+               release_resource(&skt->res_skt);
+       }
+       up(&sa1100_sockets_lock);
+
+       kfree(sinfo);
+
+       return 0;
+}
+EXPORT_SYMBOL(sa11xx_drv_pcmcia_remove);
+
+#ifdef CONFIG_CPU_FREQ
+
+/* sa1100_pcmcia_update_mecr()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ * When sa1100_pcmcia_notifier() decides that a MECR adjustment (due
+ * to a core clock frequency change) is needed, this routine establishes
+ * new BS_xx values consistent with the clock speed `clock'.
+ */
+static void sa1100_pcmcia_update_mecr(unsigned int clock)
+{
+       struct sa1100_pcmcia_socket *skt;
+
+       down(&sa1100_sockets_lock);
+       list_for_each_entry(skt, &sa1100_sockets, node)
+               sa1100_pcmcia_set_mecr(skt, clock);
+       up(&sa1100_sockets_lock);
+}
+
+/* sa1100_pcmcia_notifier()
+ * ^^^^^^^^^^^^^^^^^^^^^^^^
+ * When changing the processor core clock frequency, it is necessary
+ * to adjust the MECR timings accordingly. We've recorded the timings
+ * requested by Card Services, so this is just a matter of finding
+ * out what our current speed is, and then recomputing the new MECR
+ * values.
+ *
+ * Returns: 0 on success, -1 on error
+ */
+static int
+sa1100_pcmcia_notifier(struct notifier_block *nb, unsigned long val,
+                      void *data)
+{
+       struct cpufreq_freqs *freqs = data;
+
+       switch (val) {
+       case CPUFREQ_PRECHANGE:
+               if (freqs->new > freqs->old)
+                       sa1100_pcmcia_update_mecr(freqs->new);
+               break;
+
+       case CPUFREQ_POSTCHANGE:
+               if (freqs->new < freqs->old)
+                       sa1100_pcmcia_update_mecr(freqs->new);
+               break;
+       }
+
+       return 0;
+}
+
+static struct notifier_block sa1100_pcmcia_notifier_block = {
+       .notifier_call  = sa1100_pcmcia_notifier
+};
+
+static int __init sa11xx_pcmcia_init(void)
+{
+       int ret;
+
+       printk(KERN_INFO "SA11xx PCMCIA\n");
+
+       ret = cpufreq_register_notifier(&sa1100_pcmcia_notifier_block,
+                                       CPUFREQ_TRANSITION_NOTIFIER);
+       if (ret < 0)
+               printk(KERN_ERR "Unable to register CPU frequency change "
+                       "notifier (%d)\n", ret);
+
+       return ret;
+}
+module_init(sa11xx_pcmcia_init);
+
+static void __exit sa11xx_pcmcia_exit(void)
+{
+       cpufreq_unregister_notifier(&sa1100_pcmcia_notifier_block, CPUFREQ_TRANSITION_NOTIFIER);
+}
+
+module_exit(sa11xx_pcmcia_exit);
+#endif
+
+MODULE_AUTHOR("John Dorsey <john+@cs.cmu.edu>");
+MODULE_DESCRIPTION("Linux PCMCIA Card Services: SA-11xx core socket driver");
+MODULE_LICENSE("Dual MPL/GPL");
diff --git a/drivers/pcmcia/sa11xx_core.h b/drivers/pcmcia/sa11xx_core.h
new file mode 100644 (file)
index 0000000..aadf7c0
--- /dev/null
@@ -0,0 +1,121 @@
+/*
+ * linux/include/asm/arch/pcmcia.h
+ *
+ * Copyright (C) 2000 John G Dorsey <john+@cs.cmu.edu>
+ *
+ * This file contains definitions for the low-level SA-1100 kernel PCMCIA
+ * interface. Please see linux/Documentation/arm/SA1100/PCMCIA for details.
+ */
+#ifndef _ASM_ARCH_PCMCIA
+#define _ASM_ARCH_PCMCIA
+
+/* include the world */
+#include <pcmcia/version.h>
+#include <pcmcia/cs_types.h>
+#include <pcmcia/cs.h>
+#include <pcmcia/ss.h>
+#include <pcmcia/bulkmem.h>
+#include <pcmcia/cistpl.h>
+#include "cs_internal.h"
+
+struct device;
+
+/* Ideally, we'd support up to MAX_SOCK sockets, but the SA-1100 only
+ * has support for two. This shows up in lots of hardwired ways, such
+ * as the fact that MECR only has enough bits to configure two sockets.
+ * Since it's so entrenched in the hardware, limiting the software
+ * in this way doesn't seem too terrible.
+ */
+#define SA1100_PCMCIA_MAX_SOCK   (2)
+
+struct pcmcia_state {
+  unsigned detect: 1,
+            ready: 1,
+             bvd1: 1,
+             bvd2: 1,
+           wrprot: 1,
+            vs_3v: 1,
+            vs_Xv: 1;
+};
+
+/*
+ * This structure encapsulates per-socket state which we might need to
+ * use when responding to a Card Services query of some kind.
+ */
+struct sa1100_pcmcia_socket {
+       struct pcmcia_socket    socket;
+
+       /*
+        * Info from low level handler
+        */
+       struct device           *dev;
+       unsigned int            nr;
+       unsigned int            irq;
+
+       /*
+        * Core PCMCIA state
+        */
+       struct pcmcia_low_level *ops;
+
+       unsigned int            status;
+       socket_state_t          cs_state;
+
+       unsigned short          spd_io[MAX_IO_WIN];
+       unsigned short          spd_mem[MAX_WIN];
+       unsigned short          spd_attr[MAX_WIN];
+
+       struct resource         res_skt;
+       struct resource         res_io;
+       struct resource         res_mem;
+       struct resource         res_attr;
+       void                    *virt_io;
+
+       unsigned int            irq_state;
+
+       struct timer_list       poll_timer;
+       struct list_head        node;
+};
+
+struct pcmcia_low_level {
+       struct module *owner;
+
+       int (*hw_init)(struct sa1100_pcmcia_socket *);
+       void (*hw_shutdown)(struct sa1100_pcmcia_socket *);
+
+       void (*socket_state)(struct sa1100_pcmcia_socket *, struct pcmcia_state *);
+       int (*configure_socket)(struct sa1100_pcmcia_socket *, const socket_state_t *);
+
+       /*
+        * Enable card status IRQs on (re-)initialisation.  This can
+        * be called at initialisation, power management event, or
+        * pcmcia event.
+        */
+       void (*socket_init)(struct sa1100_pcmcia_socket *);
+
+       /*
+        * Disable card status IRQs and PCMCIA bus on suspend.
+        */
+       void (*socket_suspend)(struct sa1100_pcmcia_socket *);
+
+       /*
+        * Calculate MECR timing clock wait states
+        */
+       unsigned int (*socket_get_timing)(struct sa1100_pcmcia_socket *,
+                       unsigned int cpu_speed, unsigned int cmd_time);
+};
+
+struct pcmcia_irqs {
+       int sock;
+       int irq;
+       const char *str;
+};
+
+int sa11xx_request_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr);
+void sa11xx_free_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr);
+void sa11xx_disable_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr);
+void sa11xx_enable_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr);
+
+extern int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, int first, int nr);
+extern int sa11xx_drv_pcmcia_remove(struct device *dev);
+
+#endif
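[Editor's note] The header above is the whole contract between the sa11xx core and the board-specific drivers: a board fills in a struct pcmcia_low_level, describes its status interrupts with a pcmcia_irqs table, and hands both to sa11xx_drv_pcmcia_probe(). The sketch below shows that wiring under stated assumptions; the "myboard" name, the IRQ numbers and the fixed 3.3V socket state are invented for illustration and are not taken from this commit.

/* Hypothetical board glue for the interface declared in sa11xx_core.h. */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/device.h>
#include "sa11xx_core.h"

static struct pcmcia_irqs myboard_irqs[] = {
	{ .sock = 0, .irq = 17, .str = "PCMCIA CD0"  },	/* card detect (hypothetical IRQ) */
	{ .sock = 0, .irq = 18, .str = "PCMCIA BVD0" },	/* battery status (hypothetical IRQ) */
};

static int myboard_hw_init(struct sa1100_pcmcia_socket *skt)
{
	skt->irq = 19;	/* ready/IREQ interrupt, hypothetical */
	return sa11xx_request_irqs(skt, myboard_irqs, ARRAY_SIZE(myboard_irqs));
}

static void myboard_hw_shutdown(struct sa1100_pcmcia_socket *skt)
{
	sa11xx_free_irqs(skt, myboard_irqs, ARRAY_SIZE(myboard_irqs));
}

static void myboard_socket_state(struct sa1100_pcmcia_socket *skt,
				 struct pcmcia_state *state)
{
	/* A real driver reads GPIOs here; report a present, ready 3.3V card. */
	state->detect = 1;
	state->ready  = 1;
	state->vs_3v  = 1;
}

static int myboard_configure_socket(struct sa1100_pcmcia_socket *skt,
				    const socket_state_t *state)
{
	/* A real driver would switch Vcc/Vpp and the reset line from *state. */
	return 0;
}

static void myboard_socket_init(struct sa1100_pcmcia_socket *skt)
{
	sa11xx_enable_irqs(skt, myboard_irqs, ARRAY_SIZE(myboard_irqs));
}

static void myboard_socket_suspend(struct sa1100_pcmcia_socket *skt)
{
	sa11xx_disable_irqs(skt, myboard_irqs, ARRAY_SIZE(myboard_irqs));
}

static struct pcmcia_low_level myboard_pcmcia_ops = {
	.owner			= THIS_MODULE,
	.hw_init		= myboard_hw_init,
	.hw_shutdown		= myboard_hw_shutdown,
	.socket_state		= myboard_socket_state,
	.configure_socket	= myboard_configure_socket,
	.socket_init		= myboard_socket_init,
	.socket_suspend		= myboard_socket_suspend,
	/* .socket_get_timing left NULL: the core installs its default MECR timing */
};

static int myboard_pcmcia_probe(struct device *dev)
{
	/* one socket, numbered 0 */
	return sa11xx_drv_pcmcia_probe(dev, &myboard_pcmcia_ops, 0, 1);
}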
diff --git a/drivers/scsi/pcmcia/qlogic_core.c b/drivers/scsi/pcmcia/qlogic_core.c
new file mode 100644 (file)
index 0000000..78abe22
--- /dev/null
@@ -0,0 +1,2 @@
+#define PCMCIA 1
+#include "qlogicfas.c"
diff --git a/drivers/scsi/qlogicfas.h b/drivers/scsi/qlogicfas.h
new file mode 100644 (file)
index 0000000..6750e8d
--- /dev/null
@@ -0,0 +1,124 @@
+/* to be used by qlogicfas and qlogic_cs */
+#ifndef __QLOGICFAS_H
+#define __QLOGICFAS_H
+
+/*----------------------------------------------------------------*/
+/* Configuration */
+
+/* Set the following to 2 to use normal interrupt (active high/totem-pole
+   tristate), otherwise use 0 (REQUIRED FOR PCMCIA) for active low, open
+   drain */
+
+#define QL_INT_ACTIVE_HIGH 2
+
+/* Set the following to max out the speed of the PIO PseudoDMA transfers;
+   again, 0 tends to be slower, but more stable.  */
+
+#define QL_TURBO_PDMA 1
+
+/* This should be 1 to enable parity detection */
+
+#define QL_ENABLE_PARITY 1
+
+/* This will reset all devices when the driver is initialized (during bootup).
+   The other linux drivers don't do this, but the DOS drivers do, and after
+   using DOS or some kind of crash or lockup this will bring things back
+   without requiring a cold boot.  It does take some time to recover from a
+   reset, so it is slower, and I have seen timeouts so that devices weren't
+   recognized when this was set. */
+
+#define QL_RESET_AT_START 0
+
+/* crystal frequency in megahertz (for offset 5 and 9)
+   Please set this for your card.  Most Qlogic cards are 40 MHz.  The
+   Control Concepts ISA (not VLB) is 24 MHz */
+
+#define XTALFREQ       40
+
+/**********/
+/* DANGER! modify these at your own risk */
+/* SLOWCABLE can usually be reset to zero if you have a clean setup and
+   proper termination.  The rest are for synchronous transfers and other
+   advanced features if your device can transfer faster than 5Mb/sec.
+   If you are really curious, email me for a quick howto until I have
+   something official */
+/**********/
+
+/*****/
+/* config register 1 (offset 8) options */
+/* This needs to be set to 1 if your cabling is long or noisy */
+#define SLOWCABLE 1
+
+/*****/
+/* offset 0xc */
+/* This will set fast (10 MHz) synchronous timing when set to 1
+   For this to have an effect, FASTCLK must also be 1 */
+#define FASTSCSI 0
+
+/* This when set to 1 will set a faster sync transfer rate */
+#define FASTCLK 0      /*(XTALFREQ>25?1:0)*/
+
+/*****/
+/* offset 6 */
+/* This is the sync transfer divisor, XTALFREQ/X will be the maximum
+   achievable data rate (assuming the rest of the system is capable
+   and set properly) */
+#define SYNCXFRPD 5    /*(XTALFREQ/5)*/
+
+/*****/
+/* offset 7 */
+/* This is the count of how many synchronous transfers can take place
+       i.e. how many reqs can occur before an ack is given.
+       The maximum value for this is 15, the upper bits can modify
+       REQ/ACK assertion and deassertion during synchronous transfers
+       If this is 0, the bus will only transfer asynchronously */
+#define SYNCOFFST 0
+/* for the curious, bits 7&6 control the deassertion delay in 1/2 cycles
+       of the 40Mhz clock. If FASTCLK is 1, specifying 01 (1/2) will
+       cause the deassertion to be early by 1/2 clock.  Bits 5&4 control
+       the assertion delay, also in 1/2 clocks (FASTCLK is ignored here). */
+
+/*----------------------------------------------------------------*/
+#ifdef PCMCIA
+#undef QL_INT_ACTIVE_HIGH
+#define QL_INT_ACTIVE_HIGH 0
+#endif
+
+struct qlogicfas_priv;
+typedef struct qlogicfas_priv *qlogicfas_priv_t;
+struct qlogicfas_priv {
+        int            qbase;          /* Port */
+        int            qinitid;        /* initiator ID */
+        int            qabort;         /* Flag to cause an abort */
+        int            qlirq;          /* IRQ being used */
+        char           qinfo[80];      /* description */
+        Scsi_Cmnd      *qlcmd;         /* current command being processed */
+        struct Scsi_Host       *shost; /* pointer back to host */
+        qlogicfas_priv_t       next;   /* next private struct */
+};
+
+extern int qlcfg5;
+extern int qlcfg6;
+extern int qlcfg7;
+extern int qlcfg8;
+extern int qlcfg9;
+extern int qlcfgc;
+
+/* The qlogic card uses two register maps - These macros select which one */
+#define REG0 ( outb( inb( qbase + 0xd ) & 0x7f , qbase + 0xd ), outb( 4 , qbase + 0xd ))
+#define REG1 ( outb( inb( qbase + 0xd ) | 0x80 , qbase + 0xd ), outb( 0xb4 | QL_INT_ACTIVE_HIGH , qbase + 0xd ))
+
+/* following is watchdog timeout in microseconds */
+#define WATCHDOG 5000000
+
+/*----------------------------------------------------------------*/
+/* the following will set the monitor border color (useful to find
+   where something crashed or gets stuck at and as a simple profiler) */
+
+#if 0
+#define rtrc(i) {inb(0x3da);outb(0x31,0x3c0);outb((i),0x3c0);}
+#else
+#define rtrc(i) {}
+#endif
+#endif /* __QLOGICFAS_H */
+
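[Editor's note] As a quick sanity check of the timing comments above: the header says the maximum synchronous data rate is bounded by XTALFREQ/SYNCXFRPD, so with the defaults (40 and 5) that works out to 40/5 = 8, and with SYNCOFFST = 0 the bus stays asynchronous anyway. A throwaway user-space snippet mirroring those defaults, purely for illustration and not part of the driver:

#include <stdio.h>

/* Mirror of the qlogicfas.h timing defaults, for illustration only. */
#define XTALFREQ  40	/* crystal frequency, MHz */
#define SYNCXFRPD 5	/* sync transfer divisor */

int main(void)
{
	/* Per the header comments, XTALFREQ/SYNCXFRPD bounds the sync data rate. */
	printf("max sync data rate: %d (in the header's rate units)\n",
	       XTALFREQ / SYNCXFRPD);
	return 0;
}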
diff --git a/drivers/usb/core/driverfs.c b/drivers/usb/core/driverfs.c
new file mode 100644 (file)
index 0000000..51ff9bb
--- /dev/null
@@ -0,0 +1,229 @@
+/*
+ * drivers/usb/core/driverfs.c
+ *
+ * (C) Copyright 2002 David Brownell
+ * (C) Copyright 2002 Greg Kroah-Hartman
+ * (C) Copyright 2002 IBM Corp.
+ *
+ * All of the driverfs file attributes for usb devices and interfaces.
+ *
+ */
+
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+
+#ifdef CONFIG_USB_DEBUG
+       #define DEBUG
+#else
+       #undef DEBUG
+#endif
+#include <linux/usb.h>
+
+#include "usb.h"
+
+/* Active configuration fields */
+#define usb_actconfig_show(field, multiplier, format_string)           \
+static ssize_t  show_##field (struct device *dev, char *buf)           \
+{                                                                      \
+       struct usb_device *udev;                                        \
+                                                                       \
+       udev = to_usb_device (dev);                                     \
+       if (udev->actconfig)                                            \
+               return sprintf (buf, format_string,                     \
+                               udev->actconfig->desc.field * multiplier);      \
+       else                                                            \
+               return 0;                                               \
+}                                                                      \
+
+#define usb_actconfig_attr(field, multiplier, format_string)           \
+usb_actconfig_show(field, multiplier, format_string)                   \
+static DEVICE_ATTR(field, S_IRUGO, show_##field, NULL);
+
+usb_actconfig_attr (bNumInterfaces, 1, "%2d\n")
+usb_actconfig_attr (bmAttributes, 1, "%2x\n")
+usb_actconfig_attr (bMaxPower, 2, "%3dmA\n")
+
+/* configuration value is always present, and r/w */
+usb_actconfig_show(bConfigurationValue, 1, "%u\n");
+
+static ssize_t
+set_bConfigurationValue (struct device *dev, const char *buf, size_t count)
+{
+       struct usb_device       *udev = to_usb_device (dev);
+       int                     config, value;
+
+       if (sscanf (buf, "%u", &config) != 1 || config > 255)
+               return -EINVAL;
+       down(&udev->serialize);
+       value = usb_set_configuration (udev, config);
+       up(&udev->serialize);
+       return (value < 0) ? value : count;
+}
+
+static DEVICE_ATTR(bConfigurationValue, S_IRUGO | S_IWUSR, 
+               show_bConfigurationValue, set_bConfigurationValue);
+
+/* String fields */
+#define usb_string_attr(name, field)           \
+static ssize_t  show_##name(struct device *dev, char *buf)             \
+{                                                                      \
+       struct usb_device *udev;                                        \
+       int len;                                                        \
+                                                                       \
+       udev = to_usb_device (dev);                                     \
+       len = usb_string(udev, udev->descriptor.field, buf, PAGE_SIZE); \
+       if (len < 0)                                                    \
+               return 0;                                               \
+       buf[len] = '\n';                                                \
+       buf[len+1] = 0;                                                 \
+       return len+1;                                                   \
+}                                                                      \
+static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL);
+
+usb_string_attr(product, iProduct);
+usb_string_attr(manufacturer, iManufacturer);
+usb_string_attr(serial, iSerialNumber);
+
+static ssize_t
+show_speed (struct device *dev, char *buf)
+{
+       struct usb_device *udev;
+       char *speed;
+
+       udev = to_usb_device (dev);
+
+       switch (udev->speed) {
+       case USB_SPEED_LOW:
+               speed = "1.5";
+               break;
+       case USB_SPEED_UNKNOWN:
+       case USB_SPEED_FULL:
+               speed = "12";
+               break;
+       case USB_SPEED_HIGH:
+               speed = "480";
+               break;
+       default:
+               speed = "unknown";
+       }
+       return sprintf (buf, "%s\n", speed);
+}
+static DEVICE_ATTR(speed, S_IRUGO, show_speed, NULL);
+
+static ssize_t
+show_devnum (struct device *dev, char *buf)
+{
+       struct usb_device *udev;
+
+       udev = to_usb_device (dev);
+       return sprintf (buf, "%d\n", udev->devnum);
+}
+static DEVICE_ATTR(devnum, S_IRUGO, show_devnum, NULL);
+
+static ssize_t
+show_version (struct device *dev, char *buf)
+{
+       struct usb_device *udev;
+
+       udev = to_usb_device (dev);
+       return sprintf (buf, "%2x.%02x\n", udev->descriptor.bcdUSB >> 8, 
+                       udev->descriptor.bcdUSB & 0xff);
+}
+static DEVICE_ATTR(version, S_IRUGO, show_version, NULL);
+
+static ssize_t
+show_maxchild (struct device *dev, char *buf)
+{
+       struct usb_device *udev;
+
+       udev = to_usb_device (dev);
+       return sprintf (buf, "%d\n", udev->maxchild);
+}
+static DEVICE_ATTR(maxchild, S_IRUGO, show_maxchild, NULL);
+
+/* Descriptor fields */
+#define usb_descriptor_attr(field, format_string)                      \
+static ssize_t                                                         \
+show_##field (struct device *dev, char *buf)                           \
+{                                                                      \
+       struct usb_device *udev;                                        \
+                                                                       \
+       udev = to_usb_device (dev);                                     \
+       return sprintf (buf, format_string, udev->descriptor.field);    \
+}                                                                      \
+static DEVICE_ATTR(field, S_IRUGO, show_##field, NULL);
+
+usb_descriptor_attr (idVendor, "%04x\n")
+usb_descriptor_attr (idProduct, "%04x\n")
+usb_descriptor_attr (bcdDevice, "%04x\n")
+usb_descriptor_attr (bDeviceClass, "%02x\n")
+usb_descriptor_attr (bDeviceSubClass, "%02x\n")
+usb_descriptor_attr (bDeviceProtocol, "%02x\n")
+usb_descriptor_attr (bNumConfigurations, "%d\n")
+
+
+void usb_create_driverfs_dev_files (struct usb_device *udev)
+{
+       struct device *dev = &udev->dev;
+
+       /* current configuration's attributes */
+       device_create_file (dev, &dev_attr_bNumInterfaces);
+       device_create_file (dev, &dev_attr_bConfigurationValue);
+       device_create_file (dev, &dev_attr_bmAttributes);
+       device_create_file (dev, &dev_attr_bMaxPower);
+
+       /* device attributes */
+       device_create_file (dev, &dev_attr_idVendor);
+       device_create_file (dev, &dev_attr_idProduct);
+       device_create_file (dev, &dev_attr_bcdDevice);
+       device_create_file (dev, &dev_attr_bDeviceClass);
+       device_create_file (dev, &dev_attr_bDeviceSubClass);
+       device_create_file (dev, &dev_attr_bDeviceProtocol);
+       device_create_file (dev, &dev_attr_bNumConfigurations);
+
+       /* speed varies depending on how you connect the device */
+       device_create_file (dev, &dev_attr_speed);
+       // FIXME iff there are other speed configs, show how many
+
+       if (udev->descriptor.iManufacturer)
+               device_create_file (dev, &dev_attr_manufacturer);
+       if (udev->descriptor.iProduct)
+               device_create_file (dev, &dev_attr_product);
+       if (udev->descriptor.iSerialNumber)
+               device_create_file (dev, &dev_attr_serial);
+
+       device_create_file (dev, &dev_attr_devnum);
+       device_create_file (dev, &dev_attr_version);
+       device_create_file (dev, &dev_attr_maxchild);
+}
+
+/* Interface fields */
+#define usb_intf_attr(field, format_string)                            \
+static ssize_t                                                         \
+show_##field (struct device *dev, char *buf)                           \
+{                                                                      \
+       struct usb_interface *intf = to_usb_interface (dev);            \
+                                                                       \
+       return sprintf (buf, format_string, intf->cur_altsetting->desc.field); \
+}                                                                      \
+static DEVICE_ATTR(field, S_IRUGO, show_##field, NULL);
+
+usb_intf_attr (bInterfaceNumber, "%02x\n")
+usb_intf_attr (bAlternateSetting, "%2d\n")
+usb_intf_attr (bNumEndpoints, "%02x\n")
+usb_intf_attr (bInterfaceClass, "%02x\n")
+usb_intf_attr (bInterfaceSubClass, "%02x\n")
+usb_intf_attr (bInterfaceProtocol, "%02x\n")
+usb_intf_attr (iInterface, "%02x\n")
+
+void usb_create_driverfs_intf_files (struct usb_interface *intf)
+{
+       device_create_file (&intf->dev, &dev_attr_bInterfaceNumber);
+       device_create_file (&intf->dev, &dev_attr_bAlternateSetting);
+       device_create_file (&intf->dev, &dev_attr_bNumEndpoints);
+       device_create_file (&intf->dev, &dev_attr_bInterfaceClass);
+       device_create_file (&intf->dev, &dev_attr_bInterfaceSubClass);
+       device_create_file (&intf->dev, &dev_attr_bInterfaceProtocol);
+       device_create_file (&intf->dev, &dev_attr_iInterface);
+}
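[Editor's note] The attribute macros above each generate one show function plus a DEVICE_ATTR per field, and the attribute then appears as a read-only file in the device's driverfs directory. For readers tracing the preprocessor, usb_descriptor_attr(idVendor, "%04x\n") expands to roughly the following (hand-expanded here for illustration, not additional code in this commit):

/* Hand expansion of usb_descriptor_attr(idVendor, "%04x\n") -- illustration only. */
static ssize_t
show_idVendor (struct device *dev, char *buf)
{
	struct usb_device *udev;

	udev = to_usb_device (dev);
	return sprintf (buf, "%04x\n", udev->descriptor.idVendor);
}
static DEVICE_ATTR(idVendor, S_IRUGO, show_idVendor, NULL);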
diff --git a/fs/intermezzo/Makefile b/fs/intermezzo/Makefile
new file mode 100644 (file)
index 0000000..260c7af
--- /dev/null
@@ -0,0 +1,11 @@
+#
+# Makefile 1.00 Peter Braam <braam@clusterfs.com>
+#
+
+obj-$(CONFIG_INTERMEZZO_FS) += intermezzo.o
+
+intermezzo-objs := cache.o dcache.o dir.o ext_attr.o file.o fileset.o \
+                  inode.o journal.o journal_ext2.o journal_ext3.o \
+                  journal_obdfs.o journal_reiserfs.o journal_tmpfs.o journal_xfs.o \
+                  kml_reint.o kml_unpack.o methods.o presto.o psdev.o replicator.o \
+                  super.o sysctl.o upcall.o vfs.o
diff --git a/fs/intermezzo/cache.c b/fs/intermezzo/cache.c
new file mode 100644 (file)
index 0000000..f97bc16
--- /dev/null
@@ -0,0 +1,207 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2000 Stelias Computing, Inc.
+ *  Copyright (C) 2000 Red Hat, Inc.
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+/*
+   This file contains the routines associated with managing a
+   cache of files for InterMezzo.  These caches need to be found
+   fast, so they are hashed by super block, with an attempt to keep
+   collision chains of length 1.
+   The methods for the cache are set up in methods.c.
+*/
+
+extern kmem_cache_t * presto_dentry_slab;
+
+/* the intent of this hash is to have collision chains of length 1 */
+#define CACHES_BITS 8
+#define CACHES_SIZE (1 << CACHES_BITS)
+#define CACHES_MASK (CACHES_SIZE - 1)
+static struct list_head presto_caches[CACHES_SIZE];
+
+static inline int presto_cache_hash(struct super_block *s)
+{
+        return (CACHES_MASK) & ((unsigned long)s >> L1_CACHE_SHIFT);
+}
+
+inline void presto_cache_add(struct presto_cache *cache)
+{
+        list_add(&cache->cache_chain,
+                 &presto_caches[presto_cache_hash(cache->cache_sb)]);
+}
+
+inline void presto_cache_init_hash(void)
+{
+        int i;
+        for ( i = 0; i < CACHES_SIZE; i++ ) {
+                INIT_LIST_HEAD(&presto_caches[i]);
+        }
+}
+
+int izo_ioctl_packlen(struct izo_ioctl_data *data)
+{
+        int len = sizeof(struct izo_ioctl_data);
+        len += size_round(data->ioc_inllen1);
+        len += size_round(data->ioc_inllen2);
+        return len;
+}
+
+/* map a device to a cache */
+struct presto_cache *presto_cache_find(struct super_block *s)
+{
+        struct presto_cache *cache;
+        struct list_head *lh, *tmp;
+
+        lh = tmp = &(presto_caches[presto_cache_hash(s)]);
+        while ( (tmp = tmp->next) != lh ) {
+                cache = list_entry(tmp, struct presto_cache, cache_chain);
+                if (cache->cache_sb == s)
+                        return cache;
+        }
+        return NULL;
+}
+
+
+/* map an inode to a cache */
+struct presto_cache *presto_get_cache(struct inode *inode)
+{
+        struct presto_cache *cache;
+        ENTRY;
+        /* find the correct presto_cache here, based on the device */
+        cache = presto_cache_find(inode->i_sb);
+        if ( !cache ) {
+                CERROR("WARNING: no presto cache for %s, ino %ld\n",
+                       inode->i_sb->s_id, inode->i_ino);
+                EXIT;
+                return NULL;
+        }
+        EXIT;
+        return cache;
+}
+
+/* another debugging routine: check fs is InterMezzo fs */
+int presto_ispresto(struct inode *inode)
+{
+        struct presto_cache *cache;
+
+        if ( !inode )
+                return 0;
+        cache = presto_get_cache(inode);
+        if ( !cache )
+                return 0;
+        return inode->i_sb == cache->cache_sb;
+}
+
+/* setup a cache structure when we need one */
+struct presto_cache *presto_cache_init(void)
+{
+        struct presto_cache *cache;
+
+        PRESTO_ALLOC(cache, sizeof(struct presto_cache));
+        if ( cache ) {
+                memset(cache, 0, sizeof(struct presto_cache));
+                INIT_LIST_HEAD(&cache->cache_chain);
+                INIT_LIST_HEAD(&cache->cache_fset_list);
+                cache->cache_lock = SPIN_LOCK_UNLOCKED;
+                cache->cache_reserved = 0; 
+        }
+        return cache;
+}
+
+/* free a cache structure and all of the memory it is pointing to */
+inline void presto_free_cache(struct presto_cache *cache)
+{
+        if (!cache)
+                return;
+
+        list_del(&cache->cache_chain);
+        if (cache->cache_sb && cache->cache_sb->s_root &&
+                        presto_d2d(cache->cache_sb->s_root)) {
+                kmem_cache_free(presto_dentry_slab, 
+                                presto_d2d(cache->cache_sb->s_root));
+                cache->cache_sb->s_root->d_fsdata = NULL;
+        }
+
+        PRESTO_FREE(cache, sizeof(struct presto_cache));
+}
+
+int presto_reserve_space(struct presto_cache *cache, loff_t req)
+{
+        struct filter_fs *filter; 
+        loff_t avail; 
+        struct super_block *sb = cache->cache_sb;
+        filter = cache->cache_filter;
+        if (!filter ) {
+                EXIT;
+                return 0; 
+        }
+        if (!filter->o_trops ) {
+                EXIT;
+                return 0; 
+        }
+        if (!filter->o_trops->tr_avail ) {
+                EXIT;
+                return 0; 
+        }
+
+        spin_lock(&cache->cache_lock);
+        avail = filter->o_trops->tr_avail(cache, sb); 
+        CDEBUG(D_SUPER, "ESC::%ld +++> %ld \n", (long) cache->cache_reserved,
+                 (long) (cache->cache_reserved + req)); 
+        CDEBUG(D_SUPER, "ESC::Avail::%ld \n", (long) avail);
+        if (req + cache->cache_reserved > avail) {
+                spin_unlock(&cache->cache_lock);
+                EXIT;
+                return -ENOSPC;
+        }
+        cache->cache_reserved += req; 
+        spin_unlock(&cache->cache_lock);
+
+        EXIT;
+        return 0;
+}
+
+void presto_release_space(struct presto_cache *cache, loff_t req)
+{
+        CDEBUG(D_SUPER, "ESC::%ld ---> %ld \n", (long) cache->cache_reserved,
+                 (long) (cache->cache_reserved - req)); 
+        spin_lock(&cache->cache_lock);
+        cache->cache_reserved -= req; 
+        spin_unlock(&cache->cache_lock);
+}
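[Editor's note] presto_reserve_space()/presto_release_space() bracket journalled updates: a caller reserves the space it expects to consume (and may get -ENOSPC back) before writing, and drops the reservation afterwards. The real call sites are not in this hunk, so the following is only a shape sketch; the helper name, the callback and the assumption that the prototypes are declared in intermezzo_fs.h are all mine.

/* Hypothetical caller shape for the reservation API above (illustration only). */
#include "intermezzo_fs.h"

static int izo_example_reserved_update(struct presto_cache *cache, loff_t bytes,
                                       int (*update)(void *), void *arg)
{
        int rc;

        rc = presto_reserve_space(cache, bytes);   /* may return -ENOSPC */
        if (rc)
                return rc;

        rc = update(arg);                          /* caller-supplied journalled update */

        presto_release_space(cache, bytes);        /* drop the reservation again */
        return rc;
}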
diff --git a/fs/intermezzo/dcache.c b/fs/intermezzo/dcache.c
new file mode 100644 (file)
index 0000000..8f8e2c5
--- /dev/null
@@ -0,0 +1,342 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Original version: Copyright (C) 1996 P. Braam and M. Callahan
+ *  Rewritten for Linux 2.1. Copyright (C) 1997 Carnegie Mellon University
+ *  d_fsdata and NFS compatibility fixes Copyright (C) 2001 Tacit Networks, Inc.
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Directory operations for InterMezzo filesystem
+ */
+
+/* inode dentry alias list walking code adapted from linux/fs/dcache.c
+ *
+ * fs/dcache.c
+ *
+ * (C) 1997 Thomas Schoebel-Theuer,
+ * with heavy changes by Linus Torvalds
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+
+#include "intermezzo_fs.h"
+
+kmem_cache_t * presto_dentry_slab;
+
+/* called when a cache lookup succeeds */
+static int presto_d_revalidate(struct dentry *de, struct nameidata *nd)
+{
+        struct inode *inode = de->d_inode;
+        struct presto_file_set * root_fset;
+
+        ENTRY;
+        if (!inode) {
+                EXIT;
+                return 0;
+        }
+
+        if (is_bad_inode(inode)) {
+                EXIT;
+                return 0;
+        }
+
+        if (!presto_d2d(de)) {
+                presto_set_dd(de);
+        }
+
+        if (!presto_d2d(de)) {
+                EXIT;
+                return 0;
+        }
+
+        root_fset = presto_d2d(de->d_inode->i_sb->s_root)->dd_fset;
+        if (root_fset->fset_flags & FSET_FLAT_BRANCH && 
+            (presto_d2d(de)->dd_fset != root_fset )) {
+                presto_d2d(de)->dd_fset = root_fset;
+        }
+
+        EXIT;
+        return 1;
+
+#if 0
+        /* The following is needed for metadata on demand. */
+        if ( S_ISDIR(inode->i_mode) ) {
+                EXIT;
+                return (presto_chk(de, PRESTO_DATA) &&
+                        (presto_chk(de, PRESTO_ATTR)));
+        } else {
+                EXIT;
+                return presto_chk(de, PRESTO_ATTR);
+        }
+#endif
+}
+
+static void presto_d_release(struct dentry *dentry)
+{
+        if (!presto_d2d(dentry)) {
+                /* This can happen for dentries from NFSd */
+                return;
+        }
+        presto_d2d(dentry)->dd_count--;
+
+        if (!presto_d2d(dentry)->dd_count) {
+                kmem_cache_free(presto_dentry_slab, presto_d2d(dentry));
+                dentry->d_fsdata = NULL;
+        }
+}
+
+struct dentry_operations presto_dentry_ops = 
+{
+        .d_revalidate =  presto_d_revalidate,
+        .d_release = presto_d_release
+};
+
+static inline int presto_is_dentry_ROOT (struct dentry *dentry)
+{
+        return(dentry_name_cmp(dentry,"ROOT") &&
+               !dentry_name_cmp(dentry->d_parent,".intermezzo"));
+}
+
+static struct presto_file_set* presto_try_find_fset(struct dentry* dentry,
+                int *is_under_d_intermezzo)
+{
+        struct dentry* temp_dentry;
+        struct presto_dentry_data *d_data;
+        int found_root=0;
+
+        ENTRY;
+        CDEBUG(D_FSDATA, "finding fileset for %p:%s\n", dentry, 
+                        dentry->d_name.name);
+
+        *is_under_d_intermezzo = 0;
+
+        /* walk up through the branch to get the fileset */
+        /* The dentry we are passed presumably does not have the correct
+         * fset information. However, we still want to start walking up
+         * the branch from this dentry to get our found_root and 
+         * is_under_d_intermezzo decisions correct
+         */
+        for (temp_dentry = dentry ; ; temp_dentry = temp_dentry->d_parent) {
+                CDEBUG(D_FSDATA, "--->dentry %p:%*s\n", temp_dentry, 
+                        temp_dentry->d_name.len,temp_dentry->d_name.name);
+                if (presto_is_dentry_ROOT(temp_dentry))
+                        found_root = 1;
+                if (!found_root &&
+                    dentry_name_cmp(temp_dentry, ".intermezzo")) {
+                        *is_under_d_intermezzo = 1;
+                }
+                d_data = presto_d2d(temp_dentry);
+                if (d_data) {
+                        /* If we found a "ROOT" dentry while walking up the
+                         * branch, we will journal regardless of whether
+                         * we are under .intermezzo or not.
+                         * If we are already under d_intermezzo don't reverse
+                         * the decision here...even if we found a "ROOT"
+                         * dentry above .intermezzo (if we were ever to
+                         * modify the directory structure).
+                         */
+                        if (!*is_under_d_intermezzo)  
+                                *is_under_d_intermezzo = !found_root &&
+                                  (d_data->dd_flags & PRESTO_DONT_JOURNAL);
+                        EXIT;
+                        return d_data->dd_fset;
+                }
+                if (temp_dentry->d_parent == temp_dentry) {
+                        break;
+                }
+        }
+        EXIT;
+        return NULL;
+}
+
+/* Only call this function on positive dentries */
+static struct presto_dentry_data* presto_try_find_alias_with_dd (
+                  struct dentry* dentry)
+{
+        struct inode *inode=dentry->d_inode;
+        struct list_head *head, *next, *tmp;
+        struct dentry *tmp_dentry;
+
+        /* Search through the alias list for dentries with d_fsdata */
+        spin_lock(&dcache_lock);
+        head = &inode->i_dentry;
+        next = inode->i_dentry.next;
+        while (next != head) {
+                tmp = next;
+                next = tmp->next;
+                tmp_dentry = list_entry(tmp, struct dentry, d_alias);
+                if (presto_d2d(tmp_dentry)) {
+                        spin_unlock(&dcache_lock);
+                        return presto_d2d(tmp_dentry);
+                }
+        }
+        spin_unlock(&dcache_lock);
+        return NULL;
+}
+
+/* Only call this function on positive dentries */
+static void presto_set_alias_dd (struct dentry *dentry, 
+                struct presto_dentry_data* dd)
+{
+        struct inode *inode=dentry->d_inode;
+        struct list_head *head, *next, *tmp;
+        struct dentry *tmp_dentry;
+
+        /* Set d_fsdata for this dentry */
+        dd->dd_count++;
+        dentry->d_fsdata = dd;
+
+        /* Now set d_fsdata for all dentries in the alias list. */
+        spin_lock(&dcache_lock);
+        head = &inode->i_dentry;
+        next = inode->i_dentry.next;
+        while (next != head) {
+                tmp = next;
+                next = tmp->next;
+                tmp_dentry = list_entry(tmp, struct dentry, d_alias);
+                if (!presto_d2d(tmp_dentry)) {
+                        dd->dd_count++;
+                        tmp_dentry->d_fsdata = dd;
+                }
+        }
+        spin_unlock(&dcache_lock);
+        return;
+}
+
+inline struct presto_dentry_data *izo_alloc_ddata(void)
+{
+        struct presto_dentry_data *dd;
+
+        dd = kmem_cache_alloc(presto_dentry_slab, SLAB_KERNEL);
+        if (dd == NULL) {
+                CERROR("IZO: out of memory trying to allocate presto_dentry_data\n");
+                return NULL;
+        }
+        memset(dd, 0, sizeof(*dd));
+        dd->dd_count = 1;
+
+        return dd;
+}
+
+/* This uses the BKL! */
+int presto_set_dd(struct dentry * dentry)
+{
+        struct presto_file_set *fset;
+        struct presto_dentry_data *dd;
+        int is_under_d_izo;
+        int error=0;
+
+        ENTRY;
+
+        if (!dentry)
+                BUG();
+
+        lock_kernel();
+
+        /* Did we lose a race? */
+        if (dentry->d_fsdata) {
+                CERROR("dentry %p already has d_fsdata set\n", dentry);
+                if (dentry->d_inode)
+                        CERROR("    inode: %ld\n", dentry->d_inode->i_ino);
+                EXIT;
+                goto out_unlock;
+        }
+
+        if (dentry->d_inode != NULL) {
+                /* NFSd runs find_fh_dentry which instantiates disconnected
+                 * dentries which are then connected without a lookup(). 
+                 * So it is possible to have connected dentries that do not 
+                 * have d_fsdata set. So we walk the list trying to find 
+                 * an alias which has its d_fsdata set and then use that 
+                 * for all the other dentries  as well. 
+                 * - SHP,Vinny. 
+                 */
+
+                /* If there is an alias with d_fsdata use it. */
+                if ((dd = presto_try_find_alias_with_dd (dentry))) {
+                        presto_set_alias_dd (dentry, dd);
+                        EXIT;
+                        goto out_unlock;
+                }
+        } else {
+                /* Negative dentry */
+                CDEBUG(D_FSDATA,"negative dentry %p: %*s\n", dentry, 
+                                dentry->d_name.len, dentry->d_name.name);
+        }
+
+        /* No pre-existing d_fsdata, we need to construct one.
+         * First, we must walk up the tree to find the fileset 
+         * If a fileset can't be found, we leave a null fsdata
+         * and return EROFS to indicate that we can't journal
+         * updates. 
+         */
+        fset = presto_try_find_fset (dentry, &is_under_d_izo);
+        if (!fset) { 
+#ifdef PRESTO_NO_NFS
+                CERROR("No fileset for dentry %p: %*s\n", dentry,
+                                dentry->d_name.len, dentry->d_name.name);
+#endif
+                error = -EROFS;
+                EXIT;
+                goto out_unlock;
+        }
+
+        dentry->d_fsdata = izo_alloc_ddata();
+        if (!presto_d2d(dentry)) {
+                CERROR ("InterMezzo: out of memory allocating d_fsdata\n");
+                error = -ENOMEM;
+                goto out_unlock;
+        }
+        presto_d2d(dentry)->dd_fset = fset;
+        if (is_under_d_izo)
+                presto_d2d(dentry)->dd_flags |= PRESTO_DONT_JOURNAL;
+        EXIT;
+
+out_unlock:    
+        CDEBUG(D_FSDATA,"presto_set_dd dentry %p: %*s, d_fsdata %p\n", 
+                        dentry, dentry->d_name.len, dentry->d_name.name, 
+                        dentry->d_fsdata);
+        unlock_kernel();
+        return error; 
+}
+
+int presto_init_ddata_cache(void)
+{
+        ENTRY;
+        presto_dentry_slab =
+                kmem_cache_create("presto_cache",
+                                  sizeof(struct presto_dentry_data), 0,
+                                  SLAB_HWCACHE_ALIGN, NULL,
+                                  NULL);
+        EXIT;
+        return (presto_dentry_slab != NULL);
+}
+
+void presto_cleanup_ddata_cache(void)
+{
+        kmem_cache_destroy(presto_dentry_slab);
+}
diff --git a/fs/intermezzo/dir.c b/fs/intermezzo/dir.c
new file mode 100644 (file)
index 0000000..3ec2e69
--- /dev/null
@@ -0,0 +1,1333 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2000 Stelias Computing, Inc.
+ *  Copyright (C) 2000 Red Hat, Inc.
+ *  Copyright (C) 2000 Tacitus Systems
+ *  Copyright (C) 2000 Peter J. Braam
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <asm/bitops.h>
+#include <asm/termios.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/ext2_fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+static inline void presto_relock_sem(struct inode *dir) 
+{
+        /* the lock from sys_mkdir / lookup_create */
+        down(&dir->i_sem);
+        /* the rest is done by the do_{create,mkdir, ...} */
+}
+
+static inline void presto_relock_other(struct inode *dir) 
+{
+        /* vfs_mkdir locks */
+        //        down(&dir->i_zombie);
+        //lock_kernel(); 
+}
+
+static inline void presto_fulllock(struct inode *dir) 
+{
+        /* the lock from sys_mkdir / lookup_create */
+        down(&dir->i_sem);
+        /* vfs_mkdir locks */
+        //        down(&dir->i_zombie);
+        //lock_kernel(); 
+}
+
+static inline void presto_unlock(struct inode *dir) 
+{
+        /* vfs_mkdir locks */
+        //unlock_kernel(); 
+        //        up(&dir->i_zombie);
+        /* the lock from sys_mkdir / lookup_create */
+        up(&dir->i_sem);
+}
+
+
+/*
+ * these are initialized in super.c
+ */
+extern int presto_permission(struct inode *inode, int mask, struct nameidata *nd);
+static int izo_authorized_uid;
+
+int izo_dentry_is_ilookup(struct dentry *dentry, ino_t *id,
+                          unsigned int *generation)
+{
+        char tmpname[64];
+        char *next;
+
+        ENTRY;
+        /* prefix is 7 characters: '...ino:' */
+        if ( dentry->d_name.len < 7 || dentry->d_name.len > 64 ||
+             memcmp(dentry->d_name.name, PRESTO_ILOOKUP_MAGIC, 7) != 0 ) {
+                EXIT;
+                return 0;
+        }
+
+        memcpy(tmpname, dentry->d_name.name + 7, dentry->d_name.len - 7);
+        *(tmpname + dentry->d_name.len - 7) = '\0';
+
+        /* name is of the form ...ino:<inode number>:<generation> */
+        *id = simple_strtoul(tmpname, &next, 16);
+        if ( *next == PRESTO_ILOOKUP_SEP ) {
+                *generation = simple_strtoul(next + 1, 0, 16);
+                CDEBUG(D_INODE, "ino string: %s, Id = %lx (%lu), "
+                       "generation %x (%d)\n",
+                       tmpname, *id, *id, *generation, *generation);
+                EXIT;
+                return 1;
+        } else {
+                EXIT;
+                return 0;
+        }
+}
+
+struct dentry *presto_tmpfs_ilookup(struct inode *dir, 
+                                    struct dentry *dentry,
+                                    ino_t ino, 
+                                    unsigned int generation)
+{
+        return dentry; 
+}
+
+
+inline int presto_can_ilookup(void)
+{
+        return (current->euid == izo_authorized_uid ||
+                capable(CAP_DAC_READ_SEARCH));
+}
+
+struct dentry *presto_iget_ilookup(struct inode *dir, 
+                                          struct dentry *dentry,
+                                          ino_t ino, 
+                                          unsigned int generation)
+{
+        struct inode *inode;
+        int error;
+
+        ENTRY;
+
+        if ( !presto_can_ilookup() ) {
+                CERROR("ilookup denied: euid %u, authorized_uid %u\n",
+                       current->euid, izo_authorized_uid);
+                return ERR_PTR(-EPERM);
+        }
+        error = -ENOENT;
+        inode = iget(dir->i_sb, ino);
+        if (!inode) { 
+                CERROR("fatal: NULL inode ino %lu\n", ino); 
+                goto cleanup_iput;
+        }
+        if (is_bad_inode(inode) || inode->i_nlink == 0) {
+                CERROR("fatal: bad inode ino %lu, links %d\n", ino, inode->i_nlink); 
+                goto cleanup_iput;
+        }
+        if (inode->i_generation != generation) {
+                CERROR("fatal: bad generation %u (want %u)\n",
+                       inode->i_generation, generation);
+                goto cleanup_iput;
+        }
+
+        d_instantiate(dentry, inode);
+        dentry->d_flags |= DCACHE_DISCONNECTED; /* NFS hack */
+
+        EXIT;
+        return NULL;
+
+cleanup_iput:
+        if (inode)
+                iput(inode);
+        return ERR_PTR(error);
+}
+
+struct dentry *presto_add_ilookup_dentry(struct dentry *parent,
+                                         struct dentry *real)
+{
+        struct inode *inode = real->d_inode;
+        struct dentry *de;
+        char buf[32];
+        char *ptr = buf;
+        struct dentry *inodir;
+        struct presto_dentry_data *dd;
+
+        inodir = lookup_one_len("..iopen..", parent,  strlen("..iopen..")); 
+        if (!inodir || IS_ERR(inodir) || !inodir->d_inode ) { 
+                CERROR("%s: bad ..iopen.. lookup\n", __FUNCTION__); 
+                return NULL; 
+        }
+        inodir->d_inode->i_op = &presto_dir_iops;
+
+        snprintf(ptr, 32, "...ino:%lx:%x", inode->i_ino, inode->i_generation);
+
+        de = lookup_one_len(ptr, inodir,  strlen(ptr)); 
+        if (!de || IS_ERR(de)) {
+                CERROR("%s: bad ...ino lookup %ld\n", 
+                       __FUNCTION__, PTR_ERR(de)); 
+                dput(inodir);
+                return NULL; 
+        }
+
+        dd = presto_d2d(real);
+        if (!dd) 
+                BUG();
+
+        /* already exists */
+        if (de->d_inode)
+                BUG();
+#if 0 
+                if (de->d_inode != inode ) { 
+                        CERROR("XX de->d_inode %ld, inode %ld\n", 
+                               de->d_inode->i_ino, inode->i_ino); 
+                        BUG();
+                }
+                if (dd->dd_inodentry) { 
+                        CERROR("inodentry exists %ld \n", inode->i_ino);
+                        BUG();
+                }
+                dput(inodir);
+                return de;
+        }
+#endif 
+
+        if (presto_d2d(de)) 
+                BUG();
+
+        atomic_inc(&inode->i_count);
+        de->d_op = &presto_dentry_ops;
+        d_add(de, inode);
+        if (!de->d_op)
+                CERROR("DD: no ops dentry %p, dd %p\n", de, dd);
+        dd->dd_inodentry = de;
+        dd->dd_count++;
+        de->d_fsdata = dd;
+
+        dput(inodir);
+        return de;
+}
+
+struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+{
+        int rc = 0;
+        struct dentry *de;
+        struct presto_cache *cache;
+        int minor;
+        ino_t ino;
+        unsigned int generation;
+        struct inode_operations *iops;
+        int is_ilookup = 0;
+
+        ENTRY;
+        cache = presto_get_cache(dir);
+        if (cache == NULL) {
+                CERROR("InterMezzo BUG: no cache in presto_lookup "
+                       "(dir ino: %ld)!\n", dir->i_ino);
+                EXIT;
+                return NULL;
+        }
+        minor = presto_c2m(cache);
+
+        iops = filter_c2cdiops(cache->cache_filter);
+        if (!iops || !iops->lookup) {
+                CERROR("InterMezzo BUG: filesystem has no lookup\n");
+                EXIT;
+                return NULL;
+        }
+
+
+        CDEBUG(D_CACHE, "dentry %p, dir ino: %ld, name: %*s, islento: %d\n",
+               dentry, dir->i_ino, dentry->d_name.len, dentry->d_name.name,
+               ISLENTO(minor));
+
+        if (dentry->d_fsdata)
+                CERROR("DD -- BAD dentry %p has data\n", dentry);
+                       
+        dentry->d_fsdata = NULL;
+#if 0
+        if (ext2_check_for_iopen(dir, dentry))
+                de = NULL;
+        else {
+#endif
+                if ( izo_dentry_is_ilookup(dentry, &ino, &generation) ) { 
+                        de = cache->cache_filter->o_trops->tr_ilookup
+                                (dir, dentry, ino, generation);
+                        is_ilookup = 1;
+                } else
+                        de = iops->lookup(dir, dentry, nd);
+#if 0
+        }
+#endif
+
+        if ( IS_ERR(de) ) {
+                CERROR("dentry lookup error %ld\n", PTR_ERR(de));
+                return de;
+        }
+
+        /* some file systems have no read_inode: set methods here */
+        if (dentry->d_inode)
+                presto_set_ops(dentry->d_inode, cache->cache_filter);
+
+        filter_setup_dentry_ops(cache->cache_filter,
+                                dentry->d_op, &presto_dentry_ops);
+        dentry->d_op = filter_c2udops(cache->cache_filter);
+
+        /* In lookup we will tolerate EROFS return codes from presto_set_dd
+         * to placate NFS. EROFS indicates that a fileset was not found but
+         * we should still be able to continue through a lookup.
+         * Anything else is a hard error and must be returned to VFS. */
+        if (!is_ilookup)
+                rc = presto_set_dd(dentry);
+        if (rc && rc != -EROFS) {
+                CERROR("presto_set_dd failed (dir %ld, name %*s): %d\n",
+                       dir->i_ino, dentry->d_name.len, dentry->d_name.name, rc);
+                return ERR_PTR(rc);
+        }
+
+        EXIT;
+        return NULL;
+}
+
+static inline int presto_check_set_fsdata (struct dentry *de)
+{
+        if (presto_d2d(de) == NULL) {
+#ifdef PRESTO_NO_NFS
+                CERROR("dentry without fsdata: %p: %*s\n", de, 
+                                de->d_name.len, de->d_name.name);
+                BUG();
+#endif
+                return presto_set_dd (de);
+        }
+
+        return 0;
+}
+
+int presto_setattr(struct dentry *de, struct iattr *iattr)
+{
+        int error;
+        struct presto_cache *cache;
+        struct presto_file_set *fset;
+        struct lento_vfs_context info = { 0, {0}, 0 };
+
+        ENTRY;
+
+        error = presto_prep(de, &cache, &fset);
+        if ( error ) {
+                EXIT;
+                return error;
+        }
+
+        if (!iattr->ia_valid)
+                CDEBUG(D_INODE, "presto_setattr: iattr is not valid\n");
+
+        CDEBUG(D_INODE, "valid %#x, mode %#o, uid %u, gid %u, size %Lu, "
+               "atime %lu mtime %lu ctime %lu flags %d\n",
+               iattr->ia_valid, iattr->ia_mode, iattr->ia_uid, iattr->ia_gid,
+               iattr->ia_size, iattr->ia_atime.tv_sec, iattr->ia_mtime.tv_sec,
+               iattr->ia_ctime.tv_sec, iattr->ia_attr_flags);
+        
+        if ( presto_get_permit(de->d_inode) < 0 ) {
+                EXIT;
+                return -EROFS;
+        }
+
+        if (!ISLENTO(presto_c2m(cache)))
+                info.flags = LENTO_FL_KML;
+        info.flags |= LENTO_FL_IGNORE_TIME;
+        error = presto_do_setattr(fset, de, iattr, &info);
+        presto_put_permit(de->d_inode);
+        return error;
+}
+
+/*
+ *  Now the meat: the fs operations that require journaling
+ *
+ *
+ *  XXX: some of these need modifications for hierarchical filesets
+ */
+
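+/* Resolve the cache and fileset for a dentry and refuse modifications to
+ * read-only filesets; the journaled operations call this before doing any
+ * real work.
+ */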
+int presto_prep(struct dentry *dentry, struct presto_cache **cache,
+                struct presto_file_set **fset)
+{       
+        int rc;
+
+        /* NFS might pass us dentries which have not gone through lookup.
+         * Test and set d_fsdata for such dentries
+         */
+        rc = presto_check_set_fsdata (dentry);
+        if (rc) return rc;
+
+        *fset = presto_fset(dentry);
+        if ( *fset == NULL ) {
+                CERROR("No file set for dentry at %p: %*s\n", dentry,
+                                dentry->d_name.len, dentry->d_name.name);
+                return -EROFS;
+        }
+
+        *cache = (*fset)->fset_cache;
+        if ( *cache == NULL ) {
+                CERROR("PRESTO: BAD, BAD: cannot find cache\n");
+                return -EBADF;
+        }
+
+        CDEBUG(D_PIOCTL, "---> cache flags %x, fset flags %x\n",
+              (*cache)->cache_flags, (*fset)->fset_flags);
+        if( presto_is_read_only(*fset) ) {
+                CERROR("PRESTO: cannot modify read-only fileset, minor %d.\n",
+                       presto_c2m(*cache));
+                return -EROFS;
+        }
+        return 0;
+}
+
+static int presto_create(struct inode * dir, struct dentry * dentry, int mode,
+                struct nameidata *nd)
+{
+        int error;
+        struct presto_cache *cache;
+        struct dentry *parent = dentry->d_parent;
+        struct lento_vfs_context info;
+        struct presto_file_set *fset;
+
+        ENTRY;
+        error = presto_check_set_fsdata(dentry);
+        if ( error ) {
+                EXIT;
+                return error;
+        }
+
+        error = presto_prep(dentry->d_parent, &cache, &fset);
+        if ( error ) {
+                EXIT;
+                return error;
+        }
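+        /* presto_get_permit() may block (e.g. on an upcall to the
+         * user-level daemon), so drop the directory lock first and retake
+         * it once the permit is held.  The other journaled directory
+         * operations below follow the same pattern.
+         */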
+        presto_unlock(dir);
+
+        /* Do blocking and non-blocking behaviours need to be checked for
+           here?  Without blocking (return 1), the permit was acquired
+           without reintegration.
+        */
+        if ( presto_get_permit(dir) < 0 ) {
+                EXIT;
+                presto_fulllock(dir);
+                return -EROFS;
+        }
+
+        presto_relock_sem(dir);
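+        /* The dentry may have been moved while the directory was unlocked;
+         * re-read d_parent before journaling.
+         */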
+        parent = dentry->d_parent; 
+        memset(&info, 0, sizeof(info));
+        if (!ISLENTO(presto_c2m(cache)))
+                info.flags = LENTO_FL_KML;
+        info.flags |= LENTO_FL_IGNORE_TIME;
+        error = presto_do_create(fset, parent, dentry, mode, &info);
+
+        presto_relock_other(dir);
+        presto_put_permit(dir);
+        EXIT;
+        return error;
+}
+
+static int presto_link(struct dentry *old_dentry, struct inode *dir,
+                struct dentry *new_dentry)
+{
+        int error;
+        struct presto_cache *cache, *new_cache;
+        struct presto_file_set *fset, *new_fset;
+        struct dentry *parent = new_dentry->d_parent;
+        struct lento_vfs_context info;
+
+        ENTRY;
+        error = presto_prep(old_dentry, &cache, &fset);
+        if ( error ) {
+                EXIT;
+                return error;
+        }
+
+        error = presto_check_set_fsdata(new_dentry);
+        if ( error ) {
+                EXIT;
+                return error;
+        }
+
+        error = presto_prep(new_dentry->d_parent, &new_cache, &new_fset);
+        if ( error ) {
+                EXIT;
+                return error;
+        }
+
+        if (fset != new_fset) { 
+                EXIT;
+                return -EXDEV;
+        }
+
+        presto_unlock(dir);
+        if ( presto_get_permit(old_dentry->d_inode) < 0 ) {
+                EXIT;
+                presto_fulllock(dir);
+                return -EROFS;
+        }
+
+        if ( presto_get_permit(dir) < 0 ) {
+                EXIT;
+                presto_fulllock(dir);
+                return -EROFS;
+        }
+
+        presto_relock_sem(dir);
+        parent = new_dentry->d_parent;
+
+        memset(&info, 0, sizeof(info));
+        if (!ISLENTO(presto_c2m(cache)))
+                info.flags = LENTO_FL_KML;
+        info.flags |= LENTO_FL_IGNORE_TIME;
+        error = presto_do_link(fset, old_dentry, parent,
+                               new_dentry, &info);
+
+#if 0
+        /* XXX for links this is not right */
+        if (cache->cache_filter->o_trops->tr_add_ilookup ) { 
+                struct dentry *d;
+                d = cache->cache_filter->o_trops->tr_add_ilookup
+                        (dir->i_sb->s_root, new_dentry, 1); 
+        }
+#endif 
+
+        presto_relock_other(dir);
+        presto_put_permit(dir);
+        presto_put_permit(old_dentry->d_inode);
+        return error;
+}
+
+static int presto_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+{
+        int error;
+        struct presto_file_set *fset;
+        struct presto_cache *cache;
+        struct dentry *parent = dentry->d_parent;
+        struct lento_vfs_context info;
+
+        ENTRY;
+
+        error = presto_check_set_fsdata(dentry);
+        if ( error  ) {
+                EXIT;
+                return error;
+        }
+
+        error = presto_prep(dentry->d_parent, &cache, &fset);
+        if ( error  ) {
+                EXIT;
+                return error;
+        }
+
+        presto_unlock(dir); 
+
+        if ( presto_get_permit(dir) < 0 ) {
+                EXIT;
+                presto_fulllock(dir);
+                return -EROFS;
+        }
+
+        memset(&info, 0, sizeof(info));
+        if (!ISLENTO(presto_c2m(cache)))
+                info.flags = LENTO_FL_KML;
+        info.flags |= LENTO_FL_IGNORE_TIME;
+
+        presto_relock_sem(dir); 
+        parent = dentry->d_parent;
+        error = presto_do_mkdir(fset, parent, dentry, mode, &info);
+        presto_relock_other(dir); 
+        presto_put_permit(dir);
+        return error;
+}
+
+
+
+static int presto_symlink(struct inode *dir, struct dentry *dentry,
+                   const char *name)
+{
+        int error;
+        struct presto_cache *cache;
+        struct presto_file_set *fset;
+        struct dentry *parent = dentry->d_parent;
+        struct lento_vfs_context info;
+
+        ENTRY;
+        error = presto_check_set_fsdata(dentry);
+        if ( error ) {
+                EXIT;
+                return error;
+        }
+
+        error = presto_prep(dentry->d_parent, &cache, &fset);
+        if ( error ) {
+                EXIT;
+                return error;
+        }
+
+        presto_unlock(dir);
+        if ( presto_get_permit(dir) < 0 ) {
+                EXIT;
+                presto_fulllock(dir);
+                return -EROFS;
+        }
+
+        presto_relock_sem(dir);
+        parent = dentry->d_parent;
+        memset(&info, 0, sizeof(info));
+        if (!ISLENTO(presto_c2m(cache)))
+                info.flags = LENTO_FL_KML;
+        info.flags |= LENTO_FL_IGNORE_TIME;
+        error = presto_do_symlink(fset, parent, dentry, name, &info);
+        presto_relock_other(dir);
+        presto_put_permit(dir);
+        return error;
+}
+
+int presto_unlink(struct inode *dir, struct dentry *dentry)
+{
+        int error;
+        struct presto_cache *cache;
+        struct presto_file_set *fset;
+        struct dentry *parent = dentry->d_parent;
+        struct lento_vfs_context info;
+
+        ENTRY;
+        error = presto_check_set_fsdata(dentry);
+        if ( error ) {
+                EXIT;
+                return error;
+        }
+
+        error = presto_prep(dentry->d_parent, &cache, &fset);
+        if ( error  ) {
+                EXIT;
+                return error;
+        }
+
+        presto_unlock(dir);
+        if ( presto_get_permit(dir) < 0 ) {
+                EXIT;
+                presto_fulllock(dir);
+                return -EROFS;
+        }
+
+        presto_relock_sem(dir);
+        parent = dentry->d_parent;
+        memset(&info, 0, sizeof(info));
+        if (!ISLENTO(presto_c2m(cache)))
+                info.flags = LENTO_FL_KML;
+        info.flags |= LENTO_FL_IGNORE_TIME;
+
+        error = presto_do_unlink(fset, parent, dentry, &info);
+
+        presto_relock_other(dir);
+        presto_put_permit(dir);
+        return error;
+}
+
+static int presto_rmdir(struct inode *dir, struct dentry *dentry)
+{
+        int error;
+        struct presto_cache *cache;
+        struct presto_file_set *fset;
+        struct dentry *parent = dentry->d_parent;
+        struct lento_vfs_context info;
+
+        ENTRY;
+        CDEBUG(D_FILE, "prepping presto\n");
+        error = presto_check_set_fsdata(dentry);
+
+        if ( error ) {
+                EXIT;
+                return error;
+        }
+
+        error = presto_prep(dentry->d_parent, &cache, &fset);
+        if ( error ) {
+                EXIT;
+                return error;
+        }
+
+        CDEBUG(D_FILE, "unlocking\n");
+        /* We need to dget() before the dput in double_unlock, to ensure we
+         * still have dentry references.  double_lock doesn't do dget for us.
+         */
+        if (d_unhashed(dentry))
+                d_rehash(dentry);
+        //        double_up(&dir->i_zombie, &dentry->d_inode->i_zombie);
+        up(&dentry->d_inode->i_sem);
+        up(&dir->i_sem);
+
+        CDEBUG(D_FILE, "getting permit\n");
+        if ( presto_get_permit(parent->d_inode) < 0 ) {
+                EXIT;
+                down(&dir->i_sem);
+                down(&dentry->d_inode->i_sem);
+                //                double_down(&dir->i_sem, &dentry->d_inode->i_sem);
+                //                double_down(&dir->i_zombie, &dentry->d_inode->i_zombie);
+                
+                lock_kernel();
+                return -EROFS;
+        }
+        CDEBUG(D_FILE, "locking\n");
+
+        down(&dir->i_sem);
+        down(&dentry->d_inode->i_sem);
+        parent = dentry->d_parent;
+        memset(&info, 0, sizeof(info));
+        if (!ISLENTO(presto_c2m(cache)))
+                info.flags = LENTO_FL_KML;
+        info.flags |= LENTO_FL_IGNORE_TIME;
+        error = presto_do_rmdir(fset, parent, dentry, &info);
+        presto_put_permit(parent->d_inode);
+        lock_kernel();
+        EXIT;
+        return error;
+}
+
+static int presto_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_t rdev)
+{
+        int error;
+        struct presto_cache *cache;
+        struct presto_file_set *fset;
+        struct dentry *parent = dentry->d_parent;
+        struct lento_vfs_context info;
+
+        if (!old_valid_dev(rdev))
+                return -EINVAL;
+
+        ENTRY;
+        error = presto_check_set_fsdata(dentry);
+        if ( error ) {
+                EXIT;
+                return error;
+        }
+
+        error = presto_prep(dentry->d_parent, &cache, &fset);
+        if ( error  ) {
+                EXIT;
+                return error;
+        }
+
+        presto_unlock(dir);
+        if ( presto_get_permit(dir) < 0 ) {
+                EXIT;
+                presto_fulllock(dir);
+                return -EROFS;
+        }
+        
+        presto_relock_sem(dir);
+        parent = dentry->d_parent;
+        memset(&info, 0, sizeof(info));
+        if (!ISLENTO(presto_c2m(cache)))
+                info.flags = LENTO_FL_KML;
+        info.flags |= LENTO_FL_IGNORE_TIME;
+        error = presto_do_mknod(fset, parent, dentry, mode, rdev, &info);
+        presto_relock_other(dir);
+        presto_put_permit(dir);
+        EXIT;
+        return error;
+}
+
+
+
+// XXX this can be optimized: renames across filesets only require
+//     multiple KML records, but can locally be executed normally.
+int presto_rename(struct inode *old_dir, struct dentry *old_dentry,
+                  struct inode *new_dir, struct dentry *new_dentry)
+{
+        int error;
+        struct presto_cache *cache, *new_cache;
+        struct presto_file_set *fset, *new_fset;
+        struct lento_vfs_context info;
+        struct dentry *old_parent = old_dentry->d_parent;
+        struct dentry *new_parent = new_dentry->d_parent;
+        int triple;
+
+        ENTRY;
+        error = presto_prep(old_dentry, &cache, &fset);
+        if ( error ) {
+                EXIT;
+                return error;
+        }
+        error = presto_prep(new_parent, &new_cache, &new_fset);
+        if ( error ) {
+                EXIT;
+                return error;
+        }
+
+        if ( fset != new_fset ) {
+                EXIT;
+                return -EXDEV;
+        }
+
+        /* We need to do dget before the dput in double_unlock, to ensure we
+         * still have dentry references.  double_lock doesn't do dget for us.
+         */
+
+        triple = (S_ISDIR(old_dentry->d_inode->i_mode) && new_dentry->d_inode)?
+                1:0;
+
+        unlock_rename(new_dentry->d_parent, old_dentry->d_parent);
+
+        if ( presto_get_permit(old_dir) < 0 ) {
+                EXIT;
+                return -EROFS;
+        }
+        if ( presto_get_permit(new_dir) < 0 ) {
+                EXIT;
+                return -EROFS;
+        }
+
+        lock_rename(new_dentry->d_parent, old_dentry->d_parent);
+        memset(&info, 0, sizeof(info));
+        if (!ISLENTO(presto_c2m(cache)))
+                info.flags = LENTO_FL_KML;
+        info.flags |= LENTO_FL_IGNORE_TIME;
+        error = do_rename(fset, old_parent, old_dentry, new_parent,
+                          new_dentry, &info);
+
+        presto_put_permit(new_dir);
+        presto_put_permit(old_dir);
+        return error;
+}
+
+/* basically this allows the ilookup processes access to all files for
+ * reading, while not making ilookup totally insecure.  This could all
+ * go away if we could set the CAP_DAC_READ_SEARCH capability for the client.
+ */
+/* If posix acls are available, the underlying cache fs will export the
+ * appropriate permission function. Thus we do not worry here about ACLs
+ * or EAs. -SHP
+ */
+int presto_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+        unsigned short mode = inode->i_mode;
+        struct presto_cache *cache;
+        int rc;
+
+        ENTRY;
+        if ( presto_can_ilookup() && !(mask & S_IWOTH)) {
+                CDEBUG(D_CACHE, "ilookup on %ld OK\n", inode->i_ino);
+                EXIT;
+                return 0;
+        }
+
+        cache = presto_get_cache(inode);
+
+        if ( cache ) {
+                /* we only override the file/dir permission operations */
+                struct inode_operations *fiops = filter_c2cfiops(cache->cache_filter);
+                struct inode_operations *diops = filter_c2cdiops(cache->cache_filter);
+
+                if ( S_ISREG(mode) && fiops && fiops->permission ) {
+                        EXIT;
+                        return fiops->permission(inode, mask, nd);
+                }
+                if ( S_ISDIR(mode) && diops && diops->permission ) {
+                        EXIT;
+                        return diops->permission(inode, mask, nd);
+                }
+        }
+
+        /* The cache filesystem doesn't have its own permission function,
+         * so we call the default one.
+         */
+        rc = vfs_permission(inode, mask);
+
+        EXIT;
+        return rc;
+}
+
+
+int presto_ioctl(struct inode *inode, struct file *file,
+                        unsigned int cmd, unsigned long arg)
+{
+        char buf[1024];
+        struct izo_ioctl_data *data = NULL;
+        struct presto_dentry_data *dd;
+        int rc;
+
+        ENTRY;
+
+        /* Try the filesystem's ioctl first, and return if it succeeded. */
+        dd = presto_d2d(file->f_dentry); 
+        if (dd && dd->dd_fset) { 
+                int (*cache_ioctl)(struct inode *, struct file *,
+                                   unsigned int, unsigned long) =
+                        filter_c2cdfops(dd->dd_fset->fset_cache->cache_filter)->ioctl;
+                rc = -ENOTTY;
+                if (cache_ioctl)
+                        rc = cache_ioctl(inode, file, cmd, arg);
+                if (rc != -ENOTTY) {
+                        EXIT;
+                        return rc;
+                }
+        }
+
+        if (current->euid != 0 && current->euid != izo_authorized_uid) {
+                EXIT;
+                return -EPERM;
+        }
+
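+        /* The remaining commands take a marshalled izo_ioctl_data argument;
+         * unpack it into a local buffer before dispatching on cmd.
+         */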
+        memset(buf, 0, sizeof(buf));
+        
+        if (izo_ioctl_getdata(buf, buf + 1024, (void *)arg)) { 
+                CERROR("intermezzo ioctl: data error\n");
+                return -EINVAL;
+        }
+        data = (struct izo_ioctl_data *)buf;
+        
+        switch(cmd) {
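+        /* Reintegrate one KML record supplied by the daemon; the (possibly
+         * updated) descriptor is copied back to user space afterwards.
+         */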
+        case IZO_IOC_REINTKML: { 
+                int rc;
+                int cperr;
+                rc = kml_reint_rec(file, data);
+
+                EXIT;
+                cperr = copy_to_user((char *)arg, data, sizeof(*data));
+                if (cperr) { 
+                        CERROR("WARNING: cperr %d\n", cperr); 
+                        rc = -EFAULT;
+                }
+                return rc;
+        }
+
+        case IZO_IOC_GET_RCVD: {
+                struct izo_rcvd_rec rec;
+                struct presto_file_set *fset;
+                int rc;
+
+                fset = presto_fset(file->f_dentry);
+                if (fset == NULL) {
+                        EXIT;
+                        return -ENODEV;
+                }
+                rc = izo_rcvd_get(&rec, fset, data->ioc_uuid);
+                if (rc < 0) {
+                        EXIT;
+                        return rc;
+                }
+
+                EXIT;
+                return copy_to_user((char *)arg, &rec, sizeof(rec))? -EFAULT : 0;
+        }
+
+        case IZO_IOC_REPSTATUS: {
+                __u64 client_kmlsize;
+                struct izo_rcvd_rec *lr_client;
+                struct izo_rcvd_rec rec;
+                struct presto_file_set *fset;
+                int minor;
+                int rc;
+
+                fset = presto_fset(file->f_dentry);
+                if (fset == NULL) {
+                        EXIT;
+                        return -ENODEV;
+                }
+                minor = presto_f2m(fset);
+
+                client_kmlsize = data->ioc_kmlsize;
+                lr_client =  (struct izo_rcvd_rec *) data->ioc_pbuf1;
+
+                rc = izo_repstatus(fset, client_kmlsize, 
+                                       lr_client, &rec);
+                if (rc < 0) {
+                        EXIT;
+                        return rc;
+                }
+
+                EXIT;
+                return copy_to_user((char *)arg, &rec, sizeof(rec))? -EFAULT : 0;
+        }
+
+        case IZO_IOC_GET_CHANNEL: {
+                struct presto_file_set *fset;
+
+                fset = presto_fset(file->f_dentry);
+                if (fset == NULL) {
+                        EXIT;
+                        return -ENODEV;
+                }
+                
+                data->ioc_dev = fset->fset_cache->cache_psdev->uc_minor;
+                CDEBUG(D_PSDEV, "CHANNEL %d\n", data->ioc_dev); 
+                EXIT;
+                return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0;
+        }
+
+        case IZO_IOC_SET_IOCTL_UID:
+                izo_authorized_uid = data->ioc_uid;
+                EXIT;
+                return 0;
+
+        case IZO_IOC_SET_PID:
+                rc = izo_psdev_setpid(data->ioc_dev);
+                EXIT;
+                return rc;
+
+        case IZO_IOC_SET_CHANNEL:
+                rc = izo_psdev_setchannel(file, data->ioc_dev);
+                EXIT;
+                return rc;
+
+        case IZO_IOC_GET_KML_SIZE: {
+                struct presto_file_set *fset;
+                __u64 kmlsize;
+
+                fset = presto_fset(file->f_dentry);
+                if (fset == NULL) {
+                        EXIT;
+                        return -ENODEV;
+                }
+
+                kmlsize = presto_kml_offset(fset) + fset->fset_kml_logical_off;
+
+                EXIT;
+                return copy_to_user((char *)arg, &kmlsize, sizeof(kmlsize))?-EFAULT : 0;
+        }
+
+        case IZO_IOC_PURGE_FILE_DATA: {
+                struct presto_file_set *fset;
+
+                fset = presto_fset(file->f_dentry);
+                if (fset == NULL) {
+                        EXIT;
+                        return -ENODEV;
+                }
+
+                rc = izo_purge_file(fset, data->ioc_inlbuf1);
+                EXIT;
+                return rc;
+        }
+
+        case IZO_IOC_GET_FILEID: {
+                rc = izo_get_fileid(file, data);
+                EXIT;
+                if (rc)
+                        return rc;
+                return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0;
+        }
+
+        case IZO_IOC_SET_FILEID: {
+                rc = izo_set_fileid(file, data);
+                EXIT;
+                if (rc)
+                        return rc;
+                return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT  : 0;
+        }
+
+        case IZO_IOC_ADJUST_LML: { 
+                struct lento_vfs_context *info; 
+                info = (struct lento_vfs_context *)data->ioc_inlbuf1;
+                rc = presto_adjust_lml(file, info); 
+                EXIT;
+                return rc;
+        }
+
+        case IZO_IOC_CONNECT: {
+                struct presto_file_set *fset;
+                int minor;
+
+                fset = presto_fset(file->f_dentry);
+                if (fset == NULL) {
+                        EXIT;
+                        return -ENODEV;
+                }
+                minor = presto_f2m(fset);
+
+                rc = izo_upc_connect(minor, data->ioc_ino,
+                                     data->ioc_generation, data->ioc_uuid,
+                                     data->ioc_flags);
+                EXIT;
+                return rc;
+        }
+
+        case IZO_IOC_GO_FETCH_KML: {
+                struct presto_file_set *fset;
+                int minor;
+
+                fset = presto_fset(file->f_dentry);
+                if (fset == NULL) {
+                        EXIT;
+                        return -ENODEV;
+                }
+                minor = presto_f2m(fset);
+
+                rc = izo_upc_go_fetch_kml(minor, fset->fset_name,
+                                          data->ioc_uuid, data->ioc_kmlsize);
+                EXIT;
+                return rc;
+        }
+
+        case IZO_IOC_REVOKE_PERMIT:
+                if (data->ioc_flags)
+                        rc = izo_revoke_permit(file->f_dentry, data->ioc_uuid);
+                else
+                        rc = izo_revoke_permit(file->f_dentry, NULL);
+                EXIT;
+                return rc;
+
+        case IZO_IOC_CLEAR_FSET:
+                rc = izo_clear_fsetroot(file->f_dentry);
+                EXIT;
+                return rc;
+
+        case IZO_IOC_CLEAR_ALL_FSETS: { 
+                struct presto_file_set *fset;
+
+                fset = presto_fset(file->f_dentry);
+                if (fset == NULL) {
+                        EXIT;
+                        return -ENODEV;
+                }
+
+                rc = izo_clear_all_fsetroots(fset->fset_cache);
+                EXIT;
+                return rc;
+        }
+
+        case IZO_IOC_SET_FSET:
+                /*
+                 * Mark this dentry as being a fileset root.
+                 */
+                rc = presto_set_fsetroot_from_ioc(file->f_dentry, 
+                                                  data->ioc_inlbuf1,
+                                                  data->ioc_flags);
+                EXIT;
+                return rc;
+
+
+        case IZO_IOC_MARK: {
+                int res = 0;  /* resulting flags - returned to user */
+                int error;
+
+                CDEBUG(D_DOWNCALL, "mark inode: %ld, and: %x, or: %x, what %d\n",
+                       file->f_dentry->d_inode->i_ino, data->ioc_and_flag,
+                       data->ioc_or_flag, data->ioc_mark_what);
+                CDEBUG(D_INODE, "presto_fset: warning %*s has NULL inode.\n",
+                       de->d_name.len, de->d_name.name);
+                case MARK_DENTRY:               
+                        error = izo_mark_dentry(file->f_dentry,
+                                                   data->ioc_and_flag,
+                                                   data->ioc_or_flag, &res);
+                        break;
+                case MARK_FSET:
+                        error = izo_mark_fset(file->f_dentry,
+                                                 data->ioc_and_flag,
+                                                 data->ioc_or_flag, &res);
+                        break;
+                case MARK_CACHE:
+                        error = izo_mark_cache(file->f_dentry,
+                                                  data->ioc_and_flag,
+                                                  data->ioc_or_flag, &res);
+                        break;
+                case MARK_GETFL: {
+                        int fflags, cflags;
+                        data->ioc_and_flag = 0xffffffff;
+                        data->ioc_or_flag = 0; 
+                        error = izo_mark_dentry(file->f_dentry,
+                                                   data->ioc_and_flag,
+                                                   data->ioc_or_flag, &res);
+                        if (error) 
+                                break;
+                        error = izo_mark_fset(file->f_dentry,
+                                                 data->ioc_and_flag,
+                                                 data->ioc_or_flag, &fflags);
+                        if (error) 
+                                break;
+                        error = izo_mark_cache(file->f_dentry,
+                                                  data->ioc_and_flag,
+                                                  data->ioc_or_flag,
+                                                  &cflags);
+
+                        if (error) 
+                                break;
+                        data->ioc_and_flag = fflags;
+                        data->ioc_or_flag = cflags;
+                        break;
+                }
+                default:
+                        error = -EINVAL;
+                }
+
+                if (error) { 
+                        EXIT;
+                        return error;
+                }
+                data->ioc_mark_what = res;
+                CDEBUG(D_DOWNCALL, "mark inode: %ld, and: %x, or: %x, what %x\n",
+                       file->f_dentry->d_inode->i_ino, data->ioc_and_flag,
+                       data->ioc_or_flag, data->ioc_mark_what);
+
+                EXIT;
+                return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0;
+        }
+#if 0
+        case IZO_IOC_CLIENT_MAKE_BRANCH: {
+                struct presto_file_set *fset;
+                int minor;
+
+                fset = presto_fset(file->f_dentry);
+                if (fset == NULL) {
+                        EXIT;
+                        return -ENODEV;
+                }
+                minor = presto_f2m(fset);
+
+                rc = izo_upc_client_make_branch(minor, fset->fset_name,
+                                                data->ioc_inlbuf1,
+                                                data->ioc_inlbuf2);
+                EXIT;
+                return rc;
+        }
+#endif
+        case IZO_IOC_SERVER_MAKE_BRANCH: {
+                struct presto_file_set *fset;
+                int minor;
+
+                fset = presto_fset(file->f_dentry);
+                if (fset == NULL) {
+                        EXIT;
+                        return -ENODEV;
+                }
+                minor = presto_f2m(fset);
+
+                izo_upc_server_make_branch(minor, data->ioc_inlbuf1);
+                EXIT;
+                return 0;
+        }
+        case IZO_IOC_SET_KMLSIZE: {
+                struct presto_file_set *fset;
+                int minor;
+                struct izo_rcvd_rec rec;
+
+                fset = presto_fset(file->f_dentry);
+                if (fset == NULL) {
+                        EXIT;
+                        return -ENODEV;
+                }
+                minor = presto_f2m(fset);
+
+                rc = izo_upc_set_kmlsize(minor, fset->fset_name, data->ioc_uuid,
+                                         data->ioc_kmlsize);
+
+                if (rc != 0) {
+                        EXIT;
+                        return rc;
+                }
+
+                rc = izo_rcvd_get(&rec, fset, data->ioc_uuid);
+                if (rc == -EINVAL) {
+                        /* We don't know anything about this uuid yet; no
+                         * worries. */
+                        memset(&rec, 0, sizeof(rec));
+                } else if (rc <= 0) {
+                        CERROR("InterMezzo: error reading last_rcvd: %d\n", rc);
+                        EXIT;
+                        return rc;
+                }
+                rec.lr_remote_offset = data->ioc_kmlsize;
+                rc = izo_rcvd_write(fset, &rec);
+                if (rc <= 0) {
+                        CERROR("InterMezzo: error writing last_rcvd: %d\n", rc);
+                        EXIT;
+                        return rc;
+                }
+                EXIT;
+                return rc;
+        }
+        case IZO_IOC_BRANCH_UNDO: {
+                struct presto_file_set *fset;
+                int minor;
+
+                fset = presto_fset(file->f_dentry);
+                if (fset == NULL) {
+                        EXIT;
+                        return -ENODEV;
+                }
+                minor = presto_f2m(fset);
+
+                rc = izo_upc_branch_undo(minor, fset->fset_name,
+                                         data->ioc_inlbuf1);
+                EXIT;
+                return rc;
+        }
+        case IZO_IOC_BRANCH_REDO: {
+                struct presto_file_set *fset;
+                int minor;
+
+                fset = presto_fset(file->f_dentry);
+                if (fset == NULL) {
+                        EXIT;
+                        return -ENODEV;
+                }
+                minor = presto_f2m(fset);
+
+                rc = izo_upc_branch_redo(minor, fset->fset_name,
+                                         data->ioc_inlbuf1);
+                EXIT;
+                return rc;
+        }
+
+        default:
+                EXIT;
+                return -ENOTTY;
+                
+        }
+        EXIT;
+        return 0;
+}
+
+struct file_operations presto_dir_fops = {
+        .ioctl =  presto_ioctl
+};
+
+struct inode_operations presto_dir_iops = {
+        .create       = presto_create,
+        .lookup       = presto_lookup,
+        .link         = presto_link,
+        .unlink       = presto_unlink,
+        .symlink      = presto_symlink,
+        .mkdir        = presto_mkdir,
+        .rmdir        = presto_rmdir,
+        .mknod        = presto_mknod,
+        .rename       = presto_rename,
+        .permission   = presto_permission,
+        .setattr      = presto_setattr,
+#ifdef CONFIG_FS_EXT_ATTR
+        .set_ext_attr = presto_set_ext_attr,
+#endif
+};
+
+
diff --git a/fs/intermezzo/ext_attr.c b/fs/intermezzo/ext_attr.c
new file mode 100644 (file)
index 0000000..be91417
--- /dev/null
@@ -0,0 +1,197 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ * 
+ *  Copyright (C) 2001 Tacit Networks, Inc.
+ *    Author: Shirish H. Phatak <shirish@tacitnetworks.com>
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Extended attribute handling for presto.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/segment.h>
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+#ifdef CONFIG_FS_EXT_ATTR
+#include <linux/ext_attr.h>
+
+extern inline void presto_debug_fail_blkdev(struct presto_file_set *fset,
+                                            unsigned long value);
+
+
+/* VFS interface */
+/* XXX! Fixme test for user defined attributes */
+int presto_set_ext_attr(struct inode *inode, 
+                        const char *name, void *buffer,
+                        size_t buffer_len, int flags) 
+{
+        int error;
+        struct presto_cache *cache;
+        struct presto_file_set *fset;
+        struct lento_vfs_context info;
+        struct dentry *dentry;
+        int minor = presto_i2m(inode);
+        char *buf = NULL;
+
+        ENTRY;
+        if (minor < 0) {
+                EXIT;
+                return -1;
+        }
+
+        if ( ISLENTO(minor) ) {
+                EXIT;
+                return -EINVAL;
+        }
+
+        /* BAD...vfs should really pass down the dentry to use, especially
+         * since every other operation in iops does. But for now
+         * we do a reverse mapping from inode to the first dentry 
+         */
+        if (list_empty(&inode->i_dentry)) {
+                CERROR("No alias for inode %d\n", (int) inode->i_ino);
+                EXIT;
+                return -EINVAL;
+        }
+
+        dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
+
+        error = presto_prep(dentry, &cache, &fset);
+        if ( error ) {
+                EXIT;
+                return error;
+        }
+
+        if ((buffer != NULL) && (buffer_len != 0)) {
+            /* If buffer is a user space pointer copy it to kernel space
+            * and reset the flag. We do this since the journal functions need
+            * access to the contents of the buffer, and the file system
+            * does not care. When we actually invoke the function, we remove
+            * the EXT_ATTR_FLAG_USER flag.
+            *
+            * XXX:Check if the "fs does not care" assertion is always true -SHP
+            * (works for ext3)
+            */
+            if (flags & EXT_ATTR_FLAG_USER) {
+                PRESTO_ALLOC(buf, buffer_len);
+                if (!buf) {
+                        CERROR("InterMezzo: out of memory!!!\n");
+                        return -ENOMEM;
+                }
+                error = copy_from_user(buf, buffer, buffer_len);
+                if (error) {
+                        PRESTO_FREE(buf, buffer_len);
+                        return -EFAULT;
+                }
+            } else 
+                buf = buffer;
+        } else
+                buf = buffer;
+
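+        /* Take the write permit before journaling the attribute change; on
+         * failure, release any kernel copy of the user buffer.
+         */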
+        if ( presto_get_permit(inode) < 0 ) {
+                EXIT;
+                if (buffer_len && (flags & EXT_ATTR_FLAG_USER))
+                        PRESTO_FREE(buf, buffer_len);
+                return -EROFS;
+        }
+
+        /* Simulate presto_setup_info */
+        memset(&info, 0, sizeof(info));
+        /* For now redundant..but we keep it around just in case */
+        info.flags = LENTO_FL_IGNORE_TIME;
+        if (!ISLENTO(cache->cache_psdev->uc_minor))
+            info.flags |= LENTO_FL_KML;
+
+        /* We pass in the kernel space pointer and reset the 
+         * EXT_ATTR_FLAG_USER flag.
+         * See comments above. 
+         */ 
+        /* Note that mode is already set by VFS so we send in a NULL */
+        error = presto_do_set_ext_attr(fset, dentry, name, buf,
+                                       buffer_len, flags & ~EXT_ATTR_FLAG_USER,
+                                       NULL, &info);
+        presto_put_permit(inode);
+
+        if (buffer_len && (flags & EXT_ATTR_FLAG_USER))
+                PRESTO_FREE(buf, buffer_len);
+        EXIT;
+        return error;
+}
+
+/* Lento Interface */
+/* XXX: ignore flags? We should be forcing these operations through? -SHP*/
+int lento_set_ext_attr(const char *path, const char *name, 
+                       void *buffer, size_t buffer_len, int flags, mode_t mode, 
+                       struct lento_vfs_context *info) 
+{
+        int error;
+        char * pathname;
+        struct nameidata nd;
+        struct dentry *dentry;
+        struct presto_file_set *fset;
+
+        ENTRY;
+        lock_kernel();
+
+        pathname = getname(path);
+        error = PTR_ERR(pathname);
+        if (IS_ERR(pathname)) {
+                EXIT;
+                goto exit;
+        }
+
+        /* Note that ext_attrs apply to both files and directories. */
+        error = presto_walk(pathname, &nd);
+        if (error)
+                goto exit_path;
+        dentry = nd.dentry;
+
+        fset = presto_fset(dentry);
+        error = -EINVAL;
+        if ( !fset ) {
+                CERROR("No fileset!\n");
+                EXIT;
+                goto exit_dentry;
+        }
+
+        if (buffer == NULL)
+                buffer_len = 0;
+
+        error = presto_do_set_ext_attr(fset, dentry, name, buffer,
+                                       buffer_len, flags, &mode, info);
+exit_dentry:
+        path_release(&nd);
+exit_path:
+        putname(pathname);
+exit:
+        unlock_kernel();
+        return error; 
+}
+
+#endif /*CONFIG_FS_EXT_ATTR*/
diff --git a/fs/intermezzo/file.c b/fs/intermezzo/file.c
new file mode 100644 (file)
index 0000000..f625642
--- /dev/null
@@ -0,0 +1,534 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2000 Stelias Computing, Inc.
+ *  Copyright (C) 2000 Red Hat, Inc.
+ *  Copyright (C) 2000 TurboLinux, Inc.
+ *  Copyright (C) 2000 Los Alamos National Laboratory.
+ *  Copyright (C) 2000, 2001 Tacit Networks, Inc.
+ *  Copyright (C) 2000 Peter J. Braam
+ *  Copyright (C) 2001 Mountain View Data, Inc. 
+ *  Copyright (C) 2001 Cluster File Systems, Inc. 
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *  This file manages file I/O
+ * 
+ */
+
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <linux/fsfilter.h>
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+/*
+ * these are initialized in super.c
+ */
+extern int presto_permission(struct inode *inode, int mask, struct nameidata *nd);
+
+
+static int presto_open_upcall(int minor, struct dentry *de)
+{
+        int rc = 0;
+        char *path, *buffer;
+        struct presto_file_set *fset;
+        int pathlen;
+        struct lento_vfs_context info;
+        struct presto_dentry_data *dd = presto_d2d(de);
+
+        PRESTO_ALLOC(buffer, PAGE_SIZE);
+        if ( !buffer ) {
+                CERROR("PRESTO: out of memory!\n");
+                return -ENOMEM;
+        }
+        fset = presto_fset(de);
+        path = presto_path(de, fset->fset_dentry, buffer, PAGE_SIZE);
+        pathlen = MYPATHLEN(buffer, path);
+        
+        CDEBUG(D_FILE, "de %p, dd %p\n", de, dd);
+        if (dd->remote_ino == 0) {
+                rc = presto_get_fileid(minor, fset, de);
+        }
+        memset (&info, 0, sizeof(info));
+        if (dd->remote_ino > 0) {
+                info.remote_ino = dd->remote_ino;
+                info.remote_generation = dd->remote_generation;
+        } else
+                CERROR("get_fileid failed %d, ino: %Lx, fetching by name\n", rc,
+                       (unsigned long long) dd->remote_ino);
+
+        rc = izo_upc_open(minor, pathlen, path, fset->fset_name, &info);
+        PRESTO_FREE(buffer, PAGE_SIZE);
+        return rc;
+}
+
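+/* Decide whether an open must fetch the file's data on demand.  DOD is
+ * skipped for Lento itself, for backfetches by inode, for filesets without
+ * FSET_DATA_ON_DEMAND, for O_TRUNC opens, for files under .intermezzo, for
+ * files already marked PRESTO_DATA and for files that are not sparse.
+ */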
+static inline int open_check_dod(struct file *file,
+                                 struct presto_file_set *fset)
+{
+        int gen, is_iopen = 0, minor;
+        struct presto_cache *cache = fset->fset_cache;
+        ino_t inum;
+
+        minor = presto_c2m(cache);
+
+        if ( ISLENTO(minor) ) {
+                CDEBUG(D_CACHE, "is lento, not doing DOD.\n");
+                return 0;
+        }
+
+        /* Files are only ever opened by inode during backfetches, when by
+         * definition we have the authoritative copy of the data.  No DOD. */
+        is_iopen = izo_dentry_is_ilookup(file->f_dentry, &inum, &gen);
+
+        if (is_iopen) {
+                CDEBUG(D_CACHE, "doing iopen, not doing DOD.\n");
+                return 0;
+        }
+
+        if (!(fset->fset_flags & FSET_DATA_ON_DEMAND)) {
+                CDEBUG(D_CACHE, "fileset not on demand.\n");
+                return 0;
+        }
+                
+        if (file->f_flags & O_TRUNC) {
+                CDEBUG(D_CACHE, "fileset dod: O_TRUNC.\n");
+                return 0;
+        }
+                
+        if (presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL)) {
+                CDEBUG(D_CACHE, "file under .intermezzo, not doing DOD\n");
+                return 0;
+        }
+
+        if (presto_chk(file->f_dentry, PRESTO_DATA)) {
+                CDEBUG(D_CACHE, "PRESTO_DATA is set, not doing DOD.\n");
+                return 0;
+        }
+
+        if (cache->cache_filter->o_trops->tr_all_data(file->f_dentry->d_inode)) {
+                CDEBUG(D_CACHE, "file not sparse, not doing DOD.\n");
+                return 0;
+        }
+
+        return 1;
+}
+
+static int presto_file_open(struct inode *inode, struct file *file)
+{
+        int rc = 0;
+        struct file_operations *fops;
+        struct presto_cache *cache;
+        struct presto_file_set *fset;
+        struct presto_file_data *fdata;
+        int writable = (file->f_flags & (O_RDWR | O_WRONLY));
+        int minor, i;
+
+        ENTRY;
+
+        if (presto_prep(file->f_dentry, &cache, &fset) < 0) {
+                EXIT;
+                return -EBADF;
+        }
+
+        minor = presto_c2m(cache);
+
+        CDEBUG(D_CACHE, "DATA_OK: %d, ino: %ld, islento: %d\n",
+               presto_chk(file->f_dentry, PRESTO_DATA), inode->i_ino,
+               ISLENTO(minor));
+
+        if ( !ISLENTO(minor) && (file->f_flags & O_RDWR ||
+                                 file->f_flags & O_WRONLY)) {
+                CDEBUG(D_CACHE, "calling presto_get_permit\n");
+                if ( presto_get_permit(inode) < 0 ) {
+                        EXIT;
+                        return -EROFS;
+                }
+                presto_put_permit(inode);
+        }
+
+        if (open_check_dod(file, fset)) {
+                CDEBUG(D_CACHE, "presto_open_upcall\n");
+                CDEBUG(D_CACHE, "dentry: %p setting DATA, ATTR\n", file->f_dentry);
+                presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA);
+                rc = presto_open_upcall(minor, file->f_dentry);
+                if (rc) {
+                        EXIT;
+                        CERROR("%s: returning error %d\n", __FUNCTION__, rc);
+                        return rc;
+                }
+
+        }
+
+        /* file was truncated upon open: do not refetch */
+        if (file->f_flags & O_TRUNC) { 
+                CDEBUG(D_CACHE, "setting DATA, ATTR\n");
+                presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA);
+        }
+
+        fops = filter_c2cffops(cache->cache_filter);
+        if ( fops->open ) {
+                CDEBUG(D_CACHE, "calling fs open\n");
+                rc = fops->open(inode, file);
+
+                if (rc) {
+                        EXIT;
+                        return rc;
+                }
+        }
+
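+        /* Only writers get per-open state; the credentials, ownership and
+         * file version captured here are used when the close is journaled
+         * on release.
+         */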
+        if (writable) {
+                PRESTO_ALLOC(fdata, sizeof(*fdata));
+                if (!fdata) {
+                        EXIT;
+                        return -ENOMEM;
+                }
+                /* LOCK: XXX check that the kernel lock protects this alloc */
+                fdata->fd_do_lml = 0;
+                fdata->fd_bytes_written = 0;
+                fdata->fd_fsuid = current->fsuid;
+                fdata->fd_fsgid = current->fsgid;
+                fdata->fd_mode = file->f_dentry->d_inode->i_mode;
+                fdata->fd_uid = file->f_dentry->d_inode->i_uid;
+                fdata->fd_gid = file->f_dentry->d_inode->i_gid;
+                fdata->fd_ngroups = current->group_info->ngroups;
+                for (i=0 ; i < current->group_info->ngroups ; i++)
+                        fdata->fd_groups[i] = GROUP_AT(current->group_info,i);
+                if (!ISLENTO(minor)) 
+                        fdata->fd_info.flags = LENTO_FL_KML; 
+                else { 
+                        /* this is for the case of DOD, 
+                           reint_close will adjust flags if needed */
+                        fdata->fd_info.flags = 0;
+                }
+
+                presto_getversion(&fdata->fd_version, inode);
+                file->private_data = fdata;
+        } else {
+                file->private_data = NULL;
+        }
+
+        EXIT;
+        return 0;
+}
+
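+/* Called from the IZO_IOC_ADJUST_LML ioctl: replace the per-open
+ * lento_vfs_context attached to this file.
+ */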
+int presto_adjust_lml(struct file *file, struct lento_vfs_context *info)
+{
+        struct presto_file_data *fdata = 
+                (struct presto_file_data *) file->private_data;
+
+        if (!fdata) { 
+                EXIT;
+                return -EINVAL;
+        }
+                
+        memcpy(&fdata->fd_info, info, sizeof(*info));
+        EXIT;
+        return 0; 
+}
+
+
+static int presto_file_release(struct inode *inode, struct file *file)
+{
+        int rc;
+        struct file_operations *fops;
+        struct presto_cache *cache;
+        struct presto_file_set *fset;
+        struct presto_file_data *fdata = 
+                (struct presto_file_data *)file->private_data;
+        ENTRY;
+
+        rc = presto_prep(file->f_dentry, &cache, &fset);
+        if ( rc ) {
+                EXIT;
+                return rc;
+        }
+
+        fops = filter_c2cffops(cache->cache_filter);
+        if (fops && fops->release)
+                rc = fops->release(inode, file);
+
+        CDEBUG(D_CACHE, "islento = %d (minor %d), rc %d, data %p\n",
+               ISLENTO(cache->cache_psdev->uc_minor), 
+               cache->cache_psdev->uc_minor, rc, fdata);
+
+        /* this file was modified: ignore close errors, write KML */
+        if (fdata && fdata->fd_do_lml) {
+                /* XXX: remove when lento gets file granularity cd */
+                if ( presto_get_permit(inode) < 0 ) {
+                        EXIT;
+                        return -EROFS;
+                }
+        
+                fdata->fd_info.updated_time = file->f_dentry->d_inode->i_mtime;
+                rc = presto_do_close(fset, file); 
+                presto_put_permit(inode);
+        }
+
+        if (!rc && fdata) {
+                PRESTO_FREE(fdata, sizeof(*fdata));
+                file->private_data = NULL; 
+        }
+        
+        EXIT;
+        return rc;
+}
+
+static void presto_apply_write_policy(struct file *file,
+                                      struct presto_file_set *fset, loff_t res)
+{
+        struct presto_file_data *fdata =
+                (struct presto_file_data *)file->private_data;
+        struct presto_cache *cache = fset->fset_cache;
+        struct presto_version new_file_ver;
+        int error;
+        struct rec_info rec;
+
+        /* Here we do a journal close after a fixed or a specified
+         amount of KBytes, currently a global parameter set with
+         sysctl. If files are open for a long time, this gives added
+         protection. (XXX todo: per cache, add ioctl, handle
+         journaling in a thread, add more options etc.)
+        */ 
+        if ((fset->fset_flags & FSET_JCLOSE_ON_WRITE) &&
+            (!ISLENTO(cache->cache_psdev->uc_minor))) {
+                fdata->fd_bytes_written += res;
+                if (fdata->fd_bytes_written >= fset->fset_file_maxio) {
+                        presto_getversion(&new_file_ver,
+                                          file->f_dentry->d_inode);
+                        /* This is really heavy weight and should be fixed
+                           ASAP. At most we should be recording the number
+                           of bytes written and not locking the kernel, 
+                           wait for permits, etc, on the write path. SHP
+                        */
+                        lock_kernel();
+                        if ( presto_get_permit(file->f_dentry->d_inode) < 0 ) {
+                                EXIT;
+                                /* we must be disconnected, not to worry */
+                                unlock_kernel();
+                                return; 
+                        }
+                        error = presto_journal_close(&rec, fset, fdata,
+                                                     file->f_dentry,
+                                                     &fdata->fd_version,
+                                                     &new_file_ver);
+                        presto_put_permit(file->f_dentry->d_inode);
+                        unlock_kernel();
+                        if ( error ) {
+                                CERROR("presto_close: cannot journal close\n");
+                                /* XXX these errors are really bad */
+                                /* panic(); */
+                                return;
+                        }
+                        fdata->fd_bytes_written = 0;
+                }
+        }
+}
+
+static ssize_t presto_file_write(struct file *file, const char *buf,
+                                 size_t size, loff_t *off)
+{
+        struct rec_info rec;
+        int error;
+        struct presto_cache *cache;
+        struct presto_file_set *fset;
+        struct file_operations *fops;
+        ssize_t res;
+        int do_lml_here;
+        void *handle = NULL;
+        unsigned long blocks;
+        struct presto_file_data *fdata;
+        loff_t res_size; 
+
+        error = presto_prep(file->f_dentry, &cache, &fset);
+        if ( error ) {
+                EXIT;
+                return error;
+        }
+
+        blocks = (size >> file->f_dentry->d_inode->i_sb->s_blocksize_bits) + 1;
+        /* XXX 3 is for ext2 indirect blocks ... */ 
+        res_size = 2 * PRESTO_REQHIGH + ((blocks+3) 
+                << file->f_dentry->d_inode->i_sb->s_blocksize_bits);
+
+        error = presto_reserve_space(fset->fset_cache, res_size); 
+        CDEBUG(D_INODE, "Reserved %Ld for %Zd\n", res_size, size);
+        if ( error ) { 
+                EXIT;
+                return -ENOSPC;
+        }
+
+        CDEBUG(D_INODE, "islento %d, minor: %d\n", 
+               ISLENTO(cache->cache_psdev->uc_minor),
+               cache->cache_psdev->uc_minor); 
+
+        /* 
+         *  XXX this lock should become a per inode lock when 
+         *  Vinny's changes are in; we could just use i_sem.
+         */
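+        /* Only the first write on this open file needs to write an LML
+         * record, and files under .intermezzo are never journaled;
+         * fd_do_lml marks that the record has been taken care of.
+         */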
+        read_lock(&fset->fset_lml.fd_lock); 
+        fdata = (struct presto_file_data *)file->private_data;
+        do_lml_here = size && (fdata->fd_do_lml == 0) &&
+                !presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL);
+
+        if (do_lml_here)
+                fdata->fd_do_lml = 1;
+        read_unlock(&fset->fset_lml.fd_lock); 
+
+        /* XXX 
+           There might be a bug here.  We need to make 
+           absolutely sure that the ext3_file_write commits 
+           after our transaction that writes the LML record.
+           Nesting the file write helps if new blocks are allocated. 
+        */
+        res = 0;
+        if (do_lml_here) {
+                struct presto_version file_version;
+                /* handle different space reqs from file system below! */
+                handle = presto_trans_start(fset, file->f_dentry->d_inode, 
+                                            KML_OPCODE_WRITE);
+                if ( IS_ERR(handle) ) {
+                        presto_release_space(fset->fset_cache, res_size); 
+                        CERROR("presto_write: no space for transaction\n");
+                        return -ENOSPC;
+                }
+
+                presto_getversion(&file_version, file->f_dentry->d_inode); 
+                res = presto_write_lml_close(&rec, fset, file, 
+                                             fdata->fd_info.remote_ino, 
+                                             fdata->fd_info.remote_generation, 
+                                             &fdata->fd_info.remote_version, 
+                                             &file_version);
+                fdata->fd_lml_offset = rec.offset;
+                if ( res ) {
+                        CERROR("intermezzo: PANIC failed to write LML\n");
+                        BUG();
+                        EXIT;
+                        goto exit_write;
+                }
+                presto_trans_commit(fset, handle);
+        }
+
+        fops = filter_c2cffops(cache->cache_filter);
+        res = fops->write(file, buf, size, off);
+        if ( res != size ) {
+                CDEBUG(D_FILE, "file write returns short write: size %Zd, res %Zd\n", size, res); 
+        }
+
+        if ( (res > 0) && fdata ) 
+                 presto_apply_write_policy(file, fset, res);
+
+ exit_write:
+        presto_release_space(fset->fset_cache, res_size); 
+        return res;
+}
+
+struct file_operations presto_file_fops = {
+        .write   = presto_file_write,
+        .open    = presto_file_open,
+        .release = presto_file_release,
+        .ioctl   = presto_ioctl
+};
+
+struct inode_operations presto_file_iops = {
+        .permission   = presto_permission,
+        .setattr      = presto_setattr,
+#ifdef CONFIG_FS_EXT_ATTR
+        .set_ext_attr = presto_set_ext_attr,
+#endif
+};
+
+/* FIXME: I bet we want to add a lock here and in presto_file_open. */
+int izo_purge_file(struct presto_file_set *fset, char *file)
+{
+#if 0
+        void *handle = NULL;
+        char *path = NULL;
+        struct nameidata nd;
+        struct dentry *dentry;
+        int rc = 0, len;
+        loff_t oldsize;
+
+        /* FIXME: not mtpt it's gone */
+        len = strlen(fset->fset_cache->cache_mtpt) + strlen(file) + 1;
+        PRESTO_ALLOC(path, len + 1);
+        if (path == NULL)
+                return -1;
+
+        sprintf(path, "%s/%s", fset->fset_cache->cache_mtpt, file);
+        rc = izo_lookup_file(fset, path, &nd);
+        if (rc)
+                goto error;
+        dentry = nd.dentry;
+
+        /* FIXME: take a lock here */
+
+        if (dentry->d_inode->i_atime.tv_sec > get_seconds() - 5) {
+                /* We lost the race; this file was accessed while we were doing
+                 * ioctls and lookups and whatnot. */
+                rc = -EBUSY;
+                goto error_unlock;
+        }
+
+        /* FIXME: Check if this file is open. */
+
+        handle = presto_trans_start(fset, dentry->d_inode, KML_OPCODE_TRUNC);
+        if (IS_ERR(handle)) {
+                rc = -ENOMEM;
+                goto error_unlock;
+        }
+
+        /* FIXME: Write LML record */
+
+        oldsize = dentry->d_inode->i_size;
+        rc = izo_do_truncate(fset, dentry, 0, oldsize);
+        if (rc != 0)
+                goto error_clear;
+        rc = izo_do_truncate(fset, dentry, oldsize, 0);
+        if (rc != 0)
+                goto error_clear;
+
+ error_clear:
+        /* FIXME: clear LML record */
+
+ error_unlock:
+        /* FIXME: release the lock here */
+
+ error:
+        if (handle != NULL && !IS_ERR(handle))
+                presto_trans_commit(fset, handle);
+        if (path != NULL)
+                PRESTO_FREE(path, len + 1);
+        return rc;
+#else
+        return 0;
+#endif
+}
diff --git a/fs/intermezzo/fileset.c b/fs/intermezzo/fileset.c
new file mode 100644 (file)
index 0000000..9db8cab
--- /dev/null
@@ -0,0 +1,674 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *  Managing filesets
+ *
+ */
+
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/ext2_fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+static inline struct presto_file_set *presto_dentry2fset(struct dentry *dentry)
+{
+        if (presto_d2d(dentry) == NULL) {
+                EXIT;
+                return NULL;
+        }
+        return presto_d2d(dentry)->dd_fset;
+}
+
+/* find the fileset dentry for this dentry */
+struct presto_file_set *presto_fset(struct dentry *de)
+{
+        struct dentry *fsde;
+        ENTRY;
+        if ( !de->d_inode ) {
+                /* FIXME: is this ok to be NULL? */
+                CDEBUG(D_INODE, "presto_fset: warning %*s has NULL inode.\n",
+                       de->d_name.len, de->d_name.name);
+        }
+        for (fsde = de;; fsde = fsde->d_parent) {
+                if ( presto_dentry2fset(fsde) ) {
+                        EXIT;
+                        return presto_dentry2fset(fsde);
+                }
+                if (fsde->d_parent == fsde)
+                        break;
+        }
+        EXIT;
+        return NULL;
+}
+
+int presto_get_lastrecno(char *path, off_t *recno)
+{
+        struct nameidata nd; 
+        struct presto_file_set *fset;
+        struct dentry *dentry;
+        int error;
+        ENTRY;
+
+        error = presto_walk(path, &nd);
+        if (error) {
+                EXIT;
+                return error;
+        }
+
+        dentry = nd.dentry;
+
+        error = -ENXIO;
+        if ( !presto_ispresto(dentry->d_inode) ) {
+                EXIT;
+                goto kml_out;
+        }
+
+        error = -EINVAL;
+        if ( ! presto_dentry2fset(dentry)) {
+                EXIT;
+                goto kml_out;
+        }
+
+        fset = presto_dentry2fset(dentry);
+        if (!fset) {
+                EXIT;
+                goto kml_out;
+        }
+        error = 0;
+        *recno = fset->fset_kml.fd_recno;
+
+ kml_out:
+        path_release(&nd);
+        return error;
+}
+
+static char * _izo_make_path(char *fsetname, char *name)
+{
+        char *path = NULL;
+        int len;
+
+        len = strlen("/.intermezzo/") + strlen(fsetname) 
+                + 1 + strlen(name) + 1;
+
+        PRESTO_ALLOC(path, len);
+        if (path == NULL)
+                return NULL;
+
+        sprintf(path, "/.intermezzo/%s/%s", fsetname, name);
+
+        return path;
+}
+
+char * izo_make_path(struct presto_file_set *fset, char *name)
+{
+        return _izo_make_path(fset->fset_name, name);
+}
+
+static struct file *_izo_fset_open(char *fsetname, char *name, int flags, int mode) 
+{
+        char *path;
+        struct file *f;
+        int error;
+        ENTRY;
+
+        path = _izo_make_path(fsetname, name);
+        if (path == NULL) {
+                EXIT;
+                return ERR_PTR(-ENOMEM);
+        }
+
+        CDEBUG(D_INODE, "opening file %s\n", path);
+        f = filp_open(path, flags, mode);
+        error = PTR_ERR(f);
+        if (IS_ERR(f)) {
+                CDEBUG(D_INODE, "Error %d\n", error);
+        }
+
+        PRESTO_FREE(path, strlen(path));
+
+        EXIT;
+        return f;
+}
+
+struct file *izo_fset_open(struct presto_file_set *fset, char *name, int flags, int mode) 
+{
+        return _izo_fset_open(fset->fset_name, name, flags, mode);
+}
+
+
+
+/*
+ *  note: this routine "pins" a dentry for a fileset root
+ */
+int presto_set_fsetroot(struct dentry *ioctl_dentry, char *fsetname,
+                        unsigned int flags)
+{
+        struct presto_file_set *fset = NULL;
+        struct presto_cache *cache;
+        int error;
+        struct file  *fset_root;
+        struct dentry *dentry;
+
+        ENTRY;
+
+        fset_root = _izo_fset_open(fsetname, "ROOT",  O_RDONLY, 000);
+        if (IS_ERR(fset_root)) {
+                CERROR("Can't open %s/ROOT\n", fsetname);
+                EXIT;
+                error = PTR_ERR(fset_root);
+                goto out;
+        }
+        dentry = dget(fset_root->f_dentry);
+        filp_close(fset_root, NULL);
+
+        dentry->d_inode->i_op = ioctl_dentry->d_inode->i_op;
+        dentry->d_inode->i_fop = ioctl_dentry->d_inode->i_fop;
+        dentry->d_op = ioctl_dentry->d_op;
+        fset = presto_dentry2fset(dentry);
+        if (fset && (fset->fset_dentry == dentry) ) { 
+                CERROR("Fsetroot already set (inode %ld)\n",
+                       dentry->d_inode->i_ino);
+                /* XXX: ignore because clear_fsetroot is broken  */
+#if 0
+                dput(dentry);
+                EXIT;
+                error = -EEXIST;
+                goto out;
+#endif
+        }
+
+        cache = presto_get_cache(dentry->d_inode);
+        if (!cache) { 
+                CERROR("No cache found for inode %ld\n",
+                       dentry->d_inode->i_ino);
+                EXIT;
+                error = -ENODEV;
+                goto out_free;
+        }
+
+        PRESTO_ALLOC(fset, sizeof(*fset));
+        if ( !fset ) {
+                CERROR("No memory allocating fset for %s\n", fsetname);
+                EXIT;
+                error = -ENOMEM;
+                goto out_free;
+        }
+        CDEBUG(D_INODE, "fset at %p\n", fset);
+
+        CDEBUG(D_INODE, "InterMezzo: fsetroot: inode %ld, fileset name %s\n",
+               dentry->d_inode->i_ino, fsetname);
+
+        fset->fset_mnt = mntget(current->fs->pwdmnt); 
+        fset->fset_cache = cache;
+        fset->fset_dentry = dentry; 
+        fset->fset_name = strdup(fsetname);
+        fset->fset_chunkbits = CHUNK_BITS;
+        fset->fset_flags = flags;
+        fset->fset_file_maxio = FSET_DEFAULT_MAX_FILEIO; 
+        fset->fset_permit_lock = SPIN_LOCK_UNLOCKED;
+        PRESTO_ALLOC(fset->fset_reint_buf, 64 * 1024);
+        if (fset->fset_reint_buf == NULL) {
+                EXIT;
+                error = -ENOMEM;
+                goto out_free;
+        }
+        init_waitqueue_head(&fset->fset_permit_queue);
+
+        if (presto_d2d(dentry) == NULL) { 
+                dentry->d_fsdata = izo_alloc_ddata();
+        }
+        if (presto_d2d(dentry) == NULL) {
+                CERROR("InterMezzo: %s: no memory\n", __FUNCTION__);
+                EXIT;
+                error = -ENOMEM;
+                goto out_free;
+        }
+        presto_d2d(dentry)->dd_fset = fset;
+        list_add(&fset->fset_list, &cache->cache_fset_list);
+
+        error = izo_init_kml_file(fset, &fset->fset_kml);
+        if ( error ) {
+                EXIT;
+                CDEBUG(D_JOURNAL, "Error init_kml %d\n", error);
+                goto out_list_del;
+        }
+
+        error = izo_init_lml_file(fset, &fset->fset_lml);
+        if ( error ) {
+                int rc;
+                EXIT;
+                rc = izo_log_close(&fset->fset_kml);
+                CDEBUG(D_JOURNAL, "Error init_lml %d, cleanup %d\n", error, rc);
+                goto out_list_del;
+        }
+
+        /* init_last_rcvd_file could trigger a presto_file_write(), which
+         * requires that the lml structure be initialized. -phil */
+        error = izo_init_last_rcvd_file(fset, &fset->fset_rcvd);
+        if ( error ) {
+                int rc;
+                EXIT;
+                rc = izo_log_close(&fset->fset_kml);
+                rc = izo_log_close(&fset->fset_lml);
+                CDEBUG(D_JOURNAL, "Error init_lastrcvd %d, cleanup %d\n", error, rc);
+                goto out_list_del;
+        }
+
+        CDEBUG(D_PIOCTL, "-------> fset at %p, dentry at %p, mtpt %p, "
+               "fset %s, cache %p, presto_d2d(dentry)->dd_fset %p\n",
+               fset, dentry, fset->fset_dentry, fset->fset_name, cache,
+               presto_d2d(dentry)->dd_fset);
+
+        EXIT;
+        return 0;
+
+ out_list_del:
+        list_del(&fset->fset_list);
+        presto_d2d(dentry)->dd_fset = NULL;
+ out_free:
+        if (fset) {
+                mntput(fset->fset_mnt); 
+                if (fset->fset_reint_buf != NULL)
+                        PRESTO_FREE(fset->fset_reint_buf, 64 * 1024);
+                PRESTO_FREE(fset, sizeof(*fset));
+        }
+        dput(dentry); 
+ out:
+        return error;
+}
+
+static int izo_cleanup_fset(struct presto_file_set *fset)
+{
+        int error;
+        struct presto_cache *cache;
+
+        ENTRY;
+
+        CERROR("Cleaning up fset %s\n", fset->fset_name);
+
+        error = izo_log_close(&fset->fset_kml);
+        if (error)
+                CERROR("InterMezzo: Closing kml for fset %s: %d\n",
+                       fset->fset_name, error);
+        error = izo_log_close(&fset->fset_lml);
+        if (error)
+                CERROR("InterMezzo: Closing lml for fset %s: %d\n",
+                       fset->fset_name, error);
+        error = izo_log_close(&fset->fset_rcvd);
+        if (error)
+                CERROR("InterMezzo: Closing last_rcvd for fset %s: %d\n",
+                       fset->fset_name, error);
+
+        cache = fset->fset_cache;
+
+        list_del(&fset->fset_list);
+
+        presto_d2d(fset->fset_dentry)->dd_fset = NULL;
+        dput(fset->fset_dentry);
+        mntput(fset->fset_mnt);
+
+        PRESTO_FREE(fset->fset_name, strlen(fset->fset_name) + 1);
+        PRESTO_FREE(fset->fset_reint_buf, 64 * 1024);
+        PRESTO_FREE(fset, sizeof(*fset));
+        EXIT;
+        return error;
+}
+
+int izo_clear_fsetroot(struct dentry *dentry)
+{
+        struct presto_file_set *fset;
+
+        ENTRY;
+
+        fset = presto_dentry2fset(dentry);
+        if (!fset) {
+                EXIT;
+                return -EINVAL;
+        }
+
+        izo_cleanup_fset(fset);
+        EXIT;
+        return 0;
+}
+
+int izo_clear_all_fsetroots(struct presto_cache *cache)
+{
+        struct presto_file_set *fset;
+        struct list_head *tmp,*tmpnext;
+        int error;
+        error = 0;
+        tmp = &cache->cache_fset_list;
+        tmpnext = tmp->next;
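+        /* walk with a saved next pointer: izo_cleanup_fset() removes the
+         * current entry from the list */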
+        while ( tmpnext != &cache->cache_fset_list) {
+                tmp = tmpnext;
+                tmpnext = tmp->next;
+                fset = list_entry(tmp, struct presto_file_set, fset_list);
+
+                error = izo_cleanup_fset(fset);
+                if (error)
+                        break;
+        }
+        return error;
+}
+
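+/* allocate a bare, unattached vfsmount: it only serves as the root and pwd
+ * mount for the chroot-like run context set up below */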
+static struct vfsmount *izo_alloc_vfsmnt(void)
+{
+        struct vfsmount *mnt;
+        PRESTO_ALLOC(mnt, sizeof(*mnt));
+        if (mnt) {
+                memset(mnt, 0, sizeof(struct vfsmount));
+                atomic_set(&mnt->mnt_count,1);
+                INIT_LIST_HEAD(&mnt->mnt_hash);
+                INIT_LIST_HEAD(&mnt->mnt_child);
+                INIT_LIST_HEAD(&mnt->mnt_mounts);
+                INIT_LIST_HEAD(&mnt->mnt_list);
+        }
+        return mnt;
+}
+
+
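+/* switch the current process into a run context whose root and cwd are the
+ * cache root (fsuid/fsgid 0), so the operations that follow happen inside
+ * the cache filesystem */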
+static void izo_setup_ctxt(struct dentry *root, struct vfsmount *mnt,
+                           struct run_ctxt *save) 
+{
+        struct run_ctxt new;
+
+        mnt->mnt_root = root;
+        mnt->mnt_sb = root->d_inode->i_sb;
+        unlock_super(mnt->mnt_sb);
+
+        new.rootmnt = mnt;
+        new.root = root;
+        new.pwdmnt = mnt;
+        new.pwd = root;
+        new.fsuid = 0;
+        new.fsgid = 0;
+        new.fs = get_fs(); 
+        /* XXX where can we get the groups from? */
+        new.group_info = groups_alloc(0);
+
+        push_ctxt(save, &new); 
+}
+
+static void izo_cleanup_ctxt(struct vfsmount *mnt, struct run_ctxt *save) 
+{
+        lock_super(mnt->mnt_sb);
+        pop_ctxt(save); 
+}
+
+static int izo_simple_mkdir(struct dentry *dir, char *name, int mode)
+{
+        struct dentry *dchild; 
+        int err;
+        ENTRY;
+        
+        dchild = lookup_one_len(name, dir, strlen(name));
+        if (IS_ERR(dchild)) { 
+                EXIT;
+                return PTR_ERR(dchild); 
+        }
+
+        if (dchild->d_inode) { 
+                dput(dchild);
+                EXIT;
+                return -EEXIST;
+        }
+
+        err = vfs_mkdir(dir->d_inode, dchild, mode);
+        dput(dchild);
+        
+        EXIT;
+        return err;
+}
+
+static int izo_simple_symlink(struct dentry *dir, char *name, char *tgt)
+{
+        struct dentry *dchild; 
+        int err;
+        ENTRY;
+        
+        dchild = lookup_one_len(name, dir, strlen(name));
+        if (IS_ERR(dchild)) { 
+                EXIT;
+                return PTR_ERR(dchild); 
+        }
+
+        if (dchild->d_inode) { 
+                dput(dchild);
+                EXIT;
+                return -EEXIST;
+        }
+
+        err = vfs_symlink(dir->d_inode, dchild, tgt);
+        dput(dchild);
+        
+        EXIT;
+        return err;
+}
+
+/*
+ * run set_fsetroot in chroot environment
+ */
+int presto_set_fsetroot_from_ioc(struct dentry *root, char *fsetname,
+                                 unsigned int flags)
+{
+        int rc;
+        struct presto_cache *cache;
+        struct vfsmount *mnt;
+        struct run_ctxt save;
+
+        if (root != root->d_inode->i_sb->s_root) {
+                CERROR ("IOC_SET_FSET must be called on mount point\n");
+                return -ENODEV;
+        }
+
+        cache = presto_get_cache(root->d_inode);
+        mnt = cache->cache_vfsmount;
+        if (!mnt) { 
+                EXIT;
+                return -ENOMEM;
+        }
+        
+        izo_setup_ctxt(root, mnt, &save); 
+        rc = presto_set_fsetroot(root, fsetname, flags);
+        izo_cleanup_ctxt(mnt, &save);
+        return rc;
+}
+
+/* XXX: this function should detect if fsetname is already in use for
+   the cache under root
+*/ 
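+/* creates <root>/.intermezzo/<fsetname>/ with a ROOT symlink pointing back at
+ * the cache root, plus the ..iopen.. directory used for inode-number lookups,
+ * and finally calls presto_set_fsetroot() */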
+int izo_prepare_fileset(struct dentry *root, char *fsetname) 
+{
+        int err;
+        struct dentry *dotizo = NULL, *fsetdir = NULL, *dotiopen = NULL; 
+        struct presto_cache *cache;
+        struct vfsmount *mnt;
+        struct run_ctxt save;
+
+        cache = presto_get_cache(root->d_inode);
+        mnt = cache->cache_vfsmount = izo_alloc_vfsmnt();
+        if (!mnt) { 
+                EXIT;
+                return -ENOMEM;
+        }
+        
+        if (!fsetname) 
+                fsetname = "rootfset"; 
+
+        izo_setup_ctxt(root, mnt, &save); 
+
+        err = izo_simple_mkdir(root, ".intermezzo", 0755);
+        CDEBUG(D_CACHE, "mkdir on .intermezzo err %d\n", err); 
+
+        err = izo_simple_mkdir(root, "..iopen..", 0755);
+        CDEBUG(D_CACHE, "mkdir on ..iopen.. err %d\n", err); 
+
+        dotiopen = lookup_one_len("..iopen..", root, strlen("..iopen.."));
+        if (IS_ERR(dotiopen)) { 
+                EXIT;
+                goto out;
+        }
+        dotiopen->d_inode->i_op = &presto_dir_iops;
+        dput(dotiopen);
+
+
+        dotizo = lookup_one_len(".intermezzo", root, strlen(".intermezzo"));
+        if (IS_ERR(dotizo)) { 
+                EXIT;
+                goto out;
+        }
+
+
+        err = izo_simple_mkdir(dotizo, fsetname, 0755);
+        CDEBUG(D_CACHE, "mkdir err %d\n", err); 
+
+        /* XXX find the dentry of the root of the fileset (root for now) */ 
+        fsetdir = lookup_one_len(fsetname, dotizo, strlen(fsetname));
+        if (IS_ERR(fsetdir)) { 
+                EXIT;
+                goto out;
+        }
+
+        err = izo_simple_symlink(fsetdir, "ROOT", "../.."); 
+
+        /* XXX read flags from flags file */ 
+        err =  presto_set_fsetroot(root, fsetname, 0); 
+        CDEBUG(D_CACHE, "set_fsetroot err %d\n", err); 
+
+ out:
+        if (dotizo && !IS_ERR(dotizo)) 
+                dput(dotizo); 
+        if (fsetdir && !IS_ERR(fsetdir)) 
+                dput(fsetdir); 
+        izo_cleanup_ctxt(mnt, &save);
+        return err; 
+}
+
+int izo_set_fileid(struct file *dir, struct izo_ioctl_data *data)
+{
+        int rc = 0;
+        struct presto_cache *cache;
+        struct vfsmount *mnt;
+        struct run_ctxt save;
+        struct nameidata nd;
+        struct dentry *dentry;
+        struct presto_dentry_data *dd;
+        struct dentry *root;
+        char *buf = NULL; 
+
+        ENTRY;
+
+
+        root = dir->f_dentry;
+
+        /* actually, needs to be called on ROOT of fset, not mount point  
+        if (root != root->d_inode->i_sb->s_root) {
+                CERROR ("IOC_SET_FSET must be called on mount point\n");
+                return -ENODEV;
+        }
+        */
+
+        cache = presto_get_cache(root->d_inode);
+        mnt = cache->cache_vfsmount;
+        if (!mnt) { 
+                EXIT;
+                return -ENOMEM;
+        }
+        
+        izo_setup_ctxt(root, mnt, &save); 
+        
+        PRESTO_ALLOC(buf, data->ioc_plen1);
+        if (!buf) { 
+                rc = -ENOMEM;
+                EXIT;
+                goto out;
+        }
+        if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) { 
+                rc =  -EFAULT;
+                EXIT;
+                goto out;
+        }
+
+        rc = presto_walk(buf, &nd);
+        if (rc) {
+                CERROR("Unable to open: %s\n", buf);
+                EXIT;
+                goto out;
+        }
+        dentry = nd.dentry;
+        if (!dentry) {
+                CERROR("no dentry!\n");
+                rc =  -EINVAL;
+                EXIT;
+                goto out_close;
+        }
+        dd = presto_d2d(dentry);
+        if (!dd) {
+                CERROR("no dentry_data!\n");
+                rc = -EINVAL;
+                EXIT;
+                goto out_close;
+        }
+
+        CDEBUG(D_FILE,"de:%p dd:%p\n", dentry, dd);
+
+        if (dd->remote_ino != 0) {
+                CERROR("remote_ino already set? %Lx:%Lx\n",
+                       (unsigned long long) dd->remote_ino,
+                       (unsigned long long) dd->remote_generation);
+                rc = 0;
+                EXIT;
+                goto out_close;
+        }
+
+
+        CDEBUG(D_FILE,"setting %p %p, %s to %Lx:%Lx\n", dentry, dd, 
+               buf,
+               (unsigned long long) data->ioc_ino,
+               (unsigned long long) data->ioc_generation);
+        dd->remote_ino = data->ioc_ino;
+        dd->remote_generation = data->ioc_generation;
+
+        EXIT;
+ out_close:
+        path_release(&nd);
+ out:
+        if (buf)
+                PRESTO_FREE(buf, data->ioc_plen1);
+        izo_cleanup_ctxt(mnt, &save);
+        return rc;
+}
diff --git a/fs/intermezzo/inode.c b/fs/intermezzo/inode.c
new file mode 100644 (file)
index 0000000..fda188b
--- /dev/null
@@ -0,0 +1,179 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 1996 Peter J. Braam <braam@maths.ox.ac.uk> and
+ *    Michael Callahan <callahan@maths.ox.ac.uk>
+ *  Copyright (C) 1999 Carnegie Mellon University
+ *    Rewritten for Linux 2.1.  Peter Braam <braam@cs.cmu.edu>
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Super block/filesystem wide operations
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/segment.h>
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+extern void presto_free_cache(struct presto_cache *);
+
+void presto_set_ops(struct inode *inode, struct  filter_fs *filter)
+{
+        ENTRY; 
+
+        if (!inode || is_bad_inode(inode))
+                return;
+
+        if (S_ISREG(inode->i_mode)) {
+                if ( !filter_c2cfiops(filter) ) {
+                       filter_setup_file_ops(filter, 
+                                             inode, &presto_file_iops,
+                                             &presto_file_fops);
+                }
+                inode->i_op = filter_c2ufiops(filter);
+                inode->i_fop = filter_c2uffops(filter);
+                CDEBUG(D_INODE, "set file methods for %ld to %p\n",
+                       inode->i_ino, inode->i_op);
+        } else if (S_ISDIR(inode->i_mode)) {
+                inode->i_op = filter_c2udiops(filter);
+                inode->i_fop = filter_c2udfops(filter);
+                CDEBUG(D_INODE, "set dir methods for %ld to %p ioctl %p\n",
+                       inode->i_ino, inode->i_op, inode->i_fop->ioctl);
+        } else if (S_ISLNK(inode->i_mode)) {
+                if ( !filter_c2csiops(filter)) {
+                        filter_setup_symlink_ops(filter, 
+                                                 inode,
+                                                 &presto_sym_iops, 
+                                                 &presto_sym_fops);
+                }
+                inode->i_op = filter_c2usiops(filter);
+                inode->i_fop = filter_c2usfops(filter);
+                CDEBUG(D_INODE, "set link methods for %ld to %p\n",
+                       inode->i_ino, inode->i_op);
+        }
+        EXIT;
+}
+
+void presto_read_inode(struct inode *inode)
+{
+        struct presto_cache *cache;
+
+        cache = presto_get_cache(inode);
+        if ( !cache ) {
+                CERROR("PRESTO: BAD, BAD: cannot find cache\n");
+                make_bad_inode(inode);
+                return ;
+        }
+
+        filter_c2csops(cache->cache_filter)->read_inode(inode);
+
+        CDEBUG(D_INODE, "presto_read_inode: ino %ld, gid %d\n", 
+               inode->i_ino, inode->i_gid);
+
+        presto_set_ops(inode, cache->cache_filter); 
+        /* XXX handle special inodes here or not - probably not? */
+}
+
+static void presto_put_super(struct super_block *sb)
+{
+        struct presto_cache *cache;
+        struct upc_channel *channel;
+        struct super_operations *sops;
+        struct list_head *lh;
+        int err;
+
+        ENTRY;
+        cache = presto_cache_find(sb);
+        if (!cache) {
+                EXIT;
+                goto exit;
+        }
+        channel = &izo_channels[presto_c2m(cache)];
+        sops = filter_c2csops(cache->cache_filter);
+        err = izo_clear_all_fsetroots(cache); 
+        if (err) { 
+                CERROR("%s: err %d\n", __FUNCTION__, err);
+        }
+        PRESTO_FREE(cache->cache_vfsmount, sizeof(struct vfsmount));
+
+        /* see kill_super: fsync_super is not exported, but it is
+           probably not needed here */
+        unlock_super(sb);
+        shrink_dcache_parent(cache->cache_root); 
+        dput(cache->cache_root); 
+        //fsync_super(sb); 
+        lock_super(sb);
+
+        if (sops->write_super)
+                sops->write_super(sb); 
+
+        if (sops->put_super)
+                sops->put_super(sb);
+
+        /* free any remaining async upcalls when the filesystem is unmounted */
+        spin_lock(&channel->uc_lock);
+        lh = channel->uc_pending.next;
+        while ( lh != &channel->uc_pending) {
+                struct upc_req *req;
+                req = list_entry(lh, struct upc_req, rq_chain);
+
+                /* advance now: the request containing this list_head may be freed below */
+                lh = lh->next;
+                if ( ! (req->rq_flags & REQ_ASYNC) ) 
+                        continue;
+                list_del(&(req->rq_chain));
+                PRESTO_FREE(req->rq_data, req->rq_bufsize);
+                PRESTO_FREE(req, sizeof(struct upc_req));
+        }
+        list_del(&cache->cache_channel_list); 
+        spin_unlock(&channel->uc_lock);
+
+        presto_free_cache(cache);
+
+exit:
+        CDEBUG(D_MALLOC, "after umount: kmem %ld, vmem %ld\n",
+               presto_kmemory, presto_vmemory);
+        return ;
+}
+
+struct super_operations presto_super_ops = {
+        .read_inode    = presto_read_inode,
+        .put_super     = presto_put_super,
+};
+
+
+/* symlinks can be chowned */
+struct inode_operations presto_sym_iops = {
+        .setattr       = presto_setattr
+};
+
+/* NULL for now */
+struct file_operations presto_sym_fops; 
diff --git a/fs/intermezzo/intermezzo_fs.h b/fs/intermezzo/intermezzo_fs.h
new file mode 100644 (file)
index 0000000..3500365
--- /dev/null
@@ -0,0 +1,923 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2001 Tacitus Systems, Inc.
+ *  Copyright (C) 2000 Stelias Computing, Inc.
+ *  Copyright (C) 2000 Red Hat, Inc.
+ *  Copyright (C) 2000 TurboLinux, Inc.
+ *  Copyright (C) 2000 Los Alamos National Laboratory.
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __INTERMEZZO_FS_H_
+#define __INTERMEZZO_FS_H_ 1
+
+#include "intermezzo_lib.h"
+#include "intermezzo_idl.h"
+
+
+#ifdef __KERNEL__
+typedef __u8 uuid_t[16];
+#else
+# include <uuid/uuid.h>
+#endif
+
+struct lento_vfs_context {
+        __u64 kml_offset;
+        struct timespec updated_time;
+        __u64 remote_ino;
+        __u64 remote_generation;
+        __u32 slot_offset;
+        __u32 recno;
+        __u32 flags;
+        uuid_t uuid;
+        struct presto_version remote_version;
+};
+
+#ifdef __KERNEL__
+# include <linux/smp.h>
+# include <linux/fsfilter.h>
+# include <linux/mount.h>
+# include <linux/slab.h>
+# include <linux/vmalloc.h>
+# include <linux/smp_lock.h>
+
+/* fixups for fs.h */
+# ifndef fs_down
+#  define fs_down(sem) down(sem)
+# endif
+
+# ifndef fs_up
+#  define fs_up(sem) up(sem)
+# endif
+
+# define KML_IDLE                        0
+# define KML_DECODE                      1
+# define KML_OPTIMIZE                    2
+# define KML_REINT                       3
+
+# define KML_OPEN_REINT                  0x0100
+# define KML_REINT_BEGIN                 0x0200
+# define KML_BACKFETCH                   0x0400
+# define KML_REINT_END                   0x0800
+# define KML_CLOSE_REINT                 0x1000
+# define KML_REINT_MAXBUF                (64 * 1024)
+
+# define CACHE_CLIENT_RO       0x4
+# define CACHE_LENTO_RO        0x8
+
+/* global variables */
+extern int presto_debug;
+extern int presto_print_entry;
+extern long presto_kmemory;
+extern long presto_vmemory;
+
+# define PRESTO_DEBUG
+# ifdef PRESTO_DEBUG
+/* debugging masks */
+#  define D_SUPER       1
+#  define D_INODE       2
+#  define D_FILE        4
+#  define D_CACHE       8  /* cache debugging */
+#  define D_MALLOC     16  /* print malloc, de-alloc information */
+#  define D_JOURNAL    32
+#  define D_UPCALL     64  /* up and downcall debugging */
+#  define D_PSDEV     128
+#  define D_PIOCTL    256
+#  define D_SPECIAL   512
+#  define D_TIMING   1024
+#  define D_DOWNCALL 2048
+#  define D_KML      4096
+#  define D_FSDATA   8192
+
+#  define CDEBUG(mask, format, a...)                                    \
+        do {                                                            \
+                if (presto_debug & mask) {                              \
+                        printk("(%s:%s,l. %d %d): " format, __FILE__,   \
+                               __FUNCTION__, __LINE__, current->pid     \
+                               , ## a);                                 \
+                }                                                       \
+        } while (0)
+
+#define CERROR(format, a...)                                            \
+do {                                                                    \
+        printk("(%s:%s,l. %d %d): " format, __FILE__, __FUNCTION__,     \
+               __LINE__, current->pid , ## a);                          \
+} while (0)
+
+#  define ENTRY                                                         \
+        if (presto_print_entry)                                         \
+                printk("Process %d entered %s\n", current->pid, __FUNCTION__)
+
+#  define EXIT                                                          \
+        if (presto_print_entry)                                         \
+                printk("Process %d leaving %s at %d\n", current->pid,   \
+                       __FUNCTION__, __LINE__)
+
+#  define presto_kmem_inc(ptr, size) presto_kmemory += (size)
+#  define presto_kmem_dec(ptr, size) presto_kmemory -= (size)
+#  define presto_vmem_inc(ptr, size) presto_vmemory += (size)
+#  define presto_vmem_dec(ptr, size) presto_vmemory -= (size)
+# else /* !PRESTO_DEBUG */
+#  define CDEBUG(mask, format, a...) do {} while (0)
+#  define ENTRY do {} while (0)
+#  define EXIT do {} while (0)
+#  define presto_kmem_inc(ptr, size) do {} while (0)
+#  define presto_kmem_dec(ptr, size) do {} while (0)
+#  define presto_vmem_inc(ptr, size) do {} while (0)
+#  define presto_vmem_dec(ptr, size) do {} while (0)
+# endif /* PRESTO_DEBUG */
+
+
+struct run_ctxt {
+        struct vfsmount *pwdmnt;
+        struct dentry   *pwd;
+        struct vfsmount *rootmnt;
+        struct dentry   *root;
+        uid_t            fsuid;
+        gid_t            fsgid;
+        mm_segment_t     fs;
+        struct group_info * group_info;
+/*     int              ngroups;
+       gid_t            groups[NGROUPS];*/
+
+};
+
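+/* push_ctxt() saves the caller's fs context (cwd, root, fsuid/fsgid, groups,
+ * address limit) in 'save' and installs 'new'; pop_ctxt() restores the saved
+ * context */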
+static inline void push_ctxt(struct run_ctxt *save, struct run_ctxt *new)
+{
+        save->fs = get_fs();
+        save->pwd = dget(current->fs->pwd);
+        save->pwdmnt = mntget(current->fs->pwdmnt);
+        save->fsgid = current->fsgid;
+        save->fsuid = current->fsuid;
+        save->root = current->fs->root;
+        save->rootmnt = current->fs->rootmnt;
+        save->group_info = current->group_info;
+/*      save->ngroups = current->ngroups;
+        for (i = 0; i< current->ngroups; i++) 
+                save->groups[i] = current->groups[i];*/
+
+        set_fs(new->fs);
+        lock_kernel();
+        set_fs_pwd(current->fs, new->pwdmnt, new->pwd);
+        if (new->root)
+                set_fs_root(current->fs, new->rootmnt, new->root);
+        unlock_kernel();
+        current->fsuid = new->fsuid;
+        current->fsgid = new->fsgid;
+        /*if (new->ngroups > 0) {
+                current->ngroups = new->ngroups;
+                for (i = 0; i< new->ngroups; i++) 
+                        current->groups[i] = new->groups[i];
+        }*/
+        current->group_info = new->group_info;
+        
+}
+
+static inline void pop_ctxt(struct run_ctxt *saved)
+{
+        set_fs(saved->fs);
+        lock_kernel();
+        set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
+        if (saved->root)
+                set_fs_root(current->fs, saved->rootmnt, saved->root);
+        unlock_kernel();
+        current->fsuid = saved->fsuid;
+        current->fsgid = saved->fsgid;
+        current->group_info = saved->group_info;
+/*
+        current->ngroups = saved->ngroups;
+        for (i = 0; i< saved->ngroups; i++) 
+                current->groups[i] = saved->groups[i];
+*/
+        mntput(saved->pwdmnt);
+        dput(saved->pwd);
+}
+
+static inline struct presto_dentry_data *presto_d2d(struct dentry *dentry)
+{
+        return (struct presto_dentry_data *)(dentry->d_fsdata);
+}
+
+struct presto_cache {
+        spinlock_t          cache_lock;
+        loff_t              cache_reserved;
+        struct  vfsmount   *cache_vfsmount;
+        struct super_block *cache_sb;
+        struct  dentry     *cache_root;
+        struct list_head    cache_chain; /* for the dev/cache hash */
+
+        int   cache_flags;
+
+        char *cache_type;            /* filesystem type of cache */
+        struct filter_fs *cache_filter;
+
+        struct upc_channel *cache_psdev;  /* points to channel used */
+        struct list_head cache_channel_list; 
+        struct list_head cache_fset_list; /* filesets mounted in cache */
+};
+
+struct presto_log_fd {
+        rwlock_t         fd_lock;
+        loff_t           fd_offset;  /* offset where next record should go */
+        struct file    *fd_file;
+        int             fd_truncating;
+        unsigned int   fd_recno;   /* last recno written */
+        struct list_head  fd_reservations;
+};
+
+/* file sets */
+# define CHUNK_BITS  16
+
+struct presto_file_set {
+        struct list_head fset_list;
+        struct presto_log_fd fset_kml;
+        struct presto_log_fd fset_lml;
+        struct presto_log_fd fset_rcvd;
+        struct list_head *fset_clients;  /* cache of clients */
+        struct dentry *fset_dentry;
+        struct vfsmount *fset_mnt;
+        struct presto_cache *fset_cache;
+
+        unsigned int fset_lento_recno;  /* last recno mentioned to lento */
+        loff_t fset_lento_off;    /* last offset mentioned to lento */
+        loff_t fset_kml_logical_off; /* logical offset of kml file byte 0 */
+        char * fset_name;
+
+        int fset_flags;
+        int fset_chunkbits;
+        char *fset_reint_buf; /* temporary buffer holds kml during reint */
+
+        spinlock_t fset_permit_lock;
+        int fset_permit_count;
+        int fset_permit_upcall_count;
+        /* This queue is used both for processes waiting for the kernel to give
+         * up the permit as well as processes waiting for the kernel to be given
+         * the permit, depending on the state of FSET_HASPERMIT. */
+        wait_queue_head_t fset_permit_queue;
+
+        loff_t  fset_file_maxio;  /* writing more than this causes a close */
+        unsigned long int kml_truncate_size;
+};
+
+/* This is the default number of bytes written before a close is recorded*/
+#define FSET_DEFAULT_MAX_FILEIO (1024<<10)  /* 1 MB */
+
+struct dentry *presto_tmpfs_ilookup(struct inode *dir, struct dentry *dentry, 
+                                    ino_t ino, unsigned int generation);
+struct dentry *presto_iget_ilookup(struct inode *dir, struct dentry *dentry, 
+                                    ino_t ino, unsigned int generation);
+struct dentry *presto_add_ilookup_dentry(struct dentry *parent,
+                                         struct dentry *real);
+
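+/* per-filesystem journaling hooks; each supported cache filesystem provides
+ * its own instance (see the externs below) */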
+struct journal_ops {
+        int (*tr_all_data)(struct inode *);
+        loff_t (*tr_avail)(struct presto_cache *fset, struct super_block *);
+        void *(*tr_start)(struct presto_file_set *, struct inode *, int op);
+        void (*tr_commit)(struct presto_file_set *, void *handle);
+        void (*tr_journal_data)(struct inode *);
+        struct dentry *(*tr_ilookup)(struct inode *dir, struct dentry *dentry, ino_t ino, unsigned int generation);
+        struct dentry *(*tr_add_ilookup)(struct dentry *parent, struct dentry *real);
+};
+
+extern struct journal_ops presto_ext2_journal_ops;
+extern struct journal_ops presto_ext3_journal_ops;
+extern struct journal_ops presto_tmpfs_journal_ops;
+extern struct journal_ops presto_xfs_journal_ops;
+extern struct journal_ops presto_reiserfs_journal_ops;
+extern struct journal_ops presto_obdfs_journal_ops;
+
+# define LENTO_FL_KML            0x0001
+# define LENTO_FL_EXPECT         0x0002
+# define LENTO_FL_VFSCHECK       0x0004
+# define LENTO_FL_JUSTLOG        0x0008
+# define LENTO_FL_WRITE_KML      0x0010
+# define LENTO_FL_CANCEL_LML     0x0020
+# define LENTO_FL_WRITE_EXPECT   0x0040
+# define LENTO_FL_IGNORE_TIME    0x0080
+# define LENTO_FL_TOUCH_PARENT   0x0100
+# define LENTO_FL_TOUCH_NEWOBJ   0x0200
+# define LENTO_FL_SET_DDFILEID   0x0400
+
+struct presto_cache *presto_get_cache(struct inode *inode);
+int presto_sprint_mounts(char *buf, int buflen, int minor);
+struct presto_file_set *presto_fset(struct dentry *de);
+int presto_journal(struct dentry *dentry, char *buf, size_t size);
+int presto_fwrite(struct file *file, const char *str, int len, loff_t *off);
+int presto_ispresto(struct inode *);
+
+/* super.c */
+extern struct file_system_type presto_fs_type;
+extern int init_intermezzo_fs(void);
+
+/* fileset.c */
+extern int izo_prepare_fileset(struct dentry *root, char *fsetname);
+char * izo_make_path(struct presto_file_set *fset, char *name);
+struct file *izo_fset_open(struct presto_file_set *fset, char *name, int flags, int mode);
+
+/* psdev.c */
+int izo_psdev_get_free_channel(void);
+int presto_psdev_init(void);
+int izo_psdev_setpid(int minor);
+extern void presto_psdev_cleanup(void);
+int presto_lento_up(int minor);
+int izo_psdev_setchannel(struct file *file, int fd);
+
+/* inode.c */
+extern struct super_operations presto_super_ops;
+void presto_set_ops(struct inode *inode, struct  filter_fs *filter);
+
+/* dcache.c */
+void presto_frob_dop(struct dentry *de);
+char *presto_path(struct dentry *dentry, struct dentry *root,
+                  char *buffer, int buflen);
+struct presto_dentry_data *izo_alloc_ddata(void);
+int presto_set_dd(struct dentry *);
+int presto_init_ddata_cache(void);
+void presto_cleanup_ddata_cache(void);
+extern struct dentry_operations presto_dentry_ops;
+
+/* dir.c */
+extern struct inode_operations presto_dir_iops;
+extern struct inode_operations presto_file_iops;
+extern struct inode_operations presto_sym_iops;
+extern struct file_operations presto_dir_fops;
+extern struct file_operations presto_file_fops;
+extern struct file_operations presto_sym_fops;
+int presto_setattr(struct dentry *de, struct iattr *iattr);
+int presto_settime(struct presto_file_set *fset, struct dentry *newobj,
+                   struct dentry *parent, struct dentry *target,
+                   struct lento_vfs_context *ctx, int valid);
+int presto_ioctl(struct inode *inode, struct file *file,
+                 unsigned int cmd, unsigned long arg);
+
+extern int presto_ilookup_uid;
+# define PRESTO_ILOOKUP_MAGIC "...ino:"
+# define PRESTO_ILOOKUP_SEP ':'
+int izo_dentry_is_ilookup(struct dentry *, ino_t *id, unsigned int *generation);
+struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd);
+
+struct presto_dentry_data {
+        int dd_count; /* how many dentries are using this dentry */
+        struct presto_file_set *dd_fset;
+        struct dentry *dd_inodentry; 
+        loff_t dd_kml_offset;
+        int dd_flags;
+        __u64 remote_ino;
+        __u64 remote_generation;
+};
+
+struct presto_file_data {
+        int fd_do_lml;
+        loff_t fd_lml_offset;
+        size_t fd_bytes_written;
+        /* authorization related data of file at open time */
+        uid_t fd_uid;
+        gid_t fd_gid;
+        mode_t fd_mode;
+        /* identification data of calling process */
+        uid_t fd_fsuid;
+        gid_t fd_fsgid;
+        int fd_ngroups;
+        gid_t fd_groups[NGROUPS_SMALL];
+        /* information how to complete the close operation */
+        struct lento_vfs_context fd_info;
+        struct presto_version fd_version;
+};
+
+/* presto.c and Lento::Downcall */
+
+int presto_walk(const char *name, struct nameidata *nd);
+int izo_clear_fsetroot(struct dentry *dentry);
+int izo_clear_all_fsetroots(struct presto_cache *cache);
+int presto_get_kmlsize(char *path, __u64 *size);
+int presto_get_lastrecno(char *path, off_t *size);
+int presto_set_fsetroot(struct dentry *dentry, char *fsetname,
+                       unsigned int flags);
+int presto_set_fsetroot_from_ioc(struct dentry *dentry, char *fsetname,
+                                 unsigned int flags);
+int presto_is_read_only(struct presto_file_set *);
+int presto_truncate_lml(struct presto_file_set *fset);
+int lento_write_lml(char *path,
+                     __u64 remote_ino,
+                     __u32 remote_generation,
+                     __u32 remote_version,
+                    struct presto_version *remote_file_version);
+int lento_complete_closes(char *path);
+int presto_f2m(struct presto_file_set *fset);
+int presto_prep(struct dentry *, struct presto_cache **,
+                       struct presto_file_set **);
+/* cache.c */
+extern struct presto_cache *presto_cache_init(void);
+extern void presto_cache_add(struct presto_cache *cache);
+extern void presto_cache_init_hash(void);
+
+struct presto_cache *presto_cache_find(struct super_block *sb);
+
+#define PRESTO_REQLOW  (3 * 4096)
+#define PRESTO_REQHIGH (6 * 4096)
+void presto_release_space(struct presto_cache *cache, loff_t req);
+int presto_reserve_space(struct presto_cache *cache, loff_t req);
+
+#define PRESTO_DATA             0x00000002 /* cached data is valid */
+#define PRESTO_ATTR             0x00000004 /* attributes cached */
+#define PRESTO_DONT_JOURNAL     0x00000008 /* things like .intermezzo/ */
+
+struct presto_file_set *presto_path2fileset(const char *name);
+int izo_revoke_permit(struct dentry *, uuid_t uuid);
+int presto_chk(struct dentry *dentry, int flag);
+void presto_set(struct dentry *dentry, int flag);
+int presto_get_permit(struct inode *inode);
+int presto_put_permit(struct inode *inode);
+int presto_set_max_kml_size(const char *path, unsigned long max_size);
+int izo_mark_dentry(struct dentry *dentry, int and, int or, int *res);
+int izo_mark_cache(struct dentry *dentry, int and_bits, int or_bits, int *);
+int izo_mark_fset(struct dentry *dentry, int and_bits, int or_bits, int *);
+void presto_getversion(struct presto_version *pv, struct inode *inode);
+int presto_i2m(struct inode *inode);
+int presto_c2m(struct presto_cache *cache);
+
+
+/* file.c */
+int izo_purge_file(struct presto_file_set *fset, char *file);
+int presto_adjust_lml(struct file *file, struct lento_vfs_context *info);
+
+/* journal.c */
+struct rec_info {
+        loff_t offset;
+        int size;
+        int recno;
+        int is_kml;
+};
+
+void presto_trans_commit(struct presto_file_set *fset, void *handle);
+void *presto_trans_start(struct presto_file_set *fset, struct inode *inode,
+                         int op);
+int presto_fread(struct file *file, char *str, int len, loff_t *off);
+int presto_clear_lml_close(struct presto_file_set *fset,
+                           loff_t  lml_offset);
+int presto_complete_lml(struct presto_file_set *fset);
+int presto_read_kml_logical_offset(struct rec_info *recinfo,
+                                   struct presto_file_set *fset);
+int presto_write_kml_logical_offset(struct presto_file_set *fset);
+struct file *presto_copy_kml_tail(struct presto_file_set *fset,
+                                  unsigned long int start);
+int presto_finish_kml_truncate(struct presto_file_set *fset,
+                               unsigned long int offset);
+int izo_lookup_file(struct presto_file_set *fset, char *path,
+                    struct nameidata *nd);
+int izo_do_truncate(struct presto_file_set *fset, struct dentry *dentry,
+                    loff_t length,  loff_t size_check);
+int izo_log_close(struct presto_log_fd *logfd);
+struct file *izo_log_open(struct presto_file_set *fset, char *name, int flags);
+int izo_init_kml_file(struct presto_file_set *, struct presto_log_fd *);
+int izo_init_lml_file(struct presto_file_set *, struct presto_log_fd *);
+int izo_init_last_rcvd_file(struct presto_file_set *, struct presto_log_fd *);
+
+/* vfs.c */
+
+/* Extra data needed in the KML for rollback operations; this structure is
+ * passed around during the KML-writing process. */
+struct izo_rollback_data {
+        __u32 rb_mode;
+        __u32 rb_rdev;
+        __u64 rb_uid;
+        __u64 rb_gid;
+};
+
+int presto_write_last_rcvd(struct rec_info *recinfo,
+                           struct presto_file_set *fset,
+                           struct lento_vfs_context *info);
+void izo_get_rollback_data(struct inode *inode, struct izo_rollback_data *rb);
+int presto_do_close(struct presto_file_set *fset, struct file *file);
+int presto_do_setattr(struct presto_file_set *fset, struct dentry *dentry,
+                      struct iattr *iattr, struct lento_vfs_context *info);
+int presto_do_create(struct presto_file_set *fset, struct dentry *dir,
+                     struct dentry *dentry, int mode,
+                     struct lento_vfs_context *info);
+int presto_do_link(struct presto_file_set *fset, struct dentry *dir,
+                   struct dentry *old_dentry, struct dentry *new_dentry,
+                   struct lento_vfs_context *info);
+int presto_do_unlink(struct presto_file_set *fset, struct dentry *dir,
+                     struct dentry *dentry, struct lento_vfs_context *info);
+int presto_do_symlink(struct presto_file_set *fset, struct dentry *dir,
+                      struct dentry *dentry, const char *name,
+                      struct lento_vfs_context *info);
+int presto_do_mkdir(struct presto_file_set *fset, struct dentry *dir,
+                    struct dentry *dentry, int mode,
+                    struct lento_vfs_context *info);
+int presto_do_rmdir(struct presto_file_set *fset, struct dentry *dir,
+                    struct dentry *dentry, struct lento_vfs_context *info);
+int presto_do_mknod(struct presto_file_set *fset, struct dentry *dir,
+                    struct dentry *dentry, int mode, dev_t dev,
+                    struct lento_vfs_context *info);
+int do_rename(struct presto_file_set *fset, struct dentry *old_dir,
+              struct dentry *old_dentry, struct dentry *new_dir,
+              struct dentry *new_dentry, struct lento_vfs_context *info);
+int presto_do_statfs (struct presto_file_set *fset,
+                      struct kstatfs * buf);
+
+int lento_setattr(const char *name, struct iattr *iattr,
+                  struct lento_vfs_context *info);
+int lento_create(const char *name, int mode, struct lento_vfs_context *info);
+int lento_link(const char *oldname, const char *newname,
+               struct lento_vfs_context *info);
+int lento_unlink(const char *name, struct lento_vfs_context *info);
+int lento_symlink(const char *oldname,const char *newname,
+                  struct lento_vfs_context *info);
+int lento_mkdir(const char *name, int mode, struct lento_vfs_context *info);
+int lento_rmdir(const char *name, struct lento_vfs_context *info);
+int lento_mknod(const char *name, int mode, dev_t dev,
+                struct lento_vfs_context *info);
+int lento_rename(const char *oldname, const char *newname,
+                 struct lento_vfs_context *info);
+int lento_iopen(const char *name, ino_t ino, unsigned int generation,int flags);
+
+/* journal.c */
+
+#define JOURNAL_PAGE_SZ  PAGE_SIZE
+
+int presto_no_journal(struct presto_file_set *fset);
+int journal_fetch(int minor);
+int presto_log(struct presto_file_set *fset, struct rec_info *rec,
+               const char *buf, size_t size,
+               const char *string1, int len1, 
+               const char *string2, int len2,
+               const char *string3, int len3);
+int presto_get_fileid(int minor, struct presto_file_set *fset,
+                      struct dentry *dentry);
+int presto_journal_setattr(struct rec_info *rec, struct presto_file_set *fset,
+                           struct dentry *dentry, struct presto_version *old_ver,
+                           struct izo_rollback_data *, struct iattr *iattr);
+int presto_journal_create(struct rec_info *rec, struct presto_file_set *fset,
+                          struct dentry *dentry,
+                          struct presto_version *tgt_dir_ver,
+                          struct presto_version *new_file_ver, int mode);
+int presto_journal_link(struct rec_info *rec, struct presto_file_set *fset,
+                        struct dentry *src, struct dentry *tgt,
+                        struct presto_version *tgt_dir_ver,
+                        struct presto_version *new_link_ver);
+int presto_journal_unlink(struct rec_info *rec, struct presto_file_set *fset,
+                          struct dentry *dir,
+                          struct presto_version *tgt_dir_ver,
+                          struct presto_version *old_file_ver,
+                          struct izo_rollback_data *, struct dentry *dentry,
+                          char *old_target, int old_targetlen);
+int presto_journal_symlink(struct rec_info *rec, struct presto_file_set *fset,
+                           struct dentry *dentry, const char *target,
+                           struct presto_version *tgt_dir_ver,
+                           struct presto_version *new_link_ver);
+int presto_journal_mkdir(struct rec_info *rec, struct presto_file_set *fset,
+                         struct dentry *dentry,
+                         struct presto_version *tgt_dir_ver,
+                         struct presto_version *new_dir_ver, int mode);
+int presto_journal_rmdir(struct rec_info *rec, struct presto_file_set *fset,
+                         struct dentry *dentry,
+                         struct presto_version *tgt_dir_ver,
+                         struct presto_version *old_dir_ver,
+                         struct izo_rollback_data *, int len, const char *name);
+int presto_journal_mknod(struct rec_info *rec, struct presto_file_set *fset,
+                         struct dentry *dentry,
+                         struct presto_version *tgt_dir_ver,
+                         struct presto_version *new_node_ver, int mode,
+                         int dmajor, int dminor);
+int presto_journal_rename(struct rec_info *rec, struct presto_file_set *fset,
+                          struct dentry *src, struct dentry *tgt,
+                          struct presto_version *src_dir_ver,
+                          struct presto_version *tgt_dir_ver);
+int presto_journal_open(struct rec_info *, struct presto_file_set *,
+                        struct dentry *, struct presto_version *old_ver);
+int presto_journal_close(struct rec_info *rec, struct presto_file_set *,
+                         struct presto_file_data *, struct dentry *,
+                         struct presto_version *old_file_ver,
+                         struct presto_version *new_file_ver);
+int presto_write_lml_close(struct rec_info *rec,
+                           struct presto_file_set *fset, 
+                           struct file *file,
+                           __u64 remote_ino,
+                           __u64 remote_generation,
+                           struct presto_version *remote_version,
+                           struct presto_version *new_file_ver);
+void presto_log_op(void *data, int len);
+loff_t presto_kml_offset(struct presto_file_set *fset);
+
+/* upcall.c */
+#define SYNCHRONOUS 0
+#define ASYNCHRONOUS 1
+/* asynchronous calls */
+int izo_upc_kml(int minor, __u64 offset, __u32 first_recno, __u64 length,
+                __u32 last_recno, char *fsetname);
+int izo_upc_kml_truncate(int minor, __u64 length, __u32 last_recno,
+                         char *fsetname);
+int izo_upc_go_fetch_kml(int minor, char *fsetname, uuid_t uuid, __u64 kmlsize);
+int izo_upc_backfetch(int minor, char *path, char *fileset, 
+                      struct lento_vfs_context *);
+
+/* synchronous calls */
+int izo_upc_get_fileid(int minor, __u32 reclen, char *rec, 
+                       __u32 pathlen, char *path, char *fsetname);
+int izo_upc_permit(int minor, struct dentry *, __u32 pathlen, char *path,
+                   char *fset);
+int izo_upc_open(int minor, __u32 pathlen, char *path, char *fsetname, 
+                 struct lento_vfs_context *info);
+int izo_upc_connect(int minor, __u64 ip_address, __u64 port, __u8 uuid[16],
+                    int client_flag);
+int izo_upc_revoke_permit(int minor, char *fsetname, uuid_t uuid);
+int izo_upc_set_kmlsize(int minor, char *fsetname, uuid_t uuid, __u64 kmlsize);
+int izo_upc_client_make_branch(int minor, char *fsetname);
+int izo_upc_server_make_branch(int minor, char *fsetname);
+int izo_upc_branch_undo(int minor, char *fsetname, char *branchname);
+int izo_upc_branch_redo(int minor, char *fsetname, char *branchname);
+int izo_upc_repstatus(int minor,  char * fsetname, struct izo_rcvd_rec *lr_server);
+
+/* general mechanism */
+int izo_upc_upcall(int minor, int *size, struct izo_upcall_hdr *, int async);
+
+/* replicator.c */
+int izo_repstatus(struct presto_file_set *fset, __u64 client_kmlsize, 
+                  struct izo_rcvd_rec *lr_client, struct izo_rcvd_rec *lr_server);
+int izo_rep_cache_init(struct presto_file_set *);
+loff_t izo_rcvd_get(struct izo_rcvd_rec *, struct presto_file_set *, char *uuid);
+loff_t izo_rcvd_write(struct presto_file_set *, struct izo_rcvd_rec *);
+loff_t izo_rcvd_upd_remote(struct presto_file_set *fset, char * uuid,  __u64 remote_recno,
+                           __u64 remote_offset);
+
+int izo_ioctl_packlen(struct izo_ioctl_data *data);
+
+/* sysctl.c */
+int init_intermezzo_sysctl(void);
+void cleanup_intermezzo_sysctl(void);
+
+/* ext_attr.c */
+/* We will be more tolerant than the default ea patch with attr name sizes and
+ * the size of value. If these come via VFS from the default ea patches, the
+ * corresponding character strings will be truncated anyway. During journalling
+ * we journal length for both name and value. See journal_set_ext_attr.
+ */
+#define PRESTO_EXT_ATTR_NAME_MAX 128
+#define PRESTO_EXT_ATTR_VALUE_MAX 8192
+
+#define PRESTO_ALLOC(ptr, size)                                         \
+do {                                                                    \
+        long s = (size);                                                \
+        (ptr) = kmalloc(s, GFP_KERNEL);                                 \
+        if ((ptr) == NULL)                                              \
+                CERROR("IZO: out of memory at %s:%d (trying to "        \
+                       "allocate %ld)\n", __FILE__, __LINE__, s);       \
+        else {                                                          \
+                presto_kmem_inc((ptr), s);                              \
+                memset((ptr), 0, s);                                    \
+        }                                                               \
+        CDEBUG(D_MALLOC, "kmalloced: %ld at %p (tot %ld).\n",           \
+               s, (ptr), presto_kmemory);                               \
+} while (0)
+
+#define PRESTO_FREE(ptr, size)                                          \
+do {                                                                    \
+        long s = (size);                                                \
+        if ((ptr) == NULL) {                                            \
+                CERROR("IZO: free NULL pointer (%ld bytes) at "         \
+                       "%s:%d\n", s, __FILE__, __LINE__);               \
+                break;                                                  \
+        }                                                               \
+        kfree(ptr);                                                     \
+        CDEBUG(D_MALLOC, "kfreed: %ld at %p (tot %ld).\n",              \
+               s, (ptr), presto_kmemory);                               \
+        presto_kmem_dec((ptr), s);                                      \
+} while (0)
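+
+/*
+ * Minimal usage sketch for PRESTO_ALLOC/PRESTO_FREE (illustrative only; the
+ * helper below is not used elsewhere).  The same size must be passed to
+ * PRESTO_FREE so the presto_kmem_inc/presto_kmem_dec accounting balances.
+ */
+static inline char *presto_alloc_page_example(void)
+{
+        char *buf;
+
+        PRESTO_ALLOC(buf, PAGE_SIZE);   /* zeroed on success, NULL on failure */
+        return buf;                     /* caller: PRESTO_FREE(buf, PAGE_SIZE) */
+}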
+
+static inline int dentry_name_cmp(struct dentry *dentry, char *name)
+{
+        return (strlen(name) == dentry->d_name.len &&
+                memcmp(name, dentry->d_name.name, dentry->d_name.len) == 0);
+}
+
+static inline char *strdup(char *str)
+{
+        char *tmp;
+        tmp = kmalloc(strlen(str) + 1, GFP_KERNEL);
+        if (tmp)
+                memcpy(tmp, str, strlen(str) + 1);
+               
+        return tmp;
+}
+
+static inline int izo_ioctl_is_invalid(struct izo_ioctl_data *data)
+{
+        if (data->ioc_len > (1<<30)) {
+                CERROR("IZO ioctl: ioc_len larger than 1<<30\n");
+                return 1;
+        }
+        if (data->ioc_inllen1 > (1<<30)) {
+                CERROR("IZO ioctl: ioc_inllen1 larger than 1<<30\n");
+                return 1;
+        }
+        if (data->ioc_inllen2 > (1<<30)) {
+                CERROR("IZO ioctl: ioc_inllen2 larger than 1<<30\n");
+                return 1;
+        }
+        if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
+                CERROR("IZO ioctl: inlbuf1 pointer but 0 length\n");
+                return 1;
+        }
+        if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
+                CERROR("IZO ioctl: inlbuf2 pointer but 0 length\n");
+                return 1;
+        }
+        if (data->ioc_pbuf1 && !data->ioc_plen1) {
+                CERROR("IZO ioctl: pbuf1 pointer but 0 length\n");
+                return 1;
+        }
+        if (data->ioc_pbuf2 && !data->ioc_plen2) {
+                CERROR("IZO ioctl: pbuf2 pointer but 0 length\n");
+                return 1;
+        }
+        if (izo_ioctl_packlen(data) != data->ioc_len ) {
+                CERROR("IZO ioctl: packlen does not match ioc_len\n");
+                return 1;
+        }
+        if (data->ioc_inllen1 &&
+            data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
+                CERROR("IZO ioctl: inlbuf1 not 0 terminated\n");
+                return 1;
+        }
+        if (data->ioc_inllen2 &&
+            data->ioc_bulk[size_round(data->ioc_inllen1) + data->ioc_inllen2
+                           - 1] != '\0') {
+                CERROR("IZO ioctl: inlbuf2 not 0 terminated\n");
+                return 1;
+        }
+        return 0;
+}
+
+/* buffer MUST be at least the size of izo_ioctl_hdr */
+static inline int izo_ioctl_getdata(char *buf, char *end, void *arg)
+{
+        struct izo_ioctl_hdr *hdr;
+        struct izo_ioctl_data *data;
+        int err;
+        ENTRY;
+
+        hdr = (struct izo_ioctl_hdr *)buf;
+        data = (struct izo_ioctl_data *)buf;
+
+        err = copy_from_user(buf, (void *)arg, sizeof(*hdr));
+        if ( err ) {
+                EXIT;
+                return err;
+        }
+
+        if (hdr->ioc_version != IZO_IOCTL_VERSION) {
+                CERROR("IZO: version mismatch kernel vs application\n");
+                return -EINVAL;
+        }
+
+        if (hdr->ioc_len + buf >= end) {
+                CERROR("IZO: user buffer exceeds kernel buffer\n");
+                return -EINVAL;
+        }
+
+        if (hdr->ioc_len < sizeof(struct izo_ioctl_data)) {
+                CERROR("IZO: user buffer too small for ioctl\n");
+                return -EINVAL;
+        }
+
+        err = copy_from_user(buf, (void *)arg, hdr->ioc_len);
+        if ( err ) {
+                EXIT;
+                return err;
+        }
+
+        if (izo_ioctl_is_invalid(data)) {
+                CERROR("IZO: ioctl not correctly formatted\n");
+                return -EINVAL;
+        }
+
+        if (data->ioc_inllen1) {
+                data->ioc_inlbuf1 = &data->ioc_bulk[0];
+        }
+
+        if (data->ioc_inllen2) {
+                data->ioc_inlbuf2 = &data->ioc_bulk[0] +
+                        size_round(data->ioc_inllen1);
+        }
+
+        EXIT;
+        return 0;
+}
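+
+/*
+ * Usage sketch for izo_ioctl_getdata() (illustrative only; nothing below
+ * calls this).  The PAGE_SIZE scratch buffer is an assumption; a real caller
+ * may size it to the largest ioctl payload it is willing to accept.
+ */
+static inline int izo_ioctl_getdata_example(void *uarg)
+{
+        char *buf;
+        int rc;
+
+        PRESTO_ALLOC(buf, PAGE_SIZE);
+        if (buf == NULL)
+                return -ENOMEM;
+        /* copies and validates the header, then the full payload, and points
+         * ioc_inlbuf1/ioc_inlbuf2 at the right offsets inside ioc_bulk */
+        rc = izo_ioctl_getdata(buf, buf + PAGE_SIZE, uarg);
+        /* on success a caller would now dispatch on the izo_ioctl_data in buf */
+        PRESTO_FREE(buf, PAGE_SIZE);
+        return rc;
+}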
+
+# define MYPATHLEN(buffer, path) ((buffer) + PAGE_SIZE - (path))
+
+# define free kfree
+# define malloc(a) kmalloc(a, GFP_KERNEL)
+# define printf printk
+int kml_reint_rec(struct file *dir, struct izo_ioctl_data *data);
+int izo_get_fileid(struct file *dir, struct izo_ioctl_data *data);
+int izo_set_fileid(struct file *dir, struct izo_ioctl_data *data);
+
+#else /* __KERNEL__ */
+# include <stdlib.h>
+# include <stdio.h>
+# include <sys/types.h>
+# include <sys/ioctl.h>
+# include <string.h>
+
+# define printk printf
+# ifndef CERROR
+#   define CERROR printf
+# endif
+# define kmalloc(a,b) malloc(a)
+
+void init_fsreintdata (void);
+int kml_fsreint(struct kml_rec *rec, char *basedir);
+int kml_iocreint(__u32 size, char *ptr, __u32 offset, int dird,
+                 uuid_t uuid, __u32 generate_kml);
+
+static inline void izo_ioctl_init(struct izo_ioctl_data *data)
+{
+        memset(data, 0, sizeof(*data));
+        data->ioc_len = sizeof(*data);
+        data->ioc_version = IZO_IOCTL_VERSION;
+}
+
+static inline int
+izo_ioctl_pack(struct izo_ioctl_data *data, char **pbuf, int max)
+{
+        char *ptr;
+        struct izo_ioctl_data *overlay;
+        data->ioc_len = izo_ioctl_packlen(data);
+        data->ioc_version = IZO_IOCTL_VERSION;
+
+        if (*pbuf && izo_ioctl_packlen(data) > max)
+                return 1;
+        if (*pbuf == NULL)
+                *pbuf = malloc(data->ioc_len);
+        if (*pbuf == NULL)
+                return 1;
+        overlay = (struct izo_ioctl_data *)*pbuf;
+        memcpy(*pbuf, data, sizeof(*data));
+
+        ptr = overlay->ioc_bulk;
+        if (data->ioc_inlbuf1)
+                LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
+        if (data->ioc_inlbuf2)
+                LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
+        if (izo_ioctl_is_invalid(overlay))
+                return 1;
+
+        return 0;
+}
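+
+/*
+ * Packing sketch for the userspace side (illustrative only; the helper and
+ * its fileset-name argument are assumptions of this example).  The returned
+ * buffer is malloc()ed by izo_ioctl_pack and is what gets handed to ioctl().
+ */
+static inline char *izo_ioctl_pack_example(char *fsetname)
+{
+        struct izo_ioctl_data data;
+        char *buf = NULL;
+
+        izo_ioctl_init(&data);
+        data.ioc_inllen2 = strlen(fsetname) + 1;  /* fileset goes in inlbuf2 */
+        data.ioc_inlbuf2 = fsetname;
+        if (izo_ioctl_pack(&data, &buf, 0) != 0) {
+                free(buf);
+                return NULL;
+        }
+        return buf;     /* caller passes buf to ioctl() and then free()s it */
+}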
+
+#endif /* __KERNEL__*/
+
+#define IZO_ERROR_NAME 1
+#define IZO_ERROR_UPDATE 2
+#define IZO_ERROR_DELETE 3
+#define IZO_ERROR_RENAME 4
+
+static inline char *izo_error(int err)
+{
+#ifndef __KERNEL__
+        if (err <= 0)
+                return strerror(-err);
+#endif
+        switch (err) {
+        case IZO_ERROR_NAME:
+                return "InterMezzo name/name conflict";
+        case IZO_ERROR_UPDATE:
+                return "InterMezzo update/update conflict";
+        case IZO_ERROR_DELETE:
+                return "InterMezzo update/delete conflict";
+        case IZO_ERROR_RENAME:
+                return "InterMezzo rename/rename conflict";
+        }
+        return "Unknown InterMezzo error";
+}
+
+/* kml_unpack.c */
+char *kml_print_rec(struct kml_rec *rec, int brief);
+int kml_unpack(struct kml_rec *rec, char **buf, char *end);
+
+/* fs 2.5 compat */
+
+/* is_read_only() is replaced by bdev_read_only which takes struct
+   block_device *.  Since this is only needed for debugging, it can be
+   safely ignored now.
+*/
+#define is_read_only(dev) 0
+
+#endif
diff --git a/fs/intermezzo/intermezzo_idl.h b/fs/intermezzo/intermezzo_idl.h
new file mode 100644 (file)
index 0000000..4371b16
--- /dev/null
@@ -0,0 +1,304 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2001 Tacit Networks, Inc.
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __INTERMEZZO_IDL_H__
+#define __INTERMEZZO_IDL_H__
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/* this file contains all data structures used in InterMezzo's interfaces:
+ * - upcalls
+ * - ioctl's
+ * - KML records
+ * - RCVD records
+ * - rpc's
+ */ 
+
+/* UPCALL */
+#define INTERMEZZO_MINOR 248   
+
+
+#define IZO_UPC_VERSION 0x00010002
+#define IZO_UPC_PERMIT        1
+#define IZO_UPC_CONNECT       2
+#define IZO_UPC_GO_FETCH_KML  3
+#define IZO_UPC_OPEN          4
+#define IZO_UPC_REVOKE_PERMIT 5
+#define IZO_UPC_KML           6
+#define IZO_UPC_BACKFETCH     7
+#define IZO_UPC_KML_TRUNC     8
+#define IZO_UPC_SET_KMLSIZE   9
+#define IZO_UPC_BRANCH_UNDO   10
+#define IZO_UPC_BRANCH_REDO   11
+#define IZO_UPC_GET_FILEID    12
+#define IZO_UPC_CLIENT_MAKE_BRANCH    13
+#define IZO_UPC_SERVER_MAKE_BRANCH    14
+#define IZO_UPC_REPSTATUS    15
+
+#define IZO_UPC_LARGEST_OPCODE 15
+
+struct izo_upcall_hdr {
+        __u32 u_len;
+        __u32 u_version;
+        __u32 u_opc;
+        __u32 u_uniq;
+        __u32 u_pid;
+        __u32 u_uid;
+        __u32 u_pathlen;
+        __u32 u_fsetlen;
+        __u64 u_offset;
+        __u64 u_length;
+        __u32 u_first_recno;
+        __u32 u_last_recno;
+        __u32 u_async;
+        __u32 u_reclen;
+        __u8  u_uuid[16];
+};
+
+/* This structure _must_ sit at the beginning of the buffer */
+struct izo_upcall_resp {
+        __u32 opcode;
+        __u32 unique;    
+        __u32 result;
+};
+
+
+/* IOCTL */
+
+#define IZO_IOCTL_VERSION 0x00010003
+
+/* maximum size supported for ioc_pbuf1 */
+#define KML_MAX_BUF (64*1024)
+
+struct izo_ioctl_hdr { 
+        __u32  ioc_len;
+        __u32  ioc_version;
+};
+
+struct izo_ioctl_data {
+        __u32 ioc_len;
+        __u32 ioc_version;
+        __u32 ioc_izodev;
+        __u32 ioc_kmlrecno;
+        __u64 ioc_kmlsize;
+        __u32 ioc_flags;
+        __s32 ioc_inofd;
+        __u64 ioc_ino;
+        __u64 ioc_generation;
+        __u32 ioc_mark_what;
+        __u32 ioc_and_flag;
+        __u32 ioc_or_flag;
+        __u32 ioc_dev;
+        __u32 ioc_offset;
+        __u32 ioc_slot;
+        __u64 ioc_uid;
+        __u8  ioc_uuid[16];
+
+        __u32 ioc_inllen1;   /* path */
+        char *ioc_inlbuf1;
+        __u32 ioc_inllen2;   /* fileset */
+        char *ioc_inlbuf2;
+
+        __u32 ioc_plen1;     /* buffers in user space (KML) */
+        char *ioc_pbuf1;
+        __u32 ioc_plen2;     /* buffers in user space (KML) */
+        char *ioc_pbuf2;
+
+        char  ioc_bulk[0];
+};
+
+#define IZO_IOC_DEVICE          _IOW ('p',0x50, void *)
+#define IZO_IOC_REINTKML        _IOW ('p',0x51, void *)
+#define IZO_IOC_GET_RCVD        _IOW ('p',0x52, void *)
+#define IZO_IOC_SET_IOCTL_UID   _IOW ('p',0x53, void *)
+#define IZO_IOC_GET_KML_SIZE    _IOW ('p',0x54, void *)
+#define IZO_IOC_PURGE_FILE_DATA _IOW ('p',0x55, void *)
+#define IZO_IOC_CONNECT         _IOW ('p',0x56, void *)
+#define IZO_IOC_GO_FETCH_KML    _IOW ('p',0x57, void *)
+#define IZO_IOC_MARK            _IOW ('p',0x58, void *)
+#define IZO_IOC_CLEAR_FSET      _IOW ('p',0x59, void *)
+#define IZO_IOC_CLEAR_ALL_FSETS _IOW ('p',0x60, void *)
+#define IZO_IOC_SET_FSET        _IOW ('p',0x61, void *)
+#define IZO_IOC_REVOKE_PERMIT   _IOW ('p',0x62, void *)
+#define IZO_IOC_SET_KMLSIZE     _IOW ('p',0x63, void *)
+#define IZO_IOC_CLIENT_MAKE_BRANCH _IOW ('p',0x64, void *)
+#define IZO_IOC_SERVER_MAKE_BRANCH _IOW ('p',0x65, void *)
+#define IZO_IOC_BRANCH_UNDO    _IOW ('p',0x66, void *)
+#define IZO_IOC_BRANCH_REDO    _IOW ('p',0x67, void *)
+#define IZO_IOC_SET_PID        _IOW ('p',0x68, void *)
+#define IZO_IOC_SET_CHANNEL    _IOW ('p',0x69, void *)
+#define IZO_IOC_GET_CHANNEL    _IOW ('p',0x70, void *)
+#define IZO_IOC_GET_FILEID    _IOW ('p',0x71, void *)
+#define IZO_IOC_ADJUST_LML    _IOW ('p',0x72, void *)
+#define IZO_IOC_SET_FILEID    _IOW ('p',0x73, void *)
+#define IZO_IOC_REPSTATUS    _IOW ('p',0x74, void *)
+
+/* marking flags for fsets */
+#define FSET_CLIENT_RO        0x00000001
+#define FSET_LENTO_RO         0x00000002
+#define FSET_HASPERMIT        0x00000004 /* we have a permit to WB */
+#define FSET_INSYNC           0x00000008 /* this fileset is in sync */
+#define FSET_PERMIT_WAITING   0x00000010 /* Lento is waiting for permit */
+#define FSET_STEAL_PERMIT     0x00000020 /* take permit if Lento is dead */
+#define FSET_JCLOSE_ON_WRITE  0x00000040 /* Journal closes on writes */
+#define FSET_DATA_ON_DEMAND   0x00000080 /* update data on file_open() */
+#define FSET_PERMIT_EXCLUSIVE 0x00000100 /* only one permitholder allowed */
+#define FSET_HAS_BRANCHES     0x00000200 /* this fileset contains branches */
+#define FSET_IS_BRANCH        0x00000400 /* this fileset is a branch */
+#define FSET_FLAT_BRANCH      0x00000800 /* this fileset is ROOT with branches */
+
+/* what to mark indicator (ioctl parameter) */
+#define MARK_DENTRY   101
+#define MARK_FSET     102
+#define MARK_CACHE    103
+#define MARK_GETFL    104
+
+/* KML */
+
+#define KML_MAJOR_VERSION 0x00010000
+#define KML_MINOR_VERSION 0x00000002
+#define KML_OPCODE_NOOP          0
+#define KML_OPCODE_CREATE        1
+#define KML_OPCODE_MKDIR         2
+#define KML_OPCODE_UNLINK        3
+#define KML_OPCODE_RMDIR         4
+#define KML_OPCODE_CLOSE         5
+#define KML_OPCODE_SYMLINK       6
+#define KML_OPCODE_RENAME        7
+#define KML_OPCODE_SETATTR       8
+#define KML_OPCODE_LINK          9
+#define KML_OPCODE_OPEN          10
+#define KML_OPCODE_MKNOD         11
+#define KML_OPCODE_WRITE         12
+#define KML_OPCODE_RELEASE       13
+#define KML_OPCODE_TRUNC         14
+#define KML_OPCODE_SETEXTATTR    15
+#define KML_OPCODE_DELEXTATTR    16
+#define KML_OPCODE_KML_TRUNC     17
+#define KML_OPCODE_GET_FILEID    18
+#define KML_OPCODE_NUM           19
+/* new stuff */
+struct presto_version {
+        __u32 pv_mtime_sec;
+        __u32 pv_mtime_nsec;
+        __u32 pv_ctime_sec;
+        __u32 pv_ctime_nsec;
+        __u64 pv_size;
+};
+
+struct kml_prefix_hdr {
+        __u32                    len;
+        __u32                    version;
+        __u32                    pid;
+        __u32                    auid;
+        __u32                    fsuid;
+        __u32                    fsgid;
+        __u32                    opcode;
+        __u32                    ngroups;
+};
+
+struct kml_prefix { 
+        struct kml_prefix_hdr    *hdr;
+        __u32                    *groups;
+};
+
+struct kml_suffix { 
+        __u32                    prevrec;
+        __u32                    recno;
+        __u32                    time;
+        __u32                    len;
+};
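+
+/* On disk a KML record is laid out as: a struct kml_prefix_hdr, then
+ * "ngroups" 32-bit group ids, then the opcode-specific body, then a struct
+ * kml_suffix whose len repeats the prefix len (see journal_log_prefix* and
+ * journal_log_suffix in journal.c).  kml_rec below is the unpacked,
+ * in-memory view of such a record. */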
+
+struct kml_rec {
+        char                   *buf;
+        struct kml_prefix       prefix;
+        __u64                   offset;
+        char                   *path;
+        int                     pathlen;
+        char                   *name;
+        int                     namelen;
+        char                   *target;
+        int                     targetlen;
+        struct presto_version  *old_objectv;
+        struct presto_version  *new_objectv;
+        struct presto_version  *old_parentv;
+        struct presto_version  *new_parentv;
+        struct presto_version  *old_targetv;
+        struct presto_version  *new_targetv;
+        __u32                   valid;
+        __u32                   mode;
+        __u32                   uid;
+        __u32                   gid;
+        __u64                   size;
+        __u32                   mtime_sec;
+        __u32                   mtime_nsec;
+        __u32                   ctime_sec;
+        __u32                   ctime_nsec;
+        __u32                   flags;
+        __u32                   ino;
+        __u32                   rdev;
+        __u32                   major;
+        __u32                   minor;
+        __u32                   generation;
+        __u32                   old_mode;
+        __u32                   old_rdev;
+        __u64                   old_uid;
+        __u64                   old_gid;
+        char                   *old_target;
+        int                     old_targetlen;
+        struct kml_suffix      *suffix;
+};
+
+
+/* RCVD */ 
+
+/* izo_rcvd_rec records fill the .intermezzo/fset/last_rcvd file; each one
+ * holds our view of the reintegration offsets for a given peer.
+ *
+ * The only exception is the last_rcvd record which has a UUID consisting of all
+ * zeroes; this record's lr_local_offset field is the logical byte offset of our
+ * KML, which is updated when KML truncation takes place.  All other fields are
+ * reserved. */
+
+/* XXX - document how clean shutdowns are recorded */
+
+struct izo_rcvd_rec { 
+        __u8    lr_uuid[16];       /* which peer? */
+        __u64   lr_remote_recno;   /* last confirmed remote recno  */
+        __u64   lr_remote_offset;  /* last confirmed remote offset */
+        __u64   lr_local_recno;    /* last locally reinted recno   */
+        __u64   lr_local_offset;   /* last locally reinted offset  */
+        __u64   lr_last_ctime;     /* the largest ctime that has reintegrated */
+};
+
+/* Cache purge database
+ *
+ * Each DB entry is this structure followed by the path name, no trailing NUL. */
+struct izo_purge_entry {
+        __u64 p_atime;
+        __u32 p_pathlen;
+};
+
+/* RPC */
+
+#endif
diff --git a/fs/intermezzo/intermezzo_journal.h b/fs/intermezzo/intermezzo_journal.h
new file mode 100644 (file)
index 0000000..99d588d
--- /dev/null
@@ -0,0 +1,24 @@
+#ifndef __PRESTO_JOURNAL_H
+#define __PRESTO_JOURNAL_H
+
+
+struct journal_prefix {
+       int len;
+        u32 version;
+       int pid;
+       int uid;
+       int fsuid;
+       int fsgid;
+       int opcode;
+        u32 ngroups;
+        u32 groups[0];
+};
+
+struct journal_suffix {
+       unsigned long prevrec;  /* offset of previous record for dentry */
+       int recno;
+       int time;
+       int len;
+};
+
+#endif
diff --git a/fs/intermezzo/intermezzo_kml.h b/fs/intermezzo/intermezzo_kml.h
new file mode 100644 (file)
index 0000000..ca612e6
--- /dev/null
@@ -0,0 +1,260 @@
+#ifndef __INTERMEZZO_KML_H
+#define __INTERMEZZO_KML_H
+
+#include "intermezzo_psdev.h"
+#include <linux/fs.h>
+#include "intermezzo_journal.h"
+
+#define PRESTO_KML_MAJOR_VERSION 0x00010000
+#define PRESTO_KML_MINOR_VERSION 0x00002001
+#define PRESTO_OP_NOOP          0
+#define PRESTO_OP_CREATE        1
+#define PRESTO_OP_MKDIR         2
+#define PRESTO_OP_UNLINK        3
+#define PRESTO_OP_RMDIR         4
+#define PRESTO_OP_CLOSE         5
+#define PRESTO_OP_SYMLINK       6
+#define PRESTO_OP_RENAME        7
+#define PRESTO_OP_SETATTR       8
+#define PRESTO_OP_LINK          9
+#define PRESTO_OP_OPEN          10
+#define PRESTO_OP_MKNOD         11
+#define PRESTO_OP_WRITE         12
+#define PRESTO_OP_RELEASE       13
+#define PRESTO_OP_TRUNC         14
+#define PRESTO_OP_SETEXTATTR    15
+#define PRESTO_OP_DELEXTATTR    16
+
+#define PRESTO_LML_DONE        1 /* flag to get first write to do LML */
+#define KML_KOP_MARK            0xffff
+
+struct presto_lml_data {
+        loff_t   rec_offset;
+};
+
+struct big_journal_prefix {
+        u32 len;
+        u32 version; 
+        u32 pid;
+        u32 uid;
+        u32 fsuid;
+        u32 fsgid;
+        u32 opcode;
+        u32 ngroups;
+        u32 groups[NGROUPS_SMALL];
+};
+
+enum kml_opcode {
+        KML_CREATE = 1,
+        KML_MKDIR,
+        KML_UNLINK,
+        KML_RMDIR,
+        KML_CLOSE,
+        KML_SYMLINK,
+        KML_RENAME,
+        KML_SETATTR,
+        KML_LINK,
+        KML_OPEN,
+        KML_MKNOD,
+        KML_ENDMARK = 0xff
+};
+
+struct kml_create {
+       char                    *path;
+       struct presto_version   new_objectv, 
+                               old_parentv, 
+                               new_parentv;
+       int                     mode;
+       int                     uid;
+       int                     gid;
+};
+
+struct kml_open {
+};
+
+struct kml_mkdir {
+       char                    *path;
+       struct presto_version   new_objectv, 
+                               old_parentv, 
+                               new_parentv;
+       int                     mode;
+       int                     uid;
+       int                     gid;
+};
+
+struct kml_unlink {
+       char                    *path,  
+                               *name;
+       struct presto_version   old_tgtv, 
+                               old_parentv, 
+                               new_parentv;
+};
+
+struct kml_rmdir {
+       char                    *path, 
+                               *name;
+       struct presto_version   old_tgtv, 
+                               old_parentv, 
+                               new_parentv;
+};
+
+struct kml_close {
+       int                     open_mode, 
+                               open_uid, 
+                               open_gid;
+       char                    *path;
+       struct presto_version   new_objectv;
+       __u64                   ino;
+       int                     generation;
+};
+
+struct kml_symlink {
+       char                    *sourcepath,    
+                               *targetpath;
+       struct presto_version   new_objectv, 
+                               old_parentv, 
+                               new_parentv;
+       int                     uid;
+       int                     gid;
+};
+
+struct kml_rename {
+       char                    *sourcepath, 
+                               *targetpath;
+       struct presto_version   old_objectv, 
+                               new_objectv, 
+                               old_tgtv, 
+                               new_tgtv;
+};
+
+struct kml_setattr {
+       char                    *path;
+       struct presto_version   old_objectv;
+       struct iattr            iattr;
+};
+
+struct kml_link {
+       char                    *sourcepath,    
+                               *targetpath;
+       struct presto_version   new_objectv, 
+                               old_parentv, 
+                               new_parentv;
+};
+
+struct kml_mknod {
+       char                    *path;
+       struct presto_version   new_objectv, 
+                               old_parentv, 
+                               new_parentv;
+       int                     mode;
+       int                     uid;
+       int                     gid;
+       int                     major;
+       int                     minor;
+};
+
+/* kml record items for optimizing */
+struct kml_kop_node
+{
+        u32             kml_recno;
+        u32             kml_flag;
+        u32             kml_op;
+        nlink_t         i_nlink;
+        u32             i_ino;
+};
+
+struct kml_kop_lnode
+{
+        struct list_head chains;
+        struct kml_kop_node node;
+};
+
+struct kml_endmark {
+       u32                     total;
+       struct kml_kop_node     *kop;
+};
+
+/* kml_flag */
+#define  KML_REC_DELETE               1
+#define  KML_REC_EXIST                0
+
+struct kml_optimize {
+       struct list_head kml_chains;
+        u32              kml_flag;
+        u32              kml_op;
+        nlink_t          i_nlink;
+        u32              i_ino;
+};
+
+struct kml_rec {
+       /* attribute of this record */
+       int                             rec_size;
+        int                            rec_kml_offset;
+
+       struct  big_journal_prefix      rec_head;
+       union {
+               struct kml_create       create;
+               struct kml_open         open;
+               struct kml_mkdir        mkdir;
+               struct kml_unlink       unlink;
+               struct kml_rmdir        rmdir;
+               struct kml_close        close;
+               struct kml_symlink      symlink;
+               struct kml_rename       rename;
+               struct kml_setattr      setattr;
+               struct kml_mknod        mknod;
+               struct kml_link         link;
+               struct kml_endmark      endmark;
+       } rec_kml;
+        struct         journal_suffix          rec_tail;
+
+        /* for kml optimize only */
+        struct  kml_optimize kml_optimize;
+};
+
+/* kml record items for optimizing */
+extern void kml_kop_init (struct presto_file_set *fset);
+extern void kml_kop_addrec (struct presto_file_set *fset, 
+               struct inode *ino, u32 op, u32 flag);
+extern int  kml_kop_flush (struct presto_file_set *fset);
+
+/* defined in kml_setup.c */
+extern int kml_init (struct presto_file_set *fset);
+extern int kml_cleanup (struct presto_file_set *fset);
+
+/* defined in kml.c */
+extern int begin_kml_reint (struct file *file, unsigned long arg);
+extern int do_kml_reint (struct file *file, unsigned long arg);
+extern int end_kml_reint (struct file *file, unsigned long arg);
+
+/* kml_utils.c */
+extern char *dlogit (void *tbuf, const void *sbuf, int size);
+extern char * bdup_printf (char *format, ...);
+
+/* defined in kml_decode.c */
+/* printop */
+#define  PRINT_KML_PREFIX             0x1
+#define  PRINT_KML_SUFFIX             0x2
+#define  PRINT_KML_REC                0x4
+#define  PRINT_KML_OPTIMIZE           0x8
+#define  PRINT_KML_EXIST              0x10
+#define  PRINT_KML_DELETE             0x20
+extern void   kml_printrec (struct kml_rec *rec, int printop);
+extern int    print_allkmlrec (struct list_head *head, int printop);
+extern int    delete_kmlrec (struct list_head *head);
+extern int    kml_decoderec (char *buf, int pos, int buflen, int *size,
+                            struct kml_rec **newrec);
+extern int decode_kmlrec (struct list_head *head, char *kml_buf, int buflen);
+extern void kml_freerec (struct kml_rec *rec);
+
+/* defined in kml_reint.c */
+#define KML_CLOSE_BACKFETCH            1
+extern int kml_reintbuf (struct  kml_fsdata *kml_fsdata,
+                       char *mtpt, struct kml_rec **rec);
+
+/* defined in kml_setup.c */
+extern int kml_init (struct presto_file_set *fset);
+extern int kml_cleanup (struct presto_file_set *fset);
+
+#endif
+
diff --git a/fs/intermezzo/intermezzo_lib.h b/fs/intermezzo/intermezzo_lib.h
new file mode 100644 (file)
index 0000000..21cc0b9
--- /dev/null
@@ -0,0 +1,162 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Data structures unpacking/packing macros & inlines
+ *
+ */
+
+#ifndef _INTERMEZZO_LIB_H
+#define _INTERMEZZO_LIB_H
+
+#ifdef __KERNEL__
+# include <linux/types.h>
+#else
+# include <string.h>
+# include <sys/types.h>
+#endif
+
+static inline int size_round (int val)
+{
+       return (val + 3) & (~0x3);
+}
+
+static inline int size_round0(int val)
+{
+        if (!val) 
+                return 0;
+       return (val + 1 + 3) & (~0x3);
+}
+
+static inline size_t round_strlen(char *fset)
+{
+       return size_round(strlen(fset) + 1);
+}
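+
+/* Examples: size_round(5) == 8 and size_round(4) == 4; size_round0 leaves
+ * room for a terminating NUL, so size_round0(4) == 8 while size_round0(0)
+ * stays 0; round_strlen("abc") == size_round(4) == 4. */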
+
+#ifdef __KERNEL__
+# define NTOH__u32(var) le32_to_cpu(var)
+# define NTOH__u64(var) le64_to_cpu(var)
+# define HTON__u32(var) cpu_to_le32(var)
+# define HTON__u64(var) cpu_to_le64(var)
+#else
+# include <glib.h>
+# define NTOH__u32(var) GUINT32_FROM_LE(var)
+# define NTOH__u64(var) GUINT64_FROM_LE(var)
+# define HTON__u32(var) GUINT32_TO_LE(var)
+# define HTON__u64(var) GUINT64_TO_LE(var)
+#endif
+
+/* 
+ * copy sizeof(type) bytes from pointer to var and move ptr forward.
+ * return EFAULT if pointer goes beyond end
+ */
+#define UNLOGV(var,type,ptr,end)                \
+do {                                            \
+        var = *(type *)ptr;                     \
+        ptr += sizeof(type);                    \
+        if (ptr > end )                         \
+                return -EFAULT;                 \
+} while (0)
+
+/* the following two macros convert to little endian */
+/* type MUST be __u32 or __u64 */
+#define LUNLOGV(var,type,ptr,end)               \
+do {                                            \
+        var = NTOH##type(*(type *)ptr);         \
+        ptr += sizeof(type);                    \
+        if (ptr > end )                         \
+                return -EFAULT;                 \
+} while (0)
+
+/* now log values */
+#define LOGV(var,type,ptr)                      \
+do {                                            \
+        *((type *)ptr) = var;                   \
+        ptr += sizeof(type);                    \
+} while (0)
+
+/* and in network order */
+#define LLOGV(var,type,ptr)                     \
+do {                                            \
+        *((type *)ptr) = HTON##type(var);       \
+        ptr += sizeof(type);                    \
+} while (0)
+
+
+/* 
+ * set var to point at (type *)ptr, move ptr forward with sizeof(type)
+ * return from function with EFAULT if ptr goes beyond end
+ */
+#define UNLOGP(var,type,ptr,end)                \
+do {                                            \
+        var = (type *)ptr;                      \
+        ptr += sizeof(type);                    \
+        if (ptr > end )                         \
+                return -EFAULT;                 \
+} while (0)
+
+#define LOGP(var,type,ptr)                      \
+do {                                            \
+        memcpy(ptr, var, sizeof(type));         \
+        ptr += sizeof(type);                    \
+} while (0)
+
+/* 
+ * set var to point at (char *)ptr, move ptr forward by size_round(len);
+ * return from function with EFAULT if ptr goes beyond end
+ */
+#define UNLOGL(var,type,len,ptr,end)                    \
+do {                                                    \
+        if (len == 0)                                   \
+                var = (type *)0;                        \
+        else {                                          \
+                var = (type *)ptr;                      \
+                ptr += size_round(len * sizeof(type));  \
+        }                                               \
+        if (ptr > end )                                 \
+                return -EFAULT;                         \
+} while (0)
+
+#define UNLOGL0(var,type,len,ptr,end)                           \
+do {                                                            \
+        UNLOGL(var,type,len+1,ptr,end);                         \
+        if ( *((char *)ptr - size_round(len+1) + len) != '\0')  \
+                        return -EFAULT;                         \
+} while (0)
+
+#define LOGL(var,len,ptr)                               \
+do {                                                    \
+        size_t __fill = size_round(len);                \
+        /* Prevent data leakage. */                     \
+        if (__fill > 0)                                 \
+                memset((char *)ptr, 0, __fill);         \
+        memcpy((char *)ptr, (const char *)var, len);    \
+        ptr += __fill;                                  \
+} while (0)
+
+#define LOGL0(var,len,ptr)                              \
+do {                                                    \
+        if (!len) break;                                \
+        memcpy((char *)ptr, (const char *)var, len);    \
+        *((char *)(ptr) + len) = 0;                     \
+        ptr += size_round(len + 1);                     \
+} while (0)
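+
+/*
+ * Round-trip sketch (illustrative only): write a string with LOGL0 and read
+ * it back with UNLOGL0.  "buf", "end", "str" and "len" are assumptions of
+ * this example; buf needs size_round(len + 1) bytes, len must be > 0, and,
+ * as for any user of these macros, EFAULT must be in scope.
+ */
+static inline int izo_logl0_roundtrip(char *buf, char *end, char *str, int len)
+{
+        char *ptr = buf;
+        char *copy;
+
+        LOGL0(str, len, ptr);                /* copies str, appends NUL, pads */
+        ptr = buf;
+        UNLOGL0(copy, char, len, ptr, end);  /* returns -EFAULT past "end" */
+        return (copy == buf) ? 0 : -1;       /* copy points back into buf */
+}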
+
+#endif /* _INTERMEZZO_LIB_H */
+
diff --git a/fs/intermezzo/intermezzo_psdev.h b/fs/intermezzo/intermezzo_psdev.h
new file mode 100644 (file)
index 0000000..fff728a
--- /dev/null
@@ -0,0 +1,55 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+
+#ifndef __PRESTO_PSDEV_H
+#define __PRESTO_PSDEV_H
+
+#define MAX_CHANNEL 16
+#define PROCNAME_SIZE 32
+#include <linux/smp_lock.h>
+
+/* represents state of an instance reached with /dev/intermezzo */
+/* communication pending & processing queues */
+struct upc_channel {
+        unsigned int         uc_seq;
+        wait_queue_head_t    uc_waitq;    /* Lento wait queue */
+        struct list_head     uc_pending;
+        struct list_head     uc_processing;
+        spinlock_t            uc_lock;
+        int                  uc_pid;      /* Lento's pid */
+        int                  uc_hard;     /* allows signals during upcalls */
+        int                  uc_no_filter;
+        int                  uc_no_journal;
+        int                  uc_no_upcall;
+        int                  uc_timeout;  /* in sec: signals will dequeue upc */
+        long                 uc_errorval; /* for testing I/O failures */
+        struct list_head     uc_cache_list;
+        int                  uc_minor;
+};
+
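+/* ISLENTO(minor) is true for the registered Lento process on a channel and
+ * for its children and grandchildren (pid, real_parent and
+ * real_parent->real_parent are all compared against uc_pid), so helpers
+ * forked by Lento are treated as the cache manager too. */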
+#define ISLENTO(minor) (current->pid == izo_channels[minor].uc_pid \
+                || current->real_parent->pid == izo_channels[minor].uc_pid \
+                || current->real_parent->real_parent->pid == izo_channels[minor].uc_pid)
+
+extern struct upc_channel izo_channels[MAX_CHANNEL];
+
+/* message types between presto filesystem in kernel */
+#define REQ_READ   1
+#define REQ_WRITE  2
+#define REQ_ASYNC  4
+#define REQ_DEAD   8
+
+struct upc_req {
+        struct list_head   rq_chain;
+        caddr_t            rq_data;
+        int                rq_flags;
+        int                rq_bufsize;
+        int                rq_rep_size;
+        int                rq_opcode;  /* copied from data to save lookup */
+        int                rq_unique;
+        wait_queue_head_t  rq_sleep;   /* process' wait queue */
+        unsigned long      rq_posttime;
+};
+
+#endif
diff --git a/fs/intermezzo/intermezzo_upcall.h b/fs/intermezzo/intermezzo_upcall.h
new file mode 100644 (file)
index 0000000..0b3e6ff
--- /dev/null
@@ -0,0 +1,146 @@
+/*
+ * Based on cfs.h from Coda, but revamped for increased simplicity.
+ * Linux modifications by Peter Braam, Aug 1996
+ * Rewritten for InterMezzo
+ */
+
+#ifndef _PRESTO_HEADER_
+#define _PRESTO_HEADER_
+
+
+/* upcall.c */
+#define SYNCHRONOUS 0
+#define ASYNCHRONOUS 1
+
+int lento_permit(int minor, int pathlen, int fsetnamelen, char *path, char *fset);
+int lento_opendir(int minor, int pathlen, char *path, int async);
+int lento_kml(int minor, unsigned int offset, unsigned int first_recno,
+              unsigned int length, unsigned int last_recno, int namelen,
+              char *fsetname);
+int lento_open(int minor, int pathlen, char *path);
+int lento_journal(int minor, char *page, int async);
+int lento_release_permit(int minor, int cookie);
+
+/*
+ * Kernel <--> Lento communications.
+ */
+/* upcalls */
+#define LENTO_PERMIT    1
+#define LENTO_JOURNAL   2
+#define LENTO_OPENDIR   3
+#define LENTO_OPEN      4
+#define LENTO_SIGNAL    5
+#define LENTO_KML       6
+#define LENTO_COOKIE    7
+
+/*         Lento <-> Presto  RPC arguments       */
+struct lento_up_hdr {
+        unsigned int opcode;
+        unsigned int unique;    /* Keep multiple outstanding msgs distinct */
+        u_short pid;            /* Common to all */
+        u_short uid;
+};
+
+/* This structure _must_ sit at the beginning of the buffer */
+struct lento_down_hdr {
+        unsigned int opcode;
+        unsigned int unique;    
+        unsigned int result;
+};
+
+/* lento_permit: */
+struct lento_permit_in {
+        struct lento_up_hdr uh;
+        int pathlen;
+        int fsetnamelen;
+        char path[0];
+};
+struct lento_permit_out {
+        struct lento_down_hdr dh;
+};
+
+
+/* lento_opendir: */
+struct lento_opendir_in {
+        struct lento_up_hdr uh;
+        int async;
+        int pathlen;
+        char path[0];
+};
+struct lento_opendir_out {
+        struct lento_down_hdr dh;
+};
+
+
+/* lento_kml: */
+struct lento_kml_in {
+        struct lento_up_hdr uh;
+        unsigned int offset;
+        unsigned int first_recno;
+        unsigned int length;
+        unsigned int last_recno;
+        int namelen;
+        char fsetname[0];
+};
+
+struct lento_kml_out {
+        struct lento_down_hdr dh;
+};
+
+
+/* lento_open: */
+struct lento_open_in {
+        struct lento_up_hdr uh;
+        int pathlen;
+        char path[0];
+};
+struct lento_open_out {
+    struct lento_down_hdr dh;
+};
+
+/* lento_response_cookie */
+struct lento_response_cookie_in {
+        struct lento_up_hdr uh;
+        int cookie;
+};
+
+struct lento_response_cookie_out {
+    struct lento_down_hdr dh;
+};
+
+
+struct lento_mknod {
+  struct lento_down_hdr dh;
+  int    major;
+  int    minor;
+  int    mode;
+  char   path[0];
+};
+
+
+/* NB: every struct below begins with an up_hdr */
+union up_args {
+    struct lento_up_hdr uh;             
+    struct lento_permit_in lento_permit;
+    struct lento_open_in lento_open;
+    struct lento_opendir_in lento_opendir;
+    struct lento_kml_in lento_kml;
+    struct lento_response_cookie_in lento_response_cookie;
+};
+
+union down_args {
+    struct lento_down_hdr dh;
+    struct lento_permit_out lento_permit;
+    struct lento_open_out lento_open;
+    struct lento_opendir_out lento_opendir;
+    struct lento_kml_out lento_kml;
+    struct lento_response_cookie_out lento_response_cookie;
+};    
+
+#include "intermezzo_psdev.h"
+
+int lento_upcall(int minor, int read_size, int *rep_size, 
+                 union up_args *buffer, int async,
+                 struct upc_req *rq );
+#endif 
+
diff --git a/fs/intermezzo/journal.c b/fs/intermezzo/journal.c
new file mode 100644 (file)
index 0000000..2beda38
--- /dev/null
@@ -0,0 +1,2452 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 1998 Peter J. Braam
+ *  Copyright (C) 2001 Cluster File Systems, Inc. 
+ *  Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
+ *
+ *  Support for journalling extended attributes
+ *  Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc.
+ * 
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+#include <linux/string.h>
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+struct presto_reservation_data {
+        unsigned int ri_recno;
+        loff_t ri_offset;
+        loff_t ri_size;
+        struct list_head ri_list;
+};
+
+/*
+ *  Locking Semantics
+ *
+ * write lock in struct presto_log_fd:
+ *  - name: fd_lock
+ *  - required for: accessing any field in a presto_log_fd
+ *  - may not be held across I/O
+ */
+
+/*
+ *  reserve record space and/or atomically request state of the log
+ *  rec will hold the location reserved record upon return
+ *  this reservation will be placed in the queue
+ */ 
+static void presto_reserve_record(struct presto_file_set *fset, 
+                           struct presto_log_fd *fd, 
+                           struct rec_info *rec,
+                           struct presto_reservation_data *rd)
+{
+        int chunked_record = 0; 
+        ENTRY;
+        
+        write_lock(&fd->fd_lock);
+        if ( rec->is_kml ) { 
+                int chunk = 1 << fset->fset_chunkbits;
+                int chunk_mask = ~(chunk -1); 
+                loff_t boundary; 
+
+                boundary =  (fd->fd_offset + chunk - 1) & chunk_mask;
+                if ( fd->fd_offset + rec->size >= boundary ) {
+                        chunked_record = 1;
+                        fd->fd_offset = boundary; 
+                }
+        }
+
+        fd->fd_recno++;
+        
+        /* this moves the fd_offset back after truncation */ 
+        if ( list_empty(&fd->fd_reservations) && 
+             !chunked_record) { 
+                fd->fd_offset = fd->fd_file->f_dentry->d_inode->i_size;
+        }
+
+        rec->offset = fd->fd_offset;
+        if (rec->is_kml)
+                rec->offset += fset->fset_kml_logical_off;
+
+        rec->recno = fd->fd_recno;
+
+        /* add the reservation data to the end of the list */
+        rd->ri_offset = fd->fd_offset;
+        rd->ri_size = rec->size;
+        rd->ri_recno = rec->recno; 
+        list_add(&rd->ri_list, fd->fd_reservations.prev);
+
+        fd->fd_offset += rec->size;
+
+        write_unlock(&fd->fd_lock); 
+
+        EXIT;
+}
+
+static inline void presto_release_record(struct presto_log_fd *fd,
+                                         struct presto_reservation_data *rd)
+{
+        write_lock(&fd->fd_lock);
+        list_del(&rd->ri_list);
+        write_unlock(&fd->fd_lock);
+}
+
+/* XXX should we ask for do_truncate to be exported? */
+int izo_do_truncate(struct presto_file_set *fset, struct dentry *dentry,
+                    loff_t length,  loff_t size_check)
+{
+        struct inode *inode = dentry->d_inode;
+        int error;
+        struct iattr newattrs;
+
+        ENTRY;
+
+        if (length < 0) {
+                EXIT;
+                return -EINVAL;
+        }
+
+        down(&inode->i_sem);
+        lock_kernel();
+        
+        if (size_check != inode->i_size) { 
+                unlock_kernel();
+                up(&inode->i_sem);
+                EXIT;
+                return -EALREADY; 
+        }
+
+        newattrs.ia_size = length;
+        newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+
+        if (inode->i_op && inode->i_op->setattr)
+                error = inode->i_op->setattr(dentry, &newattrs);
+        else {
+                inode_setattr(dentry->d_inode, &newattrs);
+                error = 0;
+        }
+
+        unlock_kernel();
+        up(&inode->i_sem);
+        EXIT;
+        return error;
+}
+
+static void presto_kml_truncate(struct presto_file_set *fset)
+{
+        int rc;
+        ENTRY;
+
+        write_lock(&fset->fset_kml.fd_lock);
+        if (fset->fset_kml.fd_truncating == 1 ) {
+                write_unlock(&fset->fset_kml.fd_lock);
+                EXIT;
+                return;
+        }
+
+        fset->fset_kml.fd_truncating = 1;
+        write_unlock(&fset->fset_kml.fd_lock);
+
+        CERROR("islento: %d, count: %d\n",
+               ISLENTO(presto_i2m(fset->fset_dentry->d_inode)),
+               fset->fset_permit_count);
+
+        rc = izo_upc_kml_truncate(fset->fset_cache->cache_psdev->uc_minor,
+                                fset->fset_lento_off, fset->fset_lento_recno,
+                                fset->fset_name);
+
+        /* Userspace is the only permitholder now, and will retain an exclusive
+         * hold on the permit until KML truncation completes. */
+        /* FIXME: double check this code path now that the precise semantics of
+         * fset->fset_permit_count have changed. */
+
+        if (rc != 0) {
+                write_lock(&fset->fset_kml.fd_lock);
+                fset->fset_kml.fd_truncating = 0;
+                write_unlock(&fset->fset_kml.fd_lock);
+        }
+
+        EXIT;
+}
+
+void *presto_trans_start(struct presto_file_set *fset, struct inode *inode,
+                         int op)
+{
+        ENTRY;
+        if ( !fset->fset_cache->cache_filter->o_trops ) {
+                EXIT;
+                return NULL;
+        }
+        EXIT;
+        return fset->fset_cache->cache_filter->o_trops->tr_start
+                (fset, inode, op);
+}
+
+void presto_trans_commit(struct presto_file_set *fset, void *handle)
+{
+        ENTRY;
+        if (!fset->fset_cache->cache_filter->o_trops ) {
+                EXIT;
+                return;
+        }
+
+        fset->fset_cache->cache_filter->o_trops->tr_commit(fset, handle);
+
+        /* Check to see if the KML needs to be truncated. */
+        if (fset->kml_truncate_size > 0 &&
+            !fset->fset_kml.fd_truncating &&
+            fset->fset_kml.fd_offset > fset->kml_truncate_size) {
+                CDEBUG(D_JOURNAL, "kml size: %lu; truncating\n",
+                       (unsigned long)fset->fset_kml.fd_offset);
+                presto_kml_truncate(fset);
+        }
+        EXIT;
+}
+
+inline int presto_no_journal(struct presto_file_set *fset)
+{
+        int minor = fset->fset_cache->cache_psdev->uc_minor;
+        return izo_channels[minor].uc_no_journal;
+}
+
+#define size_round(x)  (((x)+3) & ~0x3)
+
+#define BUFF_FREE(buf) PRESTO_FREE(buf, PAGE_SIZE)
+#define BUFF_ALLOC(newbuf, oldbuf)              \
+        PRESTO_ALLOC(newbuf, PAGE_SIZE);        \
+        if ( !newbuf ) {                        \
+                if (oldbuf)                     \
+                        BUFF_FREE(oldbuf);      \
+                return -ENOMEM;                 \
+        }
+
+/*
+ * "buflen" should be PAGE_SIZE or more.
+ * Give relative path wrt to a fsetroot
+ */
+char * presto_path(struct dentry *dentry, struct dentry *root,
+                   char *buffer, int buflen)
+{
+        char * end = buffer+buflen;
+        char * retval;
+
+        *--end = '\0';
+        buflen--;
+        if (dentry->d_parent != dentry && d_unhashed(dentry)) {
+                buflen -= 10;
+                end -= 10;
+                memcpy(end, " (deleted)", 10);
+        }
+
+        /* Get '/' right */
+        retval = end-1;
+        *retval = '/';
+
+        for (;;) {
+                struct dentry * parent;
+                int namelen;
+
+                if (dentry == root)
+                        break;
+                parent = dentry->d_parent;
+                if (dentry == parent)
+                        break;
+                namelen = dentry->d_name.len;
+                buflen -= namelen + 1;
+                if (buflen < 0)
+                        break;
+                end -= namelen;
+                memcpy(end, dentry->d_name.name, namelen);
+                *--end = '/';
+                retval = end;
+                dentry = parent;
+        }
+        return retval;
+}
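+
+/*
+ * Usage sketch for BUFF_ALLOC/BUFF_FREE and presto_path() (illustrative
+ * only; nothing below calls this).  The returned path points into the
+ * allocated page, so only the page itself is freed afterwards.
+ */
+static inline int presto_path_example(struct presto_file_set *fset,
+                                      struct dentry *dentry)
+{
+        char *buffer = NULL;
+        char *path;
+
+        BUFF_ALLOC(buffer, NULL);       /* returns -ENOMEM on failure */
+        path = presto_path(dentry, fset->fset_dentry, buffer, PAGE_SIZE);
+        CDEBUG(D_JOURNAL, "path: %s\n", path);
+        BUFF_FREE(buffer);
+        return 0;
+}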
+
+static inline char *logit(char *buf, const void *value, int size)
+{
+        char *ptr = (char *)value;
+
+        memcpy(buf, ptr, size);
+        buf += size;
+        return buf;
+}
+
+
+static inline char *
+journal_log_prefix_with_groups_and_ids(char *buf, int opcode, 
+                                       struct rec_info *rec,
+                                       __u32 ngroups, gid_t *groups,
+                                       __u32 fsuid, __u32 fsgid)
+{
+        struct kml_prefix_hdr p;
+        u32 loggroups[NGROUPS_SMALL];
+
+        int i; 
+
+        p.len = cpu_to_le32(rec->size);
+        p.version = KML_MAJOR_VERSION | KML_MINOR_VERSION;
+        p.pid = cpu_to_le32(current->pid);
+        p.auid = cpu_to_le32(current->uid);
+        p.fsuid = cpu_to_le32(fsuid);
+        p.fsgid = cpu_to_le32(fsgid);
+        p.ngroups = cpu_to_le32(ngroups);
+        p.opcode = cpu_to_le32(opcode);
+        for (i=0 ; i < ngroups ; i++)
+                loggroups[i] = cpu_to_le32((__u32) groups[i]);
+
+        buf = logit(buf, &p, sizeof(struct kml_prefix_hdr));
+        buf = logit(buf, &loggroups, sizeof(__u32) * ngroups);
+        return buf;
+}
+
+static inline char *
+journal_log_prefix(char *buf, int opcode, struct rec_info *rec)
+{
+        __u32 groups[NGROUPS_SMALL]; 
+        int i; 
+
+        /* convert 16 bit gid's to 32 bit gid's */
+        for (i=0; i<current->group_info->ngroups; i++) 
+                groups[i] = GROUP_AT(current->group_info,i);
+        
+        return journal_log_prefix_with_groups_and_ids(buf, opcode, rec,
+                                                      (__u32)current->group_info->ngroups,
+                                                      groups,
+                                                      (__u32)current->fsuid,
+                                                      (__u32)current->fsgid);
+}
+
+static inline char *
+journal_log_prefix_with_groups(char *buf, int opcode, struct rec_info *rec, 
+                               __u32 ngroups, gid_t *groups)
+{
+        return journal_log_prefix_with_groups_and_ids(buf, opcode, rec,
+                                                      ngroups, groups,
+                                                      (__u32)current->fsuid,
+                                                      (__u32)current->fsgid);
+}
+
+static inline char *log_dentry_version(char *buf, struct dentry *dentry)
+{
+        struct presto_version version;
+
+        presto_getversion(&version, dentry->d_inode);
+        
+        version.pv_mtime_sec = HTON__u32(version.pv_mtime_sec);
+        version.pv_ctime_sec = HTON__u32(version.pv_ctime_sec);
+        version.pv_mtime_nsec = HTON__u32(version.pv_mtime_nsec);
+        version.pv_ctime_nsec = HTON__u32(version.pv_ctime_nsec);
+        version.pv_size = HTON__u64(version.pv_size);
+
+        return logit(buf, &version, sizeof(version));
+}
+
+static inline char *log_version(char *buf, struct presto_version *pv)
+{
+        struct presto_version version;
+
+        memcpy(&version, pv, sizeof(version));
+        
+        version.pv_mtime_sec = HTON__u32(version.pv_mtime_sec);
+        version.pv_mtime_nsec = HTON__u32(version.pv_mtime_nsec);
+        version.pv_ctime_sec = HTON__u32(version.pv_ctime_sec);
+        version.pv_ctime_nsec = HTON__u32(version.pv_ctime_nsec);
+        version.pv_size = HTON__u64(version.pv_size);
+
+        return logit(buf, &version, sizeof(version));
+}
+
+static inline char *log_rollback(char *buf, struct izo_rollback_data *rb)
+{
+        struct izo_rollback_data rollback;
+        
+        rollback.rb_mode = HTON__u32(rb->rb_mode);
+        rollback.rb_rdev = HTON__u32(rb->rb_rdev);
+        rollback.rb_uid = HTON__u64(rb->rb_uid);
+        rollback.rb_gid = HTON__u64(rb->rb_gid);
+
+        return logit(buf, &rollback, sizeof(rollback));
+}
+
+static inline char *journal_log_suffix(char *buf, char *log,
+                                       struct presto_file_set *fset,
+                                       struct dentry *dentry,
+                                       struct rec_info *rec)
+{
+        struct kml_suffix s;
+        struct kml_prefix_hdr *p = (struct kml_prefix_hdr *)log;
+
+#if 0
+        /* XXX needs to be done after reservation, 
+           disable this until version 1.2 */
+        if ( dentry ) { 
+                s.prevrec = cpu_to_le32(rec->offset - 
+                                        presto_d2d(dentry)->dd_kml_offset);
+                presto_d2d(dentry)->dd_kml_offset = rec->offset;
+        } else { 
+                s.prevrec = -1;
+        }
+#endif
+        s.prevrec = 0; 
+
+        /* record number needs to be filled in after reservation 
+           s.recno = cpu_to_le32(rec->recno); */ 
+        s.time = cpu_to_le32(get_seconds());
+        s.len = p->len;
+        return logit(buf, &s, sizeof(s));
+}
+
+int izo_log_close(struct presto_log_fd *logfd)
+{
+        int rc = 0;
+
+        if (logfd->fd_file) {
+                rc = filp_close(logfd->fd_file, 0);
+                logfd->fd_file = NULL;
+        } else
+                CERROR("InterMezzo: %s: no filp\n", __FUNCTION__);
+        if (rc != 0)
+                CERROR("InterMezzo: close files: filp won't close: %d\n", rc);
+
+        return rc;
+}
+
+int presto_fwrite(struct file *file, const char *str, int len, loff_t *off)
+{
+        int rc;
+        mm_segment_t old_fs;
+        ENTRY;
+
+        rc = -EINVAL;
+        if ( !off ) {
+                EXIT;
+                return rc;
+        }
+
+        if ( ! file ) {
+                EXIT;
+                return rc;
+        }
+
+        if ( ! file->f_op ) {
+                EXIT;
+                return rc;
+        }
+
+        if ( ! file->f_op->write ) {
+                EXIT;
+                return rc;
+        }
+
+        old_fs = get_fs();
+        set_fs(get_ds());
+        rc = file->f_op->write(file, str, len, off);
+        if (rc != len) {
+                CERROR("presto_fwrite: wrote %d bytes instead of "
+                       "%d at %ld\n", rc, len, (long)*off);
+                rc = -EIO; 
+        }
+        set_fs(old_fs);
+        EXIT;
+        return rc;
+}
+
+int presto_fread(struct file *file, char *str, int len, loff_t *off)
+{
+        int rc;
+        mm_segment_t old_fs;
+        ENTRY;
+
+        if (len > 512)
+                CERROR("presto_fread: read at %Ld for %d bytes, ino %ld\n",
+                       *off, len, file->f_dentry->d_inode->i_ino); 
+
+        rc = -EINVAL;
+        if ( !off ) {
+                EXIT;
+                return rc;
+        }
+
+        if ( ! file ) {
+                EXIT;
+                return rc;
+        }
+
+        if ( ! file->f_op ) {
+                EXIT;
+                return rc;
+        }
+
+        if ( ! file->f_op->read ) {
+                EXIT;
+                return rc;
+        }
+
+        old_fs = get_fs();
+        set_fs(get_ds());
+        rc = file->f_op->read(file, str, len, off);
+        if (rc != len) {
+                CDEBUG(D_FILE, "presto_fread: read %d bytes instead of "
+                       "%d at %Ld\n", rc, len, *off);
+                rc = -EIO; 
+        }
+        set_fs(old_fs);
+        EXIT;
+        return rc;
+}
+
+loff_t presto_kml_offset(struct presto_file_set *fset)
+{
+        unsigned int kml_recno;
+        struct presto_log_fd *fd = &fset->fset_kml;
+        loff_t  offset;
+        ENTRY;
+
+        write_lock(&fd->fd_lock); 
+
+        /* Determine the largest valid offset, i.e. up until the first
+         * reservation held on the file. */
+        if ( !list_empty(&fd->fd_reservations) ) {
+                struct presto_reservation_data *rd;
+                rd = list_entry(fd->fd_reservations.next, 
+                                struct presto_reservation_data, 
+                                ri_list);
+                offset = rd->ri_offset;
+                kml_recno = rd->ri_recno;
+        } else {
+                offset = fd->fd_file->f_dentry->d_inode->i_size;
+                kml_recno = fset->fset_kml.fd_recno; 
+        }
+        write_unlock(&fd->fd_lock); 
+        return offset; 
+}
+
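+/* Tell Lento (via the izo_upc_kml upcall) that new KML records are
+ * available for reintegration, and remember how far it has been told. */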
+static int presto_kml_dispatch(struct presto_file_set *fset)
+{
+        int rc = 0;
+        unsigned int kml_recno;
+        struct presto_log_fd *fd = &fset->fset_kml;
+        loff_t offset;
+        ENTRY;
+
+        write_lock(&fd->fd_lock); 
+
+        /* Determine the largest valid offset, i.e. up until the first
+         * reservation held on the file. */
+        if ( !list_empty(&fd->fd_reservations) ) {
+                struct presto_reservation_data *rd;
+                rd = list_entry(fd->fd_reservations.next, 
+                                struct presto_reservation_data, 
+                                ri_list);
+                offset = rd->ri_offset;
+                kml_recno = rd->ri_recno;
+        } else {
+                offset = fd->fd_file->f_dentry->d_inode->i_size;
+                kml_recno = fset->fset_kml.fd_recno; 
+        }
+
+        if ( kml_recno < fset->fset_lento_recno ) {
+                CERROR("presto_kml_dispatch: smoke is coming\n"); 
+                write_unlock(&fd->fd_lock);
+                EXIT;
+                return 0; 
+        } else if ( kml_recno == fset->fset_lento_recno ) {
+                write_unlock(&fd->fd_lock);
+                EXIT;
+                return 0; 
+                /* XXX add a further "if" here to delay the KML upcall */ 
+#if 0
+        } else if ( kml_recno < fset->fset_lento_recno + 100) {
+                write_unlock(&fd->fd_lock);
+                EXIT;
+                return 0;
+#endif
+        }
+        CDEBUG(D_PIOCTL, "fset: %s\n", fset->fset_name);
+
+        rc = izo_upc_kml(fset->fset_cache->cache_psdev->uc_minor,
+                       fset->fset_lento_off, fset->fset_lento_recno,
+                       offset + fset->fset_kml_logical_off, kml_recno,
+                       fset->fset_name);
+
+        if ( rc ) {
+                write_unlock(&fd->fd_lock);
+                EXIT;
+                return rc;
+        }
+
+        fset->fset_lento_off = offset;
+        fset->fset_lento_recno = kml_recno; 
+        write_unlock(&fd->fd_lock);
+        EXIT;
+        return 0;
+}
+
+int izo_lookup_file(struct presto_file_set *fset, char *path,
+                    struct nameidata *nd)
+{
+        int error = 0;
+
+        CDEBUG(D_CACHE, "looking up: %s\n", path);
+
+        error = path_lookup(path, LOOKUP_PARENT, nd);
+        if (error) {
+                EXIT;
+                return error;
+        }
+
+        return 0;
+}
+
+/* FIXME: this function is a mess of locking and error handling.  There's got to
+ * be a better way. */
+static int do_truncate_rename(struct presto_file_set *fset, char *oldname,
+                              char *newname)
+{
+        struct dentry *old_dentry, *new_dentry;
+        struct nameidata oldnd, newnd;
+        char *oldpath, *newpath;
+        int error;
+
+        ENTRY;
+
+        oldpath = izo_make_path(fset, oldname);
+        if (oldpath == NULL) {
+                EXIT;
+                return -ENOENT;
+        }
+
+        newpath = izo_make_path(fset, newname);
+        if (newpath == NULL) {
+                error = -ENOENT;
+                EXIT;
+                goto exit;
+        }
+
+        if ((error = izo_lookup_file(fset, oldpath, &oldnd)) != 0) {
+                EXIT;
+                goto exit1;
+        }
+
+        if ((error = izo_lookup_file(fset, newpath, &newnd)) != 0) {
+                EXIT;
+                goto exit2;
+        }
+
+        lock_rename(newnd.dentry, oldnd.dentry);
+        old_dentry = lookup_hash(&oldnd.last, oldnd.dentry);
+        error = PTR_ERR(old_dentry);
+        if (IS_ERR(old_dentry)) {
+                EXIT;
+                goto exit3;
+        }
+        error = -ENOENT;
+        if (!old_dentry->d_inode) {
+                EXIT;
+                goto exit4;
+        }
+        new_dentry = lookup_hash(&newnd.last, newnd.dentry);
+        error = PTR_ERR(new_dentry);
+        if (IS_ERR(new_dentry)) {
+                EXIT;
+                goto exit4;
+        }
+
+        {
+        extern int presto_rename(struct inode *old_dir,struct dentry *old_dentry,
+                                struct inode *new_dir,struct dentry *new_dentry);
+        error = presto_rename(old_dentry->d_parent->d_inode, old_dentry,
+                              new_dentry->d_parent->d_inode, new_dentry);
+        }
+
+        dput(new_dentry);
+        EXIT;
+ exit4:
+        dput(old_dentry);
+ exit3:
+        unlock_rename(newnd.dentry, oldnd.dentry);
+        path_release(&newnd);
+ exit2:
+        path_release(&oldnd);
+ exit1:
+        PRESTO_FREE(newpath, strlen(newpath) + 1);
+ exit:
+        PRESTO_FREE(oldpath, strlen(oldpath) + 1);
+        return error;
+}
+
+/* This function is called with the fset->fset_kml.fd_lock held */
+int presto_finish_kml_truncate(struct presto_file_set *fset,
+                               unsigned long int offset)
+{
+        struct lento_vfs_context info;
+        void *handle;
+        struct file *f;
+        struct dentry *dentry;
+        int error = 0, len;
+        struct nameidata nd;
+        char *kmlpath = NULL, *smlpath = NULL;
+        ENTRY;
+
+        if (offset == 0) {
+                /* Lento couldn't do what it needed to; abort the truncation. */
+                fset->fset_kml.fd_truncating = 0;
+                EXIT;
+                return 0;
+        }
+
+        /* someone is about to write to the end of the KML; try again later. */
+        if ( !list_empty(&fset->fset_kml.fd_reservations) ) {
+                EXIT;
+                return -EAGAIN;
+        }
+
+        f = presto_copy_kml_tail(fset, offset);
+        if (IS_ERR(f)) {
+                EXIT;
+                return PTR_ERR(f);
+        }                        
+
+        /* In a single transaction:
+         *
+         *   - unlink 'kml'
+         *   - rename 'kml_tmp' to 'kml'
+         *   - unlink 'sml'
+         *   - rename 'sml_tmp' to 'sml'
+         *   - rewrite the first record of last_rcvd with the new kml
+         *     offset.
+         */
+        handle = presto_trans_start(fset, fset->fset_dentry->d_inode,
+                                    KML_OPCODE_KML_TRUNC);
+        if (IS_ERR(handle)) {
+                presto_release_space(fset->fset_cache, PRESTO_REQLOW);
+                CERROR("ERROR: presto_finish_kml_truncate: no space for transaction\n");
+                EXIT;
+                return -ENOMEM;
+        }
+
+        memset(&info, 0, sizeof(info));
+        info.flags = LENTO_FL_IGNORE_TIME;
+
+        kmlpath = izo_make_path(fset, "kml");
+        if (kmlpath == NULL) {
+                error = -ENOMEM;
+                CERROR("make_path failed: ENOMEM\n");
+                EXIT;
+                goto exit_commit;
+        }
+
+        if ((error = izo_lookup_file(fset, kmlpath, &nd)) != 0) {
+                CERROR("izo_lookup_file(kml) failed: %d.\n", error);
+                EXIT;
+                goto exit_commit;
+        }
+        down(&nd.dentry->d_inode->i_sem);
+        dentry = lookup_hash(&nd.last, nd.dentry);
+        error = PTR_ERR(dentry);
+        if (IS_ERR(dentry)) {
+                up(&nd.dentry->d_inode->i_sem);
+                path_release(&nd);
+                CERROR("lookup_hash failed\n");
+                EXIT;
+                goto exit_commit;
+        }
+        error = presto_do_unlink(fset, dentry->d_parent, dentry, &info);
+        dput(dentry);
+        up(&nd.dentry->d_inode->i_sem);
+        path_release(&nd);
+
+        if (error != 0) {
+                CERROR("presto_do_unlink(kml) failed: %d.\n", error);
+                EXIT;
+                goto exit_commit;
+        }
+
+        smlpath = izo_make_path(fset, "sml");
+        if (smlpath == NULL) {
+                error = -ENOMEM;
+                CERROR("make_path() failed: ENOMEM\n");
+                EXIT;
+                goto exit_commit;
+        }
+
+        if ((error = izo_lookup_file(fset, smlpath, &nd)) != 0) {
+                CERROR("izo_lookup_file(sml) failed: %d.\n", error);
+                EXIT;
+                goto exit_commit;
+        }
+        down(&nd.dentry->d_inode->i_sem);
+        dentry = lookup_hash(&nd.last, nd.dentry);
+        error = PTR_ERR(dentry);
+        if (IS_ERR(dentry)) {
+                up(&nd.dentry->d_inode->i_sem);
+                path_release(&nd);
+                CERROR("lookup_hash failed\n");
+                EXIT;
+                goto exit_commit;
+        }
+        error = presto_do_unlink(fset, dentry->d_parent, dentry, &info);
+        dput(dentry);
+        up(&nd.dentry->d_inode->i_sem);
+        path_release(&nd);
+
+        if (error != 0) {
+                CERROR("presto_do_unlink(sml) failed: %d.\n", error);
+                EXIT;
+                goto exit_commit;
+        }
+
+        error = do_truncate_rename(fset, "kml_tmp", "kml");
+        if (error != 0)
+                CERROR("do_truncate_rename(kml_tmp, kml) failed: %d\n", error);
+        error = do_truncate_rename(fset, "sml_tmp", "sml");
+        if (error != 0)
+                CERROR("do_truncate_rename(sml_tmp, sml) failed: %d\n", error);
+
+        /* Write a new 'last_rcvd' record with the new KML offset */
+        fset->fset_kml_logical_off += offset;
+        CDEBUG(D_CACHE, "new kml_logical_offset: %Lu\n",
+               fset->fset_kml_logical_off);
+        if (presto_write_kml_logical_offset(fset) != 0) {
+                CERROR("presto_write_kml_logical_offset failed\n");
+        }
+
+        presto_trans_commit(fset, handle);
+
+        /* Everything was successful, so swap the KML file descriptors */
+        filp_close(fset->fset_kml.fd_file, NULL);
+        fset->fset_kml.fd_file = f;
+        fset->fset_kml.fd_offset -= offset;
+        fset->fset_kml.fd_truncating = 0;
+
+        EXIT;
+        return 0;
+
+ exit_commit:
+        presto_trans_commit(fset, handle);
+        len = strlen("/.intermezzo/") + strlen(fset->fset_name) + strlen("sml");
+        if (kmlpath != NULL)
+                PRESTO_FREE(kmlpath, len);
+        if (smlpath != NULL)
+                PRESTO_FREE(smlpath, len);
+        return error;
+}
+
+/* structure of an extended log record:
+
+   buf-prefix  buf-body [string1 [string2 [string3]]] buf-suffix
+
+   note: moves offset forward
+*/
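+/* presto_write_record() expects the prefix, body and suffix to sit
+   contiguously in 'buf'; the optional strings are spliced in between the
+   body and the suffix as the record is written out. */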
+static inline int presto_write_record(struct file *f, loff_t *off,
+                        const char *buf, size_t size,
+                        const char *string1, int len1, 
+                        const char *string2, int len2,
+                        const char *string3, int len3)
+{
+        size_t prefix_size; 
+        int rc;
+
+        prefix_size = size - sizeof(struct kml_suffix);
+        rc = presto_fwrite(f, buf, prefix_size, off);
+        if ( rc != prefix_size ) {
+                CERROR("Write error!\n");
+                EXIT;
+                return -EIO;
+        }
+
+        if  ( string1  && len1 ) {
+                rc = presto_fwrite(f, string1, len1, off);
+                if ( rc != len1 ) {
+                        CERROR("Write error!\n");
+                        EXIT;
+                        return -EIO;
+                }
+        }
+
+        if  ( string2 && len2 ) {
+                rc = presto_fwrite(f, string2, len2, off);
+                if ( rc != len2 ) {
+                        CERROR("Write error!\n");
+                        EXIT;
+                        return -EIO;
+                }
+        }
+
+        if  ( string3 && len3 ) {
+                rc = presto_fwrite(f, string3, len3, off);
+                if ( rc != len3 ) {
+                        CERROR("Write error!\n");
+                        EXIT;
+                        return -EIO;
+                }
+        }
+
+        rc = presto_fwrite(f, buf + prefix_size,
+                           sizeof(struct kml_suffix), off);
+        if ( rc != sizeof(struct kml_suffix) ) {
+                CERROR("Write error!\n");
+                EXIT;
+                return -EIO;
+        }
+        return 0;
+}
+
+
+/*
+ * rec->size must be valid prior to calling this function.
+ *
+ * had to export this for branch_reinter in kml_reint.c 
+ */
+int presto_log(struct presto_file_set *fset, struct rec_info *rec,
+               const char *buf, size_t size,
+               const char *string1, int len1, 
+               const char *string2, int len2,
+               const char *string3, int len3)
+{
+        int rc;
+        struct presto_reservation_data rd;
+        loff_t offset;
+        struct presto_log_fd *fd;
+        struct kml_suffix *s;
+        int prefix_size; 
+
+        ENTRY;
+
+        /* buf is NULL when no_journal is in effect */
+        if (!buf) {
+                EXIT;
+                return -EINVAL;
+        }
+
+        if (rec->is_kml) {
+                fd = &fset->fset_kml;
+        } else {
+                fd = &fset->fset_lml;
+        }
+
+        presto_reserve_record(fset, fd, rec, &rd);
+
+        if (rec->is_kml) {
+                if (rec->offset < fset->fset_kml_logical_off) {
+                        CERROR("record with pre-trunc offset.  tell phil.\n");
+                        BUG();
+                }
+                offset = rec->offset - fset->fset_kml_logical_off;
+        } else {
+                offset = rec->offset;
+        }
+
+        /* now we know the record number */ 
+        prefix_size = size - sizeof(struct kml_suffix);
+        s = (struct kml_suffix *) (buf + prefix_size); 
+        s->recno = cpu_to_le32(rec->recno); 
+
+        rc = presto_write_record(fd->fd_file, &offset, buf, size, 
+                                 string1, len1, string2, len2, string3, len3); 
+        if (rc) {
+                CERROR("presto: error writing record to %s\n",
+                        rec->is_kml ? "KML" : "LML"); 
+                return rc;
+        }
+        presto_release_record(fd, &rd);
+
+        rc = presto_kml_dispatch(fset);
+
+        EXIT;
+        return rc;
+}
+
+/* read from the record at tail */
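+/* Scan backwards over any zero padding before 'tail' to find the last
+   non-zero word, then read the kml_suffix just in front of it to recover
+   the last record's number and length and the offset at which the next
+   record should be appended. */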
+static int presto_last_record(struct presto_log_fd *fd, loff_t *size, 
+                             loff_t *tail_offset, __u32 *recno, loff_t tail)
+{
+        struct kml_suffix suffix;
+        int rc;
+        loff_t zeroes;
+
+        *recno = 0;
+        *tail_offset = 0;
+        *size = 0;
+        
+        if (tail < sizeof(struct kml_prefix_hdr) + sizeof(suffix)) {
+                EXIT;
+                return 0;
+        }
+
+        zeroes = tail - sizeof(int);
+        while ( zeroes >= 0 ) {
+                int data;
+                rc = presto_fread(fd->fd_file, (char *)&data, sizeof(data), 
+                                  &zeroes);
+                if ( rc != sizeof(data) ) { 
+                        rc = -EIO;
+                        return rc;
+                }
+                if (data)
+                        break;
+                zeroes -= 2 * sizeof(data);
+        }
+
+        /* Zeroes at the beginning of the file; this is needed to prevent
+           presto_fread errors.  -SHP
+        */
+        if (zeroes <= 0) return 0;
+
+        zeroes -= sizeof(suffix) + sizeof(int);
+        rc = presto_fread(fd->fd_file, (char *)&suffix, sizeof(suffix), &zeroes);
+        if ( rc != sizeof(suffix) ) {
+                EXIT;
+                return rc;
+        }
+        if ( suffix.len > 500 ) {
+                CERROR("InterMezzo: Warning: long record tail at %ld, rec tail_offset at %ld (size %d)\n", 
+                        (long) zeroes, (long)*tail_offset, suffix.len); 
+        }
+
+        *recno = suffix.recno;
+        *size = suffix.len;
+        *tail_offset = zeroes;
+        return 0;
+}
+
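+/* Position logfd at the end of the existing KML: find the last record and
+ * initialize fd_offset/fd_recno from it. */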
+static int izo_kml_last_recno(struct presto_log_fd *logfd)
+{
+        int rc; 
+        loff_t size;
+        loff_t tail_offset;
+        __u32 recno;
+        loff_t tail = logfd->fd_file->f_dentry->d_inode->i_size;
+
+        rc = presto_last_record(logfd, &size, &tail_offset, &recno, tail);
+        if (rc != 0) {
+                EXIT;
+                return rc;
+        }
+
+        logfd->fd_offset = tail_offset;
+        logfd->fd_recno = recno;
+        CDEBUG(D_JOURNAL, "setting fset_kml->fd_recno to %d, offset  %Ld\n",
+               recno, tail_offset); 
+        EXIT;
+        return 0;
+}
+
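+/* Open (or create) one of the fileset's log files and enable data
+ * journalling on it when the underlying filter supports it.  The file must
+ * live in the same cache as the fileset. */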
+struct file *izo_log_open(struct presto_file_set *fset, char *name, int flags)
+{
+        struct presto_cache *cache = fset->fset_cache;
+        struct file *f;
+        int error;
+        ENTRY;
+
+        f = izo_fset_open(fset, name, flags, 0644);
+        error = PTR_ERR(f);
+        if (IS_ERR(f)) {
+                EXIT;
+                return f;
+        }
+
+        error = -EINVAL;
+        if ( cache != presto_get_cache(f->f_dentry->d_inode) ) {
+                CERROR("InterMezzo: %s cache does not match fset cache!\n",name);
+                fset->fset_kml.fd_file = NULL;
+                filp_close(f, NULL);
+                f = NULL;
+                EXIT;
+                return f;
+        }
+
+        if (cache->cache_filter &&  cache->cache_filter->o_trops &&
+            cache->cache_filter->o_trops->tr_journal_data) {
+                cache->cache_filter->o_trops->tr_journal_data
+                        (f->f_dentry->d_inode);
+        } else {
+                CERROR("InterMezzo WARNING: no file data logging!\n"); 
+        }
+
+        EXIT;
+
+        return f;
+}
+
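+/* Open the KML for a fileset and recover the last record number and append
+ * offset from its tail. */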
+int izo_init_kml_file(struct presto_file_set *fset, struct presto_log_fd *logfd)
+{
+        int error = 0;
+        struct file *f;
+
+        ENTRY;
+        if (logfd->fd_file) {
+                CDEBUG(D_INODE, "fset already has KML open\n");
+                EXIT;
+                return 0;
+        }
+
+        logfd->fd_lock = RW_LOCK_UNLOCKED;
+        INIT_LIST_HEAD(&logfd->fd_reservations); 
+        f = izo_log_open(fset, "kml",  O_RDWR | O_CREAT);
+        if (IS_ERR(f)) {
+                error = PTR_ERR(f);
+                return error;
+        }
+
+        logfd->fd_file = f;
+        error = izo_kml_last_recno(logfd);
+
+        if (error) {
+                logfd->fd_file = NULL;
+                filp_close(f, NULL);
+                CERROR("InterMezzo: IO error in KML of fset %s\n",
+                       fset->fset_name);
+                EXIT;
+                return error;
+        }
+        fset->fset_lento_off = logfd->fd_offset;
+        fset->fset_lento_recno = logfd->fd_recno;
+
+        EXIT;
+        return error;
+}
+
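+/* Open the last_rcvd file, initialize the replicator cache and pick up the
+ * logical KML offset stored there (writing an initial record if the file
+ * was just created). */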
+int izo_init_last_rcvd_file(struct presto_file_set *fset, struct presto_log_fd *logfd)
+{
+        int error = 0;
+        struct file *f;
+        struct rec_info recinfo;
+
+        ENTRY;
+        if (logfd->fd_file != NULL) {
+                CDEBUG(D_INODE, "fset already has last_rcvd open\n");
+                EXIT;
+                return 0;
+        }
+
+        logfd->fd_lock = RW_LOCK_UNLOCKED;
+        INIT_LIST_HEAD(&logfd->fd_reservations); 
+        f = izo_log_open(fset, "last_rcvd", O_RDWR | O_CREAT);
+        if (IS_ERR(f)) {
+                error = PTR_ERR(f);
+                return error;
+        }
+
+        logfd->fd_file = f;
+        logfd->fd_offset = f->f_dentry->d_inode->i_size;
+
+        error = izo_rep_cache_init(fset);
+
+        if (presto_read_kml_logical_offset(&recinfo, fset) == 0) {
+                fset->fset_kml_logical_off = recinfo.offset;
+        } else {
+                /* The 'last_rcvd' file doesn't contain a kml offset record,
+                 * probably because we just created 'last_rcvd'.  Write one. */
+                fset->fset_kml_logical_off = 0;
+                presto_write_kml_logical_offset(fset);
+        }
+
+        EXIT;
+        return error;
+}
+
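+/* Open the LML for a fileset; appends simply continue at the current end
+ * of the file. */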
+int izo_init_lml_file(struct presto_file_set *fset, struct presto_log_fd *logfd)
+{
+        int error = 0;
+        struct file *f;
+
+        ENTRY;
+        if (logfd->fd_file) {
+                CDEBUG(D_INODE, "fset already has lml open\n");
+                EXIT;
+                return 0;
+        }
+
+        logfd->fd_lock = RW_LOCK_UNLOCKED;
+        INIT_LIST_HEAD(&logfd->fd_reservations); 
+        f = izo_log_open(fset, "lml", O_RDWR | O_CREAT);
+        if (IS_ERR(f)) {
+                error = PTR_ERR(f);
+                return error;
+        }
+
+        logfd->fd_file = f;
+        logfd->fd_offset = f->f_dentry->d_inode->i_size;
+
+        EXIT;
+        return error;
+}
+
+/* Get the KML-offset record from the last_rcvd file */
+int presto_read_kml_logical_offset(struct rec_info *recinfo,
+                                   struct presto_file_set *fset)
+{
+        loff_t off;
+        struct izo_rcvd_rec rec;
+        char uuid[16] = {0};
+
+        off = izo_rcvd_get(&rec, fset, uuid);
+        if (off < 0)
+                return -1;
+
+        recinfo->offset = rec.lr_local_offset;
+        return 0;
+}
+
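+/* Store the fileset's logical KML offset in its last_rcvd record, creating
+ * the record if it does not exist yet. */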
+int presto_write_kml_logical_offset(struct presto_file_set *fset)
+{
+        loff_t rc;
+        struct izo_rcvd_rec rec;
+        char uuid[16] = {0};
+
+        rc = izo_rcvd_get(&rec, fset, uuid);
+        if (rc < 0)
+                memset(&rec, 0, sizeof(rec));
+
+        rec.lr_local_offset =
+                cpu_to_le64(fset->fset_kml_logical_off);
+
+        return izo_rcvd_write(fset, &rec);
+}
+
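+/* Copy everything in the KML from 'start' to the current append offset into
+ * the 'kml_tmp' file and return it open; presto_finish_kml_truncate() later
+ * swaps it in for the truncated KML. */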
+struct file * presto_copy_kml_tail(struct presto_file_set *fset,
+                                   unsigned long int start)
+{
+        struct file *f;
+        int len;
+        loff_t read_off, write_off, bytes;
+
+        ENTRY;
+
+        /* Copy the tail of 'kml' to 'kml_tmp' */
+        f = izo_log_open(fset, "kml_tmp", O_RDWR);
+        if (IS_ERR(f)) {
+                EXIT;
+                return f;
+        }
+
+        write_off = 0;
+        read_off = start;
+        bytes = fset->fset_kml.fd_offset - start;
+        while (bytes > 0) {
+                char buf[4096];
+                int toread;
+
+                if (bytes > sizeof(buf))
+                        toread = sizeof(buf);
+                else
+                        toread = bytes;
+
+                len = presto_fread(fset->fset_kml.fd_file, buf, toread,
+                                   &read_off);
+                if (len <= 0)
+                        break;
+
+                if (presto_fwrite(f, buf, len, &write_off) != len) {
+                        filp_close(f, NULL);
+                        EXIT;
+                        return ERR_PTR(-EIO);
+                }
+
+                bytes -= len;
+        }
+
+        EXIT;
+        return f;
+}
+
+
+/* LML records here */
+/* this writes an LML record to the LML file (rec->is_kml =0)  */
+int presto_write_lml_close(struct rec_info *rec,
+                           struct presto_file_set *fset, 
+                           struct file *file,
+                           __u64 remote_ino,
+                           __u64 remote_generation,
+                           struct presto_version *remote_version,
+                           struct presto_version *new_file_ver)
+{
+        int opcode = KML_OPCODE_CLOSE;
+        char *buffer;
+        struct dentry *dentry = file->f_dentry; 
+        __u64 ino;
+        __u32 pathlen;
+        char *path;
+        __u32 generation;
+        int size;
+        char *logrecord;
+        char record[292];
+        struct dentry *root;
+        int error;
+
+        ENTRY;
+
+        if ( presto_no_journal(fset) ) {
+                EXIT;
+                return 0;
+        }
+        root = fset->fset_dentry;
+
+        BUFF_ALLOC(buffer, NULL);
+        path = presto_path(dentry, root, buffer, PAGE_SIZE);
+        CDEBUG(D_INODE, "Path: %s\n", path);
+        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
+        ino = cpu_to_le64(dentry->d_inode->i_ino);
+        generation = cpu_to_le32(dentry->d_inode->i_generation);
+        size =  sizeof(__u32) * current->group_info->ngroups + 
+                sizeof(struct kml_prefix_hdr) + sizeof(*new_file_ver) +
+                sizeof(ino) + sizeof(generation) + sizeof(pathlen) +
+                sizeof(remote_ino) + sizeof(remote_generation) + 
+                sizeof(*remote_version) + sizeof(rec->offset) +
+                sizeof(struct kml_suffix);
+
+        if ( size > sizeof(record) )
+                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
+        
+        rec->is_kml = 0;
+        rec->size = size + size_round(le32_to_cpu(pathlen));
+
+        logrecord = journal_log_prefix(record, opcode, rec);
+        logrecord = log_version(logrecord, new_file_ver);
+        logrecord = logit(logrecord, &ino, sizeof(ino));
+        logrecord = logit(logrecord, &generation, sizeof(generation));
+        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
+        logrecord = logit(logrecord, &remote_ino, sizeof(remote_ino));
+        logrecord = logit(logrecord, &remote_generation,
+                          sizeof(remote_generation));
+        logrecord = log_version(logrecord, remote_version);
+        logrecord = logit(logrecord, &rec->offset, sizeof(rec->offset));
+        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
+
+        error = presto_log(fset, rec, record, size,
+                           path, size_round(le32_to_cpu(pathlen)),
+                           NULL, 0, NULL, 0);
+
+        BUFF_FREE(buffer);
+
+        EXIT;
+        return error;
+}
+
+/* 
+ * Check if the given record is at the end of the file. If it is, truncate
+ * the lml to the record's offset, removing it. Repeat on prior record,
+ * until we reach an active record or a reserved record (as defined by the
+ * reservations list).
+ */
+static int presto_truncate_lml_tail(struct presto_file_set *fset)
+{
+        loff_t lml_tail;
+        loff_t lml_last_rec;
+        loff_t lml_last_recsize;
+        loff_t local_offset;
+        __u32 recno;
+        struct kml_prefix_hdr prefix;
+        struct inode *inode = fset->fset_lml.fd_file->f_dentry->d_inode;
+        void *handle;
+        int rc;
+
+        ENTRY;
+        /* If someone else is already truncating the LML, return. */
+        write_lock(&fset->fset_lml.fd_lock); 
+        if (fset->fset_lml.fd_truncating == 1 ) {
+                write_unlock(&fset->fset_lml.fd_lock); 
+                EXIT;
+                return 0;
+        }
+        /* someone is about to write to the end of the LML */ 
+        if ( !list_empty(&fset->fset_lml.fd_reservations) ) {
+                write_unlock(&fset->fset_lml.fd_lock); 
+                EXIT;
+                return 0;
+        }
+        lml_tail = fset->fset_lml.fd_file->f_dentry->d_inode->i_size;
+        /* Nothing to truncate? */
+        if (lml_tail == 0) {
+                write_unlock(&fset->fset_lml.fd_lock);
+                EXIT;
+                return 0;
+        }
+        fset->fset_lml.fd_truncating = 1;
+        write_unlock(&fset->fset_lml.fd_lock);
+
+        presto_last_record(&fset->fset_lml, &lml_last_recsize,
+                           &lml_last_rec, &recno, lml_tail);
+        /* Do we have a record to check?  If not, we have zeroes at the
+           beginning of the file. -SHP
+        */
+        if (lml_last_recsize != 0) {
+                local_offset = lml_last_rec - lml_last_recsize;
+                rc = presto_fread(fset->fset_lml.fd_file, (char *)&prefix,  
+                                        sizeof(prefix), &local_offset); 
+                if (rc != sizeof(prefix)) {
+                        EXIT;
+                        goto tr_out;
+                }
+
+                if ( prefix.opcode != KML_OPCODE_NOOP ) {
+                        EXIT;
+                        rc = 0;
+                        /* We may have zeroes at the end of the file, should
+                           we clear them out? -SHP
+                        */
+                        goto tr_out;
+                }
+        } else
+                lml_last_rec = 0;
+
+        handle = presto_trans_start(fset, inode, KML_OPCODE_TRUNC);
+        if ( IS_ERR(handle) ) {
+                EXIT;
+                rc = -ENOMEM;
+                goto tr_out;
+        }
+
+        rc = izo_do_truncate(fset, fset->fset_lml.fd_file->f_dentry, 
+                                lml_last_rec - lml_last_recsize, lml_tail);
+        presto_trans_commit(fset, handle); 
+        if ( rc == 0 ) {
+                rc = 1;
+        }
+        EXIT;
+
+ tr_out:
+        CDEBUG(D_JOURNAL, "rc = %d\n", rc);
+        write_lock(&fset->fset_lml.fd_lock);
+        fset->fset_lml.fd_truncating = 0;
+        write_unlock(&fset->fset_lml.fd_lock);
+        return rc;
+}
+
+int presto_truncate_lml(struct presto_file_set *fset)
+{
+        int rc; 
+        ENTRY;
+        
+        while ( (rc = presto_truncate_lml_tail(fset)) > 0);
+        if ( rc < 0 && rc != -EALREADY) {
+                CERROR("truncate_lml error %d\n", rc); 
+        }
+        EXIT;
+        return rc;
+}
+
+int presto_clear_lml_close(struct presto_file_set *fset, loff_t lml_offset)
+{
+        int rc;
+        struct kml_prefix_hdr record;
+        loff_t offset = lml_offset;
+
+        ENTRY;
+
+        if ( presto_no_journal(fset) ) {
+                EXIT;
+                return 0;
+        }
+
+        CDEBUG(D_JOURNAL, "reading prefix: off %ld, size %Zd\n", 
+               (long)lml_offset, sizeof(record));
+        rc = presto_fread(fset->fset_lml.fd_file, (char *)&record,
+                          sizeof(record), &offset);
+
+        if ( rc != sizeof(record) ) {
+                CERROR("presto: clear_lml io error %d\n", rc); 
+                EXIT;
+                return -EIO;
+        }
+
+        /* overwrite the prefix */ 
+        CDEBUG(D_JOURNAL, "overwriting prefix: off %ld\n", (long)lml_offset);
+        record.opcode = KML_OPCODE_NOOP;
+        offset = lml_offset;
+        /* note: this does just a single transaction in the cache */
+        rc = presto_fwrite(fset->fset_lml.fd_file, (char *)(&record), 
+                              sizeof(record), &offset);
+        if ( rc != sizeof(record) ) {
+                EXIT;
+                return -EIO;
+        }
+
+        EXIT;
+        return 0; 
+}
+
+
+
+/* now a journal function for every operation */
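+/* Each presto_journal_* routine below follows the same pattern: build a
+ * fixed-size record on the stack (prefix, directory/file versions, the
+ * operation's attributes and path lengths, suffix), then hand it together
+ * with the variable-length path/name strings to presto_log(). */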
+
+int presto_journal_setattr(struct rec_info *rec, struct presto_file_set *fset,
+                           struct dentry *dentry, struct presto_version *old_ver,
+                           struct izo_rollback_data *rb, struct iattr *iattr)
+{
+        int opcode = KML_OPCODE_SETATTR;
+        char *buffer, *path, *logrecord, record[316];
+        struct dentry *root;
+        __u32 uid, gid, mode, valid, flags, pathlen;
+        __u64 fsize, mtime, ctime;
+        int error, size;
+
+        ENTRY;
+        if ( presto_no_journal(fset) ) {
+                EXIT;
+                return 0;
+        }
+
+        if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0) 
+            || ((dentry->d_parent != dentry) && d_unhashed(dentry))) {
+                EXIT;
+                return 0;
+        }
+
+        root = fset->fset_dentry;
+
+        BUFF_ALLOC(buffer, NULL);
+        path = presto_path(dentry, root, buffer, PAGE_SIZE);
+        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
+        size =  sizeof(__u32) * current->group_info->ngroups + 
+                sizeof(struct kml_prefix_hdr) + sizeof(*old_ver) +
+                sizeof(valid) + sizeof(mode) + sizeof(uid) + sizeof(gid) +
+                sizeof(fsize) + sizeof(mtime) + sizeof(ctime) + sizeof(flags) +
+                sizeof(pathlen) + sizeof(*rb) + sizeof(struct kml_suffix);
+
+        if ( size > sizeof(record) )
+                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
+
+        /* Only journal one kind of mtime, and not atime at all.  Also don't
+         * journal bogus data in iattr, to make the journal more compressible.
+         */
+        if (iattr->ia_valid & ATTR_MTIME_SET)
+                iattr->ia_valid = iattr->ia_valid | ATTR_MTIME;
+        valid = cpu_to_le32(iattr->ia_valid & ~(ATTR_ATIME | ATTR_MTIME_SET |
+                                                ATTR_ATIME_SET));
+        mode = iattr->ia_valid & ATTR_MODE ? cpu_to_le32(iattr->ia_mode): 0;
+        uid = iattr->ia_valid & ATTR_UID ? cpu_to_le32(iattr->ia_uid): 0;
+        gid = iattr->ia_valid & ATTR_GID ? cpu_to_le32(iattr->ia_gid): 0;
+        fsize = iattr->ia_valid & ATTR_SIZE ? cpu_to_le64(iattr->ia_size): 0;
+        mtime = iattr->ia_valid & ATTR_MTIME ? cpu_to_le64(iattr->ia_mtime.tv_sec): 0;
+        ctime = iattr->ia_valid & ATTR_CTIME ? cpu_to_le64(iattr->ia_ctime.tv_sec): 0;
+        flags = iattr->ia_valid & ATTR_ATTR_FLAG ?
+                cpu_to_le32(iattr->ia_attr_flags): 0;
+
+        rec->is_kml = 1;
+        rec->size = size + size_round(le32_to_cpu(pathlen));
+
+        logrecord = journal_log_prefix(record, opcode, rec);
+        logrecord = log_version(logrecord, old_ver);
+        logrecord = logit(logrecord, &valid, sizeof(valid));
+        logrecord = logit(logrecord, &mode, sizeof(mode));
+        logrecord = logit(logrecord, &uid, sizeof(uid));
+        logrecord = logit(logrecord, &gid, sizeof(gid));
+        logrecord = logit(logrecord, &fsize, sizeof(fsize));
+        logrecord = logit(logrecord, &mtime, sizeof(mtime));
+        logrecord = logit(logrecord, &ctime, sizeof(ctime));
+        logrecord = logit(logrecord, &flags, sizeof(flags));
+        logrecord = log_rollback(logrecord, rb);
+        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
+        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
+
+        error = presto_log(fset, rec, record, size,
+                           path, size_round(le32_to_cpu(pathlen)),
+                           NULL, 0, NULL, 0);
+
+        BUFF_FREE(buffer);
+        EXIT;
+        return error;
+}
+
+int presto_get_fileid(int minor, struct presto_file_set *fset,
+                      struct dentry *dentry)
+{
+        int opcode = KML_OPCODE_GET_FILEID;
+        struct rec_info rec;
+        char *buffer, *path, *logrecord, record[4096]; /*include path*/
+        struct dentry *root;
+        __u32 uid, gid, pathlen;
+        int error, size;
+        struct kml_suffix *suffix;
+
+        ENTRY;
+
+        root = fset->fset_dentry;
+
+        uid = cpu_to_le32(dentry->d_inode->i_uid);
+        gid = cpu_to_le32(dentry->d_inode->i_gid);
+        BUFF_ALLOC(buffer, NULL);
+        path = presto_path(dentry, root, buffer, PAGE_SIZE);
+        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
+        size =  sizeof(__u32) * current->group_info->ngroups + 
+                sizeof(struct kml_prefix_hdr) + sizeof(pathlen) +
+                size_round(le32_to_cpu(pathlen)) +
+                sizeof(struct kml_suffix);
+
+        CDEBUG(D_FILE, "kml size: %d\n", size);
+        if ( size > sizeof(record) )
+                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
+
+        memset(&rec, 0, sizeof(rec));
+        rec.is_kml = 1;
+        rec.size = size;
+
+        logrecord = journal_log_prefix(record, opcode, &rec);
+        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
+        logrecord = logit(logrecord, path, size_round(le32_to_cpu(pathlen)));
+        suffix = (struct kml_suffix *)logrecord;
+        logrecord = journal_log_suffix(logrecord, record, fset, dentry, &rec);
+        /* journal_log_suffix expects journal_log to set this */
+        suffix->recno = 0;
+
+        CDEBUG(D_FILE, "actual kml size: %Zd\n", logrecord - record);
+        CDEBUG(D_FILE, "get fileid: uid %d, gid %d, path: %s\n", uid, gid,path);
+
+        error = izo_upc_get_fileid(minor, size, record, 
+                                   size_round(le32_to_cpu(pathlen)), path,
+                                   fset->fset_name);
+
+        BUFF_FREE(buffer);
+        EXIT;
+        return error;
+}
+
+int presto_journal_create(struct rec_info *rec, struct presto_file_set *fset,
+                          struct dentry *dentry,
+                          struct presto_version *tgt_dir_ver,
+                          struct presto_version *new_file_ver, int mode)
+{
+        int opcode = KML_OPCODE_CREATE;
+        char *buffer, *path, *logrecord, record[292];
+        struct dentry *root;
+        __u32 uid, gid, lmode, pathlen;
+        int error, size;
+
+        ENTRY;
+        if ( presto_no_journal(fset) ) {
+                EXIT;
+                return 0;
+        }
+
+        root = fset->fset_dentry;
+
+        uid = cpu_to_le32(dentry->d_inode->i_uid);
+        gid = cpu_to_le32(dentry->d_inode->i_gid);
+        lmode = cpu_to_le32(mode);
+        BUFF_ALLOC(buffer, NULL);
+        path = presto_path(dentry, root, buffer, PAGE_SIZE);
+        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
+        size =  sizeof(__u32) * current->group_info->ngroups + 
+                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
+                sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) +
+                sizeof(struct kml_suffix);
+
+        if ( size > sizeof(record) )
+                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
+
+        rec->is_kml = 1;
+        rec->size = size + size_round(le32_to_cpu(pathlen));
+
+        logrecord = journal_log_prefix(record, opcode, rec);
+        logrecord = log_version(logrecord, tgt_dir_ver);
+        logrecord = log_dentry_version(logrecord, dentry->d_parent);
+        logrecord = log_version(logrecord, new_file_ver);
+        logrecord = logit(logrecord, &lmode, sizeof(lmode));
+        logrecord = logit(logrecord, &uid, sizeof(uid));
+        logrecord = logit(logrecord, &gid, sizeof(gid));
+        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
+        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
+
+        error = presto_log(fset, rec, record, size,
+                           path, size_round(le32_to_cpu(pathlen)),
+                           NULL, 0, NULL, 0);
+
+        BUFF_FREE(buffer);
+        EXIT;
+        return error;
+}
+
+int presto_journal_symlink(struct rec_info *rec, struct presto_file_set *fset,
+                           struct dentry *dentry, const char *target,
+                           struct presto_version *tgt_dir_ver,
+                           struct presto_version *new_link_ver)
+{
+        int opcode = KML_OPCODE_SYMLINK;
+        char *buffer, *path, *logrecord, record[292];
+        struct dentry *root;
+        __u32 uid, gid, pathlen;
+        __u32 targetlen = cpu_to_le32(strlen(target));
+        int error, size;
+
+        ENTRY;
+        if ( presto_no_journal(fset) ) {
+                EXIT;
+                return 0;
+        }
+
+        root = fset->fset_dentry;
+
+        uid = cpu_to_le32(dentry->d_inode->i_uid);
+        gid = cpu_to_le32(dentry->d_inode->i_gid);
+
+        BUFF_ALLOC(buffer, NULL);
+        path = presto_path(dentry, root, buffer, PAGE_SIZE);
+        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
+        size =  sizeof(__u32) * current->group_info->ngroups + 
+                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
+                sizeof(uid) + sizeof(gid) + sizeof(pathlen) +
+                sizeof(targetlen) + sizeof(struct kml_suffix);
+
+        if ( size > sizeof(record) )
+                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
+
+        rec->is_kml = 1;
+        rec->size = size + size_round(le32_to_cpu(pathlen)) +
+                size_round(le32_to_cpu(targetlen));
+
+        logrecord = journal_log_prefix(record, opcode, rec);
+        logrecord = log_version(logrecord, tgt_dir_ver);
+        logrecord = log_dentry_version(logrecord, dentry->d_parent);
+        logrecord = log_version(logrecord, new_link_ver);
+        logrecord = logit(logrecord, &uid, sizeof(uid));
+        logrecord = logit(logrecord, &gid, sizeof(gid));
+        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
+        logrecord = logit(logrecord, &targetlen, sizeof(targetlen));
+        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
+
+        error = presto_log(fset, rec, record, size,
+                           path, size_round(le32_to_cpu(pathlen)),
+                           target, size_round(le32_to_cpu(targetlen)),
+                           NULL, 0);
+
+        BUFF_FREE(buffer);
+        EXIT;
+        return error;
+}
+
+int presto_journal_mkdir(struct rec_info *rec, struct presto_file_set *fset,
+                         struct dentry *dentry,
+                         struct presto_version *tgt_dir_ver,
+                         struct presto_version *new_dir_ver, int mode)
+{
+        int opcode = KML_OPCODE_MKDIR;
+        char *buffer, *path, *logrecord, record[292];
+        struct dentry *root;
+        __u32 uid, gid, lmode, pathlen;
+        int error, size;
+
+        ENTRY;
+        if ( presto_no_journal(fset) ) {
+                EXIT;
+                return 0;
+        }
+
+        root = fset->fset_dentry;
+
+        uid = cpu_to_le32(dentry->d_inode->i_uid);
+        gid = cpu_to_le32(dentry->d_inode->i_gid);
+        lmode = cpu_to_le32(mode);
+
+        BUFF_ALLOC(buffer, NULL);
+        path = presto_path(dentry, root, buffer, PAGE_SIZE);
+        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
+        size = sizeof(__u32) * current->group_info->ngroups + 
+                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
+                sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) +
+                sizeof(struct kml_suffix);
+
+        if ( size > sizeof(record) )
+                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
+
+        rec->is_kml = 1;
+        rec->size = size + size_round(le32_to_cpu(pathlen));
+        logrecord = journal_log_prefix(record, opcode, rec);
+
+        logrecord = log_version(logrecord, tgt_dir_ver);
+        logrecord = log_dentry_version(logrecord, dentry->d_parent);
+        logrecord = log_version(logrecord, new_dir_ver);
+        logrecord = logit(logrecord, &lmode, sizeof(lmode));
+        logrecord = logit(logrecord, &uid, sizeof(uid));
+        logrecord = logit(logrecord, &gid, sizeof(gid));
+        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
+        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
+
+        error = presto_log(fset, rec, record, size,
+                           path, size_round(le32_to_cpu(pathlen)),
+                           NULL, 0, NULL, 0);
+
+        BUFF_FREE(buffer);
+        EXIT;
+        return error;
+}
+
+
+int
+presto_journal_rmdir(struct rec_info *rec, struct presto_file_set *fset,
+                     struct dentry *dir, struct presto_version *tgt_dir_ver,
+                     struct presto_version *old_dir_ver,
+                     struct izo_rollback_data *rb, int len, const char *name)
+{
+        int opcode = KML_OPCODE_RMDIR;
+        char *buffer, *path, *logrecord, record[316];
+        __u32 pathlen, llen;
+        struct dentry *root;
+        int error, size;
+
+        ENTRY;
+        if ( presto_no_journal(fset) ) {
+                EXIT;
+                return 0;
+        }
+
+        root = fset->fset_dentry;
+
+        llen = cpu_to_le32(len);
+        BUFF_ALLOC(buffer, NULL);
+        path = presto_path(dir, root, buffer, PAGE_SIZE);
+        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
+        size =  sizeof(__u32) * current->group_info->ngroups + 
+                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
+                sizeof(pathlen) + sizeof(llen) + sizeof(*rb) +
+                sizeof(struct kml_suffix);
+
+        if ( size > sizeof(record) )
+                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
+
+        CDEBUG(D_JOURNAL, "path: %s (%d), name: %s (%d), size %d\n",
+               path, pathlen, name, len, size);
+
+        rec->is_kml = 1;
+        rec->size = size + size_round(le32_to_cpu(pathlen)) + 
+                size_round(len);
+
+        logrecord = journal_log_prefix(record, opcode, rec);
+        logrecord = log_version(logrecord, tgt_dir_ver);
+        logrecord = log_dentry_version(logrecord, dir);
+        logrecord = log_version(logrecord, old_dir_ver);
+        logrecord = logit(logrecord, rb, sizeof(*rb));
+        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
+        logrecord = logit(logrecord, &llen, sizeof(llen));
+        logrecord = journal_log_suffix(logrecord, record, fset, dir, rec);
+        error = presto_log(fset, rec, record, size,
+                           path, size_round(le32_to_cpu(pathlen)),
+                           name, size_round(len),
+                           NULL, 0);
+
+        BUFF_FREE(buffer);
+        EXIT;
+        return error;
+}
+
+
+int
+presto_journal_mknod(struct rec_info *rec, struct presto_file_set *fset,
+                     struct dentry *dentry, struct presto_version *tgt_dir_ver,
+                     struct presto_version *new_node_ver, int mode,
+                     int dmajor, int dminor )
+{
+        int opcode = KML_OPCODE_MKNOD;
+        char *buffer, *path, *logrecord, record[292];
+        struct dentry *root;
+        __u32 uid, gid, lmode, lmajor, lminor, pathlen;
+        int error, size;
+
+        ENTRY;
+        if ( presto_no_journal(fset) ) {
+                EXIT;
+                return 0;
+        }
+
+        root = fset->fset_dentry;
+
+        uid = cpu_to_le32(dentry->d_inode->i_uid);
+        gid = cpu_to_le32(dentry->d_inode->i_gid);
+        lmode = cpu_to_le32(mode);
+        lmajor = cpu_to_le32(dmajor);
+        lminor = cpu_to_le32(dminor);
+
+        BUFF_ALLOC(buffer, NULL);
+        path = presto_path(dentry, root, buffer, PAGE_SIZE);
+        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
+        size = sizeof(__u32) * current->group_info->ngroups + 
+                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
+                sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(lmajor) +
+                sizeof(lminor) + sizeof(pathlen) +
+                sizeof(struct kml_suffix);
+
+        if ( size > sizeof(record) )
+                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
+
+        rec->is_kml = 1;
+        rec->size = size + size_round(le32_to_cpu(pathlen));
+
+        logrecord = journal_log_prefix(record, opcode, rec);
+        logrecord = log_version(logrecord, tgt_dir_ver);
+        logrecord = log_dentry_version(logrecord, dentry->d_parent);
+        logrecord = log_version(logrecord, new_node_ver);
+        logrecord = logit(logrecord, &lmode, sizeof(lmode));
+        logrecord = logit(logrecord, &uid, sizeof(uid));
+        logrecord = logit(logrecord, &gid, sizeof(gid));
+        logrecord = logit(logrecord, &lmajor, sizeof(lmajor));
+        logrecord = logit(logrecord, &lminor, sizeof(lminor));
+        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
+        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
+
+        error = presto_log(fset, rec, record, size,
+                           path, size_round(le32_to_cpu(pathlen)),
+                           NULL, 0, NULL, 0);
+
+        BUFF_FREE(buffer);
+        EXIT;
+        return error;
+}
+
+int
+presto_journal_link(struct rec_info *rec, struct presto_file_set *fset,
+                    struct dentry *src, struct dentry *tgt,
+                    struct presto_version *tgt_dir_ver,
+                    struct presto_version *new_link_ver)
+{
+        int opcode = KML_OPCODE_LINK;
+        char *buffer, *srcbuffer, *path, *srcpath, *logrecord, record[292];
+        __u32 pathlen, srcpathlen;
+        struct dentry *root;
+        int error, size;
+
+        ENTRY;
+        if ( presto_no_journal(fset) ) {
+                EXIT;
+                return 0;
+        }
+
+        root = fset->fset_dentry;
+
+        BUFF_ALLOC(srcbuffer, NULL);
+        srcpath = presto_path(src, root, srcbuffer, PAGE_SIZE);
+        srcpathlen = cpu_to_le32(MYPATHLEN(srcbuffer, srcpath));
+
+        BUFF_ALLOC(buffer, srcbuffer);
+        path = presto_path(tgt, root, buffer, PAGE_SIZE);
+        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
+        size =  sizeof(__u32) * current->group_info->ngroups + 
+                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
+                sizeof(srcpathlen) + sizeof(pathlen) +
+                sizeof(struct kml_suffix);
+
+        if ( size > sizeof(record) )
+                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
+
+        rec->is_kml = 1;
+        rec->size = size + size_round(le32_to_cpu(pathlen)) + 
+                size_round(le32_to_cpu(srcpathlen));
+
+        logrecord = journal_log_prefix(record, opcode, rec);
+        logrecord = log_version(logrecord, tgt_dir_ver);
+        logrecord = log_dentry_version(logrecord, tgt->d_parent);
+        logrecord = log_version(logrecord, new_link_ver);
+        logrecord = logit(logrecord, &srcpathlen, sizeof(srcpathlen));
+        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
+        logrecord = journal_log_suffix(logrecord, record, fset, tgt, rec);
+
+        error = presto_log(fset, rec, record, size,
+                           srcpath, size_round(le32_to_cpu(srcpathlen)),
+                           path, size_round(le32_to_cpu(pathlen)),
+                           NULL, 0);
+
+        BUFF_FREE(srcbuffer);
+        BUFF_FREE(buffer);
+        EXIT;
+        return error;
+}
+
+
+int presto_journal_rename(struct rec_info *rec, struct presto_file_set *fset,
+                          struct dentry *src, struct dentry *tgt,
+                          struct presto_version *src_dir_ver,
+                          struct presto_version *tgt_dir_ver)
+{
+        int opcode = KML_OPCODE_RENAME;
+        char *buffer, *srcbuffer, *path, *srcpath, *logrecord, record[292];
+        __u32 pathlen, srcpathlen;
+        struct dentry *root;
+        int error, size;
+
+        ENTRY;
+        if ( presto_no_journal(fset) ) {
+                EXIT;
+                return 0;
+        }
+
+        root = fset->fset_dentry;
+
+        BUFF_ALLOC(srcbuffer, NULL);
+        srcpath = presto_path(src, root, srcbuffer, PAGE_SIZE);
+        srcpathlen = cpu_to_le32(MYPATHLEN(srcbuffer, srcpath));
+
+        BUFF_ALLOC(buffer, srcbuffer);
+        path = presto_path(tgt, root, buffer, PAGE_SIZE);
+        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
+        size =  sizeof(__u32) * current->group_info->ngroups + 
+                sizeof(struct kml_prefix_hdr) + 4 * sizeof(*src_dir_ver) +
+                sizeof(srcpathlen) + sizeof(pathlen) +
+                sizeof(struct kml_suffix);
+
+        if ( size > sizeof(record) )
+                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
+
+        rec->is_kml = 1;
+        rec->size = size + size_round(le32_to_cpu(pathlen)) + 
+                size_round(le32_to_cpu(srcpathlen));
+
+        logrecord = journal_log_prefix(record, opcode, rec);
+        logrecord = log_version(logrecord, src_dir_ver);
+        logrecord = log_dentry_version(logrecord, src->d_parent);
+        logrecord = log_version(logrecord, tgt_dir_ver);
+        logrecord = log_dentry_version(logrecord, tgt->d_parent);
+        logrecord = logit(logrecord, &srcpathlen, sizeof(srcpathlen));
+        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
+        logrecord = journal_log_suffix(logrecord, record, fset, tgt, rec);
+
+        error = presto_log(fset, rec, record, size,
+                           srcpath, size_round(le32_to_cpu(srcpathlen)),
+                           path, size_round(le32_to_cpu(pathlen)),
+                           NULL, 0);
+
+        BUFF_FREE(buffer);
+        BUFF_FREE(srcbuffer);
+        EXIT;
+        return error;
+}
+
+int presto_journal_unlink(struct rec_info *rec, struct presto_file_set *fset,
+                          struct dentry *dir, struct presto_version *tgt_dir_ver,
+                          struct presto_version *old_file_ver,
+                          struct izo_rollback_data *rb, struct dentry *dentry,
+                          char *old_target, int old_targetlen)
+{
+        int opcode = KML_OPCODE_UNLINK;
+        char *buffer, *path, *logrecord, record[316];
+        const char *name;
+        __u32 pathlen, llen;
+        struct dentry *root;
+        int error, size, len;
+
+        ENTRY;
+        if ( presto_no_journal(fset) ) {
+                EXIT;
+                return 0;
+        }
+
+        root = fset->fset_dentry;
+
+        name = dentry->d_name.name;
+        len = dentry->d_name.len;
+
+        llen = cpu_to_le32(len);
+        BUFF_ALLOC(buffer, NULL);
+        path = presto_path(dir, root, buffer, PAGE_SIZE);
+        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
+        size = sizeof(__u32) * current->group_info->ngroups + 
+                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
+                sizeof(pathlen) + sizeof(llen) + sizeof(*rb) +
+                sizeof(old_targetlen) + sizeof(struct kml_suffix);
+
+        if ( size > sizeof(record) )
+                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
+
+        rec->is_kml = 1;
+        rec->size = size + size_round(le32_to_cpu(pathlen)) + size_round(len) +
+                size_round(old_targetlen);
+
+        logrecord = journal_log_prefix(record, opcode, rec);
+        logrecord = log_version(logrecord, tgt_dir_ver);
+        logrecord = log_dentry_version(logrecord, dir);
+        logrecord = log_version(logrecord, old_file_ver);
+        logrecord = log_rollback(logrecord, rb);
+        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
+        logrecord = logit(logrecord, &llen, sizeof(llen));
+        logrecord = logit(logrecord, &old_targetlen, sizeof(old_targetlen));
+        logrecord = journal_log_suffix(logrecord, record, fset, dir, rec);
+
+        error = presto_log(fset, rec, record, size,
+                           path, size_round(le32_to_cpu(pathlen)),
+                           name, size_round(len),
+                           old_target, size_round(old_targetlen));
+
+        BUFF_FREE(buffer);
+        EXIT;
+        return error;
+}
+
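+/* Journal a file close.  The credentials and mode recorded are those
+ * captured at open time (from the presto_file_data) when available,
+ * presumably so that reintegration replays the close with the opener's
+ * identity. */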
+int
+presto_journal_close(struct rec_info *rec, struct presto_file_set *fset,
+                     struct presto_file_data *fd, struct dentry *dentry,
+                     struct presto_version *old_file_ver,
+                     struct presto_version *new_file_ver)
+{
+        int opcode = KML_OPCODE_CLOSE;
+        char *buffer, *path, *logrecord, record[316];
+        struct dentry *root;
+        int error, size, i;
+        __u32 pathlen, generation;
+        __u64 ino;
+        __u32 open_fsuid;
+        __u32 open_fsgid;
+        __u32 open_ngroups;
+        __u32 open_groups[NGROUPS_SMALL];
+        __u32 open_mode;
+        __u32 open_uid;
+        __u32 open_gid;
+
+        ENTRY;
+
+        if ( presto_no_journal(fset) ) {
+                EXIT;
+                return 0;
+        }
+
+        if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0) 
+            || ((dentry->d_parent != dentry) && d_unhashed(dentry))) {
+                EXIT;
+                return 0;
+        }
+
+        root = fset->fset_dentry;
+
+        if (fd) {
+                open_ngroups = fd->fd_ngroups;
+                for (i = 0; i < fd->fd_ngroups; i++)
+                        open_groups[i] = (__u32) fd->fd_groups[i];
+                open_mode = fd->fd_mode;
+                open_uid = fd->fd_uid;
+                open_gid = fd->fd_gid;
+                open_fsuid = fd->fd_fsuid;
+                open_fsgid = fd->fd_fsgid;
+        } else {
+                open_ngroups = current->group_info->ngroups;
+                for (i = 0; i < current->group_info->ngroups; i++)
+                        open_groups[i] =  (__u32) GROUP_AT(current->group_info,i); 
+                open_mode = dentry->d_inode->i_mode;
+                open_uid = dentry->d_inode->i_uid;
+                open_gid = dentry->d_inode->i_gid;
+                open_fsuid = current->fsuid;
+                open_fsgid = current->fsgid;
+        }
+        BUFF_ALLOC(buffer, NULL);
+        path = presto_path(dentry, root, buffer, PAGE_SIZE);
+        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
+        ino = cpu_to_le64(dentry->d_inode->i_ino);
+        generation = cpu_to_le32(dentry->d_inode->i_generation);
+        size =  sizeof(__u32) * open_ngroups +
+                sizeof(open_mode) + sizeof(open_uid) + sizeof(open_gid) +
+                sizeof(struct kml_prefix_hdr) + sizeof(*old_file_ver) +
+                sizeof(*new_file_ver) + sizeof(ino) + sizeof(generation) +
+                sizeof(pathlen) + sizeof(struct kml_suffix);
+
+        if ( size > sizeof(record) )
+                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
+
+        rec->is_kml = 1;
+        rec->size = size + size_round(le32_to_cpu(pathlen));
+
+        logrecord = journal_log_prefix_with_groups_and_ids(
+                record, opcode, rec, open_ngroups, open_groups,
+                open_fsuid, open_fsgid);
+        logrecord = logit(logrecord, &open_mode, sizeof(open_mode));
+        logrecord = logit(logrecord, &open_uid, sizeof(open_uid));
+        logrecord = logit(logrecord, &open_gid, sizeof(open_gid));
+        logrecord = log_version(logrecord, old_file_ver);
+        logrecord = log_version(logrecord, new_file_ver);
+        logrecord = logit(logrecord, &ino, sizeof(ino));
+        logrecord = logit(logrecord, &generation, sizeof(generation));
+        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
+        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
+
+        error = presto_log(fset, rec, record, size,
+                           path, size_round(le32_to_cpu(pathlen)),
+                           NULL, 0, NULL, 0);
+        BUFF_FREE(buffer);
+
+        EXIT;
+        return error;
+}
+
+int presto_rewrite_close(struct rec_info *rec, struct presto_file_set *fset, 
+                         char *path, __u32 pathlen, 
+                         int ngroups, __u32 *groups, 
+                         __u64 ino,     __u32 generation, 
+                         struct presto_version *new_file_ver)
+{
+        int opcode = KML_OPCODE_CLOSE;
+        char *logrecord, record[292];
+        struct dentry *root;
+        int error, size;
+
+        ENTRY;
+
+        if ( presto_no_journal(fset) ) {
+                EXIT;
+                return 0;
+        }
+
+        root = fset->fset_dentry;
+
+        size =  sizeof(__u32) * ngroups + 
+                sizeof(struct kml_prefix_hdr) + sizeof(*new_file_ver) +
+                sizeof(ino) + sizeof(generation) + 
+                sizeof(le32_to_cpu(pathlen)) +
+                sizeof(struct kml_suffix);
+
+        if ( size > sizeof(record) )
+                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
+
+        rec->is_kml = 1;
+        rec->size = size + size_round(le32_to_cpu(pathlen));
+
+        logrecord = journal_log_prefix_with_groups(record, opcode, rec,
+                                                   ngroups, groups);
+        logrecord = log_version(logrecord, new_file_ver);
+        logrecord = logit(logrecord, &ino, sizeof(ino));
+        logrecord = logit(logrecord, &generation, sizeof(generation));
+        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
+        logrecord = journal_log_suffix(logrecord, record, fset, NULL, rec);
+
+        error = presto_log(fset, rec, record, size,
+                           path, size_round(le32_to_cpu(pathlen)),
+                           NULL, 0, NULL, 0);
+
+        EXIT;
+        return error;
+}
+
+
+/* write closes for the local close records in the LML */ 
+int presto_complete_lml(struct presto_file_set *fset)
+{
+        __u32 groups[NGROUPS_SMALL];
+        loff_t lml_offset;
+        loff_t read_offset; 
+        char *buffer;
+        void *handle;
+        struct rec_info rec;
+        struct close_rec { 
+                struct presto_version new_file_ver;
+                __u64 ino;
+                __u32 generation;
+                __u32 pathlen;
+                __u64 remote_ino;
+                __u32 remote_generation;
+                __u32 remote_version;
+                __u64 lml_offset;
+        } close_rec; 
+        struct file *file = fset->fset_lml.fd_file;
+        struct kml_prefix_hdr prefix;
+        int rc = 0;
+        ENTRY;
+
+        lml_offset = 0; 
+ again: 
+        if (lml_offset >= file->f_dentry->d_inode->i_size) {
+                EXIT;
+                return rc;
+        }
+
+        read_offset = lml_offset;
+        rc = presto_fread(file, (char *)&prefix,
+                          sizeof(prefix), &read_offset);
+        if ( rc != sizeof(prefix) ) {
+                EXIT;
+                CERROR("presto_complete_lml: ioerror - 1, tell Peter\n");
+                return -EIO;
+        }
+
+        if ( prefix.opcode == KML_OPCODE_NOOP ) {
+                lml_offset += prefix.len; 
+                goto again; 
+        }
+
+        if ( prefix.ngroups > NGROUPS_SMALL ) {
+                /* groups[] on the stack only holds NGROUPS_SMALL entries */
+                CERROR("presto_complete_lml: corrupt group count\n");
+                EXIT;
+                return -EIO;
+        }
+        rc = presto_fread(file, (char *)groups,
+                          prefix.ngroups * sizeof(__u32), &read_offset);
+        if ( rc != prefix.ngroups * sizeof(__u32) ) {
+                EXIT;
+                CERROR("presto_complete_lml: ioerror - 2, tell Peter\n");
+                return -EIO;
+        }
+
+        rc = presto_fread(file, (char *)&close_rec, 
+                          sizeof(close_rec), &read_offset); 
+        if ( rc != sizeof(close_rec) ) {
+                EXIT;
+                CERROR("presto_complete_lml: ioerror - 3, tell Peter\n");
+                return -EIO;
+        }
+
+        /* is this a backfetch or a close record? */ 
+        if ( le64_to_cpu(close_rec.remote_ino) != 0 ) { 
+                lml_offset += prefix.len;
+                goto again; 
+        }
+
+        BUFF_ALLOC(buffer, NULL);
+        rc = presto_fread(file, (char *)buffer,
+                          le32_to_cpu(close_rec.pathlen), &read_offset);
+        if ( rc != le32_to_cpu(close_rec.pathlen) ) {
+                EXIT;
+                CERROR("presto_complete_lml: ioerror - 4, tell Peter\n");
+                BUFF_FREE(buffer);
+                return -EIO;
+        }
+        
+        handle = presto_trans_start(fset, file->f_dentry->d_inode, 
+                                    KML_OPCODE_RELEASE);
+        if ( IS_ERR(handle) ) {
+                BUFF_FREE(buffer);
+                EXIT;
+                return -ENOMEM;
+        }
+
+        rc = presto_clear_lml_close(fset, lml_offset); 
+        if ( rc ) {
+                CERROR("error during clearing: %d\n", rc);
+                presto_trans_commit(fset, handle);
+                BUFF_FREE(buffer);
+                EXIT;
+                return rc;
+        }
+
+        rc = presto_rewrite_close(&rec, fset, buffer, close_rec.pathlen, 
+                                  prefix.ngroups, groups, 
+                                  close_rec.ino, close_rec.generation,
+                                  &close_rec.new_file_ver); 
+        if ( rc ) {
+                CERROR("error during rewrite close: %d\n", rc);
+                presto_trans_commit(fset, handle);
+                BUFF_FREE(buffer);
+                EXIT;
+                return rc;
+        }
+
+        presto_trans_commit(fset, handle); 
+        if ( rc ) { 
+                CERROR("error during truncation: %d\n", rc);
+                EXIT; 
+                return rc;
+        }
+        
+        BUFF_FREE(buffer);
+        lml_offset += prefix.len;
+        CDEBUG(D_JOURNAL, "next LML record at: %ld\n", (long)lml_offset);
+        goto again;
+
+        EXIT;
+        return -EINVAL;
+}
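+
+/* Editorial note (descriptive only): presto_complete_lml() above assumes
+ * each LML entry is laid out exactly as it reads it back:
+ *
+ *   struct kml_prefix_hdr | ngroups x __u32 | struct close_rec | path bytes
+ *
+ * NOOP entries are skipped by length alone, and entries whose remote_ino is
+ * non-zero are backfetch records rather than local closes, so they are
+ * skipped as well.
+ */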
+
+
+#ifdef CONFIG_FS_EXT_ATTR
+/* Journal an ea operation. A NULL buffer implies the attribute is 
+ * getting deleted. In this case we simply change the opcode, but nothing
+ * else is affected.
+ */
+int presto_journal_set_ext_attr (struct rec_info *rec, 
+                                 struct presto_file_set *fset, 
+                                 struct dentry *dentry, 
+                                 struct presto_version *ver, const char *name, 
+                                 const char *buffer, int buffer_len, 
+                                 int flags) 
+{ 
+        int opcode = (buffer == NULL) ? 
+                     KML_OPCODE_DELEXTATTR : 
+                     KML_OPCODE_SETEXTATTR ;
+        char *temp, *path, *logrecord, record[292];
+        struct dentry *root;
+        int error, size;
+        __u32 namelen=cpu_to_le32(strnlen(name,PRESTO_EXT_ATTR_NAME_MAX));
+        __u32 buflen=(buffer != NULL)? cpu_to_le32(buffer_len): cpu_to_le32(0);
+        __u32 mode, pathlen;
+
+        ENTRY;
+        if ( presto_no_journal(fset) ) {
+                EXIT;
+                return 0;
+        }
+
+        if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0) 
+            || ((dentry->d_parent != dentry) && d_unhashed(dentry))) {
+                EXIT;
+                return 0;
+        }
+
+        root = fset->fset_dentry;
+
+        BUFF_ALLOC(temp, NULL);
+        path = presto_path(dentry, root, temp, PAGE_SIZE);
+        pathlen = cpu_to_le32(MYPATHLEN(temp, path));
+
+        flags=cpu_to_le32(flags);
+        /* Ugly, but needed: POSIX ACLs change the mode without going
+         * through setattr, so we need to record those changes here. The
+         * EA code itself is not really affected.
+         */
+        mode=cpu_to_le32(dentry->d_inode->i_mode);
+
+        size =  sizeof(__u32) * current->group_info->ngroups + 
+                sizeof(struct kml_prefix_hdr) + 
+                2 * sizeof(struct presto_version) +
+                sizeof(flags) + sizeof(mode) + sizeof(namelen) + 
+                sizeof(buflen) + sizeof(pathlen) + 
+                sizeof(struct kml_suffix);
+
+        if ( size > sizeof(record) )
+                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
+
+        rec->is_kml = 1;
+        /* Make space for a path, an attribute name and a value. */
+        /* We use the buflen instead of buffer_len to make sure that we 
+         * journal the right length. This may be a little paranoid, but
+         * with 64 bits round the corner, I would rather be safe than sorry!
+         * Also this handles deletes with non-zero buffer_lengths correctly.
+         * SHP
+         */
+        rec->size = size + size_round(le32_to_cpu(pathlen)) +
+                    size_round(le32_to_cpu(namelen)) + 
+                    size_round(le32_to_cpu(buflen));
+
+        logrecord = journal_log_prefix(record, opcode, rec);
+        logrecord = log_version(logrecord, ver);
+        logrecord = log_dentry_version(logrecord, dentry);
+        logrecord = logit(logrecord, &flags, sizeof(flags));
+        logrecord = logit(logrecord, &mode, sizeof(mode));
+        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
+        logrecord = logit(logrecord, &namelen, sizeof(namelen));
+        logrecord = logit(logrecord, &buflen, sizeof(buflen));
+        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
+
+        error = presto_log(fset, rec, record, size,
+                           path, size_round(le32_to_cpu(pathlen)),
+                           name, size_round(le32_to_cpu(namelen)),
+                           buffer, size_round(le32_to_cpu(buflen)));
+
+        BUFF_FREE(temp);
+        EXIT;
+        return error;
+}
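+
+/* Illustrative sketch (editorial; the call sites below are assumptions, not
+ * code from this patch): per the comment above, a delete is journalled by
+ * passing a NULL buffer, which only flips the opcode to KML_OPCODE_DELEXTATTR.
+ */
+#if 0
+        /* set or replace an attribute value */
+        error = presto_journal_set_ext_attr(rec, fset, dentry, ver,
+                                            "user.comment", value, value_len,
+                                            flags);
+        /* delete the same attribute: NULL buffer, zero length */
+        error = presto_journal_set_ext_attr(rec, fset, dentry, ver,
+                                            "user.comment", NULL, 0, flags);
+#endif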
+#endif
diff --git a/fs/intermezzo/journal_ext2.c b/fs/intermezzo/journal_ext2.c
new file mode 100644 (file)
index 0000000..d1cb293
--- /dev/null
@@ -0,0 +1,90 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+#include <linux/string.h>
+#include <linux/ext2_fs.h> 
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+#if defined(CONFIG_EXT2_FS)
+
+/* EXT2 has no journal, so transaction start/commit are no-ops here; only
+ * the free-space estimate below does real work. */
+static loff_t presto_e2_freespace(struct presto_cache *cache,
+                                  struct super_block *sb)
+{
+        loff_t freebl = le32_to_cpu(EXT2_SB(sb)->s_es->s_free_blocks_count);
+        loff_t avail = freebl -
+                le32_to_cpu(EXT2_SB(sb)->s_es->s_r_blocks_count);
+        return avail << EXT2_BLOCK_SIZE_BITS(sb);
+}
+
+/* start the filesystem journal operations */
+static void *presto_e2_trans_start(struct presto_file_set *fset, struct inode *inode, int op)
+{
+        __u32 avail_kmlblocks;
+
+        if ( presto_no_journal(fset) ||
+             strcmp(fset->fset_cache->cache_type, "ext2"))
+                return NULL;
+
+        avail_kmlblocks = EXT2_SB(inode->i_sb)->s_es->s_free_blocks_count;
+        
+        if ( avail_kmlblocks < 3 ) {
+                return ERR_PTR(-ENOSPC);
+        }
+        
+        if (  (op != KML_OPCODE_UNLINK && op != KML_OPCODE_RMDIR)
+              && avail_kmlblocks < 6 ) {
+                return ERR_PTR(-ENOSPC);
+        }
+        return (void *) 1;
+}
+
+static void presto_e2_trans_commit(struct presto_file_set *fset, void *handle)
+{
+        do {} while (0);
+}
+
+static int presto_e2_has_all_data(struct inode *inode)
+{
+        BUG();
+        return 0;
+}
+
+struct journal_ops presto_ext2_journal_ops = {
+        .tr_all_data            = presto_e2_has_all_data,
+        .tr_avail               = presto_e2_freespace,
+        .tr_start               = presto_e2_trans_start,
+        .tr_commit              = presto_e2_trans_commit,
+        .tr_journal_data        = NULL
+};
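+
+/* Editorial sketch (assumed call pattern, not code from this patch): each
+ * journal_ops table in this directory is driven through the
+ * presto_trans_start() / presto_trans_commit() wrappers seen in journal.c,
+ * which are assumed to dispatch to .tr_start and .tr_commit.
+ */
+#if 0
+        void *handle;
+
+        handle = presto_trans_start(fset, inode, KML_OPCODE_CREATE);
+        if (IS_ERR(handle))
+                return PTR_ERR(handle);   /* e.g. -ENOSPC from tr_start */
+        /* ... emit the KML record with presto_log() ... */
+        presto_trans_commit(fset, handle);
+#endif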
+
+#endif /* CONFIG_EXT2_FS */
diff --git a/fs/intermezzo/journal_ext3.c b/fs/intermezzo/journal_ext3.c
new file mode 100644 (file)
index 0000000..b847b61
--- /dev/null
@@ -0,0 +1,283 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
+ *  Copyright (C) 2000 Red Hat, Inc.
+ *  Copyright (C) 2000 Los Alamos National Laboratory
+ *  Copyright (C) 2000 TurboLinux, Inc.
+ *  Copyright (C) 2001 Mountain View Data, Inc.
+ *  Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/param.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+#include <linux/string.h>
+#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE)
+#include <linux/jbd.h>
+#include <linux/ext3_fs.h>
+#include <linux/ext3_jbd.h>
+#endif
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE)
+
+#define MAX_PATH_BLOCKS(inode) (PATH_MAX >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb))
+#define MAX_NAME_BLOCKS(inode) (NAME_MAX >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb))
+
+/* space requirements: 
+   presto_do_truncate: 
+        used to truncate the KML forward to next fset->chunksize boundary
+          - zero partial block
+          - update inode
+   presto_write_record: 
+        write header (< one block) 
+        write one path (< MAX_PATHLEN) 
+        possibly write another path (< MAX_PATHLEN)
+        write suffix (< one block) 
+   presto_update_last_rcvd
+        write one block
+*/
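+
+/* Editorial worked example (assuming a 4 KB block size, so that
+ * MAX_PATH_BLOCKS(inode) = PATH_MAX >> 12 = 1 and MAX_NAME_BLOCKS(inode) =
+ * NAME_MAX >> 12 = 0): for KML_OPCODE_SETATTR, presto_e3_trans_start() below
+ * reserves
+ *     jblocks = one_path_blks + trunc_blks + 1
+ *             = (4*EXT3_DATA_TRANS_BLOCKS + 1 + 3)
+ *             + (EXT3_DATA_TRANS_BLOCKS + 1) + 1
+ *             = 5*EXT3_DATA_TRANS_BLOCKS + 6 journal blocks.
+ */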
+
+static loff_t presto_e3_freespace(struct presto_cache *cache,
+                                         struct super_block *sb)
+{
+        loff_t freebl = le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count);
+        loff_t avail =   freebl - 
+                le32_to_cpu(EXT3_SB(sb)->s_es->s_r_blocks_count);
+        return (avail <<  EXT3_BLOCK_SIZE_BITS(sb));
+}
+
+/* start the filesystem journal operations */
+static void *presto_e3_trans_start(struct presto_file_set *fset, 
+                                   struct inode *inode, 
+                                   int op)
+{
+        int jblocks;
+        int trunc_blks, one_path_blks, extra_path_blks, 
+                extra_name_blks, lml_blks; 
+        __u32 avail_kmlblocks;
+        handle_t *handle;
+
+        if ( presto_no_journal(fset) ||
+             strcmp(fset->fset_cache->cache_type, "ext3")) {
+                CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n",
+                       fset->fset_cache->cache_type);
+                return NULL;
+        }
+
+        avail_kmlblocks = EXT3_SB(inode->i_sb)->s_es->s_free_blocks_count;
+        
+        if ( avail_kmlblocks < 3 ) {
+                return ERR_PTR(-ENOSPC);
+        }
+        
+        if (  (op != KML_OPCODE_UNLINK && op != KML_OPCODE_RMDIR)
+              && avail_kmlblocks < 6 ) {
+                return ERR_PTR(-ENOSPC);
+        }            
+
+        /* Need journal space for:
+             at least three writes to KML (two one block writes, one a path) 
+             possibly a second name (unlink, rmdir)
+             possibly a second path (symlink, rename)
+             a one block write to the last rcvd file 
+        */
+
+        trunc_blks = EXT3_DATA_TRANS_BLOCKS + 1; 
+        one_path_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 3;
+        lml_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 2;
+        extra_path_blks = EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode); 
+        extra_name_blks = EXT3_DATA_TRANS_BLOCKS + MAX_NAME_BLOCKS(inode); 
+
+        /* additional blocks appear for "two pathname" operations
+           and operations involving the LML records 
+        */
+        switch (op) {
+        case KML_OPCODE_TRUNC:
+                jblocks = one_path_blks + extra_name_blks + trunc_blks
+                        + EXT3_DELETE_TRANS_BLOCKS; 
+                break;
+        case KML_OPCODE_KML_TRUNC:
+                /* Hopefully this is a little better, but I'm still mostly
+                 * guessing here. */
+                /* unlink 1 */
+                jblocks = extra_name_blks + trunc_blks +
+                        EXT3_DELETE_TRANS_BLOCKS + 2; 
+
+                /* unlink 2 */
+                jblocks += extra_name_blks + trunc_blks +
+                        EXT3_DELETE_TRANS_BLOCKS + 2; 
+
+                /* rename 1 */
+                jblocks += 2 * extra_path_blks + trunc_blks + 
+                        2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3;
+
+                /* rename 2 */
+                jblocks += 2 * extra_path_blks + trunc_blks + 
+                        2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3;
+                break;
+        case KML_OPCODE_RELEASE:
+                /* 
+                jblocks = one_path_blks + lml_blks + 2*trunc_blks; 
+                */
+                jblocks = one_path_blks; 
+                break;
+        case KML_OPCODE_SETATTR:
+                jblocks = one_path_blks + trunc_blks + 1 ; 
+                break;
+        case KML_OPCODE_CREATE:
+                jblocks = one_path_blks + trunc_blks 
+                        + EXT3_DATA_TRANS_BLOCKS + 3 + 2; 
+                break;
+        case KML_OPCODE_LINK:
+                jblocks = one_path_blks + trunc_blks 
+                        + EXT3_DATA_TRANS_BLOCKS + 2; 
+                break;
+        case KML_OPCODE_UNLINK:
+                jblocks = one_path_blks + extra_name_blks + trunc_blks
+                        + EXT3_DELETE_TRANS_BLOCKS + 2; 
+                break;
+        case KML_OPCODE_SYMLINK:
+                jblocks = one_path_blks + extra_path_blks + trunc_blks
+                        + EXT3_DATA_TRANS_BLOCKS + 5; 
+                break;
+        case KML_OPCODE_MKDIR:
+                jblocks = one_path_blks + trunc_blks
+                        + EXT3_DATA_TRANS_BLOCKS + 4 + 2;
+                break;
+        case KML_OPCODE_RMDIR:
+                jblocks = one_path_blks + extra_name_blks + trunc_blks
+                        + EXT3_DELETE_TRANS_BLOCKS + 1; 
+                break;
+        case KML_OPCODE_MKNOD:
+                jblocks = one_path_blks + trunc_blks + 
+                        EXT3_DATA_TRANS_BLOCKS + 3 + 2;
+                break;
+        case KML_OPCODE_RENAME:
+                jblocks = one_path_blks + extra_path_blks + trunc_blks + 
+                        2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3;
+                break;
+        case KML_OPCODE_WRITE:
+                jblocks = one_path_blks; 
+                /*  add this when we can wrap our transaction with 
+                    that of ext3_file_write (ordered writes)
+                    +  EXT3_DATA_TRANS_BLOCKS;
+                */
+                break;
+        default:
+                CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op);
+                return NULL;
+        }
+
+        CDEBUG(D_JOURNAL, "creating journal handle (%d blocks) for op %d\n",
+               jblocks, op);
+        /* journal_start/stop does not do its own locking while updating
+         * the handle/transaction information. Hence we create our own
+         * critical section to protect these calls. -SHP
+         */
+        lock_kernel();
+        handle = journal_start(EXT3_JOURNAL(inode), jblocks);
+        unlock_kernel();
+        return handle;
+}
+
+static void presto_e3_trans_commit(struct presto_file_set *fset, void *handle)
+{
+        if ( presto_no_journal(fset) || !handle)
+                return;
+
+        /* See comments before journal_start above. -SHP */
+        lock_kernel();
+        journal_stop(handle);
+        unlock_kernel();
+}
+
+static void presto_e3_journal_file_data(struct inode *inode)
+{
+#ifdef EXT3_JOURNAL_DATA_FL
+        EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
+#else
+#warning You must have a facility to enable journaled writes for recovery!
+#endif
+}
+
+/* The logic here is a slightly modified version of ext3/inode.c:block_to_path
+ */
+static int presto_e3_has_all_data(struct inode *inode)
+{
+        int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb);
+        int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb);
+        const long direct_blocks = EXT3_NDIR_BLOCKS,
+                indirect_blocks = ptrs,
+                double_blocks = (1 << (ptrs_bits * 2));
+        long block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+                inode->i_sb->s_blocksize_bits;
+
+        ENTRY;
+
+        if (inode->i_size == 0) {
+                EXIT;
+                return 1;
+        }
+
+        if (block < direct_blocks) {
+                /* No indirect blocks, no problem. */
+        } else if (block < indirect_blocks + direct_blocks) {
+                block++;
+        } else if (block < double_blocks + indirect_blocks + direct_blocks) {
+                block += 2;
+        } else if (((block - double_blocks - indirect_blocks - direct_blocks)
+                    >> (ptrs_bits * 2)) < ptrs) {
+                block += 3;
+        }
+
+        block *= (inode->i_sb->s_blocksize / 512);
+
+        CDEBUG(D_CACHE, "Need %ld blocks, have %ld.\n", block, inode->i_blocks);
+
+        if (block > inode->i_blocks) {
+                EXIT;
+                return 0;
+        }
+
+        EXIT;
+        return 1;
+}
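+
+/* Editorial worked example (assuming a 4 KB block size): ptrs = 1024,
+ * ptrs_bits = 10, direct_blocks = 12.  A 100 KB file covers block = 25 data
+ * blocks, which lands in the single-indirect range, so one indirect block is
+ * added (26) and the total is converted to 512-byte units: 26 * 8 = 208.
+ * The file counts as "all data present" only if i_blocks >= 208.
+ */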
+
+struct journal_ops presto_ext3_journal_ops = {
+        .tr_all_data     = presto_e3_has_all_data,
+        .tr_avail        = presto_e3_freespace,
+        .tr_start        =  presto_e3_trans_start,
+        .tr_commit       = presto_e3_trans_commit,
+        .tr_journal_data = presto_e3_journal_file_data,
+        .tr_ilookup      = presto_iget_ilookup
+};
+
+#endif /* CONFIG_EXT3_FS */
diff --git a/fs/intermezzo/journal_obdfs.c b/fs/intermezzo/journal_obdfs.c
new file mode 100644 (file)
index 0000000..702ee8b
--- /dev/null
@@ -0,0 +1,193 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
+ *  Copyright (C) 2000 Red Hat, Inc.
+ *  Copyright (C) 2000 Los Alamos National Laboratory
+ *  Copyright (C) 2000 TurboLinux, Inc.
+ *  Copyright (C) 2001 Mountain View Data, Inc.
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/param.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+#include <linux/string.h>
+#ifdef CONFIG_OBDFS_FS
+#include "/usr/src/obd/include/linux/obdfs.h"
+#endif
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+#ifdef CONFIG_OBDFS_FS
+
+
+static unsigned long presto_obdfs_freespace(struct presto_file_set *fset,
+                                         struct super_block *sb)
+{
+        return 0x0fffff; 
+}
+
+/* start the filesystem journal operations */
+static void *presto_obdfs_trans_start(struct presto_file_set *fset, 
+                                   struct inode *inode, 
+                                   int op)
+{
+
+        return (void *) 1;
+}
+
+#if 0
+        int jblocks;
+        int trunc_blks, one_path_blks, extra_path_blks, 
+                extra_name_blks, lml_blks; 
+        __u32 avail_kmlblocks;
+
+        if ( presto_no_journal(fset) ||
+             strcmp(fset->fset_cache->cache_type, "ext3"))
+          {
+            CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n",
+                   fset->fset_cache->cache_type);
+            return NULL;
+          }
+
+        avail_kmlblocks = inode->i_sb->u.ext3_sb.s_es->s_free_blocks_count;
+        
+        if ( avail_kmlblocks < 3 ) {
+                return ERR_PTR(-ENOSPC);
+        }
+        
+        if (  (op != PRESTO_OP_UNLINK && op != PRESTO_OP_RMDIR)
+              && avail_kmlblocks < 6 ) {
+                return ERR_PTR(-ENOSPC);
+        }            
+
+        /* Need journal space for:
+             at least three writes to KML (two one block writes, one a path) 
+             possibly a second name (unlink, rmdir)
+             possibly a second path (symlink, rename)
+             a one block write to the last rcvd file 
+        */
+
+        trunc_blks = EXT3_DATA_TRANS_BLOCKS + 1; 
+        one_path_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 3;
+        lml_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 2;
+        extra_path_blks = EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode); 
+        extra_name_blks = EXT3_DATA_TRANS_BLOCKS + MAX_NAME_BLOCKS(inode); 
+
+        /* additional blocks appear for "two pathname" operations
+           and operations involving the LML records 
+        */
+        switch (op) {
+        case PRESTO_OP_TRUNC:
+                jblocks = one_path_blks + extra_name_blks + trunc_blks
+                        + EXT3_DELETE_TRANS_BLOCKS; 
+                break;
+        case PRESTO_OP_RELEASE:
+                /* 
+                jblocks = one_path_blks + lml_blks + 2*trunc_blks; 
+                */
+                jblocks = one_path_blks; 
+                break;
+        case PRESTO_OP_SETATTR:
+                jblocks = one_path_blks + trunc_blks + 1 ; 
+                break;
+        case PRESTO_OP_CREATE:
+                jblocks = one_path_blks + trunc_blks 
+                        + EXT3_DATA_TRANS_BLOCKS + 3; 
+                break;
+        case PRESTO_OP_LINK:
+                jblocks = one_path_blks + trunc_blks 
+                        + EXT3_DATA_TRANS_BLOCKS; 
+                break;
+        case PRESTO_OP_UNLINK:
+                jblocks = one_path_blks + extra_name_blks + trunc_blks
+                        + EXT3_DELETE_TRANS_BLOCKS; 
+                break;
+        case PRESTO_OP_SYMLINK:
+                jblocks = one_path_blks + extra_path_blks + trunc_blks
+                        + EXT3_DATA_TRANS_BLOCKS + 5; 
+                break;
+        case PRESTO_OP_MKDIR:
+                jblocks = one_path_blks + trunc_blks
+                        + EXT3_DATA_TRANS_BLOCKS + 4;
+                break;
+        case PRESTO_OP_RMDIR:
+                jblocks = one_path_blks + extra_name_blks + trunc_blks
+                        + EXT3_DELETE_TRANS_BLOCKS; 
+                break;
+        case PRESTO_OP_MKNOD:
+                jblocks = one_path_blks + trunc_blks + 
+                        EXT3_DATA_TRANS_BLOCKS + 3;
+                break;
+        case PRESTO_OP_RENAME:
+                jblocks = one_path_blks + extra_path_blks + trunc_blks + 
+                        2 * EXT3_DATA_TRANS_BLOCKS + 2;
+                break;
+        case PRESTO_OP_WRITE:
+                jblocks = one_path_blks; 
+                /*  add this when we can wrap our transaction with 
+                    that of ext3_file_write (ordered writes)
+                    +  EXT3_DATA_TRANS_BLOCKS;
+                */
+                break;
+        default:
+                CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op);
+                return NULL;
+        }
+
+        CDEBUG(D_JOURNAL, "creating journal handle (%d blocks)\n", jblocks);
+        return journal_start(EXT3_JOURNAL(inode), jblocks);
+}
+#endif
+
+void presto_obdfs_trans_commit(struct presto_file_set *fset, void *handle)
+{
+#if 0
+        if ( presto_no_journal(fset) || !handle)
+                return;
+
+        journal_stop(handle);
+#endif
+}
+
+void presto_obdfs_journal_file_data(struct inode *inode)
+{
+#ifdef EXT3_JOURNAL_DATA_FL
+        inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL;
+#else
+#warning You must have a facility to enable journaled writes for recovery!
+#endif
+}
+
+struct journal_ops presto_obdfs_journal_ops = {
+        .tr_avail        = presto_obdfs_freespace,
+        .tr_start        =  presto_obdfs_trans_start,
+        .tr_commit       = presto_obdfs_trans_commit,
+        .tr_journal_data = presto_obdfs_journal_file_data
+};
+
+#endif
diff --git a/fs/intermezzo/journal_reiserfs.c b/fs/intermezzo/journal_reiserfs.c
new file mode 100644 (file)
index 0000000..93fc148
--- /dev/null
@@ -0,0 +1,140 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
+ *  Copyright (C) 2000 Red Hat, Inc.
+ *  Copyright (C) 2000 Los Alamos National Laboratory
+ *  Copyright (C) 2000 TurboLinux, Inc.
+ *  Copyright (C) 2001 Mountain View Data, Inc.
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/param.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+#include <linux/string.h>
+#if 0
+#if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE)
+#include <linux/reiserfs_fs.h>
+#include <linux/reiserfs_fs_sb.h>
+#include <linux/reiserfs_fs_i.h>
+#endif
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+#if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE)
+
+
+static loff_t presto_reiserfs_freespace(struct presto_cache *cache,
+                                         struct super_block *sb)
+{
+        struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (sb);
+       loff_t avail;
+
+        avail =   le32_to_cpu(rs->s_free_blocks) * 
+               le16_to_cpu(rs->s_blocksize);
+        return avail; 
+}
+
+/* start the filesystem journal operations */
+static void *presto_reiserfs_trans_start(struct presto_file_set *fset, 
+                                   struct inode *inode, 
+                                   int op)
+{
+       int jblocks;
+        __u32 avail_kmlblocks;
+       struct reiserfs_transaction_handle *th ;
+
+       PRESTO_ALLOC(th, sizeof(*th));
+       if (!th) { 
+               CERROR("presto: No memory for trans handle\n");
+               return NULL;
+       }
+
+        avail_kmlblocks = presto_reiserfs_freespace(fset->fset_cache, 
+                                                   inode->i_sb);
+        if ( presto_no_journal(fset) ||
+             strcmp(fset->fset_cache->cache_type, "reiserfs"))
+               {
+                       CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n",
+                              fset->fset_cache->cache_type);
+                       return NULL;
+               }
+
+        if ( avail_kmlblocks < 3 ) {
+                return ERR_PTR(-ENOSPC);
+        }
+        
+        if (  (op != PRESTO_OP_UNLINK && op != PRESTO_OP_RMDIR)
+              && avail_kmlblocks < 6 ) {
+                return ERR_PTR(-ENOSPC);
+        }            
+
+       jblocks = 3 + JOURNAL_PER_BALANCE_CNT * 4;
+        CDEBUG(D_JOURNAL, "creating journal handle (%d blocks)\n", jblocks);
+
+       lock_kernel();
+       journal_begin(th, inode->i_sb, jblocks);
+       unlock_kernel();
+       return th; 
+}
+
+static void presto_reiserfs_trans_commit(struct presto_file_set *fset,
+                                         void *handle)
+{
+       int jblocks;
+       jblocks = 3 + JOURNAL_PER_BALANCE_CNT * 4;
+       
+       lock_kernel();
+       journal_end(handle, fset->fset_cache->cache_sb, jblocks);
+       unlock_kernel();
+       PRESTO_FREE(handle, sizeof(struct reiserfs_transaction_handle));
+}
+
+static void presto_reiserfs_journal_file_data(struct inode *inode)
+{
+#ifdef EXT3_JOURNAL_DATA_FL
+        inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL;
+#else
+#warning You must have a facility to enable journaled writes for recovery!
+#endif
+}
+
+static int presto_reiserfs_has_all_data(struct inode *inode)
+{
+        BUG();
+        return 0;
+}
+
+struct journal_ops presto_reiserfs_journal_ops = {
+        .tr_all_data     = presto_reiserfs_has_all_data,
+        .tr_avail        = presto_reiserfs_freespace,
+        .tr_start        = presto_reiserfs_trans_start,
+        .tr_commit       = presto_reiserfs_trans_commit,
+        .tr_journal_data = presto_reiserfs_journal_file_data
+};
+
+#endif
+#endif
diff --git a/fs/intermezzo/journal_tmpfs.c b/fs/intermezzo/journal_tmpfs.c
new file mode 100644 (file)
index 0000000..4f3c463
--- /dev/null
@@ -0,0 +1,107 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
+ *  Copyright (C) 2000 Red Hat, Inc.
+ *  Copyright (C) 2000 Los Alamos National Laboratory
+ *  Copyright (C) 2000 TurboLinux, Inc.
+ *  Copyright (C) 2001 Mountain View Data, Inc.
+ *  Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/param.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+#include <linux/string.h>
+#if defined(CONFIG_TMPFS)
+#include <linux/jbd.h>
+#if defined(CONFIG_EXT3)
+#include <linux/ext3_fs.h>
+#include <linux/ext3_jbd.h>
+#endif
+#endif
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+#if defined(CONFIG_TMPFS)
+
+/* space requirements: 
+   presto_do_truncate: 
+        used to truncate the KML forward to next fset->chunksize boundary
+          - zero partial block
+          - update inode
+   presto_write_record: 
+        write header (< one block) 
+        write one path (< MAX_PATHLEN) 
+        possibly write another path (< MAX_PATHLEN)
+        write suffix (< one block) 
+   presto_update_last_rcvd
+        write one block
+*/
+
+static loff_t presto_tmpfs_freespace(struct presto_cache *cache,
+                                         struct super_block *sb)
+{
+        return (1<<30);
+}
+
+/* start the filesystem journal operations */
+static void *presto_tmpfs_trans_start(struct presto_file_set *fset, 
+                                   struct inode *inode, 
+                                   int op)
+{
+        return (void *)1; 
+}
+
+static void presto_tmpfs_trans_commit(struct presto_file_set *fset, void *handle)
+{
+        return;
+}
+
+static void presto_tmpfs_journal_file_data(struct inode *inode)
+{
+        return; 
+}
+
+/* Unlike the ext3 version, this is just a stub: tmpfs never reports that
+ * all file data is present locally. */
+static int presto_tmpfs_has_all_data(struct inode *inode)
+{
+        return 0;
+}
+
+struct journal_ops presto_tmpfs_journal_ops = {
+        .tr_all_data            = presto_tmpfs_has_all_data,
+        .tr_avail               = presto_tmpfs_freespace,
+        .tr_start               = presto_tmpfs_trans_start,
+        .tr_commit              = presto_tmpfs_trans_commit,
+        .tr_journal_data        = presto_tmpfs_journal_file_data,
+        .tr_ilookup             = presto_tmpfs_ilookup,
+        .tr_add_ilookup         = presto_add_ilookup_dentry
+};
+
+#endif /* CONFIG_TMPFS */
diff --git a/fs/intermezzo/journal_xfs.c b/fs/intermezzo/journal_xfs.c
new file mode 100644 (file)
index 0000000..59b22a5
--- /dev/null
@@ -0,0 +1,161 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+#include <linux/string.h>
+#if 0
+/* XFS Support not there yet */
+#ifdef CONFIG_XFS_FS
+#include <linux/xfs_fs.h>
+#endif
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+#include "intermezzo_journal.h"
+
+#if 0
+
+/* XFS has journalling, but these functions do nothing yet... */
+
+static unsigned long presto_xfs_freespace(struct presto_file_set *fset,
+                                         struct super_block *sb)
+{
+
+#if 0
+        vfs_t *vfsp = LINVFS_GET_VFS(sb);
+        struct statvfs_t stat; 
+        bhv_desc_t *bdp;
+        unsigned long avail; 
+        int rc;
+
+        VFS_STATVFS(vfsp, &stat, NULL, rc);
+        avail = statp.f_bfree;
+
+        return sbp->sb_fdblocks;
+#endif
+        return 0x0fffffff;
+}
+
+
+/* start the filesystem journal operations */
+static void *
+presto_xfs_trans_start(struct presto_file_set *fset,
+                      struct inode *inode, int op)
+{
+       int xfs_op;
+       /* do a free blocks check as in journal_ext3? does anything protect
+        * the space in that case or can it disappear out from under us
+        * anyway? */
+       
+/* copied from xfs_trans.h, skipping header maze for now */
+#define XFS_TRANS_SETATTR_NOT_SIZE      1
+#define XFS_TRANS_SETATTR_SIZE          2
+#define XFS_TRANS_INACTIVE              3
+#define XFS_TRANS_CREATE                4
+#define XFS_TRANS_CREATE_TRUNC          5
+#define XFS_TRANS_TRUNCATE_FILE         6
+#define XFS_TRANS_REMOVE                7
+#define XFS_TRANS_LINK                  8
+#define XFS_TRANS_RENAME                9
+#define XFS_TRANS_MKDIR                 10
+#define XFS_TRANS_RMDIR                 11
+#define XFS_TRANS_SYMLINK               12
+
+       /* map the op onto the values for XFS so it can do reservation. if
+        * we don't have enough info to differentiate between e.g. setattr
+        * with or without size, what do we do? will it adjust? */
+       switch (op) {
+       case PRESTO_OP_SETATTR:
+               /* or XFS_TRANS_SETATTR_NOT_SIZE? */
+               xfs_op = XFS_TRANS_SETATTR_SIZE;
+               break;
+       case PRESTO_OP_CREATE:
+               /* or CREATE_TRUNC? */
+               xfs_op = XFS_TRANS_CREATE;
+               break;
+       case PRESTO_OP_LINK:
+               xfs_op = XFS_TRANS_LINK;
+               break;
+       case PRESTO_OP_UNLINK:
+               xfs_op = XFS_TRANS_REMOVE;
+               break;
+       case PRESTO_OP_SYMLINK:
+               xfs_op = XFS_TRANS_SYMLINK;
+               break;
+       case PRESTO_OP_MKDIR:
+               xfs_op = XFS_TRANS_MKDIR;
+               break;
+       case PRESTO_OP_RMDIR:
+               xfs_op = XFS_TRANS_RMDIR;
+               break;
+       case PRESTO_OP_MKNOD:
+               /* XXX can't find an analog for mknod? */
+               xfs_op = XFS_TRANS_CREATE;
+               break;
+       case PRESTO_OP_RENAME:
+               xfs_op = XFS_TRANS_RENAME;
+               break;
+       default:
+               CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op);
+               return NULL;
+       }
+
+       return xfs_trans_start(inode, xfs_op);
+}
+
+static void presto_xfs_trans_commit(struct presto_file_set *fset, void *handle)
+{
+       /* assert (handle == current->j_handle) */
+       xfs_trans_stop(handle);
+}
+
+static void presto_xfs_journal_file_data(struct inode *inode)
+{
+        return; 
+}
+
+static int presto_xfs_has_all_data(struct inode *inode)
+{
+        BUG();
+        return 0;
+}
+
+struct journal_ops presto_xfs_journal_ops = {
+        .tr_all_data     = presto_xfs_has_all_data,
+        .tr_avail        = presto_xfs_freespace,
+        .tr_start        = presto_xfs_trans_start,
+        .tr_commit       = presto_xfs_trans_commit,
+        .tr_journal_data = presto_xfs_journal_file_data
+};
+
+#endif
+
+
+#endif /* 0: XFS support not there yet */
+
diff --git a/fs/intermezzo/kml.c b/fs/intermezzo/kml.c
new file mode 100644 (file)
index 0000000..e992c18
--- /dev/null
@@ -0,0 +1,194 @@
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
+#include "intermezzo_fs.h"
+#include "intermezzo_upcall.h"
+#include "intermezzo_psdev.h"
+#include "intermezzo_kml.h"
+
+static struct presto_file_set * kml_getfset (char *path)
+{
+        return presto_path2fileset(path);
+}
+
+/* Send the KML buffer and related volume info into kernel */
+int begin_kml_reint (struct file *file, unsigned long arg)
+{
+        struct {
+                char *volname;
+                int   namelen;  
+                char *recbuf;
+                int   reclen;     /* int   newpos; */
+        } input;
+        struct kml_fsdata *kml_fsdata = NULL;
+        struct presto_file_set *fset = NULL;
+        char   *path;
+        int    error;
+
+        ENTRY;
+        /* allocate buffer & copy it to kernel space */
+        if (copy_from_user(&input, (char *)arg, sizeof(input))) {
+                EXIT;
+                return -EFAULT;
+        }
+
+
+        PRESTO_ALLOC(path, char *, input.namelen + 1);
+        if ( !path ) {
+                EXIT;
+                return -ENOMEM;
+        }
+        if (copy_from_user(path, input.volname, input.namelen)) {
+                PRESTO_FREE(path, input.namelen + 1);
+                EXIT;
+                return -EFAULT;
+        }
+        path[input.namelen] = '\0';
+        fset = kml_getfset (path);
+        PRESTO_FREE(path, input.namelen + 1);
+
+        kml_fsdata = FSET_GET_KMLDATA(fset);
+        if (input.reclen > kml_fsdata->kml_maxsize)
+                return -ENOMEM; /* we'll find a solution to this in the future */
+        /* read the buf from user memory here */
+        if (copy_from_user(kml_fsdata->kml_buf, input.recbuf, input.reclen)) {
+                EXIT;
+                return -EFAULT;
+        }
+        kml_fsdata->kml_len = input.reclen;
+
+        decode_kmlrec (&kml_fsdata->kml_reint_cache,
+                        kml_fsdata->kml_buf, kml_fsdata->kml_len);
+
+        kml_fsdata->kml_reint_current = kml_fsdata->kml_reint_cache.next;
+        kml_fsdata->kml_reintpos = 0;
+        kml_fsdata->kml_count = 0;
+        return 0;
+}
+
+/* DO_KML_REINT  */
+int do_kml_reint (struct file *file, unsigned long arg)
+{
+        struct {
+                char *volname;
+                int   namelen;  
+                char *path;
+                int pathlen;
+                int recno;
+                int offset;
+                int len;
+                int generation;
+                __u64 ino;
+        } input;
+        int error;
+        char   *path;
+        struct kml_rec *close_rec;
+        struct kml_fsdata *kml_fsdata;
+        struct presto_file_set *fset;
+
+        ENTRY;
+        if (copy_from_user(&input, (char *)arg, sizeof(input))) {
+                EXIT;
+                return -EFAULT;
+        }
+        PRESTO_ALLOC(path, char *, input.namelen + 1);
+        if ( !path ) {
+                EXIT;
+                return -ENOMEM;
+        }
+        if (copy_from_user(path, input.volname, input.namelen)) {
+                PRESTO_FREE(path, input.namelen + 1);
+                EXIT;
+                return -EFAULT;
+        }
+        path[input.namelen] = '\0';
+        fset = kml_getfset (path);
+        PRESTO_FREE(path, input.namelen + 1);
+
+        kml_fsdata = FSET_GET_KMLDATA(fset);
+
+        error = kml_reintbuf(kml_fsdata, 
+                fset->fset_mtpt->d_name.name, 
+                &close_rec);
+
+        if (error == KML_CLOSE_BACKFETCH && close_rec != NULL) {
+                struct kml_close *close = &close_rec->rec_kml.close;
+                input.ino = close->ino;
+                input.generation = close->generation;
+                if (strlen (close->path) + 1 < input.pathlen) {
+                        /* input.path points into user space */
+                        if (copy_to_user (input.path, close->path,
+                                          strlen (close->path) + 1))
+                                return -EFAULT;
+                        input.pathlen = strlen (close->path) + 1;
+                        input.recno = close_rec->rec_tail.recno;
+                        input.offset = close_rec->rec_kml_offset;
+                        input.len = close_rec->rec_size;
+                        input.generation = close->generation;
+                        input.ino = close->ino;
+                }
+                else {
+                        CDEBUG(D_KML, "KML_DO_REINT::no space to save: %d < %d",
+                               (int) (strlen (close->path) + 1), input.pathlen);
+                        error = -ENOMEM;
+                }
+                if (copy_to_user((char *)arg, &input, sizeof (input)))
+                       return -EFAULT;
+        }
+        return error;
+}
+
+/* END_KML_REINT */
+int end_kml_reint (struct file *file, unsigned long arg)
+{
+        /* Free KML buffer and related volume info */
+        struct {
+                char *volname;
+                int   namelen;  
+#if 0
+                int   count; 
+                int   newpos; 
+#endif
+        } input;
+        struct presto_file_set *fset = NULL;
+        struct kml_fsdata *kml_fsdata = NULL;
+        int error = 0;
+        char *path;
+
+        ENTRY;
+        if (copy_from_user(&input, (char *)arg, sizeof(input))) { 
+               EXIT;
+               return -EFAULT;
+        }
+
+        PRESTO_ALLOC(path, char *, input.namelen + 1);
+        if ( !path ) {
+                EXIT;
+                return -ENOMEM;
+        }
+        if (copy_from_user(path, input.volname, input.namelen)) {
+                PRESTO_FREE(path, input.namelen + 1);
+                EXIT;
+                return -EFAULT;
+        }
+        path[input.namelen] = '\0';
+        fset = kml_getfset (path);
+        PRESTO_FREE(path, input.namelen + 1);
+
+        kml_fsdata = FSET_GET_KMLDATA(fset);
+        delete_kmlrec (&kml_fsdata->kml_reint_cache);
+
+        /* kml reint support */
+        kml_fsdata->kml_reint_current = NULL;
+        kml_fsdata->kml_len = 0;
+        kml_fsdata->kml_reintpos = 0;
+        kml_fsdata->kml_count = 0;
+#if 0
+        input.newpos = kml_upc->newpos;
+        input.count = kml_upc->count;
+        if (copy_to_user((char *)arg, &input, sizeof (input)))
+               return -EFAULT;
+#endif
+        return error;
+}
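+
+/* Editorial note on the reintegration protocol implemented above (the
+ * user-space fragment is a sketch; the ioctl command macro and descriptor
+ * name are hypothetical, the real numbers live in the psdev headers):
+ *
+ *   1. begin_kml_reint copies a KML buffer plus the volume name into the
+ *      kernel and decodes it into kml_reint_cache;
+ *   2. do_kml_reint replays records; KML_CLOSE_BACKFETCH hands a close
+ *      record back so user space can backfetch the file data;
+ *   3. end_kml_reint frees the decoded records and resets the state.
+ *
+ *     struct { char *volname; int namelen; char *recbuf; int reclen; } a = {
+ *             volname, strlen(volname), kmlbuf, kmllen };
+ *     ioctl(presto_fd, HYPOTHETICAL_BEGIN_KML_REINT, &a);
+ */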
diff --git a/fs/intermezzo/kml_decode.c b/fs/intermezzo/kml_decode.c
new file mode 100644 (file)
index 0000000..f04e7d5
--- /dev/null
@@ -0,0 +1,1016 @@
+/*
+ * KML Decoding
+ *
+ * Copyright (C) 1996 Arthur Ma <arthur.ma@mountainviewdata.com>
+ *
+ * Copyright (C) 2001 Mountainview Data, Inc.
+ */
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include "intermezzo_fs.h"
+#include "intermezzo_kml.h"
+
+static int size_round (int val);
+static int unpack_create (struct kml_create *rec, char *buf,
+                                int pos, int *rec_offs);
+static int unpack_open (struct kml_open *rec, char *buf,
+                                int pos, int *rec_offs);
+static int unpack_symlink (struct kml_symlink *rec, char *buf,
+                                int pos, int *rec_offs);
+static int unpack_mknod (struct kml_mknod *rec, char *buf,
+                                int pos, int *rec_offs);
+static int unpack_link (struct kml_link *rec, char *buf,
+                                int pos, int *rec_offs);
+static int unpack_rename (struct kml_rename *rec, char *buf,
+                                int pos, int *rec_offs);
+static int unpack_unlink (struct kml_unlink *rec, char *buf,
+                                int pos, int *rec_offs);
+static int unpack_rmdir (struct kml_rmdir *rec, char *buf,
+                                int pos, int *rec_offs);
+static int unpack_setattr (struct kml_setattr *rec, char *buf,
+                                int pos, int *rec_offs);
+static int unpack_close (struct kml_close *rec, char *buf,
+                                int pos, int *rec_offs);
+static int unpack_mkdir (struct kml_mkdir *rec, char *buf,
+                                int pos, int *rec_offs);
+
+#if 0
+static int unpack_endmark (struct kml_endmark *rec, char *buf,
+                                int pos, int *rec_offs);
+static void print_kml_endmark (struct kml_endmark *rec);
+#endif
+
+static int kml_unpack (char *kml_buf, int rec_size, int kml_offset,
+                        struct kml_rec **newrec);
+static char *kml_version (struct presto_version *ver);
+static void print_kml_prefix (struct big_journal_prefix *head);
+static void print_kml_create (struct kml_create *rec);
+static void print_kml_mkdir (struct kml_mkdir *rec);
+static void print_kml_unlink (struct kml_unlink *rec);
+static void print_kml_rmdir (struct kml_rmdir *rec);
+static void print_kml_close (struct kml_close *rec);
+static void print_kml_symlink (struct kml_symlink *rec);
+static void print_kml_rename (struct kml_rename *rec);
+static void print_kml_setattr (struct kml_setattr *rec);
+static void print_kml_link (struct kml_link *rec);
+static void print_kml_mknod (struct kml_mknod *rec);
+static void print_kml_open (struct kml_open *rec);
+static void print_kml_suffix (struct journal_suffix *tail);
+static char *readrec (char *recbuf, int reclen, int pos, int *size);
+
+#define  KML_PREFIX_WORDS           8
+static int kml_unpack (char *kml_buf, int rec_size, int kml_offset, 
+                        struct kml_rec **newrec)
+{
+        struct kml_rec  *rec;
+        char            *p;
+        int             pos, rec_offs;
+        int             error;
+
+        ENTRY;
+        if (rec_size < sizeof (struct journal_prefix) +
+                       sizeof (struct journal_suffix))
+                return -EBADF;
+
+        PRESTO_ALLOC(rec, struct kml_rec *, sizeof (struct kml_rec));
+        if (rec == NULL) {
+                EXIT;
+                return -ENOMEM;
+        }
+        rec->rec_kml_offset = kml_offset;
+        rec->rec_size = rec_size;
+        p = kml_buf;
+        p = dlogit (&rec->rec_head, p, KML_PREFIX_WORDS * sizeof (int));
+        p = dlogit (&rec->rec_head.groups, p, 
+                        sizeof (int) * rec->rec_head.ngroups);
+
+        pos = sizeof (struct journal_prefix) + 
+                        sizeof (int) * rec->rec_head.ngroups;
+        switch (rec->rec_head.opcode)
+        {
+                case KML_CREATE:
+                        error = unpack_create (&rec->rec_kml.create, 
+                                        kml_buf, pos, &rec_offs);
+                        break;
+                case KML_MKDIR:
+                        error = unpack_mkdir (&rec->rec_kml.mkdir, 
+                                        kml_buf, pos, &rec_offs);
+                        break;
+                case KML_UNLINK:
+                        error = unpack_unlink (&rec->rec_kml.unlink, 
+                                        kml_buf, pos, &rec_offs);
+                        break;
+                case KML_RMDIR:
+                        error = unpack_rmdir (&rec->rec_kml.rmdir, 
+                                        kml_buf, pos, &rec_offs);
+                        break;
+                case KML_CLOSE:
+                        error = unpack_close (&rec->rec_kml.close, 
+                                        kml_buf, pos, &rec_offs);
+                        break;
+                case KML_SYMLINK:
+                        error = unpack_symlink (&rec->rec_kml.symlink, 
+                                        kml_buf, pos, &rec_offs);
+                        break;
+                case KML_RENAME:
+                        error = unpack_rename (&rec->rec_kml.rename, 
+                                        kml_buf, pos, &rec_offs);
+                        break;
+                case KML_SETATTR:
+                        error = unpack_setattr (&rec->rec_kml.setattr, 
+                                        kml_buf, pos, &rec_offs);
+                        break;
+                case KML_LINK:
+                        error = unpack_link (&rec->rec_kml.link, 
+                                        kml_buf, pos, &rec_offs);
+                        break;
+                case KML_OPEN:
+                        error = unpack_open (&rec->rec_kml.open, 
+                                        kml_buf, pos, &rec_offs);
+                        break;
+                case KML_MKNOD:
+                        error = unpack_mknod (&rec->rec_kml.mknod, 
+                                        kml_buf, pos, &rec_offs);
+                        break;
+#if 0
+                case KML_ENDMARK:
+                        error = unpack_endmark (&rec->rec_kml.endmark, 
+                                        kml_buf, pos, &rec_offs);
+                        break;
+#endif
+                default:
+                        CDEBUG (D_KML, "wrong opcode::%u\n",
+                                        rec->rec_head.opcode);
+                        /* free the record allocated above before bailing out */
+                        PRESTO_FREE (rec, sizeof (struct kml_rec));
+                        EXIT;
+                        return -EINVAL;
+        } 
+        if (error) {
+                PRESTO_FREE (rec, sizeof (struct kml_rec));
+                EXIT;
+                return -EINVAL;
+        }
+        p = kml_buf + rec_offs;
+        p = dlogit (&rec->rec_tail, p, sizeof (struct journal_suffix));
+        memset (&rec->kml_optimize, 0, sizeof (struct kml_optimize));
+        *newrec = rec;
+        EXIT;
+        return 0;
+}
+
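+/* Path strings are stored padded to a 4-byte boundary; size_round gives
+ * the padded length. */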
+static int size_round (int val)
+{
+        return (val + 3) & (~0x3);
+}
+
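+/* The unpack_* helpers mirror the layout written by the KML packers:
+ * the fixed-size fields come first (unpack_size is their byte count:
+ * the presto_versions plus the integer attributes and string lengths),
+ * followed by the length-prefixed, 4-byte padded path strings. */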
+static int unpack_create (struct kml_create *rec, char *buf, 
+                                int pos, int *rec_offs)
+{
+        char *p, *q;
+        int unpack_size = 88;
+        int pathlen;
+
+        ENTRY;
+        p = buf + pos;
+        p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->mode, p, sizeof (int));
+        p = dlogit (&rec->uid, p, sizeof (int));
+        p = dlogit (&rec->gid, p, sizeof (int));
+        p = dlogit (&pathlen, p, sizeof (int));
+
+        PRESTO_ALLOC(q, char *, pathlen + 1);
+        if (q == NULL) {
+                EXIT;
+                return -ENOMEM;
+        }
+
+        memcpy (q, p, pathlen);
+        q[pathlen] = '\0';
+        rec->path = q;
+
+        *rec_offs = pos + unpack_size + size_round(pathlen);
+        EXIT;
+        return 0;
+}
+
+static int unpack_open (struct kml_open *rec, char *buf, 
+                                int pos, int *rec_offs)
+{
+        *rec_offs = pos;
+        return 0;
+}
+
+static int unpack_symlink (struct kml_symlink *rec, char *buf, 
+                                int pos, int *rec_offs)
+{
+        char *p, *q;
+        int unpack_size = 88;
+        int pathlen, targetlen;
+
+        ENTRY;
+        p = buf + pos;
+        p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->uid, p, sizeof (int));
+        p = dlogit (&rec->gid, p, sizeof (int));
+        p = dlogit (&pathlen, p, sizeof (int));
+        p = dlogit (&targetlen, p, sizeof (int));
+
+        PRESTO_ALLOC(q, char *, pathlen + 1);
+        if (q == NULL) {
+                EXIT;
+                return -ENOMEM;
+        }
+
+        memcpy (q, p, pathlen);
+        q[pathlen] = '\0';
+        rec->sourcepath = q;
+        /* skip the padded source path before reading the target, as the
+         * other two-string unpackers below do */
+        p += size_round (pathlen);
+
+        PRESTO_ALLOC(q, char *, targetlen + 1);
+        if (q == NULL) {
+                PRESTO_FREE (rec->sourcepath, pathlen + 1);
+                EXIT;
+                return -ENOMEM;
+        }
+
+        memcpy (q, p, targetlen);
+        q[targetlen] = '\0';
+        rec->targetpath = q;
+
+        *rec_offs = pos + unpack_size + size_round(pathlen) +
+                        size_round(targetlen);
+        EXIT;
+        return 0;
+}
+
+static int unpack_mknod (struct kml_mknod *rec, char *buf, 
+                                int pos, int *rec_offs)
+{
+        char *p, *q;
+        int unpack_size = 96;
+        int pathlen;
+
+        ENTRY;
+        p = buf + pos;
+        p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->mode, p, sizeof (int));
+        p = dlogit (&rec->uid, p, sizeof (int));
+        p = dlogit (&rec->gid, p, sizeof (int));
+        p = dlogit (&rec->major, p, sizeof (int));
+        p = dlogit (&rec->minor, p, sizeof (int));
+        p = dlogit (&pathlen, p, sizeof (int));
+
+        PRESTO_ALLOC(q, char *, pathlen + 1);
+        if (q == NULL) {
+                EXIT;
+                return -ENOMEM;
+        }
+
+        memcpy (q, p, pathlen);
+        q[pathlen] = '\0';
+        rec->path = q;
+
+        *rec_offs = pos + unpack_size + size_round(pathlen);
+        EXIT;
+        return 0;
+}
+
+static int unpack_link (struct kml_link *rec, char *buf, 
+                                int pos, int *rec_offs)
+{
+        char *p, *q;
+        int unpack_size = 80;
+        int pathlen, targetlen;
+
+        ENTRY;
+        p = buf + pos;
+        p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version));
+        p = dlogit (&pathlen, p, sizeof (int));
+        p = dlogit (&targetlen, p, sizeof (int));
+
+        PRESTO_ALLOC(q, char *, pathlen + 1);
+        if (q == NULL) {
+                EXIT;
+                return -ENOMEM;
+        }
+
+        memcpy (q, p, pathlen);
+        q[pathlen] = '\0';
+        rec->sourcepath = q;
+        p += size_round (pathlen);
+
+        PRESTO_ALLOC(q, char *, targetlen + 1);
+        if (q == NULL) {
+                PRESTO_FREE (rec->sourcepath, pathlen + 1);
+                EXIT;
+                return -ENOMEM;
+        }
+        memcpy (q, p, targetlen);
+        q[targetlen] = '\0';
+        rec->targetpath = q;
+
+        *rec_offs = pos + unpack_size + size_round(pathlen) +
+                        size_round(targetlen);
+        EXIT;
+        return 0;
+}
+
+static int unpack_rename (struct kml_rename *rec, char *buf, 
+                                int pos, int *rec_offs)
+{
+        char *p, *q;
+        int unpack_size = 104;
+        int pathlen, targetlen;
+
+        ENTRY;
+        p = buf + pos;
+        p = dlogit (&rec->old_objectv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->new_tgtv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->old_tgtv, p, sizeof (struct presto_version));
+        p = dlogit (&pathlen, p, sizeof (int));
+        p = dlogit (&targetlen, p, sizeof (int));
+
+        PRESTO_ALLOC(q, char *, pathlen + 1);
+        if (q == NULL) {
+                EXIT;
+                return -ENOMEM;
+        }
+
+        memcpy (q, p, pathlen);
+        q[pathlen] = '\0';
+        rec->sourcepath = q;
+        p += size_round (pathlen);
+
+        PRESTO_ALLOC(q, char *, targetlen + 1);
+        if (q == NULL) {
+                PRESTO_FREE (rec->sourcepath, pathlen + 1);
+                EXIT;
+                return -ENOMEM;
+        }
+
+        memcpy (q, p, targetlen);
+        q[targetlen] = '\0';
+        rec->targetpath = q;
+
+        *rec_offs = pos + unpack_size + size_round(pathlen) +
+                        size_round(targetlen);
+        EXIT;
+        return 0;
+}
+
+static int unpack_unlink (struct kml_unlink *rec, char *buf, 
+                                int pos, int *rec_offs)
+{
+        char *p, *q;
+        int unpack_size = 80;
+        int pathlen, targetlen;
+
+        ENTRY;
+        p = buf + pos;
+        p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->old_tgtv, p, sizeof (struct presto_version));
+        p = dlogit (&pathlen, p, sizeof (int));
+        p = dlogit (&targetlen, p, sizeof (int));
+
+        PRESTO_ALLOC(q, char *, pathlen + 1);
+        if (q == NULL) {
+                EXIT;
+                return -ENOMEM;
+        }
+
+        memcpy (q, p, pathlen);
+        q[pathlen] = '\0';
+        rec->path = q;
+        p += size_round (pathlen);
+
+        PRESTO_ALLOC(q, char *, targetlen + 1);
+        if (q == NULL) {
+                PRESTO_FREE (rec->path, pathlen + 1);
+                EXIT;
+                return -ENOMEM;
+        }
+
+        memcpy (q, p, targetlen);
+        q[targetlen] = '\0';
+        rec->name = q;
+
+        /* fix the presto_journal_unlink problem */
+        *rec_offs = pos + unpack_size + size_round(pathlen) +
+                        size_round(targetlen);
+        EXIT;
+        return 0;
+}
+
+static int unpack_rmdir (struct kml_rmdir *rec, char *buf, 
+                                int pos, int *rec_offs)
+{
+        char *p, *q;
+        int unpack_size = 80;
+        int pathlen, targetlen;
+
+        ENTRY;
+        p = buf + pos;
+        p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->old_tgtv, p, sizeof (struct presto_version));
+        p = dlogit (&pathlen, p, sizeof (int));
+        p = dlogit (&targetlen, p, sizeof (int));
+
+        PRESTO_ALLOC(q, char *, pathlen + 1);
+        if (q == NULL) {
+                EXIT;
+                return -ENOMEM;
+        }
+
+        memcpy (q, p, pathlen);
+        q[pathlen] = '\0';
+        rec->path = q;
+        p += size_round (pathlen);
+
+        PRESTO_ALLOC(q, char *, targetlen + 1);
+        if (q == NULL) {
+                PRESTO_FREE (rec->path, pathlen + 1);
+                EXIT;
+                return -ENOMEM;
+        }
+        memcpy (q, p, targetlen);
+        q[targetlen] = '\0';
+        rec->name = q;
+
+        *rec_offs = pos + unpack_size + size_round(pathlen) +
+                        size_round(targetlen);
+        EXIT;
+        return 0;
+}
+
+static int unpack_setattr (struct kml_setattr *rec, char *buf, 
+                                int pos, int *rec_offs)
+{
+        char *p, *q;
+        int unpack_size = 72;
+        struct kml_attr {
+                __u64   size, mtime, ctime;
+        } objattr;
+        int     valid, mode, uid, gid, flags;
+        int pathlen;
+
+        ENTRY;
+        p = buf + pos;
+        p = dlogit (&rec->old_objectv, p, sizeof (struct presto_version));
+        p = dlogit (&valid, p, sizeof (int));
+        p = dlogit (&mode, p, sizeof (int));
+        p = dlogit (&uid, p, sizeof (int));
+        p = dlogit (&gid, p, sizeof (int));
+        p = dlogit (&objattr, p, sizeof (struct kml_attr));
+        p = dlogit (&flags, p, sizeof (int));
+        p = dlogit (&pathlen, p, sizeof (int));
+
+        rec->iattr.ia_valid = valid;
+        rec->iattr.ia_mode = mode;
+        rec->iattr.ia_uid = uid;
+        rec->iattr.ia_gid = gid;
+        rec->iattr.ia_size = objattr.size;
+        rec->iattr.ia_mtime = objattr.mtime;
+        rec->iattr.ia_ctime = objattr.ctime;
+        rec->iattr.ia_atime = 0;
+        rec->iattr.ia_attr_flags = flags;
+
+        PRESTO_ALLOC(q, char *, pathlen + 1);
+        if (q == NULL) {
+                EXIT;
+                return -ENOMEM;
+        }
+        memcpy (q, p, pathlen);
+        q[pathlen] = '\0';
+        rec->path = q;
+        p += pathlen;
+
+        *rec_offs = pos + unpack_size + size_round(pathlen);
+        EXIT;
+        return 0;
+}
+
+static int unpack_close (struct kml_close *rec, char *buf, 
+                                int pos, int *rec_offs)
+{
+        char *p, *q;
+        int unpack_size = 52;
+        int pathlen;
+
+        ENTRY;
+        p = buf + pos;
+        p = dlogit (&rec->open_mode, p, sizeof (int));
+        p = dlogit (&rec->open_uid, p, sizeof (int));
+        p = dlogit (&rec->open_gid, p, sizeof (int));
+        p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->ino, p, sizeof (__u64));
+        p = dlogit (&rec->generation, p, sizeof (int));
+        p = dlogit (&pathlen, p, sizeof (int));
+
+        PRESTO_ALLOC(q, char *, pathlen + 1);
+        if (q == NULL) {
+                EXIT;
+                return -ENOMEM;
+        }
+
+        memcpy (q, p, pathlen);
+        q[pathlen] = '\0';
+        rec->path = q;
+        p += pathlen;
+
+        *rec_offs = pos + unpack_size + size_round(pathlen);
+        EXIT;
+        return 0;
+}
+
+static int unpack_mkdir (struct kml_mkdir *rec, char *buf, 
+                                int pos, int *rec_offs)
+{
+        char *p, *q;
+        int unpack_size = 88;
+        int pathlen;
+
+        ENTRY;
+        p = buf + pos;
+        p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version));
+        p = dlogit (&rec->mode, p, sizeof (int));
+        p = dlogit (&rec->uid, p, sizeof (int));
+        p = dlogit (&rec->gid, p, sizeof (int));
+        p = dlogit (&pathlen, p, sizeof (int));
+
+        PRESTO_ALLOC(q, char *, pathlen + 1);
+        if (q == NULL) {
+                EXIT;
+                return -ENOMEM;
+        }
+
+        memcpy (q, p, pathlen);
+        q[pathlen] = '\0';
+        rec->path = q;
+        p += pathlen;
+
+        *rec_offs = pos + unpack_size + size_round(pathlen);
+        EXIT;
+        return 0;
+}
+
+#if 0
+static int unpack_endmark (struct kml_endmark *rec, char *buf, 
+                                int pos, int *rec_offs)
+{
+        char *p;
+        p = buf + pos;
+        p = dlogit (&rec->total, p, sizeof (int));
+
+        PRESTO_ALLOC (rec->kop, struct kml_kop_node *, 
+                        sizeof (struct kml_kop_node) * rec->total);
+        if (rec->kop == NULL) {
+                EXIT;
+                return -ENOMEM;
+        }
+
+        p = dlogit (rec->kop, p, sizeof (struct kml_kop_node) * rec->total);
+
+        *rec_offs = pos + sizeof (int) + sizeof (struct kml_kop_node) * rec->total;
+        return 0;
+}
+#endif
+
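+/* Format a presto_version for the CDEBUG output below.  Note the static
+ * buffer: the result is only valid until the next call. */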
+static char *kml_version (struct presto_version *ver)
+{
+        static char buf[256];
+        sprintf (buf, "mt::%lld, ct::%lld, size::%lld",
+                ver->pv_mtime, ver->pv_ctime, ver->pv_size); 
+        return buf;
+}
+
+static void print_kml_prefix (struct big_journal_prefix *head)
+{
+        int i;
+
+        CDEBUG (D_KML, " === KML PREFIX\n");
+        CDEBUG (D_KML, "     len        = %u\n", head->len);
+        CDEBUG (D_KML, "     version    = %u\n", head->version);
+        CDEBUG (D_KML, "     pid        = %u\n", head->pid);
+        CDEBUG (D_KML, "     uid        = %u\n", head->uid);
+        CDEBUG (D_KML, "     fsuid      = %u\n", head->fsuid);
+        CDEBUG (D_KML, "     fsgid      = %u\n", head->fsgid);
+        CDEBUG (D_KML, "     opcode     = %u\n", head->opcode);
+        CDEBUG (D_KML, "     ngroup     = %u",  head->ngroups);
+        for (i = 0; i < head->ngroups; i++)
+                CDEBUG (D_KML, "%u  ",  head->groups[i]);
+        CDEBUG (D_KML, "\n");
+}
+
+static void print_kml_create (struct kml_create *rec)
+{
+        CDEBUG (D_KML, " === CREATE\n");
+        CDEBUG (D_KML, "     path::%s\n", rec->path);
+        CDEBUG (D_KML, "     new_objv::%s\n", kml_version (&rec->new_objectv));
+        CDEBUG (D_KML, "     old_parv::%s\n", kml_version (&rec->old_parentv));
+        CDEBUG (D_KML, "     new_parv::%s\n", kml_version (&rec->new_parentv));
+        CDEBUG (D_KML, "     mode::%o\n", rec->mode);
+        CDEBUG (D_KML, "     uid::%d\n", rec->uid);
+        CDEBUG (D_KML, "     gid::%d\n", rec->gid);
+}
+
+static void print_kml_mkdir (struct kml_mkdir *rec)
+{
+        CDEBUG (D_KML, " === MKDIR\n");
+        CDEBUG (D_KML, "     path::%s\n", rec->path);
+        CDEBUG (D_KML, "     new_objv::%s\n", kml_version (&rec->new_objectv));
+        CDEBUG (D_KML, "     old_parv::%s\n", kml_version (&rec->old_parentv));
+        CDEBUG (D_KML, "     new_parv::%s\n", kml_version (&rec->new_parentv));
+        CDEBUG (D_KML, "     mode::%o\n", rec->mode);
+        CDEBUG (D_KML, "     uid::%d\n", rec->uid);
+        CDEBUG (D_KML, "     gid::%d\n", rec->gid);
+}
+
+static void print_kml_unlink (struct kml_unlink *rec)
+{
+        CDEBUG (D_KML, " === UNLINK\n");
+        CDEBUG (D_KML, "     path::%s/%s\n", rec->path, rec->name);
+        CDEBUG (D_KML, "     old_tgtv::%s\n", kml_version (&rec->old_tgtv));
+        CDEBUG (D_KML, "     old_parv::%s\n", kml_version (&rec->old_parentv));
+        CDEBUG (D_KML, "     new_parv::%s\n", kml_version (&rec->new_parentv));
+}
+
+static void print_kml_rmdir (struct kml_rmdir *rec)
+{
+        CDEBUG (D_KML, " === RMDIR\n");
+        CDEBUG (D_KML, "     path::%s/%s\n", rec->path, rec->name);
+        CDEBUG (D_KML, "     old_tgtv::%s\n", kml_version (&rec->old_tgtv));
+        CDEBUG (D_KML, "     old_parv::%s\n", kml_version (&rec->old_parentv));
+        CDEBUG (D_KML, "     new_parv::%s\n", kml_version (&rec->new_parentv));
+}
+
+static void print_kml_close (struct kml_close *rec)
+{
+        CDEBUG (D_KML, " === CLOSE\n");
+        CDEBUG (D_KML, "     mode::%o\n", rec->open_mode);
+        CDEBUG (D_KML, "     uid::%d\n", rec->open_uid);
+        CDEBUG (D_KML, "     gid::%d\n", rec->open_gid);
+        CDEBUG (D_KML, "     path::%s\n", rec->path);
+        CDEBUG (D_KML, "     new_objv::%s\n", kml_version (&rec->new_objectv));
+        CDEBUG (D_KML, "     ino::%lld\n", rec->ino);
+        CDEBUG (D_KML, "     gen::%u\n", rec->generation);
+}
+
+static void print_kml_symlink (struct kml_symlink *rec)
+{
+        CDEBUG (D_KML, " === SYMLINK\n");
+        CDEBUG (D_KML, "     s-path::%s\n", rec->sourcepath);
+        CDEBUG (D_KML, "     t-path::%s\n", rec->targetpath);
+        CDEBUG (D_KML, "     old_parv::%s\n", kml_version (&rec->old_parentv));
+        CDEBUG (D_KML, "     new_parv::%s\n", kml_version (&rec->new_parentv));
+        CDEBUG (D_KML, "     new_objv::%s\n", kml_version (&rec->new_objectv));
+        CDEBUG (D_KML, "     uid::%d\n", rec->uid);
+        CDEBUG (D_KML, "     gid::%d\n", rec->gid);
+}
+
+static void print_kml_rename (struct kml_rename *rec)
+{
+        CDEBUG (D_KML, " === RENAME\n");
+        CDEBUG (D_KML, "     s-path::%s\n", rec->sourcepath);
+        CDEBUG (D_KML, "     t-path::%s\n", rec->targetpath);
+        CDEBUG (D_KML, "     old_tgtv::%s\n", kml_version (&rec->old_tgtv));
+        CDEBUG (D_KML, "     new_tgtv::%s\n", kml_version (&rec->new_tgtv));
+        CDEBUG (D_KML, "     new_objv::%s\n", kml_version (&rec->new_objectv));
+        CDEBUG (D_KML, "     old_objv::%s\n", kml_version (&rec->old_objectv));
+}
+
+static void print_kml_setattr (struct kml_setattr *rec)
+{
+        CDEBUG (D_KML, " === SETATTR\n");
+        CDEBUG (D_KML, "     path::%s\n", rec->path);
+        CDEBUG (D_KML, "     old_objv::%s\n", kml_version (&rec->old_objectv));
+        CDEBUG (D_KML, "     valid::0x%x\n", rec->iattr.ia_valid);
+        CDEBUG (D_KML, "     mode::%o\n", rec->iattr.ia_mode);
+        CDEBUG (D_KML, "     uid::%d\n", rec->iattr.ia_uid);
+        CDEBUG (D_KML, "     gid::%d\n", rec->iattr.ia_gid);
+        CDEBUG (D_KML, "     size::%u\n", (u32) rec->iattr.ia_size);
+        CDEBUG (D_KML, "     mtime::%u\n", (u32) rec->iattr.ia_mtime);
+        CDEBUG (D_KML, "     ctime::%u\n", (u32) rec->iattr.ia_ctime);
+        CDEBUG (D_KML, "     flags::%u\n", (u32) rec->iattr.ia_attr_flags);
+}
+
+static void print_kml_link (struct kml_link *rec)
+{
+        CDEBUG (D_KML, " === LINK\n");
+        CDEBUG (D_KML, "     path::%s ==> %s\n", rec->sourcepath, rec->targetpath);
+        CDEBUG (D_KML, "     old_parv::%s\n", kml_version (&rec->old_parentv));
+        CDEBUG (D_KML, "     new_obj::%s\n", kml_version (&rec->new_objectv));
+        CDEBUG (D_KML, "     new_parv::%s\n", kml_version (&rec->new_parentv));
+}
+
+static void print_kml_mknod (struct kml_mknod *rec)
+{
+        CDEBUG (D_KML, " === MKNOD\n");
+        CDEBUG (D_KML, "     path::%s\n", rec->path);
+        CDEBUG (D_KML, "     new_obj::%s\n", kml_version (&rec->new_objectv));
+        CDEBUG (D_KML, "     old_parv::%s\n", kml_version (&rec->old_parentv));
+        CDEBUG (D_KML, "     new_parv::%s\n", kml_version (&rec->new_parentv));
+        CDEBUG (D_KML, "     mode::%o\n", rec->mode);
+        CDEBUG (D_KML, "     uid::%d\n", rec->uid);
+        CDEBUG (D_KML, "     gid::%d\n", rec->gid);
+        CDEBUG (D_KML, "     major::%d\n", rec->major);
+        CDEBUG (D_KML, "     minor::%d\n", rec->minor);
+}
+
+static void print_kml_open (struct kml_open *rec)
+{
+        CDEBUG (D_KML, " === OPEN\n");
+}
+
+#if 0
+static void print_kml_endmark (struct kml_endmark *rec)
+{
+        int i;
+        CDEBUG (D_KML, " === ENDMARK\n");
+        CDEBUG (D_KML, "     total::%u\n", rec->total);
+        for (i = 0; i < rec->total; i++)
+        {       
+                CDEBUG (D_KML, "         recno=%ld::flag=%ld,op=%ld, i_ino=%ld, \
+                        i_nlink=%ld\n", (long) rec->kop[i].kml_recno, 
+                        (long) rec->kop[i].kml_flag, (long) rec->kop[i].kml_op, 
+                        (long) rec->kop[i].i_ino, (long) rec->kop[i].i_nlink);
+        }
+}
+#endif
+
+static void print_kml_optimize (struct kml_optimize  *rec)
+{
+        CDEBUG (D_KML, " === OPTIMIZE\n");
+        if (rec->kml_flag == KML_REC_DELETE)
+                CDEBUG (D_KML, "     kml_flag::deleted\n");
+        else
+                CDEBUG (D_KML, "     kml_flag::exist\n");
+        CDEBUG (D_KML, "     kml_op::%u\n", rec->kml_op);
+        CDEBUG (D_KML, "     i_nlink::%d\n", rec->i_nlink);
+        CDEBUG (D_KML, "     i_ino::%u\n", rec->i_ino);
+}
+
+static void print_kml_suffix (struct journal_suffix *tail)
+{
+        CDEBUG (D_KML, " === KML SUFFIX\n");
+        CDEBUG (D_KML, "     prevrec::%ld\n", tail->prevrec);
+        CDEBUG (D_KML, "     recno::%ld\n", (long) tail->recno);
+        CDEBUG (D_KML, "     time::%d\n", tail->time);
+        CDEBUG (D_KML, "     len::%d\n", tail->len);
+}
+
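+/* Dump a decoded record; kml_printop selects which parts (prefix, body,
+ * suffix, optimize state) are printed. */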
+void kml_printrec (struct kml_rec *rec, int kml_printop)
+{
+        if (kml_printop & PRINT_KML_PREFIX)
+                print_kml_prefix (&rec->rec_head);
+        if (kml_printop & PRINT_KML_REC) 
+        { 
+                switch (rec->rec_head.opcode)
+                {
+                        case KML_CREATE:
+                                print_kml_create (&rec->rec_kml.create);
+                                break;
+                        case KML_MKDIR:
+                                print_kml_mkdir (&rec->rec_kml.mkdir);
+                                break;
+                        case KML_UNLINK:
+                                print_kml_unlink (&rec->rec_kml.unlink);
+                                break;
+                        case KML_RMDIR:
+                                print_kml_rmdir (&rec->rec_kml.rmdir);
+                                break;
+                        case KML_CLOSE:
+                                print_kml_close (&rec->rec_kml.close);
+                                break;
+                        case KML_SYMLINK:
+                                print_kml_symlink (&rec->rec_kml.symlink);
+                                break;
+                        case KML_RENAME:
+                                print_kml_rename (&rec->rec_kml.rename);
+                                break;
+                        case KML_SETATTR:
+                                print_kml_setattr (&rec->rec_kml.setattr);
+                                break;
+                        case KML_LINK:
+                                print_kml_link (&rec->rec_kml.link);
+                                break;
+                        case KML_OPEN:
+                                print_kml_open (&rec->rec_kml.open);
+                                break;
+                        case KML_MKNOD:
+                                print_kml_mknod (&rec->rec_kml.mknod);
+                                break;
+#if 0
+                        case KML_ENDMARK:
+                                print_kml_endmark (&rec->rec_kml.endmark);
+                                break;
+#endif
+                        default:
+                                CDEBUG (D_KML, " === BAD RECORD, opcode=%u\n",
+                                        rec->rec_head.opcode);
+                                break;
+                }
+        }
+        if (kml_printop & PRINT_KML_SUFFIX)
+                print_kml_suffix (&rec->rec_tail);
+        if (kml_printop & PRINT_KML_OPTIMIZE)
+                print_kml_optimize (&rec->kml_optimize);
+}
+
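+/* Release the path strings attached to a record by the unpack_* helpers. */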
+void kml_freerec (struct kml_rec *rec)
+{
+        char *sourcepath = NULL,
+             *targetpath = NULL;
+        switch (rec->rec_head.opcode)
+        {
+                case KML_CREATE:
+                        sourcepath = rec->rec_kml.create.path;
+                        break;
+                case KML_MKDIR:
+                        sourcepath = rec->rec_kml.mkdir.path;
+                        break;
+                case KML_UNLINK:
+                        sourcepath = rec->rec_kml.unlink.path;
+                        targetpath = rec->rec_kml.unlink.name;
+                        break;
+                case KML_RMDIR:
+                        sourcepath = rec->rec_kml.rmdir.path;
+                        targetpath = rec->rec_kml.rmdir.name;
+                        break;
+                case KML_CLOSE:
+                        sourcepath = rec->rec_kml.close.path;
+                        break;
+                case KML_SYMLINK:
+                        sourcepath = rec->rec_kml.symlink.sourcepath;
+                        targetpath = rec->rec_kml.symlink.targetpath;
+                        break;
+                case KML_RENAME:
+                        sourcepath = rec->rec_kml.rename.sourcepath;
+                        targetpath = rec->rec_kml.rename.targetpath;
+                        break;
+                case KML_SETATTR:
+                        sourcepath = rec->rec_kml.setattr.path;
+                        break;
+                case KML_LINK:
+                        sourcepath = rec->rec_kml.link.sourcepath;
+                        targetpath = rec->rec_kml.link.targetpath;
+                        break;
+                case KML_OPEN:
+                        break;
+                case KML_MKNOD:
+                        sourcepath = rec->rec_kml.mknod.path;
+                        break;
+#if 0
+                case KML_ENDMARK:
+                        PRESTO_FREE (rec->rec_kml.endmark.kop, sizeof (int) +
+                                sizeof (struct kml_kop_node) *
+                                rec->rec_kml.endmark.total);
+                        break;
+#endif
+                default:
+                        break;
+        }
+        if (sourcepath != NULL)
+                PRESTO_FREE (sourcepath, strlen (sourcepath) + 1);
+        if (targetpath != NULL)
+                PRESTO_FREE (targetpath, strlen (targetpath) + 1);
+}
+
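+/* Return a pointer to the record starting at pos.  The first int of every
+ * packed record is its total length; return NULL if it runs past the
+ * end of the buffer. */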
+char *readrec (char *recbuf, int reclen, int pos, int *size)
+{
+        char *p = recbuf + pos;
+        *size = *((int *) p);
+        if (*size > (reclen - pos))
+            return NULL;
+        return p; 
+}
+
+int kml_decoderec (char *buf, int pos, int buflen, int *size, 
+                        struct kml_rec **newrec)
+{
+        char *tmp;
+        int  error;
+        tmp = readrec (buf, buflen, pos, size);
+        if (tmp == NULL)
+                return -EBADF;
+        error = kml_unpack (tmp, *size, pos, newrec); 
+        return error;
+}
+
+#if 0
+static void fill_kmlrec_optimize (struct list_head *head, 
+                struct kml_rec *optrec)
+{
+        struct kml_rec *kmlrec;
+        struct list_head *tmp;
+        struct kml_endmark *km;
+        struct kml_optimize *ko;
+        int    n;
+
+        if (optrec->rec_kml.endmark.total == 0)
+                return;
+        n = optrec->rec_kml.endmark.total - 1;
+        tmp = head->prev;
+        km = &optrec->rec_kml.endmark;
+        while ( n >= 0 && tmp != head ) 
+        {
+                kmlrec = list_entry(tmp, struct kml_rec,
+                        kml_optimize.kml_chains);
+                tmp = tmp->prev;
+                if (kmlrec->rec_tail.recno == km->kop[n].kml_recno) 
+                {
+                        ko = &kmlrec->kml_optimize;
+                        ko->kml_flag = km->kop[n].kml_flag;
+                        ko->kml_op   = km->kop[n].kml_op;
+                        ko->i_nlink  = km->kop[n].i_nlink;
+                        ko->i_ino    = km->kop[n].i_ino;
+                        n --;
+                }
+        }
+        if (n != -1)
+                CDEBUG (D_KML, "Yeah!!!, KML optimize error, recno=%d, n=%d\n",
+                        optrec->rec_tail.recno, n);     
+}
+#endif
+
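+/* Walk a packed KML buffer, decode each record and chain it on head via
+ * the kml_optimize.kml_chains list. */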
+int decode_kmlrec (struct list_head *head, char *kml_buf, int buflen)
+{
+        struct kml_rec *rec;
+        int    pos = 0, size;
+        int    err = 0;
+        while (pos < buflen) {
+                err = kml_decoderec (kml_buf, pos, buflen, &size, &rec);
+                if (err != 0)
+                        break;
+#if 0
+                if (rec->rec_head.opcode == KML_ENDMARK) {
+                        fill_kmlrec_optimize (head, rec);
+                        mark_rec_deleted (rec);
+                }
+#endif
+                list_add_tail (&rec->kml_optimize.kml_chains, head);
+                pos += size;
+        }
+        return err;
+}
+
+int delete_kmlrec (struct list_head *head)
+{
+        struct kml_rec *rec;
+        struct list_head *tmp;
+
+        if (list_empty(head))
+                return 0;
+        tmp = head->next;
+        while ( tmp != head ) {
+                rec = list_entry(tmp, struct kml_rec, 
+                        kml_optimize.kml_chains);
+                tmp = tmp->next;
+                kml_freerec (rec);
+                /* kml_freerec only releases the path strings; free the
+                 * record itself, which was allocated in kml_unpack */
+                PRESTO_FREE (rec, sizeof (struct kml_rec));
+        }
+        INIT_LIST_HEAD(head);
+        return 0;
+}
+
+int print_allkmlrec (struct list_head *head, int printop)
+{
+        struct kml_rec *rec;
+        struct list_head *tmp;
+
+        if (list_empty(head))
+                return 0;
+        tmp = head->next;
+        while ( tmp != head ) {
+                rec = list_entry(tmp, struct kml_rec,
+                        kml_optimize.kml_chains);
+                tmp = tmp->next;
+#if 0
+                if (printop & PRINT_KML_EXIST) {
+                        if (is_deleted_node (rec))
+                                continue;
+                }
+                else if (printop & PRINT_KML_DELETE) {
+                        if (! is_deleted_node (rec))
+                                continue;
+                }
+#endif
+                kml_printrec (rec, printop);
+        }
+        INIT_LIST_HEAD(head);
+        return 0;
+}
+
diff --git a/fs/intermezzo/kml_reint.c b/fs/intermezzo/kml_reint.c
new file mode 100644 (file)
index 0000000..e447b76
--- /dev/null
@@ -0,0 +1,647 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Reintegration of KML records
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/mmu_context.h>
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
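+/* Switch the caller's context to the credentials recorded in the KML
+ * prefix (fsuid, fsgid, groups) and to the fileset root, so the replayed
+ * operation runs as the original writer did.  pop_ctxt() undoes this. */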
+static void kmlreint_pre_secure(struct kml_rec *rec, struct file *dir,
+                                struct run_ctxt *saved)
+{
+        struct run_ctxt ctxt; 
+        struct presto_dentry_data *dd = presto_d2d(dir->f_dentry);
+        int i;
+
+        ctxt.fsuid = rec->prefix.hdr->fsuid;
+        ctxt.fsgid = rec->prefix.hdr->fsgid;
+        ctxt.fs = KERNEL_DS; 
+        ctxt.pwd = dd->dd_fset->fset_dentry;
+        ctxt.pwdmnt = dd->dd_fset->fset_mnt;
+
+        ctxt.root = ctxt.pwd;
+        ctxt.rootmnt = ctxt.pwdmnt;
+        if (rec->prefix.hdr->ngroups > 0) {
+                ctxt.group_info = groups_alloc(rec->prefix.hdr->ngroups);
+                for (i = 0; i< ctxt.group_info->ngroups; i++) 
+                        GROUP_AT(ctxt.group_info,i)= rec->prefix.groups[i];
+        } else
+                ctxt.group_info = groups_alloc(0);
+
+        push_ctxt(saved, &ctxt);
+}
+
+
+/* Join two path components, inserting a '/' separator if one is missing. */
+static char * path_join(char *p1, int p1len, char *p2, int p2len)
+{
+        int size = p1len + p2len + 2; /* possibly one extra /, one NULL */
+        char *path;
+
+        path = kmalloc(size, GFP_KERNEL);
+        if (path == NULL)
+                return NULL;
+
+        memcpy(path, p1, p1len);
+        if (path[p1len - 1] != '/') {
+                path[p1len] = '/';
+                p1len++;
+        }
+        memcpy(path + p1len, p2, p2len);
+        path[p1len + p2len] = '\0';
+
+        return path;
+}
+
+static inline int kml_recno_equal(struct kml_rec *rec,
+                                  struct presto_file_set *fset)
+{
+        return (rec->suffix->recno == fset->fset_lento_recno + 1);
+}
+
+static inline int version_equal(struct presto_version *a, struct inode *inode)
+{
+        if (a == NULL)
+                return 1;
+
+        if (inode == NULL) {
+                CERROR("InterMezzo: NULL inode in version_equal()\n");
+                return 0;
+        }
+
+        if (inode->i_mtime.tv_sec == a->pv_mtime_sec &&
+            inode->i_mtime.tv_nsec == a->pv_mtime_nsec &&
+            (S_ISDIR(inode->i_mode) || inode->i_size == a->pv_size))
+                return 1;
+
+        return 0;
+}
+
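+/* Reintegrate a CLOSE record.  On DATA_ON_DEMAND filesets only the
+ * attributes and the remote file id are recorded (for a later backfetch);
+ * otherwise the file data is fetched immediately via izo_upc_backfetch(). */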
+static int reint_close(struct kml_rec *rec, struct file *file,
+                       struct lento_vfs_context *given_info)
+{
+        struct run_ctxt saved_ctxt;
+        int error;
+        struct presto_file_set *fset;
+        struct lento_vfs_context info; 
+        ENTRY;
+
+        memcpy(&info, given_info, sizeof(*given_info));
+
+
+        CDEBUG (D_KML, "=====REINT_CLOSE::%s\n", rec->path);
+
+        fset = presto_fset(file->f_dentry);
+        if (fset->fset_flags & FSET_DATA_ON_DEMAND) {
+                struct iattr iattr;
+
+                iattr.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_SIZE;
+                iattr.ia_mtime.tv_sec = (time_t)rec->new_objectv->pv_mtime_sec;
+                iattr.ia_mtime.tv_nsec = (time_t)rec->new_objectv->pv_mtime_nsec;
+                iattr.ia_ctime.tv_sec = (time_t)rec->new_objectv->pv_ctime_sec;
+                iattr.ia_ctime.tv_nsec = (time_t)rec->new_objectv->pv_ctime_nsec;
+                iattr.ia_size = (time_t)rec->new_objectv->pv_size;
+
+                /* no kml record, but update last rcvd */
+                /* save fileid in dentry for later backfetch */
+                info.flags |= LENTO_FL_EXPECT | LENTO_FL_SET_DDFILEID;
+                info.remote_ino = rec->ino;
+                info.remote_generation = rec->generation;
+                info.flags &= ~LENTO_FL_KML;
+                kmlreint_pre_secure(rec, file, &saved_ctxt);
+                error = lento_setattr(rec->path, &iattr, &info);
+                pop_ctxt(&saved_ctxt);
+
+                presto_d2d(file->f_dentry)->dd_flags &= ~PRESTO_DATA;
+        } else {
+                int minor = presto_f2m(fset);
+
+                info.updated_time.tv_sec = rec->new_objectv->pv_mtime_sec;
+                info.updated_time.tv_nsec = rec->new_objectv->pv_mtime_nsec;
+                memcpy(&info.remote_version, rec->old_objectv, 
+                       sizeof(*rec->old_objectv));
+                info.remote_ino = rec->ino;
+                info.remote_generation = rec->generation;
+                error = izo_upc_backfetch(minor, rec->path, fset->fset_name,
+                                          &info);
+                if (error) {
+                        CERROR("backfetch error %d\n", error);
+                        /* if file doesn't exist anymore,  then ignore the CLOSE
+                         * and just update the last_rcvd.
+                         */
+                        if (error == ENOENT) {
+                                CDEBUG(D_KML, "manually updating remote offset uuid %s"
+                                       "recno %d offset %Lu\n", info.uuid, info.recno,
+                                       (unsigned long long) info.kml_offset);
+                                error = izo_rcvd_upd_remote(fset, info.uuid, info.recno, info.kml_offset);
+                                if(error)
+                                        CERROR("izo_rcvd_upd_remote error %d\n", error);
+
+                        } 
+                }
+                        
+                /* propagate error to avoid further reint */
+        }
+
+        EXIT;
+        return error;
+}
+
+static int reint_create(struct kml_rec *rec, struct file *dir,
+                        struct lento_vfs_context *info)
+{
+        struct run_ctxt saved_ctxt;
+        int     error;
+
+        ENTRY;
+
+        CDEBUG (D_KML, "=====REINT_CREATE::%s\n", rec->path);
+        info->updated_time.tv_sec = rec->new_objectv->pv_ctime_sec;
+        info->updated_time.tv_nsec = rec->new_objectv->pv_ctime_nsec;
+        kmlreint_pre_secure(rec, dir, &saved_ctxt);
+        error = lento_create(rec->path, rec->mode, info);
+        pop_ctxt(&saved_ctxt); 
+
+        EXIT;
+        return error;
+}
+
+static int reint_link(struct kml_rec *rec, struct file *dir,
+                      struct lento_vfs_context *info)
+{
+        struct run_ctxt saved_ctxt;
+        int     error;
+
+        ENTRY;
+
+        CDEBUG (D_KML, "=====REINT_LINK::%s -> %s\n", rec->path, rec->target);
+        info->updated_time.tv_sec = rec->new_objectv->pv_mtime_sec;
+        info->updated_time.tv_nsec = rec->new_objectv->pv_mtime_nsec;
+        kmlreint_pre_secure(rec, dir, &saved_ctxt);
+        error = lento_link(rec->path, rec->target, info);
+        pop_ctxt(&saved_ctxt); 
+
+        EXIT;
+        return error;
+}
+
+static int reint_mkdir(struct kml_rec *rec, struct file *dir,
+                       struct lento_vfs_context *info)
+{
+        struct run_ctxt saved_ctxt;
+        int     error;
+
+        ENTRY;
+
+        CDEBUG (D_KML, "=====REINT_MKDIR::%s\n", rec->path);
+        info->updated_time.tv_sec = rec->new_objectv->pv_ctime_sec;
+        info->updated_time.tv_nsec = rec->new_objectv->pv_ctime_nsec;
+        kmlreint_pre_secure(rec, dir, &saved_ctxt);
+        error = lento_mkdir(rec->path, rec->mode, info);
+        pop_ctxt(&saved_ctxt); 
+
+        EXIT;
+        return error;
+}
+
+static int reint_mknod(struct kml_rec *rec, struct file *dir,
+                       struct lento_vfs_context *info)
+{
+        struct run_ctxt saved_ctxt;
+        int     error;
+        dev_t   dev;
+
+        ENTRY;
+
+        CDEBUG (D_KML, "=====REINT_MKNOD::%s\n", rec->path);
+        info->updated_time.tv_sec = rec->new_objectv->pv_ctime_sec;
+        info->updated_time.tv_nsec = rec->new_objectv->pv_ctime_nsec;
+        kmlreint_pre_secure(rec, dir, &saved_ctxt);
+
+        dev = rec->rdev ? old_decode_dev(rec->rdev) : MKDEV(rec->major, rec->minor);
+
+        error = lento_mknod(rec->path, rec->mode, dev, info);
+        pop_ctxt(&saved_ctxt); 
+
+        EXIT;
+        return error;
+}
+
+
+static int reint_noop(struct kml_rec *rec, struct file *dir,
+                      struct lento_vfs_context *info)
+{
+        return 0;
+}
+
+static int reint_rename(struct kml_rec *rec, struct file *dir,
+                        struct lento_vfs_context *info)
+{
+        struct run_ctxt saved_ctxt;
+        int     error;
+
+        ENTRY;
+
+        CDEBUG (D_KML, "=====REINT_RENAME::%s -> %s\n", rec->path, rec->target);
+        info->updated_time.tv_sec = rec->new_objectv->pv_mtime_sec;
+        info->updated_time.tv_nsec = rec->new_objectv->pv_mtime_nsec;
+        kmlreint_pre_secure(rec, dir, &saved_ctxt);
+        error = lento_rename(rec->path, rec->target, info);
+        pop_ctxt(&saved_ctxt); 
+
+        EXIT;
+        return error;
+}
+
+static int reint_rmdir(struct kml_rec *rec, struct file *dir,
+                       struct lento_vfs_context *info)
+{
+        struct run_ctxt saved_ctxt;
+        int     error;
+        char *path;
+
+        ENTRY;
+
+        path = path_join(rec->path, rec->pathlen - 1, rec->target, rec->targetlen);
+        if (path == NULL) {
+                EXIT;
+                return -ENOMEM;
+        }
+
+        CDEBUG (D_KML, "=====REINT_RMDIR::%s\n", path);
+        info->updated_time.tv_sec = rec->new_parentv->pv_mtime_sec;
+        info->updated_time.tv_nsec = rec->new_parentv->pv_mtime_nsec;
+        kmlreint_pre_secure(rec, dir, &saved_ctxt);
+        error = lento_rmdir(path, info);
+        pop_ctxt(&saved_ctxt); 
+
+        kfree(path);
+        EXIT;
+        return error;
+}
+
+static int reint_setattr(struct kml_rec *rec, struct file *dir,
+                         struct lento_vfs_context *info)
+{
+        struct run_ctxt saved_ctxt;
+        struct iattr iattr;
+        int     error;
+
+        ENTRY;
+
+        iattr.ia_valid = rec->valid;
+        iattr.ia_mode  = (umode_t)rec->mode;
+        iattr.ia_uid   = (uid_t)rec->uid;
+        iattr.ia_gid   = (gid_t)rec->gid;
+        iattr.ia_size  = (off_t)rec->size;
+        iattr.ia_ctime.tv_sec = rec->ctime_sec;
+        iattr.ia_ctime.tv_nsec = rec->ctime_nsec;
+        iattr.ia_mtime.tv_sec = rec->mtime_sec;
+        iattr.ia_mtime.tv_nsec = rec->mtime_nsec;
+        iattr.ia_atime = iattr.ia_mtime; /* We don't track atimes. */
+        iattr.ia_attr_flags = rec->flags;
+
+        CDEBUG (D_KML, "=====REINT_SETATTR::%s (%d)\n", rec->path, rec->valid);
+        kmlreint_pre_secure(rec, dir, &saved_ctxt);
+        error = lento_setattr(rec->path, &iattr, info);
+        pop_ctxt(&saved_ctxt); 
+
+        EXIT;
+        return error;
+}
+
+static int reint_symlink(struct kml_rec *rec, struct file *dir,
+                         struct lento_vfs_context *info)
+{
+        struct run_ctxt saved_ctxt;
+        int     error;
+
+        ENTRY;
+
+        CDEBUG (D_KML, "=====REINT_SYMLINK::%s -> %s\n", rec->path, rec->target);
+        info->updated_time.tv_sec = rec->new_objectv->pv_ctime_sec;
+        info->updated_time.tv_nsec = rec->new_objectv->pv_ctime_nsec;
+        kmlreint_pre_secure(rec, dir, &saved_ctxt);
+        error = lento_symlink(rec->target, rec->path, info);
+        pop_ctxt(&saved_ctxt); 
+
+        EXIT;
+        return error;
+}
+
+static int reint_unlink(struct kml_rec *rec, struct file *dir,
+                        struct lento_vfs_context *info)
+{
+        struct run_ctxt saved_ctxt;
+        int     error;
+        char *path;
+
+        ENTRY;
+
+        path = path_join(rec->path, rec->pathlen - 1, rec->target, rec->targetlen);
+        if (path == NULL) {
+                EXIT;
+                return -ENOMEM;
+        }
+
+        CDEBUG (D_KML, "=====REINT_UNLINK::%s\n", path);
+        info->updated_time.tv_sec = rec->new_parentv->pv_mtime_sec;
+        info->updated_time.tv_nsec = rec->new_parentv->pv_mtime_nsec;
+        kmlreint_pre_secure(rec, dir, &saved_ctxt);
+        error = lento_unlink(path, info);
+        pop_ctxt(&saved_ctxt); 
+
+        kfree(path);
+        EXIT;
+        return error;
+}
+
+static int branch_reint_rename(struct presto_file_set *fset, struct kml_rec *rec, 
+                   struct file *dir, struct lento_vfs_context *info,
+                   char * kml_data, __u64 kml_size)
+{
+        int     error;
+
+        ENTRY;
+
+        error = reint_rename(rec, dir, info);
+        if (error == -ENOENT) {
+                /* normal reint failed because path was not found */
+                struct rec_info rec;
+                
+                CDEBUG(D_KML, "saving branch rename kml\n");
+                rec.is_kml = 1;
+                rec.size = kml_size;
+                error = presto_log(fset, &rec, kml_data, kml_size,
+                           NULL, 0, NULL, 0,  NULL, 0);
+                if (error == 0)
+                        error = presto_write_last_rcvd(&rec, fset, info);
+        }
+
+        EXIT;
+        return error;
+}
+
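+/* Reintegration into a branch: CLOSE records are replayed, RENAME is
+ * attempted and falls back to re-logging the record if the path is gone,
+ * and everything else is simply appended to the branch KML with the
+ * last_rcvd record updated. */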
+int branch_reinter(struct presto_file_set *fset, struct kml_rec *rec, 
+                   struct file *dir, struct lento_vfs_context *info,
+                   char * kml_data, __u64 kml_size)
+{
+        int error = 0;
+        int op = rec->prefix.hdr->opcode;
+
+        if (op == KML_OPCODE_CLOSE) {
+                /* regular close and backfetch */
+                error = reint_close(rec, dir, info);
+        } else if  (op == KML_OPCODE_RENAME) {
+                /* rename only if name already exists  */
+                error = branch_reint_rename(fset, rec, dir, info,
+                                            kml_data, kml_size);
+        } else {
+                /* just rewrite kml into branch/kml and update last_rcvd */
+                struct rec_info rec;
+                
+                CDEBUG(D_KML, "Saving branch kml\n");
+                rec.is_kml = 1;
+                rec.size = kml_size;
+                error = presto_log(fset, &rec, kml_data, kml_size,
+                           NULL, 0, NULL, 0,  NULL, 0);
+                if (error == 0)
+                        error = presto_write_last_rcvd(&rec, fset, info);
+        }
+                
+        return error;
+}
+
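+/* Dispatch table mapping KML opcodes to their reintegration handlers.
+ * get_reinter() returns NULL for opcodes without a handler. */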
+typedef int (*reinter_t)(struct kml_rec *rec, struct file *basedir,
+                         struct lento_vfs_context *info);
+
+static reinter_t presto_reinters[KML_OPCODE_NUM] =
+{
+        [KML_OPCODE_CLOSE] = reint_close,
+        [KML_OPCODE_CREATE] = reint_create,
+        [KML_OPCODE_LINK] = reint_link,
+        [KML_OPCODE_MKDIR] = reint_mkdir,
+        [KML_OPCODE_MKNOD] = reint_mknod,
+        [KML_OPCODE_NOOP] = reint_noop,
+        [KML_OPCODE_RENAME] = reint_rename,
+        [KML_OPCODE_RMDIR] = reint_rmdir,
+        [KML_OPCODE_SETATTR] = reint_setattr,
+        [KML_OPCODE_SYMLINK] = reint_symlink,
+        [KML_OPCODE_UNLINK] = reint_unlink,
+};
+
+static inline reinter_t get_reinter(int op)
+{
+        if (op < 0 || op >= sizeof(presto_reinters) / sizeof(reinter_t)) 
+                return NULL; 
+        else 
+                return  presto_reinters[op];
+}
+
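+/* Reintegrate a single KML record passed in from user space: copy it into
+ * the fileset reint buffer, unpack it, compare its record number with the
+ * peer's last_rcvd state and dispatch to the handler for its opcode. */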
+int kml_reint_rec(struct file *dir, struct izo_ioctl_data *data)
+{
+        char *ptr;
+        char *end;
+        struct kml_rec rec;
+        int error = 0;
+        struct lento_vfs_context info;
+        struct presto_cache *cache;
+        struct presto_file_set *fset;
+        struct presto_dentry_data *dd = presto_d2d(dir->f_dentry);
+        int op;
+        reinter_t reinter;
+
+        struct izo_rcvd_rec lr_rec;
+        int off;
+
+        ENTRY;
+
+        error = presto_prep(dir->f_dentry, &cache, &fset);
+        if ( error  ) {
+                CERROR("intermezzo: Reintegration on invalid file\n");
+                return error;
+        }
+
+        if (!dd || !dd->dd_fset || dd->dd_fset->fset_dentry != dir->f_dentry) { 
+                CERROR("intermezzo: reintegration on non-fset root (ino %ld)\n",
+                       dir->f_dentry->d_inode->i_ino);
+                    
+                return -EINVAL;
+        }
+
+        if (data->ioc_plen1 > 64 * 1024) {
+                EXIT;
+                return -ENOSPC;
+        }
+
+        ptr = fset->fset_reint_buf;
+        end = ptr + data->ioc_plen1;
+
+        if (copy_from_user(ptr, data->ioc_pbuf1, data->ioc_plen1)) { 
+                EXIT;
+                error = -EFAULT;
+                goto out;
+        }
+
+        error = kml_unpack(&rec, &ptr, end);
+        if (error) { 
+                EXIT;
+                error = -EFAULT;
+                goto out;
+        }
+
+        off = izo_rcvd_get(&lr_rec, fset, data->ioc_uuid);
+        if (off < 0) {
+                CERROR("No last_rcvd record, setting to 0\n");
+                memset(&lr_rec, 0, sizeof(lr_rec));
+        }
+        data->ioc_kmlsize = ptr - fset->fset_reint_buf;
+
+        if (rec.suffix->recno != lr_rec.lr_remote_recno + 1) {
+                CERROR("KML record number %Lu expected, not %d\n",
+                       (unsigned long long) (lr_rec.lr_remote_recno + 1),
+                       rec.suffix->recno);
+
+#if 0
+                if (!version_check(&rec, dd->dd_fset, &info)) {
+                        /* FIXME: do an upcall to resolve conflicts */
+                        CERROR("intermezzo: would be a conflict!\n");
+                        error = -EINVAL;
+                        EXIT;
+                        goto out;
+                }
+#endif
+        }
+
+        op = rec.prefix.hdr->opcode;
+
+        reinter = get_reinter(op);
+        if (!reinter) { 
+                CERROR("%s: Unrecognized KML opcode %d\n", __FUNCTION__, op);
+                error = -EINVAL;
+                EXIT;
+                goto out;
+        }
+
+        info.kml_offset = data->ioc_offset + data->ioc_kmlsize;
+        info.recno = rec.suffix->recno;
+        info.flags = LENTO_FL_EXPECT;
+        if (data->ioc_flags)
+                info.flags |= LENTO_FL_KML;
+
+        memcpy(info.uuid, data->ioc_uuid, sizeof(info.uuid));
+
+        if (fset->fset_flags & FSET_IS_BRANCH && data->ioc_flags)
+                error = branch_reinter(fset, &rec, dir, &info, fset->fset_reint_buf,
+                                       data->ioc_kmlsize);
+        else 
+                error = reinter(&rec, dir, &info);
+ out: 
+        EXIT;
+        return error;
+}
+
+int izo_get_fileid(struct file *dir, struct izo_ioctl_data *data)
+{
+        char *buf = NULL; 
+        char *ptr;
+        char *end;
+        struct kml_rec rec;
+        struct file *file;
+        struct presto_cache *cache;
+        struct presto_file_set *fset;
+        struct presto_dentry_data *dd = presto_d2d(dir->f_dentry);
+        struct run_ctxt saved_ctxt;
+        int     error;
+
+        ENTRY;
+
+        error = presto_prep(dir->f_dentry, &cache, &fset);
+        if ( error  ) {
+                CERROR("intermezzo: Reintegration on invalid file\n");
+                return error;
+        }
+
+        if (!dd || !dd->dd_fset || dd->dd_fset->fset_dentry != dir->f_dentry) { 
+                CERROR("intermezzo: reintegration on non-fset root (ino %ld)\n",
+                       dir->f_dentry->d_inode->i_ino);
+                    
+                return -EINVAL;
+        }
+
+
+        PRESTO_ALLOC(buf, data->ioc_plen1);
+        if (!buf) { 
+                EXIT;
+                return -ENOMEM;
+        }
+        ptr = buf;
+        end = buf + data->ioc_plen1;
+
+        if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) { 
+                EXIT;
+                PRESTO_FREE(buf, data->ioc_plen1);
+                return -EFAULT;
+        }
+
+        error = kml_unpack(&rec, &ptr, end);
+        if (error) { 
+                EXIT;
+                PRESTO_FREE(buf, data->ioc_plen1);
+                return -EFAULT;
+        }
+
+        kmlreint_pre_secure(&rec, dir, &saved_ctxt);
+
+        file = filp_open(rec.path, O_RDONLY, 0);
+        if (!file || IS_ERR(file)) { 
+                error = PTR_ERR(file);
+                goto out;
+        }
+        data->ioc_ino = file->f_dentry->d_inode->i_ino;
+        data->ioc_generation = file->f_dentry->d_inode->i_generation; 
+        filp_close(file, 0); 
+
+        CDEBUG(D_FILE, "%s ino %Lx, gen %Lx\n", rec.path,
+               (unsigned long long) data->ioc_ino,
+               (unsigned long long) data->ioc_generation);
+
+ out:
+        if (buf) 
+                PRESTO_FREE(buf, data->ioc_plen1);
+        pop_ctxt(&saved_ctxt); 
+        EXIT;
+        return error;
+}
+
+
diff --git a/fs/intermezzo/kml_setup.c b/fs/intermezzo/kml_setup.c
new file mode 100644 (file)
index 0000000..8a01718
--- /dev/null
@@ -0,0 +1,58 @@
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
+#include "intermezzo_fs.h"
+#include "intermezzo_upcall.h"
+#include "intermezzo_psdev.h"
+#include "intermezzo_kml.h"
+
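+/* Allocate the per-fileset KML bookkeeping data and its reintegration
+ * buffer. */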
+int kml_init (struct presto_file_set *fset)
+{
+        struct kml_fsdata *data;
+
+        ENTRY;
+        PRESTO_ALLOC (data, struct kml_fsdata *, sizeof (struct kml_fsdata));
+        if (data == NULL) {
+                EXIT;
+                return -ENOMEM;
+        }
+        INIT_LIST_HEAD (&data->kml_reint_cache);
+        INIT_LIST_HEAD (&data->kml_kop_cache);
+
+        PRESTO_ALLOC (data->kml_buf, char *, KML_REINT_MAXBUF);
+        if (data->kml_buf == NULL) {
+                PRESTO_FREE (data, sizeof (struct kml_fsdata));
+                EXIT;
+                return -ENOMEM;
+        }
+
+        data->kml_maxsize = KML_REINT_MAXBUF;
+        data->kml_len = 0;
+        data->kml_reintpos = 0;
+        data->kml_count = 0;
+        fset->fset_kmldata = data;
+        EXIT;
+        return 0;
+}
+
+int kml_cleanup (struct presto_file_set *fset)
+{
+        struct kml_fsdata *data = fset->fset_kmldata;
+
+        if (data == NULL)
+                return 0;
+
+        fset->fset_kmldata = NULL;
+#if 0
+        kml_sop_cleanup (&data->kml_reint_cache);
+        kml_kop_cleanup (&data->kml_kop_cache);
+#endif
+        PRESTO_FREE (data->kml_buf, KML_REINT_MAXBUF);
+        PRESTO_FREE (data, sizeof (struct kml_fsdata));
+        return 0;
+}
+
+
diff --git a/fs/intermezzo/kml_unpack.c b/fs/intermezzo/kml_unpack.c
new file mode 100644 (file)
index 0000000..d12a346
--- /dev/null
@@ -0,0 +1,712 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Unpacking of KML records
+ *
+ */
+
+#ifdef __KERNEL__
+#  include <linux/module.h>
+#  include <linux/errno.h>
+#  include <linux/kernel.h>
+#  include <linux/major.h>
+#  include <linux/sched.h>
+#  include <linux/lp.h>
+#  include <linux/slab.h>
+#  include <linux/ioport.h>
+#  include <linux/fcntl.h>
+#  include <linux/delay.h>
+#  include <linux/skbuff.h>
+#  include <linux/proc_fs.h>
+#  include <linux/vmalloc.h>
+#  include <linux/fs.h>
+#  include <linux/poll.h>
+#  include <linux/init.h>
+#  include <linux/list.h>
+#  include <linux/stat.h>
+#  include <asm/io.h>
+#  include <asm/segment.h>
+#  include <asm/system.h>
+#  include <asm/poll.h>
+#  include <asm/uaccess.h>
+#else
+#  include <time.h>
+#  include <stdio.h>
+#  include <string.h>
+#  include <stdlib.h>
+#  include <errno.h>
+#  include <sys/stat.h>
+#  include <glib.h>
+#endif
+
+#include "intermezzo_lib.h"
+#include "intermezzo_idl.h"
+#include "intermezzo_fs.h"
+
+int kml_unpack_version(struct presto_version **ver, char **buf, char *end) 
+{
+       char *ptr = *buf;
+        struct presto_version *pv;
+
+       UNLOGP(*ver, struct presto_version, ptr, end);
+        pv = *ver;
+        pv->pv_mtime_sec   = NTOH__u32(pv->pv_mtime_sec);
+        pv->pv_mtime_nsec   = NTOH__u32(pv->pv_mtime_nsec);
+        pv->pv_ctime_sec   = NTOH__u32(pv->pv_ctime_sec);
+        pv->pv_ctime_nsec   = NTOH__u32(pv->pv_ctime_nsec);
+        pv->pv_size    = NTOH__u64(pv->pv_size);
+
+       *buf = ptr;
+
+        return 0;
+}
+
+
+static int kml_unpack_noop(struct kml_rec *rec, char **buf, char *end)
+{
+       return 0;
+}
+
+static int kml_unpack_get_fileid(struct kml_rec *rec, char **buf, char *end)
+{
+       char *ptr = *buf;
+
+       LUNLOGV(rec->pathlen, __u32, ptr, end);
+       UNLOGL(rec->path, char, rec->pathlen, ptr, end);
+
+       *buf = ptr;
+       return 0;
+}
+
+static int kml_unpack_create(struct kml_rec *rec, char **buf, char *end)
+{
+       char *ptr = *buf;
+
+       kml_unpack_version(&rec->old_parentv, &ptr, end);
+       kml_unpack_version(&rec->new_parentv, &ptr, end);
+       kml_unpack_version(&rec->new_objectv, &ptr, end);
+       LUNLOGV(rec->mode, __u32, ptr, end);
+       LUNLOGV(rec->uid, __u32, ptr, end);
+       LUNLOGV(rec->gid, __u32, ptr, end);
+       LUNLOGV(rec->pathlen, __u32, ptr, end);
+       UNLOGL(rec->path, char, rec->pathlen, ptr, end);
+
+       *buf = ptr;
+
+       return 0;
+}
+
+static int kml_unpack_mkdir(struct kml_rec *rec, char **buf, char *end)
+{
+       char *ptr = *buf;
+
+       kml_unpack_version(&rec->old_parentv, &ptr, end);
+       kml_unpack_version(&rec->new_parentv, &ptr, end);
+       kml_unpack_version(&rec->new_objectv, &ptr, end);
+       LUNLOGV(rec->mode, __u32, ptr, end);
+       LUNLOGV(rec->uid, __u32, ptr, end);
+       LUNLOGV(rec->gid, __u32, ptr, end);
+       LUNLOGV(rec->pathlen, __u32, ptr, end);
+       UNLOGL(rec->path, char, rec->pathlen, ptr, end);
+
+       *buf = ptr;
+
+       return 0;
+}
+
+
+static int kml_unpack_unlink(struct kml_rec *rec, char **buf, char *end)
+{
+       char *ptr = *buf;
+
+       kml_unpack_version(&rec->old_parentv, &ptr, end);
+       kml_unpack_version(&rec->new_parentv, &ptr, end);
+       kml_unpack_version(&rec->old_objectv, &ptr, end);
+        LUNLOGV(rec->old_mode, __u32, ptr, end);
+        LUNLOGV(rec->old_rdev, __u32, ptr, end);
+        LUNLOGV(rec->old_uid, __u64, ptr, end);
+        LUNLOGV(rec->old_gid, __u64, ptr, end);
+       LUNLOGV(rec->pathlen, __u32, ptr, end);
+       LUNLOGV(rec->targetlen, __u32, ptr, end);
+        LUNLOGV(rec->old_targetlen, __u32, ptr, end);
+       UNLOGL(rec->path, char, rec->pathlen, ptr, end);
+       UNLOGL(rec->target, char, rec->targetlen, ptr, end);
+        UNLOGL(rec->old_target, char, rec->old_targetlen, ptr, end);
+
+       *buf = ptr;
+
+       return 0;
+}
+
+
+static int kml_unpack_rmdir(struct kml_rec *rec, char **buf, char *end)
+{
+       char *ptr = *buf;
+
+       kml_unpack_version(&rec->old_parentv, &ptr, end);
+       kml_unpack_version(&rec->new_parentv, &ptr, end);
+       kml_unpack_version(&rec->old_objectv, &ptr, end);
+        LUNLOGV(rec->old_mode, __u32, ptr, end);
+        LUNLOGV(rec->old_rdev, __u32, ptr, end);
+        LUNLOGV(rec->old_uid, __u64, ptr, end);
+        LUNLOGV(rec->old_gid, __u64, ptr, end);
+       LUNLOGV(rec->pathlen, __u32, ptr, end);
+       LUNLOGV(rec->targetlen, __u32, ptr, end);
+       UNLOGL(rec->path, char, rec->pathlen, ptr, end);
+       UNLOGL(rec->target, char, rec->targetlen, ptr, end);
+
+       *buf = ptr;
+
+       return 0;
+}
+
+
+static int kml_unpack_close(struct kml_rec *rec, char **buf, char *end)
+{
+       char *ptr = *buf;
+
+       LUNLOGV(rec->mode, __u32, ptr, end);  // used for open_mode
+       LUNLOGV(rec->uid, __u32, ptr, end);   // used for open_uid
+       LUNLOGV(rec->gid, __u32, ptr, end);   // used for open_gid
+       kml_unpack_version(&rec->old_objectv, &ptr, end);
+       kml_unpack_version(&rec->new_objectv, &ptr, end);
+       LUNLOGV(rec->ino, __u64, ptr, end);
+       LUNLOGV(rec->generation, __u32, ptr, end);
+       LUNLOGV(rec->pathlen, __u32, ptr, end);
+       UNLOGL(rec->path, char, rec->pathlen, ptr, end);
+
+       *buf = ptr;
+
+       return 0;
+}
+
+
+static int kml_unpack_symlink(struct kml_rec *rec, char **buf, char *end)
+{
+       char *ptr = *buf;
+
+       kml_unpack_version(&rec->old_parentv, &ptr, end);
+       kml_unpack_version(&rec->new_parentv, &ptr, end);
+       kml_unpack_version(&rec->new_objectv, &ptr, end);
+       LUNLOGV(rec->uid, __u32, ptr, end);
+       LUNLOGV(rec->gid, __u32, ptr, end);
+       LUNLOGV(rec->pathlen, __u32, ptr, end);
+       LUNLOGV(rec->targetlen, __u32, ptr, end);
+       UNLOGL(rec->path, char, rec->pathlen, ptr, end);
+       UNLOGL(rec->target, char, rec->targetlen, ptr, end);
+
+       *buf = ptr;
+
+       return 0;
+}
+
+
+static int kml_unpack_rename(struct kml_rec *rec, char **buf, char *end)
+{
+       char *ptr = *buf;
+
+       kml_unpack_version(&rec->old_objectv, &ptr, end);
+       kml_unpack_version(&rec->new_objectv, &ptr, end);
+       kml_unpack_version(&rec->old_parentv, &ptr, end);
+       kml_unpack_version(&rec->new_parentv, &ptr, end);
+       LUNLOGV(rec->pathlen, __u32, ptr, end);
+       LUNLOGV(rec->targetlen, __u32, ptr, end);
+       UNLOGL(rec->path, char, rec->pathlen, ptr, end);
+       UNLOGL(rec->target, char, rec->targetlen, ptr, end);
+
+       *buf = ptr;
+
+       return 0;
+}
+
+
+static int kml_unpack_setattr(struct kml_rec *rec, char **buf, char *end)
+{
+       char *ptr = *buf;
+
+       kml_unpack_version(&rec->old_objectv, &ptr, end);
+       LUNLOGV(rec->valid, __u32, ptr, end);
+       LUNLOGV(rec->mode, __u32, ptr, end);
+       LUNLOGV(rec->uid, __u32, ptr, end);
+       LUNLOGV(rec->gid, __u32, ptr, end);
+       LUNLOGV(rec->size, __u64, ptr, end);
+       LUNLOGV(rec->mtime_sec, __u32, ptr, end);
+       LUNLOGV(rec->mtime_nsec, __u32, ptr, end);
+       LUNLOGV(rec->ctime_sec, __u32, ptr, end);
+       LUNLOGV(rec->ctime_nsec, __u32, ptr, end);
+       LUNLOGV(rec->flags, __u32, ptr, end);
+        LUNLOGV(rec->old_mode, __u32, ptr, end);
+        LUNLOGV(rec->old_rdev, __u32, ptr, end);
+        LUNLOGV(rec->old_uid, __u64, ptr, end);
+        LUNLOGV(rec->old_gid, __u64, ptr, end);
+       LUNLOGV(rec->pathlen, __u32, ptr, end);
+       UNLOGL(rec->path, char, rec->pathlen, ptr, end);
+       
+       *buf = ptr;
+
+       return 0;
+}
+
+
+static int kml_unpack_link(struct kml_rec *rec, char **buf, char *end)
+{
+       char *ptr = *buf;
+
+       kml_unpack_version(&rec->old_parentv, &ptr, end);
+       kml_unpack_version(&rec->new_parentv, &ptr, end);
+       kml_unpack_version(&rec->new_objectv, &ptr, end);
+       LUNLOGV(rec->pathlen, __u32, ptr, end);
+       LUNLOGV(rec->targetlen, __u32, ptr, end);
+       UNLOGL(rec->path, char, rec->pathlen, ptr, end);
+       UNLOGL(rec->target, char, rec->targetlen, ptr, end);
+
+       *buf = ptr;
+
+       return 0;
+}
+
+static int kml_unpack_mknod(struct kml_rec *rec, char **buf, char *end)
+{
+       char *ptr = *buf;
+
+       kml_unpack_version(&rec->old_parentv, &ptr, end);
+       kml_unpack_version(&rec->new_parentv, &ptr, end);
+       kml_unpack_version(&rec->new_objectv, &ptr, end);
+       LUNLOGV(rec->mode, __u32, ptr, end);
+       LUNLOGV(rec->uid, __u32, ptr, end);
+       LUNLOGV(rec->gid, __u32, ptr, end);
+       LUNLOGV(rec->major, __u32, ptr, end);
+       LUNLOGV(rec->minor, __u32, ptr, end);
+       LUNLOGV(rec->pathlen, __u32, ptr, end);
+       UNLOGL(rec->path, char, rec->pathlen, ptr, end);
+
+       *buf = ptr;
+
+       return 0;
+}
+
+
+static int kml_unpack_write(struct kml_rec *rec, char **buf, char *end)
+{
+       printf("NOT IMPLEMENTED");
+       return 0;
+}
+
+
+static int kml_unpack_release(struct kml_rec *rec, char **buf, char *end)
+{
+       printf("NOT IMPLEMENTED");
+       return 0;
+}
+
+
+static int kml_unpack_trunc(struct kml_rec *rec, char **buf, char *end)
+{
+       printf("NOT IMPLEMENTED");
+       return 0;
+}
+
+
+static int kml_unpack_setextattr(struct kml_rec *rec, char **buf, char *end)
+{
+       char *ptr = *buf;
+
+       kml_unpack_version(&rec->old_objectv, &ptr, end);
+       kml_unpack_version(&rec->new_objectv, &ptr, end);
+       LUNLOGV(rec->flags, __u32, ptr, end);
+       LUNLOGV(rec->mode, __u32, ptr, end);
+       LUNLOGV(rec->pathlen, __u32, ptr, end);
+       LUNLOGV(rec->namelen, __u32, ptr, end);
+       LUNLOGV(rec->targetlen, __u32, ptr, end);
+        UNLOGL(rec->path, char, rec->pathlen, ptr, end);
+       UNLOGL(rec->name, char, rec->namelen, ptr, end);
+       UNLOGL(rec->target, char, rec->targetlen, ptr, end);
+
+       *buf = ptr;
+
+       return 0;
+}
+
+
+static int kml_unpack_delextattr(struct kml_rec *rec, char **buf, char *end)
+{
+       char *ptr = *buf;
+
+       kml_unpack_version(&rec->old_objectv, &ptr, end);
+       kml_unpack_version(&rec->new_objectv, &ptr, end);
+       LUNLOGV(rec->flags, __u32, ptr, end);
+       LUNLOGV(rec->mode, __u32, ptr, end);
+       LUNLOGV(rec->pathlen, __u32, ptr, end);
+       LUNLOGV(rec->namelen, __u32, ptr, end);
+       LUNLOGV(rec->targetlen, __u32, ptr, end);
+       UNLOGL(rec->path, char, rec->pathlen, ptr, end);
+       UNLOGL(rec->name, char, rec->namelen, ptr, end);
+
+       *buf = ptr;
+
+       return 0;
+}
+
+static int kml_unpack_open(struct kml_rec *rec, char **buf, char *end)
+{
+       printf("NOT IMPLEMENTED");
+       return 0;
+}
+
+static int kml_unpack_kml_trunc(struct kml_rec *rec, char **buf, char *end)
+{
+
+       printf("NOT IMPLEMENTED");
+       return 0;
+}
+
+
+typedef int (*unpacker)(struct kml_rec *rec, char **buf, char *end);
+
+static unpacker unpackers[KML_OPCODE_NUM] = 
+{
+       [KML_OPCODE_NOOP] = kml_unpack_noop,
+       [KML_OPCODE_CREATE] = kml_unpack_create, 
+       [KML_OPCODE_MKDIR] = kml_unpack_mkdir,
+       [KML_OPCODE_UNLINK] = kml_unpack_unlink,
+       [KML_OPCODE_RMDIR] = kml_unpack_rmdir,
+       [KML_OPCODE_CLOSE] = kml_unpack_close,
+       [KML_OPCODE_SYMLINK] = kml_unpack_symlink,
+       [KML_OPCODE_RENAME] = kml_unpack_rename,
+       [KML_OPCODE_SETATTR] = kml_unpack_setattr,
+       [KML_OPCODE_LINK] = kml_unpack_link,
+       [KML_OPCODE_OPEN] = kml_unpack_open,
+       [KML_OPCODE_MKNOD] = kml_unpack_mknod,
+       [KML_OPCODE_WRITE] = kml_unpack_write,
+       [KML_OPCODE_RELEASE] = kml_unpack_release,
+       [KML_OPCODE_TRUNC] = kml_unpack_trunc,
+       [KML_OPCODE_SETEXTATTR] = kml_unpack_setextattr,
+       [KML_OPCODE_DELEXTATTR] = kml_unpack_delextattr,
+       [KML_OPCODE_KML_TRUNC] = kml_unpack_kml_trunc,
+       [KML_OPCODE_GET_FILEID] = kml_unpack_get_fileid
+};
+
+int kml_unpack_prefix(struct kml_rec *rec, char **buf, char *end) 
+{
+       char *ptr = *buf;
+        int n;
+
+        UNLOGP(rec->prefix.hdr, struct kml_prefix_hdr, ptr, end);
+        rec->prefix.hdr->len     = NTOH__u32(rec->prefix.hdr->len);
+        rec->prefix.hdr->version = NTOH__u32(rec->prefix.hdr->version);
+        rec->prefix.hdr->pid     = NTOH__u32(rec->prefix.hdr->pid);
+        rec->prefix.hdr->auid    = NTOH__u32(rec->prefix.hdr->auid);
+        rec->prefix.hdr->fsuid   = NTOH__u32(rec->prefix.hdr->fsuid);
+        rec->prefix.hdr->fsgid   = NTOH__u32(rec->prefix.hdr->fsgid);
+        rec->prefix.hdr->opcode  = NTOH__u32(rec->prefix.hdr->opcode);
+        rec->prefix.hdr->ngroups = NTOH__u32(rec->prefix.hdr->ngroups);
+
+       UNLOGL(rec->prefix.groups, __u32, rec->prefix.hdr->ngroups, ptr, end);
+        for (n = 0; n < rec->prefix.hdr->ngroups; n++) {
+                rec->prefix.groups[n] = NTOH__u32(rec->prefix.groups[n]);
+        }
+
+       *buf = ptr;
+
+        return 0;
+}
+
+int kml_unpack_suffix(struct kml_rec *rec, char **buf, char *end) 
+{
+       char *ptr = *buf;
+
+       UNLOGP(rec->suffix, struct kml_suffix, ptr, end);
+        rec->suffix->prevrec   = NTOH__u32(rec->suffix->prevrec);
+        rec->suffix->recno    = NTOH__u32(rec->suffix->recno);
+        rec->suffix->time     = NTOH__u32(rec->suffix->time);
+        rec->suffix->len      = NTOH__u32(rec->suffix->len);
+
+       *buf = ptr;
+
+        return 0;
+}
+
+int kml_unpack(struct kml_rec *rec, char **buf, char *end)
+{
+       char *ptr = *buf;
+       int err; 
+
+        if (((unsigned long)ptr % 4) != 0) {
+                printf("InterMezzo: %s: record misaligned.\n", __FUNCTION__);
+                return -EINVAL;
+        }
+
+        while (ptr < end) { 
+                __u32 *i = (__u32 *)ptr;
+                if (*i)
+                        break;
+                ptr += sizeof(*i);
+        }
+       *buf = ptr;
+
+       memset(rec, 0, sizeof(*rec));
+
+        err = kml_unpack_prefix(rec, &ptr, end);
+       if (err) {
+                printf("InterMezzo: %s: unpack_prefix failed: %d\n",
+                       __FUNCTION__, err);
+               return err;
+        }
+
+        if (rec->prefix.hdr->opcode < 0  ||
+            rec->prefix.hdr->opcode >= KML_OPCODE_NUM) {
+                printf("InterMezzo: %s: invalid opcode (%d)\n",
+                       __FUNCTION__, rec->prefix.hdr->opcode);
+               return -EINVAL;
+        }
+       err = unpackers[rec->prefix.hdr->opcode](rec, &ptr, end);
+       if (err) {
+                printf("InterMezzo: %s: unpacker failed: %d\n",
+                       __FUNCTION__, err);
+               return err;
+        }
+
+        err = kml_unpack_suffix(rec, &ptr, end);
+       if (err) {
+                printf("InterMezzo: %s: unpack_suffix failed: %d\n",
+                       __FUNCTION__, err);
+               return err;
+        }
+
+
+       if (rec->prefix.hdr->len != rec->suffix->len) {
+                printf("InterMezzo: %s: lengths don't match\n",
+                       __FUNCTION__);
+               return -EINVAL;
+        }
+        if ((rec->prefix.hdr->len % 4) != 0) {
+                printf("InterMezzo: %s: record length not a "
+                       "multiple of 4.\n", __FUNCTION__);
+                return -EINVAL;
+        }
+        if (ptr - *buf != rec->prefix.hdr->len) {
+                printf("InterMezzo: %s: unpacking error\n",
+                       __FUNCTION__);
+                return -EINVAL;
+        }
+        while (ptr < end) { 
+                __u32 *i = (__u32 *)ptr;
+                if (*i)
+                        break;
+                ptr += sizeof(*i);
+        }
+       *buf = ptr;
+       return 0;
+}
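As a usage note (illustrative only, not part of the patch): kml_unpack() skips leading zero padding, consumes exactly one record, and leaves *buf positioned at the next record, so a caller can walk a whole KML buffer with a simple loop. A sketch, where buf/len and the function name are assumptions supplied by a hypothetical caller:

/* Sketch only: iterate over packed KML records in [buf, buf + len). */
static int example_walk_kml(char *buf, int len)
{
        char *ptr = buf;
        char *end = buf + len;
        struct kml_rec rec;
        int err;

        while (ptr < end) {
                err = kml_unpack(&rec, &ptr, end);      /* advances ptr */
                if (err)
                        return err;
                /* rec.prefix.hdr->opcode, rec.path and rec.suffix->recno
                   now describe this record */
        }
        return 0;
}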
+
+
+#ifndef __KERNEL__
+#define STR(ptr) ((ptr) ? (ptr) : "")
+
+#define OPNAME(n) [KML_OPCODE_##n] = #n
+static char *opnames[KML_OPCODE_NUM] = {
+       OPNAME(NOOP),
+       OPNAME(CREATE),
+       OPNAME(MKDIR), 
+       OPNAME(UNLINK),
+       OPNAME(RMDIR),
+       OPNAME(CLOSE),
+       OPNAME(SYMLINK),
+       OPNAME(RENAME),
+       OPNAME(SETATTR),
+       OPNAME(LINK),
+       OPNAME(OPEN),
+       OPNAME(MKNOD),
+       OPNAME(WRITE),
+       OPNAME(RELEASE),
+       OPNAME(TRUNC),
+       OPNAME(SETEXTATTR),
+       OPNAME(DELEXTATTR),
+       OPNAME(KML_TRUNC),
+       OPNAME(GET_FILEID)
+};
+#undef OPNAME
+
+static char *print_opname(int op)
+{
+       if (op < 0 || op >= sizeof (opnames) / sizeof (*opnames))
+               return NULL;
+       return opnames[op];
+}
+
+
+static char *print_time(__u64 i)
+{
+       char buf[128];
+       
+       memset(buf, 0, 128);
+
+#ifndef __KERNEL__
+       strftime(buf, 128, "%Y/%m/%d %H:%M:%S", gmtime((time_t *)&i));
+#else
+       sprintf(buf, "%Ld\n", i);
+#endif
+
+       return strdup(buf);
+}
+
+static char *print_version(struct presto_version *ver)
+{
+       char ver_buf[128];
+       char *mtime;
+       char *ctime;
+
+       if (!ver || ver->pv_ctime == 0) {
+               return strdup("");
+       } 
+       mtime = print_time(ver->pv_mtime);
+       ctime = print_time(ver->pv_ctime);
+       sprintf(ver_buf, "mtime %s, ctime %s, len %lld", 
+               mtime, ctime, ver->pv_size);
+       free(mtime);
+       free(ctime);
+       return strdup(ver_buf);
+}
+
+
+char *kml_print_rec(struct kml_rec *rec, int brief)
+{
+       char *str;
+       char *nov, *oov, *ntv, *otv, *npv, *opv;
+       char *rectime, *mtime, *ctime;
+
+        if (brief) {
+               str = g_strdup_printf(" %08d %7s %*s %*s", 
+                                      rec->suffix->recno,
+                                      print_opname (rec->prefix.hdr->opcode),
+                                      rec->pathlen, STR(rec->path),
+                                      rec->targetlen, STR(rec->target));
+                
+               return str;
+       }
+
+       rectime = print_time(rec->suffix->time);
+       mtime = print_time(rec->mtime);
+       ctime = print_time(rec->ctime);
+
+       nov = print_version(rec->new_objectv);
+       oov = print_version(rec->old_objectv);
+       ntv = print_version(rec->new_targetv);
+       otv = print_version(rec->old_targetv);
+       npv = print_version(rec->new_parentv);
+       opv = print_version(rec->old_parentv);
+
+       str = g_strdup_printf("\n -- Record:\n"
+               "    Recno     %d\n"
+               "    KML off   %lld\n" 
+               "    Version   %d\n" 
+               "    Len       %d\n"
+               "    Suf len   %d\n"
+               "    Time      %s\n"
+               "    Opcode    %d\n"
+               "    Op        %s\n"
+               "    Pid       %d\n"
+               "    AUid      %d\n"
+               "    Fsuid     %d\n" 
+               "    Fsgid     %d\n"
+               "    Prevrec   %d\n" 
+               "    Ngroups   %d\n"
+               //"    Groups    @{$self->{groups}}\n" 
+               " -- Path:\n"
+               "    Inode     %d\n"
+               "    Gen num   %u\n"
+                "    Old mode  %o\n"
+                "    Old rdev  %x\n"
+                "    Old uid   %llu\n"
+                "    Old gid   %llu\n"
+               "    Path      %*s\n"
+               //"    Open_mode %o\n",
+               "    Pathlen   %d\n"
+               "    Tgt       %*s\n"
+               "    Tgtlen    %d\n" 
+               "    Old Tgt   %*s\n"
+               "    Old Tgtln %d\n" 
+               " -- Attr:\n"
+               "    Valid     %x\n"
+               "    mode %o, uid %d, gid %d, size %lld, mtime %s, ctime %s rdev %x (%d:%d)\n"
+               " -- Versions:\n"
+               "    New object %s\n"
+               "    Old object %s\n"
+               "    New target %s\n"
+               "    Old target %s\n"
+               "    New parent %s\n"
+               "    Old parent %s\n", 
+               
+               rec->suffix->recno, 
+               rec->offset, 
+               rec->prefix.hdr->version, 
+               rec->prefix.hdr->len, 
+               rec->suffix->len, 
+               rectime,
+               rec->prefix.hdr->opcode, 
+               print_opname (rec->prefix.hdr->opcode),
+               rec->prefix.hdr->pid,
+               rec->prefix.hdr->auid,
+               rec->prefix.hdr->fsuid,
+               rec->prefix.hdr->fsgid,
+               rec->suffix->prevrec,
+               rec->prefix.hdr->ngroups,
+               rec->ino,
+               rec->generation,
+                rec->old_mode,
+                rec->old_rdev,
+                rec->old_uid,
+                rec->old_gid,
+               rec->pathlen,
+               STR(rec->path),
+               rec->pathlen,
+               rec->targetlen,
+               STR(rec->target),
+               rec->targetlen,
+               rec->old_targetlen,
+               STR(rec->old_target),
+               rec->old_targetlen,
+               
+               rec->valid, 
+               rec->mode,
+               rec->uid,
+               rec->gid,
+               rec->size,
+               mtime,
+               ctime,
+               rec->rdev, rec->major, rec->minor,
+               nov, oov, ntv, otv, npv, opv);
+               
+       free(nov);
+       free(oov);
+       free(ntv);
+       free(otv);
+       free(npv);
+       free(opv);
+
+       free(rectime); 
+       free(ctime);
+       free(mtime);
+
+       return str;
+}
+#endif
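In user space (illustrative only, not part of the patch), the printers above pair naturally with kml_unpack(); kml_print_rec() returns a string built with g_strdup_printf(), so it is released with g_free(). A sketch of dumping one record in brief form; the function name is hypothetical:

/* Sketch only: unpack and print a single record (user space). */
static void example_dump_one(char **ptr, char *end)
{
        struct kml_rec rec;
        char *line;

        if (kml_unpack(&rec, ptr, end) != 0)
                return;

        line = kml_print_rec(&rec, 1);          /* brief one-line form */
        if (line != NULL) {
                printf("%s\n", line);
                g_free(line);                   /* from g_strdup_printf() */
        }
}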
diff --git a/fs/intermezzo/kml_utils.c b/fs/intermezzo/kml_utils.c
new file mode 100644 (file)
index 0000000..5062e2d
--- /dev/null
@@ -0,0 +1,43 @@
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include "intermezzo_fs.h"
+#include "intermezzo_kml.h"
+
+
+// dlogit -- opposite of logit():
+//           returns sbuf + size
+char *dlogit (void *tbuf, const void *sbuf, int size)
+{
+        char *ptr = (char *)sbuf;
+        memcpy(tbuf, ptr, size);
+        ptr += size;
+        return ptr;
+}
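A brief illustration (not part of the patch) of the calling convention: dlogit() copies size bytes into tbuf and returns the source pointer advanced past them, so successive calls step through a packed buffer. The example function is hypothetical:

/* Sketch only: pull two fixed-size fields out of a packed source buffer. */
static char *example_unpack_pair(void *a, int alen, void *b, int blen, char *src)
{
        src = dlogit(a, src, alen);     /* copies alen bytes, returns src + alen */
        src = dlogit(b, src, blen);
        return src;                     /* now points past both fields */
}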
+
+static spinlock_t kml_lock = SPIN_LOCK_UNLOCKED;
+static char  buf[1024];
+char * bdup_printf (char *format, ...)
+{
+        va_list args;
+        int  i;
+        char *path;
+        unsigned long flags;
+
+        spin_lock_irqsave(&kml_lock, flags);
+        va_start(args, format);
+        i = vsnprintf(buf, sizeof(buf), format, args); /* bounded: cannot overrun buf */
+        va_end(args);
+
+        PRESTO_ALLOC (path, char *, i + 1);
+        if (path == NULL) {
+                spin_unlock_irqrestore(&kml_lock, flags);
+                return NULL;
+        }
+        strcpy (path, buf);
+
+        spin_unlock_irqrestore(&kml_lock, flags);
+        return path;
+}
+
+
diff --git a/fs/intermezzo/methods.c b/fs/intermezzo/methods.c
new file mode 100644 (file)
index 0000000..8950efc
--- /dev/null
@@ -0,0 +1,493 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2000 Stelias Computing, Inc.
+ *  Copyright (C) 2000 Red Hat, Inc.
+ *  Copyright (C) 2000 Mountain View Data, Inc.
+ *
+ *  Extended Attribute Support
+ *  Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc.
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <linux/fsfilter.h>
+#include "intermezzo_fs.h"
+
+
+int filter_print_entry = 0;
+int filter_debug = 0xfffffff;
+/*
+ * The functions in this file are responsible for setting up the
+ * correct methods for layered file systems like InterMezzo and snapfs.
+ */
+
+
+static struct filter_fs filter_oppar[FILTER_FS_TYPES];
+
+/* get to the upper methods (intermezzo, snapfs) */
+inline struct super_operations *filter_c2usops(struct filter_fs *cache)
+{
+        return &cache->o_fops.filter_sops;
+}
+
+inline struct inode_operations *filter_c2udiops(struct filter_fs *cache)
+{
+        return &cache->o_fops.filter_dir_iops;
+}
+
+
+inline struct inode_operations *filter_c2ufiops(struct filter_fs *cache)
+{
+        return &cache->o_fops.filter_file_iops;
+}
+
+inline struct inode_operations *filter_c2usiops(struct filter_fs *cache)
+{
+        return &cache->o_fops.filter_sym_iops;
+}
+
+
+inline struct file_operations *filter_c2udfops(struct filter_fs *cache)
+{
+        return &cache->o_fops.filter_dir_fops;
+}
+
+inline struct file_operations *filter_c2uffops(struct filter_fs *cache)
+{
+        return &cache->o_fops.filter_file_fops;
+}
+
+inline struct file_operations *filter_c2usfops(struct filter_fs *cache)
+{
+        return &cache->o_fops.filter_sym_fops;
+}
+
+inline struct dentry_operations *filter_c2udops(struct filter_fs *cache)
+{
+        return &cache->o_fops.filter_dentry_ops;
+}
+
+/* get to the cache (lower) methods */
+inline struct super_operations *filter_c2csops(struct filter_fs *cache)
+{
+        return cache->o_caops.cache_sops;
+}
+
+inline struct inode_operations *filter_c2cdiops(struct filter_fs *cache)
+{
+        return cache->o_caops.cache_dir_iops;
+}
+
+inline struct inode_operations *filter_c2cfiops(struct filter_fs *cache)
+{
+        return cache->o_caops.cache_file_iops;
+}
+
+inline struct inode_operations *filter_c2csiops(struct filter_fs *cache)
+{
+        return cache->o_caops.cache_sym_iops;
+}
+
+inline struct file_operations *filter_c2cdfops(struct filter_fs *cache)
+{
+        return cache->o_caops.cache_dir_fops;
+}
+
+inline struct file_operations *filter_c2cffops(struct filter_fs *cache)
+{
+        return cache->o_caops.cache_file_fops;
+}
+
+inline struct file_operations *filter_c2csfops(struct filter_fs *cache)
+{
+        return cache->o_caops.cache_sym_fops;
+}
+
+inline struct dentry_operations *filter_c2cdops(struct filter_fs *cache)
+{
+        return cache->o_caops.cache_dentry_ops;
+}
+
+
+void filter_setup_journal_ops(struct filter_fs *ops, char *cache_type)
+{
+        if ( strlen(cache_type) == strlen("ext2") &&
+             memcmp(cache_type, "ext2", strlen("ext2")) == 0 ) {
+#ifdef CONFIG_EXT2_FS
+                ops->o_trops = &presto_ext2_journal_ops;
+#else
+                ops->o_trops = NULL;
+#endif
+                FDEBUG(D_SUPER, "ops at %p\n", ops);
+        }
+
+        if ( strlen(cache_type) == strlen("ext3") &&
+             memcmp(cache_type, "ext3", strlen("ext3")) == 0 ) {
+#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE)
+                ops->o_trops = &presto_ext3_journal_ops;
+#else
+                ops->o_trops = NULL;
+#endif
+                FDEBUG(D_SUPER, "ops at %p\n", ops);
+        }
+
+        if ( strlen(cache_type) == strlen("tmpfs") &&
+             memcmp(cache_type, "tmpfs", strlen("tmpfs")) == 0 ) {
+#if defined(CONFIG_TMPFS)
+                ops->o_trops = &presto_tmpfs_journal_ops;
+#else
+                ops->o_trops = NULL;
+#endif
+                FDEBUG(D_SUPER, "ops at %p\n", ops);
+        }
+
+        if ( strlen(cache_type) == strlen("reiserfs") &&
+             memcmp(cache_type, "reiserfs", strlen("reiserfs")) == 0 ) {
+#if 0
+               /* #if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE) */
+                ops->o_trops = &presto_reiserfs_journal_ops;
+#else
+                ops->o_trops = NULL;
+#endif
+                FDEBUG(D_SUPER, "ops at %p\n", ops);
+        }
+
+        if ( strlen(cache_type) == strlen("xfs") &&
+             memcmp(cache_type, "xfs", strlen("xfs")) == 0 ) {
+#if 0
+/*#if defined(CONFIG_XFS_FS) || defined (CONFIG_XFS_FS_MODULE) */
+                ops->o_trops = &presto_xfs_journal_ops;
+#else
+                ops->o_trops = NULL;
+#endif
+                FDEBUG(D_SUPER, "ops at %p\n", ops);
+        }
+
+        if ( strlen(cache_type) == strlen("obdfs") &&
+             memcmp(cache_type, "obdfs", strlen("obdfs")) == 0 ) {
+#if defined(CONFIG_OBDFS_FS) || defined (CONFIG_OBDFS_FS_MODULE)
+                ops->o_trops = presto_obdfs_journal_ops;
+#else
+                ops->o_trops = NULL;
+#endif
+                FDEBUG(D_SUPER, "ops at %p\n", ops);
+        }
+}
+
+
+/* find the cache for this FS */
+struct filter_fs *filter_get_filter_fs(const char *cache_type)
+{
+        struct filter_fs *ops = NULL;
+        FENTRY;
+
+        if ( strlen(cache_type) == strlen("ext2") &&
+             memcmp(cache_type, "ext2", strlen("ext2")) == 0 ) {
+                ops = &filter_oppar[FILTER_FS_EXT2];
+                FDEBUG(D_SUPER, "ops at %p\n", ops);
+        }
+
+        if ( strlen(cache_type) == strlen("xfs") &&
+             memcmp(cache_type, "xfs", strlen("xfs")) == 0 ) {
+                ops = &filter_oppar[FILTER_FS_XFS];
+                FDEBUG(D_SUPER, "ops at %p\n", ops);
+        }
+
+        if ( strlen(cache_type) == strlen("ext3") &&
+             memcmp(cache_type, "ext3", strlen("ext3")) == 0 ) {
+                ops = &filter_oppar[FILTER_FS_EXT3];
+                FDEBUG(D_SUPER, "ops at %p\n", ops);
+        }
+
+        if ( strlen(cache_type) == strlen("tmpfs") &&
+             memcmp(cache_type, "tmpfs", strlen("tmpfs")) == 0 ) {
+                ops = &filter_oppar[FILTER_FS_TMPFS];
+                FDEBUG(D_SUPER, "ops at %p\n", ops);
+        }
+
+        if ( strlen(cache_type) == strlen("reiserfs") &&
+             memcmp(cache_type, "reiserfs", strlen("reiserfs")) == 0 ) {
+                ops = &filter_oppar[FILTER_FS_REISERFS];
+                FDEBUG(D_SUPER, "ops at %p\n", ops);
+        }
+        if ( strlen(cache_type) == strlen("obdfs") &&
+             memcmp(cache_type, "obdfs", strlen("obdfs")) == 0 ) {
+                ops = &filter_oppar[FILTER_FS_OBDFS];
+                FDEBUG(D_SUPER, "ops at %p\n", ops);
+        }
+
+        if (ops == NULL) {
+                CERROR("prepare to die: unrecognized cache type for Filter\n");
+        }
+        FEXIT;
+        return ops;
+}
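For context (illustrative only, not part of the patch): a caller resolves the filter_fs slot for the underlying cache type once and then fills it in, for example with the journal operations above. A sketch assuming the type string comes from mount data; the function name is hypothetical:

/* Sketch only: look up the filter_fs slot and attach its journal ops. */
static struct filter_fs *example_setup_filter(char *cache_type)
{
        struct filter_fs *ops;

        ops = filter_get_filter_fs(cache_type); /* NULL if unrecognized */
        if (ops == NULL)
                return NULL;

        filter_setup_journal_ops(ops, cache_type);
        return ops;
}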
+
+
+/*
+ *  Frobnicate the InterMezzo operations
+ *    this establishes the link between the InterMezzo file system
+ *    and the underlying file system used for the cache.
+ */
+
+void filter_setup_super_ops(struct filter_fs *cache, struct super_operations *cache_sops, struct super_operations *filter_sops)
+{
+        /* Get ptr to the shared struct snapfs_ops structure. */
+        struct filter_ops *props = &cache->o_fops;
+        /* Get ptr to the shared struct cache_ops structure. */
+        struct cache_ops *caops = &cache->o_caops;
+
+        FENTRY;
+
+        if ( cache->o_flags & FILTER_DID_SUPER_OPS ) {
+                FEXIT;
+                return;
+        }
+        cache->o_flags |= FILTER_DID_SUPER_OPS;
+
+        /* Set the cache superblock operations to point to the
+           superblock operations of the underlying file system.  */
+        caops->cache_sops = cache_sops;
+
+        /*
+         * Copy the cache (real fs) superblock ops to the "filter"
+         * superblock ops as defaults. Some will be changed below
+         */
+        memcpy(&props->filter_sops, cache_sops, sizeof(*cache_sops));
+
+        /* 'put_super' is unconditionally that of the filter */
+        if (filter_sops->put_super) { 
+                props->filter_sops.put_super = filter_sops->put_super;
+        }
+
+        if (cache_sops->read_inode) {
+                props->filter_sops.read_inode = filter_sops->read_inode;
+                FDEBUG(D_INODE, "setting filter_read_inode, cache_ops %p, cache %p, ri at %p\n",
+                      cache, cache, props->filter_sops.read_inode);
+        }
+
+        if (cache_sops->remount_fs)
+                props->filter_sops.remount_fs = filter_sops->remount_fs;
+        FEXIT;
+}
+
+
+void filter_setup_dir_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops)
+{
+        struct inode_operations *cache_filter_iops;
+        struct inode_operations *cache_iops = inode->i_op;
+        struct file_operations *cache_fops = inode->i_fop;
+        FENTRY;
+
+        if ( cache->o_flags & FILTER_DID_DIR_OPS ) {
+                FEXIT;
+                return;
+        }
+        cache->o_flags |= FILTER_DID_DIR_OPS;
+
+        /* former ops become cache_ops */
+        cache->o_caops.cache_dir_iops = cache_iops;
+        cache->o_caops.cache_dir_fops = cache_fops;
+        FDEBUG(D_SUPER, "filter at %p, cache iops %p, iops %p\n",
+               cache, cache_iops, filter_c2udiops(cache));
+
+        /* setup our dir iops: copy and modify */
+        memcpy(filter_c2udiops(cache), cache_iops, sizeof(*cache_iops));
+
+        /* abbreviate */
+        cache_filter_iops = filter_c2udiops(cache);
+
+        /* methods that filter if cache filesystem has these ops */
+        if (cache_iops->lookup && filter_iops->lookup)
+                cache_filter_iops->lookup = filter_iops->lookup;
+        if (cache_iops->create && filter_iops->create)
+                cache_filter_iops->create = filter_iops->create;
+        if (cache_iops->link && filter_iops->link)
+                cache_filter_iops->link = filter_iops->link;
+        if (cache_iops->unlink && filter_iops->unlink)
+                cache_filter_iops->unlink = filter_iops->unlink;
+        if (cache_iops->mkdir && filter_iops->mkdir)
+                cache_filter_iops->mkdir = filter_iops->mkdir;
+        if (cache_iops->rmdir && filter_iops->rmdir)
+                cache_filter_iops->rmdir = filter_iops->rmdir;
+        if (cache_iops->symlink && filter_iops->symlink)
+                cache_filter_iops->symlink = filter_iops->symlink;
+        if (cache_iops->rename && filter_iops->rename)
+                cache_filter_iops->rename = filter_iops->rename;
+        if (cache_iops->mknod && filter_iops->mknod)
+                cache_filter_iops->mknod = filter_iops->mknod;
+        if (cache_iops->permission && filter_iops->permission)
+                cache_filter_iops->permission = filter_iops->permission;
+        if (cache_iops->getattr)
+                cache_filter_iops->getattr = filter_iops->getattr;
+        /* Some filesystems do not use a setattr method of their own,
+           instead relying on inode_setattr/write_inode. We still need to
+           journal these, so we make setattr an unconditional operation.
+           XXX: we should probably check for write_inode. SHP
+        */
+        /*if (cache_iops->setattr)*/
+                cache_filter_iops->setattr = filter_iops->setattr;
+#ifdef CONFIG_FS_EXT_ATTR
+       /* For now we assume that posix acls are handled through extended
+       * attributes. If this is not the case, we must explicitly trap 
+       * posix_set_acl. SHP
+       */
+       if (cache_iops->set_ext_attr && filter_iops->set_ext_attr)
+               cache_filter_iops->set_ext_attr = filter_iops->set_ext_attr;
+#endif
+
+
+        /* copy dir fops */
+        memcpy(filter_c2udfops(cache), cache_fops, sizeof(*cache_fops));
+
+        /* unconditional filtering operations */
+        filter_c2udfops(cache)->ioctl = filter_fops->ioctl;
+
+        FEXIT;
+}
+
+
+void filter_setup_file_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops)
+{
+        struct inode_operations *pr_iops;
+        struct inode_operations *cache_iops = inode->i_op;
+        struct file_operations *cache_fops = inode->i_fop;
+        FENTRY;
+
+        if ( cache->o_flags & FILTER_DID_FILE_OPS ) {
+                FEXIT;
+                return;
+        }
+        cache->o_flags |= FILTER_DID_FILE_OPS;
+
+        /* steal the old ops */
+        /* former ops become cache_ops */
+        cache->o_caops.cache_file_iops = cache_iops;
+        cache->o_caops.cache_file_fops = cache_fops;
+        
+        /* abbreviate */
+        pr_iops = filter_c2ufiops(cache); 
+
+        /* setup our dir iops: copy and modify */
+        memcpy(pr_iops, cache_iops, sizeof(*cache_iops));
+
+        /* copy dir fops */
+        CERROR("*** cache file ops at %p\n", cache_fops);
+        memcpy(filter_c2uffops(cache), cache_fops, sizeof(*cache_fops));
+
+        /* assign */
+        /* See comments above in filter_setup_dir_ops. SHP */
+        /*if (cache_iops->setattr)*/
+                pr_iops->setattr = filter_iops->setattr;
+        if (cache_iops->getattr)
+                pr_iops->getattr = filter_iops->getattr;
+        /* XXX Should this be conditional rmr ? */
+        pr_iops->permission = filter_iops->permission;
+#ifdef CONFIG_FS_EXT_ATTR
+       /* For now we assume that posix acls are handled through extended
+       * attributes. If this is not the case, we must explicitly trap
+       * posix_set_acl.
+       */
+       if (cache_iops->set_ext_attr && filter_iops->set_ext_attr)
+               pr_iops->set_ext_attr = filter_iops->set_ext_attr;
+#endif
+
+
+        /* unconditional filtering operations */
+        filter_c2uffops(cache)->open = filter_fops->open;
+        filter_c2uffops(cache)->release = filter_fops->release;
+        filter_c2uffops(cache)->write = filter_fops->write;
+        filter_c2uffops(cache)->ioctl = filter_fops->ioctl;
+
+        FEXIT;
+}
+
+/* XXX in 2.3 there are "fast" and "slow" symlink ops for ext2 XXX */
+void filter_setup_symlink_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops)
+{
+        struct inode_operations *pr_iops;
+        struct inode_operations *cache_iops = inode->i_op;
+        struct file_operations *cache_fops = inode->i_fop;
+        FENTRY;
+
+        if ( cache->o_flags & FILTER_DID_SYMLINK_OPS ) {
+                FEXIT;
+                return;
+        }
+        cache->o_flags |= FILTER_DID_SYMLINK_OPS;
+
+        /* steal the old ops */
+        cache->o_caops.cache_sym_iops = cache_iops;
+        cache->o_caops.cache_sym_fops = cache_fops;
+
+        /* abbreviate */
+        pr_iops = filter_c2usiops(cache); 
+
+        /* setup our dir iops: copy and modify */
+        memcpy(pr_iops, cache_iops, sizeof(*cache_iops));
+
+        /* See comments above in filter_setup_dir_ops. SHP */
+        /* if (cache_iops->setattr) */
+                pr_iops->setattr = filter_iops->setattr;
+        if (cache_iops->getattr)
+                pr_iops->getattr = filter_iops->getattr;
+
+        /* assign */
+        /* copy fops - careful for symlinks they might be NULL */
+        if ( cache_fops ) { 
+                memcpy(filter_c2usfops(cache), cache_fops, sizeof(*cache_fops));
+        }
+
+        FEXIT;
+}
+
+void filter_setup_dentry_ops(struct filter_fs *cache,
+                             struct dentry_operations *cache_dop,
+                             struct dentry_operations *filter_dop)
+{
+        if ( cache->o_flags & FILTER_DID_DENTRY_OPS ) {
+                FEXIT;
+                return;
+        }
+        cache->o_flags |= FILTER_DID_DENTRY_OPS;
+
+        cache->o_caops.cache_dentry_ops = cache_dop;
+        memcpy(&cache->o_fops.filter_dentry_ops,
+               filter_dop, sizeof(*filter_dop));
+        
+        if (cache_dop &&  cache_dop != filter_dop && cache_dop->d_revalidate){
+                CERROR("WARNING: filter overriding revalidation!\n");
+        }
+        return;
+}
diff --git a/fs/intermezzo/presto.c b/fs/intermezzo/presto.c
new file mode 100644 (file)
index 0000000..bf16031
--- /dev/null
@@ -0,0 +1,736 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Author: Peter J. Braam <braam@clusterfs.com>
+ *  Copyright (C) 1998 Stelias Computing Inc
+ *  Copyright (C) 1999 Red Hat Inc.
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * This file implements basic routines supporting the semantics
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+#include <linux/string.h>
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+int presto_walk(const char *name, struct nameidata *nd)
+{
+        int err;
+        /* we do not follow symlinks to support symlink operations 
+           correctly. The vfs should always hand us resolved dentries
+           so we should not be required to use LOOKUP_FOLLOW. At the
+           reintegrating end, lento again should be working with the 
+           resolved pathname and not the symlink. SHP
+           XXX: This code implies that direct symlinks do not work. SHP
+        */
+        unsigned int flags = 0; //LOOKUP_POSITIVE;
+
+        ENTRY;
+        err = path_lookup(name, flags, nd);
+        return err;
+}
+
+
+/* find the presto minor device for this inode */
+int presto_i2m(struct inode *inode)
+{
+        struct presto_cache *cache;
+        ENTRY;
+        cache = presto_get_cache(inode);
+        CDEBUG(D_PSDEV, "\n");
+        if ( !cache ) {
+                CERROR("PRESTO: BAD: cannot find cache for dev %s, ino %ld\n",
+                       inode->i_sb->s_id, inode->i_ino);
+                EXIT;
+                return -1;
+        }
+        EXIT;
+        return cache->cache_psdev->uc_minor;
+}
+
+inline int presto_f2m(struct presto_file_set *fset)
+{
+        return fset->fset_cache->cache_psdev->uc_minor;
+
+}
+
+inline int presto_c2m(struct presto_cache *cache)
+{
+        return cache->cache_psdev->uc_minor;
+
+}
+
+/* XXX check this out */
+struct presto_file_set *presto_path2fileset(const char *name)
+{
+        struct nameidata nd;
+        struct presto_file_set *fileset;
+        int error;
+        ENTRY;
+
+        error = presto_walk(name, &nd);
+        if (!error) { 
+#if 0
+                error = do_revalidate(nd.dentry);
+#endif
+                if (!error) 
+                        fileset = presto_fset(nd.dentry); 
+                path_release(&nd); 
+                EXIT;
+        } else 
+                fileset = ERR_PTR(error);
+
+        EXIT;
+        return fileset;
+}
+
+/* check a flag on this dentry or fset root.  Semantics:
+   - most flags: test if it is set
+   - PRESTO_ATTR, PRESTO_DATA return 1 if PRESTO_FSETINSYNC is set
+*/
+int presto_chk(struct dentry *dentry, int flag)
+{
+        int minor;
+        struct presto_file_set *fset = presto_fset(dentry);
+
+        ENTRY;
+        minor = presto_i2m(dentry->d_inode);
+        if ( izo_channels[minor].uc_no_filter ) {
+                EXIT;
+                return ~0;
+        }
+
+        /* if the fileset is in sync DATA and ATTR are OK */
+        if ( fset &&
+             (flag == PRESTO_ATTR || flag == PRESTO_DATA) &&
+             (fset->fset_flags & FSET_INSYNC) ) {
+                CDEBUG(D_INODE, "fset in sync (ino %ld)!\n",
+                       fset->fset_dentry->d_inode->i_ino);
+                EXIT;
+                return 1;
+        }
+
+        EXIT;
+        return (presto_d2d(dentry)->dd_flags & flag);
+}
+
+/* set a bit in the dentry flags */
+void presto_set(struct dentry *dentry, int flag)
+{
+        ENTRY;
+        if ( dentry->d_inode ) {
+                CDEBUG(D_INODE, "SET ino %ld, flag %x\n",
+                       dentry->d_inode->i_ino, flag);
+        }
+        if ( presto_d2d(dentry) == NULL) {
+                CERROR("dentry without d_fsdata in presto_set: %p: %*s", dentry,
+                                dentry->d_name.len, dentry->d_name.name);
+                BUG();
+        }
+        presto_d2d(dentry)->dd_flags |= flag;
+        EXIT;
+}
+
+/* given a path: complete the closes on the fset */
+int lento_complete_closes(char *path)
+{
+        struct nameidata nd;
+        struct dentry *dentry;
+        int error;
+        struct presto_file_set *fset;
+        ENTRY;
+
+        error = presto_walk(path, &nd);
+        if (error) {
+                EXIT;
+                return error;
+        }
+
+        dentry = nd.dentry;
+
+        error = -ENXIO;
+        if ( !presto_ispresto(dentry->d_inode) ) {
+                EXIT;
+                goto out_complete;
+        }
+        
+        fset = presto_fset(dentry);
+        error = -EINVAL;
+        if ( !fset ) {
+                CERROR("No fileset!\n");
+                EXIT;
+                goto out_complete;
+        }
+        
+        /* transactions and locking are internal to this function */ 
+        error = presto_complete_lml(fset);
+        
+        EXIT;
+ out_complete:
+        path_release(&nd); 
+        return error;
+}       
+
+#if 0
+/* given a path: write a close record and cancel an LML record, finally
+   call truncate LML.  Lento is doing this so it goes in with uid/gid's 
+   root. 
+*/ 
+int lento_cancel_lml(char *path, 
+                     __u64 lml_offset, 
+                     __u64 remote_ino, 
+                     __u32 remote_generation,
+                     __u32 remote_version, 
+                     struct lento_vfs_context *info)
+{
+        struct nameidata nd;
+        struct rec_info rec;
+        struct dentry *dentry;
+        int error;
+        struct presto_file_set *fset;
+        void *handle; 
+        struct presto_version new_ver;
+        ENTRY;
+
+
+        error = presto_walk(path, &nd);
+        if (error) {
+                EXIT;
+                return error;
+        }
+        dentry = nd.dentry;
+
+        error = -ENXIO;
+        if ( !presto_ispresto(dentry->d_inode) ) {
+                EXIT;
+                goto out_cancel_lml;
+        }
+        
+        fset = presto_fset(dentry);
+
+        error=-EINVAL;
+        if (fset==NULL) {
+                CERROR("No fileset!\n");
+                EXIT;
+                goto out_cancel_lml;
+        }
+        
+        /* this only requires a transaction below which is automatic */
+        handle = presto_trans_start(fset, dentry->d_inode, PRESTO_OP_RELEASE); 
+        if ( IS_ERR(handle) ) {
+                error = -ENOMEM; 
+                EXIT; 
+                goto out_cancel_lml; 
+        } 
+        
+        if (info->flags & LENTO_FL_CANCEL_LML) {
+                error = presto_clear_lml_close(fset, lml_offset);
+                if ( error ) {
+                        presto_trans_commit(fset, handle);
+                        EXIT; 
+                        goto out_cancel_lml;
+                }
+        }
+
+
+        if (info->flags & LENTO_FL_WRITE_KML) {
+                presto_getversion(&new_ver, dentry->d_inode);
+                error = presto_journal_close(&rec, fset, NULL, dentry,
+                                             &new_ver);
+                if ( error ) {
+                        EXIT; 
+                        presto_trans_commit(fset, handle);
+                        goto out_cancel_lml;
+                }
+        }
+
+        if (info->flags & LENTO_FL_WRITE_EXPECT) {
+                error = presto_write_last_rcvd(&rec, fset, info); 
+                if ( error < 0 ) {
+                        EXIT; 
+                        presto_trans_commit(fset, handle);
+                        goto out_cancel_lml;
+                }
+        }
+
+        presto_trans_commit(fset, handle);
+
+        if (info->flags & LENTO_FL_CANCEL_LML) {
+            presto_truncate_lml(fset); 
+        }
+                
+
+ out_cancel_lml:
+        EXIT;
+        path_release(&nd); 
+        return error;
+}       
+#endif 
+
+/* given a dentry, operate on the flags in its dentry.  Used by downcalls */
+int izo_mark_dentry(struct dentry *dentry, int and_flag, int or_flag, 
+                       int *res)
+{
+        int error = 0;
+
+        if (presto_d2d(dentry) == NULL) {
+                CERROR("InterMezzo: no ddata for inode %ld in %s\n",
+                       dentry->d_inode->i_ino, __FUNCTION__);
+                return -EINVAL;
+        }
+
+        CDEBUG(D_INODE, "inode: %ld, and flag %x, or flag %x, dd_flags %x\n",
+               dentry->d_inode->i_ino, and_flag, or_flag,
+               presto_d2d(dentry)->dd_flags);
+
+        presto_d2d(dentry)->dd_flags &= and_flag;
+        presto_d2d(dentry)->dd_flags |= or_flag;
+        if (res) 
+                *res = presto_d2d(dentry)->dd_flags;
+
+        return error;
+}
+
+/* given a path, operate on the flags in its cache.  Used by mark_ioctl */
+int izo_mark_cache(struct dentry *dentry, int and_flag, int or_flag, 
+                   int *res)
+{
+        struct presto_cache *cache;
+
+        if (presto_d2d(dentry) == NULL) {
+                CERROR("InterMezzo: no ddata for inode %ld in %s\n",
+                       dentry->d_inode->i_ino, __FUNCTION__);
+                return -EINVAL;
+        }
+
+        CDEBUG(D_INODE, "inode: %ld, and flag %x, or flag %x, dd_flags %x\n",
+               dentry->d_inode->i_ino, and_flag, or_flag,
+               presto_d2d(dentry)->dd_flags);
+
+        cache = presto_get_cache(dentry->d_inode);
+        if ( !cache ) {
+                CERROR("PRESTO: BAD: cannot find cache in izo_mark_cache\n");
+                return -EBADF;
+        }
+
+        cache->cache_flags &= and_flag;
+        cache->cache_flags |= or_flag;
+        if (res)
+                *res = (int)cache->cache_flags;
+
+        return 0;
+}
+
+int presto_set_max_kml_size(const char *path, unsigned long max_size)
+{
+        struct presto_file_set *fset;
+
+        ENTRY;
+
+        fset = presto_path2fileset(path);
+        if (IS_ERR(fset)) {
+                EXIT;
+                return PTR_ERR(fset);
+        }
+
+        fset->kml_truncate_size = max_size;
+        CDEBUG(D_CACHE, "KML truncate size set to %lu bytes for fset %s.\n",
+               max_size, path);
+
+        EXIT;
+        return 0;
+}
+
+int izo_mark_fset(struct dentry *dentry, int and_flag, int or_flag, 
+                  int * res)
+{
+        struct presto_file_set *fset;
+        
+        fset = presto_fset(dentry);
+        if ( !fset ) {
+                CERROR("PRESTO: BAD: cannot find cache in izo_mark_cache\n");
+                make_bad_inode(dentry->d_inode);
+                return -EBADF;
+        }
+        fset->fset_flags &= and_flag;
+        fset->fset_flags |= or_flag;
+        if (res)
+                *res = (int)fset->fset_flags;
+
+        return 0;
+}
+
+/* talk to Lento about the permit */
+static int presto_permit_upcall(struct dentry *dentry)
+{
+        int rc;
+        char *path, *buffer;
+        int pathlen;
+        int minor;
+        int fsetnamelen;
+        struct presto_file_set *fset = NULL;
+
+        ENTRY;
+
+        if ( (minor = presto_i2m(dentry->d_inode)) < 0) {
+                EXIT;
+                return -EINVAL;
+        }
+
+        fset = presto_fset(dentry);
+        if (!fset) {
+                EXIT;
+                return -ENOTCONN;
+        }
+        
+        if ( !presto_lento_up(minor) ) {
+                if ( fset->fset_flags & FSET_STEAL_PERMIT ) {
+                        EXIT;
+                        return 0;
+                } else {
+                        EXIT;
+                        return -ENOTCONN;
+                }
+        }
+
+        PRESTO_ALLOC(buffer, PAGE_SIZE);
+        if ( !buffer ) {
+                CERROR("PRESTO: out of memory!\n");
+                EXIT;
+                return -ENOMEM;
+        }
+        path = presto_path(dentry, fset->fset_dentry, buffer, PAGE_SIZE);
+        pathlen = MYPATHLEN(buffer, path);
+        fsetnamelen = strlen(fset->fset_name); 
+        rc = izo_upc_permit(minor, dentry, pathlen, path, fset->fset_name);
+        PRESTO_FREE(buffer, PAGE_SIZE);
+        EXIT;
+        return rc;
+}
+
+/* get a write permit for the fileset of this inode
+ *  - if this returns a negative value there was an error
+ *  - if 0 is returned the permit was already in the kernel -- or --
+ *    Lento gave us the permit without reintegration
+ *  - lento returns the number of records it reintegrated 
+ *
+ * Note that if this fileset has branches, a permit will -never- be given to a
+ * normal process for writing in the data area (ie, outside of .intermezzo)
+ */
+int presto_get_permit(struct inode * inode)
+{
+        struct dentry *de;
+        struct presto_file_set *fset;
+        int minor = presto_i2m(inode);
+        int rc = 0;
+
+        ENTRY;
+        if (minor < 0) {
+                EXIT;
+                return -1;
+        }
+
+        if ( ISLENTO(minor) ) {
+                EXIT;
+                return 0;
+        }
+
+        if (list_empty(&inode->i_dentry)) {
+                CERROR("No alias for inode %d\n", (int) inode->i_ino);
+                EXIT;
+                return -EINVAL;
+        }
+
+        de = list_entry(inode->i_dentry.next, struct dentry, d_alias);
+
+        if (presto_chk(de, PRESTO_DONT_JOURNAL)) {
+                EXIT;
+                return 0;
+        }
+
+        fset = presto_fset(de);
+        if ( !fset ) {
+                CERROR("Presto: no fileset in presto_get_permit!\n");
+                EXIT;
+                return -EINVAL;
+        }
+
+        if (fset->fset_flags & FSET_HAS_BRANCHES) {
+                EXIT;
+                return -EROFS;
+        }
+
+        spin_lock(&fset->fset_permit_lock);
+        if (fset->fset_flags & FSET_HASPERMIT) {
+                fset->fset_permit_count++;
+                CDEBUG(D_INODE, "permit count now %d, inode %lx\n", 
+                       fset->fset_permit_count, inode->i_ino);
+                spin_unlock(&fset->fset_permit_lock);
+                EXIT;
+                return 0;
+        }
+
+        /* Allow reintegration to proceed without locks -SHP */
+        fset->fset_permit_upcall_count++;
+        if (fset->fset_permit_upcall_count == 1) {
+                spin_unlock(&fset->fset_permit_lock);
+                rc = presto_permit_upcall(fset->fset_dentry);
+                spin_lock(&fset->fset_permit_lock);
+                fset->fset_permit_upcall_count--;
+                if (rc == 0) {
+                        izo_mark_fset(fset->fset_dentry, ~0, FSET_HASPERMIT,
+                                      NULL);
+                        fset->fset_permit_count++;
+                } else if (rc == ENOTCONN) {
+                        CERROR("InterMezzo: disconnected operation. stealing permit.\n");
+                        izo_mark_fset(fset->fset_dentry, ~0, FSET_HASPERMIT,
+                                      NULL);
+                        fset->fset_permit_count++;
+                        /* set a disconnected flag here to stop upcalls */
+                        rc = 0;
+                } else {
+                        CERROR("InterMezzo: presto_permit_upcall failed: %d\n", rc);
+                        rc = -EROFS;
+                        /* go to sleep here and try again? */
+                }
+                wake_up_interruptible(&fset->fset_permit_queue);
+        } else {
+                /* Someone is already doing an upcall; go to sleep. */
+                DECLARE_WAITQUEUE(wait, current);
+
+                spin_unlock(&fset->fset_permit_lock);
+                add_wait_queue(&fset->fset_permit_queue, &wait);
+                while (1) {
+                        set_current_state(TASK_INTERRUPTIBLE);
+
+                        spin_lock(&fset->fset_permit_lock);
+                        if (fset->fset_permit_upcall_count == 0)
+                                break;
+                        spin_unlock(&fset->fset_permit_lock);
+
+                        if (signal_pending(current)) {
+                                remove_wait_queue(&fset->fset_permit_queue,
+                                                  &wait);
+                                return -ERESTARTSYS;
+                        }
+                        schedule();
+                }
+                remove_wait_queue(&fset->fset_permit_queue, &wait);
+                /* We've been woken up: do we have the permit? */
+                if (fset->fset_flags & FSET_HASPERMIT)
+                        /* FIXME: Is this the right thing? */
+                        rc = -EAGAIN;
+        }
+
+        CDEBUG(D_INODE, "permit count now %d, ino %ld (likely 1), "
+               "rc %d\n", fset->fset_permit_count, inode->i_ino, rc);
+        spin_unlock(&fset->fset_permit_lock);
+        EXIT;
+        return rc;
+}
+
+int presto_put_permit(struct inode * inode)
+{
+        struct dentry *de;
+        struct presto_file_set *fset;
+        int minor = presto_i2m(inode);
+
+        ENTRY;
+        if (minor < 0) {
+                EXIT;
+                return -1;
+        }
+
+        if ( ISLENTO(minor) ) {
+                EXIT;
+                return 0;
+        }
+
+        if (list_empty(&inode->i_dentry)) {
+                CERROR("No alias for inode %d\n", (int) inode->i_ino);
+                EXIT;
+                return -1;
+        }
+
+        de = list_entry(inode->i_dentry.next, struct dentry, d_alias);
+
+        fset = presto_fset(de);
+        if ( !fset ) {
+                CERROR("InterMezzo: no fileset in %s!\n", __FUNCTION__);
+                EXIT;
+                return -1;
+        }
+
+        if (presto_chk(de, PRESTO_DONT_JOURNAL)) {
+                EXIT;
+                return 0;
+        }
+
+        spin_lock(&fset->fset_permit_lock);
+        if (fset->fset_flags & FSET_HASPERMIT) {
+                if (fset->fset_permit_count > 0)
+                        fset->fset_permit_count--;
+                else
+                        CERROR("Put permit while permit count is 0, "
+                               "inode %ld!\n", inode->i_ino); 
+        } else {
+                fset->fset_permit_count = 0;
+                CERROR("InterMezzo: put permit while no permit, inode %ld, "
+                       "flags %x!\n", inode->i_ino, fset->fset_flags);
+        }
+
+        CDEBUG(D_INODE, "permit count now %d, inode %ld\n",
+               fset->fset_permit_count, inode->i_ino);
+
+        if (fset->fset_flags & FSET_PERMIT_WAITING &&
+            fset->fset_permit_count == 0) {
+                CDEBUG(D_INODE, "permit count now 0, ino %ld, wake sleepers\n",
+                       inode->i_ino);
+                wake_up_interruptible(&fset->fset_permit_queue);
+        }
+        spin_unlock(&fset->fset_permit_lock);
+
+        EXIT;
+        return 0;
+}
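+
+/* For illustration only: callers are expected to pair these two functions
+ * around a journalled operation, roughly as sketched below.  The helper
+ * presto_do_operation() is hypothetical and stands in for whatever work the
+ * caller performs while holding the permit.
+ *
+ *        if (presto_get_permit(inode) < 0)
+ *                return -EROFS;
+ *        err = presto_do_operation(inode);
+ *        presto_put_permit(inode);
+ *        return err;
+ */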
+
+void presto_getversion(struct presto_version * presto_version,
+                       struct inode * inode)
+{
+        presto_version->pv_mtime_sec = inode->i_mtime.tv_sec;
+        presto_version->pv_mtime_nsec = inode->i_mtime.tv_nsec;
+        presto_version->pv_ctime_sec = inode->i_ctime.tv_sec;
+        presto_version->pv_ctime_nsec = inode->i_ctime.tv_nsec;
+        presto_version->pv_size  = (__u64)inode->i_size;
+}
+
+
+/* If uuid is non-null, it is the uuid of the peer that's making the revocation
+ * request.  If it is null, this request was made locally, without external
+ * pressure to give up the permit.  This most often occurs when a client
+ * starts up.
+ *
+ * FIXME: this function needs to be refactored slightly once we start handling
+ * multiple clients.
+ */
+int izo_revoke_permit(struct dentry *dentry, __u8 uuid[16])
+{
+        struct presto_file_set *fset; 
+        DECLARE_WAITQUEUE(wait, current);
+        int minor, rc;
+
+        ENTRY;
+
+        minor = presto_i2m(dentry->d_inode);
+        if (minor < 0) {
+                EXIT;
+                return -ENODEV;
+        }
+
+        fset = presto_fset(dentry);
+        if (fset == NULL) {
+                EXIT;
+                return -ENODEV;
+        }
+
+        spin_lock(&fset->fset_permit_lock);
+        if (fset->fset_flags & FSET_PERMIT_WAITING) {
+                CERROR("InterMezzo: Two processes are waiting on the same permit--this not yet supported!  Aborting this particular permit request...\n");
+                EXIT;
+                spin_unlock(&fset->fset_permit_lock);
+                return -EINVAL;
+        }
+
+        if (fset->fset_permit_count == 0)
+                goto got_permit;
+
+        /* Something is still using this permit.  Mark that we're waiting for it
+         * and go to sleep. */
+        rc = izo_mark_fset(dentry, ~0, FSET_PERMIT_WAITING, NULL);
+        spin_unlock(&fset->fset_permit_lock);
+        if (rc < 0) {
+                EXIT;
+                return rc;
+        }
+
+        add_wait_queue(&fset->fset_permit_queue, &wait);
+        while (1) {
+                set_current_state(TASK_INTERRUPTIBLE);
+
+                spin_lock(&fset->fset_permit_lock);
+                if (fset->fset_permit_count == 0)
+                        break;
+                spin_unlock(&fset->fset_permit_lock);
+
+                if (signal_pending(current)) {
+                        /* FIXME: there must be a better thing to return... */
+                        remove_wait_queue(&fset->fset_permit_queue, &wait);
+                        EXIT;
+                        return -ERESTARTSYS;
+                }
+
+                /* FIXME: maybe there should be a timeout here. */
+
+                schedule();
+        }
+
+        remove_wait_queue(&fset->fset_permit_queue, &wait);
+ got_permit:
+        /* By this point fset->fset_permit_count is zero and we're holding the
+         * lock. */
+        CDEBUG(D_CACHE, "InterMezzo: releasing permit inode %ld\n",
+               dentry->d_inode->i_ino);
+
+        if (uuid != NULL) {
+                rc = izo_upc_revoke_permit(minor, fset->fset_name, uuid);
+                if (rc < 0) {
+                        spin_unlock(&fset->fset_permit_lock);
+                        EXIT;
+                        return rc;
+                }
+        }
+
+        izo_mark_fset(fset->fset_dentry, ~FSET_PERMIT_WAITING, 0, NULL);
+        izo_mark_fset(fset->fset_dentry, ~FSET_HASPERMIT, 0, NULL);
+        spin_unlock(&fset->fset_permit_lock);
+        EXIT;
+        return 0;
+}
+
+inline int presto_is_read_only(struct presto_file_set * fset)
+{
+        int minor, mask;
+        struct presto_cache *cache = fset->fset_cache;
+
+        minor= cache->cache_psdev->uc_minor;
+        mask= (ISLENTO(minor)? FSET_LENTO_RO : FSET_CLIENT_RO);
+        if ( fset->fset_flags & mask )
+                return 1;
+        mask= (ISLENTO(minor)? CACHE_LENTO_RO : CACHE_CLIENT_RO);
+        return  ((cache->cache_flags & mask)? 1 : 0);
+}
diff --git a/fs/intermezzo/psdev.c b/fs/intermezzo/psdev.c
new file mode 100644 (file)
index 0000000..40a85cc
--- /dev/null
@@ -0,0 +1,647 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *              An implementation of a loadable kernel mode driver providing
+ *              multiple kernel/user space bidirectional communications links.
+ *
+ *              Author:         Alan Cox <alan@cymru.net>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              version 2 as published by the Free Software Foundation.
+ *
+ *              Adapted to become the Linux 2.0 Coda pseudo device
+ *              Peter  Braam  <braam@maths.ox.ac.uk>
+ *              Michael Callahan <mjc@emmy.smith.edu>
+ *
+ *              Changes for Linux 2.1
+ *              Copyright (c) 1997 Carnegie-Mellon University
+ *
+ *              Redone again for InterMezzo
+ *              Copyright (c) 1998 Peter J. Braam
+ *              Copyright (c) 2000 Mountain View Data, Inc.
+ *              Copyright (c) 2000 Tacitus Systems, Inc.
+ *              Copyright (c) 2001 Cluster File Systems, Inc.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/lp.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/fcntl.h>
+#include <linux/delay.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/devfs_fs_kernel.h>
+#include <asm/io.h>
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <asm/poll.h>
+#include <asm/uaccess.h>
+#include <linux/miscdevice.h>
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+
+#ifdef PRESTO_DEVEL
+int  presto_print_entry = 1;
+int  presto_debug = 4095;
+#else
+int  presto_print_entry = 0;
+int  presto_debug = 0;
+#endif
+
+/* Like inode.c (presto_sym_iops), the initializer is just to prevent
+   izo_channels from appearing as a COMMON symbol (and therefore
+   interfering with other modules that use the same variable name). */
+struct upc_channel izo_channels[MAX_CHANNEL] = {{0}};
+
+int izo_psdev_get_free_channel(void)
+{
+        int i, result = -1;
+        
+        for (i = 0 ; i < MAX_CHANNEL ; i++ ) {
+                if (list_empty(&(izo_channels[i].uc_cache_list))) { 
+                    result = i;
+                    break;
+                }
+        }
+        return result;
+}
+
+
+int izo_psdev_setpid(int minor)
+{
+        struct upc_channel *channel; 
+        if (minor < 0 || minor >= MAX_CHANNEL) { 
+                return -EINVAL;
+        }
+
+        channel = &(izo_channels[minor]); 
+        /*
+         * This ioctl is performed by each Lento that starts up
+         * and wants to do further communication with presto.
+         */
+        CDEBUG(D_PSDEV, "Setting current pid to %d channel %d\n", 
+               current->pid, minor);
+        channel->uc_pid = current->pid;
+        spin_lock(&channel->uc_lock); 
+        if ( !list_empty(&channel->uc_processing) ) {
+                struct list_head *lh;
+                struct upc_req *req;
+                CERROR("WARNING: setpid & processing not empty!\n");
+               list_for_each(lh, &channel->uc_processing) {
+                        req = list_entry(lh, struct upc_req, rq_chain);
+                        /* freeing of req and data is done by the sleeper */
+                        wake_up(&req->rq_sleep);
+                }
+        }
+        if ( !list_empty(&channel->uc_processing) ) {
+                CERROR("BAD: FAILDED TO CLEAN PROCESSING LIST!\n");
+        }
+        spin_unlock(&channel->uc_lock); 
+        EXIT;
+        return 0;
+}
+
+int izo_psdev_setchannel(struct file *file, int fd)
+{
+
+        struct file *psdev_file = fget(fd); 
+        struct presto_cache *cache = presto_get_cache(file->f_dentry->d_inode);
+
+        if (!psdev_file) { 
+                CERROR("%s: no psdev_file!\n", __FUNCTION__);
+                return -EINVAL;
+        }
+
+        if (!cache) { 
+                CERROR("%s: no cache!\n", __FUNCTION__);
+                fput(psdev_file); 
+                return -EINVAL;
+        } 
+
+        if (psdev_file->private_data) { 
+                CERROR("%s: channel already set!\n", __FUNCTION__);
+                fput(psdev_file); 
+                return -EINVAL;
+        }
+
+        psdev_file->private_data = cache->cache_psdev;
+        fput(psdev_file); 
+        EXIT; 
+        return 0; 
+}
+
+inline int presto_lento_up(int minor) 
+{
+        return izo_channels[minor].uc_pid;
+}
+
+static unsigned int presto_psdev_poll(struct file *file, poll_table * wait)
+{
+        struct upc_channel *channel = (struct upc_channel *)file->private_data;
+        unsigned int mask = POLLOUT | POLLWRNORM;
+
+        /* ENTRY; this will flood you */
+        if ( ! channel ) { 
+                CERROR("%s: bad psdev file\n", __FUNCTION__);
+                return -EBADF;
+        }
+
+        poll_wait(file, &(channel->uc_waitq), wait);
+
+        spin_lock(&channel->uc_lock);
+        if (!list_empty(&channel->uc_pending)) {
+                CDEBUG(D_PSDEV, "Non-empty pending list.\n");
+                mask |= POLLIN | POLLRDNORM;
+        }
+        spin_unlock(&channel->uc_lock);
+
+        /* EXIT; will flood you */
+        return mask;
+}
+
+/*
+ *      Receive a message written by Lento to the psdev
+ */
+static ssize_t presto_psdev_write(struct file *file, const char *buf,
+                                  size_t count, loff_t *off)
+{
+        struct upc_channel *channel = (struct upc_channel *)file->private_data;
+        struct upc_req *req = NULL;
+        struct upc_req *tmp;
+        struct list_head *lh;
+        struct izo_upcall_resp hdr;
+        int error;
+
+        if ( ! channel ) { 
+                CERROR("%s: bad psdev file\n", __FUNCTION__);
+                return -EBADF;
+        }
+
+        /* Peek at the opcode, uniquefier */
+        if ( count < sizeof(hdr) ) {
+              CERROR("presto_psdev_write: Lento didn't write full hdr.\n");
+                return -EINVAL;
+        }
+
+        error = copy_from_user(&hdr, buf, sizeof(hdr));
+        if ( error )
+                return -EFAULT;
+
+        CDEBUG(D_PSDEV, "(process,opc,uniq)=(%d,%d,%d)\n",
+               current->pid, hdr.opcode, hdr.unique);
+
+        spin_lock(&channel->uc_lock); 
+        /* Look for the message on the processing queue. */
+       list_for_each(lh, &channel->uc_processing) {
+                tmp = list_entry(lh, struct upc_req , rq_chain);
+                if (tmp->rq_unique == hdr.unique) {
+                        req = tmp;
+                        /* unlink here: keeps search length minimal */
+                        list_del_init(&req->rq_chain);
+                        CDEBUG(D_PSDEV,"Eureka opc %d uniq %d!\n",
+                               hdr.opcode, hdr.unique);
+                        break;
+                }
+        }
+        spin_unlock(&channel->uc_lock); 
+        if (!req) {
+                CERROR("psdev_write: msg (%d, %d) not found\n",
+                       hdr.opcode, hdr.unique);
+                return(-ESRCH);
+        }
+
+        /* move data into response buffer. */
+        if (req->rq_bufsize < count) {
+                CERROR("psdev_write: too much cnt: %d, cnt: %Zd, "
+                       "opc: %d, uniq: %d.\n",
+                       req->rq_bufsize, count, hdr.opcode, hdr.unique);
+                count = req->rq_bufsize; /* don't have more space! */
+        }
+        error = copy_from_user(req->rq_data, buf, count);
+        if ( error )
+                return -EFAULT;
+
+        /* adjust outsize: good upcalls can be aware of this */
+        req->rq_rep_size = count;
+        req->rq_flags |= REQ_WRITE;
+
+        wake_up(&req->rq_sleep);
+        return(count);
+}
+
+/*
+ *      Read a message from the kernel to Lento
+ */
+static ssize_t presto_psdev_read(struct file * file, char * buf,
+                                 size_t count, loff_t *off)
+{
+        struct upc_channel *channel = (struct upc_channel *)file->private_data;
+        struct upc_req *req;
+        int result = count;
+
+        if ( ! channel ) { 
+                CERROR("%s: bad psdev file\n", __FUNCTION__);
+                return -EBADF;
+        }
+
+        spin_lock(&channel->uc_lock); 
+        if (list_empty(&(channel->uc_pending))) {
+                CDEBUG(D_UPCALL, "Empty pending list in read, not good\n");
+                spin_unlock(&channel->uc_lock); 
+                return -EINVAL;
+        }
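+        /* Take the oldest pending request.  Synchronous requests are moved
+         * to the processing list so the reply written back by Lento can find
+         * them; async requests are not re-queued since no reply is expected. */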
+        req = list_entry((channel->uc_pending.next), struct upc_req, rq_chain);
+        list_del(&(req->rq_chain));
+        if (! (req->rq_flags & REQ_ASYNC) ) {
+                list_add(&(req->rq_chain), channel->uc_processing.prev);
+        }
+        spin_unlock(&channel->uc_lock); 
+
+        req->rq_flags |= REQ_READ;
+
+        /* Move the input args into userspace */
+        CDEBUG(D_PSDEV, "\n");
+        if (req->rq_bufsize <= count) {
+                result = req->rq_bufsize;
+        }
+
+        if (count < req->rq_bufsize) {
+                CERROR ("psdev_read: buffer too small, read %Zd of %d bytes\n",
+                        count, req->rq_bufsize);
+        }
+
+        if ( copy_to_user(buf, req->rq_data, result) ) {
+                BUG();
+                return -EFAULT;
+        }
+
+        /* If request was asynchronous don't enqueue, but free */
+        if (req->rq_flags & REQ_ASYNC) {
+                CDEBUG(D_PSDEV, "psdev_read: async msg (%d, %d), result %d\n",
+                       req->rq_opcode, req->rq_unique, result);
+                PRESTO_FREE(req->rq_data, req->rq_bufsize);
+                PRESTO_FREE(req, sizeof(*req));
+                return result;
+        }
+
+        return result;
+}
+
+
+static int presto_psdev_open(struct inode * inode, struct file * file)
+{
+        ENTRY;
+
+        file->private_data = NULL;  
+
+        CDEBUG(D_PSDEV, "Psdev_open: caller: %d, flags: %d\n", current->pid, file->f_flags);
+
+        EXIT;
+        return 0;
+}
+
+
+
+static int presto_psdev_release(struct inode * inode, struct file * file)
+{
+        struct upc_channel *channel = (struct upc_channel *)file->private_data;
+        struct upc_req *req;
+        struct list_head *lh;
+        ENTRY;
+
+        if ( ! channel ) { 
+                CERROR("%s: bad psdev file\n", __FUNCTION__);
+                return -EBADF;
+        }
+
+        CDEBUG(D_PSDEV, "Lento: pid %d\n", current->pid);
+        channel->uc_pid = 0;
+
+        /* Wake up clients so they can return. */
+        CDEBUG(D_PSDEV, "Wake up clients sleeping for pending.\n");
+        spin_lock(&channel->uc_lock); 
+       list_for_each(lh, &channel->uc_pending) {
+                req = list_entry(lh, struct upc_req, rq_chain);
+
+                /* Async requests stay around for a new lento */
+                if (req->rq_flags & REQ_ASYNC) {
+                        continue;
+                }
+                /* the sleeper will free the req and data */
+                req->rq_flags |= REQ_DEAD; 
+                wake_up(&req->rq_sleep);
+        }
+
+        CDEBUG(D_PSDEV, "Wake up clients sleeping for processing\n");
+       list_for_each(lh, &channel->uc_processing) {
+                req = list_entry(lh, struct upc_req, rq_chain);
+                /* freeing of req and data is done by the sleeper */
+                req->rq_flags |= REQ_DEAD; 
+                wake_up(&req->rq_sleep);
+        }
+        spin_unlock(&channel->uc_lock); 
+        CDEBUG(D_PSDEV, "Done.\n");
+
+        EXIT;
+        return 0;
+}
+
+static struct file_operations presto_psdev_fops = {
+       .owner   = THIS_MODULE,
+        .read    = presto_psdev_read,
+        .write   = presto_psdev_write,
+        .poll    = presto_psdev_poll,
+        .open    = presto_psdev_open,
+        .release = presto_psdev_release
+};
+
+/* modules setup */
+static struct miscdevice intermezzo_psdev = {
+        INTERMEZZO_MINOR,
+        "intermezzo",
+        &presto_psdev_fops
+};
+
+int  presto_psdev_init(void)
+{
+        int i;
+        int err; 
+
+        if ( (err = misc_register(&intermezzo_psdev)) ) { 
+                CERROR("%s: cannot register %d err %d\n", 
+                       __FUNCTION__, INTERMEZZO_MINOR, err);
+                return -EIO;
+        }
+
+        memset(&izo_channels, 0, sizeof(izo_channels));
+        for ( i = 0 ; i < MAX_CHANNEL ; i++ ) {
+                struct upc_channel *channel = &(izo_channels[i]);
+                INIT_LIST_HEAD(&channel->uc_pending);
+                INIT_LIST_HEAD(&channel->uc_processing);
+                INIT_LIST_HEAD(&channel->uc_cache_list);
+                init_waitqueue_head(&channel->uc_waitq);
+                channel->uc_lock = SPIN_LOCK_UNLOCKED;
+                channel->uc_hard = 0;
+                channel->uc_no_filter = 0;
+                channel->uc_no_journal = 0;
+                channel->uc_no_upcall = 0;
+                channel->uc_timeout = 30;
+                channel->uc_errorval = 0;
+                channel->uc_minor = i;
+        }
+        return 0;
+}
+
+void presto_psdev_cleanup(void)
+{
+        int i;
+
+        misc_deregister(&intermezzo_psdev);
+
+        for ( i = 0 ; i < MAX_CHANNEL ; i++ ) {
+                struct upc_channel *channel = &(izo_channels[i]);
+                struct list_head *lh, *next;
+
+                spin_lock(&channel->uc_lock); 
+                if ( ! list_empty(&channel->uc_pending)) { 
+                        CERROR("Weird, tell Peter: module cleanup and pending list not empty dev %d\n", i);
+                }
+                if ( ! list_empty(&channel->uc_processing)) { 
+                        CERROR("Weird, tell Peter: module cleanup and processing list not empty dev %d\n", i);
+                }
+                if ( ! list_empty(&channel->uc_cache_list)) { 
+                        CERROR("Weird, tell Peter: module cleanup and cache listnot empty dev %d\n", i);
+                }
+               list_for_each_safe(lh, next, &channel->uc_pending) {
+                        struct upc_req *req;
+
+                        req = list_entry(lh, struct upc_req, rq_chain);
+                        if ( req->rq_flags & REQ_ASYNC ) {
+                                list_del(&(req->rq_chain));
+                                CDEBUG(D_UPCALL, "free pending upcall type %d\n",
+                                       req->rq_opcode);
+                                PRESTO_FREE(req->rq_data, req->rq_bufsize);
+                                PRESTO_FREE(req, sizeof(struct upc_req));
+                        } else {
+                                req->rq_flags |= REQ_DEAD; 
+                                wake_up(&req->rq_sleep);
+                        }
+                }
+               list_for_each(lh, &channel->uc_processing) {
+                        struct upc_req *req;
+                        req = list_entry(lh, struct upc_req, rq_chain);
+                        list_del(&(req->rq_chain));
+                        req->rq_flags |= REQ_DEAD; 
+                        wake_up(&req->rq_sleep);
+                }
+                spin_unlock(&channel->uc_lock); 
+        }
+}
+
+/*
+ * lento_upcall and lento_downcall routines
+ */
+static inline unsigned long lento_waitfor_upcall
+            (struct upc_channel *channel, struct upc_req *req, int minor)
+{
+        DECLARE_WAITQUEUE(wait, current);
+        unsigned long posttime;
+
+        req->rq_posttime = posttime = jiffies;
+
+        add_wait_queue(&req->rq_sleep, &wait);
+        for (;;) {
+                if ( izo_channels[minor].uc_hard == 0 )
+                        set_current_state(TASK_INTERRUPTIBLE);
+                else
+                        set_current_state(TASK_UNINTERRUPTIBLE);
+
+                /* got a reply */
+                if ( req->rq_flags & (REQ_WRITE | REQ_DEAD) )
+                        break;
+
+                /* these cases only apply when TASK_INTERRUPTIBLE */ 
+                if ( !izo_channels[minor].uc_hard && signal_pending(current) ) {
+                        /* if this process really wants to die, let it go */
+                        if (sigismember(&(current->pending.signal), SIGKILL)||
+                            sigismember(&(current->pending.signal), SIGINT) )
+                                break;
+                        /* a signal is pending: once the timeout has expired,
+                           give up and return anyway */
+                        if ( time_after(jiffies, req->rq_posttime +
+                             izo_channels[minor].uc_timeout * HZ) )
+                                break;
+                }
+                schedule();
+        }
+
+        spin_lock(&channel->uc_lock);
+        list_del_init(&req->rq_chain); 
+        spin_unlock(&channel->uc_lock);
+        remove_wait_queue(&req->rq_sleep, &wait);
+        set_current_state(TASK_RUNNING);
+
+        CDEBUG(D_SPECIAL, "posttime: %ld, returned: %ld\n",
+               posttime, jiffies-posttime);
+        return  (jiffies - posttime);
+}
+
+/*
+ * lento_upcall will return an error in the case of
+ * failed communication with Lento _or_ will peek at Lento
+ * reply and return Lento's error.
+ *
+ * Lento returns two types of errors: normal errors (positive) and internal
+ * errors (negative).  Normal errors are negated before being returned, while
+ * internal errors are all mapped to -EINVAL and logged with a warning. (jh)
+ *
+ * lento_upcall always frees the buffer: either directly, when an async upcall
+ * is read (in presto_psdev_read), when the filesystem is unmounted, or when
+ * the module is unloaded.
+ */
+int izo_upc_upcall(int minor, int *size, struct izo_upcall_hdr *buffer, 
+                   int async)
+{
+        unsigned long runtime;
+        struct upc_channel *channel;
+        struct izo_upcall_resp *out;
+        struct upc_req *req;
+        int error = 0;
+
+        ENTRY;
+        channel = &(izo_channels[minor]);
+
+        if (channel->uc_no_upcall) {
+                EXIT;
+                goto exit_buf;
+        }
+        if (!channel->uc_pid && !async) {
+                EXIT;
+                error = -ENXIO;
+                goto exit_buf;
+        }
+
+        /* Format the request message. */
+        PRESTO_ALLOC(req, sizeof(struct upc_req));
+        if ( !req ) {
+                EXIT;
+                error = -ENOMEM;
+                goto exit_buf;
+        }
+        req->rq_data = (void *)buffer;
+        req->rq_flags = 0;
+        req->rq_bufsize = *size;
+        req->rq_rep_size = 0;
+        req->rq_opcode = buffer->u_opc;
+        req->rq_unique = ++channel->uc_seq;
+        init_waitqueue_head(&req->rq_sleep);
+
+        /* Fill in the common input args. */
+        buffer->u_uniq = req->rq_unique;
+        buffer->u_async = async;
+
+        /* set REQ_ASYNC before queueing to avoid a data race with the reader */
+        if ( async ) 
+                req->rq_flags = REQ_ASYNC;
+
+        spin_lock(&channel->uc_lock); 
+        /* Append msg to pending queue and poke Lento. */
+        list_add(&req->rq_chain, channel->uc_pending.prev);
+        spin_unlock(&channel->uc_lock); 
+        CDEBUG(D_UPCALL,
+               "Proc %d waking Lento %d for(opc,uniq) =(%d,%d) msg at %p.\n",
+               current->pid, channel->uc_pid, req->rq_opcode,
+               req->rq_unique, req);
+        wake_up_interruptible(&channel->uc_waitq);
+
+        if ( async ) {
+                /* req, rq_data are freed in presto_psdev_read for async */
+                /* req->rq_flags = REQ_ASYNC;*/
+                EXIT;
+                return 0;
+        }
+
+        /* We can be interrupted while we wait for Lento to process
+         * our request.  If the interrupt occurs before Lento has read
+         * the request, we dequeue and return. If it occurs after the
+         * read but before the reply, we dequeue, send a signal
+         * message, and return. If it occurs after the reply we ignore
+         * it. In no case do we want to restart the syscall.  If it
+         * was interrupted by a lento shutdown (psdev_close), return
+         * ENODEV.  */
+
+        /* Go to sleep.  Wake up on signals only after the timeout. */
+        runtime = lento_waitfor_upcall(channel, req, minor);
+
+        CDEBUG(D_TIMING, "opc: %d time: %ld uniq: %d size: %d\n",
+               req->rq_opcode, jiffies - req->rq_posttime,
+               req->rq_unique, req->rq_rep_size);
+        CDEBUG(D_UPCALL,
+               "..process %d woken up by Lento for req at 0x%p, data at %p\n",
+               current->pid, req, req->rq_data);
+
+        if (channel->uc_pid) {      /* i.e. Lento is still alive */
+          /* Op went through, interrupt or not we go on */
+            if (req->rq_flags & REQ_WRITE) {
+                    out = (struct izo_upcall_resp *)req->rq_data;
+                    /* here we map positive Lento errors to kernel errors */
+                    if ( out->result < 0 ) {
+                            CERROR("Tell Peter: Lento returns negative error %d, for oc %d!\n",
+                                   out->result, out->opcode);
+                          out->result = EINVAL;
+                    }
+                    error = -out->result;
+                    CDEBUG(D_UPCALL, "upcall: (u,o,r) (%d, %d, %d) out at %p\n",
+                           out->unique, out->opcode, out->result, out);
+                    *size = req->rq_rep_size;
+                    EXIT;
+                    goto exit_req;
+            }
+            /* Interrupted before lento read it. */
+            if ( !(req->rq_flags & REQ_READ) && signal_pending(current)) {
+                    CDEBUG(D_UPCALL,
+                           "Interrupt before read: (op,un)=(%d,%d), flags %x\n",
+                           req->rq_opcode, req->rq_unique, req->rq_flags);
+                    /* perhaps the best way to convince the app to give up? */
+                    error = -EINTR;
+                    EXIT;
+                    goto exit_req;
+            }
+
+            /* interrupted after Lento did its read, send signal */
+            if ( (req->rq_flags & REQ_READ) && signal_pending(current) ) {
+                    CDEBUG(D_UPCALL,"Interrupt after read: op = %d.%d, flags = %x\n",
+                           req->rq_opcode, req->rq_unique, req->rq_flags);
+
+                    error = -EINTR;
+            } else {
+                  CERROR("Lento: Strange interruption - tell Peter.\n");
+                    error = -EINTR;
+            }
+        } else {        /* If lento died i.e. !UC_OPEN(channel) */
+                CERROR("lento_upcall: Lento dead on (op,un) (%d.%d) flags %d\n",
+                       req->rq_opcode, req->rq_unique, req->rq_flags);
+                error = -ENODEV;
+        }
+
+exit_req:
+        PRESTO_FREE(req, sizeof(struct upc_req));
+exit_buf:
+        PRESTO_FREE(buffer,*size);
+        return error;
+}
diff --git a/fs/intermezzo/replicator.c b/fs/intermezzo/replicator.c
new file mode 100644 (file)
index 0000000..e7a0c5c
--- /dev/null
@@ -0,0 +1,290 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ * Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Manage RCVD records for clients in the kernel
+ *
+ */
+
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/fsfilter.h>
+
+#include "intermezzo_fs.h"
+
+/*
+ * this file contains a hash table of replicators/clients for a
+ * fileset. It allows fast lookup and update of reintegration status
+ */
+
+struct izo_offset_rec {
+       struct list_head or_list;
+       char             or_uuid[16];
+       loff_t           or_offset;
+};
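+
+/* Each izo_offset_rec maps a 16-byte client UUID to the byte offset of that
+ * client's record in the fileset's last_rcvd file, so the record can be
+ * re-read or updated without scanning the whole file. */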
+
+#define RCACHE_BITS 8
+#define RCACHE_SIZE (1 << RCACHE_BITS)
+#define RCACHE_MASK (RCACHE_SIZE - 1)
+
+static struct list_head *
+izo_rep_cache(void)
+{
+       int i;
+       struct list_head *cache;
+       PRESTO_ALLOC(cache, sizeof(struct list_head) * RCACHE_SIZE);
+       if (cache == NULL) {
+               CERROR("intermezzo-fatal: no memory for replicator cache\n");
+                return NULL;
+       }
+       memset(cache, 0, sizeof(struct list_head) * RCACHE_SIZE);
+       for (i = 0; i < RCACHE_SIZE; i++)
+               INIT_LIST_HEAD(&cache[i]);
+
+       return cache;
+}
+
+static struct list_head *
+izo_rep_hash(struct list_head *cache, char *uuid)
+{
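+        /* Hash on the second byte of the client UUID, masked down to the
+         * RCACHE_SIZE buckets. */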
+        return &cache[(RCACHE_MASK & uuid[1])];
+}
+
+static void
+izo_rep_cache_clean(struct presto_file_set *fset)
+{
+       int i;
+       struct list_head *bucket;
+       struct list_head *tmp;
+
+        if (fset->fset_clients == NULL)
+               return;
+        for (i = 0; i < RCACHE_SIZE; i++) {
+               tmp = bucket = &fset->fset_clients[i];
+
+               tmp = tmp->next;
+                while (tmp != bucket) {
+                        struct izo_offset_rec *offrec;
+                        struct list_head *next = tmp->next;
+
+                        /* unlink and free the current record, then advance
+                         * via the saved next pointer */
+                        list_del(tmp);
+                        offrec = list_entry(tmp, struct izo_offset_rec,
+                                            or_list);
+                        PRESTO_FREE(offrec, sizeof(struct izo_offset_rec));
+                        tmp = next;
+                }
+       }
+}
+
+struct izo_offset_rec *
+izo_rep_cache_find(struct presto_file_set *fset, char *uuid)
+{
+       struct list_head *tmp, *buck = izo_rep_hash(fset->fset_clients, uuid);
+        struct izo_offset_rec *rec = NULL;
+
+       list_for_each(tmp, buck) {
+               rec = list_entry(tmp, struct izo_offset_rec, or_list);
+                if ( memcmp(rec->or_uuid, uuid, sizeof(rec->or_uuid)) == 0 )
+                       return rec;
+       }
+
+       return NULL;
+}
+
+static int
+izo_rep_cache_add(struct presto_file_set *fset, struct izo_rcvd_rec *rec,
+                  loff_t offset)
+{
+        struct izo_offset_rec *offrec;
+
+        if (izo_rep_cache_find(fset, rec->lr_uuid)) {
+                CERROR("izo: duplicate client entry %s off %Ld\n",
+                       fset->fset_name, offset);
+                return -EINVAL;
+        }
+
+        PRESTO_ALLOC(offrec, sizeof(*offrec));
+        if (offrec == NULL) {
+                CERROR("izo: cannot allocate offrec\n");
+                return -ENOMEM;
+        }
+
+        memcpy(offrec->or_uuid, rec->lr_uuid, sizeof(rec->lr_uuid));
+        offrec->or_offset = offset;
+
+        list_add(&offrec->or_list,
+                 izo_rep_hash(fset->fset_clients, rec->lr_uuid));
+        return 0;
+}
+
+int
+izo_rep_cache_init(struct presto_file_set *fset)
+{
+       struct izo_rcvd_rec rec;
+        loff_t offset = 0, last_offset = 0;
+
+       fset->fset_clients = izo_rep_cache();
+        if (fset->fset_clients == NULL) {
+               CERROR("Error initializing client cache\n");
+               return -ENOMEM;
+       }
+
+        while ( presto_fread(fset->fset_rcvd.fd_file, (char *)&rec,
+                             sizeof(rec), &offset) == sizeof(rec) ) {
+                int rc;
+
+                if ((rc = izo_rep_cache_add(fset, &rec, last_offset)) < 0) {
+                       izo_rep_cache_clean(fset);
+                       return rc;
+               }
+
+                last_offset = offset;
+       }
+
+       return 0;
+}
+
+/*
+ * Return local last_rcvd record for the client. Update or create 
+ * if necessary.
+ *
+ * XXX: After this call, any -EINVAL from izo_rcvd_get is a real error.
+ */
+int
+izo_repstatus(struct presto_file_set *fset,  __u64 client_kmlsize, 
+              struct izo_rcvd_rec *lr_client, struct izo_rcvd_rec *lr_server)
+{
+        int rc;
+        rc = izo_rcvd_get(lr_server, fset, lr_client->lr_uuid);
+        if (rc < 0 && rc != -EINVAL) {
+                return rc;
+        }
+
+        /* client is new or has been reset. */
+        if (rc < 0 || (client_kmlsize == 0 && lr_client->lr_remote_offset == 0)) {
+                memset(lr_server, 0, sizeof(*lr_server));
+                memcpy(lr_server->lr_uuid, lr_client->lr_uuid, sizeof(lr_server->lr_uuid));
+                rc = izo_rcvd_write(fset, lr_server);
+                if (rc < 0)
+                        return rc;
+        }
+
+        /* update intersync */
+        rc = izo_upc_repstatus(presto_f2m(fset), fset->fset_name, lr_server);
+        return rc;
+}
+
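+/* The last_rcvd file is an array of fixed-size struct izo_rcvd_rec records,
+ * one per client UUID.  izo_rcvd_get treats the on-disk fields as
+ * little-endian and converts them to host byte order as it copies them out. */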
+loff_t
+izo_rcvd_get(struct izo_rcvd_rec *rec, struct presto_file_set *fset, char *uuid)
+{
+        struct izo_offset_rec *offrec;
+        struct izo_rcvd_rec tmprec;
+        loff_t offset;
+
+        offrec = izo_rep_cache_find(fset, uuid);
+        if (offrec == NULL) {
+                CDEBUG(D_SPECIAL, "izo_get_rcvd: uuid not in hash.\n");
+                return -EINVAL;
+        }
+        offset = offrec->or_offset;
+
+        if (rec == NULL)
+                return offset;
+
+        if (presto_fread(fset->fset_rcvd.fd_file, (char *)&tmprec,
+                         sizeof(tmprec), &offset) != sizeof(tmprec)) {
+                CERROR("izo_get_rcvd: Unable to read from last_rcvd file offset "
+                       "%Lu\n", offset);
+                return -EIO;
+        }
+
+        memcpy(rec->lr_uuid, tmprec.lr_uuid, sizeof(tmprec.lr_uuid));
+        rec->lr_remote_recno = le64_to_cpu(tmprec.lr_remote_recno);
+        rec->lr_remote_offset = le64_to_cpu(tmprec.lr_remote_offset);
+        rec->lr_local_recno = le64_to_cpu(tmprec.lr_local_recno);
+        rec->lr_local_offset = le64_to_cpu(tmprec.lr_local_offset);
+        rec->lr_last_ctime = le64_to_cpu(tmprec.lr_last_ctime);
+
+        return offrec->or_offset;
+}
+
+/* Try to lookup the UUID in the hash.  Insert it if it isn't found.  Write the
+ * data to the file.
+ *
+ * Returns the offset of the beginning of the record in the last_rcvd file. */
+loff_t
+izo_rcvd_write(struct presto_file_set *fset, struct izo_rcvd_rec *rec)
+{
+        struct izo_offset_rec *offrec;
+        loff_t offset, rc;
+
+        ENTRY;
+
+        offrec = izo_rep_cache_find(fset, rec->lr_uuid);
+        if (offrec == NULL) {
+                /* I don't think it should be possible for an entry to be not in
+                 * the hash table without also having an invalid offset, but we
+                 * handle it gracefully regardless. */
+                write_lock(&fset->fset_rcvd.fd_lock);
+                offset = fset->fset_rcvd.fd_offset;
+                fset->fset_rcvd.fd_offset += sizeof(*rec);
+                write_unlock(&fset->fset_rcvd.fd_lock);
+
+                rc = izo_rep_cache_add(fset, rec, offset);
+                if (rc < 0) {
+                        EXIT;
+                        return rc;
+                }
+        } else
+                offset = offrec->or_offset;
+
+        rc = presto_fwrite(fset->fset_rcvd.fd_file, (char *)rec, sizeof(*rec),
+                           &offset);
+        if (rc == sizeof(*rec))
+                /* presto_fwrite() advances 'offset' */
+                rc = offset - sizeof(*rec);
+
+        EXIT;
+        return rc;
+}
+
+loff_t
+izo_rcvd_upd_remote(struct presto_file_set *fset, char * uuid,  __u64 remote_recno, 
+                    __u64 remote_offset)
+{
+        struct izo_rcvd_rec rec;
+        
+        loff_t rc;
+
+        ENTRY;
+        rc = izo_rcvd_get(&rec, fset, uuid);
+        if (rc < 0)
+                return rc;
+        rec.lr_remote_recno = remote_recno;
+        rec.lr_remote_offset = remote_offset;
+
+        rc = izo_rcvd_write(fset, &rec);
+        EXIT;
+        if (rc < 0)
+                return rc;
+        return 0;
+}
diff --git a/fs/intermezzo/super.c b/fs/intermezzo/super.c
new file mode 100644 (file)
index 0000000..9993ef2
--- /dev/null
@@ -0,0 +1,407 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
+ *  Copyright (C) 2000 Stelias Computing, Inc.
+ *  Copyright (C) 2000 Red Hat, Inc.
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *  presto's super.c
+ */
+
+static char rcsid[] __attribute ((unused)) = "$Id: super.c,v 1.4 2002/10/12 02:16:19 rread Exp $";
+#define INTERMEZZO_VERSION "$Revision: 1.4 $"
+
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/module.h>
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+#ifdef PRESTO_DEBUG
+long presto_vmemory = 0;
+long presto_kmemory = 0;
+#endif
+
+/* returns an allocated string, copied out from data if opt is found */
+static char *opt_read(const char *opt, char *data)
+{
+        char *value;
+        char *retval;
+
+        CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
+        if ( strncmp(opt, data, strlen(opt)) )
+                return NULL;
+
+        if ( (value = strchr(data, '=')) == NULL )
+                return NULL;
+
+        value++;
+        PRESTO_ALLOC(retval, strlen(value) + 1);
+        if ( !retval ) {
+                CERROR("InterMezzo: Out of memory!\n");
+                return NULL;
+        }
+
+        strcpy(retval, value);
+        CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
+        return retval;
+}
+
+static void opt_store(char **dst, char *opt)
+{
+        if (!dst) {
+                CERROR("intermezzo: opt_store: error, dst == NULL\n");
+                return;
+        }
+
+        if (*dst)
+                PRESTO_FREE(*dst, strlen(*dst) + 1);
+        *dst = opt;
+}
+
+static void opt_set_default(char **dst, char *defval)
+{
+        if (!dst) {
+                CERROR("intermezzo: opt_set_default: error, dst == NULL\n");
+                return;
+        }
+
+        if (*dst)
+                PRESTO_FREE(*dst, strlen(*dst) + 1);
+        if (defval) {
+                char *def_alloced; 
+                PRESTO_ALLOC(def_alloced, strlen(defval)+1);
+                if (!def_alloced) {
+                        CERROR("InterMezzo: Out of memory!\n");
+                        return ;
+                }
+                strcpy(def_alloced, defval);
+                *dst = def_alloced; 
+        }
+}
+
+
+/* Find the options for InterMezzo in "options", saving them into the
+ * passed pointers.  If the pointer is null, the option is discarded.
+ * Copy out all non-InterMezzo options into cache_data (to be passed
+ * to the read_super operation of the cache).  The return value will
+ * be a pointer to the end of the cache_data.
+ */
+static char *presto_options(struct file_system_type *fstype, 
+                            char *options, char *cache_data,
+                            char **cache_type, char **fileset,
+                            char **channel)
+{
+        char *this_char;
+        char *opt_ptr = options;
+        char *cache_data_end = cache_data;
+
+        /* set the defaults */ 
+        if (strcmp(fstype->name, "intermezzo") == 0)
+            opt_set_default(cache_type, "ext3"); 
+        else 
+            opt_set_default(cache_type, "tmpfs"); 
+            
+        if (!options || !cache_data)
+                return cache_data_end;
+
+
+        CDEBUG(D_SUPER, "parsing options\n");
+        while ((this_char = strsep (&opt_ptr, ",")) != NULL) {
+                char *opt;
+                if (!*this_char)
+                        continue;
+                CDEBUG(D_SUPER, "this_char %s\n", this_char);
+
+                if ( (opt = opt_read("fileset", this_char)) ) {
+                        opt_store(fileset, opt);
+                        continue;
+                }
+                if ( (opt = opt_read("cache_type", this_char)) ) {
+                        opt_store(cache_type, opt);
+                        continue;
+                }
+                if ( (opt = opt_read("channel", this_char)) ) {
+                        opt_store(channel, opt);
+                        continue;
+                }
+
+                cache_data_end += 
+                        sprintf(cache_data_end, "%s%s",
+                                cache_data_end != cache_data ? ",":"", 
+                                this_char);
+        }
+
+        return cache_data_end;
+}
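+
+/* For illustration (all values below are examples only): given
+ *        options = "fileset=home,cache_type=ext3,channel=0,ro"
+ * the loop above yields
+ *        *fileset    = "home"
+ *        *cache_type = "ext3"
+ *        *channel    = "0"
+ *        cache_data  = "ro"    (passed through to the cache filesystem)
+ * which would correspond to a mount along the lines of
+ *        mount -t intermezzo -o fileset=home,cache_type=ext3,channel=0,ro \
+ *              /dev/hda1 /mnt/izo
+ */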
+
+static int presto_set_channel(struct presto_cache *cache, char *channel)
+{
+        int minor; 
+
+        ENTRY;
+        if (!channel) {
+                minor = izo_psdev_get_free_channel();
+        } else {
+                minor = simple_strtoul(channel, NULL, 0); 
+        }
+        if (minor < 0 || minor >= MAX_CHANNEL) { 
+                CERROR("all channels in use or channel too large %d\n", 
+                       minor);
+                return -EINVAL;
+        }
+        
+        cache->cache_psdev = &(izo_channels[minor]);
+        list_add(&cache->cache_channel_list, 
+                 &cache->cache_psdev->uc_cache_list); 
+
+        EXIT;
+        return minor;
+}
+
+/* We always need to remove the presto options before passing 
+   mount options to cache FS */
+struct super_block *
+presto_get_sb(struct file_system_type *izo_type, int flags,
+             const char *devname, void *data)
+{
+        struct file_system_type *fstype;
+        struct presto_cache *cache = NULL;
+        char *cache_data = NULL;
+        char *cache_data_end;
+        char *cache_type = NULL;
+        char *fileset = NULL;
+        char *channel = NULL;
+        struct super_block *sb;
+        int err; 
+        int minor;
+
+        ENTRY;
+
+        /* reserve space for the cache's data */
+        PRESTO_ALLOC(cache_data, PAGE_SIZE);
+        if ( !cache_data ) {
+                CERROR("presto_read_super: Cannot allocate data page.\n");
+                EXIT;
+                goto out_err;
+        }
+
+        /* read and validate options */
+        cache_data_end = presto_options(izo_type, data, cache_data, &cache_type, 
+                                        &fileset, &channel);
+
+        /* was there anything for the cache filesystem in the data? */
+        if (cache_data_end == cache_data) {
+                PRESTO_FREE(cache_data, PAGE_SIZE);
+                cache_data_end = cache_data = NULL;
+        } else {
+                CDEBUG(D_SUPER, "cache_data at %p is: %s\n", cache_data,
+                       cache_data);
+        }
+
+        /* set up the cache */
+        cache = presto_cache_init();
+        if ( !cache ) {
+                CERROR("presto_read_super: failure allocating cache.\n");
+                EXIT;
+                goto out_err;
+        }
+        cache->cache_type = cache_type;
+
+        /* link cache to channel */ 
+        minor = presto_set_channel(cache, channel);
+        if (minor < 0) { 
+                EXIT;
+                goto out_err;
+        }
+
+        CDEBUG(D_SUPER, "Presto: type=%s, fset=%s, dev= %d, flags %x\n",
+               cache_type, fileset?fileset:"NULL", minor, cache->cache_flags);
+
+        /* get the filter for the cache */
+        fstype = get_fs_type(cache_type);
+        cache->cache_filter = filter_get_filter_fs((const char *)cache_type); 
+        if ( !fstype || !cache->cache_filter) {
+                CERROR("Presto: unrecognized fs type or cache type\n");
+                EXIT;
+                goto out_err;
+        }
+
+        sb = fstype->get_sb(fstype, flags, devname, cache_data);
+
+        if ( !sb || IS_ERR(sb)) {
+                CERROR("InterMezzo: cache mount failure.\n");
+                EXIT;
+                goto out_err;
+        }
+
+        /* can we in fact mount the cache */ 
+        if (sb->s_bdev && (strcmp(fstype->name, "vintermezzo") == 0)) {
+                CERROR("vintermezzo must not be used with a  block device\n");
+                EXIT;
+                goto out_err;
+        }
+
+        /* this might have been freed above */
+        if (cache_data) {
+                PRESTO_FREE(cache_data, PAGE_SIZE);
+                cache_data = NULL;
+        }
+
+        cache->cache_sb = sb;
+        cache->cache_root = dget(sb->s_root);
+
+        /* we now know the dev of the cache: hash the cache */
+        presto_cache_add(cache);
+        err = izo_prepare_fileset(sb->s_root, fileset); 
+
+        filter_setup_journal_ops(cache->cache_filter, cache->cache_type); 
+
+        /* make sure we have our own super operations: sb
+           still contains the cache operations */
+        filter_setup_super_ops(cache->cache_filter, sb->s_op, 
+                               &presto_super_ops);
+        sb->s_op = filter_c2usops(cache->cache_filter);
+
+        /* get izo directory operations: sb->s_root->d_inode exists now */
+        filter_setup_dir_ops(cache->cache_filter, sb->s_root->d_inode,
+                             &presto_dir_iops, &presto_dir_fops);
+        filter_setup_dentry_ops(cache->cache_filter, sb->s_root->d_op, 
+                                &presto_dentry_ops);
+        sb->s_root->d_inode->i_op = filter_c2udiops(cache->cache_filter);
+        sb->s_root->d_inode->i_fop = filter_c2udfops(cache->cache_filter);
+        sb->s_root->d_op = filter_c2udops(cache->cache_filter);
+
+        EXIT;
+        return sb;
+
+ out_err:
+        CDEBUG(D_SUPER, "out_err called\n");
+        if (cache)
+                PRESTO_FREE(cache, sizeof(struct presto_cache));
+        if (cache_data)
+                PRESTO_FREE(cache_data, PAGE_SIZE);
+        if (fileset)
+                PRESTO_FREE(fileset, strlen(fileset) + 1);
+        if (channel)
+                PRESTO_FREE(channel, strlen(channel) + 1);
+        if (cache_type)
+                PRESTO_FREE(cache_type, strlen(cache_type) + 1);
+
+        CDEBUG(D_MALLOC, "mount error exit: kmem %ld, vmem %ld\n",
+               presto_kmemory, presto_vmemory);
+        return ERR_PTR(-EINVAL);
+}
+
+
+
+
+#ifdef PRESTO_DEVEL
+static DECLARE_FSTYPE(presto_fs_type, "izo", presto_read_super, FS_REQUIRES_DEV);
+static DECLARE_FSTYPE(vpresto_fs_type, "vintermezzo", presto_read_super, FS_LITTER);
+#else 
+static struct file_system_type vpresto_fs_type = {
+       .owner          = THIS_MODULE,
+       .name           = "vintermezzo",
+       .get_sb         = presto_get_sb,
+       .kill_sb        = kill_litter_super,
+};
+static struct file_system_type presto_fs_type = {
+       .owner          = THIS_MODULE,
+       .name           = "intermezzo",
+       .get_sb         = presto_get_sb,
+       .kill_sb        = kill_block_super,
+       .fs_flags       = FS_REQUIRES_DEV,
+};
+#endif
+
+
+
+int __init init_intermezzo_fs(void)
+{
+        int status;
+
+        printk(KERN_INFO "InterMezzo Kernel/Intersync communications " INTERMEZZO_VERSION
+               " info@clusterfs.com\n");
+
+        status = presto_psdev_init();
+        if ( status ) {
+                CERROR("Problem (%d) in init_intermezzo_psdev\n", status);
+                return status;
+        }
+
+        status = init_intermezzo_sysctl();
+        if (status) {
+                CERROR("presto: failed in init_intermezzo_sysctl!\n");
+        }
+
+        presto_cache_init_hash();
+
+        if (!presto_init_ddata_cache()) {
+                CERROR("presto out of memory!\n");
+                return -ENOMEM;
+        }
+
+        status = register_filesystem(&presto_fs_type);
+        if (status) {
+                CERROR("presto: failed in register_filesystem!\n");
+        }
+        status = register_filesystem(&vpresto_fs_type);
+        if (status) {
+                CERROR("vpresto: failed in register_filesystem!\n");
+        }
+        return status;
+}
+
+void __exit exit_intermezzo_fs(void)
+{
+        int err;
+
+        ENTRY;
+
+        if ( (err = unregister_filesystem(&presto_fs_type)) != 0 ) {
+                CERROR("presto: failed to unregister filesystem\n");
+        }
+        if ( (err = unregister_filesystem(&vpresto_fs_type)) != 0 ) {
+                CERROR("vpresto: failed to unregister filesystem\n");
+        }
+
+        presto_psdev_cleanup();
+        cleanup_intermezzo_sysctl();
+        presto_cleanup_ddata_cache();
+        CERROR("after cleanup: kmem %ld, vmem %ld\n",
+               presto_kmemory, presto_vmemory);
+}
+
+
+MODULE_AUTHOR("Cluster Filesystems Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("InterMezzo Kernel/Intersync communications " INTERMEZZO_VERSION);
+MODULE_LICENSE("GPL");
+
+module_init(init_intermezzo_fs)
+module_exit(exit_intermezzo_fs)
diff --git a/fs/intermezzo/sysctl.c b/fs/intermezzo/sysctl.c
new file mode 100644 (file)
index 0000000..9436adf
--- /dev/null
@@ -0,0 +1,368 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 1999 Peter J. Braam <braam@clusterfs.com>
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *  Sysctl entries for InterMezzo
+ */
+
+#include <linux/config.h> /* for CONFIG_PROC_FS */
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/stat.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <asm/bitops.h>
+#include <asm/segment.h>
+#include <asm/uaccess.h>
+#include <linux/utsname.h>
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+/* /proc entries */
+
+#ifdef CONFIG_PROC_FS
+struct proc_dir_entry *proc_fs_intermezzo;
+int intermezzo_mount_get_info( char * buffer, char ** start, off_t offset,
+                              int length)
+{
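+        /* Currently a stub: nothing is copied into the buffer, so the /proc
+         * entry exports no data. */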
+       int len=0;
+
+       /* this works as long as we are below 1024 characters! */
+       *start = buffer + offset;
+       len -= offset;
+
+       if ( len < 0 )
+               return -EINVAL;
+
+       return len;
+}
+
+#endif
+
+
+/* SYSCTL below */
+
+static struct ctl_table_header *intermezzo_table_header = NULL;
+/* 0x100 to avoid any chance of collisions at any point in the tree with
+ * non-directories
+ */
+#define PSDEV_INTERMEZZO  (0x100)
+
+#define PSDEV_DEBUG       1      /* control debugging */
+#define PSDEV_TRACE       2      /* control enter/leave pattern */
+#define PSDEV_TIMEOUT      3      /* timeout on upcalls to become intrble */
+#define PSDEV_HARD         4      /* mount type "hard" or "soft" */
+#define PSDEV_NO_FILTER    5      /* controls presto_chk */
+#define PSDEV_NO_JOURNAL   6      /* controls presto_chk */
+#define PSDEV_NO_UPCALL    7      /* controls lento_upcall */
+#define PSDEV_ERRORVAL     8      /* controls presto_debug_fail_blkdev */
+#define PSDEV_EXCL_GID     9      /* which GID is ignored by presto */
+#define PSDEV_BYTES_TO_CLOSE 11   /* bytes to write before close */
+
+/* These are global presto control options */
+#define PRESTO_PRIMARY_CTLCNT 2
+static struct ctl_table presto_table[ PRESTO_PRIMARY_CTLCNT + MAX_CHANNEL + 1] =
+{
+       {PSDEV_DEBUG, "debug", &presto_debug, sizeof(int), 0644, NULL, &proc_dointvec},
+       {PSDEV_TRACE, "trace", &presto_print_entry, sizeof(int), 0644, NULL, &proc_dointvec},
+};
+
+/*
+ * Installing the sysctl entries: strategy
+ * - have templates for each /proc/sys/intermezzo/ entry
+ *   such an entry exists for each /dev/presto
+ *    (proto_channel_entry)
+ * - have a template for the contents of such directories
+ *    (proto_psdev_table)
+ * - have the master table (presto_table)
+ *
+ * When installing, malloc, memcpy and fix up the pointers to point to
+ * the appropriate constants in izo_channels[your_minor]
+ */
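+/*
+ * The resulting tree looks like:
+ *   /proc/sys/intermezzo/debug
+ *   /proc/sys/intermezzo/trace
+ *   /proc/sys/intermezzo/intermezzo<minor>/{hard,no_filter,no_journal,
+ *                                           no_upcall,timeout[,errorval]}
+ */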
+
+static ctl_table proto_psdev_table[] = {
+       {PSDEV_HARD, "hard", 0, sizeof(int), 0644, NULL, &proc_dointvec},
+       {PSDEV_NO_FILTER, "no_filter", 0, sizeof(int), 0644, NULL, &proc_dointvec},
+       {PSDEV_NO_JOURNAL, "no_journal", NULL, sizeof(int), 0644, NULL, &proc_dointvec},
+       {PSDEV_NO_UPCALL, "no_upcall", NULL, sizeof(int), 0644, NULL, &proc_dointvec},
+       {PSDEV_TIMEOUT, "timeout", NULL, sizeof(int), 0644, NULL, &proc_dointvec},
+#ifdef PRESTO_DEBUG
+       {PSDEV_ERRORVAL, "errorval", NULL, sizeof(int), 0644, NULL, &proc_dointvec},
+#endif
+       { 0 }
+};
+
+static ctl_table proto_channel_entry = {
+       PSDEV_INTERMEZZO, 0,  NULL, 0, 0555, 0,
+};
+
+static ctl_table intermezzo_table[2] = {
+       {PSDEV_INTERMEZZO, "intermezzo",    NULL, 0, 0555, presto_table},
+       {0}
+};
+
+/* support for external setting and getting of opts. */
+/* particularly via ioctl. The Right way to do this is via sysctl,
+ * but that will have to wait until intermezzo gets its own nice set of
+ * sysctl IDs
+ */
+/* we made these separate as setting may in future be more restricted
+ * than getting
+ */
+#ifdef RON_MINNICH
+int dosetopt(int minor, struct psdev_opt *opt)
+{
+       int retval = 0;
+       int newval = opt->optval;
+
+       ENTRY;
+
+       switch(opt->optname) {
+
+       case PSDEV_TIMEOUT:
+               izo_channels[minor].uc_timeout = newval;
+               break;
+
+       case PSDEV_HARD:
+               izo_channels[minor].uc_hard = newval;
+               break;
+
+       case PSDEV_NO_FILTER:
+               izo_channels[minor].uc_no_filter = newval;
+               break;
+
+       case PSDEV_NO_JOURNAL:
+               izo_channels[minor].uc_no_journal = newval;
+               break;
+
+       case PSDEV_NO_UPCALL:
+               izo_channels[minor].uc_no_upcall = newval;
+               break;
+
+#ifdef PRESTO_DEBUG
+       case PSDEV_ERRORVAL: {
+               /* If we have a positive arg, set a breakpoint for that
+                * value.  If we have a negative arg, make that device
+                * read-only.  FIXME  It would be much better to only
+                * allow setting the underlying device read-only for the
+                * current presto cache.
+                */
+               int errorval = izo_channels[minor].uc_errorval;
+               if (errorval < 0) {
+                       if (newval == 0)
+                               set_device_ro(-errorval, 0);
+                       else
+                               CERROR("device %s already read only\n",
+                                      kdevname(-errorval));
+               } else {
+                       if (newval < 0)
+                               set_device_ro(-newval, 1);
+                       izo_channels[minor].uc_errorval = newval;
+                       CDEBUG(D_PSDEV, "setting errorval to %d\n", newval);
+               }
+
+               break;
+       }
+#endif
+
+       case PSDEV_TRACE:
+       case PSDEV_DEBUG:
+       case PSDEV_BYTES_TO_CLOSE:
+       default:
+               CDEBUG(D_PSDEV,
+                      "ioctl: dosetopt: minor %d, bad optname 0x%x\n",
+                      minor, opt->optname);
+
+               retval = -EINVAL;
+       }
+
+       EXIT;
+       return retval;
+}
+
+int dogetopt(int minor, struct psdev_opt *opt)
+{
+       int retval = 0;
+
+       ENTRY;
+
+       switch(opt->optname) {
+
+       case PSDEV_TIMEOUT:
+               opt->optval = izo_channels[minor].uc_timeout;
+               break;
+
+       case PSDEV_HARD:
+               opt->optval = izo_channels[minor].uc_hard;
+               break;
+
+       case PSDEV_NO_FILTER:
+               opt->optval = izo_channels[minor].uc_no_filter;
+               break;
+
+       case PSDEV_NO_JOURNAL:
+               opt->optval = izo_channels[minor].uc_no_journal;
+               break;
+
+       case PSDEV_NO_UPCALL:
+               opt->optval = izo_channels[minor].uc_no_upcall;
+               break;
+
+#ifdef PRESTO_DEBUG
+       case PSDEV_ERRORVAL: {
+               int errorval = izo_channels[minor].uc_errorval;
+               if (errorval < 0 && is_read_only(-errorval))
+                       CERROR("device %s has been set read-only\n",
+                              kdevname(-errorval));
+               opt->optval = izo_channels[minor].uc_errorval;
+               break;
+       }
+#endif
+
+       case PSDEV_TRACE:
+       case PSDEV_DEBUG:
+       case PSDEV_BYTES_TO_CLOSE:
+       default:
+               CDEBUG(D_PSDEV,
+                      "ioctl: dogetopt: minor %d, bad optname 0x%x\n",
+                      minor, opt->optname);
+
+               retval = -EINVAL;
+       }
+
+       EXIT;
+       return retval;
+}
+#endif
+
+
+/* allocate the tables for the presto devices. We need
+ * sizeof(proto_channel_table)/sizeof(proto_channel_table[0])
+ * entries for each dev
+ */
+int /* __init */ init_intermezzo_sysctl(void)
+{
+       int i;
+       int total_dev = MAX_CHANNEL;
+       int entries_per_dev = sizeof(proto_psdev_table) /
+               sizeof(proto_psdev_table[0]);
+       int total_entries = entries_per_dev * total_dev;
+       ctl_table *dev_ctl_table;
+
+       PRESTO_ALLOC(dev_ctl_table, sizeof(ctl_table) * total_entries);
+
+       if (! dev_ctl_table) {
+               CERROR("WARNING: presto couldn't allocate dev_ctl_table\n");
+               EXIT;
+               return -ENOMEM;
+       }
+
+       /* now fill in the entries ... we put the individual presto<x>
+        * entries at the end of the table, and the per-presto stuff
+        * starting at the front.  We assume that the compiler makes
+        * this code more efficient, but really, who cares ... it
+        * happens once per reboot.
+        */
+       for(i = 0; i < total_dev; i++) {
+               void *p;
+
+               /* entry for this /proc/sys/intermezzo/intermezzo"i" */
+               ctl_table *psdev = &presto_table[i + PRESTO_PRIMARY_CTLCNT];
+               /* entries for the individual "files" in this "directory" */
+               ctl_table *psdev_entries = &dev_ctl_table[i * entries_per_dev];
+               /* init the psdev and psdev_entries with the prototypes */
+               *psdev = proto_channel_entry;
+               memcpy(psdev_entries, proto_psdev_table,
+                      sizeof(proto_psdev_table));
+               /* now specialize them ... */
+               /* the psdev has to point to psdev_entries, and fix the number */
+               psdev->ctl_name = psdev->ctl_name + i + 1; /* sorry */
+
+               PRESTO_ALLOC(p, PROCNAME_SIZE);
+               psdev->procname = p;
+               if (!psdev->procname) {
+                       PRESTO_FREE(dev_ctl_table,
+                                   sizeof(ctl_table) * total_entries);
+                       return -ENOMEM;
+               }
+               sprintf((char *) psdev->procname, "intermezzo%d", i);
+               /* hook presto into */
+               psdev->child = psdev_entries;
+
+               /* now for each psdev entry ... */
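+               /* the indices below must match the order of the entries
+                * in proto_psdev_table above: hard, no_filter, no_journal,
+                * no_upcall, timeout and (with PRESTO_DEBUG) errorval */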
+               psdev_entries[0].data = &(izo_channels[i].uc_hard);
+               psdev_entries[1].data = &(izo_channels[i].uc_no_filter);
+               psdev_entries[2].data = &(izo_channels[i].uc_no_journal);
+               psdev_entries[3].data = &(izo_channels[i].uc_no_upcall);
+               psdev_entries[4].data = &(izo_channels[i].uc_timeout);
+#ifdef PRESTO_DEBUG
+               psdev_entries[5].data = &(izo_channels[i].uc_errorval);
+#endif
+       }
+
+
+#ifdef CONFIG_SYSCTL
+       if ( !intermezzo_table_header )
+               intermezzo_table_header =
+                       register_sysctl_table(intermezzo_table, 0);
+#endif
+#ifdef CONFIG_PROC_FS
+       proc_fs_intermezzo = proc_mkdir("intermezzo", proc_root_fs);
+       if (proc_fs_intermezzo) {
+               proc_fs_intermezzo->owner = THIS_MODULE;
+               create_proc_info_entry("mounts", 0, proc_fs_intermezzo,
+                                      intermezzo_mount_get_info);
+       }
+#endif
+       return 0;
+}
+
+void cleanup_intermezzo_sysctl(void)
+{
+       int total_dev = MAX_CHANNEL;
+       int entries_per_dev = sizeof(proto_psdev_table) /
+               sizeof(proto_psdev_table[0]);
+       int total_entries = entries_per_dev * total_dev;
+       int i;
+
+#ifdef CONFIG_SYSCTL
+       if ( intermezzo_table_header )
+               unregister_sysctl_table(intermezzo_table_header);
+       intermezzo_table_header = NULL;
+#endif
+       for(i = 0; i < total_dev; i++) {
+               /* entry for this /proc/sys/intermezzo/intermezzo"i" */
+               ctl_table *psdev = &presto_table[i + PRESTO_PRIMARY_CTLCNT];
+               PRESTO_FREE(psdev->procname, PROCNAME_SIZE);
+       }
+       /* presto_table[PRESTO_PRIMARY_CTLCNT].child points to the
+        * dev_ctl_table previously allocated in init_intermezzo_sysctl()
+        */
+       PRESTO_FREE(presto_table[PRESTO_PRIMARY_CTLCNT].child, sizeof(ctl_table) * total_entries);
+
+#ifdef CONFIG_PROC_FS
+       remove_proc_entry("mounts", proc_fs_intermezzo);
+       remove_proc_entry("intermezzo", proc_root_fs);
+#endif
+}
+
diff --git a/fs/intermezzo/upcall.c b/fs/intermezzo/upcall.c
new file mode 100644 (file)
index 0000000..8019157
--- /dev/null
@@ -0,0 +1,559 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc. <braam@clusterfs.com>
+ * Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Mostly platform independent upcall operations to a cache manager:
+ *  -- upcalls
+ *  -- upcall routines
+ *
+ */
+
+#include <asm/system.h>
+#include <asm/segment.h>
+#include <asm/signal.h>
+#include <linux/signal.h>
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <asm/uaccess.h>
+
+#include "intermezzo_lib.h"
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+#include "intermezzo_idl.h"
+
+/*
+  At present:
+  -- Asynchronous calls:
+   - kml:            give a "more" kml indication to userland
+   - kml_truncate:   initiate KML truncation
+   - release_permit: kernel is done with permit
+  -- Synchronous
+   - open:           fetch file
+   - permit:         get a permit
+
+  Errors returned by user level code are positive
+
+ */
+
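+/* upc_pack() builds the single contiguous buffer handed to izo_upc_upcall():
+ * an izo_upcall_hdr followed by the path, the fileset name and the opaque
+ * record, sized with round_strlen()/size_round() for alignment; *size
+ * receives the total length. */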
+static struct izo_upcall_hdr *upc_pack(__u32 opcode, int pathlen, char *path,
+                                       char *fsetname, int reclen, char *rec,
+                                       int *size)
+{
+        struct izo_upcall_hdr *hdr;
+        char *ptr;
+        ENTRY;
+
+        *size = sizeof(struct izo_upcall_hdr);
+        if ( fsetname ) {
+                *size += round_strlen(fsetname);
+        }
+        if ( path ) { 
+                *size += round_strlen(path);
+        }
+        if ( rec ) { 
+                *size += size_round(reclen);
+        }
+        PRESTO_ALLOC(hdr, *size);
+        if (!hdr) { 
+                CERROR("intermezzo upcall: out of memory (opc %d)\n", opcode);
+                EXIT;
+                return NULL;
+        }
+        memset(hdr, 0, *size);
+
+        ptr = (char *)hdr + sizeof(*hdr);
+
+        /* XXX do we need fsuid ? */
+        hdr->u_len = *size;
+        hdr->u_version = IZO_UPC_VERSION;
+        hdr->u_opc = opcode;
+        hdr->u_pid = current->pid;
+        hdr->u_uid = current->fsuid;
+
+        if (path) { 
+                /*XXX Robert: please review what len to pass in for 
+                  NUL terminated strings */
+                hdr->u_pathlen = strlen(path);
+                LOGL0(path, hdr->u_pathlen, ptr);
+        }
+        if (fsetname) { 
+                hdr->u_fsetlen = strlen(fsetname);
+                LOGL0(fsetname, strlen(fsetname), ptr);
+        }
+        if (rec) { 
+                hdr->u_reclen = reclen;
+                LOGL(rec, reclen, ptr);
+        }
+        
+        EXIT;
+        return hdr;
+}
+
+/* the upcalls */
+int izo_upc_kml(int minor, __u64 offset, __u32 first_recno, __u64 length, __u32 last_recno, char *fsetname)
+{
+        int size;
+        int error;
+        struct izo_upcall_hdr *hdr;
+
+        ENTRY;
+        if (!presto_lento_up(minor)) {
+                EXIT;
+                return 0;
+        }
+
+        hdr = upc_pack(IZO_UPC_KML, 0, NULL, fsetname, 0, NULL, &size);
+        if (!hdr || IS_ERR(hdr)) {
+                EXIT;
+                return -PTR_ERR(hdr);
+        }
+
+        hdr->u_offset = offset;
+        hdr->u_first_recno = first_recno;
+        hdr->u_length = length;
+        hdr->u_last_recno = last_recno;
+
+        CDEBUG(D_UPCALL, "KML: fileset %s, offset %Lu, length %Lu, "
+               "first %u, last %d; minor %d\n",
+               fsetname,
+               (unsigned long long) hdr->u_offset,
+               (unsigned long long) hdr->u_length,
+               hdr->u_first_recno,
+               hdr->u_last_recno, minor);
+
+        error = izo_upc_upcall(minor, &size, hdr, ASYNCHRONOUS);
+
+        EXIT;
+        return -error;
+}
+
+int izo_upc_kml_truncate(int minor, __u64 length, __u32 last_recno, char *fsetname)
+{
+        int size;
+        int error;
+        struct izo_upcall_hdr *hdr;
+
+        ENTRY;
+        if (!presto_lento_up(minor)) {
+                EXIT;
+                return 0;
+        }
+
+        hdr = upc_pack(IZO_UPC_KML_TRUNC, 0, NULL, fsetname, 0, NULL, &size);
+        if (!hdr || IS_ERR(hdr)) {
+                EXIT;
+                return -PTR_ERR(hdr);
+        }
+
+        hdr->u_length = length;
+        hdr->u_last_recno = last_recno;
+
+        CDEBUG(D_UPCALL, "KML TRUNCATE: fileset %s, length %Lu, "
+               "last recno %d, minor %d\n",
+               fsetname,
+               (unsigned long long) hdr->u_length,
+               hdr->u_last_recno, minor);
+
+        error = izo_upc_upcall(minor, &size, hdr, ASYNCHRONOUS);
+
+        EXIT;
+        return error;
+}
+
+int izo_upc_open(int minor, __u32 pathlen, char *path, char *fsetname, struct lento_vfs_context *info)
+{
+        int size;
+        int error;
+        struct izo_upcall_hdr *hdr;
+        ENTRY;
+
+        if (!presto_lento_up(minor)) {
+                EXIT;
+                return -EIO;
+        }
+
+        hdr = upc_pack(IZO_UPC_OPEN, pathlen, path, fsetname, 
+                       sizeof(*info), (char*)info, &size);
+        if (!hdr || IS_ERR(hdr)) {
+                EXIT;
+                return -PTR_ERR(hdr);
+        }
+
+        CDEBUG(D_UPCALL, "path %s\n", path);
+
+        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
+        if (error)
+                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);
+
+        EXIT;
+        return -error;
+}
+
+int izo_upc_get_fileid(int minor, __u32 reclen, char *rec, 
+                       __u32 pathlen, char *path, char *fsetname)
+{
+        int size;
+        int error;
+        struct izo_upcall_hdr *hdr;
+        ENTRY;
+
+        if (!presto_lento_up(minor)) {
+                EXIT;
+                return -EIO;
+        }
+
+        hdr = upc_pack(IZO_UPC_GET_FILEID, pathlen, path, fsetname, reclen, rec, &size);
+        if (!hdr || IS_ERR(hdr)) {
+                EXIT;
+                return -PTR_ERR(hdr);
+        }
+
+        CDEBUG(D_UPCALL, "path %s\n", path);
+
+        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
+        if (error)
+                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);
+
+        EXIT;
+        return -error;
+}
+
+int izo_upc_backfetch(int minor, char *path, char *fsetname, struct lento_vfs_context *info)
+{
+        int size;
+        int error;
+        struct izo_upcall_hdr *hdr;
+        ENTRY;
+
+        if (!presto_lento_up(minor)) {
+                EXIT;
+                return -EIO;
+        }
+
+        hdr = upc_pack(IZO_UPC_BACKFETCH, strlen(path), path, fsetname, 
+                       sizeof(*info), (char *)info, &size);
+        if (!hdr || IS_ERR(hdr)) {
+                EXIT;
+                return -PTR_ERR(hdr);
+        }
+
+        /* This is currently synchronous, kml_reint_record blocks */
+        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
+        if (error)
+                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);
+
+        EXIT;
+        return -error;
+}
+
+int izo_upc_permit(int minor, struct dentry *dentry, __u32 pathlen, char *path,
+                   char *fsetname)
+{
+        int size;
+        int error;
+        struct izo_upcall_hdr *hdr;
+
+        ENTRY;
+
+        hdr = upc_pack(IZO_UPC_PERMIT, pathlen, path, fsetname, 0, NULL, &size);
+        if (!hdr || IS_ERR(hdr)) {
+                EXIT;
+                return -PTR_ERR(hdr);
+        }
+
+        CDEBUG(D_UPCALL, "Permit minor %d path %s\n", minor, path);
+
+        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
+
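+        /* An -EROFS reply means the server considers this fileset
+         * read-only; mark the local cache CACHE_CLIENT_RO as well. */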
+        if (error == -EROFS) {
+                int err;
+                CERROR("InterMezzo: ERROR - requested permit for read-only "
+                       "fileset.\n   Setting \"%s\" read-only!\n", path);
+                err = izo_mark_cache(dentry, 0xFFFFFFFF, CACHE_CLIENT_RO, NULL);
+                if (err)
+                        CERROR("InterMezzo ERROR: mark_cache %d\n", err);
+        } else if (error) {
+                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);
+        }
+
+        EXIT;
+        return error;
+}
+
+/* This is a ping-pong upcall handled on the server when a client (uuid)
+ * requests the permit for itself. */
+int izo_upc_revoke_permit(int minor, char *fsetname, __u8 uuid[16])
+{
+        int size;
+        int error;
+        struct izo_upcall_hdr *hdr;
+
+        ENTRY;
+
+        hdr = upc_pack(IZO_UPC_REVOKE_PERMIT, 0, NULL, fsetname, 0, NULL, &size);
+        if (!hdr || IS_ERR(hdr)) {
+                EXIT;
+                return -PTR_ERR(hdr);
+        }
+
+        memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid));
+
+        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
+
+        if (error)
+                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);
+
+        EXIT;
+        return -error;
+}
+
+int izo_upc_go_fetch_kml(int minor, char *fsetname, __u8 uuid[16],
+                         __u64 kmlsize)
+{
+        int size;
+        int error;
+        struct izo_upcall_hdr *hdr;
+        ENTRY;
+
+        if (!presto_lento_up(minor)) {
+                EXIT;
+                return -EIO;
+        }
+
+        hdr = upc_pack(IZO_UPC_GO_FETCH_KML, 0, NULL, fsetname, 0, NULL, &size);
+        if (!hdr || IS_ERR(hdr)) {
+                EXIT;
+                return -PTR_ERR(hdr);
+        }
+
+        hdr->u_offset = kmlsize;
+        memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid));
+
+        error = izo_upc_upcall(minor, &size, hdr, ASYNCHRONOUS);
+        if (error)
+                CERROR("%s: error %d\n", __FUNCTION__, error);
+
+        EXIT;
+        return -error;
+}
+
+int izo_upc_connect(int minor, __u64 ip_address, __u64 port, __u8 uuid[16],
+                    int client_flag)
+{
+        int size;
+        int error;
+        struct izo_upcall_hdr *hdr;
+        ENTRY;
+
+        if (!presto_lento_up(minor)) {
+                EXIT;
+                return -EIO;
+        }
+
+        hdr = upc_pack(IZO_UPC_CONNECT, 0, NULL, NULL, 0, NULL, &size);
+        if (!hdr || IS_ERR(hdr)) {
+                EXIT;
+                return -PTR_ERR(hdr);
+        }
+
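+        /* The generic header fields are reused to carry the connect
+         * parameters: u_offset holds the IP address, u_length the port
+         * and u_first_recno the client flag. */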
+        hdr->u_offset = ip_address;
+        hdr->u_length = port;
+        memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid));
+        hdr->u_first_recno = client_flag;
+
+        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
+        if (error) {
+                CERROR("%s: error %d\n", __FUNCTION__, error);
+        }
+
+        EXIT;
+        return -error;
+}
+
+int izo_upc_set_kmlsize(int minor, char *fsetname, __u8 uuid[16], __u64 kmlsize)
+{
+        int size;
+        int error;
+        struct izo_upcall_hdr *hdr;
+        ENTRY;
+
+        if (!presto_lento_up(minor)) {
+                EXIT;
+                return -EIO;
+        }
+
+        hdr = upc_pack(IZO_UPC_SET_KMLSIZE, 0, NULL, fsetname, 0, NULL, &size);
+        if (!hdr || IS_ERR(hdr)) {
+                EXIT;
+                return -PTR_ERR(hdr);
+        }
+
+        memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid));
+        hdr->u_length = kmlsize;
+
+        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
+        if (error)
+                CERROR("%s: error %d\n", __FUNCTION__, error);
+
+        EXIT;
+        return -error;
+}
+
+int izo_upc_repstatus(int minor,  char * fsetname, struct izo_rcvd_rec *lr_server)
+{
+        int size;
+        int error;
+        struct izo_upcall_hdr *hdr;
+        ENTRY;
+
+        if (!presto_lento_up(minor)) {
+                EXIT;
+                return -EIO;
+        }
+
+        hdr = upc_pack(IZO_UPC_REPSTATUS, 0, NULL, fsetname, 
+                       sizeof(*lr_server), (char*)lr_server, 
+                       &size);
+        if (!hdr || IS_ERR(hdr)) {
+                EXIT;
+                return -PTR_ERR(hdr);
+        }
+
+        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
+        if (error)
+                CERROR("%s: error %d\n", __FUNCTION__, error);
+
+        EXIT;
+        return -error;
+}
+
+
+#if 0
+int izo_upc_client_make_branch(int minor, char *fsetname, char *tagname,
+                               char *branchname)
+{
+        int size, error;
+        struct izo_upcall_hdr *hdr;
+        ENTRY;
+
+        hdr = upc_pack(IZO_UPC_CLIENT_MAKE_BRANCH, strlen(tagname), tagname,
+                       fsetname, strlen(branchname) + 1, branchname, &size);
+        if (!hdr || IS_ERR(hdr)) {
+                error = -PTR_ERR(hdr);
+                goto error;
+        }
+
+        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
+        if (error)
+                CERROR("InterMezzo: error %d\n", error);
+
+ error:
+        EXIT;
+        return error;
+}
+#endif
+
+int izo_upc_server_make_branch(int minor, char *fsetname)
+{
+        int size, error;
+        struct izo_upcall_hdr *hdr;
+        ENTRY;
+
+        hdr = upc_pack(IZO_UPC_SERVER_MAKE_BRANCH, 0, NULL, fsetname, 0, NULL, &size);
+        if (!hdr || IS_ERR(hdr)) {
+                error = -PTR_ERR(hdr);
+                goto error;
+        }
+
+        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
+        if (error)
+                CERROR("InterMezzo: error %d\n", error);
+
+ error:
+        EXIT;
+        return -error;
+}
+
+int izo_upc_branch_undo(int minor, char *fsetname, char *branchname)
+{
+        int size;
+        int error;
+        struct izo_upcall_hdr *hdr;
+        ENTRY;
+
+        if (!presto_lento_up(minor)) {
+                EXIT;
+                return -EIO;
+        }
+
+        hdr = upc_pack(IZO_UPC_BRANCH_UNDO, strlen(branchname), branchname,
+                       fsetname, 0, NULL, &size);
+        if (!hdr || IS_ERR(hdr)) {
+                EXIT;
+                return -PTR_ERR(hdr);
+        }
+
+        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
+        if (error)
+                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);
+
+        EXIT;
+        return -error;
+}
+
+int izo_upc_branch_redo(int minor, char *fsetname, char *branchname)
+{
+        int size;
+        int error;
+        struct izo_upcall_hdr *hdr;
+        ENTRY;
+
+        if (!presto_lento_up(minor)) {
+                EXIT;
+                return -EIO;
+        }
+
+        hdr = upc_pack(IZO_UPC_BRANCH_REDO, strlen(branchname) + 1, branchname,
+                       fsetname, 0, NULL, &size);
+        if (!hdr || IS_ERR(hdr)) {
+                EXIT;
+                return -PTR_ERR(hdr);
+        }
+
+        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
+        if (error)
+                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);
+
+        EXIT;
+        return -error;
+}
diff --git a/fs/intermezzo/vfs.c b/fs/intermezzo/vfs.c
new file mode 100644 (file)
index 0000000..84b5882
--- /dev/null
@@ -0,0 +1,2416 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *  Copyright (C) 2000 Stelias Computing, Inc.
+ *  Copyright (C) 2000 Red Hat, Inc.
+ *
+ *   This file is part of InterMezzo, http://www.inter-mezzo.org.
+ *
+ *   InterMezzo is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   InterMezzo is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with InterMezzo; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * vfs.c
+ *
+ * This file implements kernel downcalls from lento.
+ *
+ * Author: Rob Simmonds <simmonds@stelias.com>
+ *         Andreas Dilger <adilger@stelias.com>
+ * Copyright (C) 2000 Stelias Computing Inc
+ * Copyright (C) 2000 Red Hat Inc.
+ *
+ * Extended attribute support
+ * Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc.
+ *
+ * This code is based on code from namei.c in the linux file system;
+ * see copyright notice below.
+ */
+
+/** namei.c copyright **/
+
+/*
+ *  linux/fs/namei.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+/*
+ * Some corrections by tytso.
+ */
+
+/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
+ * lookup logic.
+ */
+
+/** end of namei.c copyright **/
+
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/quotaops.h>
+
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+#include <asm/semaphore.h>
+#include <asm/pgtable.h>
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/genhd.h>
+
+#include "intermezzo_fs.h"
+#include "intermezzo_psdev.h"
+
+#ifdef CONFIG_FS_EXT_ATTR
+# include <linux/ext_attr.h>
+
+# if 0 /* was a broken check for Posix ACLs */
+#  include <linux/posix_acl.h>
+# endif
+#endif
+
+extern struct inode_operations presto_sym_iops;
+
+/* Write the last_rcvd values to the last_rcvd file.  We don't know what the
+ * UUID or last_ctime values are, so we have to read from the file first
+ * (sigh). 
+ * Exported for branch_reinter in kml_reint.c. */
+int presto_write_last_rcvd(struct rec_info *recinfo,
+                           struct presto_file_set *fset,
+                           struct lento_vfs_context *info)
+{
+        int rc;
+        struct izo_rcvd_rec rcvd_rec;
+
+        ENTRY;
+
+        memset(&rcvd_rec, 0, sizeof(rcvd_rec));
+        memcpy(rcvd_rec.lr_uuid, info->uuid, sizeof(rcvd_rec.lr_uuid));
+        rcvd_rec.lr_remote_recno = HTON__u64(info->recno);
+        rcvd_rec.lr_remote_offset = HTON__u64(info->kml_offset);
+        rcvd_rec.lr_local_recno = HTON__u64(recinfo->recno);
+        rcvd_rec.lr_local_offset = HTON__u64(recinfo->offset + recinfo->size);
+
+        rc = izo_rcvd_write(fset, &rcvd_rec);
+        if (rc < 0) {
+                /* izo_rcvd_write returns negative errors and non-negative
+                 * offsets */
+                CERROR("InterMezzo: izo_rcvd_write failed: %d\n", rc);
+                EXIT;
+                return rc;
+        }
+        EXIT;
+        return 0;
+}
+
+/*
+ * It's inline, so penalty for filesystems that don't use sticky bit is
+ * minimal.
+ */
+static inline int check_sticky(struct inode *dir, struct inode *inode)
+{
+        if (!(dir->i_mode & S_ISVTX))
+                return 0;
+        if (inode->i_uid == current->fsuid)
+                return 0;
+        if (dir->i_uid == current->fsuid)
+                return 0;
+        return !capable(CAP_FOWNER);
+}
+
+/* from linux/fs/namei.c */
+static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
+{
+        int error;
+        if (!victim->d_inode || victim->d_parent->d_inode != dir)
+                return -ENOENT;
+        error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
+        if (error)
+                return error;
+        if (IS_APPEND(dir))
+                return -EPERM;
+        if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
+            IS_IMMUTABLE(victim->d_inode))
+                return -EPERM;
+        if (isdir) {
+                if (!S_ISDIR(victim->d_inode->i_mode))
+                        return -ENOTDIR;
+                if (IS_ROOT(victim))
+                        return -EBUSY;
+        } else if (S_ISDIR(victim->d_inode->i_mode))
+                return -EISDIR;
+        return 0;
+}
+
+/* from linux/fs/namei.c */
+static inline int may_create(struct inode *dir, struct dentry *child) {
+        if (child->d_inode)
+                return -EEXIST;
+        if (IS_DEADDIR(dir))
+                return -ENOENT;
+        return permission(dir,MAY_WRITE | MAY_EXEC, NULL);
+}
+
+#ifdef PRESTO_DEBUG
+/* The loop_discard_io() function is available via a kernel patch to the
+ * loop block device.  It "works" by accepting writes, but throwing them
+ * away, rather than trying to write them to disk.  The old method worked
+ * by setting the underlying device read-only, but that has the problem
+ * that dirty buffers are kept in memory, and ext3 didn't like that at all.
+ */
+#ifdef CONFIG_LOOP_DISCARD
+#define BLKDEV_FAIL(dev,fail) loop_discard_io(dev,fail)
+#else
+#define BLKDEV_FAIL(dev,fail) set_device_ro(dev, 1)
+#endif
+
+/* If a breakpoint has been set via /proc/sys/intermezzo/intermezzoX/errorval,
+ * that is the same as "value", the underlying device will "fail" now.
+ */
+inline void presto_debug_fail_blkdev(struct presto_file_set *fset,
+                                     unsigned long value)
+{
+        int minor = presto_f2m(fset);
+        int errorval = izo_channels[minor].uc_errorval;
+       struct block_device *bdev = fset->fset_dentry->d_inode->i_sb->s_bdev;
+       char b[BDEVNAME_SIZE];
+
+        if (errorval && errorval == (long)value && !bdev_read_only(bdev)) {
+                CDEBUG(D_SUPER, "setting device %s read only\n",
+                               bdevname(bdev, b));
+                BLKDEV_FAIL(bdev, 1);
+                izo_channels[minor].uc_errorval = -bdev->bd_dev;
+        }
+}
+#else
+#define presto_debug_fail_blkdev(dev,value) do {} while (0)
+#endif
+
+
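+/* Decide, per operation, whether to journal to the KML (LENTO_FL_KML)
+ * and whether to update the last_rcvd file (LENTO_FL_EXPECT); both are
+ * suppressed while PRESTO_DONT_JOURNAL is set on the dentry. */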
+static inline int presto_do_kml(struct lento_vfs_context *info,
+                                struct dentry *dentry)
+{
+        if ( ! (info->flags & LENTO_FL_KML) )
+                return 0;
+        if ( presto_chk(dentry, PRESTO_DONT_JOURNAL) )
+                return 0;
+        return 1;
+}
+
+static inline int presto_do_rcvd(struct lento_vfs_context *info,
+                                 struct dentry *dentry)
+{
+        if ( ! (info->flags & LENTO_FL_EXPECT) ) 
+                return 0;
+        if ( presto_chk(dentry, PRESTO_DONT_JOURNAL) )
+                return 0;
+        return 1;
+}
+
+
+/* XXX fixme: this should not fail, all these dentries are in memory
+   when _we_ call this */
+int presto_settime(struct presto_file_set *fset, 
+                   struct dentry *newobj,
+                   struct dentry *parent,
+                   struct dentry *target,
+                   struct lento_vfs_context *ctx, 
+                   int valid)
+{
+        int error = 0;
+        struct dentry *dentry;
+        struct inode *inode;
+        struct inode_operations *iops;
+        struct iattr iattr;
+
+        ENTRY;
+        if (ctx->flags &  LENTO_FL_IGNORE_TIME ) { 
+                EXIT;
+                return 0;
+        }
+
+        iattr.ia_ctime = ctx->updated_time;
+        iattr.ia_mtime = ctx->updated_time;
+        iattr.ia_valid = valid;
+
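+        /* Apply the timestamp update to up to three dentries in turn:
+         * the parent (if LENTO_FL_TOUCH_PARENT), the new object (if
+         * LENTO_FL_TOUCH_NEWOBJ) and finally the target. */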
+        while (1) {
+                if (parent && ctx->flags & LENTO_FL_TOUCH_PARENT) {
+                        dentry = parent;
+                        parent = NULL;
+                } else if (newobj && ctx->flags & LENTO_FL_TOUCH_NEWOBJ) {
+                        dentry = newobj;
+                        newobj = NULL;
+                } else if (target) {
+                        dentry = target;
+                        target = NULL;
+                } else
+                        break;
+
+                inode = dentry->d_inode;
+
+                error = -EROFS;
+                if (IS_RDONLY(inode)) {
+                        EXIT;
+                        return -EROFS;
+                }
+
+                if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
+                        EXIT;
+                        return -EPERM;
+                }
+
+                error = -EPERM;
+                iops = filter_c2cdiops(fset->fset_cache->cache_filter); 
+                if (!iops) { 
+                        EXIT;
+                        return error;
+                }
+
+                if (iops->setattr != NULL)
+                        error = iops->setattr(dentry, &iattr);
+                else {
+                        error = 0;
+                        inode_setattr(dentry->d_inode, &iattr);
+                }
+        }
+        EXIT;
+        return error;
+}
+
+void izo_get_rollback_data(struct inode *inode, struct izo_rollback_data *rb)
+{
+        rb->rb_mode = (__u32)inode->i_mode;
+        rb->rb_rdev = (__u32)old_encode_dev(inode->i_rdev);
+        rb->rb_uid  = (__u64)inode->i_uid;
+        rb->rb_gid  = (__u64)inode->i_gid;
+}
+
+
+int presto_do_close(struct presto_file_set *fset, struct file *file)
+{
+        struct rec_info rec;
+        int rc = -ENOSPC; 
+        void *handle;
+        struct inode *inode = file->f_dentry->d_inode;
+        struct presto_file_data *fdata = 
+                (struct presto_file_data *)file->private_data;
+
+        ENTRY;
+        presto_getversion(&fdata->fd_info.remote_version, inode);
+
+        rc = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); 
+        if (rc) { 
+                EXIT;
+                return rc;
+        }
+
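+        /* Two transactions: the first journals the close record and the
+         * last_rcvd update; once it is committed, a second transaction
+         * cancels the LML record at fd_lml_offset and truncates the LML. */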
+        handle = presto_trans_start(fset, file->f_dentry->d_inode, 
+                                            KML_OPCODE_RELEASE);
+        if ( IS_ERR(handle) ) {
+                CERROR("presto_release: no space for transaction\n");
+                presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
+                return -ENOSPC;
+        }
+
+        if (fdata->fd_info.flags & LENTO_FL_KML) 
+                rc = presto_journal_close(&rec, fset, fdata, file->f_dentry,
+                                          &fdata->fd_version, 
+                                          &fdata->fd_info.remote_version);
+        if (rc) { 
+                CERROR("presto_close: cannot journal close\n");
+                goto out;
+        }
+
+        if (fdata->fd_info.flags & LENTO_FL_EXPECT) 
+                rc = presto_write_last_rcvd(&rec, fset, &fdata->fd_info);
+
+        if (rc) { 
+                CERROR("presto_close: cannot journal last_rcvd\n");
+                goto out;
+        }
+        presto_trans_commit(fset, handle); 
+        
+        /* cancel the LML record */ 
+        handle = presto_trans_start(fset, inode, KML_OPCODE_WRITE);
+        if ( IS_ERR(handle) ) {
+                CERROR("presto_release: no space for clear\n");
+                presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
+                return -ENOSPC;
+        }
+
+        rc = presto_clear_lml_close(fset, fdata->fd_lml_offset); 
+        if (rc < 0 ) { 
+                CERROR("presto_close: cannot journal close\n");
+                goto out;
+        }
+        presto_truncate_lml(fset);
+
+ out:
+        presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
+        presto_trans_commit(fset, handle); 
+        EXIT;
+        return rc;
+}
+
+int presto_do_setattr(struct presto_file_set *fset, struct dentry *dentry,
+                      struct iattr *iattr, struct lento_vfs_context *info)
+{
+        struct rec_info rec;
+        struct inode *inode = dentry->d_inode;
+        struct inode_operations *iops;
+        int error;
+        struct presto_version old_ver, new_ver;
+        struct izo_rollback_data rb;
+        void *handle;
+        loff_t old_size=inode->i_size;
+
+        ENTRY;
+        error = -EROFS;
+        if (IS_RDONLY(inode)) {
+                EXIT;
+                return -EROFS;
+        }
+
+        if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
+                EXIT;
+                return -EPERM;
+        }
+
+        presto_getversion(&old_ver, dentry->d_inode);
+        izo_get_rollback_data(dentry->d_inode, &rb);
+        error = -EPERM;
+        iops = filter_c2cdiops(fset->fset_cache->cache_filter); 
+
+        error = presto_reserve_space(fset->fset_cache, 2*PRESTO_REQHIGH); 
+        if (error) {
+                EXIT;
+                return error;
+        }
+
+        if  (iattr->ia_valid & ATTR_SIZE) {
+                if (izo_mark_dentry(dentry, ~PRESTO_DATA, 0, NULL) != 0)
+                        CERROR("izo_mark_dentry(inode %ld, ~PRESTO_DATA) "
+                               "failed\n", dentry->d_inode->i_ino);
+                handle = presto_trans_start(fset, dentry->d_inode,
+                                            KML_OPCODE_TRUNC);
+        } else {
+                handle = presto_trans_start(fset, dentry->d_inode,
+                                            KML_OPCODE_SETATTR);
+        }
+
+        if ( IS_ERR(handle) ) {
+                CERROR("presto_do_setattr: no space for transaction\n");
+                presto_release_space(fset->fset_cache, 2*PRESTO_REQHIGH); 
+                return -ENOSPC;
+        }
+
+        if (dentry->d_inode && iops && iops->setattr) {
+                error = iops->setattr(dentry, iattr);
+        } else {
+                error = inode_change_ok(dentry->d_inode, iattr);
+                if (!error) 
+                        inode_setattr(inode, iattr);
+        }
+
+        if (!error && (iattr->ia_valid & ATTR_SIZE))
+                vmtruncate(inode, iattr->ia_size);
+
+        if (error) {
+                EXIT;
+                goto exit;
+        }
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x10);
+
+        if ( presto_do_kml(info, dentry) ) {
+                if ((iattr->ia_valid & ATTR_SIZE) && (old_size != inode->i_size)) {
+                        /* Journal a close whenever we see a potential truncate
+                        * At the receiving end, lento should explicitly remove
+                        * ATTR_SIZE from the list of valid attributes */
+                        presto_getversion(&new_ver, inode);
+                        error = presto_journal_close(&rec, fset, NULL, dentry,
+                                                     &old_ver, &new_ver);
+                }
+
+                if (!error)
+                        error = presto_journal_setattr(&rec, fset, dentry,
+                                                       &old_ver, &rb, iattr);
+        }
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x20);
+        if ( presto_do_rcvd(info, dentry) )
+                error = presto_write_last_rcvd(&rec, fset, info);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x30);
+
+        EXIT;
+exit:
+        presto_release_space(fset->fset_cache, 2*PRESTO_REQHIGH); 
+        presto_trans_commit(fset, handle);
+        return error;
+}
+
+int lento_setattr(const char *name, struct iattr *iattr,
+                  struct lento_vfs_context *info)
+{
+        struct nameidata nd;
+        struct dentry *dentry;
+        struct presto_file_set *fset;
+        int error;
+#if 0 /* was a broken check for Posix ACLs */
+        int (*set_posix_acl)(struct inode *, int type, posix_acl_t *)=NULL;
+#endif
+
+        ENTRY;
+        CDEBUG(D_PIOCTL,"name %s, valid %#x, mode %#o, uid %d, gid %d, size %Ld\n",
+               name, iattr->ia_valid, iattr->ia_mode, iattr->ia_uid,
+               iattr->ia_gid, iattr->ia_size);
+        CDEBUG(D_PIOCTL, "atime %#lx, mtime %#lx, ctime %#lx, attr_flags %#x\n",
+               iattr->ia_atime.tv_sec, iattr->ia_mtime.tv_sec, iattr->ia_ctime.tv_sec,
+               iattr->ia_attr_flags);
+        CDEBUG(D_PIOCTL, "offset %d, recno %d, flags %#x\n",
+               info->slot_offset, info->recno, info->flags);
+
+        lock_kernel();
+        error = presto_walk(name, &nd);
+        if (error) {
+                EXIT;
+                goto exit;
+        }
+        dentry = nd.dentry;
+        
+        fset = presto_fset(dentry);
+        error = -EINVAL;
+        if ( !fset ) {
+                CERROR("No fileset!\n");
+                EXIT;
+                goto exit_lock;
+        }
+
+        /* NOTE: this prevents us from changing the filetype on setattr,
+         *       as we normally only want to change permission bits.
+         *       If this is not correct, then we need to fix the perl code
+         *       to always send the file type OR'ed with the permission.
+         */
+        if (iattr->ia_valid & ATTR_MODE) {
+                int set_mode = iattr->ia_mode;
+                iattr->ia_mode = (iattr->ia_mode & S_IALLUGO) |
+                                 (dentry->d_inode->i_mode & ~S_IALLUGO);
+                CDEBUG(D_PIOCTL, "chmod: orig %#o, set %#o, result %#o\n",
+                       dentry->d_inode->i_mode, set_mode, iattr->ia_mode);
+#if 0 /* was a broken check for Posix ACLs */
+                /* ACl code interacts badly with setattr 
+                 * since it tries to modify the ACL using 
+                 * set_ext_attr which recurses back into presto.  
+                 * This only happens if ATTR_MODE is set.
+                 * Here we are doing a "forced" mode set 
+                 * (initiated by lento), so we disable the 
+                 * set_posix_acl operation which 
+                 * prevents such recursion.  -SHP
+                 *
+                 * This will probably still be required when native
+                 * acl journalling is in place.
+                 */
+                set_posix_acl=dentry->d_inode->i_op->set_posix_acl;
+                dentry->d_inode->i_op->set_posix_acl=NULL;
+#endif
+        }
+
+        error = presto_do_setattr(fset, dentry, iattr, info);
+
+        if (info->flags & LENTO_FL_SET_DDFILEID) {
+                struct presto_dentry_data *dd = presto_d2d(dentry);
+                if (dd) {
+                        dd->remote_ino = info->remote_ino;
+                        dd->remote_generation = info->remote_generation;
+                }
+        }
+
+#if 0 /* was a broken check for Posix ACLs */
+        /* restore the inode_operations if we changed them*/
+        if (iattr->ia_valid & ATTR_MODE) 
+                dentry->d_inode->i_op->set_posix_acl=set_posix_acl;
+#endif
+
+
+        EXIT;
+exit_lock:
+        path_release(&nd);
+exit:
+        unlock_kernel();
+        return error;
+}
+
+int presto_do_create(struct presto_file_set *fset, struct dentry *dir,
+                     struct dentry *dentry, int mode,
+                     struct lento_vfs_context *info)
+{
+        struct rec_info rec;
+        int error;
+        struct presto_version tgt_dir_ver, new_file_ver;
+        struct inode_operations *iops;
+        void *handle;
+
+        ENTRY;
+        mode &= S_IALLUGO;
+        mode |= S_IFREG;
+
+        //        down(&dir->d_inode->i_zombie);
+        error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); 
+        if (error) {
+                EXIT;
+                //                up(&dir->d_inode->i_zombie);
+                return error;
+        }
+
+        error = may_create(dir->d_inode, dentry);
+        if (error) {
+                EXIT;
+                goto exit_pre_lock;
+        }
+
+        error = -EPERM;
+        iops = filter_c2cdiops(fset->fset_cache->cache_filter);
+        if (!iops->create) {
+                EXIT;
+                goto exit_pre_lock;
+        }
+
+        presto_getversion(&tgt_dir_ver, dir->d_inode);
+        handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_CREATE);
+        if ( IS_ERR(handle) ) {
+                EXIT;
+                presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
+                CERROR("presto_do_create: no space for transaction\n");
+                error=-ENOSPC;
+                goto exit_pre_lock;
+        }
+        DQUOT_INIT(dir->d_inode);
+        lock_kernel();
+        error = iops->create(dir->d_inode, dentry, mode, NULL);
+        if (error) {
+                EXIT;
+                goto exit_lock;
+        }
+
+        if (dentry->d_inode) {
+                struct presto_cache *cache = fset->fset_cache;
+                /* was this already done? */
+                presto_set_ops(dentry->d_inode, cache->cache_filter);
+
+                filter_setup_dentry_ops(cache->cache_filter, 
+                                        dentry->d_op, 
+                                        &presto_dentry_ops);
+                dentry->d_op = filter_c2udops(cache->cache_filter);
+
+                /* if Lento creates this file, we won't have data */
+                if ( ISLENTO(presto_c2m(cache)) ) {
+                        presto_set(dentry, PRESTO_ATTR);
+                } else {
+                        presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
+                }
+        }
+
+        info->flags |= LENTO_FL_TOUCH_PARENT;
+        error = presto_settime(fset, NULL, dir, dentry,
+                               info, ATTR_CTIME | ATTR_MTIME);
+        if (error) { 
+                EXIT;
+                goto exit_lock;
+        }
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x10);
+
+        if ( presto_do_kml(info, dentry) ) { 
+                presto_getversion(&new_file_ver, dentry->d_inode);
+                error = presto_journal_create(&rec, fset, dentry, &tgt_dir_ver,
+                                              &new_file_ver, 
+                                              dentry->d_inode->i_mode);
+        }
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x20);
+
+        if ( presto_do_rcvd(info, dentry) )
+                error = presto_write_last_rcvd(&rec, fset, info);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x30);
+
+        /* add inode dentry */
+        if (fset->fset_cache->cache_filter->o_trops->tr_add_ilookup ) { 
+                struct dentry *d;
+                d = fset->fset_cache->cache_filter->o_trops->tr_add_ilookup
+                        (dir->d_inode->i_sb->s_root, dentry);
+        }
+
+        EXIT;
+
+ exit_lock:
+        unlock_kernel();
+        presto_trans_commit(fset, handle);
+ exit_pre_lock:
+        presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
+        //        up(&dir->d_inode->i_zombie);
+        return error;
+}
+
+int lento_create(const char *name, int mode, struct lento_vfs_context *info)
+{
+        int error;
+        struct nameidata nd;
+        char * pathname;
+        struct dentry *dentry;
+        struct presto_file_set *fset;
+
+        ENTRY;
+        pathname = getname(name);
+        error = PTR_ERR(pathname);
+        if (IS_ERR(pathname)) {
+                EXIT;
+                goto exit;
+        }
+
+        /* this looks up the parent */
+        error = path_lookup(pathname,  LOOKUP_PARENT, &nd);
+        if (error) {
+                EXIT;
+                putname(pathname);
+                goto exit;
+        }
+        dentry = lookup_create(&nd, 0);
+        error = PTR_ERR(dentry);
+        if (IS_ERR(dentry)) {
+                EXIT;
+                goto exit_lock;
+        }
+
+        fset = presto_fset(dentry);
+        error = -EINVAL;
+        if ( !fset ) {
+                CERROR("No fileset!\n");
+                EXIT;
+                goto exit_lock;
+        }
+        error = presto_do_create(fset, dentry->d_parent, dentry, (mode&S_IALLUGO)|S_IFREG,
+                                 info);
+
+        EXIT;
+
+ exit_lock:
+        up(&nd.dentry->d_inode->i_sem);
+        if (!IS_ERR(dentry))
+                dput(dentry);
+        path_release(&nd);
+        putname(pathname);
+exit:
+        return error;
+}
+
+int presto_do_link(struct presto_file_set *fset, struct dentry *old_dentry,
+                   struct dentry *dir, struct dentry *new_dentry,
+                   struct lento_vfs_context *info)
+{
+        struct rec_info rec;
+        struct inode *inode;
+        int error;
+        struct inode_operations *iops;
+        struct presto_version tgt_dir_ver;
+        struct presto_version new_link_ver;
+        void *handle;
+
+        //        down(&dir->d_inode->i_zombie);
+        error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); 
+        if (error) {
+                EXIT;
+                //                up(&dir->d_inode->i_zombie);
+                return error;
+        }
+        error = -ENOENT;
+        inode = old_dentry->d_inode;
+        if (!inode)
+                goto exit_lock;
+
+        error = may_create(dir->d_inode, new_dentry);
+        if (error)
+                goto exit_lock;
+
+        error = -EXDEV;
+        if (dir->d_inode->i_sb != inode->i_sb)
+                goto exit_lock;
+
+        /*
+         * A link to an append-only or immutable file cannot be created.
+         */
+        error = -EPERM;
+        if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+                EXIT;
+                goto exit_lock;
+        }
+
+        iops = filter_c2cdiops(fset->fset_cache->cache_filter);
+        if (!iops->link) {
+                EXIT;
+                goto exit_lock;
+        }
+
+
+        presto_getversion(&tgt_dir_ver, dir->d_inode);
+        handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_LINK);
+        if ( IS_ERR(handle) ) {
+                presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
+                CERROR("presto_do_link: no space for transaction\n");
+                return -ENOSPC;
+        }
+
+        DQUOT_INIT(dir->d_inode);
+        lock_kernel();
+        error = iops->link(old_dentry, dir->d_inode, new_dentry);
+        unlock_kernel();
+        if (error) {
+                EXIT;
+                goto exit_lock;
+        }
+
+        /* link dd data to that of existing dentry */
+        old_dentry->d_op->d_release(new_dentry); 
+        if (!presto_d2d(old_dentry)) 
+                BUG();
+        presto_d2d(old_dentry)->dd_count++;
+
+        new_dentry->d_fsdata = presto_d2d(old_dentry);
+
+        info->flags |= LENTO_FL_TOUCH_PARENT;
+        error = presto_settime(fset, NULL, dir, new_dentry,
+                               info, ATTR_CTIME);
+        if (error) { 
+                EXIT;
+                goto exit_lock;
+        }
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x10);
+        presto_getversion(&new_link_ver, new_dentry->d_inode);
+        if ( presto_do_kml(info, old_dentry) )
+                error = presto_journal_link(&rec, fset, old_dentry, new_dentry,
+                                            &tgt_dir_ver, &new_link_ver);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x20);
+        if ( presto_do_rcvd(info, old_dentry) )
+                error = presto_write_last_rcvd(&rec, fset, info);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x30);
+        EXIT;
+        presto_trans_commit(fset, handle);
+exit_lock:
+        presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
+        //        up(&dir->d_inode->i_zombie);
+        return error;
+}
+
+
+int lento_link(const char * oldname, const char * newname, 
+                         struct lento_vfs_context *info)
+{
+        int error;
+        char * to;
+        struct presto_file_set *fset;
+
+        to = getname(newname);
+        error = PTR_ERR(to);
+        if (!IS_ERR(to)) {
+                struct dentry *new_dentry;
+                struct nameidata nd, old_nd;
+
+                error = __user_walk(oldname, 0, &old_nd);
+                if (error)
+                        goto exit;
+                error = path_lookup(to, LOOKUP_PARENT, &nd);
+                if (error)
+                        goto out;
+                error = -EXDEV;
+                if (old_nd.mnt != nd.mnt)
+                        goto out;
+                new_dentry = lookup_create(&nd, 0);
+                error = PTR_ERR(new_dentry);
+
+                if (!IS_ERR(new_dentry)) {
+                        fset = presto_fset(new_dentry);
+                        error = -EINVAL;
+                        if ( !fset ) {
+                                CERROR("No fileset!\n");
+                                EXIT;
+                                goto out2;
+                                dput(new_dentry);
+                                goto out2;
+                        error = presto_do_link(fset, old_nd.dentry, 
+                                               nd.dentry,
+                                               new_dentry, info);
+                        dput(new_dentry);
+                }
+        out2:
+                up(&nd.dentry->d_inode->i_sem);
+                path_release(&nd);
+        out:
+                path_release(&old_nd);
+        exit:
+                putname(to);
+        }
+        return error;
+}
+
+int presto_do_unlink(struct presto_file_set *fset, struct dentry *dir,
+                     struct dentry *dentry, struct lento_vfs_context *info)
+{
+        struct rec_info rec;
+        struct inode_operations *iops;
+        struct presto_version tgt_dir_ver, old_file_ver;
+        struct izo_rollback_data rb;
+        void *handle;
+        int do_kml = 0, do_rcvd = 0, linkno = 0, error, old_targetlen = 0;
+        char *old_target = NULL;
+
+        ENTRY;
+        //        down(&dir->d_inode->i_zombie);
+        error = may_delete(dir->d_inode, dentry, 0);
+        if (error) {
+                EXIT;
+                //                up(&dir->d_inode->i_zombie);
+                return error;
+        }
+
+        error = -EPERM;
+        iops = filter_c2cdiops(fset->fset_cache->cache_filter);
+        if (!iops->unlink) {
+                EXIT;
+                //                up(&dir->d_inode->i_zombie);
+                return error;
+        }
+
+        error = presto_reserve_space(fset->fset_cache, PRESTO_REQLOW); 
+        if (error) {
+                EXIT;
+                //                up(&dir->d_inode->i_zombie);
+                return error;
+        }
+
+
+        if (presto_d2d(dentry)) { 
+                struct presto_dentry_data *dd = presto_d2d(dentry); 
+                struct dentry *de = dd->dd_inodentry;
+                if (de && dentry->d_inode->i_nlink == 1) { 
+                        dd->dd_count--;
+                        dd->dd_inodentry = NULL; 
+                        de->d_fsdata = NULL; 
+                        atomic_dec(&de->d_inode->i_count); 
+                        de->d_inode = NULL;
+                        dput(de); 
+                }
+        }
+
+        presto_getversion(&tgt_dir_ver, dir->d_inode);
+        presto_getversion(&old_file_ver, dentry->d_inode);
+        izo_get_rollback_data(dentry->d_inode, &rb);
+        handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_UNLINK);
+        if ( IS_ERR(handle) ) {
+                presto_release_space(fset->fset_cache, PRESTO_REQLOW); 
+                CERROR("ERROR: presto_do_unlink: no space for transaction. Tell Peter.\n");
+                //                up(&dir->d_inode->i_zombie);
+                return -ENOSPC;
+        }
+        DQUOT_INIT(dir->d_inode);
+        if (d_mountpoint(dentry))
+                error = -EBUSY;
+        else {
+                lock_kernel();
+                linkno = dentry->d_inode->i_nlink;
+                if (linkno > 1) {
+                        dget(dentry);
+                }
+
+                if (S_ISLNK(dentry->d_inode->i_mode)) {
+                        mm_segment_t old_fs;
+                        struct inode_operations *riops;
+                        riops = filter_c2csiops(fset->fset_cache->cache_filter);
+
+                        PRESTO_ALLOC(old_target, PATH_MAX);
+                        if (old_target == NULL) {
+                                error = -ENOMEM;
+                                EXIT;
+                                goto exit;
+                        }
+
+                        old_fs = get_fs();
+                        set_fs(get_ds());
+
+                        if (riops->readlink == NULL)
+                                CERROR("InterMezzo %s: no readlink iops.\n",
+                                       __FUNCTION__);
+                        else
+                                old_targetlen =
+                                        riops->readlink(dentry, old_target,
+                                                        PATH_MAX);
+                        if (old_targetlen < 0) {
+                                CERROR("InterMezzo: readlink failed: %d\n",
+                                       old_targetlen);
+                                PRESTO_FREE(old_target, PATH_MAX);
+                                old_target = NULL;
+                                old_targetlen = 0;
+                        }
+                        set_fs(old_fs);
+                }
+
+                do_kml = presto_do_kml(info, dir);
+                do_rcvd = presto_do_rcvd(info, dir);
+                error = iops->unlink(dir->d_inode, dentry);
+                unlock_kernel();
+        }
+
+        if (linkno > 1) { 
+                /* FIXME: Combine this with the next call? */
+                error = presto_settime(fset, NULL, NULL, dentry,
+                                       info, ATTR_CTIME);
+                dput(dentry); 
+                if (error) { 
+                        EXIT;
+                        goto exit;
+                }
+        }
+
+        error = presto_settime(fset, NULL, NULL, dir,
+                               info, ATTR_CTIME | ATTR_MTIME);
+        if (error) { 
+                EXIT;
+                goto exit;
+        }
+
+        //        up(&dir->d_inode->i_zombie);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x10);
+        if ( do_kml )
+                error = presto_journal_unlink(&rec, fset, dir, &tgt_dir_ver,
+                                              &old_file_ver, &rb, dentry,
+                                              old_target, old_targetlen);
+        presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x20);
+        if ( do_rcvd ) { 
+                error = presto_write_last_rcvd(&rec, fset, info);
+        }
+        presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x30);
+        EXIT;
+exit:
+        presto_release_space(fset->fset_cache, PRESTO_REQLOW); 
+        presto_trans_commit(fset, handle);
+        if (old_target != NULL)
+                PRESTO_FREE(old_target, PATH_MAX);
+        return error;
+}
+
+
+int lento_unlink(const char *pathname, struct lento_vfs_context *info)
+{
+        int error = 0;
+        char * name;
+        struct dentry *dentry;
+        struct nameidata nd;
+        struct presto_file_set *fset;
+
+        ENTRY;
+
+        name = getname(pathname);
+        if(IS_ERR(name))
+                return PTR_ERR(name);
+
+        error = path_lookup(name, LOOKUP_PARENT, &nd);
+        if (error)
+                goto exit;
+        error = -EISDIR;
+        if (nd.last_type != LAST_NORM)
+                goto exit1;
+        down(&nd.dentry->d_inode->i_sem);
+        dentry = lookup_hash(&nd.last, nd.dentry);
+        error = PTR_ERR(dentry);
+        if (!IS_ERR(dentry)) {
+                fset = presto_fset(dentry);
+                error = -EINVAL;
+                if ( !fset ) {
+                        CERROR("No fileset!\n");
+                        EXIT;
+                        goto exit2;
+                }
+                /* Why not before? Because we want correct error value */
+                if (nd.last.name[nd.last.len])
+                        goto slashes;
+                error = presto_do_unlink(fset, nd.dentry, dentry, info);
+                if (!error)
+                        d_delete(dentry);
+        exit2:
+                EXIT;
+                dput(dentry);
+        }
+        up(&nd.dentry->d_inode->i_sem);
+exit1:
+        path_release(&nd);
+exit:
+        putname(name);
+
+        return error;
+
+slashes:
+        error = !dentry->d_inode ? -ENOENT :
+                S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
+        goto exit2;
+}
+
+int presto_do_symlink(struct presto_file_set *fset, struct dentry *dir,
+                      struct dentry *dentry, const char *oldname,
+                      struct lento_vfs_context *info)
+{
+        struct rec_info rec;
+        int error;
+        struct presto_version tgt_dir_ver, new_link_ver;
+        struct inode_operations *iops;
+        void *handle;
+
+        ENTRY;
+        //        down(&dir->d_inode->i_zombie);
+        /* record + max path len + space to free */ 
+        error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH + 4096); 
+        if (error) {
+                EXIT;
+                //                up(&dir->d_inode->i_zombie);
+                return error;
+        }
+
+        error = may_create(dir->d_inode, dentry);
+        if (error) {
+                EXIT;
+                goto exit_lock;
+        }
+
+        error = -EPERM;
+        iops = filter_c2cdiops(fset->fset_cache->cache_filter);
+        if (!iops->symlink) {
+                EXIT;
+                goto exit_lock;
+        }
+
+        presto_getversion(&tgt_dir_ver, dir->d_inode);
+        handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_SYMLINK);
+        if ( IS_ERR(handle) ) {
+                presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); 
+                CERROR("ERROR: presto_do_symlink: no space for transaction. Tell Peter.\n"); 
+                EXIT;
+                //                up(&dir->d_inode->i_zombie);
+                return -ENOSPC;
+        }
+        DQUOT_INIT(dir->d_inode);
+        lock_kernel();
+        error = iops->symlink(dir->d_inode, dentry, oldname);
+        if (error) {
+                EXIT;
+                goto exit;
+        }
+
+        if (dentry->d_inode) {
+                struct presto_cache *cache = fset->fset_cache;
+                
+                presto_set_ops(dentry->d_inode, cache->cache_filter);
+
+                filter_setup_dentry_ops(cache->cache_filter, dentry->d_op, 
+                                        &presto_dentry_ops);
+                dentry->d_op = filter_c2udops(cache->cache_filter);
+                /* XXX ? Cache state ? if Lento creates a symlink */
+                if ( ISLENTO(presto_c2m(cache)) ) {
+                        presto_set(dentry, PRESTO_ATTR);
+                } else {
+                        presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
+                }
+        }
+
+        info->flags |= LENTO_FL_TOUCH_PARENT;
+        error = presto_settime(fset, NULL, dir, dentry,
+                               info, ATTR_CTIME | ATTR_MTIME);
+        if (error) { 
+                EXIT;
+                goto exit;
+        }
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x10);
+        presto_getversion(&new_link_ver, dentry->d_inode);
+        if ( presto_do_kml(info, dentry) )
+                error = presto_journal_symlink(&rec, fset, dentry, oldname,
+                                               &tgt_dir_ver, &new_link_ver);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x20);
+        if ( presto_do_rcvd(info, dentry) )
+                error = presto_write_last_rcvd(&rec, fset, info);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x30);
+        EXIT;
+exit:
+        unlock_kernel();
+        presto_trans_commit(fset, handle);
+ exit_lock:
+        presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); 
+        //        up(&dir->d_inode->i_zombie);
+        return error;
+}
+
+int lento_symlink(const char *oldname, const char *newname,
+                  struct lento_vfs_context *info)
+{
+        int error;
+        char *from;
+        char *to;
+        struct dentry *dentry;
+        struct presto_file_set *fset;
+        struct nameidata nd;
+
+        ENTRY;
+        lock_kernel();
+        from = getname(oldname);
+        error = PTR_ERR(from);
+        if (IS_ERR(from)) {
+                EXIT;
+                goto exit;
+        }
+
+        to = getname(newname);
+        error = PTR_ERR(to);
+        if (IS_ERR(to)) {
+                EXIT;
+                goto exit_from;
+        }
+
+        error = path_lookup(to, LOOKUP_PARENT, &nd);
+        if (error) {
+                EXIT;
+                goto exit_to;
+        }
+
+        dentry = lookup_create(&nd, 0);
+        error = PTR_ERR(dentry);
+        if (IS_ERR(dentry)) {
+                path_release(&nd);
+                EXIT;
+                goto exit_to;
+        }
+
+        fset = presto_fset(dentry);
+        error = -EINVAL;
+        if ( !fset ) {
+                CERROR("No fileset!\n");
+                EXIT;
+                goto exit_lock;
+        }
+        error = presto_do_symlink(fset, nd.dentry,
+                                  dentry, from, info);
+        EXIT;
+ exit_lock:
+        up(&nd.dentry->d_inode->i_sem);
+        dput(dentry);
+        path_release(&nd);
+ exit_to:
+        putname(to);
+ exit_from:
+        putname(from);
+ exit:
+        unlock_kernel();
+        return error;
+}
+
+int presto_do_mkdir(struct presto_file_set *fset, struct dentry *dir,
+                    struct dentry *dentry, int mode,
+                    struct lento_vfs_context *info)
+{
+        struct rec_info rec;
+        int error;
+        struct presto_version tgt_dir_ver, new_dir_ver;
+        void *handle;
+
+        ENTRY;
+        //        down(&dir->d_inode->i_zombie);
+
+        /* one journal record + directory block + room for removals*/ 
+        error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH + 4096); 
+        if (error) { 
+                EXIT;
+                //                up(&dir->d_inode->i_zombie);
+                return error;
+        }
+
+        error = may_create(dir->d_inode, dentry);
+        if (error) {
+                EXIT;
+                goto exit_lock;
+        }
+
+        error = -EPERM;
+        if (!filter_c2cdiops(fset->fset_cache->cache_filter)->mkdir) {
+                EXIT;
+                goto exit_lock;
+        }
+
+        error = -ENOSPC;
+        presto_getversion(&tgt_dir_ver, dir->d_inode);
+        handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_MKDIR);
+        if ( IS_ERR(handle) ) {
+                presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); 
+                CERROR("presto_do_mkdir: no space for transaction\n");
+                goto exit_lock;
+        }
+
+        DQUOT_INIT(dir->d_inode);
+        mode &= (S_IRWXUGO|S_ISVTX);
+        lock_kernel();
+        error = filter_c2cdiops(fset->fset_cache->cache_filter)->mkdir(dir->d_inode, dentry, mode);
+        if (error) {
+                EXIT;
+                goto exit;
+        }
+
+        if ( dentry->d_inode && !error) {
+                struct presto_cache *cache = fset->fset_cache;
+
+                presto_set_ops(dentry->d_inode, cache->cache_filter);
+
+                filter_setup_dentry_ops(cache->cache_filter, 
+                                        dentry->d_op, 
+                                        &presto_dentry_ops);
+                dentry->d_op = filter_c2udops(cache->cache_filter);
+                /* if Lento does this, we won't have data */
+                if ( ISLENTO(presto_c2m(cache)) ) {
+                        presto_set(dentry, PRESTO_ATTR);
+                } else {
+                        presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
+                }
+        }
+
+        info->flags |= LENTO_FL_TOUCH_PARENT;
+        error = presto_settime(fset, NULL, dir, dentry,
+                             info, ATTR_CTIME | ATTR_MTIME);
+        if (error) { 
+                EXIT;
+                goto exit;
+        }
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x10);
+        presto_getversion(&new_dir_ver, dentry->d_inode);
+        if ( presto_do_kml(info, dir) )
+                error = presto_journal_mkdir(&rec, fset, dentry, &tgt_dir_ver,
+                                             &new_dir_ver, 
+                                             dentry->d_inode->i_mode);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x20);
+        if ( presto_do_rcvd(info, dentry) )
+                error = presto_write_last_rcvd(&rec, fset, info);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x30);
+        EXIT;
+exit:
+        unlock_kernel();
+        presto_trans_commit(fset, handle);
+ exit_lock:
+        presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); 
+        //        up(&dir->d_inode->i_zombie);
+        return error;
+}
+
+/*
+ * Look out: this function may change a normal dentry
+ * into a directory dentry (different size)..
+ */
+int lento_mkdir(const char *name, int mode, struct lento_vfs_context *info)
+{
+        int error;
+        char *pathname;
+        struct dentry *dentry;
+        struct presto_file_set *fset;
+        struct nameidata nd;
+
+        ENTRY;
+        CDEBUG(D_PIOCTL, "name: %s, mode %o, offset %d, recno %d, flags %x\n",
+               name, mode, info->slot_offset, info->recno, info->flags);
+        pathname = getname(name);
+        error = PTR_ERR(pathname);
+        if (IS_ERR(pathname)) {
+                EXIT;
+                return error;
+        }
+
+        error = path_lookup(pathname, LOOKUP_PARENT, &nd);
+        if (error)
+                goto out_name;
+
+        dentry = lookup_create(&nd, 1);
+        error = PTR_ERR(dentry);
+        if (!IS_ERR(dentry)) {
+                fset = presto_fset(dentry);
+                error = -EINVAL;
+                if (!fset) {
+                        CERROR("No fileset!\n");
+                        EXIT;
+                        goto out_dput;
+                }
+
+                error = presto_do_mkdir(fset, nd.dentry, dentry, 
+                                        mode & S_IALLUGO, info);
+out_dput:
+                dput(dentry);
+        }
+        up(&nd.dentry->d_inode->i_sem);
+        path_release(&nd);
+out_name:
+        EXIT;
+        putname(pathname);
+        CDEBUG(D_PIOCTL, "error: %d\n", error);
+        return error;
+}
+
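+/*
+ * Drop the dentry from the dcache before removal: shrink away unused
+ * children first, and only d_drop() it once the remaining references are
+ * the caller's plus the one taken by the dget() below.
+ */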
+static void d_unhash(struct dentry *dentry)
+{
+        dget(dentry);
+        switch (atomic_read(&dentry->d_count)) {
+        default:
+                shrink_dcache_parent(dentry);
+                if (atomic_read(&dentry->d_count) != 2)
+                        break;
+        case 2:
+                d_drop(dentry);
+        }
+}
+
+int presto_do_rmdir(struct presto_file_set *fset, struct dentry *dir,
+                    struct dentry *dentry, struct lento_vfs_context *info)
+{
+        struct rec_info rec;
+        int error;
+        struct presto_version tgt_dir_ver, old_dir_ver;
+        struct izo_rollback_data rb;
+        struct inode_operations *iops;
+        void *handle;
+        int do_kml, do_rcvd;
+        int size;
+
+        ENTRY;
+        error = may_delete(dir->d_inode, dentry, 1);
+        if (error)
+                return error;
+
+        error = -EPERM;
+        iops = filter_c2cdiops(fset->fset_cache->cache_filter);
+        if (!iops->rmdir) {
+                EXIT;
+                return error;
+        }
+
+        size = PRESTO_REQHIGH - dentry->d_inode->i_size; 
+        error = presto_reserve_space(fset->fset_cache, size); 
+        if (error) { 
+                EXIT;
+                return error;
+        }
+
+        presto_getversion(&tgt_dir_ver, dir->d_inode);
+        presto_getversion(&old_dir_ver, dentry->d_inode);
+        izo_get_rollback_data(dentry->d_inode, &rb);
+        handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_RMDIR);
+        if ( IS_ERR(handle) ) {
+                presto_release_space(fset->fset_cache, size); 
+                CERROR("ERROR: presto_do_rmdir: no space for transaction. Tell Peter.\n");
+                return -ENOSPC;
+        }
+
+        DQUOT_INIT(dir->d_inode);
+
+        do_kml = presto_do_kml(info, dir);
+        do_rcvd = presto_do_rcvd(info, dir);
+
+        //        double_down(&dir->d_inode->i_zombie, &dentry->d_inode->i_zombie);
+        d_unhash(dentry);
+        if (IS_DEADDIR(dir->d_inode))
+                error = -ENOENT;
+        else if (d_mountpoint(dentry)) {
+                CERROR("foo: d_mountpoint(dentry): ino %ld\n",
+                       dentry->d_inode->i_ino);
+                error = -EBUSY;
+        } else {
+                lock_kernel();
+                error = iops->rmdir(dir->d_inode, dentry);
+                unlock_kernel();
+                if (!error) {
+                        dentry->d_inode->i_flags |= S_DEAD;
+                        error = presto_settime(fset, NULL, NULL, dir, info,
+                                               ATTR_CTIME | ATTR_MTIME);
+                }
+        }
+        //        double_up(&dir->d_inode->i_zombie, &dentry->d_inode->i_zombie);
+        if (!error)
+                d_delete(dentry);
+        dput(dentry);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x10);
+        if ( !error && do_kml )
+                error = presto_journal_rmdir(&rec, fset, dir, &tgt_dir_ver,
+                                             &old_dir_ver, &rb,
+                                             dentry->d_name.len,
+                                             dentry->d_name.name);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x20);
+        if ( !error && do_rcvd ) 
+                error = presto_write_last_rcvd(&rec, fset, info);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x30);
+        EXIT;
+
+        presto_trans_commit(fset, handle);
+        presto_release_space(fset->fset_cache, size); 
+        return error;
+}
+
+int lento_rmdir(const char *pathname, struct lento_vfs_context *info)
+{
+        int error = 0;
+        char * name;
+        struct dentry *dentry;
+        struct presto_file_set *fset;
+        struct nameidata nd;
+
+        ENTRY;
+        name = getname(pathname);
+        if(IS_ERR(name)) {
+                EXIT;
+                return PTR_ERR(name);
+        }
+
+        error = path_lookup(name, LOOKUP_PARENT, &nd);
+        if (error) {
+                EXIT;
+                goto exit;
+        }
+        switch(nd.last_type) {
+        case LAST_DOTDOT:
+                error = -ENOTEMPTY;
+                EXIT;
+                goto exit1;
+        case LAST_ROOT:
+        case LAST_DOT:
+                error = -EBUSY;
+                EXIT;
+                goto exit1;
+        }
+        down(&nd.dentry->d_inode->i_sem);
+        dentry = lookup_hash(&nd.last, nd.dentry);
+        error = PTR_ERR(dentry);
+        if (!IS_ERR(dentry)) {
+                fset = presto_fset(dentry);
+                error = -EINVAL;
+                if ( !fset ) {
+                        CERROR("No fileset!\n");
+                        EXIT;
+                        goto exit_put;
+                }
+                error = presto_do_rmdir(fset, nd.dentry, dentry, info);
+        exit_put:
+                dput(dentry);
+        }
+        up(&nd.dentry->d_inode->i_sem);
+exit1:
+        path_release(&nd);
+exit:
+        putname(name);
+        EXIT;
+        return error;
+}
+
+int presto_do_mknod(struct presto_file_set *fset, struct dentry *dir,
+                    struct dentry *dentry, int mode, dev_t dev,
+                    struct lento_vfs_context *info)
+{
+        struct rec_info rec;
+        int error = -EPERM;
+        struct presto_version tgt_dir_ver, new_node_ver;
+        struct inode_operations *iops;
+        void *handle;
+
+        ENTRY;
+
+        //        down(&dir->d_inode->i_zombie);
+        /* one KML entry */ 
+        error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); 
+        if (error) {
+                EXIT;
+                //                up(&dir->d_inode->i_zombie);
+                return error;
+        }
+
+        if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) {
+                EXIT;
+                goto exit_lock;
+        }
+
+        error = may_create(dir->d_inode, dentry);
+        if (error) {
+                EXIT;
+                goto exit_lock;
+        }
+
+        error = -EPERM;
+        iops = filter_c2cdiops(fset->fset_cache->cache_filter);
+        if (!iops->mknod) {
+                EXIT;
+                goto exit_lock;
+        }
+
+        DQUOT_INIT(dir->d_inode);
+        lock_kernel();
+        
+        error = -ENOSPC;
+        presto_getversion(&tgt_dir_ver, dir->d_inode);
+        handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_MKNOD);
+        if ( IS_ERR(handle) ) {
+                presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
+                CERROR("presto_do_mknod: no space for transaction\n");
+                goto exit_lock2;
+        }
+
+        error = iops->mknod(dir->d_inode, dentry, mode, dev);
+        if (error) {
+                EXIT;
+                goto exit_commit;
+        }
+        if ( dentry->d_inode) {
+                struct presto_cache *cache = fset->fset_cache;
+
+                presto_set_ops(dentry->d_inode, cache->cache_filter);
+
+                filter_setup_dentry_ops(cache->cache_filter, dentry->d_op, 
+                                        &presto_dentry_ops);
+                dentry->d_op = filter_c2udops(cache->cache_filter);
+
+                /* if Lento does this, we won't have data */
+                if ( ISLENTO(presto_c2m(cache)) ) {
+                        presto_set(dentry, PRESTO_ATTR);
+                } else {
+                        presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
+                }
+        }
+
+        error = presto_settime(fset, NULL, NULL, dir,
+                               info, ATTR_MTIME);
+        if (error) { 
+                EXIT;
+        }
+        error = presto_settime(fset, NULL, NULL, dentry,
+                               info, ATTR_CTIME | ATTR_MTIME);
+        if (error) { 
+                EXIT;
+        }
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x10);
+        presto_getversion(&new_node_ver, dentry->d_inode);
+        if ( presto_do_kml(info, dentry) )
+                error = presto_journal_mknod(&rec, fset, dentry, &tgt_dir_ver,
+                                             &new_node_ver, 
+                                             dentry->d_inode->i_mode,
+                                             MAJOR(dev), MINOR(dev) );
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x20);
+        if ( presto_do_rcvd(info, dentry) )
+                error = presto_write_last_rcvd(&rec, fset, info);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x30);
+        EXIT;
+ exit_commit:
+        presto_trans_commit(fset, handle);
+ exit_lock2:
+        unlock_kernel();
+ exit_lock:
+        presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
+        //        up(&dir->d_inode->i_zombie);
+        return error;
+}
+
+int lento_mknod(const char *filename, int mode, dev_t dev,
+                struct lento_vfs_context *info)
+{
+        int error = 0;
+        char * tmp;
+        struct dentry * dentry;
+        struct nameidata nd;
+        struct presto_file_set *fset;
+
+        ENTRY;
+
+        if (S_ISDIR(mode))
+                return -EPERM;
+        tmp = getname(filename);
+        if (IS_ERR(tmp))
+                return PTR_ERR(tmp);
+
+        error = path_lookup(tmp, LOOKUP_PARENT, &nd);
+        if (error)
+                goto out;
+        dentry = lookup_create(&nd, 0);
+        error = PTR_ERR(dentry);
+        if (!IS_ERR(dentry)) {
+                fset = presto_fset(dentry);
+                error = -EINVAL;
+                if ( !fset ) {
+                        CERROR("No fileset!\n");
+                        EXIT;
+                        goto exit_put;
+                }
+                switch (mode & S_IFMT) {
+                case 0: case S_IFREG:
+                        error = -EOPNOTSUPP;
+                        break;
+                case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
+                        error = presto_do_mknod(fset, nd.dentry, dentry, 
+                                                mode, dev, info);
+                        break;
+                case S_IFDIR:
+                        error = -EPERM;
+                        break;
+                default:
+                        error = -EINVAL;
+                }
+        exit_put:
+                dput(dentry);
+        }
+        up(&nd.dentry->d_inode->i_sem);
+        path_release(&nd);
+out:
+        putname(tmp);
+
+        return error;
+}
+
+int do_rename(struct presto_file_set *fset,
+                     struct dentry *old_parent, struct dentry *old_dentry,
+                     struct dentry *new_parent, struct dentry *new_dentry,
+                     struct lento_vfs_context *info)
+{
+        struct rec_info rec;
+        int error;
+        struct inode_operations *iops;
+        struct presto_version src_dir_ver, tgt_dir_ver;
+        void *handle;
+        int new_inode_unlink = 0;
+        struct inode *old_dir = old_parent->d_inode;
+        struct inode *new_dir = new_parent->d_inode;
+
+        ENTRY;
+        presto_getversion(&src_dir_ver, old_dir);
+        presto_getversion(&tgt_dir_ver, new_dir);
+
+        error = -EPERM;
+        iops = filter_c2cdiops(fset->fset_cache->cache_filter);
+        if (!iops || !iops->rename) {
+                EXIT;
+                return error;
+        }
+
+        error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); 
+        if (error) {
+                EXIT;
+                return error;
+        }
+        handle = presto_trans_start(fset, old_dir, KML_OPCODE_RENAME);
+        if ( IS_ERR(handle) ) {
+                presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
+                CERROR("presto_do_rename: no space for transaction\n");
+                return -ENOSPC;
+        }
+        if (new_dentry->d_inode && new_dentry->d_inode->i_nlink > 1) { 
+                dget(new_dentry); 
+                new_inode_unlink = 1;
+        }
+
+        error = iops->rename(old_dir, old_dentry, new_dir, new_dentry);
+
+        if (error) {
+                EXIT;
+                goto exit;
+        }
+
+        if (new_inode_unlink) { 
+                error = presto_settime(fset, NULL, NULL, old_dentry,
+                                       info, ATTR_CTIME);
+                dput(old_dentry); 
+                if (error) { 
+                        EXIT;
+                        goto exit;
+                }
+        }
+        info->flags |= LENTO_FL_TOUCH_PARENT;
+        error = presto_settime(fset, NULL, new_parent, old_parent,
+                               info, ATTR_CTIME | ATTR_MTIME);
+        if (error) { 
+                EXIT;
+                goto exit;
+        }
+
+        /* XXX make a distinction between cross file set
+         * and intra file set renames here
+         */
+        presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x10);
+        if ( presto_do_kml(info, old_dentry) )
+                error = presto_journal_rename(&rec, fset, old_dentry,
+                                              new_dentry,
+                                              &src_dir_ver, &tgt_dir_ver);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x20);
+
+        if ( presto_do_rcvd(info, old_dentry) )
+                error = presto_write_last_rcvd(&rec, fset, info);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x30);
+        EXIT;
+exit:
+        presto_trans_commit(fset, handle);
+        presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
+        return error;
+}
+
+static
+int presto_rename_dir(struct presto_file_set *fset, struct dentry *old_parent,
+                      struct dentry *old_dentry, struct dentry *new_parent,
+                      struct dentry *new_dentry, struct lento_vfs_context *info)
+{
+        int error;
+        struct inode *target;
+        struct inode *old_dir = old_parent->d_inode;
+        struct inode *new_dir = new_parent->d_inode;
+
+        if (old_dentry->d_inode == new_dentry->d_inode)
+                return 0;
+
+        error = may_delete(old_dir, old_dentry, 1);
+        if (error)
+                return error;
+
+        if (new_dir->i_sb != old_dir->i_sb)
+                return -EXDEV;
+
+        if (!new_dentry->d_inode)
+                error = may_create(new_dir, new_dentry);
+        else
+                error = may_delete(new_dir, new_dentry, 1);
+        if (error)
+                return error;
+
+        if (!old_dir->i_op || !old_dir->i_op->rename)
+                return -EPERM;
+
+        /*
+         * If we are going to change the parent - check write permissions,
+         * we'll need to flip '..'.
+         */
+        if (new_dir != old_dir) {
+                error = permission(old_dentry->d_inode, MAY_WRITE, NULL);
+        }
+        if (error)
+                return error;
+
+        DQUOT_INIT(old_dir);
+        DQUOT_INIT(new_dir);
+        down(&old_dir->i_sb->s_vfs_rename_sem);
+        error = -EINVAL;
+        if (is_subdir(new_dentry, old_dentry))
+                goto out_unlock;
+        target = new_dentry->d_inode;
+        if (target) { /* Hastur! Hastur! Hastur! */
+                //                triple_down(&old_dir->i_zombie,
+                //                            &new_dir->i_zombie,
+                //                            &target->i_zombie);
+                d_unhash(new_dentry);
+        } else {
+                //                double_down(&old_dir->i_zombie,
+                //                            &new_dir->i_zombie);
+        }
+        if (IS_DEADDIR(old_dir)||IS_DEADDIR(new_dir))
+                error = -ENOENT;
+        else if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
+                error = -EBUSY;
+        else 
+                error = do_rename(fset, old_parent, old_dentry,
+                                         new_parent, new_dentry, info);
+        if (target) {
+                if (!error)
+                        target->i_flags |= S_DEAD;
+                //                triple_up(&old_dir->i_zombie,
+                //                          &new_dir->i_zombie,
+                //                          &target->i_zombie);
+                if (d_unhashed(new_dentry))
+                        d_rehash(new_dentry);
+                dput(new_dentry);
+        } else {
+                //                double_up(&old_dir->i_zombie,
+                //                          &new_dir->i_zombie);
+        }
+
+        if (!error)
+                d_move(old_dentry,new_dentry);
+out_unlock:
+        up(&old_dir->i_sb->s_vfs_rename_sem);
+        return error;
+}
+
+static
+int presto_rename_other(struct presto_file_set *fset, struct dentry *old_parent,
+                        struct dentry *old_dentry, struct dentry *new_parent,
+                        struct dentry *new_dentry, struct lento_vfs_context *info)
+{
+        struct inode *old_dir = old_parent->d_inode;
+        struct inode *new_dir = new_parent->d_inode;
+        int error;
+
+        if (old_dentry->d_inode == new_dentry->d_inode)
+                return 0;
+
+        error = may_delete(old_dir, old_dentry, 0);
+        if (error)
+                return error;
+
+        if (new_dir->i_sb != old_dir->i_sb)
+                return -EXDEV;
+
+        if (!new_dentry->d_inode)
+                error = may_create(new_dir, new_dentry);
+        else
+                error = may_delete(new_dir, new_dentry, 0);
+        if (error)
+                return error;
+
+        if (!old_dir->i_op || !old_dir->i_op->rename)
+                return -EPERM;
+
+        DQUOT_INIT(old_dir);
+        DQUOT_INIT(new_dir);
+        //        double_down(&old_dir->i_zombie, &new_dir->i_zombie);
+        if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
+                error = -EBUSY;
+        else
+                error = do_rename(fset, old_parent, old_dentry,
+                                  new_parent, new_dentry, info);
+        //        double_up(&old_dir->i_zombie, &new_dir->i_zombie);
+        if (error)
+                return error;
+        /* The following d_move() should become unconditional */
+        if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) {
+                d_move(old_dentry, new_dentry);
+        }
+        return 0;
+}
+
+int presto_do_rename(struct presto_file_set *fset, 
+              struct dentry *old_parent, struct dentry *old_dentry,
+              struct dentry *new_parent, struct dentry *new_dentry,
+              struct lento_vfs_context *info)
+{
+        if (S_ISDIR(old_dentry->d_inode->i_mode))
+                return presto_rename_dir(fset, old_parent,old_dentry,new_parent,
+                                      new_dentry, info);
+        else
+                return presto_rename_other(fset, old_parent, old_dentry,
+                                           new_parent,new_dentry, info);
+}
+
+
+int lento_do_rename(const char *oldname, const char *newname,
+                 struct lento_vfs_context *info)
+{
+        int error = 0;
+        struct dentry * old_dir, * new_dir;
+        struct dentry * old_dentry, *new_dentry;
+        struct nameidata oldnd, newnd;
+        struct presto_file_set *fset;
+
+        ENTRY;
+
+        error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
+        if (error)
+                goto exit;
+
+        error = path_lookup(newname, LOOKUP_PARENT, &newnd);
+        if (error)
+                goto exit1;
+
+        error = -EXDEV;
+        if (oldnd.mnt != newnd.mnt)
+                goto exit2;
+
+        old_dir = oldnd.dentry;
+        error = -EBUSY;
+        if (oldnd.last_type != LAST_NORM)
+                goto exit2;
+
+        new_dir = newnd.dentry;
+        if (newnd.last_type != LAST_NORM)
+                goto exit2;
+
+        lock_rename(new_dir, old_dir);
+
+        old_dentry = lookup_hash(&oldnd.last, old_dir);
+        error = PTR_ERR(old_dentry);
+        if (IS_ERR(old_dentry))
+                goto exit3;
+        /* source must exist */
+        error = -ENOENT;
+        if (!old_dentry->d_inode)
+                goto exit4;
+        fset = presto_fset(old_dentry);
+        error = -EINVAL;
+        if ( !fset ) {
+                CERROR("No fileset!\n");
+                EXIT;
+                goto exit4;
+        }
+        /* unless the source is a directory, trailing slashes give -ENOTDIR */
+        if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
+                error = -ENOTDIR;
+                if (oldnd.last.name[oldnd.last.len])
+                        goto exit4;
+                if (newnd.last.name[newnd.last.len])
+                        goto exit4;
+        }
+        new_dentry = lookup_hash(&newnd.last, new_dir);
+        error = PTR_ERR(new_dentry);
+        if (IS_ERR(new_dentry))
+                goto exit4;
+
+        lock_kernel();
+        error = presto_do_rename(fset, old_dir, old_dentry,
+                                   new_dir, new_dentry, info);
+        unlock_kernel();
+
+        dput(new_dentry);
+exit4:
+        dput(old_dentry);
+exit3:
+        unlock_rename(new_dir, old_dir);
+exit2:
+        path_release(&newnd);
+exit1:
+        path_release(&oldnd);
+exit:
+        return error;
+}
+
+int  lento_rename(const char * oldname, const char * newname,
+                  struct lento_vfs_context *info)
+{
+        int error;
+        char * from;
+        char * to;
+
+        from = getname(oldname);
+        if(IS_ERR(from))
+                return PTR_ERR(from);
+        to = getname(newname);
+        error = PTR_ERR(to);
+        if (!IS_ERR(to)) {
+                error = lento_do_rename(from,to, info);
+                putname(to);
+        } 
+        putname(from);
+        return error;
+}
+
+struct dentry *presto_iopen(struct dentry *dentry,
+                            ino_t ino, unsigned int generation)
+{
+        struct presto_file_set *fset;
+        char name[48];
+        int error;
+
+        ENTRY;
+        /* see if we already have the dentry we want */
+        if (dentry->d_inode && dentry->d_inode->i_ino == ino &&
+            dentry->d_inode->i_generation == generation) {
+                EXIT;
+                return dentry;
+        }
+
+        /* Make sure we have a cache beneath us.  We should always find at
+         * least one dentry inside the cache (if it exists), otherwise not
+         * even the cache root exists, or we passed in a bad name.
+         */
+        fset = presto_fset(dentry);
+        error = -EINVAL;
+        if (!fset) {
+                CERROR("No fileset for %*s!\n",
+                       dentry->d_name.len, dentry->d_name.name);
+                EXIT;
+                dput(dentry);
+                return ERR_PTR(error);
+        }
+        dput(dentry);
+
+        sprintf(name, "%s%#lx%c%#x",
+                PRESTO_ILOOKUP_MAGIC, ino, PRESTO_ILOOKUP_SEP, generation);
+        CDEBUG(D_PIOCTL, "opening %ld by number (as %s)\n", ino, name);
+        return lookup_one_len(name, fset->fset_dentry, strlen(name));
+}
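+
+/*
+ * Illustrative example (not part of the original source): with ino 0x2af1
+ * and generation 0x3, the sprintf() above produces a name of the form
+ *
+ *     <PRESTO_ILOOKUP_MAGIC>0x2af1<PRESTO_ILOOKUP_SEP>0x3
+ *
+ * The exact prefix and separator characters come from the InterMezzo
+ * headers (not shown here); the point is that the inode number and
+ * generation are encoded into a name that lookup_one_len() can resolve
+ * under the fileset root.
+ */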
+
+static struct file *presto_filp_dopen(struct dentry *dentry, int flags)
+{
+        struct file *f;
+        struct inode *inode;
+        int flag, error;
+
+        ENTRY;
+        error = -ENFILE;
+        f = get_empty_filp();
+        if (!f) {
+                CDEBUG(D_PIOCTL, "error getting file pointer\n");
+                EXIT;
+                goto out;
+        }
+        f->f_flags = flag = flags;
+        f->f_mode = (flag+1) & O_ACCMODE;
+        inode = dentry->d_inode;
+        if (f->f_mode & FMODE_WRITE) {
+                error = get_write_access(inode);
+                if (error) {
+                        CDEBUG(D_PIOCTL, "error getting write access\n");
+                        EXIT;
+                        goto cleanup_file;
+                }
+        }
+
+       /* XXX: f_vfsmnt is never set up here */
+        f->f_dentry = dentry;
+        f->f_mapping = dentry->d_inode->i_mapping;
+        f->f_pos = 0;
+        //f->f_reada = 0;
+        f->f_op = NULL;
+        if (inode->i_op)
+                /* XXX should we set to presto ops, or leave at cache ops? */
+                f->f_op = inode->i_fop;
+        if (f->f_op && f->f_op->open) {
+                error = f->f_op->open(inode, f);
+                if (error) {
+                        CDEBUG(D_PIOCTL, "error calling cache 'open'\n");
+                        EXIT;
+                        goto cleanup_all;
+                }
+        }
+        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+
+        return f;
+
+cleanup_all:
+        if (f->f_mode & FMODE_WRITE)
+                put_write_access(inode);
+cleanup_file:
+        put_filp(f);
+out:
+        return ERR_PTR(error);
+}
+
+
+/* Open an inode by number.  We pass in the cache root name (or a subdirectory
+ * from the cache that is guaranteed to exist) to be able to access the cache.
+ */
+int lento_iopen(const char *name, ino_t ino, unsigned int generation,
+                int flags)
+{
+        char * tmp;
+        struct dentry *dentry;
+        struct nameidata nd;
+        int fd;
+        int error;
+
+        ENTRY;
+        CDEBUG(D_PIOCTL,
+               "open %s:inode %#lx (%ld), generation %x (%d), flags %d \n",
+               name, ino, ino, generation, generation, flags);
+        /* We don't allow creation of files by number only, as it would
+         * lead to dangling files not in any directory.  We could also
+         * just turn off the flag and ignore it.
+         */
+        if (flags & O_CREAT) {
+                CERROR("%s: create file by inode number (%ld) not allowed\n",
+                       __FUNCTION__, ino);
+                EXIT;
+                return -EACCES;
+        }
+
+        tmp = getname(name);
+        if (IS_ERR(tmp)) {
+                EXIT;
+                return PTR_ERR(tmp);
+        }
+
+        lock_kernel();
+again:  /* look up the named file or a parent directory so we can get the cache */
+        error = presto_walk(tmp, &nd);
+        if ( error && error != -ENOENT ) {
+                EXIT;
+                unlock_kernel();
+               putname(tmp);
+                return error;
+        } 
+        if (error == -ENOENT)
+                dentry = NULL;
+        else 
+                dentry = nd.dentry;
+
+        /* we didn't find the named file, so see if a parent exists */
+        if (!dentry) {
+                char *slash;
+
+                slash = strrchr(tmp, '/');
+                if (slash && slash != tmp) {
+                        *slash = '\0';
+                        path_release(&nd);
+                        goto again;
+                }
+                /* we should never get here... */
+                CDEBUG(D_PIOCTL, "no more path components to try!\n");
+                fd = -ENOENT;
+                goto exit;
+        }
+        CDEBUG(D_PIOCTL, "returned dentry %p\n", dentry);
+
+        dentry = presto_iopen(dentry, ino, generation);
+        fd = PTR_ERR(dentry);
+        if (IS_ERR(dentry)) {
+                EXIT;
+                goto exit;
+        }
+
+        /* XXX start of code that might be replaced by something like:
+         * if (flags & (O_WRONLY | O_RDWR)) {
+         *      error = get_write_access(dentry->d_inode);
+         *      if (error) {
+         *              EXIT;
+         *              goto cleanup_dput;
+         *      }
+         * }
+         * fd = open_dentry(dentry, flags);
+         *
+         * including the presto_filp_dopen() function (check dget counts!)
+         */
+        fd = get_unused_fd();
+        if (fd < 0) {
+                EXIT;
+                goto exit;
+        }
+
+        {
+                int error;
+                struct file * f = presto_filp_dopen(dentry, flags);
+                error = PTR_ERR(f);
+                if (IS_ERR(f)) {
+                        put_unused_fd(fd);
+                        fd = error;
+                } else {
+                       fd_install(fd, f);
+               }
+        }
+        /* end of code that might be replaced by open_dentry */
+
+        EXIT;
+exit:
+        unlock_kernel();
+        path_release(&nd);
+        putname(tmp);
+        return fd;
+}
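+
+/*
+ * Usage sketch (hypothetical, for illustration only; the path and variable
+ * names are made up): reopen a file recorded in the KML by inode number,
+ * using any path inside the cache to locate the fileset:
+ *
+ *     int fd = lento_iopen("/izo/cache/some/file", rec_ino, rec_gen,
+ *                          O_RDONLY);
+ *     if (fd < 0)
+ *             return fd;   // e.g. -EACCES for O_CREAT, -ENOENT if no parent
+ *
+ * The named path is only used to reach the cache; the file actually opened
+ * is the one identified by (ino, generation).
+ */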
+
+#ifdef CONFIG_FS_EXT_ATTR
+
+#if 0 /* was a broken check for Posix ACLs */
+/* Posix ACL code changes i_mode without using a notify_change (or
+ * a mark_inode_dirty!). We need to duplicate this at the reintegrator
+ * which is done by this function. This function also takes care of 
+ * resetting the cached posix acls in this inode. If we don't reset these
+ * VFS continues using the old acl information, which by now may be out of
+ * date.
+ */
+int presto_setmode(struct presto_file_set *fset, struct dentry *dentry,
+                   mode_t mode)
+{
+        struct inode *inode = dentry->d_inode;
+
+        ENTRY;
+        /* The extended attributes for this inode were modified. 
+         * At this point we can not be sure if any of the ACL 
+         * information for this inode was updated. So we will 
+         * force VFS to reread the acls. Note that we do this 
+         * only when called from the SETEXTATTR ioctl, which is why we
+         * do this while setting the mode of the file. Also note
+         * that mark_inode_dirty is not needed for i_*acl, only
+         * to force i_mode info to disk, and should be removed once
+         * we use notify_change to update the mode.
+         * XXX: is mode setting really needed? Just setting acl's should
+         * be enough! VFS should change the i_mode as needed? SHP
+         */
+        if (inode->i_acl && 
+            inode->i_acl != POSIX_ACL_NOT_CACHED) 
+            posix_acl_release(inode->i_acl);
+        if (inode->i_default_acl && 
+            inode->i_default_acl != POSIX_ACL_NOT_CACHED) 
+            posix_acl_release(inode->i_default_acl);
+        inode->i_acl = POSIX_ACL_NOT_CACHED;
+        inode->i_default_acl = POSIX_ACL_NOT_CACHED;
+        inode->i_mode = mode;
+        /* inode should already be dirty...but just in case */
+        mark_inode_dirty(inode);
+        return 0;
+
+#if 0
+        /* XXX: The following code is the preferred way to set mode, 
+         * however, I need to carefully go through possible recursion
+         * paths back into presto. See comments in presto_do_setattr.
+         */
+        {    
+        int error=0; 
+        struct super_operations *sops;
+        struct iattr iattr;
+
+        iattr.ia_mode = mode;
+        iattr.ia_valid = ATTR_MODE|ATTR_FORCE;
+
+        error = -EPERM;
+        sops = filter_c2csops(fset->fset_cache->cache_filter); 
+        if (!sops || !sops->notify_change) {
+                EXIT;
+                return error;
+        }
+
+        error = sops->notify_change(dentry, &iattr);
+
+        EXIT;
+        return error;
+        }
+#endif
+}
+#endif
+
+/* setextattr Interface to cache filesystem */
+int presto_do_set_ext_attr(struct presto_file_set *fset, 
+                           struct dentry *dentry, 
+                           const char *name, void *buffer,
+                           size_t buffer_len, int flags, mode_t *mode,
+                           struct lento_vfs_context *info) 
+{
+        struct rec_info rec;
+        struct inode *inode = dentry->d_inode;
+        struct inode_operations *iops;
+        int error;
+        struct presto_version ver;
+        void *handle;
+        char temp[PRESTO_EXT_ATTR_NAME_MAX+1];
+
+        ENTRY;
+        error = -EROFS;
+        if (IS_RDONLY(inode)) {
+                EXIT;
+                return -EROFS;
+        }
+
+        if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
+                EXIT;
+                return -EPERM;
+        }
+
+        presto_getversion(&ver, inode);
+        error = -EPERM;
+        /* We need to invoke different filters based on whether
+         * this dentry is a regular file, directory or symlink.
+         */
+        switch (inode->i_mode & S_IFMT) {
+                case S_IFLNK: /* symlink */
+                    iops = filter_c2csiops(fset->fset_cache->cache_filter); 
+                    break;
+                case S_IFDIR: /* directory */
+                    iops = filter_c2cdiops(fset->fset_cache->cache_filter); 
+                    break;
+                case S_IFREG:
+                default: /* everything else including regular files */
+                    iops = filter_c2cfiops(fset->fset_cache->cache_filter); 
+        }
+
+        if (!iops || !iops->set_ext_attr) {
+                EXIT;
+                return error;
+        }
+
+        error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); 
+        if (error) {
+                EXIT;
+                return error;
+        }
+
+        
+        handle = presto_trans_start(fset,dentry->d_inode,KML_OPCODE_SETEXTATTR);
+        if ( IS_ERR(handle) ) {
+                CERROR("presto_do_set_ext_attr: no space for transaction\n");
+                presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
+                return -ENOSPC;
+        }
+
+        /* We first "truncate" name to the maximum allowable in presto */
+        /* This simulates the strncpy_from_user code in fs/ext_attr.c */
+        strlcpy(temp,name,sizeof(temp));
+
+        /* Pass down to cache*/
+        error = iops->set_ext_attr(inode,temp,buffer,buffer_len,flags);
+        if (error) {
+                EXIT;
+                goto exit;
+        }
+
+#if 0 /* was a broken check for Posix ACLs */
+        /* Reset mode if specified*/
+        /* XXX: when we do native acl support, move this code out! */
+        if (mode != NULL) {
+                error = presto_setmode(fset, dentry, *mode);
+                if (error) { 
+                    EXIT;
+                    goto exit;
+                }
+        }
+#endif
+
+        /* Reset ctime. Only inode change time (ctime) is affected */
+        error = presto_settime(fset, NULL, NULL, dentry, info, ATTR_CTIME);
+        if (error) { 
+                EXIT;
+                goto exit;
+        }
+
+        if (flags & EXT_ATTR_FLAG_USER) {
+                CERROR(" USER flag passed to presto_do_set_ext_attr!\n");
+                BUG();
+        }
+
+        /* We are here, so set_ext_attr succeeded. We no longer need to keep
+         * track of EXT_ATTR_FLAG_{EXISTS,CREATE}, instead, we will force
+         * the attribute value during log replay. -SHP
+         */
+        flags &= ~(EXT_ATTR_FLAG_EXISTS | EXT_ATTR_FLAG_CREATE);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x10);
+        if ( presto_do_kml(info, dentry) )
+                error = presto_journal_set_ext_attr
+                        (&rec, fset, dentry, &ver, name, buffer, 
+                         buffer_len, flags);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x20);
+        if ( presto_do_rcvd(info, dentry) )
+                error = presto_write_last_rcvd(&rec, fset, info);
+
+        presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x30);
+        EXIT;
+exit:
+        presto_release_space(fset->fset_cache, PRESTO_REQHIGH); 
+        presto_trans_commit(fset, handle);
+
+        return error;
+}
+#endif
diff --git a/fs/xfs/linux/kmem.h b/fs/xfs/linux/kmem.h
new file mode 100644 (file)
index 0000000..c9df164
--- /dev/null
+++ b/fs/xfs/linux/kmem.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPPORT_KMEM_H__
+#define __XFS_SUPPORT_KMEM_H__
+
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+/*
+ * Cutoff point to use vmalloc instead of kmalloc.
+ */
+#define MAX_SLAB_SIZE  0x10000
+
+/*
+ * XFS uses slightly different names for these due to the
+ * IRIX heritage.
+ */
+#define        kmem_zone       kmem_cache_s
+#define kmem_zone_t    kmem_cache_t
+
+#define KM_SLEEP       0x0001
+#define KM_NOSLEEP     0x0002
+#define KM_NOFS                0x0004
+
+typedef unsigned long xfs_pflags_t;
+
+#define PFLAGS_TEST_FSTRANS()           (current->flags & PF_FSTRANS)
+
+/* these could be nested, so we save state */
+#define PFLAGS_SET_FSTRANS(STATEP) do {        \
+       *(STATEP) = current->flags;     \
+       current->flags |= PF_FSTRANS;   \
+} while (0)
+
+#define PFLAGS_CLEAR_FSTRANS(STATEP) do { \
+       *(STATEP) = current->flags;     \
+       current->flags &= ~PF_FSTRANS;  \
+} while (0)
+
+/* Restore the PF_FSTRANS state to what was saved in STATEP */
+#define PFLAGS_RESTORE_FSTRANS(STATEP) do {                    \
+       current->flags = ((current->flags & ~PF_FSTRANS) |      \
+                         (*(STATEP) & PF_FSTRANS));            \
+} while (0)
+
+#define PFLAGS_DUP(OSTATEP, NSTATEP) do { \
+       *(NSTATEP) = *(OSTATEP);        \
+} while (0)
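+
+/*
+ * Illustrative pattern (not part of the original header) for the PF_FSTRANS
+ * helpers above: save the current state, mark the task as inside a
+ * filesystem transaction so the allocations below avoid recursing into the
+ * filesystem, then restore the saved PF_FSTRANS state:
+ *
+ *     xfs_pflags_t pflags;
+ *
+ *     PFLAGS_SET_FSTRANS(&pflags);
+ *     ... allocate, log, commit ...
+ *     PFLAGS_RESTORE_FSTRANS(&pflags);
+ */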
+
+/*
+ * XXX get rid of the unconditional  __GFP_NOFAIL by adding
+ * a KM_FAIL flag and using it where we're allowed to fail.
+ */
+static __inline unsigned int
+kmem_flags_convert(int flags)
+{
+       int lflags;
+
+#if DEBUG
+       if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS))) {
+               printk(KERN_WARNING
+                   "XFS: memory allocation with wrong flags (%x)\n", flags);
+               BUG();
+       }
+#endif
+
+       lflags = (flags & KM_NOSLEEP) ? GFP_ATOMIC : (GFP_KERNEL|__GFP_NOFAIL);
+
+       /* avoid recursive callbacks to filesystem during transactions */
+       if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS))
+               lflags &= ~__GFP_FS;
+
+       return lflags;
+}
+
+static __inline void *
+kmem_alloc(size_t size, int flags)
+{
+       if (unlikely(MAX_SLAB_SIZE < size))
+               /* Avoid doing filesystem sensitive stuff to get this */
+               return __vmalloc(size, kmem_flags_convert(flags), PAGE_KERNEL);
+       return kmalloc(size, kmem_flags_convert(flags));
+}
+
+static __inline void *
+kmem_zalloc(size_t size, int flags)
+{
+       void *ptr = kmem_alloc(size, flags);
+       if (likely(ptr != NULL))
+               memset(ptr, 0, size);
+       return ptr;
+}
+
+static __inline void
+kmem_free(void *ptr, size_t size)
+{
+       if (unlikely((unsigned long)ptr < VMALLOC_START ||
+                    (unsigned long)ptr >= VMALLOC_END))
+               kfree(ptr);
+       else
+               vfree(ptr);
+}
+
+static __inline void *
+kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags)
+{
+       void *new = kmem_alloc(newsize, flags);
+
+       if (likely(ptr != NULL)) {
+               if (likely(new != NULL))
+                       memcpy(new, ptr, min(oldsize, newsize));
+               kmem_free(ptr, oldsize);
+       }
+
+       return new;
+}
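+
+/*
+ * Illustrative use of the wrappers above (not part of the original header):
+ * the caller tracks the allocation size, kmem_free() decides between
+ * kfree() and vfree() based on the pointer, and kmem_realloc() copies the
+ * old contents into the new buffer:
+ *
+ *     char *buf = kmem_zalloc(len, KM_SLEEP);
+ *     if (buf) {
+ *             ... use buf ...
+ *             kmem_free(buf, len);
+ *     }
+ */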
+
+static __inline kmem_zone_t *
+kmem_zone_init(int size, char *zone_name)
+{
+       return kmem_cache_create(zone_name, size, 0, 0, NULL, NULL);
+}
+
+static __inline void *
+kmem_zone_alloc(kmem_zone_t *zone, int flags)
+{
+       return kmem_cache_alloc(zone, kmem_flags_convert(flags));
+}
+
+static __inline void *
+kmem_zone_zalloc(kmem_zone_t *zone, int flags)
+{
+       void *ptr = kmem_zone_alloc(zone, flags);
+       if (likely(ptr != NULL))
+               memset(ptr, 0, kmem_cache_size(zone));
+       return ptr;
+}
+
+static __inline void
+kmem_zone_free(kmem_zone_t *zone, void *ptr)
+{
+       kmem_cache_free(zone, ptr);
+}
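+
+/*
+ * Illustrative zone usage (hypothetical structure name): create a cache for
+ * fixed-size objects, then allocate zeroed objects from it and free them:
+ *
+ *     kmem_zone_t *foo_zone = kmem_zone_init(sizeof(struct foo), "foo");
+ *     struct foo *fp = kmem_zone_zalloc(foo_zone, KM_SLEEP);
+ *     ...
+ *     kmem_zone_free(foo_zone, fp);
+ */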
+
+typedef struct shrinker *kmem_shaker_t;
+typedef int (*kmem_shake_func_t)(int, unsigned int);
+
+static __inline kmem_shaker_t
+kmem_shake_register(kmem_shake_func_t sfunc)
+{
+       return set_shrinker(DEFAULT_SEEKS, sfunc);
+}
+
+static __inline void
+kmem_shake_deregister(kmem_shaker_t shrinker)
+{
+       remove_shrinker(shrinker);
+}
+
+static __inline int
+kmem_shake_allow(unsigned int gfp_mask)
+{
+       return (gfp_mask & __GFP_WAIT);
+}
+
+#endif /* __XFS_SUPPORT_KMEM_H__ */
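Illustrative usage, not part of the patch: a minimal sketch of how a caller might use the kmem wrappers above. The KM_SLEEP/KM_NOFS flags are assumed to be defined earlier in this header; struct example_item, example_alloc and example_free are hypothetical names introduced only for this sketch.

    /* Hypothetical structure used only to demonstrate the wrappers. */
    struct example_item {
            int     id;
            char    name[32];
    };

    static struct example_item *
    example_alloc(void)
    {
            /*
             * KM_NOFS behaves like KM_SLEEP (GFP_KERNEL|__GFP_NOFAIL) but
             * kmem_flags_convert clears __GFP_FS, so the allocator cannot
             * recurse into the filesystem while a transaction is active.
             */
            return kmem_zalloc(sizeof(struct example_item), KM_NOFS);
    }

    static void
    example_free(struct example_item *item)
    {
            /* kmem_free picks kfree() or vfree() based on the address. */
            kmem_free(item, sizeof(struct example_item));
    }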
diff --git a/fs/xfs/linux/mrlock.h b/fs/xfs/linux/mrlock.h
new file mode 100644 (file)
index 0000000..d2c11a0
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPPORT_MRLOCK_H__
+#define __XFS_SUPPORT_MRLOCK_H__
+
+#include <linux/rwsem.h>
+
+enum { MR_NONE, MR_ACCESS, MR_UPDATE };
+
+typedef struct {
+       struct rw_semaphore     mr_lock;
+       int                     mr_writer;
+} mrlock_t;
+
+#define mrinit(mrp, name)      \
+       ( (mrp)->mr_writer = 0, init_rwsem(&(mrp)->mr_lock) )
+#define mrlock_init(mrp, t,n,s)        mrinit(mrp, n)
+#define mrfree(mrp)            do { } while (0)
+#define mraccess(mrp)          mraccessf(mrp, 0)
+#define mrupdate(mrp)          mrupdatef(mrp, 0)
+
+static inline void mraccessf(mrlock_t *mrp, int flags)
+{
+       down_read(&mrp->mr_lock);
+}
+
+static inline void mrupdatef(mrlock_t *mrp, int flags)
+{
+       down_write(&mrp->mr_lock);
+       mrp->mr_writer = 1;
+}
+
+static inline int mrtryaccess(mrlock_t *mrp)
+{
+       return down_read_trylock(&mrp->mr_lock);
+}
+
+static inline int mrtryupdate(mrlock_t *mrp)
+{
+       if (!down_write_trylock(&mrp->mr_lock))
+               return 0;
+       mrp->mr_writer = 1;
+       return 1;
+}
+
+static inline void mrunlock(mrlock_t *mrp)
+{
+       if (mrp->mr_writer) {
+               mrp->mr_writer = 0;
+               up_write(&mrp->mr_lock);
+       } else {
+               up_read(&mrp->mr_lock);
+       }
+}
+
+static inline void mrdemote(mrlock_t *mrp)
+{
+       mrp->mr_writer = 0;
+       downgrade_write(&mrp->mr_lock);
+}
+
+#ifdef DEBUG
+/*
+ * Debug-only routine.  Without some platform-specific asm code we can
+ * only answer requests regarding whether we hold the lock for write
+ * (reader state is outside our visibility; we only track writer state).
+ * Note: this means !ismrlocked would give false positives, so don't do that.
+ */
+static inline int ismrlocked(mrlock_t *mrp, int type)
+{
+       if (mrp && type == MR_UPDATE)
+               return mrp->mr_writer;
+       return 1;
+}
+#endif
+
+#endif /* __XFS_SUPPORT_MRLOCK_H__ */
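Illustrative usage, not part of the patch: a sketch of the reader/writer pattern the mrlock wrappers above support. example_lock and example_update are hypothetical names; mrinit() is assumed to have been called during setup.

    static mrlock_t example_lock;       /* initialized elsewhere with mrinit() */

    static void
    example_update(void)
    {
            mrupdate(&example_lock);    /* exclusive access, sets mr_writer */
            /* ... modify shared state ... */
            mrdemote(&example_lock);    /* clear mr_writer, downgrade to shared access */
            /* ... continue reading shared state ... */
            mrunlock(&example_lock);    /* mr_writer is now 0, so this does up_read() */
    }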
diff --git a/fs/xfs/linux/mutex.h b/fs/xfs/linux/mutex.h
new file mode 100644 (file)
index 0000000..0b296bb
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPPORT_MUTEX_H__
+#define __XFS_SUPPORT_MUTEX_H__
+
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+
+/*
+ * Map the mutexes from IRIX to Linux semaphores.
+ *
+ * mutex_destroy simply initializes the count to -99, which should block
+ * all other callers.
+ */
+#define MUTEX_DEFAULT          0x0
+typedef struct semaphore       mutex_t;
+
+#define mutex_init(lock, type, name)           sema_init(lock, 1)
+#define mutex_destroy(lock)                    sema_init(lock, -99)
+#define mutex_lock(lock, num)                  down(lock)
+#define mutex_trylock(lock)                    (down_trylock(lock) ? 0 : 1)
+#define mutex_unlock(lock)                     up(lock)
+
+#endif /* __XFS_SUPPORT_MUTEX_H__ */
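Illustrative usage, not part of the patch: a sketch of what the IRIX-style mutex calls above expand to on Linux. example_mutex and example_op are hypothetical names.

    static mutex_t example_mutex;       /* initialized elsewhere with
                                         * mutex_init(&example_mutex, MUTEX_DEFAULT, "example") */

    static void
    example_op(void)
    {
            mutex_lock(&example_mutex, 0);      /* down(); the second (priority)
                                                 * argument is ignored on Linux */
            /* ... critical section ... */
            mutex_unlock(&example_mutex);       /* up() */
    }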
diff --git a/fs/xfs/linux/sema.h b/fs/xfs/linux/sema.h
new file mode 100644 (file)
index 0000000..30b67b4
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPPORT_SEMA_H__
+#define __XFS_SUPPORT_SEMA_H__
+
+#include <linux/time.h>
+#include <linux/wait.h>
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+
+/*
+ * The sema_t structure maps directly to struct semaphore in the Linux kernel.
+ */
+
+typedef struct semaphore sema_t;
+
+#define init_sema(sp, val, c, d)       sema_init(sp, val)
+#define initsema(sp, val)              sema_init(sp, val)
+#define initnsema(sp, val, name)       sema_init(sp, val)
+#define psema(sp, b)                   down(sp)
+#define vsema(sp)                      up(sp)
+#define valusema(sp)                   (atomic_read(&(sp)->count))
+#define freesema(sema)
+
+/*
+ * Map cpsema (try to get the sema) to down_trylock. We need to switch
+ * the return values, since cpsema returns 1 (acquired) / 0 (failed) while
+ * down_trylock returns the reverse: 0 (acquired) / 1 (failed).
+ */
+
+#define cpsema(sp)                     (down_trylock(sp) ? 0 : 1)
+
+/*
+ * Didn't do cvsema(sp). Not sure how to map this to up/down/...
+ * It does a vsema if the value is < 0, otherwise nothing.
+ */
+
+#endif /* __XFS_SUPPORT_SEMA_H__ */
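Illustrative usage, not part of the patch: a sketch of the inverted return convention that the cpsema mapping above takes care of. example_sema and example_try are hypothetical names; initnsema() is assumed to have been called during setup.

    static sema_t example_sema; /* initialized elsewhere with initnsema(&example_sema, 1, "example") */

    static int
    example_try(void)
    {
            if (cpsema(&example_sema)) {        /* 1 == acquired (down_trylock returned 0) */
                    /* ... protected work ... */
                    vsema(&example_sema);       /* up() */
                    return 1;
            }
            return 0;                           /* semaphore was already held */
    }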
diff --git a/fs/xfs/linux/spin.h b/fs/xfs/linux/spin.h
new file mode 100644 (file)
index 0000000..80a3a6b
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPPORT_SPIN_H__
+#define __XFS_SUPPORT_SPIN_H__
+
+#include <linux/sched.h>       /* preempt needs this */
+#include <linux/spinlock.h>
+
+/*
+ * Map lock_t from IRIX to Linux spinlocks.
+ *
+ * Note that Linux turns spinlocks on/off depending on CONFIG_SMP,
+ * so we don't need to worry about SMP here.
+ */
+
+#define SPLDECL(s)             unsigned long s
+
+typedef spinlock_t lock_t;
+
+#define spinlock_init(lock, name)      spin_lock_init(lock)
+#define        spinlock_destroy(lock)
+
+static inline unsigned long mutex_spinlock(lock_t *lock)
+{
+       spin_lock(lock);
+       return 0;
+}
+
+/*ARGSUSED*/
+static inline void mutex_spinunlock(lock_t *lock, unsigned long s)
+{
+       spin_unlock(lock);
+}
+
+static inline void nested_spinlock(lock_t *lock)
+{
+       spin_lock(lock);
+}
+
+static inline void nested_spinunlock(lock_t *lock)
+{
+       spin_unlock(lock);
+}
+
+#endif /* __XFS_SUPPORT_SPIN_H__ */
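Illustrative usage, not part of the patch: a sketch of the IRIX-style lock/unlock calls above, including the cookie value that mutex_spinlock returns and mutex_spinunlock ignores on Linux. example_lock and example_locked_op are hypothetical names.

    static lock_t example_lock;         /* initialized elsewhere with
                                         * spinlock_init(&example_lock, "example") */

    static void
    example_locked_op(void)
    {
            SPLDECL(s);                         /* declares 'unsigned long s' */

            s = mutex_spinlock(&example_lock);  /* spin_lock(); always returns 0 here */
            /* ... touch data protected by example_lock ... */
            mutex_spinunlock(&example_lock, s); /* spin_unlock(); 's' is unused */
    }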
diff --git a/fs/xfs/linux/sv.h b/fs/xfs/linux/sv.h
new file mode 100644 (file)
index 0000000..821d316
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPPORT_SV_H__
+#define __XFS_SUPPORT_SV_H__
+
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+/*
+ * Synchronisation variables.
+ *
+ * (Parameters "pri", "svf" and "rts" are not implemented)
+ */
+
+typedef struct sv_s {
+       wait_queue_head_t waiters;
+} sv_t;
+
+#define SV_FIFO                0x0             /* sv_t is FIFO type */
+#define SV_LIFO                0x2             /* sv_t is LIFO type */
+#define SV_PRIO                0x4             /* sv_t is PRIO type */
+#define SV_KEYED       0x6             /* sv_t is KEYED type */
+#define SV_DEFAULT      SV_FIFO
+
+
+static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state,
+                            unsigned long timeout)
+{
+       DECLARE_WAITQUEUE(wait, current);
+
+       add_wait_queue_exclusive(&sv->waiters, &wait);
+       __set_current_state(state);
+       spin_unlock(lock);
+
+       schedule_timeout(timeout);
+
+       remove_wait_queue(&sv->waiters, &wait);
+}
+
+#define init_sv(sv,type,name,flag) \
+       init_waitqueue_head(&(sv)->waiters)
+#define sv_init(sv,flag,name) \
+       init_waitqueue_head(&(sv)->waiters)
+#define sv_destroy(sv) \
+       /*NOTHING*/
+#define sv_wait(sv, pri, lock, s) \
+       _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT)
+#define sv_wait_sig(sv, pri, lock, s)   \
+       _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT)
+#define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \
+       _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts))
+#define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \
+       _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts))
+#define sv_signal(sv) \
+       wake_up(&(sv)->waiters)
+#define sv_broadcast(sv) \
+       wake_up_all(&(sv)->waiters)
+
+#endif /* __XFS_SUPPORT_SV_H__ */
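Illustrative usage, not part of the patch: the wait/wake pattern the sv wrappers above support. Note that _sv_wait drops the spinlock and does not reacquire it, so the waiter must relock after waking. example_lock, example_sv, example_ready, example_waiter and example_waker are hypothetical names; sv_init() is assumed to have been called during setup.

    static spinlock_t example_lock = SPIN_LOCK_UNLOCKED;
    static sv_t example_sv;     /* sv_init(&example_sv, SV_DEFAULT, "example") at setup */
    static int example_ready;

    static void
    example_waiter(void)
    {
            spin_lock(&example_lock);
            while (!example_ready) {
                    /* sv_wait releases example_lock before sleeping */
                    sv_wait(&example_sv, 0, &example_lock, 0);
                    spin_lock(&example_lock);   /* caller must relock */
            }
            spin_unlock(&example_lock);
    }

    static void
    example_waker(void)
    {
            spin_lock(&example_lock);
            example_ready = 1;
            spin_unlock(&example_lock);
            sv_broadcast(&example_sv);          /* wake_up_all() */
    }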
diff --git a/fs/xfs/linux/time.h b/fs/xfs/linux/time.h
new file mode 100644 (file)
index 0000000..109b5c0
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPPORT_TIME_H__
+#define __XFS_SUPPORT_TIME_H__
+
+#include <linux/sched.h>
+#include <linux/time.h>
+
+typedef struct timespec timespec_t;
+
+static inline void delay(long ticks)
+{
+       current->state = TASK_UNINTERRUPTIBLE;
+       schedule_timeout(ticks);
+}
+
+static inline void nanotime(struct timespec *tvp)
+{
+       *tvp = CURRENT_TIME;
+}
+
+#endif /* __XFS_SUPPORT_TIME_H__ */
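Illustrative usage, not part of the patch: a small sketch of the two wrappers above. example_stamp_and_wait is a hypothetical name.

    static void
    example_stamp_and_wait(timespec_t *ts)
    {
            nanotime(ts);       /* *ts = CURRENT_TIME */
            delay(HZ / 10);     /* sleep uninterruptibly for ~100ms worth of ticks */
    }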
diff --git a/fs/xfs/linux/xfs_aops.c b/fs/xfs/linux/xfs_aops.c
new file mode 100644 (file)
index 0000000..3afc61d
--- /dev/null
@@ -0,0 +1,1276 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.         Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_sb.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_trans.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_error.h"
+#include "xfs_rw.h"
+#include "xfs_iomap.h"
+#include <linux/mpage.h>
+
+STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
+STATIC void xfs_convert_page(struct inode *, struct page *,
+                               xfs_iomap_t *, void *, int, int);
+
+#if defined(XFS_RW_TRACE)
+void
+xfs_page_trace(
+       int             tag,
+       struct inode    *inode,
+       struct page     *page,
+       int             mask)
+{
+       xfs_inode_t     *ip;
+       bhv_desc_t      *bdp;
+       vnode_t         *vp = LINVFS_GET_VP(inode);
+       loff_t          isize = i_size_read(inode);
+       loff_t          offset = page->index << PAGE_CACHE_SHIFT;
+       int             delalloc = -1, unmapped = -1, unwritten = -1;
+
+       if (page_has_buffers(page))
+               xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
+
+       bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
+       ip = XFS_BHVTOI(bdp);
+       if (!ip->i_rwtrace)
+               return;
+
+       ktrace_enter(ip->i_rwtrace,
+               (void *)((unsigned long)tag),
+               (void *)ip,
+               (void *)inode,
+               (void *)page,
+               (void *)((unsigned long)mask),
+               (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
+               (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
+               (void *)((unsigned long)((isize >> 32) & 0xffffffff)),
+               (void *)((unsigned long)(isize & 0xffffffff)),
+               (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
+               (void *)((unsigned long)(offset & 0xffffffff)),
+               (void *)((unsigned long)delalloc),
+               (void *)((unsigned long)unmapped),
+               (void *)((unsigned long)unwritten),
+               (void *)NULL,
+               (void *)NULL);
+}
+#else
+#define xfs_page_trace(tag, inode, page, mask)
+#endif
+
+void
+linvfs_unwritten_done(
+       struct buffer_head      *bh,
+       int                     uptodate)
+{
+       xfs_buf_t               *pb = (xfs_buf_t *)bh->b_private;
+
+       ASSERT(buffer_unwritten(bh));
+       bh->b_end_io = NULL;
+       clear_buffer_unwritten(bh);
+       if (!uptodate)
+               pagebuf_ioerror(pb, EIO);
+       if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
+               pagebuf_iodone(pb, 1, 1);
+       }
+       end_buffer_async_write(bh, uptodate);
+}
+
+/*
+ * Issue transactions to convert a buffer range from unwritten
+ * to written extents (buffered IO).
+ */
+STATIC void
+linvfs_unwritten_convert(
+       xfs_buf_t       *bp)
+{
+       vnode_t         *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *);
+       int             error;
+
+       BUG_ON(atomic_read(&bp->pb_hold) < 1);
+       VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp),
+                       BMAPI_UNWRITTEN, NULL, NULL, error);
+       XFS_BUF_SET_FSPRIVATE(bp, NULL);
+       XFS_BUF_CLR_IODONE_FUNC(bp);
+       XFS_BUF_UNDATAIO(bp);
+       iput(LINVFS_GET_IP(vp));
+       pagebuf_iodone(bp, 0, 0);
+}
+
+/*
+ * Issue transactions to convert a buffer range from unwritten
+ * to written extents (direct IO).
+ */
+STATIC void
+linvfs_unwritten_convert_direct(
+       struct inode    *inode,
+       loff_t          offset,
+       ssize_t         size,
+       void            *private)
+{
+       ASSERT(!private || inode == (struct inode *)private);
+
+       /* private indicates an unwritten extent lay beneath this IO,
+        * see linvfs_get_block_core.
+        */
+       if (private && size > 0) {
+               vnode_t *vp = LINVFS_GET_VP(inode);
+               int     error;
+
+               VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
+       }
+}
+
+STATIC int
+xfs_map_blocks(
+       struct inode            *inode,
+       loff_t                  offset,
+       ssize_t                 count,
+       xfs_iomap_t             *iomapp,
+       int                     flags)
+{
+       vnode_t                 *vp = LINVFS_GET_VP(inode);
+       int                     error, niomaps = 1;
+
+       if (((flags & (BMAPI_DIRECT|BMAPI_SYNC)) == BMAPI_DIRECT) &&
+           (offset >= i_size_read(inode)))
+               count = max_t(ssize_t, count, XFS_WRITE_IO_LOG);
+retry:
+       VOP_BMAP(vp, offset, count, flags, iomapp, &niomaps, error);
+       if ((error == EAGAIN) || (error == EIO))
+               return -error;
+       if (unlikely((flags & (BMAPI_WRITE|BMAPI_DIRECT)) ==
+                                       (BMAPI_WRITE|BMAPI_DIRECT) && niomaps &&
+                                       (iomapp->iomap_flags & IOMAP_DELAY))) {
+               flags = BMAPI_ALLOCATE;
+               goto retry;
+       }
+       if (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
+               VMODIFY(vp);
+       }
+       return -error;
+}
+
+/*
+ * Finds the block mapping in the @iomapp array that corresponds to the
+ * given @offset within a @page.
+ */
+STATIC xfs_iomap_t *
+xfs_offset_to_map(
+       struct page             *page,
+       xfs_iomap_t             *iomapp,
+       unsigned long           offset)
+{
+       loff_t                  full_offset;    /* offset from start of file */
+
+       ASSERT(offset < PAGE_CACHE_SIZE);
+
+       full_offset = page->index;              /* NB: using 64bit number */
+       full_offset <<= PAGE_CACHE_SHIFT;       /* offset from file start */
+       full_offset += offset;                  /* offset from page start */
+
+       if (full_offset < iomapp->iomap_offset)
+               return NULL;
+       if (iomapp->iomap_offset + iomapp->iomap_bsize > full_offset)
+               return iomapp;
+       return NULL;
+}
+
+STATIC void
+xfs_map_at_offset(
+       struct page             *page,
+       struct buffer_head      *bh,
+       unsigned long           offset,
+       int                     block_bits,
+       xfs_iomap_t             *iomapp)
+{
+       xfs_daddr_t             bn;
+       loff_t                  delta;
+       int                     sector_shift;
+
+       ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
+       ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
+       ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);
+
+       delta = page->index;
+       delta <<= PAGE_CACHE_SHIFT;
+       delta += offset;
+       delta -= iomapp->iomap_offset;
+       delta >>= block_bits;
+
+       sector_shift = block_bits - BBSHIFT;
+       bn = iomapp->iomap_bn >> sector_shift;
+       bn += delta;
+       ASSERT((bn << sector_shift) >= iomapp->iomap_bn);
+
+       lock_buffer(bh);
+       bh->b_blocknr = bn;
+       bh->b_bdev = iomapp->iomap_target->pbr_bdev;
+       set_buffer_mapped(bh);
+       clear_buffer_delay(bh);
+}
+
+/*
+ * Look for a page at index which is unlocked and contains our
+ * unwritten extent flagged buffers at its head.  Returns page
+ * locked and with an extra reference count, and length of the
+ * unwritten extent component on this page that we can write,
+ * in units of filesystem blocks.
+ */
+STATIC struct page *
+xfs_probe_unwritten_page(
+       struct address_space    *mapping,
+       pgoff_t                 index,
+       xfs_iomap_t             *iomapp,
+       xfs_buf_t               *pb,
+       unsigned long           max_offset,
+       unsigned long           *fsbs,
+       unsigned int            bbits)
+{
+       struct page             *page;
+
+       page = find_trylock_page(mapping, index);
+       if (!page)
+               return 0;
+       if (PageWriteback(page))
+               goto out;
+
+       if (page->mapping && page_has_buffers(page)) {
+               struct buffer_head      *bh, *head;
+               unsigned long           p_offset = 0;
+
+               *fsbs = 0;
+               bh = head = page_buffers(page);
+               do {
+                       if (!buffer_unwritten(bh))
+                               break;
+                       if (!xfs_offset_to_map(page, iomapp, p_offset))
+                               break;
+                       if (p_offset >= max_offset)
+                               break;
+                       xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
+                       set_buffer_unwritten_io(bh);
+                       bh->b_private = pb;
+                       p_offset += bh->b_size;
+                       (*fsbs)++;
+               } while ((bh = bh->b_this_page) != head);
+
+               if (p_offset)
+                       return page;
+       }
+
+out:
+       unlock_page(page);
+       return NULL;
+}
+
+/*
+ * Look for a page at index which is unlocked and not mapped
+ * yet - clustering for mmap write case.
+ */
+STATIC unsigned int
+xfs_probe_unmapped_page(
+       struct address_space    *mapping,
+       pgoff_t                 index,
+       unsigned int            pg_offset)
+{
+       struct page             *page;
+       int                     ret = 0;
+
+       page = find_trylock_page(mapping, index);
+       if (!page)
+               return 0;
+       if (PageWriteback(page))
+               goto out;
+
+       if (page->mapping && PageDirty(page)) {
+               if (page_has_buffers(page)) {
+                       struct buffer_head      *bh, *head;
+
+                       bh = head = page_buffers(page);
+                       do {
+                               if (buffer_mapped(bh) || !buffer_uptodate(bh))
+                                       break;
+                               ret += bh->b_size;
+                               if (ret >= pg_offset)
+                                       break;
+                       } while ((bh = bh->b_this_page) != head);
+               } else
+                       ret = PAGE_CACHE_SIZE;
+       }
+
+out:
+       unlock_page(page);
+       return ret;
+}
+
+STATIC unsigned int
+xfs_probe_unmapped_cluster(
+       struct inode            *inode,
+       struct page             *startpage,
+       struct buffer_head      *bh,
+       struct buffer_head      *head)
+{
+       pgoff_t                 tindex, tlast, tloff;
+       unsigned int            pg_offset, len, total = 0;
+       struct address_space    *mapping = inode->i_mapping;
+
+       /* First sum forwards in this page */
+       do {
+               if (buffer_mapped(bh))
+                       break;
+               total += bh->b_size;
+       } while ((bh = bh->b_this_page) != head);
+
+       /* If we reached the end of the page, sum forwards in
+        * following pages.
+        */
+       if (bh == head) {
+               tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+               /* Prune this back to avoid pathological behavior */
+               tloff = min(tlast, startpage->index + 64);
+               for (tindex = startpage->index + 1; tindex < tloff; tindex++) {
+                       len = xfs_probe_unmapped_page(mapping, tindex,
+                                                       PAGE_CACHE_SIZE);
+                       if (!len)
+                               return total;
+                       total += len;
+               }
+               if (tindex == tlast &&
+                   (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
+                       total += xfs_probe_unmapped_page(mapping,
+                                                       tindex, pg_offset);
+               }
+       }
+       return total;
+}
+
+/*
+ * Probe for a given page (index) in the inode and test if it is delayed
+ * and without unwritten buffers.  Returns page locked and with an extra
+ * reference count.
+ */
+STATIC struct page *
+xfs_probe_delalloc_page(
+       struct inode            *inode,
+       pgoff_t                 index)
+{
+       struct page             *page;
+
+       page = find_trylock_page(inode->i_mapping, index);
+       if (!page)
+               return NULL;
+       if (PageWriteback(page))
+               goto out;
+
+       if (page->mapping && page_has_buffers(page)) {
+               struct buffer_head      *bh, *head;
+               int                     acceptable = 0;
+
+               bh = head = page_buffers(page);
+               do {
+                       if (buffer_unwritten(bh)) {
+                               acceptable = 0;
+                               break;
+                       } else if (buffer_delay(bh)) {
+                               acceptable = 1;
+                       }
+               } while ((bh = bh->b_this_page) != head);
+
+               if (acceptable)
+                       return page;
+       }
+
+out:
+       unlock_page(page);
+       return NULL;
+}
+
+STATIC int
+xfs_map_unwritten(
+       struct inode            *inode,
+       struct page             *start_page,
+       struct buffer_head      *head,
+       struct buffer_head      *curr,
+       unsigned long           p_offset,
+       int                     block_bits,
+       xfs_iomap_t             *iomapp,
+       int                     startio,
+       int                     all_bh)
+{
+       struct buffer_head      *bh = curr;
+       xfs_iomap_t             *tmp;
+       xfs_buf_t               *pb;
+       loff_t                  offset, size;
+       unsigned long           nblocks = 0;
+
+       offset = start_page->index;
+       offset <<= PAGE_CACHE_SHIFT;
+       offset += p_offset;
+
+       /* Get an "empty" pagebuf to manage IO completion.
+        * Proper values will be set before returning. */
+       pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0);
+       if (!pb)
+               return -EAGAIN;
+
+       /* Take a reference to the inode to prevent it from
+        * being reclaimed while we have outstanding unwritten
+        * extent IO on it.
+        */
+       if ((igrab(inode)) != inode) {
+               pagebuf_free(pb);
+               return -EAGAIN;
+       }
+
+       /* Set the count to 1 initially, this will stop an I/O
+        * completion callout which happens before we have started
+        * all the I/O from calling pagebuf_iodone too early.
+        */
+       atomic_set(&pb->pb_io_remaining, 1);
+
+       /* First map forwards in the page consecutive buffers
+        * covering this unwritten extent
+        */
+       do {
+               if (!buffer_unwritten(bh))
+                       break;
+               tmp = xfs_offset_to_map(start_page, iomapp, p_offset);
+               if (!tmp)
+                       break;
+               xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
+               set_buffer_unwritten_io(bh);
+               bh->b_private = pb;
+               p_offset += bh->b_size;
+               nblocks++;
+       } while ((bh = bh->b_this_page) != head);
+
+       atomic_add(nblocks, &pb->pb_io_remaining);
+
+       /* If we reached the end of the page, map forwards in any
+        * following pages which are also covered by this extent.
+        */
+       if (bh == head) {
+               struct address_space    *mapping = inode->i_mapping;
+               pgoff_t                 tindex, tloff, tlast;
+               unsigned long           bs;
+               unsigned int            pg_offset, bbits = inode->i_blkbits;
+               struct page             *page;
+
+               tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+               tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;
+               tloff = min(tlast, tloff);
+               for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
+                       page = xfs_probe_unwritten_page(mapping,
+                                               tindex, iomapp, pb,
+                                               PAGE_CACHE_SIZE, &bs, bbits);
+                       if (!page)
+                               break;
+                       nblocks += bs;
+                       atomic_add(bs, &pb->pb_io_remaining);
+                       xfs_convert_page(inode, page, iomapp, pb,
+                                                       startio, all_bh);
+                       /* stop if converting the next page might add
+                        * enough blocks that the corresponding byte
+                        * count won't fit in our ulong page buf length */
+                       if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
+                               goto enough;
+               }
+
+               if (tindex == tlast &&
+                   (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
+                       page = xfs_probe_unwritten_page(mapping,
+                                                       tindex, iomapp, pb,
+                                                       pg_offset, &bs, bbits);
+                       if (page) {
+                               nblocks += bs;
+                               atomic_add(bs, &pb->pb_io_remaining);
+                               xfs_convert_page(inode, page, iomapp, pb,
+                                                       startio, all_bh);
+                               if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
+                                       goto enough;
+                       }
+               }
+       }
+
+enough:
+       size = nblocks;         /* NB: using 64bit number here */
+       size <<= block_bits;    /* convert fsb's to byte range */
+
+       XFS_BUF_DATAIO(pb);
+       XFS_BUF_ASYNC(pb);
+       XFS_BUF_SET_SIZE(pb, size);
+       XFS_BUF_SET_COUNT(pb, size);
+       XFS_BUF_SET_OFFSET(pb, offset);
+       XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode));
+       XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert);
+
+       if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
+               pagebuf_iodone(pb, 1, 1);
+       }
+
+       return 0;
+}
+
+STATIC void
+xfs_submit_page(
+       struct page             *page,
+       struct buffer_head      *bh_arr[],
+       int                     cnt)
+{
+       struct buffer_head      *bh;
+       int                     i;
+
+       BUG_ON(PageWriteback(page));
+       set_page_writeback(page);
+       clear_page_dirty(page);
+       unlock_page(page);
+
+       if (cnt) {
+               for (i = 0; i < cnt; i++) {
+                       bh = bh_arr[i];
+                       mark_buffer_async_write(bh);
+                       if (buffer_unwritten(bh))
+                               set_buffer_unwritten_io(bh);
+                       set_buffer_uptodate(bh);
+                       clear_buffer_dirty(bh);
+               }
+
+               for (i = 0; i < cnt; i++)
+                       submit_bh(WRITE, bh_arr[i]);
+       } else
+               end_page_writeback(page);
+}
+
+/*
+ * Allocate & map buffers for page given the extent map. Write it out.
+ * Except for the original page of a writepage, this is called on
+ * delalloc/unwritten pages only; for the original page it is possible
+ * that the page has no mapping at all.
+ */
+STATIC void
+xfs_convert_page(
+       struct inode            *inode,
+       struct page             *page,
+       xfs_iomap_t             *iomapp,
+       void                    *private,
+       int                     startio,
+       int                     all_bh)
+{
+       struct buffer_head      *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
+       xfs_iomap_t             *mp = iomapp, *tmp;
+       unsigned long           end, offset;
+       pgoff_t                 end_index;
+       int                     i = 0, index = 0;
+       int                     bbits = inode->i_blkbits;
+
+       end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+       if (page->index < end_index) {
+               end = PAGE_CACHE_SIZE;
+       } else {
+               end = i_size_read(inode) & (PAGE_CACHE_SIZE-1);
+       }
+       bh = head = page_buffers(page);
+       do {
+               offset = i << bbits;
+               if (!(PageUptodate(page) || buffer_uptodate(bh)))
+                       continue;
+               if (buffer_mapped(bh) && all_bh &&
+                   !buffer_unwritten(bh) && !buffer_delay(bh)) {
+                       if (startio && (offset < end)) {
+                               lock_buffer(bh);
+                               bh_arr[index++] = bh;
+                       }
+                       continue;
+               }
+               tmp = xfs_offset_to_map(page, mp, offset);
+               if (!tmp)
+                       continue;
+               ASSERT(!(tmp->iomap_flags & IOMAP_HOLE));
+               ASSERT(!(tmp->iomap_flags & IOMAP_DELAY));
+
+               /* If this is a new unwritten extent buffer (i.e. one
+                * that we haven't passed in private data for), we must
+                * now map this buffer too.
+                */
+               if (buffer_unwritten(bh) && !bh->b_end_io) {
+                       ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN);
+                       xfs_map_unwritten(inode, page, head, bh,
+                                       offset, bbits, tmp, startio, all_bh);
+               } else if (! (buffer_unwritten(bh) && buffer_locked(bh))) {
+                       xfs_map_at_offset(page, bh, offset, bbits, tmp);
+                       if (buffer_unwritten(bh)) {
+                               set_buffer_unwritten_io(bh);
+                               bh->b_private = private;
+                               ASSERT(private);
+                       }
+               }
+               if (startio && (offset < end)) {
+                       bh_arr[index++] = bh;
+               } else {
+                       set_buffer_dirty(bh);
+                       unlock_buffer(bh);
+                       mark_buffer_dirty(bh);
+               }
+       } while (i++, (bh = bh->b_this_page) != head);
+
+       if (startio) {
+               xfs_submit_page(page, bh_arr, index);
+       } else {
+               unlock_page(page);
+       }
+}
+
+/*
+ * Convert & write out a cluster of pages in the same extent as defined
+ * by mp and following the start page.
+ */
+STATIC void
+xfs_cluster_write(
+       struct inode            *inode,
+       pgoff_t                 tindex,
+       xfs_iomap_t             *iomapp,
+       int                     startio,
+       int                     all_bh)
+{
+       pgoff_t                 tlast;
+       struct page             *page;
+
+       tlast = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;
+       for (; tindex < tlast; tindex++) {
+               page = xfs_probe_delalloc_page(inode, tindex);
+               if (!page)
+                       break;
+               xfs_convert_page(inode, page, iomapp, NULL, startio, all_bh);
+       }
+}
+
+/*
+ * Calling this without startio set means we are being asked to make a dirty
+ * page ready for freeing its buffers.  When called with startio set then
+ * we are coming from writepage.
+ *
+ * When called with startio set it is important that we write the WHOLE
+ * page if possible.
+ * The bh->b_state's cannot know whether any of the blocks (or which
+ * block, for that matter) are dirty due to mmap writes, and therefore bh
+ * uptodate is only valid if the page itself isn't completely uptodate.
+ * Some layers may clear the page dirty flag prior to calling write page,
+ * under the assumption the entire page will be written out; by not
+ * writing out the whole page the page can be reused before all valid
+ * dirty data is written out.  Note: in the case of a page that has been
+ * dirtied by mapwrite but only partially set up by block_prepare_write,
+ * the bh->b_states will not agree and only the ones set up by BPW/BCW
+ * will have valid state; thus the whole page must be written out.
+ */
+
+STATIC int
+xfs_page_state_convert(
+       struct inode    *inode,
+       struct page     *page,
+       int             startio,
+       int             unmapped) /* also implies page uptodate */
+{
+       struct buffer_head      *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
+       xfs_iomap_t             *iomp, iomap;
+       unsigned long           p_offset = 0;
+       pgoff_t                 end_index;
+       loff_t                  offset;
+       unsigned long long      end_offset;
+       int                     len, err, i, cnt = 0, uptodate = 1;
+       int                     flags = startio ? 0 : BMAPI_TRYLOCK;
+       int                     page_dirty = 1;
+
+
+       /* Are we off the end of the file ? */
+       end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+       if (page->index >= end_index) {
+               if ((page->index >= end_index + 1) ||
+                   !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
+                       err = -EIO;
+                       goto error;
+               }
+       }
+
+       offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
+       end_offset = min_t(unsigned long long,
+                       offset + PAGE_CACHE_SIZE, i_size_read(inode));
+
+       bh = head = page_buffers(page);
+       iomp = NULL;
+
+       len = bh->b_size;
+       do {
+               if (offset >= end_offset)
+                       break;
+               if (!buffer_uptodate(bh))
+                       uptodate = 0;
+               if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio)
+                       continue;
+
+               if (iomp) {
+                       iomp = xfs_offset_to_map(page, &iomap, p_offset);
+               }
+
+               /*
+                * First case, map an unwritten extent and prepare for
+                * extent state conversion transaction on completion.
+                */
+               if (buffer_unwritten(bh)) {
+                       if (!iomp) {
+                               err = xfs_map_blocks(inode, offset, len, &iomap,
+                                               BMAPI_READ|BMAPI_IGNSTATE);
+                               if (err) {
+                                       goto error;
+                               }
+                               iomp = xfs_offset_to_map(page, &iomap,
+                                                               p_offset);
+                       }
+                       if (iomp && startio) {
+                               if (!bh->b_end_io) {
+                                       err = xfs_map_unwritten(inode, page,
+                                                       head, bh, p_offset,
+                                                       inode->i_blkbits, iomp,
+                                                       startio, unmapped);
+                                       if (err) {
+                                               goto error;
+                                       }
+                               }
+                               bh_arr[cnt++] = bh;
+                               page_dirty = 0;
+                       }
+               /*
+                * Second case, allocate space for a delalloc buffer.
+                * We can return EAGAIN here in the release page case.
+                */
+               } else if (buffer_delay(bh)) {
+                       if (!iomp) {
+                               err = xfs_map_blocks(inode, offset, len, &iomap,
+                                               BMAPI_ALLOCATE | flags);
+                               if (err) {
+                                       goto error;
+                               }
+                               iomp = xfs_offset_to_map(page, &iomap,
+                                                               p_offset);
+                       }
+                       if (iomp) {
+                               xfs_map_at_offset(page, bh, p_offset,
+                                               inode->i_blkbits, iomp);
+                               if (startio) {
+                                       bh_arr[cnt++] = bh;
+                               } else {
+                                       set_buffer_dirty(bh);
+                                       unlock_buffer(bh);
+                                       mark_buffer_dirty(bh);
+                               }
+                               page_dirty = 0;
+                       }
+               } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
+                          (unmapped || startio)) {
+
+                       if (!buffer_mapped(bh)) {
+                               int     size;
+
+                               /*
+                                * Getting here implies an unmapped buffer
+                                * was found, and we are in a path where we
+                                * need to write the whole page out.
+                                */
+                               if (!iomp) {
+                                       size = xfs_probe_unmapped_cluster(
+                                                       inode, page, bh, head);
+                                       err = xfs_map_blocks(inode, offset,
+                                                       size, &iomap,
+                                                       BMAPI_WRITE|BMAPI_MMAP);
+                                       if (err) {
+                                               goto error;
+                                       }
+                                       iomp = xfs_offset_to_map(page, &iomap,
+                                                                    p_offset);
+                               }
+                               if (iomp) {
+                                       xfs_map_at_offset(page,
+                                                       bh, p_offset,
+                                                       inode->i_blkbits, iomp);
+                                       if (startio) {
+                                               bh_arr[cnt++] = bh;
+                                       } else {
+                                               set_buffer_dirty(bh);
+                                               unlock_buffer(bh);
+                                               mark_buffer_dirty(bh);
+                                       }
+                                       page_dirty = 0;
+                               }
+                       } else if (startio) {
+                               if (buffer_uptodate(bh) &&
+                                   !test_and_set_bit(BH_Lock, &bh->b_state)) {
+                                       bh_arr[cnt++] = bh;
+                                       page_dirty = 0;
+                               }
+                       }
+               }
+       } while (offset += len, p_offset += len,
+               ((bh = bh->b_this_page) != head));
+
+       if (uptodate && bh == head)
+               SetPageUptodate(page);
+
+       if (startio)
+               xfs_submit_page(page, bh_arr, cnt);
+
+       if (iomp)
+               xfs_cluster_write(inode, page->index + 1, iomp, startio, unmapped);
+
+       return page_dirty;
+
+error:
+       for (i = 0; i < cnt; i++) {
+               unlock_buffer(bh_arr[i]);
+       }
+
+       /*
+        * If it's delalloc and we have nowhere to put it,
+        * throw it away, unless the lower layers told
+        * us to try again.
+        */
+       if (err != -EAGAIN) {
+               if (!unmapped) {
+                       block_invalidatepage(page, 0);
+               }
+               ClearPageUptodate(page);
+       }
+       return err;
+}
+
+STATIC int
+linvfs_get_block_core(
+       struct inode            *inode,
+       sector_t                iblock,
+       unsigned long           blocks,
+       struct buffer_head      *bh_result,
+       int                     create,
+       int                     direct,
+       bmapi_flags_t           flags)
+{
+       vnode_t                 *vp = LINVFS_GET_VP(inode);
+       xfs_iomap_t             iomap;
+       int                     retpbbm = 1;
+       int                     error;
+       ssize_t                 size;
+       loff_t                  offset = (loff_t)iblock << inode->i_blkbits;
+
+       /* If we are doing writes at the end of the file,
+        * allocate in chunks
+        */
+       if (blocks)
+               size = blocks << inode->i_blkbits;
+       else if (create && (offset >= i_size_read(inode)))
+               size = 1 << XFS_WRITE_IO_LOG;
+       else
+               size = 1 << inode->i_blkbits;
+
+       VOP_BMAP(vp, offset, size,
+               create ? flags : BMAPI_READ, &iomap, &retpbbm, error);
+       if (error)
+               return -error;
+
+       if (retpbbm == 0)
+               return 0;
+
+       if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
+               xfs_daddr_t             bn;
+               loff_t                  delta;
+
+               /* For unwritten extents do not report a disk address on
+                * the read case (treat as if we're reading into a hole).
+                */
+               if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) {
+                       delta = offset - iomap.iomap_offset;
+                       delta >>= inode->i_blkbits;
+
+                       bn = iomap.iomap_bn >> (inode->i_blkbits - BBSHIFT);
+                       bn += delta;
+
+                       bh_result->b_blocknr = bn;
+                       bh_result->b_bdev = iomap.iomap_target->pbr_bdev;
+                       set_buffer_mapped(bh_result);
+               }
+               if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
+                       if (direct)
+                               bh_result->b_private = inode;
+                       set_buffer_unwritten(bh_result);
+                       set_buffer_delay(bh_result);
+               }
+       }
+
+       /* If this is a realtime file, data might be on a new device */
+       bh_result->b_bdev = iomap.iomap_target->pbr_bdev;
+
+       /* If we previously allocated a block out beyond eof and
+        * we are now coming back to use it then we will need to
+        * flag it as new even if it has a disk address.
+        */
+       if (create &&
+           ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
+            (offset >= i_size_read(inode)) || (iomap.iomap_flags & IOMAP_NEW))) {
+               set_buffer_new(bh_result);
+       }
+
+       if (iomap.iomap_flags & IOMAP_DELAY) {
+               if (unlikely(direct))
+                       BUG();
+               if (create) {
+                       set_buffer_mapped(bh_result);
+                       set_buffer_uptodate(bh_result);
+               }
+               bh_result->b_bdev = iomap.iomap_target->pbr_bdev;
+               set_buffer_delay(bh_result);
+       }
+
+       if (blocks) {
+               loff_t iosize;
+               iosize = (iomap.iomap_bsize - iomap.iomap_delta);
+               bh_result->b_size =
+                   (ssize_t)min(iosize, (loff_t)(blocks << inode->i_blkbits));
+       }
+
+       return 0;
+}
+
+int
+linvfs_get_block(
+       struct inode            *inode,
+       sector_t                iblock,
+       struct buffer_head      *bh_result,
+       int                     create)
+{
+       return linvfs_get_block_core(inode, iblock, 0, bh_result,
+                                       create, 0, BMAPI_WRITE);
+}
+
+STATIC int
+linvfs_get_block_sync(
+       struct inode            *inode,
+       sector_t                iblock,
+       struct buffer_head      *bh_result,
+       int                     create)
+{
+       return linvfs_get_block_core(inode, iblock, 0, bh_result,
+                                       create, 0, BMAPI_SYNC|BMAPI_WRITE);
+}
+
+STATIC int
+linvfs_get_blocks_direct(
+       struct inode            *inode,
+       sector_t                iblock,
+       unsigned long           max_blocks,
+       struct buffer_head      *bh_result,
+       int                     create)
+{
+       return linvfs_get_block_core(inode, iblock, max_blocks, bh_result,
+                                       create, 1, BMAPI_WRITE|BMAPI_DIRECT);
+}
+
+STATIC ssize_t
+linvfs_direct_IO(
+       int                     rw,
+       struct kiocb            *iocb,
+       const struct iovec      *iov,
+       loff_t                  offset,
+       unsigned long           nr_segs)
+{
+       struct file     *file = iocb->ki_filp;
+       struct inode    *inode = file->f_mapping->host;
+       vnode_t         *vp = LINVFS_GET_VP(inode);
+       xfs_iomap_t     iomap;
+       int             maps = 1;
+       int             error;
+
+       VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error);
+       if (error)
+               return -error;
+
+       return blockdev_direct_IO_no_locking(rw, iocb, inode,
+               iomap.iomap_target->pbr_bdev,
+               iov, offset, nr_segs,
+               linvfs_get_blocks_direct,
+               linvfs_unwritten_convert_direct);
+}
+
+
+STATIC sector_t
+linvfs_bmap(
+       struct address_space    *mapping,
+       sector_t                block)
+{
+       struct inode            *inode = (struct inode *)mapping->host;
+       vnode_t                 *vp = LINVFS_GET_VP(inode);
+       int                     error;
+
+       vn_trace_entry(vp, "linvfs_bmap", (inst_t *)__return_address);
+
+       VOP_RWLOCK(vp, VRWLOCK_READ);
+       VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error);
+       VOP_RWUNLOCK(vp, VRWLOCK_READ);
+       return generic_block_bmap(mapping, block, linvfs_get_block);
+}
+
+STATIC int
+linvfs_readpage(
+       struct file             *unused,
+       struct page             *page)
+{
+       return mpage_readpage(page, linvfs_get_block);
+}
+
+STATIC int
+linvfs_readpages(
+       struct file             *unused,
+       struct address_space    *mapping,
+       struct list_head        *pages,
+       unsigned                nr_pages)
+{
+       return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block);
+}
+
+STATIC void
+xfs_count_page_state(
+       struct page             *page,
+       int                     *delalloc,
+       int                     *unmapped,
+       int                     *unwritten)
+{
+       struct buffer_head      *bh, *head;
+
+       *delalloc = *unmapped = *unwritten = 0;
+
+       bh = head = page_buffers(page);
+       do {
+               if (buffer_uptodate(bh) && !buffer_mapped(bh))
+                       (*unmapped) = 1;
+               else if (buffer_unwritten(bh) && !buffer_delay(bh))
+                       clear_buffer_unwritten(bh);
+               else if (buffer_unwritten(bh))
+                       (*unwritten) = 1;
+               else if (buffer_delay(bh))
+                       (*delalloc) = 1;
+       } while ((bh = bh->b_this_page) != head);
+}
+
+
+/*
+ * writepage: Called from one of two places:
+ *
+ * 1. we are flushing a delalloc buffer head.
+ *
+ * 2. we are writing out a dirty page. Typically the page dirty
+ *    state is cleared before we get here. In this case it is
+ *    conceivable that we have no buffer heads.
+ *
+ * For delalloc space on the page we need to allocate space and
+ * flush it. For unmapped buffer heads on the page we should
+ * allocate space if the page is uptodate. For any other dirty
+ * buffer heads on the page we should flush them.
+ *
+ * If we detect that a transaction would be required to flush
+ * the page, we have to check the process flags first: if we
+ * are already in a transaction, or if disk I/O during allocations
+ * is disabled, we need to fail the writepage and redirty the page.
+ */
+
+STATIC int
+linvfs_writepage(
+       struct page             *page,
+       struct writeback_control *wbc)
+{
+       int                     error;
+       int                     need_trans;
+       int                     delalloc, unmapped, unwritten;
+       struct inode            *inode = page->mapping->host;
+
+       xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0);
+
+       /*
+        * We need a transaction if:
+        *  1. There are delalloc buffers on the page
+        *  2. The page is uptodate and we have unmapped buffers
+        *  3. The page is uptodate and we have no buffers
+        *  4. There are unwritten buffers on the page
+        */
+
+       if (!page_has_buffers(page)) {
+               unmapped = 1;
+               need_trans = 1;
+       } else {
+               xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
+               if (!PageUptodate(page))
+                       unmapped = 0;
+               need_trans = delalloc + unmapped + unwritten;
+       }
+
+       /*
+        * If we need a transaction and the process flags say
+        * we are already in a transaction, or no I/O is allowed,
+        * then mark the page dirty again and leave the page
+        * as is.
+        */
+       if (PFLAGS_TEST_FSTRANS() && need_trans)
+               goto out_fail;
+
+       /*
+        * Delay hooking up buffer heads until we have
+        * made our go/no-go decision.
+        */
+       if (!page_has_buffers(page))
+               create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+
+       /*
+        * Convert delayed allocate, unwritten or unmapped space
+        * to real space and flush out to disk.
+        */
+       error = xfs_page_state_convert(inode, page, 1, unmapped);
+       if (error == -EAGAIN)
+               goto out_fail;
+       if (unlikely(error < 0))
+               goto out_unlock;
+
+       return 0;
+
+out_fail:
+       set_page_dirty(page);
+       unlock_page(page);
+       return 0;
+out_unlock:
+       unlock_page(page);
+       return error;
+}
+
+/*
+ * Called to move a page into cleanable state - and from there
+ * to be released. Possibly the page is already clean. We always
+ * have buffer heads in this call.
+ *
+ * Returns non-zero if the page is ok to release, 0 otherwise.
+ *
+ * Possible scenarios are:
+ *
+ * 1. We are being called to release a page which has been written
+ *    to via regular I/O. The buffer heads will be dirty and possibly
+ *    delalloc. If there are no delalloc buffer heads in this case
+ *    then we can just free the buffers.
+ *
+ * 2. We are called to release a page which has been written via
+ *    mmap; all we need to do is ensure there is no delalloc state
+ *    in the buffer heads. If there is none, we can let the caller
+ *    free them, and we will come back later via writepage.
+ */
+STATIC int
+linvfs_release_page(
+       struct page             *page,
+       int                     gfp_mask)
+{
+       struct inode            *inode = page->mapping->host;
+       int                     dirty, delalloc, unmapped, unwritten;
+
+       xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask);
+
+       xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
+       if (!delalloc && !unwritten)
+               goto free_buffers;
+
+       if (!(gfp_mask & __GFP_FS))
+               return 0;
+
+       /* If we are already inside a transaction or the thread cannot
+        * do I/O, we cannot release this page.
+        */
+       if (PFLAGS_TEST_FSTRANS())
+               return 0;
+
+       /*
+        * Convert delalloc space to real space, but do not flush the
+        * data out to disk; that will be done by the caller.
+        * We never need to allocate space here - we will always
+        * come back to writepage in that case.
+        */
+       dirty = xfs_page_state_convert(inode, page, 0, 0);
+       if (dirty == 0 && !unwritten)
+               goto free_buffers;
+       return 0;
+
+free_buffers:
+       return try_to_free_buffers(page);
+}
+
+STATIC int
+linvfs_prepare_write(
+       struct file             *file,
+       struct page             *page,
+       unsigned int            from,
+       unsigned int            to)
+{
+       if (file && (file->f_flags & O_SYNC)) {
+               return block_prepare_write(page, from, to,
+                                               linvfs_get_block_sync);
+       } else {
+               return block_prepare_write(page, from, to,
+                                               linvfs_get_block);
+       }
+}
+
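+/*
+ * The address space operations used for inodes managed through the
+ * linvfs layer.
+ */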
+struct address_space_operations linvfs_aops = {
+       .readpage               = linvfs_readpage,
+       .readpages              = linvfs_readpages,
+       .writepage              = linvfs_writepage,
+       .sync_page              = block_sync_page,
+       .releasepage            = linvfs_release_page,
+       .prepare_write          = linvfs_prepare_write,
+       .commit_write           = generic_commit_write,
+       .bmap                   = linvfs_bmap,
+       .direct_IO              = linvfs_direct_IO,
+};
diff --git a/fs/xfs/linux/xfs_buf.c b/fs/xfs/linux/xfs_buf.c
new file mode 100644 (file)
index 0000000..69050a0
--- /dev/null
+++ b/fs/xfs/linux/xfs_buf.c
@@ -0,0 +1,1811 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ *     The xfs_buf.c code provides an abstract buffer cache model on top
+ *     of the Linux page cache.  Cached metadata blocks for a file system
+ *     are hashed to the inode for the block device.  xfs_buf.c assembles
+ *     buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O.
+ *
+ *      Written by Steve Lord, Jim Mostek, Russell Cattelan
+ *                 and Rajagopal Ananthanarayanan ("ananth") at SGI.
+ *
+ */
+
+#include <linux/stddef.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <linux/bio.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/workqueue.h>
+#include <linux/suspend.h>
+#include <linux/percpu.h>
+
+#include "xfs_linux.h"
+
+#ifndef GFP_READAHEAD
+#define GFP_READAHEAD  (__GFP_NOWARN|__GFP_NORETRY)
+#endif
+
+/*
+ * File wide globals
+ */
+
+STATIC kmem_cache_t *pagebuf_cache;
+STATIC void pagebuf_daemon_wakeup(void);
+STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
+STATIC struct workqueue_struct *pagebuf_logio_workqueue;
+STATIC struct workqueue_struct *pagebuf_dataio_workqueue;
+
+/*
+ * Pagebuf debugging
+ */
+
+#ifdef PAGEBUF_TRACE
+void
+pagebuf_trace(
+       xfs_buf_t       *pb,
+       char            *id,
+       void            *data,
+       void            *ra)
+{
+       ktrace_enter(pagebuf_trace_buf,
+               pb, id,
+               (void *)(unsigned long)pb->pb_flags,
+               (void *)(unsigned long)pb->pb_hold.counter,
+               (void *)(unsigned long)pb->pb_sema.count.counter,
+               (void *)current,
+               data, ra,
+               (void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff),
+               (void *)(unsigned long)(pb->pb_file_offset & 0xffffffff),
+               (void *)(unsigned long)pb->pb_buffer_length,
+               NULL, NULL, NULL, NULL, NULL);
+}
+ktrace_t *pagebuf_trace_buf;
+#define PAGEBUF_TRACE_SIZE     4096
+#define PB_TRACE(pb, id, data) \
+       pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0))
+#else
+#define PB_TRACE(pb, id, data) do { } while (0)
+#endif
+
+#ifdef PAGEBUF_LOCK_TRACKING
+# define PB_SET_OWNER(pb)      ((pb)->pb_last_holder = current->pid)
+# define PB_CLEAR_OWNER(pb)    ((pb)->pb_last_holder = -1)
+# define PB_GET_OWNER(pb)      ((pb)->pb_last_holder)
+#else
+# define PB_SET_OWNER(pb)      do { } while (0)
+# define PB_CLEAR_OWNER(pb)    do { } while (0)
+# define PB_GET_OWNER(pb)      do { } while (0)
+#endif
+
+/*
+ * Pagebuf allocation / freeing.
+ */
+
+#define pb_to_gfp(flags) \
+       (((flags) & PBF_READ_AHEAD) ? GFP_READAHEAD : \
+        ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL)
+
+#define pb_to_km(flags) \
+        (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
+
+
+#define pagebuf_allocate(flags) \
+       kmem_zone_alloc(pagebuf_cache, pb_to_km(flags))
+#define pagebuf_deallocate(pb) \
+       kmem_zone_free(pagebuf_cache, (pb));
+
+/*
+ * Pagebuf hashing
+ */
+
+#define NBITS  8
+#define NHASH  (1<<NBITS)
+
+typedef struct {
+       struct list_head        pb_hash;
+       spinlock_t              pb_hash_lock;
+} pb_hash_t;
+
+STATIC pb_hash_t       pbhash[NHASH];
+#define pb_hash(pb)    &pbhash[pb->pb_hash_index]
+
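+/*
+ * Hash a (block device, file offset) pair to one of the NHASH buckets:
+ * reduce the offset to 512-byte block units, mix in the device pointer,
+ * and fold the result into the hash value NBITS bits at a time.
+ */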
+STATIC int
+_bhash(
+       struct block_device *bdev,
+       loff_t          base)
+{
+       int             bit, hval;
+
+       base >>= 9;
+       base ^= (unsigned long)bdev / L1_CACHE_BYTES;
+       for (bit = hval = 0; base && bit < sizeof(base) * 8; bit += NBITS) {
+               hval ^= (int)base & (NHASH-1);
+               base >>= NBITS;
+       }
+       return hval;
+}
+
+/*
+ * Mapping of multi-page buffers into contiguous virtual space
+ */
+
+typedef struct a_list {
+       void            *vm_addr;
+       struct a_list   *next;
+} a_list_t;
+
+STATIC a_list_t                *as_free_head;
+STATIC int             as_list_len;
+STATIC spinlock_t      as_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Try to batch vunmaps because they are costly.
+ */
+STATIC void
+free_address(
+       void            *addr)
+{
+       a_list_t        *aentry;
+
+       aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC);
+       if (aentry) {
+               spin_lock(&as_lock);
+               aentry->next = as_free_head;
+               aentry->vm_addr = addr;
+               as_free_head = aentry;
+               as_list_len++;
+               spin_unlock(&as_lock);
+       } else {
+               vunmap(addr);
+       }
+}
+
+STATIC void
+purge_addresses(void)
+{
+       a_list_t        *aentry, *old;
+
+       if (as_free_head == NULL)
+               return;
+
+       spin_lock(&as_lock);
+       aentry = as_free_head;
+       as_free_head = NULL;
+       as_list_len = 0;
+       spin_unlock(&as_lock);
+
+       while ((old = aentry) != NULL) {
+               vunmap(aentry->vm_addr);
+               aentry = aentry->next;
+               kfree(old);
+       }
+}
+
+/*
+ *     Internal pagebuf object manipulation
+ */
+
+STATIC void
+_pagebuf_initialize(
+       xfs_buf_t               *pb,
+       xfs_buftarg_t           *target,
+       loff_t                  range_base,
+       size_t                  range_length,
+       page_buf_flags_t        flags)
+{
+       /*
+        * We don't want certain flags to appear in pb->pb_flags.
+        */
+       flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD);
+
+       memset(pb, 0, sizeof(xfs_buf_t));
+       atomic_set(&pb->pb_hold, 1);
+       init_MUTEX_LOCKED(&pb->pb_iodonesema);
+       INIT_LIST_HEAD(&pb->pb_list);
+       INIT_LIST_HEAD(&pb->pb_hash_list);
+       init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */
+       PB_SET_OWNER(pb);
+       pb->pb_target = target;
+       pb->pb_file_offset = range_base;
+       /*
+        * Set buffer_length and count_desired to the same value initially.
+        * I/O routines should use count_desired, which will be the same in
+        * most cases but may be reset (e.g. XFS recovery).
+        */
+       pb->pb_buffer_length = pb->pb_count_desired = range_length;
+       pb->pb_flags = flags | PBF_NONE;
+       pb->pb_bn = XFS_BUF_DADDR_NULL;
+       atomic_set(&pb->pb_pin_count, 0);
+       init_waitqueue_head(&pb->pb_waiters);
+
+       XFS_STATS_INC(pb_create);
+       PB_TRACE(pb, "initialize", target);
+}
+
+/*
+ * Allocate a page array capable of holding a specified number
+ * of pages, and point the page buf at it.
+ */
+STATIC int
+_pagebuf_get_pages(
+       xfs_buf_t               *pb,
+       int                     page_count,
+       page_buf_flags_t        flags)
+{
+       /* Make sure that we have a page list */
+       if (pb->pb_pages == NULL) {
+               pb->pb_offset = page_buf_poff(pb->pb_file_offset);
+               pb->pb_page_count = page_count;
+               if (page_count <= PB_PAGES) {
+                       pb->pb_pages = pb->pb_page_array;
+               } else {
+                       pb->pb_pages = kmem_alloc(sizeof(struct page *) *
+                                       page_count, pb_to_km(flags));
+                       if (pb->pb_pages == NULL)
+                               return -ENOMEM;
+               }
+               memset(pb->pb_pages, 0, sizeof(struct page *) * page_count);
+       }
+       return 0;
+}
+
+/*
+ *     Frees pb_pages if it was malloced.
+ */
+STATIC void
+_pagebuf_free_pages(
+       xfs_buf_t       *bp)
+{
+       if (bp->pb_pages != bp->pb_page_array) {
+               kmem_free(bp->pb_pages,
+                         bp->pb_page_count * sizeof(struct page *));
+       }
+}
+
+/*
+ *     Releases the specified buffer.
+ *
+ *     The modification state of any associated pages is left unchanged.
+ *     The buffer must not be on any hash - use pagebuf_rele instead for
+ *     hashed and refcounted buffers.
+ */
+void
+pagebuf_free(
+       xfs_buf_t               *bp)
+{
+       PB_TRACE(bp, "free", 0);
+
+       ASSERT(list_empty(&bp->pb_hash_list));
+
+       if (bp->pb_flags & _PBF_PAGE_CACHE) {
+               uint            i;
+
+               if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1))
+                       free_address(bp->pb_addr - bp->pb_offset);
+
+               for (i = 0; i < bp->pb_page_count; i++)
+                       page_cache_release(bp->pb_pages[i]);
+               _pagebuf_free_pages(bp);
+       } else if (bp->pb_flags & _PBF_KMEM_ALLOC) {
+                /*
+                 * XXX(hch): bp->pb_count_desired might be incorrect (see
+                 * pagebuf_associate_memory for details), but fortunately
+                 * the Linux version of kmem_free ignores the len argument..
+                 */
+               kmem_free(bp->pb_addr, bp->pb_count_desired);
+               _pagebuf_free_pages(bp);
+       }
+
+       pagebuf_deallocate(bp);
+}
+
+/*
+ *     Finds all pages for the buffer in question and builds its page list.
+ */
+STATIC int
+_pagebuf_lookup_pages(
+       xfs_buf_t               *bp,
+       uint                    flags)
+{
+       struct address_space    *mapping = bp->pb_target->pbr_mapping;
+       unsigned int            sectorshift = bp->pb_target->pbr_sshift;
+       size_t                  blocksize = bp->pb_target->pbr_bsize;
+       size_t                  size = bp->pb_count_desired;
+       size_t                  nbytes, offset;
+       int                     gfp_mask = pb_to_gfp(flags);
+       unsigned short          page_count, i;
+       pgoff_t                 first;
+       loff_t                  end;
+       int                     error;
+
+       end = bp->pb_file_offset + bp->pb_buffer_length;
+       page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset);
+
+       error = _pagebuf_get_pages(bp, page_count, flags);
+       if (unlikely(error))
+               return error;
+
+       offset = bp->pb_offset;
+       first = bp->pb_file_offset >> PAGE_CACHE_SHIFT;
+
+       for (i = 0; i < bp->pb_page_count; i++) {
+               struct page     *page;
+               uint            retries = 0;
+
+             retry:
+               page = find_or_create_page(mapping, first + i, gfp_mask);
+               if (unlikely(page == NULL)) {
+                       if (flags & PBF_READ_AHEAD)
+                               return -ENOMEM;
+
+                       /*
+                        * This could deadlock.
+                        *
+                        * But until all the XFS lowlevel code is revamped to
+                        * handle buffer allocation failures we can't do much.
+                        */
+                       if (!(++retries % 100)) {
+                               printk(KERN_ERR "possibly deadlocking in %s\n",
+                                               __FUNCTION__);
+                       }
+
+                       XFS_STATS_INC(pb_page_retries);
+                       pagebuf_daemon_wakeup();
+                       current->state = TASK_UNINTERRUPTIBLE;
+                       schedule_timeout(10);
+                       goto retry;
+               }
+
+               XFS_STATS_INC(pb_page_found);
+
+               nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
+               size -= nbytes;
+
+               if (!PageUptodate(page)) {
+                       page_count--;
+                       if (blocksize == PAGE_CACHE_SIZE) {
+                               if (flags & PBF_READ)
+                                       bp->pb_locked = 1;
+                       } else if (!PagePrivate(page)) {
+                               unsigned long   j, range;
+
+                               /*
+                                * In this case page->private holds a bitmap
+                                * of uptodate sectors within the page
+                                */
+                               ASSERT(blocksize < PAGE_CACHE_SIZE);
+                               range = (offset + nbytes) >> sectorshift;
+                               for (j = offset >> sectorshift; j < range; j++)
+                                       if (!test_bit(j, &page->private))
+                                               break;
+                               if (j == range)
+                                       page_count++;
+                       }
+               }
+
+               bp->pb_pages[i] = page;
+               offset = 0;
+       }
+
+       if (!bp->pb_locked) {
+               for (i = 0; i < bp->pb_page_count; i++)
+                       unlock_page(bp->pb_pages[i]);
+       }
+
+       bp->pb_flags |= _PBF_PAGE_CACHE;
+
+       if (page_count) {
+               /* if we have any uptodate pages, mark that in the buffer */
+               bp->pb_flags &= ~PBF_NONE;
+
+               /* if some pages aren't uptodate, mark that in the buffer */
+               if (page_count != bp->pb_page_count)
+                       bp->pb_flags |= PBF_PARTIAL;
+       }
+
+       PB_TRACE(bp, "lookup_pages", (long)page_count);
+       return error;
+}
+
+/*
+ *     Map buffer into kernel address-space if necessary.
+ */
+STATIC int
+_pagebuf_map_pages(
+       xfs_buf_t               *bp,
+       uint                    flags)
+{
+       /* A single page buffer is always mappable */
+       if (bp->pb_page_count == 1) {
+               bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset;
+               bp->pb_flags |= PBF_MAPPED;
+       } else if (flags & PBF_MAPPED) {
+               if (as_list_len > 64)
+                       purge_addresses();
+               bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count,
+                               VM_MAP, PAGE_KERNEL);
+               if (unlikely(bp->pb_addr == NULL))
+                       return -ENOMEM;
+               bp->pb_addr += bp->pb_offset;
+               bp->pb_flags |= PBF_MAPPED;
+       }
+
+       return 0;
+}
+
+/*
+ *     Finding and Reading Buffers
+ */
+
+/*
+ *     _pagebuf_find
+ *
+ *     Looks up, and creates if absent, a lockable buffer for
+ *     a given range of an inode.  The buffer is returned
+ *     locked.  If other overlapping buffers exist, they are
+ *     released before the new buffer is created and locked,
+ *     which may imply that this call will block until those buffers
+ *     are unlocked.  No I/O is implied by this call.
+ */
+STATIC xfs_buf_t *
+_pagebuf_find(                         /* find buffer for block        */
+       xfs_buftarg_t           *target,/* target for block             */
+       loff_t                  ioff,   /* starting offset of range     */
+       size_t                  isize,  /* length of range              */
+       page_buf_flags_t        flags,  /* PBF_TRYLOCK                  */
+       xfs_buf_t               *new_pb)/* newly allocated buffer       */
+{
+       loff_t                  range_base;
+       size_t                  range_length;
+       int                     hval;
+       pb_hash_t               *h;
+       xfs_buf_t               *pb, *n;
+       int                     not_locked;
+
+       range_base = (ioff << BBSHIFT);
+       range_length = (isize << BBSHIFT);
+
+       /* Ensure we never do IOs smaller than the sector size */
+       BUG_ON(range_length < (1 << target->pbr_sshift));
+
+       /* Ensure we never do IOs that are not sector aligned */
+       BUG_ON(range_base & (loff_t)target->pbr_smask);
+
+       hval = _bhash(target->pbr_bdev, range_base);
+       h = &pbhash[hval];
+
+       spin_lock(&h->pb_hash_lock);
+       list_for_each_entry_safe(pb, n, &h->pb_hash, pb_hash_list) {
+               if (pb->pb_target == target &&
+                   pb->pb_file_offset == range_base &&
+                   pb->pb_buffer_length == range_length) {
+                       /* If we look at something bring it to the
+                        * front of the list for next time
+                        */
+                       atomic_inc(&pb->pb_hold);
+                       list_move(&pb->pb_hash_list, &h->pb_hash);
+                       goto found;
+               }
+       }
+
+       /* No match found */
+       if (new_pb) {
+               _pagebuf_initialize(new_pb, target, range_base,
+                               range_length, flags);
+               new_pb->pb_hash_index = hval;
+               list_add(&new_pb->pb_hash_list, &h->pb_hash);
+       } else {
+               XFS_STATS_INC(pb_miss_locked);
+       }
+
+       spin_unlock(&h->pb_hash_lock);
+       return (new_pb);
+
+found:
+       spin_unlock(&h->pb_hash_lock);
+
+       /* Attempt to get the semaphore without sleeping,
+        * if this does not work then we need to drop the
+        * spinlock and do a hard attempt on the semaphore.
+        */
+       not_locked = down_trylock(&pb->pb_sema);
+       if (not_locked) {
+               if (!(flags & PBF_TRYLOCK)) {
+                       /* wait for buffer ownership */
+                       PB_TRACE(pb, "get_lock", 0);
+                       pagebuf_lock(pb);
+                       XFS_STATS_INC(pb_get_locked_waited);
+               } else {
+                       /* We asked for a trylock and failed; no need
+                        * to look at file offset and length here, as we
+                        * know that this pagebuf at least overlaps our
+                        * pagebuf and is locked.  Therefore our buffer
+                        * either does not exist, or is this buffer.
+                        */
+
+                       pagebuf_rele(pb);
+                       XFS_STATS_INC(pb_busy_locked);
+                       return (NULL);
+               }
+       } else {
+               /* trylock worked */
+               PB_SET_OWNER(pb);
+       }
+
+       if (pb->pb_flags & PBF_STALE)
+               pb->pb_flags &= PBF_MAPPED;
+       PB_TRACE(pb, "got_lock", 0);
+       XFS_STATS_INC(pb_get_locked);
+       return (pb);
+}
+
+
+/*
+ *     pagebuf_find
+ *
+ *     pagebuf_find returns a buffer matching the specified range of
+ *     data for the specified target, if any of the relevant blocks
+ *     are in memory.  The buffer may have unallocated holes, if
+ *     some, but not all, of the blocks are in memory.  Even where
+ *     pages are present in the buffer, not all of every page may be
+ *     valid.
+ */
+xfs_buf_t *
+pagebuf_find(                          /* find buffer for block        */
+                                       /* if the block is in memory    */
+       xfs_buftarg_t           *target,/* target for block             */
+       loff_t                  ioff,   /* starting offset of range     */
+       size_t                  isize,  /* length of range              */
+       page_buf_flags_t        flags)  /* PBF_TRYLOCK                  */
+{
+       return _pagebuf_find(target, ioff, isize, flags, NULL);
+}
+
+/*
+ *     pagebuf_get
+ *
+ *     pagebuf_get assembles a buffer covering the specified range.
+ *     Some or all of the blocks in the range may be valid.  Storage
+ *     in memory for all portions of the buffer will be allocated,
+ *     although backing storage may not be.  If PBF_READ is set in
+ *     flags, pagebuf_iostart is called also.
+ */
+xfs_buf_t *
+pagebuf_get(                           /* allocate a buffer            */
+       xfs_buftarg_t           *target,/* target for buffer            */
+       loff_t                  ioff,   /* starting offset of range     */
+       size_t                  isize,  /* length of range              */
+       page_buf_flags_t        flags)  /* PBF_TRYLOCK                  */
+{
+       xfs_buf_t               *pb, *new_pb;
+       int                     error = 0, i;
+
+       new_pb = pagebuf_allocate(flags);
+       if (unlikely(!new_pb))
+               return NULL;
+
+       pb = _pagebuf_find(target, ioff, isize, flags, new_pb);
+       if (pb == new_pb) {
+               error = _pagebuf_lookup_pages(pb, flags);
+               if (unlikely(error)) {
+                       printk(KERN_WARNING
+                              "pagebuf_get: failed to lookup pages\n");
+                       goto no_buffer;
+               }
+       } else {
+               pagebuf_deallocate(new_pb);
+               if (unlikely(pb == NULL))
+                       return NULL;
+       }
+
+       for (i = 0; i < pb->pb_page_count; i++)
+               mark_page_accessed(pb->pb_pages[i]);
+
+       if (!(pb->pb_flags & PBF_MAPPED)) {
+               error = _pagebuf_map_pages(pb, flags);
+               if (unlikely(error)) {
+                       printk(KERN_WARNING
+                              "pagebuf_get: failed to map pages\n");
+                       goto no_buffer;
+               }
+       }
+
+       XFS_STATS_INC(pb_get);
+
+       /*
+        * Always fill in the block number now; the mapped cases can do
+        * their own overlay of this later.
+        */
+       pb->pb_bn = ioff;
+       pb->pb_count_desired = pb->pb_buffer_length;
+
+       if (flags & PBF_READ) {
+               if (PBF_NOT_DONE(pb)) {
+                       PB_TRACE(pb, "get_read", (unsigned long)flags);
+                       XFS_STATS_INC(pb_get_read);
+                       pagebuf_iostart(pb, flags);
+               } else if (flags & PBF_ASYNC) {
+                       PB_TRACE(pb, "get_read_async", (unsigned long)flags);
+                       /*
+                        * Read ahead call which is already satisfied,
+                        * drop the buffer
+                        */
+                       goto no_buffer;
+               } else {
+                       PB_TRACE(pb, "get_read_done", (unsigned long)flags);
+                       /* We do not want read in the flags */
+                       pb->pb_flags &= ~PBF_READ;
+               }
+       } else {
+               PB_TRACE(pb, "get_write", (unsigned long)flags);
+       }
+
+       return pb;
+
+no_buffer:
+       if (flags & (PBF_LOCK | PBF_TRYLOCK))
+               pagebuf_unlock(pb);
+       pagebuf_rele(pb);
+       return NULL;
+}
+
+/*
+ * Create a skeletal pagebuf (no pages associated with it).
+ */
+xfs_buf_t *
+pagebuf_lookup(
+       xfs_buftarg_t           *target,
+       loff_t                  ioff,
+       size_t                  isize,
+       page_buf_flags_t        flags)
+{
+       xfs_buf_t               *pb;
+
+       pb = pagebuf_allocate(flags);
+       if (pb) {
+               _pagebuf_initialize(pb, target, ioff, isize, flags);
+       }
+       return pb;
+}
+
+/*
+ * If we are not low on memory then do the readahead in a deadlock
+ * safe manner.
+ */
+void
+pagebuf_readahead(
+       xfs_buftarg_t           *target,
+       loff_t                  ioff,
+       size_t                  isize,
+       page_buf_flags_t        flags)
+{
+       struct backing_dev_info *bdi;
+
+       bdi = target->pbr_mapping->backing_dev_info;
+       if (bdi_read_congested(bdi))
+               return;
+       if (bdi_write_congested(bdi))
+               return;
+
+       flags |= (PBF_TRYLOCK|PBF_READ|PBF_ASYNC|PBF_READ_AHEAD);
+       pagebuf_get(target, ioff, isize, flags);
+}
+
+xfs_buf_t *
+pagebuf_get_empty(
+       size_t                  len,
+       xfs_buftarg_t           *target)
+{
+       xfs_buf_t               *pb;
+
+       pb = pagebuf_allocate(0);
+       if (pb)
+               _pagebuf_initialize(pb, target, 0, len, 0);
+       return pb;
+}
+
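+/*
+ * Return the struct page backing an arbitrary kernel address, whether
+ * it lives in the direct mapping or in vmalloc space.
+ */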
+static inline struct page *
+mem_to_page(
+       void                    *addr)
+{
+       if (((unsigned long)addr < VMALLOC_START) ||
+           ((unsigned long)addr >= VMALLOC_END)) {
+               return virt_to_page(addr);
+       } else {
+               return vmalloc_to_page(addr);
+       }
+}
+
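+/*
+ * Attach caller-supplied memory to a buffer: throw away any existing
+ * page list and rebuild it so that pb_pages covers [mem, mem + len),
+ * leaving the buffer mapped at the supplied address.
+ */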
+int
+pagebuf_associate_memory(
+       xfs_buf_t               *pb,
+       void                    *mem,
+       size_t                  len)
+{
+       int                     rval;
+       int                     i = 0;
+       size_t                  ptr;
+       size_t                  end, end_cur;
+       off_t                   offset;
+       int                     page_count;
+
+       page_count = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
+       offset = (off_t) mem - ((off_t)mem & PAGE_CACHE_MASK);
+       if (offset && (len > PAGE_CACHE_SIZE))
+               page_count++;
+
+       /* Free any previous set of page pointers */
+       if (pb->pb_pages)
+               _pagebuf_free_pages(pb);
+
+       pb->pb_pages = NULL;
+       pb->pb_addr = mem;
+
+       rval = _pagebuf_get_pages(pb, page_count, 0);
+       if (rval)
+               return rval;
+
+       pb->pb_offset = offset;
+       ptr = (size_t) mem & PAGE_CACHE_MASK;
+       end = PAGE_CACHE_ALIGN((size_t) mem + len);
+       end_cur = end;
+       /* set up first page */
+       pb->pb_pages[0] = mem_to_page(mem);
+
+       ptr += PAGE_CACHE_SIZE;
+       pb->pb_page_count = ++i;
+       while (ptr < end) {
+               pb->pb_pages[i] = mem_to_page((void *)ptr);
+               pb->pb_page_count = ++i;
+               ptr += PAGE_CACHE_SIZE;
+       }
+       pb->pb_locked = 0;
+
+       pb->pb_count_desired = pb->pb_buffer_length = len;
+       pb->pb_flags |= PBF_MAPPED;
+
+       return 0;
+}
+
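+/*
+ * Allocate a buffer backed by kmem_alloc'd memory rather than the page
+ * cache.  The allocation is retried with a doubled length until its
+ * start is aligned to the target's sector mask.
+ */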
+xfs_buf_t *
+pagebuf_get_no_daddr(
+       size_t                  len,
+       xfs_buftarg_t           *target)
+{
+       size_t                  malloc_len = len;
+       xfs_buf_t               *bp;
+       void                    *data;
+       int                     error;
+
+       if (unlikely(len > 0x20000))
+               goto fail;
+
+       bp = pagebuf_allocate(0);
+       if (unlikely(bp == NULL))
+               goto fail;
+       _pagebuf_initialize(bp, target, 0, len, PBF_FORCEIO);
+
+ try_again:
+       data = kmem_alloc(malloc_len, KM_SLEEP);
+       if (unlikely(data == NULL))
+               goto fail_free_buf;
+
+       /* check whether alignment matches.. */
+       if ((__psunsigned_t)data !=
+           ((__psunsigned_t)data & ~target->pbr_smask)) {
+               /* .. else double the size and try again */
+               kmem_free(data, malloc_len);
+               malloc_len <<= 1;
+               goto try_again;
+       }
+
+       error = pagebuf_associate_memory(bp, data, len);
+       if (error)
+               goto fail_free_mem;
+       bp->pb_flags |= _PBF_KMEM_ALLOC;
+
+       pagebuf_unlock(bp);
+
+       PB_TRACE(bp, "no_daddr", data);
+       return bp;
+ fail_free_mem:
+       kmem_free(data, malloc_len);
+ fail_free_buf:
+       pagebuf_free(bp);
+ fail:
+       return NULL;
+}
+
+/*
+ *     pagebuf_hold
+ *
+ *     Increment reference count on buffer, to hold the buffer concurrently
+ *     with another thread which may release (free) the buffer asynchronously.
+ *
+ *     Must hold the buffer already to call this function.
+ */
+void
+pagebuf_hold(
+       xfs_buf_t               *pb)
+{
+       atomic_inc(&pb->pb_hold);
+       PB_TRACE(pb, "hold", 0);
+}
+
+/*
+ *     pagebuf_rele
+ *
+ *     pagebuf_rele releases a hold on the specified buffer.  If
+ *     the hold count is 1, pagebuf_rele calls pagebuf_free.
+ */
+void
+pagebuf_rele(
+       xfs_buf_t               *pb)
+{
+       pb_hash_t               *hash = pb_hash(pb);
+
+       PB_TRACE(pb, "rele", pb->pb_relse);
+
+       if (atomic_dec_and_lock(&pb->pb_hold, &hash->pb_hash_lock)) {
+               int             do_free = 1;
+
+               if (pb->pb_relse) {
+                       atomic_inc(&pb->pb_hold);
+                       spin_unlock(&hash->pb_hash_lock);
+                       (*(pb->pb_relse)) (pb);
+                       spin_lock(&hash->pb_hash_lock);
+                       do_free = 0;
+               }
+
+               if (pb->pb_flags & PBF_DELWRI) {
+                       pb->pb_flags |= PBF_ASYNC;
+                       atomic_inc(&pb->pb_hold);
+                       pagebuf_delwri_queue(pb, 0);
+                       do_free = 0;
+               } else if (pb->pb_flags & PBF_FS_MANAGED) {
+                       do_free = 0;
+               }
+
+               if (do_free) {
+                       list_del_init(&pb->pb_hash_list);
+                       spin_unlock(&hash->pb_hash_lock);
+                       pagebuf_free(pb);
+               } else {
+                       spin_unlock(&hash->pb_hash_lock);
+               }
+       }
+}
+
+
+/*
+ *     Mutual exclusion on buffers.  Locking model:
+ *
+ *     Buffers associated with inodes for which buffer locking
+ *     is not enabled are not protected by semaphores, and are
+ *     assumed to be exclusively owned by the caller.  There is a
+ *     spinlock in the buffer, used by the caller when concurrent
+ *     access is possible.
+ */
+
+/*
+ *     pagebuf_cond_lock
+ *
+ *     pagebuf_cond_lock locks a buffer object, if it is not already locked.
+ *     Note that this in no way locks the underlying pages, so it is only
+ *     useful for synchronizing concurrent use of page buffer objects, not
+ *     for synchronizing independent access to the underlying pages.
+ */
+int
+pagebuf_cond_lock(                     /* lock buffer, if not locked   */
+                                       /* returns -EBUSY if locked    */
+       xfs_buf_t               *pb)
+{
+       int                     locked;
+
+       locked = down_trylock(&pb->pb_sema) == 0;
+       if (locked) {
+               PB_SET_OWNER(pb);
+       }
+       PB_TRACE(pb, "cond_lock", (long)locked);
+       return(locked ? 0 : -EBUSY);
+}
+
+/*
+ *     pagebuf_lock_value
+ *
+ *     Return lock value for a pagebuf
+ */
+int
+pagebuf_lock_value(
+       xfs_buf_t               *pb)
+{
+       return(atomic_read(&pb->pb_sema.count));
+}
+
+/*
+ *     pagebuf_lock
+ *
+ *     pagebuf_lock locks a buffer object.  Note that this in no way
+ *     locks the underlying pages, so it is only useful for synchronizing
+ *     concurrent use of page buffer objects, not for synchronizing independent
+ *     access to the underlying pages.
+ */
+int
+pagebuf_lock(
+       xfs_buf_t               *pb)
+{
+       PB_TRACE(pb, "lock", 0);
+       if (atomic_read(&pb->pb_io_remaining))
+               blk_run_address_space(pb->pb_target->pbr_mapping);
+       down(&pb->pb_sema);
+       PB_SET_OWNER(pb);
+       PB_TRACE(pb, "locked", 0);
+       return 0;
+}
+
+/*
+ *     pagebuf_unlock
+ *
+ *     pagebuf_unlock releases the lock on the buffer object created by
+ *     pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying
+ *     pages created by pagebuf_pin).
+ */
+void
+pagebuf_unlock(                                /* unlock buffer                */
+       xfs_buf_t               *pb)    /* buffer to unlock             */
+{
+       PB_CLEAR_OWNER(pb);
+       up(&pb->pb_sema);
+       PB_TRACE(pb, "unlock", 0);
+}
+
+
+/*
+ *     Pinning Buffer Storage in Memory
+ */
+
+/*
+ *     pagebuf_pin
+ *
+ *     pagebuf_pin locks all of the memory represented by a buffer in
+ *     memory.  Multiple calls to pagebuf_pin and pagebuf_unpin, for
+ *     the same or different buffers affecting a given page, will
+ *     properly count the number of outstanding "pin" requests.  The
+ *     buffer may be released after the pagebuf_pin and a different
+ *     buffer used when calling pagebuf_unpin, if desired.
+ *     pagebuf_pin should be used by the file system when it wants to be
+ *     assured that no attempt will be made to force the affected
+ *     memory to disk.  It does not assure that a given logical page
+ *     will not be moved to a different physical page.
+ */
+void
+pagebuf_pin(
+       xfs_buf_t               *pb)
+{
+       atomic_inc(&pb->pb_pin_count);
+       PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter);
+}
+
+/*
+ *     pagebuf_unpin
+ *
+ *     pagebuf_unpin reverses the locking of memory performed by
+ *     pagebuf_pin.  Note that both functions affect the logical
+ *     pages associated with the buffer, not the buffer itself.
+ */
+void
+pagebuf_unpin(
+       xfs_buf_t               *pb)
+{
+       if (atomic_dec_and_test(&pb->pb_pin_count)) {
+               wake_up_all(&pb->pb_waiters);
+       }
+       PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter);
+}
+
+int
+pagebuf_ispin(
+       xfs_buf_t               *pb)
+{
+       return atomic_read(&pb->pb_pin_count);
+}
+
+/*
+ *     pagebuf_wait_unpin
+ *
+ *     pagebuf_wait_unpin waits until all of the memory associated
+ *     with the buffer is no longer locked in memory.  It returns
+ *     immediately if none of the affected pages are locked.
+ */
+static inline void
+_pagebuf_wait_unpin(
+       xfs_buf_t               *pb)
+{
+       DECLARE_WAITQUEUE       (wait, current);
+
+       if (atomic_read(&pb->pb_pin_count) == 0)
+               return;
+
+       add_wait_queue(&pb->pb_waiters, &wait);
+       for (;;) {
+               current->state = TASK_UNINTERRUPTIBLE;
+               if (atomic_read(&pb->pb_pin_count) == 0)
+                       break;
+               if (atomic_read(&pb->pb_io_remaining))
+                       blk_run_address_space(pb->pb_target->pbr_mapping);
+               schedule();
+       }
+       remove_wait_queue(&pb->pb_waiters, &wait);
+       current->state = TASK_RUNNING;
+}
+
+/*
+ *     Buffer Utility Routines
+ */
+
+/*
+ *     pagebuf_iodone
+ *
+ *     pagebuf_iodone marks a buffer for which I/O is in progress
+ *     done with respect to that I/O.  The pb_iodone routine, if
+ *     present, will be called as a side-effect.
+ */
+void
+pagebuf_iodone_work(
+       void                    *v)
+{
+       xfs_buf_t               *bp = (xfs_buf_t *)v;
+
+       if (bp->pb_iodone)
+               (*(bp->pb_iodone))(bp);
+       else if (bp->pb_flags & PBF_ASYNC)
+               xfs_buf_relse(bp);
+}
+
+void
+pagebuf_iodone(
+       xfs_buf_t               *pb,
+       int                     dataio,
+       int                     schedule)
+{
+       pb->pb_flags &= ~(PBF_READ | PBF_WRITE);
+       if (pb->pb_error == 0) {
+               pb->pb_flags &= ~(PBF_PARTIAL | PBF_NONE);
+       }
+
+       PB_TRACE(pb, "iodone", pb->pb_iodone);
+
+       if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) {
+               if (schedule) {
+                       INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb);
+                       queue_work(dataio ? pagebuf_dataio_workqueue :
+                               pagebuf_logio_workqueue, &pb->pb_iodone_work);
+               } else {
+                       pagebuf_iodone_work(pb);
+               }
+       } else {
+               up(&pb->pb_iodonesema);
+       }
+}
+
+/*
+ *     pagebuf_ioerror
+ *
+ *     pagebuf_ioerror sets the error code for a buffer.
+ */
+void
+pagebuf_ioerror(                       /* mark/clear buffer error flag */
+       xfs_buf_t               *pb,    /* buffer to mark               */
+       int                     error)  /* error to store (0 if none)   */
+{
+       ASSERT(error >= 0 && error <= 0xffff);
+       pb->pb_error = (unsigned short)error;
+       PB_TRACE(pb, "ioerror", (unsigned long)error);
+}
+
+/*
+ *     pagebuf_iostart
+ *
+ *     pagebuf_iostart initiates I/O on a buffer, based on the flags supplied.
+ *     If necessary, it will arrange for any disk space allocation required,
+ *     and it will break up the request if the block mappings require it.
+ *     The pb_iodone routine in the buffer supplied will only be called
+ *     when all of the subsidiary I/O requests, if any, have been completed.
+ *     pagebuf_iostart calls the pagebuf_ioinitiate routine or
+ *     pagebuf_iorequest, if the former routine is not defined, to start
+ *     the I/O on a given low-level request.
+ */
+int
+pagebuf_iostart(                       /* start I/O on a buffer          */
+       xfs_buf_t               *pb,    /* buffer to start                */
+       page_buf_flags_t        flags)  /* PBF_LOCK, PBF_ASYNC, PBF_READ, */
+                                       /* PBF_WRITE, PBF_DELWRI,         */
+                                       /* PBF_DONT_BLOCK                 */
+{
+       int                     status = 0;
+
+       PB_TRACE(pb, "iostart", (unsigned long)flags);
+
+       if (flags & PBF_DELWRI) {
+               pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC);
+               pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC);
+               pagebuf_delwri_queue(pb, 1);
+               return status;
+       }
+
+       pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \
+                       PBF_READ_AHEAD | _PBF_RUN_QUEUES);
+       pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \
+                       PBF_READ_AHEAD | _PBF_RUN_QUEUES);
+
+       BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL);
+
+       /* For writes allow an alternate strategy routine to precede
+        * the actual I/O request (which may not be issued at all in
+        * a shutdown situation, for example).
+        */
+       status = (flags & PBF_WRITE) ?
+               pagebuf_iostrategy(pb) : pagebuf_iorequest(pb);
+
+       /* Wait for I/O if we are not an async request.
+        * Note: async I/O request completion will release the buffer,
+        * and that can already be done by this point.  So using the
+        * buffer pointer from here on, after async I/O, is invalid.
+        */
+       if (!status && !(flags & PBF_ASYNC))
+               status = pagebuf_iowait(pb);
+
+       return status;
+}
+
+/*
+ * Helper routine for pagebuf_iorequest
+ */
+
+STATIC __inline__ int
+_pagebuf_iolocked(
+       xfs_buf_t               *pb)
+{
+       ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE));
+       if (pb->pb_flags & PBF_READ)
+               return pb->pb_locked;
+       return 0;
+}
+
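+/*
+ * Drop one I/O reference; the final reference clears the page-locked
+ * state and completes the buffer via pagebuf_iodone().
+ */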
+STATIC __inline__ void
+_pagebuf_iodone(
+       xfs_buf_t               *pb,
+       int                     schedule)
+{
+       if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
+               pb->pb_locked = 0;
+               pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), schedule);
+       }
+}
+
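+/*
+ * Completion handler for the bios built by _pagebuf_ioapply().  Records
+ * any I/O error, updates per-page uptodate state (using the sector
+ * bitmap in page->private when the block size is smaller than the page
+ * size), unlocks pages for page-locked I/O and drops one I/O reference.
+ */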
+STATIC int
+bio_end_io_pagebuf(
+       struct bio              *bio,
+       unsigned int            bytes_done,
+       int                     error)
+{
+       xfs_buf_t               *pb = (xfs_buf_t *)bio->bi_private;
+       unsigned int            i, blocksize = pb->pb_target->pbr_bsize;
+       unsigned int            sectorshift = pb->pb_target->pbr_sshift;
+       struct bio_vec          *bvec = bio->bi_io_vec;
+
+       if (bio->bi_size)
+               return 1;
+
+       if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+               pb->pb_error = EIO;
+
+       for (i = 0; i < bio->bi_vcnt; i++, bvec++) {
+               struct page     *page = bvec->bv_page;
+
+               if (pb->pb_error) {
+                       SetPageError(page);
+               } else if (blocksize == PAGE_CACHE_SIZE) {
+                       SetPageUptodate(page);
+               } else if (!PagePrivate(page) &&
+                               (pb->pb_flags & _PBF_PAGE_CACHE)) {
+                       unsigned long   j, range;
+
+                       ASSERT(blocksize < PAGE_CACHE_SIZE);
+                       range = (bvec->bv_offset + bvec->bv_len) >> sectorshift;
+                       for (j = bvec->bv_offset >> sectorshift; j < range; j++)
+                               set_bit(j, &page->private);
+                       if (page->private == (unsigned long)(PAGE_CACHE_SIZE-1))
+                               SetPageUptodate(page);
+               }
+
+               if (_pagebuf_iolocked(pb)) {
+                       unlock_page(page);
+               }
+       }
+
+       _pagebuf_iodone(pb, 1);
+       bio_put(bio);
+       return 0;
+}
+
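+/*
+ * Build and submit the bios needed to cover the buffer, splitting the
+ * request into multiple bios when it spans more pages than a single
+ * bio can hold.
+ */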
+void
+_pagebuf_ioapply(
+       xfs_buf_t               *pb)
+{
+       int                     i, map_i, total_nr_pages, nr_pages;
+       struct bio              *bio;
+       int                     offset = pb->pb_offset;
+       int                     size = pb->pb_count_desired;
+       sector_t                sector = pb->pb_bn;
+       unsigned int            blocksize = pb->pb_target->pbr_bsize;
+       int                     locking = _pagebuf_iolocked(pb);
+
+       total_nr_pages = pb->pb_page_count;
+       map_i = 0;
+
+       /* Special code path for reading a sub-page-size pagebuf --
+        * we populate the whole page, and hence the other metadata
+        * in the same page.  This optimization is only valid when the
+        * filesystem block size and the page size are equal.
+        */
+       if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) &&
+           (pb->pb_flags & PBF_READ) && locking &&
+           (blocksize == PAGE_CACHE_SIZE)) {
+               bio = bio_alloc(GFP_NOIO, 1);
+
+               bio->bi_bdev = pb->pb_target->pbr_bdev;
+               bio->bi_sector = sector - (offset >> BBSHIFT);
+               bio->bi_end_io = bio_end_io_pagebuf;
+               bio->bi_private = pb;
+
+               bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0);
+               size = 0;
+
+               atomic_inc(&pb->pb_io_remaining);
+
+               goto submit_io;
+       }
+
+       /* Lock down the pages which we need to for the request */
+       if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) {
+               for (i = 0; size; i++) {
+                       int             nbytes = PAGE_CACHE_SIZE - offset;
+                       struct page     *page = pb->pb_pages[i];
+
+                       if (nbytes > size)
+                               nbytes = size;
+
+                       lock_page(page);
+
+                       size -= nbytes;
+                       offset = 0;
+               }
+               offset = pb->pb_offset;
+               size = pb->pb_count_desired;
+       }
+
+next_chunk:
+       atomic_inc(&pb->pb_io_remaining);
+       nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
+       if (nr_pages > total_nr_pages)
+               nr_pages = total_nr_pages;
+
+       bio = bio_alloc(GFP_NOIO, nr_pages);
+       bio->bi_bdev = pb->pb_target->pbr_bdev;
+       bio->bi_sector = sector;
+       bio->bi_end_io = bio_end_io_pagebuf;
+       bio->bi_private = pb;
+
+       for (; size && nr_pages; nr_pages--, map_i++) {
+               int     nbytes = PAGE_CACHE_SIZE - offset;
+
+               if (nbytes > size)
+                       nbytes = size;
+
+               if (bio_add_page(bio, pb->pb_pages[map_i],
+                                       nbytes, offset) < nbytes)
+                       break;
+
+               offset = 0;
+               sector += nbytes >> BBSHIFT;
+               size -= nbytes;
+               total_nr_pages--;
+       }
+
+submit_io:
+       if (likely(bio->bi_size)) {
+               submit_bio((pb->pb_flags & PBF_READ) ? READ : WRITE, bio);
+               if (size)
+                       goto next_chunk;
+       } else {
+               bio_put(bio);
+               pagebuf_ioerror(pb, EIO);
+       }
+
+       if (pb->pb_flags & _PBF_RUN_QUEUES) {
+               pb->pb_flags &= ~_PBF_RUN_QUEUES;
+               if (atomic_read(&pb->pb_io_remaining) > 1)
+                       blk_run_address_space(pb->pb_target->pbr_mapping);
+       }
+}
+
+/*
+ *     pagebuf_iorequest -- the core I/O request routine.
+ */
+int
+pagebuf_iorequest(                     /* start real I/O               */
+       xfs_buf_t               *pb)    /* buffer to convey to device   */
+{
+       PB_TRACE(pb, "iorequest", 0);
+
+       if (pb->pb_flags & PBF_DELWRI) {
+               pagebuf_delwri_queue(pb, 1);
+               return 0;
+       }
+
+       if (pb->pb_flags & PBF_WRITE) {
+               _pagebuf_wait_unpin(pb);
+       }
+
+       pagebuf_hold(pb);
+
+       /* Set the count to 1 initially; this will stop an I/O
+        * completion callout which happens before we have started
+        * all the I/O from calling pagebuf_iodone too early.
+        */
+       atomic_set(&pb->pb_io_remaining, 1);
+       _pagebuf_ioapply(pb);
+       _pagebuf_iodone(pb, 0);
+
+       pagebuf_rele(pb);
+       return 0;
+}
+
+/*
+ *     pagebuf_iowait
+ *
+ *     pagebuf_iowait waits for I/O to complete on the buffer supplied.
+ *     It returns immediately if no I/O is pending.  In any case, it returns
+ *     the error code, if any, or 0 if there is no error.
+ */
+int
+pagebuf_iowait(
+       xfs_buf_t               *pb)
+{
+       PB_TRACE(pb, "iowait", 0);
+       if (atomic_read(&pb->pb_io_remaining))
+               blk_run_address_space(pb->pb_target->pbr_mapping);
+       down(&pb->pb_iodonesema);
+       PB_TRACE(pb, "iowaited", (long)pb->pb_error);
+       return pb->pb_error;
+}
+
+caddr_t
+pagebuf_offset(
+       xfs_buf_t               *pb,
+       size_t                  offset)
+{
+       struct page             *page;
+
+       offset += pb->pb_offset;
+
+       page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT];
+       return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1));
+}
+
+/*
+ *     pagebuf_iomove
+ *
+ *     Move data into or out of a buffer.
+ */
+void
+pagebuf_iomove(
+       xfs_buf_t               *pb,    /* buffer to process            */
+       size_t                  boff,   /* starting buffer offset       */
+       size_t                  bsize,  /* length to copy               */
+       caddr_t                 data,   /* data address                 */
+       page_buf_rw_t           mode)   /* read/write flag              */
+{
+       size_t                  bend, cpoff, csize;
+       struct page             *page;
+
+       bend = boff + bsize;
+       while (boff < bend) {
+               page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)];
+               cpoff = page_buf_poff(boff + pb->pb_offset);
+               csize = min_t(size_t,
+                             PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff);
+
+               ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));
+
+               switch (mode) {
+               case PBRW_ZERO:
+                       memset(page_address(page) + cpoff, 0, csize);
+                       break;
+               case PBRW_READ:
+                       memcpy(data, page_address(page) + cpoff, csize);
+                       break;
+               case PBRW_WRITE:
+                       memcpy(page_address(page) + cpoff, data, csize);
+               }
+
+               boff += csize;
+               data += csize;
+       }
+}
+
+/*
+ *     Handling of buftargs.
+ */
+
+void
+xfs_free_buftarg(
+       xfs_buftarg_t           *btp,
+       int                     external)
+{
+       xfs_flush_buftarg(btp, 1);
+       if (external)
+               xfs_blkdev_put(btp->pbr_bdev);
+       kmem_free(btp, sizeof(*btp));
+}
+
+void
+xfs_incore_relse(
+       xfs_buftarg_t           *btp,
+       int                     delwri_only,
+       int                     wait)
+{
+       invalidate_bdev(btp->pbr_bdev, 1);
+       truncate_inode_pages(btp->pbr_mapping, 0LL);
+}
+
+void
+xfs_setsize_buftarg(
+       xfs_buftarg_t           *btp,
+       unsigned int            blocksize,
+       unsigned int            sectorsize)
+{
+       btp->pbr_bsize = blocksize;
+       btp->pbr_sshift = ffs(sectorsize) - 1;
+       btp->pbr_smask = sectorsize - 1;
+
+       if (set_blocksize(btp->pbr_bdev, sectorsize)) {
+               printk(KERN_WARNING
+                       "XFS: Cannot set_blocksize to %u on device %s\n",
+                       sectorsize, XFS_BUFTARG_NAME(btp));
+       }
+}
+
+xfs_buftarg_t *
+xfs_alloc_buftarg(
+       struct block_device     *bdev)
+{
+       xfs_buftarg_t           *btp;
+
+       btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
+
+       btp->pbr_dev =  bdev->bd_dev;
+       btp->pbr_bdev = bdev;
+       btp->pbr_mapping = bdev->bd_inode->i_mapping;
+       xfs_setsize_buftarg(btp, PAGE_CACHE_SIZE, bdev_hardsect_size(bdev));
+
+       return btp;
+}
+
+
+/*
+ * Pagebuf delayed write buffer handling
+ */
+
+STATIC LIST_HEAD(pbd_delwrite_queue);
+STATIC spinlock_t pbd_delwrite_lock = SPIN_LOCK_UNLOCKED;
+
+STATIC void
+pagebuf_delwri_queue(
+       xfs_buf_t               *pb,
+       int                     unlock)
+{
+       PB_TRACE(pb, "delwri_q", (long)unlock);
+       ASSERT(pb->pb_flags & PBF_DELWRI);
+
+       spin_lock(&pbd_delwrite_lock);
+       /* If already in the queue, dequeue and place at tail */
+       if (!list_empty(&pb->pb_list)) {
+               if (unlock) {
+                       atomic_dec(&pb->pb_hold);
+               }
+               list_del(&pb->pb_list);
+       }
+
+       list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
+       pb->pb_queuetime = jiffies;
+       spin_unlock(&pbd_delwrite_lock);
+
+       if (unlock)
+               pagebuf_unlock(pb);
+}
+
+void
+pagebuf_delwri_dequeue(
+       xfs_buf_t               *pb)
+{
+       PB_TRACE(pb, "delwri_uq", 0);
+       spin_lock(&pbd_delwrite_lock);
+       list_del_init(&pb->pb_list);
+       pb->pb_flags &= ~PBF_DELWRI;
+       spin_unlock(&pbd_delwrite_lock);
+}
+
+STATIC void
+pagebuf_runall_queues(
+       struct workqueue_struct *queue)
+{
+       flush_workqueue(queue);
+}
+
+/* Defines for pagebuf daemon */
+STATIC DECLARE_COMPLETION(pagebuf_daemon_done);
+STATIC struct task_struct *pagebuf_daemon_task;
+STATIC int pagebuf_daemon_active;
+STATIC int force_flush;
+
+STATIC void
+pagebuf_daemon_wakeup(void)
+{
+       force_flush = 1;
+       barrier();
+       wake_up_process(pagebuf_daemon_task);
+}
+
+STATIC int
+pagebuf_daemon(
+       void                    *data)
+{
+       struct list_head        tmp;
+       xfs_buf_t               *pb, *n;
+
+       /*  Set up the thread  */
+       daemonize("xfsbufd");
+       current->flags |= PF_MEMALLOC;
+
+       pagebuf_daemon_task = current;
+       pagebuf_daemon_active = 1;
+       barrier();
+
+       INIT_LIST_HEAD(&tmp);
+       do {
+               /* swsusp */
+               if (current->flags & PF_FREEZE)
+                       refrigerator(PF_FREEZE);
+
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(xfs_flush_interval);
+
+               spin_lock(&pbd_delwrite_lock);
+               list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
+                       PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
+                       ASSERT(pb->pb_flags & PBF_DELWRI);
+
+                       if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
+                               if (!force_flush &&
+                                   time_before(jiffies,
+                                               pb->pb_queuetime +
+                                               xfs_age_buffer)) {
+                                       pagebuf_unlock(pb);
+                                       break;
+                               }
+
+                               pb->pb_flags &= ~PBF_DELWRI;
+                               pb->pb_flags |= PBF_WRITE;
+                               list_move(&pb->pb_list, &tmp);
+                       }
+               }
+               spin_unlock(&pbd_delwrite_lock);
+
+               while (!list_empty(&tmp)) {
+                       pb = list_entry(tmp.next, xfs_buf_t, pb_list);
+                       list_del_init(&pb->pb_list);
+                       pagebuf_iostrategy(pb);
+                       blk_run_address_space(pb->pb_target->pbr_mapping);
+               }
+
+               if (as_list_len > 0)
+                       purge_addresses();
+
+               force_flush = 0;
+       } while (pagebuf_daemon_active);
+
+       complete_and_exit(&pagebuf_daemon_done, 0);
+}
+
+/*
+ * Go through the delayed write queue and write out buffers that belong to
+ * the given target. This is used in filesystem error handling to
+ * preserve the consistency of the filesystem's metadata.
+ */
+int
+xfs_flush_buftarg(
+       xfs_buftarg_t           *target,
+       int                     wait)
+{
+       struct list_head        tmp;
+       xfs_buf_t               *pb, *n;
+       int                     pincount = 0;
+
+       pagebuf_runall_queues(pagebuf_dataio_workqueue);
+       pagebuf_runall_queues(pagebuf_logio_workqueue);
+
+       INIT_LIST_HEAD(&tmp);
+       spin_lock(&pbd_delwrite_lock);
+       list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
+
+               if (pb->pb_target != target)
+                       continue;
+
+               ASSERT(pb->pb_flags & PBF_DELWRI);
+               PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
+               if (pagebuf_ispin(pb)) {
+                       pincount++;
+                       continue;
+               }
+
+               pb->pb_flags &= ~PBF_DELWRI;
+               pb->pb_flags |= PBF_WRITE;
+               list_move(&pb->pb_list, &tmp);
+       }
+       spin_unlock(&pbd_delwrite_lock);
+
+       /*
+        * Dropped the delayed write list lock, now walk the temporary list
+        */
+       list_for_each_entry_safe(pb, n, &tmp, pb_list) {
+               if (wait)
+                       pb->pb_flags &= ~PBF_ASYNC;
+               else
+                       list_del_init(&pb->pb_list);
+
+               pagebuf_lock(pb);
+               pagebuf_iostrategy(pb);
+       }
+
+       /*
+        * Remaining list items must be flushed before returning
+        */
+       while (!list_empty(&tmp)) {
+               pb = list_entry(tmp.next, xfs_buf_t, pb_list);
+
+               list_del_init(&pb->pb_list);
+               xfs_iowait(pb);
+               xfs_buf_relse(pb);
+       }
+
+       if (wait)
+               blk_run_address_space(target->pbr_mapping);
+
+       return pincount;
+}
+
+STATIC int
+pagebuf_daemon_start(void)
+{
+       int             rval;
+
+       pagebuf_logio_workqueue = create_workqueue("xfslogd");
+       if (!pagebuf_logio_workqueue)
+               return -ENOMEM;
+
+       pagebuf_dataio_workqueue = create_workqueue("xfsdatad");
+       if (!pagebuf_dataio_workqueue) {
+               destroy_workqueue(pagebuf_logio_workqueue);
+               return -ENOMEM;
+       }
+
+       rval = kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES);
+       if (rval < 0) {
+               destroy_workqueue(pagebuf_logio_workqueue);
+               destroy_workqueue(pagebuf_dataio_workqueue);
+       }
+
+       return rval;
+}
+
+/*
+ * pagebuf_daemon_stop
+ *
+ * Note: do not mark as __exit, it is called from pagebuf_terminate.
+ */
+STATIC void
+pagebuf_daemon_stop(void)
+{
+       pagebuf_daemon_active = 0;
+       barrier();
+       wait_for_completion(&pagebuf_daemon_done);
+
+       destroy_workqueue(pagebuf_logio_workqueue);
+       destroy_workqueue(pagebuf_dataio_workqueue);
+}
+
+/*
+ *     Initialization and Termination
+ */
+
+int __init
+pagebuf_init(void)
+{
+       int                     i;
+
+       pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0,
+                       SLAB_HWCACHE_ALIGN, NULL, NULL);
+       if (pagebuf_cache == NULL) {
+               printk("pagebuf: couldn't init pagebuf cache\n");
+               pagebuf_terminate();
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < NHASH; i++) {
+               spin_lock_init(&pbhash[i].pb_hash_lock);
+               INIT_LIST_HEAD(&pbhash[i].pb_hash);
+       }
+
+#ifdef PAGEBUF_TRACE
+       pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
+#endif
+
+       pagebuf_daemon_start();
+       return 0;
+}
+
+
+/*
+ *     pagebuf_terminate.
+ *
+ *     Note: do not mark as __exit, this is also called from the __init code.
+ */
+void
+pagebuf_terminate(void)
+{
+       pagebuf_daemon_stop();
+
+#ifdef PAGEBUF_TRACE
+       ktrace_free(pagebuf_trace_buf);
+#endif
+
+       kmem_cache_destroy(pagebuf_cache);
+}
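
Editor's note: the delayed-write path above queues dirty buffers with a timestamp (pb_queuetime) and lets xfsbufd write them back once they have aged past xfs_age_buffer, or immediately when force_flush is set; because buffers are appended at the tail, the daemon can stop scanning at the first buffer that is still too young. The following is a minimal userspace sketch of that aging policy only; the jiffies counter, buffer struct and the 15-tick age constant are invented for illustration, and the simplified time_before() ignores counter wraparound.

#include <stdio.h>

/* Stand-ins for kernel state; values are illustrative only. */
static unsigned long jiffies;
#define FAKE_AGE_BUFFER 15              /* ticks a buffer may stay queued */

struct fake_buf {
        unsigned long queuetime;        /* when the buffer was queued (pb_queuetime) */
};

/* Simplified time_before(a, b): true if a is earlier than b (no wraparound). */
static int time_before(unsigned long a, unsigned long b)
{
        return (long)(a - b) < 0;
}

/* Mirrors the xfsbufd decision: flush when forced, or once old enough. */
static int should_flush(const struct fake_buf *pb, int force_flush)
{
        if (!force_flush &&
            time_before(jiffies, pb->queuetime + FAKE_AGE_BUFFER))
                return 0;               /* still aging: leave it on the delwri queue */
        return 1;                       /* write it back now */
}

int main(void)
{
        struct fake_buf pb = { .queuetime = 0 };

        for (jiffies = 0; jiffies <= 20; jiffies += 5)
                printf("tick %2lu: flush=%d forced=%d\n",
                       jiffies, should_flush(&pb, 0), should_flush(&pb, 1));
        return 0;
}
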
diff --git a/fs/xfs/linux/xfs_buf.h b/fs/xfs/linux/xfs_buf.h
new file mode 100644 (file)
index 0000000..f97e6c0
--- /dev/null
@@ -0,0 +1,594 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Written by Steve Lord, Jim Mostek, Russell Cattelan at SGI
+ */
+
+#ifndef __XFS_BUF_H__
+#define __XFS_BUF_H__
+
+#include <linux/config.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <asm/system.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/uio.h>
+
+/*
+ *     Base types
+ */
+
+#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
+
+#define page_buf_ctob(pp)      ((pp) * PAGE_CACHE_SIZE)
+#define page_buf_btoc(dd)      (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)
+#define page_buf_btoct(dd)     ((dd) >> PAGE_CACHE_SHIFT)
+#define page_buf_poff(aa)      ((aa) & ~PAGE_CACHE_MASK)
+
+typedef enum page_buf_rw_e {
+       PBRW_READ = 1,                  /* transfer into target memory */
+       PBRW_WRITE = 2,                 /* transfer from target memory */
+       PBRW_ZERO = 3                   /* Zero target memory */
+} page_buf_rw_t;
+
+
+typedef enum page_buf_flags_e {                /* pb_flags values */
+       PBF_READ = (1 << 0),    /* buffer intended for reading from device */
+       PBF_WRITE = (1 << 1),   /* buffer intended for writing to device   */
+       PBF_MAPPED = (1 << 2),  /* buffer mapped (pb_addr valid)           */
+       PBF_PARTIAL = (1 << 3), /* buffer partially read                   */
+       PBF_ASYNC = (1 << 4),   /* initiator will not wait for completion  */
+       PBF_NONE = (1 << 5),    /* buffer not read at all                  */
+       PBF_DELWRI = (1 << 6),  /* buffer has dirty pages                  */
+       PBF_STALE = (1 << 7),   /* buffer has been staled, do not find it  */
+       PBF_FS_MANAGED = (1 << 8),  /* filesystem controls freeing memory  */
+       PBF_FS_DATAIOD = (1 << 9),  /* schedule IO completion on fs datad  */
+       PBF_FORCEIO = (1 << 10),    /* ignore any cache state              */
+       PBF_FLUSH = (1 << 11),      /* flush disk write cache              */
+       PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead             */
+
+       /* flags used only as arguments to access routines */
+       PBF_LOCK = (1 << 14),       /* lock requested                      */
+       PBF_TRYLOCK = (1 << 15),    /* lock requested, but do not wait     */
+       PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread      */
+
+       /* flags used only internally */
+       _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache                 */
+       _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc()              */
+       _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue         */
+} page_buf_flags_t;
+
+#define PBF_UPDATE (PBF_READ | PBF_WRITE)
+#define PBF_NOT_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) != 0)
+#define PBF_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) == 0)
+
+typedef struct xfs_buftarg {
+       dev_t                   pbr_dev;
+       struct block_device     *pbr_bdev;
+       struct address_space    *pbr_mapping;
+       unsigned int            pbr_bsize;
+       unsigned int            pbr_sshift;
+       size_t                  pbr_smask;
+} xfs_buftarg_t;
+
+/*
+ *     xfs_buf_t:  Buffer structure for page cache-based buffers
+ *
+ * This buffer structure is used by the page cache buffer management routines
+ * to refer to an assembly of pages forming a logical buffer.  The actual
+ * I/O is performed with buffer_head or bio structures, as required by the
+ * underlying drivers, which do not understand this buffer abstraction.  The
+ * buffer structure is used on a temporary basis only and discarded when released.
+ *
+ * The real data storage is recorded in the page cache.  Metadata is
+ * hashed to the inode for the block device on which the file system resides.
+ * File data is hashed to the inode for the file.  Pages which are only
+ * partially filled with data have bits set in their block_map entry
+ * to indicate which disk blocks in the page are not valid.
+ */
+
+struct xfs_buf;
+typedef void (*page_buf_iodone_t)(struct xfs_buf *);
+                       /* call-back function on I/O completion */
+typedef void (*page_buf_relse_t)(struct xfs_buf *);
+                       /* call-back function on buffer release */
+typedef int (*page_buf_bdstrat_t)(struct xfs_buf *);
+
+#define PB_PAGES       4
+
+typedef struct xfs_buf {
+       struct semaphore        pb_sema;        /* semaphore for lockables  */
+       unsigned long           pb_queuetime;   /* time buffer was queued   */
+       atomic_t                pb_pin_count;   /* pin count                */
+       wait_queue_head_t       pb_waiters;     /* unpin waiters            */
+       struct list_head        pb_list;
+       page_buf_flags_t        pb_flags;       /* status flags */
+       struct list_head        pb_hash_list;
+       xfs_buftarg_t           *pb_target;     /* logical object */
+       atomic_t                pb_hold;        /* reference count */
+       xfs_daddr_t             pb_bn;          /* block number for I/O */
+       loff_t                  pb_file_offset; /* offset in file */
+       size_t                  pb_buffer_length; /* size of buffer in bytes */
+       size_t                  pb_count_desired; /* desired transfer size */
+       void                    *pb_addr;       /* virtual address of buffer */
+       struct work_struct      pb_iodone_work;
+       atomic_t                pb_io_remaining;/* #outstanding I/O requests */
+       page_buf_iodone_t       pb_iodone;      /* I/O completion function */
+       page_buf_relse_t        pb_relse;       /* releasing function */
+       page_buf_bdstrat_t      pb_strat;       /* pre-write function */
+       struct semaphore        pb_iodonesema;  /* Semaphore for I/O waiters */
+       void                    *pb_fspriv;
+       void                    *pb_fspriv2;
+       void                    *pb_fspriv3;
+       unsigned short          pb_error;       /* error code on I/O */
+       unsigned short          pb_page_count;  /* size of page array */
+       unsigned short          pb_offset;      /* page offset in first page */
+       unsigned char           pb_locked;      /* page array is locked */
+       unsigned char           pb_hash_index;  /* hash table index     */
+       struct page             **pb_pages;     /* array of page pointers */
+       struct page             *pb_page_array[PB_PAGES]; /* inline pages */
+#ifdef PAGEBUF_LOCK_TRACKING
+       int                     pb_last_holder;
+#endif
+} xfs_buf_t;
+
+
+/* Finding and Reading Buffers */
+
+extern xfs_buf_t *pagebuf_find(        /* find buffer for block if     */
+                                       /* the block is in memory       */
+               xfs_buftarg_t *,        /* inode for block              */
+               loff_t,                 /* starting offset of range     */
+               size_t,                 /* length of range              */
+               page_buf_flags_t);      /* PBF_LOCK                     */
+
+extern xfs_buf_t *pagebuf_get(         /* allocate a buffer            */
+               xfs_buftarg_t *,        /* inode for buffer             */
+               loff_t,                 /* starting offset of range     */
+               size_t,                 /* length of range              */
+               page_buf_flags_t);      /* PBF_LOCK, PBF_READ,          */
+                                       /* PBF_ASYNC                    */
+
+extern xfs_buf_t *pagebuf_lookup(
+               xfs_buftarg_t *,
+               loff_t,                 /* starting offset of range     */
+               size_t,                 /* length of range              */
+               page_buf_flags_t);      /* PBF_READ, PBF_WRITE,         */
+                                       /* PBF_FORCEIO,                 */
+
+extern xfs_buf_t *pagebuf_get_empty(   /* allocate pagebuf struct with */
+                                       /*  no memory or disk address   */
+               size_t len,
+               xfs_buftarg_t *);       /* mount point "fake" inode     */
+
+extern xfs_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct      */
+                                       /* without disk address         */
+               size_t len,
+               xfs_buftarg_t *);       /* mount point "fake" inode     */
+
+extern int pagebuf_associate_memory(
+               xfs_buf_t *,
+               void *,
+               size_t);
+
+extern void pagebuf_hold(              /* increment reference count    */
+               xfs_buf_t *);           /* buffer to hold               */
+
+extern void pagebuf_readahead(         /* read ahead into cache        */
+               xfs_buftarg_t  *,       /* target for buffer (or NULL)  */
+               loff_t,                 /* starting offset of range     */
+               size_t,                 /* length of range              */
+               page_buf_flags_t);      /* additional read flags        */
+
+/* Releasing Buffers */
+
+extern void pagebuf_free(              /* deallocate a buffer          */
+               xfs_buf_t *);           /* buffer to deallocate         */
+
+extern void pagebuf_rele(              /* release hold on a buffer     */
+               xfs_buf_t *);           /* buffer to release            */
+
+/* Locking and Unlocking Buffers */
+
+extern int pagebuf_cond_lock(          /* lock buffer, if not locked   */
+                                       /* (returns -EBUSY if locked)   */
+               xfs_buf_t *);           /* buffer to lock               */
+
+extern int pagebuf_lock_value(         /* return count on lock         */
+               xfs_buf_t *);          /* buffer to check              */
+
+extern int pagebuf_lock(               /* lock buffer                  */
+               xfs_buf_t *);          /* buffer to lock               */
+
+extern void pagebuf_unlock(            /* unlock buffer                */
+               xfs_buf_t *);           /* buffer to unlock             */
+
+/* Buffer Read and Write Routines */
+
+extern void pagebuf_iodone(            /* mark buffer I/O complete     */
+               xfs_buf_t *,            /* buffer to mark               */
+               int,                    /* use data/log helper thread.  */
+               int);                   /* run completion locally, or in
+                                        * a helper thread.             */
+
+extern void pagebuf_ioerror(           /* mark buffer in error (or not) */
+               xfs_buf_t *,            /* buffer to mark               */
+               int);                   /* error to store (0 if none)   */
+
+extern int pagebuf_iostart(            /* start I/O on a buffer        */
+               xfs_buf_t *,            /* buffer to start              */
+               page_buf_flags_t);      /* PBF_LOCK, PBF_ASYNC,         */
+                                       /* PBF_READ, PBF_WRITE,         */
+                                       /* PBF_DELWRI                   */
+
+extern int pagebuf_iorequest(          /* start real I/O               */
+               xfs_buf_t *);           /* buffer to convey to device   */
+
+extern int pagebuf_iowait(             /* wait for buffer I/O done     */
+               xfs_buf_t *);           /* buffer to wait on            */
+
+extern void pagebuf_iomove(            /* move data in/out of pagebuf  */
+               xfs_buf_t *,            /* buffer to manipulate         */
+               size_t,                 /* starting buffer offset       */
+               size_t,                 /* length in buffer             */
+               caddr_t,                /* data pointer                 */
+               page_buf_rw_t);         /* direction                    */
+
+static inline int pagebuf_iostrategy(xfs_buf_t *pb)
+{
+       return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb);
+}
+
+static inline int pagebuf_geterror(xfs_buf_t *pb)
+{
+       return pb ? pb->pb_error : ENOMEM;
+}
+
+/* Buffer Utility Routines */
+
+extern caddr_t pagebuf_offset(         /* pointer at offset in buffer  */
+               xfs_buf_t *,            /* buffer to offset into        */
+               size_t);                /* offset                       */
+
+/* Pinning Buffer Storage in Memory */
+
+extern void pagebuf_pin(               /* pin buffer in memory         */
+               xfs_buf_t *);           /* buffer to pin                */
+
+extern void pagebuf_unpin(             /* unpin buffered data          */
+               xfs_buf_t *);           /* buffer to unpin              */
+
+extern int pagebuf_ispin(              /* check if buffer is pinned    */
+               xfs_buf_t *);           /* buffer to check              */
+
+/* Delayed Write Buffer Routines */
+
+extern void pagebuf_delwri_dequeue(xfs_buf_t *);
+
+/* Buffer Daemon Setup Routines */
+
+extern int pagebuf_init(void);
+extern void pagebuf_terminate(void);
+
+
+#ifdef PAGEBUF_TRACE
+extern ktrace_t *pagebuf_trace_buf;
+extern void pagebuf_trace(
+               xfs_buf_t *,            /* buffer being traced          */
+               char *,                 /* description of operation     */
+               void *,                 /* arbitrary diagnostic value   */
+               void *);                /* return address               */
+#else
+# define pagebuf_trace(pb, id, ptr, ra)        do { } while (0)
+#endif
+
+#define pagebuf_target_name(target)    \
+       ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; })
+
+
+
+
+
+/* These are only for xfs_syncsub: it sets an internal variable and then
+ * passes it to VOP_FLUSH_PAGES, or adds the flags to a newly acquired buf_t.
+ */
+#define XFS_B_ASYNC            PBF_ASYNC
+#define XFS_B_DELWRI           PBF_DELWRI
+#define XFS_B_READ             PBF_READ
+#define XFS_B_WRITE            PBF_WRITE
+#define XFS_B_STALE            PBF_STALE
+
+#define XFS_BUF_TRYLOCK                PBF_TRYLOCK
+#define XFS_INCORE_TRYLOCK     PBF_TRYLOCK
+#define XFS_BUF_LOCK           PBF_LOCK
+#define XFS_BUF_MAPPED         PBF_MAPPED
+
+#define BUF_BUSY               PBF_DONT_BLOCK
+
+#define XFS_BUF_BFLAGS(x)      ((x)->pb_flags)
+#define XFS_BUF_ZEROFLAGS(x)   \
+       ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI))
+
+#define XFS_BUF_STALE(x)       ((x)->pb_flags |= XFS_B_STALE)
+#define XFS_BUF_UNSTALE(x)     ((x)->pb_flags &= ~XFS_B_STALE)
+#define XFS_BUF_ISSTALE(x)     ((x)->pb_flags & XFS_B_STALE)
+#define XFS_BUF_SUPER_STALE(x) do {                            \
+                                       XFS_BUF_STALE(x);       \
+                                       xfs_buf_undelay(x);     \
+                                       XFS_BUF_DONE(x);        \
+                               } while (0)
+
+#define XFS_BUF_MANAGE         PBF_FS_MANAGED
+#define XFS_BUF_UNMANAGE(x)    ((x)->pb_flags &= ~PBF_FS_MANAGED)
+
+static inline void xfs_buf_undelay(xfs_buf_t *pb)
+{
+       if (pb->pb_flags & PBF_DELWRI) {
+               if (pb->pb_list.next != &pb->pb_list) {
+                       pagebuf_delwri_dequeue(pb);
+                       pagebuf_rele(pb);
+               } else {
+                       pb->pb_flags &= ~PBF_DELWRI;
+               }
+       }
+}
+
+#define XFS_BUF_DELAYWRITE(x)   ((x)->pb_flags |= PBF_DELWRI)
+#define XFS_BUF_UNDELAYWRITE(x)         xfs_buf_undelay(x)
+#define XFS_BUF_ISDELAYWRITE(x)         ((x)->pb_flags & PBF_DELWRI)
+
+#define XFS_BUF_ERROR(x,no)     pagebuf_ioerror(x,no)
+#define XFS_BUF_GETERROR(x)     pagebuf_geterror(x)
+#define XFS_BUF_ISERROR(x)      (pagebuf_geterror(x)?1:0)
+
+#define XFS_BUF_DONE(x)                 ((x)->pb_flags &= ~(PBF_PARTIAL|PBF_NONE))
+#define XFS_BUF_UNDONE(x)       ((x)->pb_flags |= PBF_PARTIAL|PBF_NONE)
+#define XFS_BUF_ISDONE(x)       (!(PBF_NOT_DONE(x)))
+
+#define XFS_BUF_BUSY(x)                 ((x)->pb_flags |= PBF_FORCEIO)
+#define XFS_BUF_UNBUSY(x)       ((x)->pb_flags &= ~PBF_FORCEIO)
+#define XFS_BUF_ISBUSY(x)       (1)
+
+#define XFS_BUF_ASYNC(x)        ((x)->pb_flags |= PBF_ASYNC)
+#define XFS_BUF_UNASYNC(x)      ((x)->pb_flags &= ~PBF_ASYNC)
+#define XFS_BUF_ISASYNC(x)      ((x)->pb_flags & PBF_ASYNC)
+
+#define XFS_BUF_FLUSH(x)        ((x)->pb_flags |= PBF_FLUSH)
+#define XFS_BUF_UNFLUSH(x)      ((x)->pb_flags &= ~PBF_FLUSH)
+#define XFS_BUF_ISFLUSH(x)      ((x)->pb_flags & PBF_FLUSH)
+
+#define XFS_BUF_SHUT(x)                 printk("XFS_BUF_SHUT not implemented yet\n")
+#define XFS_BUF_UNSHUT(x)       printk("XFS_BUF_UNSHUT not implemented yet\n")
+#define XFS_BUF_ISSHUT(x)       (0)
+
+#define XFS_BUF_HOLD(x)                pagebuf_hold(x)
+#define XFS_BUF_READ(x)                ((x)->pb_flags |= PBF_READ)
+#define XFS_BUF_UNREAD(x)      ((x)->pb_flags &= ~PBF_READ)
+#define XFS_BUF_ISREAD(x)      ((x)->pb_flags & PBF_READ)
+
+#define XFS_BUF_WRITE(x)       ((x)->pb_flags |= PBF_WRITE)
+#define XFS_BUF_UNWRITE(x)     ((x)->pb_flags &= ~PBF_WRITE)
+#define XFS_BUF_ISWRITE(x)     ((x)->pb_flags & PBF_WRITE)
+
+#define XFS_BUF_ISUNINITIAL(x)  (0)
+#define XFS_BUF_UNUNINITIAL(x)  (0)
+
+#define XFS_BUF_BP_ISMAPPED(bp)         1
+
+#define XFS_BUF_DATAIO(x)      ((x)->pb_flags |= PBF_FS_DATAIOD)
+#define XFS_BUF_UNDATAIO(x)    ((x)->pb_flags &= ~PBF_FS_DATAIOD)
+
+#define XFS_BUF_IODONE_FUNC(buf)       (buf)->pb_iodone
+#define XFS_BUF_SET_IODONE_FUNC(buf, func)     \
+                       (buf)->pb_iodone = (func)
+#define XFS_BUF_CLR_IODONE_FUNC(buf)           \
+                       (buf)->pb_iodone = NULL
+#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func)    \
+                       (buf)->pb_strat = (func)
+#define XFS_BUF_CLR_BDSTRAT_FUNC(buf)          \
+                       (buf)->pb_strat = NULL
+
+#define XFS_BUF_FSPRIVATE(buf, type)           \
+                       ((type)(buf)->pb_fspriv)
+#define XFS_BUF_SET_FSPRIVATE(buf, value)      \
+                       (buf)->pb_fspriv = (void *)(value)
+#define XFS_BUF_FSPRIVATE2(buf, type)          \
+                       ((type)(buf)->pb_fspriv2)
+#define XFS_BUF_SET_FSPRIVATE2(buf, value)     \
+                       (buf)->pb_fspriv2 = (void *)(value)
+#define XFS_BUF_FSPRIVATE3(buf, type)          \
+                       ((type)(buf)->pb_fspriv3)
+#define XFS_BUF_SET_FSPRIVATE3(buf, value)     \
+                       (buf)->pb_fspriv3  = (void *)(value)
+#define XFS_BUF_SET_START(buf)
+
+#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \
+                       (buf)->pb_relse = (value)
+
+#define XFS_BUF_PTR(bp)                (xfs_caddr_t)((bp)->pb_addr)
+
+extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
+{
+       if (bp->pb_flags & PBF_MAPPED)
+               return XFS_BUF_PTR(bp) + offset;
+       return (xfs_caddr_t) pagebuf_offset(bp, offset);
+}
+
+#define XFS_BUF_SET_PTR(bp, val, count)                \
+                               pagebuf_associate_memory(bp, val, count)
+#define XFS_BUF_ADDR(bp)       ((bp)->pb_bn)
+#define XFS_BUF_SET_ADDR(bp, blk)              \
+                       ((bp)->pb_bn = (blk))
+#define XFS_BUF_OFFSET(bp)     ((bp)->pb_file_offset)
+#define XFS_BUF_SET_OFFSET(bp, off)            \
+                       ((bp)->pb_file_offset = (off))
+#define XFS_BUF_COUNT(bp)      ((bp)->pb_count_desired)
+#define XFS_BUF_SET_COUNT(bp, cnt)             \
+                       ((bp)->pb_count_desired = (cnt))
+#define XFS_BUF_SIZE(bp)       ((bp)->pb_buffer_length)
+#define XFS_BUF_SET_SIZE(bp, cnt)              \
+                       ((bp)->pb_buffer_length = (cnt))
+#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)
+#define XFS_BUF_SET_VTYPE(bp, type)
+#define XFS_BUF_SET_REF(bp, ref)
+
+#define XFS_BUF_ISPINNED(bp)   pagebuf_ispin(bp)
+
+#define XFS_BUF_VALUSEMA(bp)   pagebuf_lock_value(bp)
+#define XFS_BUF_CPSEMA(bp)     (pagebuf_cond_lock(bp) == 0)
+#define XFS_BUF_VSEMA(bp)      pagebuf_unlock(bp)
+#define XFS_BUF_PSEMA(bp,x)    pagebuf_lock(bp)
+#define XFS_BUF_V_IODONESEMA(bp) up(&bp->pb_iodonesema);
+
+/* setup the buffer target from a buftarg structure */
+#define XFS_BUF_SET_TARGET(bp, target) \
+               (bp)->pb_target = (target)
+#define XFS_BUF_TARGET(bp)     ((bp)->pb_target)
+#define XFS_BUFTARG_NAME(target)       \
+               pagebuf_target_name(target)
+
+#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)
+#define XFS_BUF_SET_VTYPE(bp, type)
+#define XFS_BUF_SET_REF(bp, ref)
+
+#define xfs_buf_read(target, blkno, len, flags) \
+               pagebuf_get((target), (blkno), (len), \
+                       PBF_LOCK | PBF_READ | PBF_MAPPED)
+#define xfs_buf_get(target, blkno, len, flags) \
+               pagebuf_get((target), (blkno), (len), \
+                       PBF_LOCK | PBF_MAPPED)
+
+#define xfs_buf_read_flags(target, blkno, len, flags) \
+               pagebuf_get((target), (blkno), (len), PBF_READ | (flags))
+#define xfs_buf_get_flags(target, blkno, len, flags) \
+               pagebuf_get((target), (blkno), (len), (flags))
+
+static inline int      xfs_bawrite(void *mp, xfs_buf_t *bp)
+{
+       bp->pb_fspriv3 = mp;
+       bp->pb_strat = xfs_bdstrat_cb;
+       xfs_buf_undelay(bp);
+       return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES);
+}
+
+static inline void     xfs_buf_relse(xfs_buf_t *bp)
+{
+       if (!bp->pb_relse)
+               pagebuf_unlock(bp);
+       pagebuf_rele(bp);
+}
+
+#define xfs_bpin(bp)           pagebuf_pin(bp)
+#define xfs_bunpin(bp)         pagebuf_unpin(bp)
+
+#define xfs_buftrace(id, bp)   \
+           pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
+
+#define xfs_biodone(pb)                    \
+           pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), 0)
+
+#define xfs_incore(buftarg,blkno,len,lockit) \
+           pagebuf_find(buftarg, blkno ,len, lockit)
+
+
+#define xfs_biomove(pb, off, len, data, rw) \
+           pagebuf_iomove((pb), (off), (len), (data), \
+               ((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ)
+
+#define xfs_biozero(pb, off, len) \
+           pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO)
+
+
+static inline int      XFS_bwrite(xfs_buf_t *pb)
+{
+       int     iowait = (pb->pb_flags & PBF_ASYNC) == 0;
+       int     error = 0;
+
+       if (!iowait)
+               pb->pb_flags |= _PBF_RUN_QUEUES;
+
+       xfs_buf_undelay(pb);
+       pagebuf_iostrategy(pb);
+       if (iowait) {
+               error = pagebuf_iowait(pb);
+               xfs_buf_relse(pb);
+       }
+       return error;
+}
+
+#define XFS_bdwrite(pb)                     \
+           pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC)
+
+static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)
+{
+       bp->pb_strat = xfs_bdstrat_cb;
+       bp->pb_fspriv3 = mp;
+
+       return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC);
+}
+
+#define XFS_bdstrat(bp) pagebuf_iorequest(bp)
+
+#define xfs_iowait(pb) pagebuf_iowait(pb)
+
+#define xfs_baread(target, rablkno, ralen)  \
+       pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK)
+
+#define xfs_buf_get_empty(len, target) pagebuf_get_empty((len), (target))
+#define xfs_buf_get_noaddr(len, target)        pagebuf_get_no_daddr((len), (target))
+#define xfs_buf_free(bp)               pagebuf_free(bp)
+
+
+/*
+ *     Handling of buftargs.
+ */
+
+extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *);
+extern void xfs_free_buftarg(xfs_buftarg_t *, int);
+extern void xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
+extern void xfs_incore_relse(xfs_buftarg_t *, int, int);
+extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
+
+#define xfs_getsize_buftarg(buftarg) \
+       block_size((buftarg)->pbr_bdev)
+#define xfs_readonly_buftarg(buftarg) \
+       bdev_read_only((buftarg)->pbr_bdev)
+#define xfs_binval(buftarg) \
+       xfs_flush_buftarg(buftarg, 1)
+#define XFS_bflush(buftarg) \
+       xfs_flush_buftarg(buftarg, 1)
+
+#endif /* __XFS_BUF_H__ */
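
Editor's note: the header above encodes a buffer's completion state in two bits; a buffer counts as done only when neither PBF_PARTIAL nor PBF_NONE is set, which is exactly what PBF_NOT_DONE(), XFS_BUF_DONE() and XFS_BUF_UNDONE() manipulate. The standalone sketch below copies just those definitions to demonstrate the state transitions; it is an illustration, not part of the patch.

#include <assert.h>
#include <stdio.h>

/* Flag bits copied from xfs_buf.h above (subset). */
enum {
        PBF_PARTIAL = (1 << 3),         /* buffer partially read */
        PBF_NONE    = (1 << 5),         /* buffer not read at all */
};

struct buf { unsigned int pb_flags; };

/* Same predicates as PBF_NOT_DONE / XFS_BUF_DONE / XFS_BUF_UNDONE. */
#define NOT_DONE(pb)    (((pb)->pb_flags & (PBF_PARTIAL | PBF_NONE)) != 0)
#define MARK_DONE(pb)   ((pb)->pb_flags &= ~(PBF_PARTIAL | PBF_NONE))
#define MARK_UNDONE(pb) ((pb)->pb_flags |= (PBF_PARTIAL | PBF_NONE))

int main(void)
{
        struct buf b = { .pb_flags = PBF_NONE };        /* freshly allocated, no data yet */

        assert(NOT_DONE(&b));           /* nothing read: not done */
        MARK_DONE(&b);                  /* I/O completion clears both bits */
        assert(!NOT_DONE(&b));
        MARK_UNDONE(&b);                /* invalidating the contents makes it "not done" again */
        assert(NOT_DONE(&b));
        printf("done-state transitions behave as in xfs_buf.h\n");
        return 0;
}
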
diff --git a/fs/xfs/linux/xfs_cred.h b/fs/xfs/linux/xfs_cred.h
new file mode 100644 (file)
index 0000000..00c4584
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_CRED_H__
+#define __XFS_CRED_H__
+
+/*
+ * Credentials
+ */
+typedef struct cred {
+       /* EMPTY */
+} cred_t;
+
+extern struct cred *sys_cred;
+
+/* This is a hack (it assumes sys_cred is the only cred_t in the system). */
+static __inline int capable_cred(cred_t *cr, int cid)
+{
+       return (cr == sys_cred) ? 1 : capable(cid);
+}
+
+#endif  /* __XFS_CRED_H__ */
diff --git a/fs/xfs/linux/xfs_file.c b/fs/xfs/linux/xfs_file.c
new file mode 100644 (file)
index 0000000..8d9f3b5
--- /dev/null
@@ -0,0 +1,546 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_sb.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_trans.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_error.h"
+#include "xfs_rw.h"
+
+#include <linux/dcache.h>
+
+static struct vm_operations_struct linvfs_file_vm_ops;
+
+
+STATIC inline ssize_t
+__linvfs_read(
+       struct kiocb            *iocb,
+       char __user             *buf,
+       int                     ioflags,
+       size_t                  count,
+       loff_t                  pos)
+{
+       struct iovec            iov = {buf, count};
+       struct file             *file = iocb->ki_filp;
+       vnode_t                 *vp = LINVFS_GET_VP(file->f_dentry->d_inode);
+       ssize_t                 rval;
+
+       BUG_ON(iocb->ki_pos != pos);
+
+       if (unlikely(file->f_flags & O_DIRECT))
+               ioflags |= IO_ISDIRECT;
+       VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
+       return rval;
+}
+
+
+STATIC ssize_t
+linvfs_read(
+       struct kiocb            *iocb,
+       char __user             *buf,
+       size_t                  count,
+       loff_t                  pos)
+{
+       return __linvfs_read(iocb, buf, 0, count, pos);
+}
+
+STATIC ssize_t
+linvfs_read_invis(
+       struct kiocb            *iocb,
+       char __user             *buf,
+       size_t                  count,
+       loff_t                  pos)
+{
+       return __linvfs_read(iocb, buf, IO_INVIS, count, pos);
+}
+
+
+STATIC inline ssize_t
+__linvfs_write(
+       struct kiocb    *iocb,
+       const char      *buf,
+       int             ioflags,
+       size_t          count,
+       loff_t          pos)
+{
+       struct iovec    iov = {(void *)buf, count};
+       struct file     *file = iocb->ki_filp;
+       struct inode    *inode = file->f_mapping->host;
+       vnode_t         *vp = LINVFS_GET_VP(inode);
+       ssize_t         rval;
+
+       BUG_ON(iocb->ki_pos != pos);
+       if (unlikely(file->f_flags & O_DIRECT)) {
+               ioflags |= IO_ISDIRECT;
+               VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos,
+                               ioflags, NULL, rval);
+       } else {
+               down(&inode->i_sem);
+               VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos,
+                               ioflags, NULL, rval);
+               up(&inode->i_sem);
+       }
+
+       return rval;
+}
+
+
+STATIC ssize_t
+linvfs_write(
+       struct kiocb            *iocb,
+       const char __user       *buf,
+       size_t                  count,
+       loff_t                  pos)
+{
+       return __linvfs_write(iocb, buf, 0, count, pos);
+}
+
+STATIC ssize_t
+linvfs_write_invis(
+       struct kiocb            *iocb,
+       const char __user       *buf,
+       size_t                  count,
+       loff_t                  pos)
+{
+       return __linvfs_write(iocb, buf, IO_INVIS, count, pos);
+}
+
+
+STATIC inline ssize_t
+__linvfs_readv(
+       struct file             *file,
+       const struct iovec      *iov,
+       int                     ioflags,
+       unsigned long           nr_segs,
+       loff_t                  *ppos)
+{
+       struct inode    *inode = file->f_mapping->host;
+       vnode_t         *vp = LINVFS_GET_VP(inode);
+       struct          kiocb kiocb;
+       ssize_t         rval;
+
+       init_sync_kiocb(&kiocb, file);
+       kiocb.ki_pos = *ppos;
+
+       if (unlikely(file->f_flags & O_DIRECT))
+               ioflags |= IO_ISDIRECT;
+       VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
+       if (rval == -EIOCBQUEUED)
+               rval = wait_on_sync_kiocb(&kiocb);
+
+       *ppos = kiocb.ki_pos;
+       return rval;
+}
+
+STATIC ssize_t
+linvfs_readv(
+       struct file             *file,
+       const struct iovec      *iov,
+       unsigned long           nr_segs,
+       loff_t                  *ppos)
+{
+       return __linvfs_readv(file, iov, 0, nr_segs, ppos);
+}
+
+STATIC ssize_t
+linvfs_readv_invis(
+       struct file             *file,
+       const struct iovec      *iov,
+       unsigned long           nr_segs,
+       loff_t                  *ppos)
+{
+       return __linvfs_readv(file, iov, IO_INVIS, nr_segs, ppos);
+}
+
+
+STATIC inline ssize_t
+__linvfs_writev(
+       struct file             *file,
+       const struct iovec      *iov,
+       int                     ioflags,
+       unsigned long           nr_segs,
+       loff_t                  *ppos)
+{
+       struct inode    *inode = file->f_mapping->host;
+       vnode_t         *vp = LINVFS_GET_VP(inode);
+       struct          kiocb kiocb;
+       ssize_t         rval;
+
+       init_sync_kiocb(&kiocb, file);
+       kiocb.ki_pos = *ppos;
+       if (unlikely(file->f_flags & O_DIRECT)) {
+               ioflags |= IO_ISDIRECT;
+               VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos,
+                               ioflags, NULL, rval);
+       } else {
+               down(&inode->i_sem);
+               VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos,
+                               ioflags, NULL, rval);
+               up(&inode->i_sem);
+       }
+
+       if (rval == -EIOCBQUEUED)
+               rval = wait_on_sync_kiocb(&kiocb);
+
+       *ppos = kiocb.ki_pos;
+       return rval;
+}
+
+
+STATIC ssize_t
+linvfs_writev(
+       struct file             *file,
+       const struct iovec      *iov,
+       unsigned long           nr_segs,
+       loff_t                  *ppos)
+{
+       return __linvfs_writev(file, iov, 0, nr_segs, ppos);
+}
+
+STATIC ssize_t
+linvfs_writev_invis(
+       struct file             *file,
+       const struct iovec      *iov,
+       unsigned long           nr_segs,
+       loff_t                  *ppos)
+{
+       return __linvfs_writev(file, iov, IO_INVIS, nr_segs, ppos);
+}
+
+STATIC ssize_t
+linvfs_sendfile(
+       struct file             *filp,
+       loff_t                  *ppos,
+       size_t                  count,
+       read_actor_t            actor,
+       void                    *target)
+{
+       vnode_t                 *vp = LINVFS_GET_VP(filp->f_dentry->d_inode);
+       int                     error;
+
+       VOP_SENDFILE(vp, filp, ppos, 0, count, actor, target, NULL, error);
+       return error;
+}
+
+
+STATIC int
+linvfs_open(
+       struct inode    *inode,
+       struct file     *filp)
+{
+       vnode_t         *vp = LINVFS_GET_VP(inode);
+       int             error;
+
+       if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
+               return -EFBIG;
+
+       ASSERT(vp);
+       VOP_OPEN(vp, NULL, error);
+       return -error;
+}
+
+
+STATIC int
+linvfs_release(
+       struct inode    *inode,
+       struct file     *filp)
+{
+       vnode_t         *vp = LINVFS_GET_VP(inode);
+       int             error = 0;
+
+       if (vp)
+               VOP_RELEASE(vp, error);
+       return -error;
+}
+
+
+STATIC int
+linvfs_fsync(
+       struct file     *filp,
+       struct dentry   *dentry,
+       int             datasync)
+{
+       struct inode    *inode = dentry->d_inode;
+       vnode_t         *vp = LINVFS_GET_VP(inode);
+       int             error;
+       int             flags = FSYNC_WAIT;
+
+       if (datasync)
+               flags |= FSYNC_DATA;
+
+       ASSERT(vp);
+       VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error);
+       return -error;
+}
+
+/*
+ * linvfs_readdir maps to VOP_READDIR().
+ * We need to build a uio, cred, ...
+ */
+
+#define nextdp(dp)      ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen))
+
+STATIC int
+linvfs_readdir(
+       struct file     *filp,
+       void            *dirent,
+       filldir_t       filldir)
+{
+       int             error = 0;
+       vnode_t         *vp;
+       uio_t           uio;
+       iovec_t         iov;
+       int             eof = 0;
+       caddr_t         read_buf;
+       int             namelen, size = 0;
+       size_t          rlen = PAGE_CACHE_SIZE;
+       xfs_off_t       start_offset, curr_offset;
+       xfs_dirent_t    *dbp = NULL;
+
+       vp = LINVFS_GET_VP(filp->f_dentry->d_inode);
+       ASSERT(vp);
+
+       /* Try fairly hard to get memory */
+       do {
+               if ((read_buf = (caddr_t)kmalloc(rlen, GFP_KERNEL)))
+                       break;
+               rlen >>= 1;
+       } while (rlen >= 1024);
+
+       if (read_buf == NULL)
+               return -ENOMEM;
+
+       uio.uio_iov = &iov;
+       uio.uio_segflg = UIO_SYSSPACE;
+       curr_offset = filp->f_pos;
+       if (filp->f_pos != 0x7fffffff)
+               uio.uio_offset = filp->f_pos;
+       else
+               uio.uio_offset = 0xffffffff;
+
+       while (!eof) {
+               uio.uio_resid = iov.iov_len = rlen;
+               iov.iov_base = read_buf;
+               uio.uio_iovcnt = 1;
+
+               start_offset = uio.uio_offset;
+
+               VOP_READDIR(vp, &uio, NULL, &eof, error);
+               if ((uio.uio_offset == start_offset) || error) {
+                       size = 0;
+                       break;
+               }
+
+               size = rlen - uio.uio_resid;
+               dbp = (xfs_dirent_t *)read_buf;
+               while (size > 0) {
+                       namelen = strlen(dbp->d_name);
+
+                       if (filldir(dirent, dbp->d_name, namelen,
+                                       (loff_t) curr_offset & 0x7fffffff,
+                                       (ino_t) dbp->d_ino,
+                                       DT_UNKNOWN)) {
+                               goto done;
+                       }
+                       size -= dbp->d_reclen;
+                       curr_offset = (loff_t)dbp->d_off /* & 0x7fffffff */;
+                       dbp = nextdp(dbp);
+               }
+       }
+done:
+       if (!error) {
+               if (size == 0)
+                       filp->f_pos = uio.uio_offset & 0x7fffffff;
+               else if (dbp)
+                       filp->f_pos = curr_offset;
+       }
+
+       kfree(read_buf);
+       return -error;
+}
+
+
+STATIC int
+linvfs_file_mmap(
+       struct file     *filp,
+       struct vm_area_struct *vma)
+{
+       struct inode    *ip = filp->f_dentry->d_inode;
+       vnode_t         *vp = LINVFS_GET_VP(ip);
+       vattr_t         va = { .va_mask = XFS_AT_UPDATIME };
+       int             error;
+
+       if ((vp->v_type == VREG) && (vp->v_vfsp->vfs_flag & VFS_DMI)) {
+               xfs_mount_t     *mp = XFS_VFSTOM(vp->v_vfsp);
+
+               error = -XFS_SEND_MMAP(mp, vma, 0);
+               if (error)
+                       return error;
+       }
+
+       vma->vm_ops = &linvfs_file_vm_ops;
+
+       VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error);
+       return 0;
+}
+
+
+STATIC int
+linvfs_ioctl(
+       struct inode    *inode,
+       struct file     *filp,
+       unsigned int    cmd,
+       unsigned long   arg)
+{
+       int             error;
+       vnode_t         *vp = LINVFS_GET_VP(inode);
+
+       ASSERT(vp);
+       VOP_IOCTL(vp, inode, filp, 0, cmd, arg, error);
+       VMODIFY(vp);
+
+       /* NOTE:  some ioctls return positive numbers as a
+        *        byte count indicating success, such as
+        *        readlink_by_handle.  So we don't "sign flip"
+        *        like most other routines.  This means true
+        *        errors need to be returned as a negative value.
+        */
+       return error;
+}
+
+STATIC int
+linvfs_ioctl_invis(
+       struct inode    *inode,
+       struct file     *filp,
+       unsigned int    cmd,
+       unsigned long   arg)
+{
+       int             error;
+       vnode_t         *vp = LINVFS_GET_VP(inode);
+
+       ASSERT(vp);
+       VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, arg, error);
+       VMODIFY(vp);
+
+       /* NOTE:  some ioctls return positive numbers as a
+        *        byte count indicating success, such as
+        *        readlink_by_handle.  So we don't "sign flip"
+        *        like most other routines.  This means true
+        *        errors need to be returned as a negative value.
+        */
+       return error;
+}
+
+#ifdef HAVE_VMOP_MPROTECT
+STATIC int
+linvfs_mprotect(
+       struct vm_area_struct *vma,
+       unsigned int    newflags)
+{
+       vnode_t         *vp = LINVFS_GET_VP(vma->vm_file->f_dentry->d_inode);
+       int             error = 0;
+
+       if ((vp->v_type == VREG) && (vp->v_vfsp->vfs_flag & VFS_DMI)) {
+               if ((vma->vm_flags & VM_MAYSHARE) &&
+                   (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE)) {
+                       xfs_mount_t     *mp = XFS_VFSTOM(vp->v_vfsp);
+
+                       error = XFS_SEND_MMAP(mp, vma, VM_WRITE);
+                   }
+       }
+       return error;
+}
+#endif /* HAVE_VMOP_MPROTECT */
+
+
+struct file_operations linvfs_file_operations = {
+       .llseek         = generic_file_llseek,
+       .read           = do_sync_read,
+       .write          = do_sync_write,
+       .readv          = linvfs_readv,
+       .writev         = linvfs_writev,
+       .aio_read       = linvfs_read,
+       .aio_write      = linvfs_write,
+       .sendfile       = linvfs_sendfile,
+       .ioctl          = linvfs_ioctl,
+       .mmap           = linvfs_file_mmap,
+       .open           = linvfs_open,
+       .release        = linvfs_release,
+       .fsync          = linvfs_fsync,
+};
+
+struct file_operations linvfs_invis_file_operations = {
+       .llseek         = generic_file_llseek,
+       .read           = do_sync_read,
+       .write          = do_sync_write,
+       .readv          = linvfs_readv_invis,
+       .writev         = linvfs_writev_invis,
+       .aio_read       = linvfs_read_invis,
+       .aio_write      = linvfs_write_invis,
+       .sendfile       = linvfs_sendfile,
+       .ioctl          = linvfs_ioctl_invis,
+       .mmap           = linvfs_file_mmap,
+       .open           = linvfs_open,
+       .release        = linvfs_release,
+       .fsync          = linvfs_fsync,
+};
+
+
+struct file_operations linvfs_dir_operations = {
+       .read           = generic_read_dir,
+       .readdir        = linvfs_readdir,
+       .ioctl          = linvfs_ioctl,
+       .fsync          = linvfs_fsync,
+};
+
+static struct vm_operations_struct linvfs_file_vm_ops = {
+       .nopage         = filemap_nopage,
+#ifdef HAVE_VMOP_MPROTECT
+       .mprotect       = linvfs_mprotect,
+#endif
+};
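
Editor's note: linvfs_readdir's "try fairly hard to get memory" loop starts with a PAGE_CACHE_SIZE request and keeps halving it down to 1 KiB before giving up with ENOMEM. Below is a small userspace sketch of that fallback strategy; malloc stands in for kmalloc(GFP_KERNEL) and the 4 KiB page size is an assumption used only for illustration.

#include <stdio.h>
#include <stdlib.h>

#define FAKE_PAGE_SIZE  4096            /* assumed stand-in for PAGE_CACHE_SIZE */

/* Allocate a scratch buffer, retrying with half the size down to 1 KiB,
 * mirroring the loop in linvfs_readdir().  Returns NULL if even 1 KiB fails. */
static void *alloc_read_buf(size_t *rlen)
{
        void *buf;

        for (*rlen = FAKE_PAGE_SIZE; *rlen >= 1024; *rlen >>= 1) {
                buf = malloc(*rlen);
                if (buf)
                        return buf;
        }
        return NULL;
}

int main(void)
{
        size_t rlen;
        void *buf = alloc_read_buf(&rlen);

        if (!buf) {
                fprintf(stderr, "no memory for readdir scratch buffer\n");
                return 1;               /* the kernel code returns -ENOMEM here */
        }
        printf("got %zu-byte scratch buffer\n", rlen);
        free(buf);
        return 0;
}
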
diff --git a/fs/xfs/linux/xfs_fs_subr.c b/fs/xfs/linux/xfs_fs_subr.c
new file mode 100644 (file)
index 0000000..afad970
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+
+/*
+ * Stub for no-op vnode operations that return success status (0).
+ */
+int
+fs_noerr()
+{
+       return 0;
+}
+
+/*
+ * Operation unsupported under this file system.
+ */
+int
+fs_nosys()
+{
+       return ENOSYS;
+}
+
+/*
+ * Stub for inactive, strategy, and read/write lock/unlock.  Does nothing.
+ */
+/* ARGSUSED */
+void
+fs_noval()
+{
+}
+
+/*
+ * vnode pcache layer for vnode_tosspages.
+ * 'last' parameter unused but left in for IRIX compatibility
+ */
+void
+fs_tosspages(
+       bhv_desc_t      *bdp,
+       xfs_off_t       first,
+       xfs_off_t       last,
+       int             fiopt)
+{
+       vnode_t         *vp = BHV_TO_VNODE(bdp);
+       struct inode    *ip = LINVFS_GET_IP(vp);
+
+       if (VN_CACHED(vp))
+               truncate_inode_pages(ip->i_mapping, first);
+}
+
+
+/*
+ * vnode pcache layer for vnode_flushinval_pages.
+ * 'last' parameter unused but left in for IRIX compatibility
+ */
+void
+fs_flushinval_pages(
+       bhv_desc_t      *bdp,
+       xfs_off_t       first,
+       xfs_off_t       last,
+       int             fiopt)
+{
+       vnode_t         *vp = BHV_TO_VNODE(bdp);
+       struct inode    *ip = LINVFS_GET_IP(vp);
+
+       if (VN_CACHED(vp)) {
+               filemap_fdatawrite(ip->i_mapping);
+               filemap_fdatawait(ip->i_mapping);
+
+               truncate_inode_pages(ip->i_mapping, first);
+       }
+}
+
+/*
+ * vnode pcache layer for vnode_flush_pages.
+ * 'last' parameter unused but left in for IRIX compatibility
+ */
+int
+fs_flush_pages(
+       bhv_desc_t      *bdp,
+       xfs_off_t       first,
+       xfs_off_t       last,
+       uint64_t        flags,
+       int             fiopt)
+{
+       vnode_t         *vp = BHV_TO_VNODE(bdp);
+       struct inode    *ip = LINVFS_GET_IP(vp);
+
+       if (VN_CACHED(vp)) {
+               filemap_fdatawrite(ip->i_mapping);
+               filemap_fdatawait(ip->i_mapping);
+       }
+
+       return 0;
+}
diff --git a/fs/xfs/linux/xfs_fs_subr.h b/fs/xfs/linux/xfs_fs_subr.h
new file mode 100644 (file)
index 0000000..198b8dd
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2000, 2002 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef        __XFS_SUBR_H__
+#define __XFS_SUBR_H__
+
+/*
+ * Utilities shared among file system implementations.
+ */
+
+struct cred;
+
+extern int     fs_noerr(void);
+extern int     fs_nosys(void);
+extern int     fs_nodev(void);
+extern void    fs_noval(void);
+extern void    fs_tosspages(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
+extern void    fs_flushinval_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
+extern int     fs_flush_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, uint64_t, int);
+
+#endif /* __XFS_SUBR_H__ */
diff --git a/fs/xfs/linux/xfs_globals.c b/fs/xfs/linux/xfs_globals.c
new file mode 100644 (file)
index 0000000..1144a8b
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * This file contains globals needed by XFS that were normally defined
+ * somewhere else in IRIX.
+ */
+
+#include "xfs.h"
+#include "xfs_cred.h"
+#include "xfs_sysctl.h"
+
+/*
+ * System memory size - used to scale certain data structures in XFS.
+ */
+unsigned long xfs_physmem;
+
+/*
+ * Tunable XFS parameters.  xfs_params is required even when CONFIG_SYSCTL=n,
+ * because other XFS code uses these values.
+ */
+
+xfs_param_t xfs_params = {
+                         /*    MIN             DFLT            MAX     */
+       .restrict_chown = {     0,              1,              1       },
+       .sgid_inherit   = {     0,              0,              1       },
+       .symlink_mode   = {     0,              0,              1       },
+       .panic_mask     = {     0,              0,              127     },
+       .error_level    = {     0,              3,              11      },
+       .sync_interval  = {     USER_HZ,        30*USER_HZ,     7200*USER_HZ },
+       .stats_clear    = {     0,              0,              1       },
+       .inherit_sync   = {     0,              1,              1       },
+       .inherit_nodump = {     0,              1,              1       },
+       .inherit_noatim = {     0,              1,              1       },
+       .flush_interval = {     USER_HZ/2,      USER_HZ,        30*USER_HZ },
+       .age_buffer     = {     1*USER_HZ,      15*USER_HZ,     7200*USER_HZ },
+};
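
Each tunable above is a {MIN, DFLT, MAX} triple, and the sysctl layer is expected to keep writes inside that range. A hypothetical illustration of how one such entry might be updated, assuming each element of xfs_param_t is a small struct with min/val/max fields (the type and field names here are assumptions, not taken from this patch):

/* Hypothetical sketch: clamp-checked update of one XFS tunable. */
#include <linux/errno.h>

static int xfs_tunable_store(xfs_sysctl_val_t *t, int new_val)
{
        if (new_val < t->min || new_val > t->max)
                return -EINVAL;         /* outside the MIN..MAX columns above */
        t->val = new_val;               /* e.g. xfs_params.error_level        */
        return 0;
}
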
+
+/*
+ * Global system credential structure.
+ */
+cred_t sys_cred_val, *sys_cred = &sys_cred_val;
+
diff --git a/fs/xfs/linux/xfs_globals.h b/fs/xfs/linux/xfs_globals.h
new file mode 100644
index 0000000..e81e2f3
--- /dev/null
+++ b/fs/xfs/linux/xfs_globals.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_GLOBALS_H__
+#define __XFS_GLOBALS_H__
+
+/*
+ * This file declares globals needed by XFS that were normally defined
+ * somewhere else in IRIX.
+ */
+
+extern uint64_t        xfs_panic_mask;         /* set to cause more panics */
+extern unsigned long xfs_physmem;
+extern struct cred *sys_cred;
+
+#endif /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux/xfs_ioctl.c b/fs/xfs/linux/xfs_ioctl.c
new file mode 100644
index 0000000..d6402d7
--- /dev/null
+++ b/fs/xfs/linux/xfs_ioctl.c
@@ -0,0 +1,1236 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+
+#include "xfs_fs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_cap.h"
+#include "xfs_mac.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_dfrag.h"
+#include "xfs_fsops.h"
+
+#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+
+/*
+ * ioctl commands that are used by Linux filesystems
+ */
+#define XFS_IOC_GETXFLAGS      _IOR('f', 1, long)
+#define XFS_IOC_SETXFLAGS      _IOW('f', 2, long)
+#define XFS_IOC_GETVERSION     _IOR('v', 1, long)
+
+
+/*
+ * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
+ * a file or fs handle.
+ *
+ * XFS_IOC_PATH_TO_FSHANDLE
+ *    returns fs handle for a mount point or path within that mount point
+ * XFS_IOC_FD_TO_HANDLE
+ *    returns full handle for a FD opened in user space
+ * XFS_IOC_PATH_TO_HANDLE
+ *    returns full handle for a path
+ */
+STATIC int
+xfs_find_handle(
+       unsigned int            cmd,
+       unsigned long           arg)
+{
+       int                     hsize;
+       xfs_handle_t            handle;
+       xfs_fsop_handlereq_t    hreq;
+       struct inode            *inode;
+       struct vnode            *vp;
+
+       if (copy_from_user(&hreq, (xfs_fsop_handlereq_t *)arg, sizeof(hreq)))
+               return -XFS_ERROR(EFAULT);
+
+       memset((char *)&handle, 0, sizeof(handle));
+
+       switch (cmd) {
+       case XFS_IOC_PATH_TO_FSHANDLE:
+       case XFS_IOC_PATH_TO_HANDLE: {
+               struct nameidata        nd;
+               int                     error;
+
+               error = user_path_walk_link(hreq.path, &nd);
+               if (error)
+                       return error;
+
+               ASSERT(nd.dentry);
+               ASSERT(nd.dentry->d_inode);
+               inode = igrab(nd.dentry->d_inode);
+               path_release(&nd);
+               break;
+       }
+
+       case XFS_IOC_FD_TO_HANDLE: {
+               struct file     *file;
+
+               file = fget(hreq.fd);
+               if (!file)
+                   return -EBADF;
+
+               ASSERT(file->f_dentry);
+               ASSERT(file->f_dentry->d_inode);
+               inode = igrab(file->f_dentry->d_inode);
+               fput(file);
+               break;
+       }
+
+       default:
+               ASSERT(0);
+               return -XFS_ERROR(EINVAL);
+       }
+
+       if (inode->i_sb->s_magic != XFS_SB_MAGIC) {
+               /* we're not in XFS anymore, Toto */
+               iput(inode);
+               return -XFS_ERROR(EINVAL);
+       }
+
+       /* we need the vnode */
+       vp = LINVFS_GET_VP(inode);
+       if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
+               iput(inode);
+               return -XFS_ERROR(EBADF);
+       }
+
+       /* now we can grab the fsid */
+       memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t));
+       hsize = sizeof(xfs_fsid_t);
+
+       if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {
+               xfs_inode_t     *ip;
+               bhv_desc_t      *bhv;
+               int             lock_mode;
+
+               /* need to get access to the xfs_inode to read the generation */
+               bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops);
+               ASSERT(bhv);
+               ip = XFS_BHVTOI(bhv);
+               ASSERT(ip);
+               lock_mode = xfs_ilock_map_shared(ip);
+
+               /* fill in fid section of handle from inode */
+               handle.ha_fid.xfs_fid_len = sizeof(xfs_fid_t) -
+                                           sizeof(handle.ha_fid.xfs_fid_len);
+               handle.ha_fid.xfs_fid_pad = 0;
+               handle.ha_fid.xfs_fid_gen = ip->i_d.di_gen;
+               handle.ha_fid.xfs_fid_ino = ip->i_ino;
+
+               xfs_iunlock_map_shared(ip, lock_mode);
+
+               hsize = XFS_HSIZE(handle);
+       }
+
+       /* now copy our handle into the user buffer & write out the size */
+       if (copy_to_user((xfs_handle_t *)hreq.ohandle, &handle, hsize) ||
+           copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) {
+               iput(inode);
+               return -XFS_ERROR(EFAULT);
+       }
+
+       iput(inode);
+       return 0;
+}
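
All three commands are reached from user space through a plain ioctl(2) on a descriptor that lives on the XFS filesystem in question. A hedged sketch of the XFS_IOC_PATH_TO_FSHANDLE case, with the structure fields named as they are used above (the userspace header is an assumption; xfsprogs ships the real definitions):

/* Sketch (userspace): request the filesystem handle for a path. */
#include <xfs/xfs.h>            /* assumed: xfs_fsop_handlereq_t, ioctl numbers */
#include <sys/ioctl.h>
#include <string.h>

int get_fshandle(int xfs_fd, const char *path, void *hbuf, __u32 *hlen)
{
        xfs_fsop_handlereq_t hreq;

        memset(&hreq, 0, sizeof(hreq));
        hreq.path     = (void *)path;   /* path to resolve                    */
        hreq.ohandle  = hbuf;           /* kernel copies the handle here      */
        hreq.ohandlen = hlen;           /* kernel writes the handle size here */
        return ioctl(xfs_fd, XFS_IOC_PATH_TO_FSHANDLE, &hreq);
}
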
+
+
+/*
+ * Convert userspace handle data into vnode (and inode).
+ * We [ab]use the fact that all the fsop_handlereq ioctl calls
+ * have a data structure argument whose first component is always
+ * a xfs_fsop_handlereq_t, so we can cast to and from this type.
+ * This allows us to optimise the copy_from_user calls and gives
+ * a handy, shared routine.
+ *
+ * If no error, caller must always VN_RELE the returned vp.
+ */
+STATIC int
+xfs_vget_fsop_handlereq(
+       xfs_mount_t             *mp,
+       struct inode            *parinode,      /* parent inode pointer    */
+       int                     cap,            /* capability level for op */
+       unsigned long           arg,            /* userspace data pointer  */
+       unsigned long           size,           /* size of expected struct */
+       /* output arguments */
+       xfs_fsop_handlereq_t    *hreq,
+       vnode_t                 **vp,
+       struct inode            **inode)
+{
+       void                    *hanp;
+       size_t                  hlen;
+       xfs_fid_t               *xfid;
+       xfs_handle_t            *handlep;
+       xfs_handle_t            handle;
+       xfs_inode_t             *ip;
+       struct inode            *inodep;
+       vnode_t                 *vpp;
+       xfs_ino_t               ino;
+       __u32                   igen;
+       int                     error;
+
+       if (!capable(cap))
+               return XFS_ERROR(EPERM);
+
+       /*
+        * Only allow handle opens under a directory.
+        */
+       if (!S_ISDIR(parinode->i_mode))
+               return XFS_ERROR(ENOTDIR);
+
+       /*
+        * Copy the handle down from the user and validate
+        * that it looks to be in the correct format.
+        */
+       if (copy_from_user(hreq, (struct xfs_fsop_handlereq *)arg, size))
+               return XFS_ERROR(EFAULT);
+
+       hanp = hreq->ihandle;
+       hlen = hreq->ihandlen;
+       handlep = &handle;
+
+       if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep))
+               return XFS_ERROR(EINVAL);
+       if (copy_from_user(handlep, hanp, hlen))
+               return XFS_ERROR(EFAULT);
+       if (hlen < sizeof(*handlep))
+               memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);
+       if (hlen > sizeof(handlep->ha_fsid)) {
+               if (handlep->ha_fid.xfs_fid_len !=
+                               (hlen - sizeof(handlep->ha_fsid)
+                                       - sizeof(handlep->ha_fid.xfs_fid_len))
+                   || handlep->ha_fid.xfs_fid_pad)
+                       return XFS_ERROR(EINVAL);
+       }
+
+       /*
+        * Crack the handle, obtain the inode # & generation #
+        */
+       xfid = (struct xfs_fid *)&handlep->ha_fid;
+       if (xfid->xfs_fid_len == sizeof(*xfid) - sizeof(xfid->xfs_fid_len)) {
+               ino  = xfid->xfs_fid_ino;
+               igen = xfid->xfs_fid_gen;
+       } else {
+               return XFS_ERROR(EINVAL);
+       }
+
+       /*
+        * Get the XFS inode, building a vnode to go with it.
+        */
+       error = xfs_iget(mp, NULL, ino, XFS_ILOCK_SHARED, &ip, 0);
+       if (error)
+               return error;
+       if (ip == NULL)
+               return XFS_ERROR(EIO);
+       if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) {
+               xfs_iput_new(ip, XFS_ILOCK_SHARED);
+               return XFS_ERROR(ENOENT);
+       }
+
+       vpp = XFS_ITOV(ip);
+       inodep = LINVFS_GET_IP(vpp);
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+       *vp = vpp;
+       *inode = inodep;
+       return 0;
+}
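
The cast trick described above only works because every *_handlereq request structure places the common xfs_fsop_handlereq_t as its first member, so a pointer to the larger request is also a valid pointer to the header. Illustrated with a hypothetical request type (the name and payload are made up):

/* Hypothetical layout: a handle-based request that can be fed straight
 * into xfs_vget_fsop_handlereq() via the cast used by the callers below. */
typedef struct xfs_fsop_example_handlereq {
        xfs_fsop_handlereq_t    hreq;           /* must be the first member */
        __u32                   example_flags;  /* request-specific payload */
} xfs_fsop_example_handlereq_t;

/* ... error = xfs_vget_fsop_handlereq(mp, parinode, CAP_SYS_ADMIN, arg,
 *                 sizeof(xfs_fsop_example_handlereq_t),
 *                 (xfs_fsop_handlereq_t *)&example_req, &vp, &inode);      */
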
+
+STATIC int
+xfs_open_by_handle(
+       xfs_mount_t             *mp,
+       unsigned long           arg,
+       struct file             *parfilp,
+       struct inode            *parinode)
+{
+       int                     error;
+       int                     new_fd;
+       int                     permflag;
+       struct file             *filp;
+       struct inode            *inode;
+       struct dentry           *dentry;
+       vnode_t                 *vp;
+       xfs_fsop_handlereq_t    hreq;
+
+       error = xfs_vget_fsop_handlereq(mp, parinode, CAP_SYS_ADMIN, arg,
+                                       sizeof(xfs_fsop_handlereq_t),
+                                       &hreq, &vp, &inode);
+       if (error)
+               return -error;
+
+       /* Restrict xfs_open_by_handle to directories & regular files. */
+       if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
+               iput(inode);
+               return -XFS_ERROR(EINVAL);
+       }
+
+#if BITS_PER_LONG != 32
+       hreq.oflags |= O_LARGEFILE;
+#endif
+       /* Put open permission in namei format. */
+       permflag = hreq.oflags;
+       if ((permflag+1) & O_ACCMODE)
+               permflag++;
+       if (permflag & O_TRUNC)
+               permflag |= 2;
+
+       if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
+           (permflag & FMODE_WRITE) && IS_APPEND(inode)) {
+               iput(inode);
+               return -XFS_ERROR(EPERM);
+       }
+
+       if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
+               iput(inode);
+               return -XFS_ERROR(EACCES);
+       }
+
+       /* Can't write directories. */
+       if ( S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) {
+               iput(inode);
+               return -XFS_ERROR(EISDIR);
+       }
+
+       if ((new_fd = get_unused_fd()) < 0) {
+               iput(inode);
+               return new_fd;
+       }
+
+       dentry = d_alloc_anon(inode);
+       if (dentry == NULL) {
+               iput(inode);
+               put_unused_fd(new_fd);
+               return -XFS_ERROR(ENOMEM);
+       }
+
+       /* Ensure umount returns EBUSY on umounts while this file is open. */
+       mntget(parfilp->f_vfsmnt);
+
+       /* Create file pointer. */
+       filp = dentry_open(dentry, parfilp->f_vfsmnt, hreq.oflags);
+       if (IS_ERR(filp)) {
+               put_unused_fd(new_fd);
+               return -XFS_ERROR(-PTR_ERR(filp));
+       }
+       if (inode->i_mode & S_IFREG)
+               filp->f_op = &linvfs_invis_file_operations;
+
+       fd_install(new_fd, filp);
+       return new_fd;
+}
+
+STATIC int
+xfs_readlink_by_handle(
+       xfs_mount_t             *mp,
+       unsigned long           arg,
+       struct file             *parfilp,
+       struct inode            *parinode)
+{
+       int                     error;
+       struct iovec            aiov;
+       struct uio              auio;
+       struct inode            *inode;
+       xfs_fsop_handlereq_t    hreq;
+       vnode_t                 *vp;
+       __u32                   olen;
+
+       error = xfs_vget_fsop_handlereq(mp, parinode, CAP_SYS_ADMIN, arg,
+                                       sizeof(xfs_fsop_handlereq_t),
+                                       &hreq, &vp, &inode);
+       if (error)
+               return -error;
+
+       /* Restrict this handle operation to symlinks only. */
+       if (vp->v_type != VLNK) {
+               VN_RELE(vp);
+               return -XFS_ERROR(EINVAL);
+       }
+
+       if (copy_from_user(&olen, hreq.ohandlen, sizeof(__u32))) {
+               VN_RELE(vp);
+               return -XFS_ERROR(EFAULT);
+       }
+       aiov.iov_len    = olen;
+       aiov.iov_base   = hreq.ohandle;
+
+       auio.uio_iov    = &aiov;
+       auio.uio_iovcnt = 1;
+       auio.uio_offset = 0;
+       auio.uio_segflg = UIO_USERSPACE;
+       auio.uio_resid  = olen;
+
+       VOP_READLINK(vp, &auio, IO_INVIS, NULL, error);
+
+       VN_RELE(vp);
+       return (olen - auio.uio_resid);
+}
+
+STATIC int
+xfs_fssetdm_by_handle(
+       xfs_mount_t             *mp,
+       unsigned long           arg,
+       struct file             *parfilp,
+       struct inode            *parinode)
+{
+       int                     error;
+       struct fsdmidata        fsd;
+       xfs_fsop_setdm_handlereq_t dmhreq;
+       struct inode            *inode;
+       bhv_desc_t              *bdp;
+       vnode_t                 *vp;
+
+       error = xfs_vget_fsop_handlereq(mp, parinode, CAP_MKNOD, arg,
+                                       sizeof(xfs_fsop_setdm_handlereq_t),
+                                       (xfs_fsop_handlereq_t *)&dmhreq,
+                                       &vp, &inode);
+       if (error)
+               return -error;
+
+       if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
+               VN_RELE(vp);
+               return -XFS_ERROR(EPERM);
+       }
+
+       if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
+               VN_RELE(vp);
+               return -XFS_ERROR(EFAULT);
+       }
+
+       bdp = bhv_base_unlocked(VN_BHV_HEAD(vp));
+       error = xfs_set_dmattrs(bdp, fsd.fsd_dmevmask, fsd.fsd_dmstate, NULL);
+
+       VN_RELE(vp);
+       if (error)
+               return -error;
+       return 0;
+}
+
+STATIC int
+xfs_attrlist_by_handle(
+       xfs_mount_t             *mp,
+       unsigned long           arg,
+       struct file             *parfilp,
+       struct inode            *parinode)
+{
+       int                     error;
+       attrlist_cursor_kern_t  *cursor;
+       xfs_fsop_attrlist_handlereq_t al_hreq;
+       struct inode            *inode;
+       vnode_t                 *vp;
+
+       error = xfs_vget_fsop_handlereq(mp, parinode, CAP_SYS_ADMIN, arg,
+                                       sizeof(xfs_fsop_attrlist_handlereq_t),
+                                       (xfs_fsop_handlereq_t *)&al_hreq,
+                                       &vp, &inode);
+       if (error)
+               return -error;
+
+       cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
+       VOP_ATTR_LIST(vp, al_hreq.buffer, al_hreq.buflen, al_hreq.flags,
+                       cursor, NULL, error);
+       VN_RELE(vp);
+       if (error)
+               return -error;
+       return 0;
+}
+
+STATIC int
+xfs_attrmulti_by_handle(
+       xfs_mount_t             *mp,
+       unsigned long           arg,
+       struct file             *parfilp,
+       struct inode            *parinode)
+{
+       int                     error;
+       xfs_attr_multiop_t      *ops;
+       xfs_fsop_attrmulti_handlereq_t am_hreq;
+       struct inode            *inode;
+       vnode_t                 *vp;
+       int                     i, size;
+
+       error = xfs_vget_fsop_handlereq(mp, parinode, CAP_SYS_ADMIN, arg,
+                                       sizeof(xfs_fsop_attrmulti_handlereq_t),
+                                       (xfs_fsop_handlereq_t *)&am_hreq,
+                                       &vp, &inode);
+       if (error)
+               return -error;
+
+       size = am_hreq.opcount * sizeof(attr_multiop_t);
+       ops = (xfs_attr_multiop_t *)kmalloc(size, GFP_KERNEL);
+       if (!ops) {
+               VN_RELE(vp);
+               return -XFS_ERROR(ENOMEM);
+       }
+
+       if (copy_from_user(ops, am_hreq.ops, size)) {
+               kfree(ops);
+               VN_RELE(vp);
+               return -XFS_ERROR(EFAULT);
+       }
+
+       for (i = 0; i < am_hreq.opcount; i++) {
+               switch(ops[i].am_opcode) {
+               case ATTR_OP_GET:
+                       VOP_ATTR_GET(vp,ops[i].am_attrname, ops[i].am_attrvalue,
+                                       &ops[i].am_length, ops[i].am_flags,
+                                       NULL, ops[i].am_error);
+                       break;
+               case ATTR_OP_SET:
+                       if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
+                               ops[i].am_error = EPERM;
+                               break;
+                       }
+                       VOP_ATTR_SET(vp,ops[i].am_attrname, ops[i].am_attrvalue,
+                                       ops[i].am_length, ops[i].am_flags,
+                                       NULL, ops[i].am_error);
+                       break;
+               case ATTR_OP_REMOVE:
+                       if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
+                               ops[i].am_error = EPERM;
+                               break;
+                       }
+                       VOP_ATTR_REMOVE(vp, ops[i].am_attrname, ops[i].am_flags,
+                                       NULL, ops[i].am_error);
+                       break;
+               default:
+                       ops[i].am_error = EINVAL;
+               }
+       }
+
+       if (copy_to_user(am_hreq.ops, ops, size))
+               error = -XFS_ERROR(EFAULT);
+
+       kfree(ops);
+       VN_RELE(vp);
+       return error;
+}
+
+/* prototypes for a few of the stack-hungry cases that have
+ * their own functions.  Functions are defined after their use
+ * so gcc doesn't get fancy and inline them with -O3 */
+
+STATIC int
+xfs_ioc_space(
+       bhv_desc_t              *bdp,
+       vnode_t                 *vp,
+       struct file             *filp,
+       int                     flags,
+       unsigned int            cmd,
+       unsigned long           arg);
+
+STATIC int
+xfs_ioc_bulkstat(
+       xfs_mount_t             *mp,
+       unsigned int            cmd,
+       unsigned long           arg);
+
+STATIC int
+xfs_ioc_fsgeometry_v1(
+       xfs_mount_t             *mp,
+       unsigned long           arg);
+
+STATIC int
+xfs_ioc_fsgeometry(
+       xfs_mount_t             *mp,
+       unsigned long           arg);
+
+STATIC int
+xfs_ioc_xattr(
+       vnode_t                 *vp,
+       xfs_inode_t             *ip,
+       struct file             *filp,
+       unsigned int            cmd,
+       unsigned long           arg);
+
+STATIC int
+xfs_ioc_getbmap(
+       bhv_desc_t              *bdp,
+       struct file             *filp,
+       int                     flags,
+       unsigned int            cmd,
+       unsigned long           arg);
+
+STATIC int
+xfs_ioc_getbmapx(
+       bhv_desc_t              *bdp,
+       unsigned long           arg);
+
+int
+xfs_ioctl(
+       bhv_desc_t              *bdp,
+       struct inode            *inode,
+       struct file             *filp,
+       int                     ioflags,
+       unsigned int            cmd,
+       unsigned long           arg)
+{
+       int                     error;
+       vnode_t                 *vp;
+       xfs_inode_t             *ip;
+       xfs_mount_t             *mp;
+
+       vp = LINVFS_GET_VP(inode);
+
+       vn_trace_entry(vp, "xfs_ioctl", (inst_t *)__return_address);
+
+       ip = XFS_BHVTOI(bdp);
+       mp = ip->i_mount;
+
+       switch (cmd) {
+
+       case XFS_IOC_ALLOCSP:
+       case XFS_IOC_FREESP:
+       case XFS_IOC_RESVSP:
+       case XFS_IOC_UNRESVSP:
+       case XFS_IOC_ALLOCSP64:
+       case XFS_IOC_FREESP64:
+       case XFS_IOC_RESVSP64:
+       case XFS_IOC_UNRESVSP64:
+               /*
+                * Only allow the sys admin to reserve space unless
+                * unwritten extents are enabled.
+                */
+               if (!XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) &&
+                   !capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               return xfs_ioc_space(bdp, vp, filp, ioflags, cmd, arg);
+
+       case XFS_IOC_DIOINFO: {
+               struct dioattr  da;
+               xfs_buftarg_t   *target =
+                       (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
+                       mp->m_rtdev_targp : mp->m_ddev_targp;
+
+               da.d_mem = da.d_miniosz = 1 << target->pbr_sshift;
+               /* The size dio will do in one go */
+               da.d_maxiosz = 64 * PAGE_CACHE_SIZE;
+
+               if (copy_to_user((struct dioattr *)arg, &da, sizeof(da)))
+                       return -XFS_ERROR(EFAULT);
+               return 0;
+       }
+
+       case XFS_IOC_FSBULKSTAT_SINGLE:
+       case XFS_IOC_FSBULKSTAT:
+       case XFS_IOC_FSINUMBERS:
+               return xfs_ioc_bulkstat(mp, cmd, arg);
+
+       case XFS_IOC_FSGEOMETRY_V1:
+               return xfs_ioc_fsgeometry_v1(mp, arg);
+
+       case XFS_IOC_FSGEOMETRY:
+               return xfs_ioc_fsgeometry(mp, arg);
+
+       case XFS_IOC_GETVERSION:
+       case XFS_IOC_GETXFLAGS:
+       case XFS_IOC_SETXFLAGS:
+       case XFS_IOC_FSGETXATTR:
+       case XFS_IOC_FSSETXATTR:
+       case XFS_IOC_FSGETXATTRA:
+               return xfs_ioc_xattr(vp, ip, filp, cmd, arg);
+
+       case XFS_IOC_FSSETDM: {
+               struct fsdmidata        dmi;
+
+               if (copy_from_user(&dmi, (struct fsdmidata *)arg, sizeof(dmi)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_set_dmattrs(bdp, dmi.fsd_dmevmask, dmi.fsd_dmstate,
+                                                       NULL);
+               return -error;
+       }
+
+       case XFS_IOC_GETBMAP:
+       case XFS_IOC_GETBMAPA:
+               return xfs_ioc_getbmap(bdp, filp, ioflags, cmd, arg);
+
+       case XFS_IOC_GETBMAPX:
+               return xfs_ioc_getbmapx(bdp, arg);
+
+       case XFS_IOC_FD_TO_HANDLE:
+       case XFS_IOC_PATH_TO_HANDLE:
+       case XFS_IOC_PATH_TO_FSHANDLE:
+               return xfs_find_handle(cmd, arg);
+
+       case XFS_IOC_OPEN_BY_HANDLE:
+               return xfs_open_by_handle(mp, arg, filp, inode);
+
+       case XFS_IOC_FSSETDM_BY_HANDLE:
+               return xfs_fssetdm_by_handle(mp, arg, filp, inode);
+
+       case XFS_IOC_READLINK_BY_HANDLE:
+               return xfs_readlink_by_handle(mp, arg, filp, inode);
+
+       case XFS_IOC_ATTRLIST_BY_HANDLE:
+               return xfs_attrlist_by_handle(mp, arg, filp, inode);
+
+       case XFS_IOC_ATTRMULTI_BY_HANDLE:
+               return xfs_attrmulti_by_handle(mp, arg, filp, inode);
+
+       case XFS_IOC_SWAPEXT: {
+               error = xfs_swapext((struct xfs_swapext *)arg);
+               return -error;
+       }
+
+       case XFS_IOC_FSCOUNTS: {
+               xfs_fsop_counts_t out;
+
+               error = xfs_fs_counts(mp, &out);
+               if (error)
+                       return -error;
+
+               if (copy_to_user((char *)arg, &out, sizeof(out)))
+                       return -XFS_ERROR(EFAULT);
+               return 0;
+       }
+
+       case XFS_IOC_SET_RESBLKS: {
+               xfs_fsop_resblks_t inout;
+               __uint64_t         in;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (copy_from_user(&inout, (char *)arg, sizeof(inout)))
+                       return -XFS_ERROR(EFAULT);
+
+               /* input parameter is passed in resblks field of structure */
+               in = inout.resblks;
+               error = xfs_reserve_blocks(mp, &in, &inout);
+               if (error)
+                       return -error;
+
+               if (copy_to_user((char *)arg, &inout, sizeof(inout)))
+                       return -XFS_ERROR(EFAULT);
+               return 0;
+       }
+
+       case XFS_IOC_GET_RESBLKS: {
+               xfs_fsop_resblks_t out;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               error = xfs_reserve_blocks(mp, NULL, &out);
+               if (error)
+                       return -error;
+
+               if (copy_to_user((char *)arg, &out, sizeof(out)))
+                       return -XFS_ERROR(EFAULT);
+
+               return 0;
+       }
+
+       case XFS_IOC_FSGROWFSDATA: {
+               xfs_growfs_data_t in;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (copy_from_user(&in, (char *)arg, sizeof(in)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_growfs_data(mp, &in);
+               return -error;
+       }
+
+       case XFS_IOC_FSGROWFSLOG: {
+               xfs_growfs_log_t in;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (copy_from_user(&in, (char *)arg, sizeof(in)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_growfs_log(mp, &in);
+               return -error;
+       }
+
+       case XFS_IOC_FSGROWFSRT: {
+               xfs_growfs_rt_t in;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (copy_from_user(&in, (char *)arg, sizeof(in)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_growfs_rt(mp, &in);
+               return -error;
+       }
+
+       case XFS_IOC_FREEZE:
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               freeze_bdev(inode->i_sb->s_bdev);
+               return 0;
+
+       case XFS_IOC_THAW:
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+               thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
+               return 0;
+
+       case XFS_IOC_GOINGDOWN: {
+               __uint32_t in;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (get_user(in, (__uint32_t *)arg))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_fs_goingdown(mp, in);
+               return -error;
+       }
+
+       case XFS_IOC_ERROR_INJECTION: {
+               xfs_error_injection_t in;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (copy_from_user(&in, (char *)arg, sizeof(in)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_errortag_add(in.errtag, mp);
+               return -error;
+       }
+
+       case XFS_IOC_ERROR_CLEARALL:
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               error = xfs_errortag_clearall(mp);
+               return -error;
+
+       default:
+               return -ENOTTY;
+       }
+}
+
+STATIC int
+xfs_ioc_space(
+       bhv_desc_t              *bdp,
+       vnode_t                 *vp,
+       struct file             *filp,
+       int                     ioflags,
+       unsigned int            cmd,
+       unsigned long           arg)
+{
+       xfs_flock64_t           bf;
+       int                     attr_flags = 0;
+       int                     error;
+
+       if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
+               return -XFS_ERROR(EPERM);
+
+       if (!(filp->f_flags & FMODE_WRITE))
+               return -XFS_ERROR(EBADF);
+
+       if (vp->v_type != VREG)
+               return -XFS_ERROR(EINVAL);
+
+       if (copy_from_user(&bf, (xfs_flock64_t *)arg, sizeof(bf)))
+               return -XFS_ERROR(EFAULT);
+
+       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+               attr_flags |= ATTR_NONBLOCK;
+       if (ioflags & IO_INVIS)
+               attr_flags |= ATTR_DMI;
+
+       error = xfs_change_file_space(bdp, cmd, &bf, filp->f_pos,
+                                             NULL, attr_flags);
+       return -error;
+}
+
+STATIC int
+xfs_ioc_bulkstat(
+       xfs_mount_t             *mp,
+       unsigned int            cmd,
+       unsigned long           arg)
+{
+       xfs_fsop_bulkreq_t      bulkreq;
+       int                     count;  /* # of records returned */
+       xfs_ino_t               inlast; /* last inode number */
+       int                     done;
+       int                     error;
+
+       /* done = 1 if there are more stats to get and if bulkstat */
+       /* should be called again (unused here, but used in dmapi) */
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+
+       if (copy_from_user(&bulkreq, (xfs_fsop_bulkreq_t *)arg,
+                                       sizeof(xfs_fsop_bulkreq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       if (copy_from_user(&inlast, (__s64 *)bulkreq.lastip,
+                                               sizeof(__s64)))
+               return -XFS_ERROR(EFAULT);
+
+       if ((count = bulkreq.icount) <= 0)
+               return -XFS_ERROR(EINVAL);
+
+       if (cmd == XFS_IOC_FSINUMBERS)
+               error = xfs_inumbers(mp, NULL, &inlast, &count,
+                                               bulkreq.ubuffer);
+       else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
+               error = xfs_bulkstat_single(mp, &inlast,
+                                               bulkreq.ubuffer, &done);
+       else {  /* XFS_IOC_FSBULKSTAT */
+               if (count == 1 && inlast != 0) {
+                       inlast++;
+                       error = xfs_bulkstat_single(mp, &inlast,
+                                       bulkreq.ubuffer, &done);
+               } else {
+                       error = xfs_bulkstat(mp, NULL, &inlast, &count,
+                               (bulkstat_one_pf)xfs_bulkstat_one, NULL,
+                               sizeof(xfs_bstat_t), bulkreq.ubuffer,
+                               BULKSTAT_FG_QUICK, &done);
+               }
+       }
+
+       if (error)
+               return -error;
+
+       if (bulkreq.ocount != NULL) {
+               if (copy_to_user((xfs_ino_t *)bulkreq.lastip, &inlast,
+                                               sizeof(xfs_ino_t)))
+                       return -XFS_ERROR(EFAULT);
+
+               if (copy_to_user((__s32 *)bulkreq.ocount, &count,
+                                               sizeof(count)))
+                       return -XFS_ERROR(EFAULT);
+       }
+
+       return 0;
+}
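
Seen from user space, XFS_IOC_FSBULKSTAT is a cursor-style loop: lastip is both the resume point on input and the last inode returned on output, while ocount reports how many xfs_bstat_t records were filled in. A hedged sketch of that loop, with field names taken from the bulkreq usage above (the userspace header is an assumption):

/* Sketch (userspace): iterate over every inode in the filesystem. */
#include <xfs/xfs.h>            /* assumed: xfs_fsop_bulkreq_t, xfs_bstat_t */
#include <sys/ioctl.h>

int bulkstat_all(int fd)
{
        xfs_bstat_t             buf[64];
        xfs_fsop_bulkreq_t      req;
        __u64                   lastip = 0;     /* 0 = start at the beginning */
        __s32                   ocount = 0;

        req.lastip  = &lastip;          /* in/out: resume cookie             */
        req.icount  = 64;               /* capacity of buf                   */
        req.ubuffer = buf;              /* records are written here          */
        req.ocount  = &ocount;          /* out: records actually returned    */

        while (ioctl(fd, XFS_IOC_FSBULKSTAT, &req) == 0 && ocount > 0) {
                /* consume buf[0 .. ocount-1] here */
        }
        return 0;
}
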
+
+STATIC int
+xfs_ioc_fsgeometry_v1(
+       xfs_mount_t             *mp,
+       unsigned long           arg)
+{
+       xfs_fsop_geom_v1_t      fsgeo;
+       int                     error;
+
+       error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3);
+       if (error)
+               return -error;
+
+       if (copy_to_user((xfs_fsop_geom_t *)arg, &fsgeo, sizeof(fsgeo)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_ioc_fsgeometry(
+       xfs_mount_t             *mp,
+       unsigned long           arg)
+{
+       xfs_fsop_geom_t         fsgeo;
+       int                     error;
+
+       error = xfs_fs_geometry(mp, &fsgeo, 4);
+       if (error)
+               return -error;
+
+       if (copy_to_user((xfs_fsop_geom_t *)arg, &fsgeo, sizeof(fsgeo)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+/*
+ * Linux extended inode flags interface.
+ */
+#define LINUX_XFLAG_SYNC       0x00000008 /* Synchronous updates */
+#define LINUX_XFLAG_IMMUTABLE  0x00000010 /* Immutable file */
+#define LINUX_XFLAG_APPEND     0x00000020 /* writes to file may only append */
+#define LINUX_XFLAG_NODUMP     0x00000040 /* do not dump file */
+#define LINUX_XFLAG_NOATIME    0x00000080 /* do not update atime */
+
+STATIC unsigned int
+xfs_merge_ioc_xflags(
+       unsigned int    flags,
+       unsigned int    start)
+{
+       unsigned int    xflags = start;
+
+       if (flags & LINUX_XFLAG_IMMUTABLE)
+               xflags |= XFS_XFLAG_IMMUTABLE;
+       else
+               xflags &= ~XFS_XFLAG_IMMUTABLE;
+       if (flags & LINUX_XFLAG_APPEND)
+               xflags |= XFS_XFLAG_APPEND;
+       else
+               xflags &= ~XFS_XFLAG_APPEND;
+       if (flags & LINUX_XFLAG_SYNC)
+               xflags |= XFS_XFLAG_SYNC;
+       else
+               xflags &= ~XFS_XFLAG_SYNC;
+       if (flags & LINUX_XFLAG_NOATIME)
+               xflags |= XFS_XFLAG_NOATIME;
+       else
+               xflags &= ~XFS_XFLAG_NOATIME;
+       if (flags & LINUX_XFLAG_NODUMP)
+               xflags |= XFS_XFLAG_NODUMP;
+       else
+               xflags &= ~XFS_XFLAG_NODUMP;
+
+       return xflags;
+}
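
Note that xfs_merge_ioc_xflags() only rewrites the five bits the Linux interface knows about and leaves every other bit of the on-disk flags alone, so a SETXFLAGS call cannot clobber XFS-only flags such as the realtime bit. A worked example (assuming the usual XFS_XFLAG_* definitions from xfs_fs.h):

/* Worked example: start from an inode flagged realtime + nodump and apply
 * an ioctl word that only has the immutable bit set. */
static void xfs_xflags_merge_example(void)
{
        unsigned int start  = XFS_XFLAG_REALTIME | XFS_XFLAG_NODUMP;
        unsigned int xflags = xfs_merge_ioc_xflags(LINUX_XFLAG_IMMUTABLE, start);

        /* xflags == XFS_XFLAG_REALTIME | XFS_XFLAG_IMMUTABLE:
         * IMMUTABLE is added, NODUMP is cleared because its Linux bit was
         * absent, and REALTIME passes through since it has no Linux
         * counterpart in the table above. */
        (void)xflags;
}
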
+
+STATIC int
+xfs_ioc_xattr(
+       vnode_t                 *vp,
+       xfs_inode_t             *ip,
+       struct file             *filp,
+       unsigned int            cmd,
+       unsigned long           arg)
+{
+       struct fsxattr          fa;
+       vattr_t                 va;
+       int                     error;
+       int                     attr_flags;
+       unsigned int            flags;
+
+       switch (cmd) {
+       case XFS_IOC_FSGETXATTR: {
+               va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS;
+               VOP_GETATTR(vp, &va, 0, NULL, error);
+               if (error)
+                       return -error;
+
+               fa.fsx_xflags   = va.va_xflags;
+               fa.fsx_extsize  = va.va_extsize;
+               fa.fsx_nextents = va.va_nextents;
+
+               if (copy_to_user((struct fsxattr *)arg, &fa, sizeof(fa)))
+                       return -XFS_ERROR(EFAULT);
+               return 0;
+       }
+
+       case XFS_IOC_FSSETXATTR: {
+               if (copy_from_user(&fa, (struct fsxattr *)arg, sizeof(fa)))
+                       return -XFS_ERROR(EFAULT);
+
+               attr_flags = 0;
+               if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+                       attr_flags |= ATTR_NONBLOCK;
+
+               va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE;
+               va.va_xflags  = fa.fsx_xflags;
+               va.va_extsize = fa.fsx_extsize;
+
+               VOP_SETATTR(vp, &va, attr_flags, NULL, error);
+               if (!error)
+                       vn_revalidate(vp);      /* update Linux inode flags */
+               return -error;
+       }
+
+       case XFS_IOC_FSGETXATTRA: {
+               va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_ANEXTENTS;
+               VOP_GETATTR(vp, &va, 0, NULL, error);
+               if (error)
+                       return -error;
+
+               fa.fsx_xflags   = va.va_xflags;
+               fa.fsx_extsize  = va.va_extsize;
+               fa.fsx_nextents = va.va_anextents;
+
+               if (copy_to_user((struct fsxattr *)arg, &fa, sizeof(fa)))
+                       return -XFS_ERROR(EFAULT);
+               return 0;
+       }
+
+       case XFS_IOC_GETXFLAGS: {
+               flags = 0;
+               if (ip->i_d.di_flags & XFS_XFLAG_IMMUTABLE)
+                       flags |= LINUX_XFLAG_IMMUTABLE;
+               if (ip->i_d.di_flags & XFS_XFLAG_APPEND)
+                       flags |= LINUX_XFLAG_APPEND;
+               if (ip->i_d.di_flags & XFS_XFLAG_SYNC)
+                       flags |= LINUX_XFLAG_SYNC;
+               if (ip->i_d.di_flags & XFS_XFLAG_NOATIME)
+                       flags |= LINUX_XFLAG_NOATIME;
+               if (ip->i_d.di_flags & XFS_XFLAG_NODUMP)
+                       flags |= LINUX_XFLAG_NODUMP;
+               if (copy_to_user((unsigned int *)arg, &flags, sizeof(flags)))
+                       return -XFS_ERROR(EFAULT);
+               return 0;
+       }
+
+       case XFS_IOC_SETXFLAGS: {
+               if (copy_from_user(&flags, (unsigned int *)arg, sizeof(flags)))
+                       return -XFS_ERROR(EFAULT);
+
+               if (flags & ~(LINUX_XFLAG_IMMUTABLE | LINUX_XFLAG_APPEND | \
+                             LINUX_XFLAG_NOATIME | LINUX_XFLAG_NODUMP | \
+                             LINUX_XFLAG_SYNC))
+                       return -XFS_ERROR(EOPNOTSUPP);
+
+               attr_flags = 0;
+               if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+                       attr_flags |= ATTR_NONBLOCK;
+
+               va.va_mask = XFS_AT_XFLAGS;
+               va.va_xflags = xfs_merge_ioc_xflags(flags, ip->i_d.di_flags);
+
+               VOP_SETATTR(vp, &va, attr_flags, NULL, error);
+               if (!error)
+                       vn_revalidate(vp);      /* update Linux inode flags */
+               return -error;
+       }
+
+       case XFS_IOC_GETVERSION: {
+               flags = LINVFS_GET_IP(vp)->i_generation;
+               if (copy_to_user((unsigned int *)arg, &flags, sizeof(flags)))
+                       return -XFS_ERROR(EFAULT);
+               return 0;
+       }
+
+       default:
+               return -ENOTTY;
+       }
+}
+
+STATIC int
+xfs_ioc_getbmap(
+       bhv_desc_t              *bdp,
+       struct file             *filp,
+       int                     ioflags,
+       unsigned int            cmd,
+       unsigned long           arg)
+{
+       struct getbmap          bm;
+       int                     iflags;
+       int                     error;
+
+       if (copy_from_user(&bm, (struct getbmap *)arg, sizeof(bm)))
+               return -XFS_ERROR(EFAULT);
+
+       if (bm.bmv_count < 2)
+               return -XFS_ERROR(EINVAL);
+
+       iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
+       if (ioflags & IO_INVIS)
+               iflags |= BMV_IF_NO_DMAPI_READ;
+
+       error = xfs_getbmap(bdp, &bm, (struct getbmap *)arg+1, iflags);
+       if (error)
+               return -error;
+
+       if (copy_to_user((struct getbmap *)arg, &bm, sizeof(bm)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
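
The bmv_count >= 2 requirement comes from the getbmap calling convention: the first record is a header carrying the requested range and capacity, and the extent records that xfs_getbmap() returns are written immediately after it, which is why the call above passes arg+1 as the output buffer. A hedged userspace sketch:

/* Sketch (userspace): list up to 16 extents of an open XFS file.
 * buf[0] is the request header, buf[1..] receive the extent records. */
#include <xfs/xfs.h>            /* assumed: struct getbmap, XFS_IOC_GETBMAP */
#include <sys/ioctl.h>
#include <string.h>

int dump_extents(int fd)
{
        struct getbmap buf[17];

        memset(buf, 0, sizeof(buf));
        buf[0].bmv_offset = 0;          /* start of file, in 512-byte units  */
        buf[0].bmv_length = -1;         /* -1 = map through end of file      */
        buf[0].bmv_count  = 17;         /* header slot + 16 extent slots     */

        if (ioctl(fd, XFS_IOC_GETBMAP, buf) < 0)
                return -1;
        /* buf[0].bmv_entries now holds how many of buf[1..] are valid */
        return 0;
}
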
+
+STATIC int
+xfs_ioc_getbmapx(
+       bhv_desc_t              *bdp,
+       unsigned long           arg)
+{
+       struct getbmapx         bmx;
+       struct getbmap          bm;
+       int                     iflags;
+       int                     error;
+
+       if (copy_from_user(&bmx, (struct getbmapx *)arg, sizeof(bmx)))
+               return -XFS_ERROR(EFAULT);
+
+       if (bmx.bmv_count < 2)
+               return -XFS_ERROR(EINVAL);
+
+       /*
+        * Map input getbmapx structure to a getbmap
+        * structure for xfs_getbmap.
+        */
+       GETBMAP_CONVERT(bmx, bm);
+
+       iflags = bmx.bmv_iflags;
+
+       if (iflags & (~BMV_IF_VALID))
+               return -XFS_ERROR(EINVAL);
+
+       iflags |= BMV_IF_EXTENDED;
+
+       error = xfs_getbmap(bdp, &bm, (struct getbmapx *)arg+1, iflags);
+       if (error)
+               return -error;
+
+       GETBMAP_CONVERT(bm, bmx);
+
+       if (copy_to_user((struct getbmapx *)arg, &bmx, sizeof(bmx)))
+               return -XFS_ERROR(EFAULT);
+
+       return 0;
+}
diff --git a/fs/xfs/linux/xfs_iops.c b/fs/xfs/linux/xfs_iops.c
new file mode 100644
index 0000000..4b3e61d
--- /dev/null
+++ b/fs/xfs/linux/xfs_iops.c
@@ -0,0 +1,708 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_cap.h"
+#include "xfs_mac.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+
+#include <linux/xattr.h>
+
+
+/*
+ * Pull the link count and size up from the xfs inode to the linux inode
+ */
+STATIC void
+validate_fields(
+       struct inode    *ip)
+{
+       vnode_t         *vp = LINVFS_GET_VP(ip);
+       vattr_t         va;
+       int             error;
+
+       va.va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS;
+       VOP_GETATTR(vp, &va, ATTR_LAZY, NULL, error);
+       if (likely(!error)) {
+               ip->i_nlink = va.va_nlink;
+               ip->i_blocks = va.va_nblocks;
+
+               /* we're under i_sem so i_size can't change under us */
+               if (i_size_read(ip) != va.va_size)
+                       i_size_write(ip, va.va_size);
+       }
+}
+
+/*
+ * Determine whether a process has a valid fs_struct (kernel daemons
+ * like knfsd don't have an fs_struct).
+ *
+ * XXX(hch):  nfsd is broken, better fix it instead.
+ */
+STATIC inline int
+has_fs_struct(struct task_struct *task)
+{
+       return (task->fs != init_task.fs);
+}
+
+STATIC int
+linvfs_mknod(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       int             mode,
+       dev_t           rdev)
+{
+       struct inode    *ip;
+       vattr_t         va;
+       vnode_t         *vp = NULL, *dvp = LINVFS_GET_VP(dir);
+       xfs_acl_t       *default_acl = NULL;
+       attrexists_t    test_default_acl = _ACL_DEFAULT_EXISTS;
+       int             error;
+
+       /*
+        * Irix uses Missed'em'V split, but doesn't want to see
+        * the upper 5 bits of (14bit) major.
+        */
+       if (!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)
+               return -EINVAL;
+
+       if (test_default_acl && test_default_acl(dvp)) {
+               if (!_ACL_ALLOC(default_acl))
+                       return -ENOMEM;
+               if (!_ACL_GET_DEFAULT(dvp, default_acl)) {
+                       _ACL_FREE(default_acl);
+                       default_acl = NULL;
+               }
+       }
+
+       if (IS_POSIXACL(dir) && !default_acl && has_fs_struct(current))
+               mode &= ~current->fs->umask;
+
+       memset(&va, 0, sizeof(va));
+       va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
+       va.va_type = IFTOVT(mode);
+       va.va_mode = mode;
+
+       switch (mode & S_IFMT) {
+       case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
+               va.va_rdev = sysv_encode_dev(rdev);
+               va.va_mask |= XFS_AT_RDEV;
+               /*FALLTHROUGH*/
+       case S_IFREG:
+               VOP_CREATE(dvp, dentry, &va, &vp, NULL, error);
+               break;
+       case S_IFDIR:
+               VOP_MKDIR(dvp, dentry, &va, &vp, NULL, error);
+               break;
+       default:
+               error = EINVAL;
+               break;
+       }
+
+       if (default_acl) {
+               if (!error) {
+                       error = _ACL_INHERIT(vp, &va, default_acl);
+                       if (!error) {
+                               VMODIFY(vp);
+                       } else {
+                               struct dentry   teardown = {};
+                               int             err2;
+
+                               /* Oh, the horror.
+                                * If we can't add the ACL we must back out.
+                                * ENOSPC can hit here, among other things.
+                                */
+                               teardown.d_inode = ip = LINVFS_GET_IP(vp);
+                               teardown.d_name = dentry->d_name;
+                               remove_inode_hash(ip);
+                               make_bad_inode(ip);
+                               if (S_ISDIR(mode))
+                                       VOP_RMDIR(dvp, &teardown, NULL, err2);
+                               else
+                                       VOP_REMOVE(dvp, &teardown, NULL, err2);
+                               VN_RELE(vp);
+                       }
+               }
+               _ACL_FREE(default_acl);
+       }
+
+       if (!error) {
+               ASSERT(vp);
+               ip = LINVFS_GET_IP(vp);
+
+               if (S_ISCHR(mode) || S_ISBLK(mode))
+                       ip->i_rdev = rdev;
+               else if (S_ISDIR(mode))
+                       validate_fields(ip);
+               d_instantiate(dentry, ip);
+               validate_fields(dir);
+       }
+       return -error;
+}
+
+STATIC int
+linvfs_create(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       int             mode,
+       struct nameidata *nd)
+{
+       return linvfs_mknod(dir, dentry, mode, 0);
+}
+
+STATIC int
+linvfs_mkdir(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       int             mode)
+{
+       return linvfs_mknod(dir, dentry, mode|S_IFDIR, 0);
+}
+
+STATIC struct dentry *
+linvfs_lookup(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       struct nameidata *nd)
+{
+       struct inode    *ip = NULL;
+       vnode_t         *vp, *cvp = NULL;
+       int             error;
+
+       if (dentry->d_name.len >= MAXNAMELEN)
+               return ERR_PTR(-ENAMETOOLONG);
+
+       vp = LINVFS_GET_VP(dir);
+       VOP_LOOKUP(vp, dentry, &cvp, 0, NULL, NULL, error);
+       if (!error) {
+               ASSERT(cvp);
+               ip = LINVFS_GET_IP(cvp);
+               if (!ip) {
+                       VN_RELE(cvp);
+                       return ERR_PTR(-EACCES);
+               }
+       }
+       if (error && (error != ENOENT))
+               return ERR_PTR(-error);
+       return d_splice_alias(ip, dentry);
+}
+
+STATIC int
+linvfs_link(
+       struct dentry   *old_dentry,
+       struct inode    *dir,
+       struct dentry   *dentry)
+{
+       struct inode    *ip;    /* inode of guy being linked to */
+       vnode_t         *tdvp;  /* target directory for new name/link */
+       vnode_t         *vp;    /* vp of name being linked */
+       int             error;
+
+       ip = old_dentry->d_inode;       /* inode being linked to */
+       if (S_ISDIR(ip->i_mode))
+               return -EPERM;
+
+       tdvp = LINVFS_GET_VP(dir);
+       vp = LINVFS_GET_VP(ip);
+
+       VOP_LINK(tdvp, vp, dentry, NULL, error);
+       if (!error) {
+               VMODIFY(tdvp);
+               VN_HOLD(vp);
+               validate_fields(ip);
+               d_instantiate(dentry, ip);
+       }
+       return -error;
+}
+
+STATIC int
+linvfs_unlink(
+       struct inode    *dir,
+       struct dentry   *dentry)
+{
+       struct inode    *inode;
+       vnode_t         *dvp;   /* directory containing name to remove */
+       int             error;
+
+       inode = dentry->d_inode;
+       dvp = LINVFS_GET_VP(dir);
+
+       VOP_REMOVE(dvp, dentry, NULL, error);
+       if (!error) {
+               validate_fields(dir);   /* For size only */
+               validate_fields(inode);
+       }
+
+       return -error;
+}
+
+STATIC int
+linvfs_symlink(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       const char      *symname)
+{
+       struct inode    *ip;
+       vattr_t         va;
+       vnode_t         *dvp;   /* directory containing name to remove */
+       vnode_t         *cvp;   /* used to lookup symlink to put in dentry */
+       int             error;
+
+       dvp = LINVFS_GET_VP(dir);
+       cvp = NULL;
+
+       memset(&va, 0, sizeof(va));
+       va.va_type = VLNK;
+       va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO;
+       va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
+
+       error = 0;
+       VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
+       if (!error && cvp) {
+               ASSERT(cvp->v_type == VLNK);
+               ip = LINVFS_GET_IP(cvp);
+               d_instantiate(dentry, ip);
+               validate_fields(dir);
+               validate_fields(ip); /* size needs update */
+       }
+       return -error;
+}
+
+STATIC int
+linvfs_rmdir(
+       struct inode    *dir,
+       struct dentry   *dentry)
+{
+       struct inode    *inode = dentry->d_inode;
+       vnode_t         *dvp = LINVFS_GET_VP(dir);
+       int             error;
+
+       VOP_RMDIR(dvp, dentry, NULL, error);
+       if (!error) {
+               validate_fields(inode);
+               validate_fields(dir);
+       }
+       return -error;
+}
+
+STATIC int
+linvfs_rename(
+       struct inode    *odir,
+       struct dentry   *odentry,
+       struct inode    *ndir,
+       struct dentry   *ndentry)
+{
+       struct inode    *new_inode = ndentry->d_inode;
+       vnode_t         *fvp;   /* from directory */
+       vnode_t         *tvp;   /* target directory */
+       int             error;
+
+       fvp = LINVFS_GET_VP(odir);
+       tvp = LINVFS_GET_VP(ndir);
+
+       VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error);
+       if (error)
+               return -error;
+
+       if (new_inode)
+               validate_fields(new_inode);
+
+       validate_fields(odir);
+       if (ndir != odir)
+               validate_fields(ndir);
+       return 0;
+}
+
+STATIC int
+linvfs_readlink(
+       struct dentry   *dentry,
+       char            *buf,
+       int             size)
+{
+       vnode_t         *vp = LINVFS_GET_VP(dentry->d_inode);
+       uio_t           uio;
+       iovec_t         iov;
+       int             error;
+
+       iov.iov_base = buf;
+       iov.iov_len = size;
+
+       uio.uio_iov = &iov;
+       uio.uio_offset = 0;
+       uio.uio_segflg = UIO_USERSPACE;
+       uio.uio_resid = size;
+       uio.uio_iovcnt = 1;
+
+       VOP_READLINK(vp, &uio, 0, NULL, error);
+       if (error)
+               return -error;
+
+       return (size - uio.uio_resid);
+}
+
+/*
+ * Careful here - this function can get called recursively, so
+ * keep stack usage to a minimum; the uio is kmalloc'd for
+ * that reason...
+ */
+STATIC int
+linvfs_follow_link(
+       struct dentry           *dentry,
+       struct nameidata        *nd)
+{
+       vnode_t                 *vp;
+       uio_t                   *uio;
+       iovec_t                 iov;
+       int                     error;
+       char                    *link;
+
+       ASSERT(dentry);
+       ASSERT(nd);
+
+       link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL);
+       if (!link)
+               return -ENOMEM;
+
+       uio = (uio_t *)kmalloc(sizeof(uio_t), GFP_KERNEL);
+       if (!uio) {
+               kfree(link);
+               return -ENOMEM;
+       }
+
+       vp = LINVFS_GET_VP(dentry->d_inode);
+
+       iov.iov_base = link;
+       iov.iov_len = MAXNAMELEN;
+
+       uio->uio_iov = &iov;
+       uio->uio_offset = 0;
+       uio->uio_segflg = UIO_SYSSPACE;
+       uio->uio_resid = MAXNAMELEN;
+       uio->uio_iovcnt = 1;
+
+       VOP_READLINK(vp, uio, 0, NULL, error);
+       if (error) {
+               kfree(uio);
+               kfree(link);
+               return -error;
+       }
+
+       link[MAXNAMELEN - uio->uio_resid] = '\0';
+       kfree(uio);
+
+       /* vfs_follow_link returns (-) errors */
+       error = vfs_follow_link(nd, link);
+       kfree(link);
+       return error;
+}
+
+#ifdef CONFIG_XFS_POSIX_ACL
+STATIC int
+linvfs_permission(
+       struct inode    *inode,
+       int             mode,
+       struct nameidata *nd)
+{
+       vnode_t         *vp = LINVFS_GET_VP(inode);
+       int             error;
+
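+       /*
+        * Editor's note: Linux passes MAY_EXEC (1), MAY_WRITE (2) and
+        * MAY_READ (4) in the low bits of 'mode'; shifting left by six
+        * lines these up with the owner-class rwx bits (0100/0200/0400)
+        * that the vnode access check compares against.
+        */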
+       mode <<= 6;             /* convert from linux to vnode access bits */
+       VOP_ACCESS(vp, mode, NULL, error);
+       return -error;
+}
+#else
+#define linvfs_permission NULL
+#endif
+
+STATIC int
+linvfs_getattr(
+       struct vfsmount *mnt,
+       struct dentry   *dentry,
+       struct kstat    *stat)
+{
+       struct inode    *inode = dentry->d_inode;
+       vnode_t         *vp = LINVFS_GET_VP(inode);
+       int             error = 0;
+
+       if (unlikely(vp->v_flag & VMODIFIED))
+               error = vn_revalidate(vp);
+       if (!error)
+               generic_fillattr(inode, stat);
+       return 0;
+}
+
+STATIC int
+linvfs_setattr(
+       struct dentry   *dentry,
+       struct iattr    *attr)
+{
+       struct inode    *inode = dentry->d_inode;
+       unsigned int    ia_valid = attr->ia_valid;
+       vnode_t         *vp = LINVFS_GET_VP(inode);
+       vattr_t         vattr;
+       int             flags = 0;
+       int             error;
+
+       memset(&vattr, 0, sizeof(vattr_t));
+       if (ia_valid & ATTR_UID) {
+               vattr.va_mask |= XFS_AT_UID;
+               vattr.va_uid = attr->ia_uid;
+       }
+       if (ia_valid & ATTR_GID) {
+               vattr.va_mask |= XFS_AT_GID;
+               vattr.va_gid = attr->ia_gid;
+       }
+       if (ia_valid & ATTR_SIZE) {
+               vattr.va_mask |= XFS_AT_SIZE;
+               vattr.va_size = attr->ia_size;
+       }
+       if (ia_valid & ATTR_ATIME) {
+               vattr.va_mask |= XFS_AT_ATIME;
+               vattr.va_atime = attr->ia_atime;
+       }
+       if (ia_valid & ATTR_MTIME) {
+               vattr.va_mask |= XFS_AT_MTIME;
+               vattr.va_mtime = attr->ia_mtime;
+       }
+       if (ia_valid & ATTR_CTIME) {
+               vattr.va_mask |= XFS_AT_CTIME;
+               vattr.va_ctime = attr->ia_ctime;
+       }
+       if (ia_valid & ATTR_MODE) {
+               vattr.va_mask |= XFS_AT_MODE;
+               vattr.va_mode = attr->ia_mode;
+               if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+                       inode->i_mode &= ~S_ISGID;
+       }
+
+       if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))
+               flags = ATTR_UTIME;
+#ifdef ATTR_NO_BLOCK
+       if ((ia_valid & ATTR_NO_BLOCK))
+               flags |= ATTR_NONBLOCK;
+#endif
+
+       VOP_SETATTR(vp, &vattr, flags, NULL, error);
+       if (error)
+               return(-error); /* Positive error up from XFS */
+       if (ia_valid & ATTR_SIZE) {
+               error = vmtruncate(inode, attr->ia_size);
+       }
+
+       if (!error) {
+               vn_revalidate(vp);
+       }
+       return error;
+}
+
+STATIC void
+linvfs_truncate(
+       struct inode    *inode)
+{
+       block_truncate_page(inode->i_mapping, inode->i_size, linvfs_get_block);
+}
+
+STATIC int
+linvfs_setxattr(
+       struct dentry   *dentry,
+       const char      *name,
+       const void      *data,
+       size_t          size,
+       int             flags)
+{
+       vnode_t         *vp = LINVFS_GET_VP(dentry->d_inode);
+       char            *attr = (char *)name;
+       attrnames_t     *namesp;
+       int             xflags = 0;
+       int             error;
+
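+       /*
+        * The incoming name carries a namespace prefix (such as "user.");
+        * attr_lookup_namespace() resolves it, and attr_namelen is used
+        * below to strip the prefix before the bare name is handed on to
+        * the XFS attribute code.
+        */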
+       namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
+       if (!namesp)
+               return -EOPNOTSUPP;
+       attr += namesp->attr_namelen;
+       error = namesp->attr_capable(vp, NULL);
+       if (error)
+               return error;
+
+       /* Convert Linux syscall to XFS internal ATTR flags */
+       if (flags & XATTR_CREATE)
+               xflags |= ATTR_CREATE;
+       if (flags & XATTR_REPLACE)
+               xflags |= ATTR_REPLACE;
+       xflags |= namesp->attr_flag;
+       return namesp->attr_set(vp, attr, (void *)data, size, xflags);
+}
+
+STATIC ssize_t
+linvfs_getxattr(
+       struct dentry   *dentry,
+       const char      *name,
+       void            *data,
+       size_t          size)
+{
+       vnode_t         *vp = LINVFS_GET_VP(dentry->d_inode);
+       char            *attr = (char *)name;
+       attrnames_t     *namesp;
+       int             xflags = 0;
+       ssize_t         error;
+
+       namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
+       if (!namesp)
+               return -EOPNOTSUPP;
+       attr += namesp->attr_namelen;
+       error = namesp->attr_capable(vp, NULL);
+       if (error)
+               return error;
+
+       /* Convert Linux syscall to XFS internal ATTR flags */
+       if (!size) {
+               xflags |= ATTR_KERNOVAL;
+               data = NULL;
+       }
+       xflags |= namesp->attr_flag;
+       return namesp->attr_get(vp, attr, (void *)data, size, xflags);
+}
+
+STATIC ssize_t
+linvfs_listxattr(
+       struct dentry           *dentry,
+       char                    *data,
+       size_t                  size)
+{
+       vnode_t                 *vp = LINVFS_GET_VP(dentry->d_inode);
+       int                     error, xflags = ATTR_KERNAMELS;
+       ssize_t                 result;
+
+       if (!size)
+               xflags |= ATTR_KERNOVAL;
+       xflags |= capable(CAP_SYS_ADMIN) ? ATTR_KERNFULLS : ATTR_KERNORMALS;
+
+       error = attr_generic_list(vp, data, size, xflags, &result);
+       if (error < 0)
+               return error;
+       return result;
+}
+
+STATIC int
+linvfs_removexattr(
+       struct dentry   *dentry,
+       const char      *name)
+{
+       vnode_t         *vp = LINVFS_GET_VP(dentry->d_inode);
+       char            *attr = (char *)name;
+       attrnames_t     *namesp;
+       int             xflags = 0;
+       int             error;
+
+       namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
+       if (!namesp)
+               return -EOPNOTSUPP;
+       attr += namesp->attr_namelen;
+       error = namesp->attr_capable(vp, NULL);
+       if (error)
+               return error;
+       xflags |= namesp->attr_flag;
+       return namesp->attr_remove(vp, attr, xflags);
+}
+
+
+struct inode_operations linvfs_file_inode_operations = {
+       .permission             = linvfs_permission,
+       .truncate               = linvfs_truncate,
+       .getattr                = linvfs_getattr,
+       .setattr                = linvfs_setattr,
+       .setxattr               = linvfs_setxattr,
+       .getxattr               = linvfs_getxattr,
+       .listxattr              = linvfs_listxattr,
+       .removexattr            = linvfs_removexattr,
+};
+
+struct inode_operations linvfs_dir_inode_operations = {
+       .create                 = linvfs_create,
+       .lookup                 = linvfs_lookup,
+       .link                   = linvfs_link,
+       .unlink                 = linvfs_unlink,
+       .symlink                = linvfs_symlink,
+       .mkdir                  = linvfs_mkdir,
+       .rmdir                  = linvfs_rmdir,
+       .mknod                  = linvfs_mknod,
+       .rename                 = linvfs_rename,
+       .permission             = linvfs_permission,
+       .getattr                = linvfs_getattr,
+       .setattr                = linvfs_setattr,
+       .setxattr               = linvfs_setxattr,
+       .getxattr               = linvfs_getxattr,
+       .listxattr              = linvfs_listxattr,
+       .removexattr            = linvfs_removexattr,
+};
+
+struct inode_operations linvfs_symlink_inode_operations = {
+       .readlink               = linvfs_readlink,
+       .follow_link            = linvfs_follow_link,
+       .permission             = linvfs_permission,
+       .getattr                = linvfs_getattr,
+       .setattr                = linvfs_setattr,
+       .setxattr               = linvfs_setxattr,
+       .getxattr               = linvfs_getxattr,
+       .listxattr              = linvfs_listxattr,
+       .removexattr            = linvfs_removexattr,
+};
diff --git a/fs/xfs/linux/xfs_iops.h b/fs/xfs/linux/xfs_iops.h
new file mode 100644 (file)
index 0000000..f0f5c87
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_IOPS_H__
+#define __XFS_IOPS_H__
+
+extern struct inode_operations linvfs_file_inode_operations;
+extern struct inode_operations linvfs_dir_inode_operations;
+extern struct inode_operations linvfs_symlink_inode_operations;
+
+extern struct file_operations linvfs_file_operations;
+extern struct file_operations linvfs_invis_file_operations;
+extern struct file_operations linvfs_dir_operations;
+
+extern struct address_space_operations linvfs_aops;
+
+extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
+extern void linvfs_unwritten_done(struct buffer_head *, int);
+
+extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *,
+                        int, unsigned int, unsigned long);
+
+#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux/xfs_linux.h b/fs/xfs/linux/xfs_linux.h
new file mode 100644 (file)
index 0000000..70481f8
--- /dev/null
@@ -0,0 +1,367 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_LINUX__
+#define __XFS_LINUX__
+
+#include <linux/types.h>
+#include <linux/config.h>
+
+/*
+ * Some types are conditional depending on the target system.
+ * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
+ * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well
+ * as requiring XFS_BIG_BLKNOS to be set.
+ */
+#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
+# define XFS_BIG_BLKNOS        1
+# if BITS_PER_LONG == 64
+#  define XFS_BIG_INUMS        1
+# else
+#  define XFS_BIG_INUMS        0
+# endif
+#else
+# define XFS_BIG_BLKNOS        0
+# define XFS_BIG_INUMS 0
+#endif
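+/*
+ * For example: a 32-bit kernel without CONFIG_LBD gets 0/0, a 32-bit
+ * kernel with CONFIG_LBD gets XFS_BIG_BLKNOS=1 but XFS_BIG_INUMS=0,
+ * and a 64-bit kernel gets 1/1.
+ */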
+
+#include <xfs_types.h>
+#include <xfs_arch.h>
+
+#include <kmem.h>
+#include <mrlock.h>
+#include <spin.h>
+#include <sv.h>
+#include <mutex.h>
+#include <sema.h>
+#include <time.h>
+
+#include <support/qsort.h>
+#include <support/ktrace.h>
+#include <support/debug.h>
+#include <support/move.h>
+#include <support/uuid.h>
+
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/swap.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/major.h>
+#include <linux/pagemap.h>
+#include <linux/vfs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/version.h>
+
+#include <asm/page.h>
+#include <asm/div64.h>
+#include <asm/param.h>
+#include <asm/uaccess.h>
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+#include <xfs_behavior.h>
+#include <xfs_vfs.h>
+#include <xfs_cred.h>
+#include <xfs_vnode.h>
+#include <xfs_stats.h>
+#include <xfs_sysctl.h>
+#include <xfs_iops.h>
+#include <xfs_super.h>
+#include <xfs_globals.h>
+#include <xfs_fs_subr.h>
+#include <xfs_lrw.h>
+#include <xfs_buf.h>
+
+/*
+ * Feature macros (disable/enable)
+ */
+#undef  HAVE_REFCACHE  /* reference cache not needed for NFS in 2.6 */
+#define HAVE_SENDFILE  /* sendfile(2) exists in 2.6, but not in 2.4 */
+
+/*
+ * State flag for unwritten extent buffers.
+ *
+ * We need to be able to distinguish between these and delayed
+ * allocate buffers within XFS.  The generic IO path code does
+ * not need to distinguish - we use the BH_Delay flag for both
+ * delalloc and these ondisk-uninitialised buffers.
+ */
+BUFFER_FNS(PrivateStart, unwritten);
+static inline void set_buffer_unwritten_io(struct buffer_head *bh)
+{
+       bh->b_end_io = linvfs_unwritten_done;
+}
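+
+/*
+ * Editor's note: BUFFER_FNS(PrivateStart, unwritten) above generates
+ * buffer_unwritten(), set_buffer_unwritten() and clear_buffer_unwritten(),
+ * all operating on the BH_PrivateStart bit of b_state.
+ */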
+
+#define xfs_refcache_size      xfs_params.refcache_size.val
+#define xfs_refcache_purge_count xfs_params.refcache_purge.val
+#define restricted_chown       xfs_params.restrict_chown.val
+#define irix_sgid_inherit      xfs_params.sgid_inherit.val
+#define irix_symlink_mode      xfs_params.symlink_mode.val
+#define xfs_panic_mask         xfs_params.panic_mask.val
+#define xfs_error_level                xfs_params.error_level.val
+#define xfs_syncd_interval     (xfs_params.sync_interval.val * HZ / USER_HZ)
+#define xfs_stats_clear                xfs_params.stats_clear.val
+#define xfs_inherit_sync       xfs_params.inherit_sync.val
+#define xfs_inherit_nodump     xfs_params.inherit_nodump.val
+#define xfs_inherit_noatime    xfs_params.inherit_noatim.val
+#define xfs_flush_interval     (xfs_params.flush_interval.val * HZ / USER_HZ)
+#define xfs_age_buffer         (xfs_params.age_buffer.val * HZ / USER_HZ)
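+
+/*
+ * The interval values above come from sysctl in USER_HZ (centisecond)
+ * ticks; multiplying by HZ / USER_HZ converts them to kernel jiffies.
+ */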
+
+#define current_cpu()          smp_processor_id()
+#define current_pid()          (current->pid)
+#define current_fsuid(cred)    (current->fsuid)
+#define current_fsgid(cred)    (current->fsgid)
+
+#define NBPP           PAGE_SIZE
+#define DPPSHFT                (PAGE_SHIFT - 9)
+#define NDPP           (1 << (PAGE_SHIFT - 9))
+#define dtop(DD)       (((DD) + NDPP - 1) >> DPPSHFT)
+#define dtopt(DD)      ((DD) >> DPPSHFT)
+#define dpoff(DD)      ((DD) & (NDPP-1))
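+
+/*
+ * Example, assuming 4 KiB pages: DPPSHFT is 3 and NDPP is 8, so for a
+ * length of 17 512-byte sectors dtop(17) == 3 (round up to whole pages),
+ * dtopt(17) == 2 (truncate) and dpoff(17) == 1 (sector offset in page).
+ */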
+
+#define NBBY           8               /* number of bits per byte */
+#define        NBPC            PAGE_SIZE       /* Number of bytes per click */
+#define        BPCSHIFT        PAGE_SHIFT      /* LOG2(NBPC) if exact */
+
+/*
+ * Size of block device i/o is parameterized here.
+ * Currently the system supports page-sized i/o.
+ */
+#define        BLKDEV_IOSHIFT          BPCSHIFT
+#define        BLKDEV_IOSIZE           (1<<BLKDEV_IOSHIFT)
+/* number of BB's per block device block */
+#define        BLKDEV_BB               BTOBB(BLKDEV_IOSIZE)
+
+/* bytes to clicks */
+#define        btoc(x)         (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT)
+#define        btoct(x)        ((__psunsigned_t)(x)>>BPCSHIFT)
+#define        btoc64(x)       (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT)
+#define        btoct64(x)      ((__uint64_t)(x)>>BPCSHIFT)
+#define        io_btoc(x)      (((__psunsigned_t)(x)+(IO_NBPC-1))>>IO_BPCSHIFT)
+#define        io_btoct(x)     ((__psunsigned_t)(x)>>IO_BPCSHIFT)
+
+/* off_t bytes to clicks */
+#define offtoc(x)       (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT)
+#define offtoct(x)      ((xfs_off_t)(x)>>BPCSHIFT)
+
+/* clicks to off_t bytes */
+#define        ctooff(x)       ((xfs_off_t)(x)<<BPCSHIFT)
+
+/* clicks to bytes */
+#define        ctob(x)         ((__psunsigned_t)(x)<<BPCSHIFT)
+#define btoct(x)        ((__psunsigned_t)(x)>>BPCSHIFT)
+#define        ctob64(x)       ((__uint64_t)(x)<<BPCSHIFT)
+#define        io_ctob(x)      ((__psunsigned_t)(x)<<IO_BPCSHIFT)
+
+/* bytes to clicks */
+#define btoc(x)         (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT)
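+
+/*
+ * Example, again assuming 4 KiB pages (clicks): btoc(5000) == 2 (round
+ * up), btoct(5000) == 1 (truncate) and ctob(3) == 12288.
+ */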
+
+#ifndef CELL_CAPABLE
+#define FSC_NOTIFY_NAME_CHANGED(vp)
+#endif
+
+#ifndef ENOATTR
+#define ENOATTR                ENODATA         /* Attribute not found */
+#endif
+
+/* Note: EWRONGFS never visible outside the kernel */
+#define        EWRONGFS        EINVAL          /* Mount with wrong filesystem type */
+
+/*
+ * XXX EFSCORRUPTED needs a real value in errno.h. asm-i386/errno.h won't
+ *     return codes out of its known range in errno.
+ * XXX Also note: needs to be < 1000 and fairly unique on Linux (mustn't
+ *     conflict with any code we use already or any code a driver may use)
+ * XXX Some options (currently we do #2):
+ *     1/ New error code ["Filesystem is corrupted", _after_ glibc updated]
+ *     2/ 990 ["Unknown error 990"]
+ *     3/ EUCLEAN ["Structure needs cleaning"]
+ *     4/ Convert EFSCORRUPTED to EIO [just prior to return into userspace]
+ */
+#define EFSCORRUPTED    990            /* Filesystem is corrupted */
+
+#define SYNCHRONIZE()  barrier()
+#define __return_address __builtin_return_address(0)
+
+/*
+ * IRIX (BSD) quotactl makes use of separate commands for user/group,
+ * whereas on Linux the syscall encodes this information into the cmd
+ * field (see the QCMD macro in quota.h).  These macros help keep the
+ * code portable - they are not visible from the syscall interface.
+ */
+#define Q_XSETGQLIM    XQM_CMD(0x8)    /* set groups disk limits */
+#define Q_XGETGQUOTA   XQM_CMD(0x9)    /* get groups disk limits */
+
+/* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */
+/* we may well need to fine-tune this if it ever becomes an issue.  */
+#define DQUOT_MAX_HEURISTIC    1024    /* NR_DQUOTS */
+#define ndquot                 DQUOT_MAX_HEURISTIC
+
+/* IRIX uses the current size of the name cache to guess a good value */
+/* - this isn't the same but is a good enough starting point for now. */
+#define DQUOT_HASH_HEURISTIC   files_stat.nr_files
+
+/* IRIX inodes maintain the project ID also, zero this field on Linux */
+#define DEFAULT_PROJID 0
+#define dfltprid       DEFAULT_PROJID
+
+#define MAXPATHLEN     1024
+
+#define MIN(a,b)       (min(a,b))
+#define MAX(a,b)       (max(a,b))
+#define howmany(x, y)  (((x)+((y)-1))/(y))
+#define roundup(x, y)  ((((x)+((y)-1))/(y))*(y))
+
+#define xfs_stack_trace()      dump_stack()
+
+#define xfs_itruncate_data(ip, off)    \
+       (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
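+
+/*
+ * vmtruncate() returns 0 or a negative errno; the negation above converts
+ * it to the positive error convention used inside XFS.
+ */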
+
+
+/* Move the kernel do_div definition off to one side */
+
+#if defined __i386__
+/* For ia32 we need to pull some tricks to get past various versions
+ * of the compiler which do not like us using do_div in the middle
+ * of large functions.
+ */
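+/*
+ * Editor's note: the 64-by-32 divide below is schoolbook long division in
+ * two 32-bit steps: divide the high word first (remainder in __upper),
+ * then "divl" divides __upper:__low by b, leaving the low quotient word
+ * in %eax and the final remainder in %edx; the empty asm merely repacks
+ * %edx:%eax into a 64-bit value via the "A" constraint.
+ */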
+static inline __u32 xfs_do_div(void *a, __u32 b, int n)
+{
+       __u32   mod;
+
+       switch (n) {
+               case 4:
+                       mod = *(__u32 *)a % b;
+                       *(__u32 *)a = *(__u32 *)a / b;
+                       return mod;
+               case 8:
+                       {
+                       unsigned long __upper, __low, __high, __mod;
+                       __u64   c = *(__u64 *)a;
+                       __upper = __high = c >> 32;
+                       __low = c;
+                       if (__high) {
+                               __upper = __high % (b);
+                               __high = __high / (b);
+                       }
+                       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
+                       asm("":"=A" (c):"a" (__low),"d" (__high));
+                       *(__u64 *)a = c;
+                       return __mod;
+                       }
+       }
+
+       /* NOTREACHED */
+       return 0;
+}
+
+/* Side effect free 64 bit mod operation */
+static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
+{
+       switch (n) {
+               case 4:
+                       return *(__u32 *)a % b;
+               case 8:
+                       {
+                       unsigned long __upper, __low, __high, __mod;
+                       __u64   c = *(__u64 *)a;
+                       __upper = __high = c >> 32;
+                       __low = c;
+                       if (__high) {
+                               __upper = __high % (b);
+                               __high = __high / (b);
+                       }
+                       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
+                       asm("":"=A" (c):"a" (__low),"d" (__high));
+                       return __mod;
+                       }
+       }
+
+       /* NOTREACHED */
+       return 0;
+}
+#else
+static inline __u32 xfs_do_div(void *a, __u32 b, int n)
+{
+       __u32   mod;
+
+       switch (n) {
+               case 4:
+                       mod = *(__u32 *)a % b;
+                       *(__u32 *)a = *(__u32 *)a / b;
+                       return mod;
+               case 8:
+                       mod = do_div(*(__u64 *)a, b);
+                       return mod;
+       }
+
+       /* NOTREACHED */
+       return 0;
+}
+
+/* Side effect free 64 bit mod operation */
+static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
+{
+       switch (n) {
+               case 4:
+                       return *(__u32 *)a % b;
+               case 8:
+                       {
+                       __u64   c = *(__u64 *)a;
+                       return do_div(c, b);
+                       }
+       }
+
+       /* NOTREACHED */
+       return 0;
+}
+#endif
+
+#undef do_div
+#define do_div(a, b)   xfs_do_div(&(a), (b), sizeof(a))
+#define do_mod(a, b)   xfs_do_mod(&(a), (b), sizeof(a))
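+
+/*
+ * Unlike the stock kernel do_div(), these wrappers take the address and
+ * size of the dividend, so they work on both 32-bit and 64-bit variables.
+ * Usage follows the usual convention, e.g.:
+ *
+ *     __uint64_t x = 1000;
+ *     __u32 r = do_div(x, 7);         leaves x == 142 and r == 6
+ */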
+
+static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
+{
+       x += y - 1;
+       do_div(x, y);
+       return(x * y);
+}
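+/* e.g. roundup_64(1000, 512) == 1024 */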
+
+#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux/xfs_lrw.c b/fs/xfs/linux/xfs_lrw.c
new file mode 100644 (file)
index 0000000..4bacdb7
--- /dev/null
@@ -0,0 +1,1028 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+/*
+ *  fs/xfs/linux/xfs_lrw.c (Linux Read Write stuff)
+ *
+ */
+
+#include "xfs.h"
+
+#include "xfs_fs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_cap.h"
+#include "xfs_mac.h"
+#include "xfs_attr.h"
+#include "xfs_inode_item.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_iomap.h"
+
+#include <linux/capability.h>
+
+
+#if defined(XFS_RW_TRACE)
+void
+xfs_rw_enter_trace(
+       int                     tag,
+       xfs_iocore_t            *io,
+       const struct iovec      *iovp,
+       size_t                  segs,
+       loff_t                  offset,
+       int                     ioflags)
+{
+       xfs_inode_t     *ip = XFS_IO_INODE(io);
+
+       if (ip->i_rwtrace == NULL)
+               return;
+       ktrace_enter(ip->i_rwtrace,
+               (void *)(unsigned long)tag,
+               (void *)ip,
+               (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
+               (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
+               (void *)(__psint_t)iovp,
+               (void *)((unsigned long)segs),
+               (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
+               (void *)((unsigned long)(offset & 0xffffffff)),
+               (void *)((unsigned long)ioflags),
+               (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)),
+               (void *)((unsigned long)(io->io_new_size & 0xffffffff)),
+               (void *)NULL,
+               (void *)NULL,
+               (void *)NULL,
+               (void *)NULL,
+               (void *)NULL);
+}
+
+void
+xfs_inval_cached_trace(
+       xfs_iocore_t    *io,
+       xfs_off_t       offset,
+       xfs_off_t       len,
+       xfs_off_t       first,
+       xfs_off_t       last)
+{
+       xfs_inode_t     *ip = XFS_IO_INODE(io);
+
+       if (ip->i_rwtrace == NULL)
+               return;
+       ktrace_enter(ip->i_rwtrace,
+               (void *)(__psint_t)XFS_INVAL_CACHED,
+               (void *)ip,
+               (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
+               (void *)((unsigned long)(offset & 0xffffffff)),
+               (void *)((unsigned long)((len >> 32) & 0xffffffff)),
+               (void *)((unsigned long)(len & 0xffffffff)),
+               (void *)((unsigned long)((first >> 32) & 0xffffffff)),
+               (void *)((unsigned long)(first & 0xffffffff)),
+               (void *)((unsigned long)((last >> 32) & 0xffffffff)),
+               (void *)((unsigned long)(last & 0xffffffff)),
+               (void *)NULL,
+               (void *)NULL,
+               (void *)NULL,
+               (void *)NULL,
+               (void *)NULL,
+               (void *)NULL);
+}
+#endif
+
+/*
+ *     xfs_iozero
+ *
+ *     xfs_iozero zeroes the specified byte range of the supplied inode
+ *     and marks all the affected blocks as valid and modified.  If
+ *     an affected block is not allocated, it will be allocated.  If
+ *     an affected block is not completely overwritten, and is not
+ *     valid before the operation, it will be read from disk before
+ *     being partially zeroed.
+ */
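+/*
+ * Implementation note: each pass grabs the page-cache page covering
+ * 'pos' and uses the address_space prepare_write/commit_write hooks to
+ * zero up to one page at a time.  The NULL struct file passed to those
+ * hooks appears to be tolerated by the generic block helpers used here.
+ */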
+STATIC int
+xfs_iozero(
+       struct inode            *ip,    /* inode                        */
+       loff_t                  pos,    /* offset in file               */
+       size_t                  count,  /* size of data to zero         */
+       loff_t                  end_size)       /* max file size to set */
+{
+       unsigned                bytes;
+       struct page             *page;
+       struct address_space    *mapping;
+       char                    *kaddr;
+       int                     status;
+
+       mapping = ip->i_mapping;
+       do {
+               unsigned long index, offset;
+
+               offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
+               index = pos >> PAGE_CACHE_SHIFT;
+               bytes = PAGE_CACHE_SIZE - offset;
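+       /*
+        * irix_symlink_mode is a sysctl: when set, symlinks honour the
+        * process umask (IRIX behaviour); otherwise they get the usual
+        * Linux mode of 0777 (S_IRWXUGO).
+        */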
+               if (bytes > count)
+                       bytes = count;
+
+               status = -ENOMEM;
+               page = grab_cache_page(mapping, index);
+               if (!page)
+                       break;
+
+               kaddr = kmap(page);
+               status = mapping->a_ops->prepare_write(NULL, page, offset,
+                                                       offset + bytes);
+               if (status) {
+                       goto unlock;
+               }
+
+               memset((void *) (kaddr + offset), 0, bytes);
+               flush_dcache_page(page);
+               status = mapping->a_ops->commit_write(NULL, page, offset,
+                                                       offset + bytes);
+               if (!status) {
+                       pos += bytes;
+                       count -= bytes;
+                       if (pos > i_size_read(ip))
+                               i_size_write(ip, pos < end_size ? pos : end_size);
+               }
+
+unlock:
+               kunmap(page);
+               unlock_page(page);
+               page_cache_release(page);
+               if (status)
+                       break;
+       } while (count);
+
+       return (-status);
+}
+
+/*
+ * xfs_inval_cached_pages
+ * 
+ * This routine is responsible for keeping direct I/O and buffered I/O
+ * somewhat coherent.  From here we make sure that we're at least
+ * temporarily holding the inode I/O lock exclusively and then call
+ * the page cache to flush and invalidate any cached pages.  If there
+ * are no cached pages this routine will be very quick.
+ */
+void
+xfs_inval_cached_pages(
+       vnode_t         *vp,
+       xfs_iocore_t    *io,
+       xfs_off_t       offset,
+       int             write,
+       int             relock)
+{
+       xfs_mount_t     *mp;
+
+       if (!VN_CACHED(vp)) {
+               return;
+       }
+
+       mp = io->io_mount;
+
+       /*
+        * We need to get the I/O lock exclusively in order
+        * to safely invalidate pages and mappings.
+        */
+       if (relock) {
+               XFS_IUNLOCK(mp, io, XFS_IOLOCK_SHARED);
+               XFS_ILOCK(mp, io, XFS_IOLOCK_EXCL);
+       }
+
+       /* Writing beyond EOF creates a hole that must be zeroed */
+       if (write && (offset > XFS_SIZE(mp, io))) {
+               xfs_fsize_t     isize;
+
+               XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+               isize = XFS_SIZE(mp, io);
+               if (offset > isize) {
+                       xfs_zero_eof(vp, io, offset, isize, offset);
+               }
+               XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+       }
+
+       xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1);
+       VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED);
+       if (relock) {
+               XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
+       }
+}
+
+ssize_t                        /* bytes read, or (-)  error */
+xfs_read(
+       bhv_desc_t              *bdp,
+       struct kiocb            *iocb,
+       const struct iovec      *iovp,
+       unsigned int            segs,
+       loff_t                  *offset,
+       int                     ioflags,
+       cred_t                  *credp)
+{
+       struct file             *file = iocb->ki_filp;
+       size_t                  size = 0;
+       ssize_t                 ret;
+       xfs_fsize_t             n;
+       xfs_inode_t             *ip;
+       xfs_mount_t             *mp;
+       vnode_t                 *vp;
+       unsigned long           seg;
+
+       ip = XFS_BHVTOI(bdp);
+       vp = BHV_TO_VNODE(bdp);
+       mp = ip->i_mount;
+
+       XFS_STATS_INC(xs_read_calls);
+
+       /* START copy & waste from filemap.c */
+       for (seg = 0; seg < segs; seg++) {
+               const struct iovec *iv = &iovp[seg];
+
+               /*
+                * If any segment has a negative length, or the cumulative
+                * length ever wraps negative then return -EINVAL.
+                */
+               size += iv->iov_len;
+               if (unlikely((ssize_t)(size|iv->iov_len) < 0))
+                       return XFS_ERROR(-EINVAL);
+       }
+       /* END copy & waste from filemap.c */
+
+       if (ioflags & IO_ISDIRECT) {
+               xfs_buftarg_t   *target =
+                       (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
+                               mp->m_rtdev_targp : mp->m_ddev_targp;
+               if ((*offset & target->pbr_smask) ||
+                   (size & target->pbr_smask)) {
+                       if (*offset == ip->i_d.di_size) {
+                               return (0);
+                       }
+                       return -XFS_ERROR(EINVAL);
+               }
+       }
+
+       n = XFS_MAXIOFFSET(mp) - *offset;
+       if ((n <= 0) || (size == 0))
+               return 0;
+
+       if (n < size)
+               size = n;
+
+       if (XFS_FORCED_SHUTDOWN(mp)) {
+               return -EIO;
+       }
+
+       /* OK so we are holding the I/O lock for the duration
+        * of the submission, then what happens if the I/O
+        * does not really happen here, but is scheduled 
+        * later?
+        */
+       xfs_ilock(ip, XFS_IOLOCK_SHARED);
+
+       if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
+           !(ioflags & IO_INVIS)) {
+               vrwlock_t locktype = VRWLOCK_READ;
+
+               ret = XFS_SEND_DATA(mp, DM_EVENT_READ,
+                                       BHV_TO_VNODE(bdp), *offset, size,
+                                       FILP_DELAY_FLAG(file), &locktype);
+               if (ret) {
+                       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+                       return -ret;
+               }
+       }
+
+       xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
+                               iovp, segs, *offset, ioflags);
+       ret = __generic_file_aio_read(iocb, iovp, segs, offset);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+       if (ret > 0)
+               XFS_STATS_ADD(xs_read_bytes, ret);
+
+       if (likely(!(ioflags & IO_INVIS)))
+               xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
+
+       return ret;
+}
+
+ssize_t
+xfs_sendfile(
+       bhv_desc_t              *bdp,
+       struct file             *filp,
+       loff_t                  *offset,
+       int                     ioflags,
+       size_t                  count,
+       read_actor_t            actor,
+       void                    *target,
+       cred_t                  *credp)
+{
+       ssize_t                 ret;
+       xfs_fsize_t             n;
+       xfs_inode_t             *ip;
+       xfs_mount_t             *mp;
+       vnode_t                 *vp;
+
+       ip = XFS_BHVTOI(bdp);
+       vp = BHV_TO_VNODE(bdp);
+       mp = ip->i_mount;
+
+       XFS_STATS_INC(xs_read_calls);
+
+       n = XFS_MAXIOFFSET(mp) - *offset;
+       if ((n <= 0) || (count == 0))
+               return 0;
+
+       if (n < count)
+               count = n;
+
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return -EIO;
+
+       xfs_ilock(ip, XFS_IOLOCK_SHARED);
+
+       if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
+           (!(ioflags & IO_INVIS))) {
+               vrwlock_t locktype = VRWLOCK_READ;
+               int error;
+
+               error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), *offset, count,
+                                     FILP_DELAY_FLAG(filp), &locktype);
+               if (error) {
+                       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+                       return -error;
+               }
+       }
+       xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
+                               target, count, *offset, ioflags);
+       ret = generic_file_sendfile(filp, offset, count, actor, target);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+       XFS_STATS_ADD(xs_read_bytes, ret);
+       xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
+       return ret;
+}
+
+/*
+ * This routine is called to handle zeroing any space in the last
+ * block of the file that is beyond the EOF.  We do this since the
+ * size is being increased without writing anything to that block
+ * and we don't want anyone to read the garbage on the disk.
+ */
+STATIC int                             /* error (positive) */
+xfs_zero_last_block(
+       struct inode    *ip,
+       xfs_iocore_t    *io,
+       xfs_off_t       offset,
+       xfs_fsize_t     isize,
+       xfs_fsize_t     end_size)
+{
+       xfs_fileoff_t   last_fsb;
+       xfs_mount_t     *mp;
+       int             nimaps;
+       int             zero_offset;
+       int             zero_len;
+       int             isize_fsb_offset;
+       int             error = 0;
+       xfs_bmbt_irec_t imap;
+       loff_t          loff;
+       size_t          lsize;
+
+       ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
+       ASSERT(offset > isize);
+
+       mp = io->io_mount;
+
+       isize_fsb_offset = XFS_B_FSB_OFFSET(mp, isize);
+       if (isize_fsb_offset == 0) {
+               /*
+                * There are no extra bytes in the last block on disk to
+                * zero, so return.
+                */
+               return 0;
+       }
+
+       last_fsb = XFS_B_TO_FSBT(mp, isize);
+       nimaps = 1;
+       error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap,
+                         &nimaps, NULL);
+       if (error) {
+               return error;
+       }
+       ASSERT(nimaps > 0);
+       /*
+        * If the block underlying isize is just a hole, then there
+        * is nothing to zero.
+        */
+       if (imap.br_startblock == HOLESTARTBLOCK) {
+               return 0;
+       }
+       /*
+        * Zero the part of the last block beyond the EOF, and write it
+        * out sync.  We need to drop the ilock while we do this so we
+        * don't deadlock when the buffer cache calls back to us.
+        */
+       XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
+       loff = XFS_FSB_TO_B(mp, last_fsb);
+       lsize = XFS_FSB_TO_B(mp, 1);
+
+       zero_offset = isize_fsb_offset;
+       zero_len = mp->m_sb.sb_blocksize - isize_fsb_offset;
+
+       error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);
+
+       XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+       ASSERT(error >= 0);
+       return error;
+}
+
+/*
+ * Zero any on disk space between the current EOF and the new,
+ * larger EOF.  This handles the normal case of zeroing the remainder
+ * of the last block in the file and the unusual case of zeroing blocks
+ * out beyond the size of the file.  This second case only happens
+ * with fixed size extents and when the system crashes before the inode
+ * size was updated but after blocks were allocated.  Holes and
+ * unwritten extents in the range are left alone as holes; only
+ * allocated, already-written blocks are zeroed.
+ */
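+
+/*
+ * Worked example, assuming 4 KiB filesystem blocks: growing a file from
+ * isize 6000 to offset 20000 makes xfs_zero_last_block() zero file bytes
+ * 6000..8191 within block 1, after which the loop below zeroes blocks
+ * 2..4 (skipping any that map to holes or unwritten extents).
+ */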
+
+int                                    /* error (positive) */
+xfs_zero_eof(
+       vnode_t         *vp,
+       xfs_iocore_t    *io,
+       xfs_off_t       offset,         /* starting I/O offset */
+       xfs_fsize_t     isize,          /* current inode size */
+       xfs_fsize_t     end_size)       /* terminal inode size */
+{
+       struct inode    *ip = LINVFS_GET_IP(vp);
+       xfs_fileoff_t   start_zero_fsb;
+       xfs_fileoff_t   end_zero_fsb;
+       xfs_fileoff_t   prev_zero_fsb;
+       xfs_fileoff_t   zero_count_fsb;
+       xfs_fileoff_t   last_fsb;
+       xfs_extlen_t    buf_len_fsb;
+       xfs_extlen_t    prev_zero_count;
+       xfs_mount_t     *mp;
+       int             nimaps;
+       int             error = 0;
+       xfs_bmbt_irec_t imap;
+       loff_t          loff;
+       size_t          lsize;
+
+       ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
+       ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
+
+       mp = io->io_mount;
+
+       /*
+        * First handle zeroing the block on which isize resides.
+        * We only zero a part of that block so it is handled specially.
+        */
+       error = xfs_zero_last_block(ip, io, offset, isize, end_size);
+       if (error) {
+               ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
+               ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
+               return error;
+       }
+
+       /*
+        * Calculate the range between the new size and the old
+        * where blocks needing to be zeroed may exist.  To get the
+        * block where the last byte in the file currently resides,
+        * we need to subtract one from the size and truncate back
+        * to a block boundary.  We subtract 1 in case the size is
+        * exactly on a block boundary.
+        */
+       last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
+       start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
+       end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
+       ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
+       if (last_fsb == end_zero_fsb) {
+               /*
+                * The size was only incremented on its last block.
+                * We took care of that above, so just return.
+                */
+               return 0;
+       }
+
+       ASSERT(start_zero_fsb <= end_zero_fsb);
+       prev_zero_fsb = NULLFILEOFF;
+       prev_zero_count = 0;
+       while (start_zero_fsb <= end_zero_fsb) {
+               nimaps = 1;
+               zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
+               error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb,
+                                 0, NULL, 0, &imap, &nimaps, NULL);
+               if (error) {
+                       ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
+                       ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
+                       return error;
+               }
+               ASSERT(nimaps > 0);
+
+               if (imap.br_state == XFS_EXT_UNWRITTEN ||
+                   imap.br_startblock == HOLESTARTBLOCK) {
+                       /*
+                        * Holes and unwritten extents contain no stale
+                        * on-disk data that needs zeroing, so simply skip
+                        * past this mapping and carry on with the next one.
+                        */
+                       prev_zero_fsb = NULLFILEOFF;
+                       prev_zero_count = 0;
+                       start_zero_fsb = imap.br_startoff +
+                                        imap.br_blockcount;
+                       ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+                       continue;
+               }
+
+               /*
+                * There are blocks in the range requested.
+                * Zero them a single write at a time.  We actually
+                * don't zero the entire range returned if it is
+                * too big and simply loop around to get the rest.
+                * That is not the most efficient thing to do, but it
+                * is simple and this path should not be exercised often.
+                */
+               buf_len_fsb = XFS_FILBLKS_MIN(imap.br_blockcount,
+                                             mp->m_writeio_blocks << 8);
+               /*
+                * Drop the inode lock while we're doing the I/O.
+                * We'll still have the iolock to protect us.
+                */
+               XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+
+               loff = XFS_FSB_TO_B(mp, start_zero_fsb);
+               lsize = XFS_FSB_TO_B(mp, buf_len_fsb);
+
+               error = xfs_iozero(ip, loff, lsize, end_size);
+
+               if (error) {
+                       goto out_lock;
+               }
+
+               prev_zero_fsb = start_zero_fsb;
+               prev_zero_count = buf_len_fsb;
+               start_zero_fsb = imap.br_startoff + buf_len_fsb;
+               ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+
+               XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+       }
+
+       return 0;
+
+out_lock:
+
+       XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+       ASSERT(error >= 0);
+       return error;
+}
+
+ssize_t                                /* bytes written, or (-) error */
+xfs_write(
+       bhv_desc_t              *bdp,
+       struct kiocb            *iocb,
+       const struct iovec      *iovp,
+       unsigned int            segs,
+       loff_t                  *offset,
+       int                     ioflags,
+       cred_t                  *credp)
+{
+       struct file             *file = iocb->ki_filp;
+       size_t                  size = 0;
+       xfs_inode_t             *xip;
+       xfs_mount_t             *mp;
+       ssize_t                 ret;
+       int                     error = 0;
+       xfs_fsize_t             isize, new_size;
+       xfs_fsize_t             n, limit;
+       xfs_iocore_t            *io;
+       vnode_t                 *vp;
+       unsigned long           seg;
+       int                     iolock;
+       int                     eventsent = 0;
+       vrwlock_t               locktype;
+
+       XFS_STATS_INC(xs_write_calls);
+
+       vp = BHV_TO_VNODE(bdp);
+       xip = XFS_BHVTOI(bdp);
+
+       /* START copy & waste from filemap.c */
+       for (seg = 0; seg < segs; seg++) {
+               const struct iovec *iv = &iovp[seg];
+
+               /*
+                * If any segment has a negative length, or the cumulative
+                * length ever wraps negative then return -EINVAL.
+                */
+               size += iv->iov_len;
+               if (unlikely((ssize_t)(size|iv->iov_len) < 0))
+                       return XFS_ERROR(-EINVAL);
+       }
+       /* END copy & waste from filemap.c */
+
+       if (size == 0)
+               return 0;
+
+       io = &xip->i_iocore;
+       mp = io->io_mount;
+
+       if (XFS_FORCED_SHUTDOWN(mp)) {
+               return -EIO;
+       }
+
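+       /*
+        * Direct I/O must be aligned to the target device's sector mask
+        * and takes the iolock shared (allowing concurrent direct
+        * writers); buffered writes take the iolock exclusive.
+        */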
+       if (ioflags & IO_ISDIRECT) {
+               xfs_buftarg_t   *target =
+                       (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
+                               mp->m_rtdev_targp : mp->m_ddev_targp;
+
+               if ((*offset & target->pbr_smask) ||
+                   (size & target->pbr_smask)) {
+                       return XFS_ERROR(-EINVAL);
+               }
+               iolock = XFS_IOLOCK_SHARED;
+               locktype = VRWLOCK_WRITE_DIRECT;
+       } else {
+               iolock = XFS_IOLOCK_EXCL;
+               locktype = VRWLOCK_WRITE;
+       }
+
+       xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
+
+       isize = xip->i_d.di_size;
+       limit = XFS_MAXIOFFSET(mp);
+
+       if (file->f_flags & O_APPEND)
+               *offset = isize;
+
+start:
+       n = limit - *offset;
+       if (n <= 0) {
+               xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
+               return -EFBIG;
+       }
+
+       if (n < size)
+               size = n;
+
+       new_size = *offset + size;
+       if (new_size > isize) {
+               io->io_new_size = new_size;
+       }
+
+       if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
+           !(ioflags & IO_INVIS) && !eventsent)) {
+               loff_t          savedsize = *offset;
+               int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
+
+               xfs_iunlock(xip, XFS_ILOCK_EXCL);
+               error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
+                                     *offset, size,
+                                     dmflags, &locktype);
+               if (error) {
+                       xfs_iunlock(xip, iolock);
+                       return -error;
+               }
+               xfs_ilock(xip, XFS_ILOCK_EXCL);
+               eventsent = 1;
+
+               /*
+                * The iolock was dropped and reacquired in XFS_SEND_DATA
+                * so we have to recheck the size when appending.
+                * We will only "goto start;" once, since having sent the
+                * event prevents another call to XFS_SEND_DATA, which is
+                * what allows the size to change in the first place.
+                */
+               if ((file->f_flags & O_APPEND) &&
+                   savedsize != xip->i_d.di_size) {
+                       *offset = isize = xip->i_d.di_size;
+                       goto start;
+               }
+       }
+
+       /*
+        * On Linux, generic_file_write updates the times even if
+        * no data is copied in so long as the write had a size.
+        *
+        * We must update XFS's own timestamps here, since a later
+        * revalidate will copy the XFS values back over the Linux inode.
+        */
+       if (size && !(ioflags & IO_INVIS))
+               xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+       /*
+        * If the offset is beyond the size of the file, we have a couple
+        * of things to do. First, if there is already space allocated
+        * we need to either create holes or zero the disk or ...
+        *
+        * If there is a page where the previous size lands, we need
+        * to zero it out up to the new size.
+        */
+
+       if (!(ioflags & IO_ISDIRECT) && (*offset > isize && isize)) {
+               error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset,
+                       isize, *offset + size);
+               if (error) {
+                       xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
+                       return(-error);
+               }
+       }
+       xfs_iunlock(xip, XFS_ILOCK_EXCL);
+
+       /*
+        * If we're writing the file then make sure to clear the
+        * setuid and setgid bits if the process is not being run
+        * by root.  This keeps people from modifying setuid and
+        * setgid binaries.
+        */
+
+       if (((xip->i_d.di_mode & S_ISUID) ||
+           ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) ==
+               (S_ISGID | S_IXGRP))) &&
+            !capable(CAP_FSETID)) {
+               error = xfs_write_clear_setuid(xip);
+               if (error) {
+                       xfs_iunlock(xip, iolock);
+                       return -error;
+               }
+       }
+
+retry:
+       if (ioflags & IO_ISDIRECT) {
+               xfs_inval_cached_pages(vp, io, *offset, 1, 1);
+               xfs_rw_enter_trace(XFS_DIOWR_ENTER,
+                               io, iovp, segs, *offset, ioflags);
+       } else {
+               xfs_rw_enter_trace(XFS_WRITE_ENTER,
+                               io, iovp, segs, *offset, ioflags);
+       }
+       ret = generic_file_aio_write_nolock(iocb, iovp, segs, offset);
+
+       if ((ret == -ENOSPC) &&
+           DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) &&
+           !(ioflags & IO_INVIS)) {
+
+               xfs_rwunlock(bdp, locktype);
+               error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
+                               DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
+                               0, 0, 0); /* Delay flag intentionally unused */
+               if (error)
+                       return -error;
+               xfs_rwlock(bdp, locktype);
+               *offset = xip->i_d.di_size;
+               goto retry;
+       }
+
+       if (*offset > xip->i_d.di_size) {
+               xfs_ilock(xip, XFS_ILOCK_EXCL);
+               if (*offset > xip->i_d.di_size) {
+                       struct inode    *inode = LINVFS_GET_IP(vp);
+
+                       xip->i_d.di_size = *offset;
+                       i_size_write(inode, *offset);
+                       xip->i_update_core = 1;
+                       xip->i_update_size = 1;
+               }
+               xfs_iunlock(xip, XFS_ILOCK_EXCL);
+       }
+
+       if (ret <= 0) {
+               xfs_rwunlock(bdp, locktype);
+               return ret;
+       }
+
+       XFS_STATS_ADD(xs_write_bytes, ret);
+
+       /* Handle various SYNC-type writes */
+       if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_dentry->d_inode)) {
+
+               /*
+                * If we're treating this as O_DSYNC and we have not updated the
+                * size, force the log.
+                */
+
+               if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC)
+                       && !(xip->i_update_size)) {
+                       /*
+                        * If an allocation transaction occurred
+                        * without extending the size, then we have to force
+                        * the log up to the proper point to ensure that the
+                        * allocation is permanent.  We can't count on
+                        * the fact that buffered writes lock out direct I/O
+                        * writes - the direct I/O write could have extended
+                        * the size nontransactionally, then finished before
+                        * we started.  xfs_write_file will think that the file
+                        * didn't grow but the update isn't safe unless the
+                        * size change is logged.
+                        *
+                        * Force the log if we've committed a transaction
+                        * against the inode or if someone else has and
+                        * the commit record hasn't gone to disk (e.g.
+                        * the inode is pinned).  This guarantees that
+                        * all changes affecting the inode are permanent
+                        * when we return.
+                        */
+
+                       xfs_inode_log_item_t *iip;
+                       xfs_lsn_t lsn;
+
+                       iip = xip->i_itemp;
+                       if (iip && iip->ili_last_lsn) {
+                               lsn = iip->ili_last_lsn;
+                               xfs_log_force(mp, lsn,
+                                               XFS_LOG_FORCE | XFS_LOG_SYNC);
+                       } else if (xfs_ipincount(xip) > 0) {
+                               xfs_log_force(mp, (xfs_lsn_t)0,
+                                               XFS_LOG_FORCE | XFS_LOG_SYNC);
+                       }
+
+               } else {
+                       xfs_trans_t     *tp;
+
+                       /*
+                        * O_SYNC or O_DSYNC _with_ a size update are handled
+                        * the same way.
+                        *
+                        * If the write was synchronous then we need to make
+                        * sure that the inode modification time is permanent.
+                        * We'll have updated the timestamp above, so here
+                        * we use a synchronous transaction to log the inode.
+                        * It's not fast, but it's necessary.
+                        *
+                        * If this is a dsync write and the size got changed
+                        * non-transactionally, then we need to ensure that
+                        * the size change gets logged in a synchronous
+                        * transaction.
+                        */
+
+                       tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
+                       if ((error = xfs_trans_reserve(tp, 0,
+                                                     XFS_SWRITE_LOG_RES(mp),
+                                                     0, 0, 0))) {
+                               /* Transaction reserve failed */
+                               xfs_trans_cancel(tp, 0);
+                       } else {
+                               /* Transaction reserve successful */
+                               xfs_ilock(xip, XFS_ILOCK_EXCL);
+                               xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL);
+                               xfs_trans_ihold(tp, xip);
+                               xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE);
+                               xfs_trans_set_sync(tp);
+                               error = xfs_trans_commit(tp, 0, (xfs_lsn_t)0);
+                               xfs_iunlock(xip, XFS_ILOCK_EXCL);
+                       }
+               }
+       } /* (file->f_flags & O_SYNC) || IS_SYNC() */
+
+       xfs_rwunlock(bdp, locktype);
+       return(ret);
+}
+
+/*
+ * All xfs metadata buffers except log state machine buffers
+ * get this attached as their b_bdstrat callback function.
+ * This is so that we can catch a buffer after we have prematurely
+ * unpinned it in order to forcibly shut down the filesystem.
+ */
+int
+xfs_bdstrat_cb(struct xfs_buf *bp)
+{
+       xfs_mount_t     *mp;
+
+       mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
+       if (!XFS_FORCED_SHUTDOWN(mp)) {
+               pagebuf_iorequest(bp);
+               return 0;
+       } else {
+               xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
+               /*
+                * Metadata write that didn't get logged but
+                * was written out delayed anyway. These aren't
+                * associated with a transaction and can be ignored.
+                */
+               if (XFS_BUF_IODONE_FUNC(bp) == NULL &&
+                   (XFS_BUF_ISREAD(bp)) == 0)
+                       return (xfs_bioerror_relse(bp));
+               else
+                       return (xfs_bioerror(bp));
+       }
+}
+
+
+int
+xfs_bmap(bhv_desc_t    *bdp,
+       xfs_off_t       offset,
+       ssize_t         count,
+       int             flags,
+       xfs_iomap_t     *iomapp,
+       int             *niomaps)
+{
+       xfs_inode_t     *ip = XFS_BHVTOI(bdp);
+       xfs_iocore_t    *io = &ip->i_iocore;
+
+       ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
+       ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
+              ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));
+
+       return xfs_iomap(io, offset, count, flags, iomapp, niomaps);
+}
+
+/*
+ * Wrapper around bdstrat so that we can stop data
+ * from going to disk in case we are shutting down the filesystem.
+ * Typically user data goes through this path; one of the exceptions
+ * is the superblock.
+ */
+int
+xfsbdstrat(
+       struct xfs_mount        *mp,
+       struct xfs_buf          *bp)
+{
+       ASSERT(mp);
+       if (!XFS_FORCED_SHUTDOWN(mp)) {
+               /* Grio redirection would go here
+                * if (XFS_BUF_IS_GRIO(bp)) {
+                */
+
+               pagebuf_iorequest(bp);
+               return 0;
+       }
+
+       xfs_buftrace("XFSBDSTRAT IOERROR", bp);
+       return (xfs_bioerror_relse(bp));
+}
+
+/*
+ * If the underlying (data/log/rt) device is readonly, there are some
+ * operations that cannot proceed.
+ */
+int
+xfs_dev_is_read_only(
+       xfs_mount_t             *mp,
+       char                    *message)
+{
+       if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
+           xfs_readonly_buftarg(mp->m_logdev_targp) ||
+           (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
+               cmn_err(CE_NOTE,
+                       "XFS: %s required on read-only device.", message);
+               cmn_err(CE_NOTE,
+                       "XFS: write access unavailable, cannot proceed.");
+               return EROFS;
+       }
+       return 0;
+}
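
The O_SYNC/O_DSYNC handling in xfs_write() above mirrors what user space asks for with fsync(2) versus fdatasync(2): a data-sync only has to make the data and any size-changing metadata durable, while a full sync must also cover timestamp-only inode updates. A minimal user-space sketch of that distinction follows; it is not part of this patch, and the file name is made up for illustration.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
        const char buf[] = "hello, xfs\n";
        int fd = open("/tmp/sync-demo.dat", O_WRONLY | O_CREAT | O_TRUNC, 0644);

        if (fd < 0) {
                perror("open");
                return EXIT_FAILURE;
        }
        if (write(fd, buf, sizeof(buf) - 1) != (ssize_t)(sizeof(buf) - 1)) {
                perror("write");
                return EXIT_FAILURE;
        }

        /*
         * fdatasync() is the O_DSYNC analogue: the data and any metadata
         * needed to get it back (e.g. the new file size) must be on disk.
         */
        if (fdatasync(fd) < 0)
                perror("fdatasync");

        /*
         * fsync() is the O_SYNC analogue: timestamps and other pure
         * metadata updates must be made durable as well.
         */
        if (fsync(fd) < 0)
                perror("fsync");

        close(fd);
        return EXIT_SUCCESS;
}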
diff --git a/fs/xfs/linux/xfs_lrw.h b/fs/xfs/linux/xfs_lrw.h
new file mode 100644 (file)
index 0000000..faf0afc
--- /dev/null
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_LRW_H__
+#define __XFS_LRW_H__
+
+struct vnode;
+struct bhv_desc;
+struct xfs_mount;
+struct xfs_iocore;
+struct xfs_inode;
+struct xfs_bmbt_irec;
+struct xfs_buf;
+struct xfs_iomap;
+
+#if defined(XFS_RW_TRACE)
+/*
+ * Defines for the trace mechanisms in xfs_lrw.c.
+ */
+#define        XFS_RW_KTRACE_SIZE      128
+
+#define        XFS_READ_ENTER          1
+#define        XFS_WRITE_ENTER         2
+#define XFS_IOMAP_READ_ENTER   3
+#define        XFS_IOMAP_WRITE_ENTER   4
+#define        XFS_IOMAP_READ_MAP      5
+#define        XFS_IOMAP_WRITE_MAP     6
+#define        XFS_IOMAP_WRITE_NOSPACE 7
+#define        XFS_ITRUNC_START        8
+#define        XFS_ITRUNC_FINISH1      9
+#define        XFS_ITRUNC_FINISH2      10
+#define        XFS_CTRUNC1             11
+#define        XFS_CTRUNC2             12
+#define        XFS_CTRUNC3             13
+#define        XFS_CTRUNC4             14
+#define        XFS_CTRUNC5             15
+#define        XFS_CTRUNC6             16
+#define        XFS_BUNMAPI             17
+#define        XFS_INVAL_CACHED        18
+#define        XFS_DIORD_ENTER         19
+#define        XFS_DIOWR_ENTER         20
+#define        XFS_SENDFILE_ENTER      21
+#define        XFS_WRITEPAGE_ENTER     22
+#define        XFS_RELEASEPAGE_ENTER   23
+#define        XFS_IOMAP_ALLOC_ENTER   24
+#define        XFS_IOMAP_ALLOC_MAP     25
+#define        XFS_IOMAP_UNWRITTEN     26
+extern void xfs_rw_enter_trace(int, struct xfs_iocore *,
+                       const struct iovec *, size_t, loff_t, int);
+extern void xfs_inval_cached_trace(struct xfs_iocore *,
+                       xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t);
+#else
+#define xfs_rw_enter_trace(tag, io, iovec, segs, offset, ioflags)
+#define xfs_inval_cached_trace(io, offset, len, first, last)
+#endif
+
+/*
+ * Maximum count of bmaps used by read and write paths.
+ */
+#define        XFS_MAX_RW_NBMAPS       4
+
+extern int xfs_bmap(struct bhv_desc *, xfs_off_t, ssize_t, int,
+                       struct xfs_iomap *, int *);
+extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
+extern int xfs_bdstrat_cb(struct xfs_buf *);
+
+extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t,
+                               xfs_fsize_t, xfs_fsize_t);
+extern void xfs_inval_cached_pages(struct vnode        *, struct xfs_iocore *,
+                               xfs_off_t, int, int);
+extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
+                               const struct iovec *, unsigned int,
+                               loff_t *, int, struct cred *);
+extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
+                               const struct iovec *, unsigned int,
+                               loff_t *, int, struct cred *);
+extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
+                               loff_t *, int, size_t, read_actor_t,
+                               void *, struct cred *);
+
+extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
+
+#define XFS_FSB_TO_DB_IO(io,fsb) \
+               (((io)->io_flags & XFS_IOCORE_RT) ? \
+                XFS_FSB_TO_BB((io)->io_mount, (fsb)) : \
+                XFS_FSB_TO_DADDR((io)->io_mount, (fsb)))
+
+#endif /* __XFS_LRW_H__ */
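
The XFS_RW_TRACE block above uses a common kernel idiom: when tracing is configured out, the trace helpers become empty macros, so call sites need no #ifdef clutter and cost nothing at runtime. A small stand-alone sketch of the same pattern, with invented names, might look like this.

#include <stdio.h>

/* Uncomment to compile the trace calls in. */
/* #define DEMO_TRACE 1 */

#ifdef DEMO_TRACE
static void demo_trace(int tag, const char *what, long value)
{
        fprintf(stderr, "trace tag=%d %s=%ld\n", tag, what, value);
}
#else
/* Expands to nothing: the call sites vanish at compile time. */
#define demo_trace(tag, what, value)
#endif

int main(void)
{
        long offset = 4096;

        demo_trace(1, "offset", offset);    /* no-op unless DEMO_TRACE is set */
        printf("offset is %ld\n", offset);
        return 0;
}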
diff --git a/fs/xfs/linux/xfs_stats.c b/fs/xfs/linux/xfs_stats.c
new file mode 100644 (file)
index 0000000..b7de296
--- /dev/null
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include <linux/proc_fs.h>
+
+DEFINE_PER_CPU(struct xfsstats, xfsstats);
+
+STATIC int
+xfs_read_xfsstats(
+       char            *buffer,
+       char            **start,
+       off_t           offset,
+       int             count,
+       int             *eof,
+       void            *data)
+{
+       int             c, i, j, len, val;
+       __uint64_t      xs_xstrat_bytes = 0;
+       __uint64_t      xs_write_bytes = 0;
+       __uint64_t      xs_read_bytes = 0;
+
+       static struct xstats_entry {
+               char    *desc;
+               int     endpoint;
+       } xstats[] = {
+               { "extent_alloc",       XFSSTAT_END_EXTENT_ALLOC        },
+               { "abt",                XFSSTAT_END_ALLOC_BTREE         },
+               { "blk_map",            XFSSTAT_END_BLOCK_MAPPING       },
+               { "bmbt",               XFSSTAT_END_BLOCK_MAP_BTREE     },
+               { "dir",                XFSSTAT_END_DIRECTORY_OPS       },
+               { "trans",              XFSSTAT_END_TRANSACTIONS        },
+               { "ig",                 XFSSTAT_END_INODE_OPS           },
+               { "log",                XFSSTAT_END_LOG_OPS             },
+               { "push_ail",           XFSSTAT_END_TAIL_PUSHING        },
+               { "xstrat",             XFSSTAT_END_WRITE_CONVERT       },
+               { "rw",                 XFSSTAT_END_READ_WRITE_OPS      },
+               { "attr",               XFSSTAT_END_ATTRIBUTE_OPS       },
+               { "icluster",           XFSSTAT_END_INODE_CLUSTER       },
+               { "vnodes",             XFSSTAT_END_VNODE_OPS           },
+               { "buf",                XFSSTAT_END_BUF                 },
+       };
+
+       /* Loop over all stats groups */
+       for (i = j = len = 0; i < sizeof(xstats)/sizeof(struct xstats_entry); i++) {
+               len += sprintf(buffer + len, "%s", xstats[i].desc);
+               /* inner loop does each group */
+               while (j < xstats[i].endpoint) {
+                       val = 0;
+                       /* sum over all cpus */
+                       for (c = 0; c < NR_CPUS; c++) {
+                               if (!cpu_possible(c)) continue;
+                               val += *(((__u32*)&per_cpu(xfsstats, c) + j));
+                       }
+                       len += sprintf(buffer + len, " %u", val);
+                       j++;
+               }
+               buffer[len++] = '\n';
+       }
+       /* extra precision counters */
+       for (i = 0; i < NR_CPUS; i++) {
+               if (!cpu_possible(i)) continue;
+               xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
+               xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
+               xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
+       }
+
+       len += sprintf(buffer + len, "xpc %Lu %Lu %Lu\n",
+                       xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
+       len += sprintf(buffer + len, "debug %u\n",
+#if defined(XFSDEBUG)
+               1);
+#else
+               0);
+#endif
+
+       if (offset >= len) {
+               *start = buffer;
+               *eof = 1;
+               return 0;
+       }
+       *start = buffer + offset;
+       if ((len -= offset) > count)
+               return count;
+       *eof = 1;
+
+       return len;
+}
+
+void
+xfs_init_procfs(void)
+{
+       if (!proc_mkdir("fs/xfs", 0))
+               return;
+       create_proc_read_entry("fs/xfs/stat", 0, 0, xfs_read_xfsstats, NULL);
+}
+
+void
+xfs_cleanup_procfs(void)
+{
+       remove_proc_entry("fs/xfs/stat", NULL);
+       remove_proc_entry("fs/xfs", NULL);
+}
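
The tail of xfs_read_xfsstats() follows the classic read_proc contract: the caller passes an offset and count, and the handler reports where valid data starts, how much of it is valid, and whether the reader has reached EOF. The sketch below simulates that contract in user space; the handler serves a fixed string, rebuilding its whole buffer on every call just as xfs_read_xfsstats does, and all names are invented.

#include <stdio.h>
#include <string.h>

/*
 * Mimics a read_proc-style handler: fills `page`, sets *start to where
 * the caller should copy from, and sets *eof once everything is returned.
 */
static int demo_read_proc(char *page, char **start, long offset, int count,
                          int *eof)
{
        static const char data[] = "extent_alloc 1 2 3 4\nlog 5 6 7 8 9\n";
        int len = sizeof(data) - 1;

        memcpy(page, data, len);
        if (offset >= len) {
                *start = page;
                *eof = 1;
                return 0;
        }
        *start = page + offset;
        if ((len -= offset) > count)
                return count;
        *eof = 1;
        return len;
}

int main(void)
{
        char page[256];
        char *start;
        long offset = 0;
        int eof = 0;

        /* The caller keeps asking for small chunks until *eof is set. */
        while (!eof) {
                int n = demo_read_proc(page, &start, offset, 10, &eof);
                fwrite(start, 1, n, stdout);
                offset += n;
        }
        return 0;
}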
diff --git a/fs/xfs/linux/xfs_stats.h b/fs/xfs/linux/xfs_stats.h
new file mode 100644 (file)
index 0000000..0456600
--- /dev/null
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_STATS_H__
+#define __XFS_STATS_H__
+
+
+#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
+
+#include <linux/percpu.h>
+
+/*
+ * XFS global statistics
+ */
+struct xfsstats {
+# define XFSSTAT_END_EXTENT_ALLOC      4
+       __uint32_t              xs_allocx;
+       __uint32_t              xs_allocb;
+       __uint32_t              xs_freex;
+       __uint32_t              xs_freeb;
+# define XFSSTAT_END_ALLOC_BTREE       (XFSSTAT_END_EXTENT_ALLOC+4)
+       __uint32_t              xs_abt_lookup;
+       __uint32_t              xs_abt_compare;
+       __uint32_t              xs_abt_insrec;
+       __uint32_t              xs_abt_delrec;
+# define XFSSTAT_END_BLOCK_MAPPING     (XFSSTAT_END_ALLOC_BTREE+7)
+       __uint32_t              xs_blk_mapr;
+       __uint32_t              xs_blk_mapw;
+       __uint32_t              xs_blk_unmap;
+       __uint32_t              xs_add_exlist;
+       __uint32_t              xs_del_exlist;
+       __uint32_t              xs_look_exlist;
+       __uint32_t              xs_cmp_exlist;
+# define XFSSTAT_END_BLOCK_MAP_BTREE   (XFSSTAT_END_BLOCK_MAPPING+4)
+       __uint32_t              xs_bmbt_lookup;
+       __uint32_t              xs_bmbt_compare;
+       __uint32_t              xs_bmbt_insrec;
+       __uint32_t              xs_bmbt_delrec;
+# define XFSSTAT_END_DIRECTORY_OPS     (XFSSTAT_END_BLOCK_MAP_BTREE+4)
+       __uint32_t              xs_dir_lookup;
+       __uint32_t              xs_dir_create;
+       __uint32_t              xs_dir_remove;
+       __uint32_t              xs_dir_getdents;
+# define XFSSTAT_END_TRANSACTIONS      (XFSSTAT_END_DIRECTORY_OPS+3)
+       __uint32_t              xs_trans_sync;
+       __uint32_t              xs_trans_async;
+       __uint32_t              xs_trans_empty;
+# define XFSSTAT_END_INODE_OPS         (XFSSTAT_END_TRANSACTIONS+7)
+       __uint32_t              xs_ig_attempts;
+       __uint32_t              xs_ig_found;
+       __uint32_t              xs_ig_frecycle;
+       __uint32_t              xs_ig_missed;
+       __uint32_t              xs_ig_dup;
+       __uint32_t              xs_ig_reclaims;
+       __uint32_t              xs_ig_attrchg;
+# define XFSSTAT_END_LOG_OPS           (XFSSTAT_END_INODE_OPS+5)
+       __uint32_t              xs_log_writes;
+       __uint32_t              xs_log_blocks;
+       __uint32_t              xs_log_noiclogs;
+       __uint32_t              xs_log_force;
+       __uint32_t              xs_log_force_sleep;
+# define XFSSTAT_END_TAIL_PUSHING      (XFSSTAT_END_LOG_OPS+10)
+       __uint32_t              xs_try_logspace;
+       __uint32_t              xs_sleep_logspace;
+       __uint32_t              xs_push_ail;
+       __uint32_t              xs_push_ail_success;
+       __uint32_t              xs_push_ail_pushbuf;
+       __uint32_t              xs_push_ail_pinned;
+       __uint32_t              xs_push_ail_locked;
+       __uint32_t              xs_push_ail_flushing;
+       __uint32_t              xs_push_ail_restarts;
+       __uint32_t              xs_push_ail_flush;
+# define XFSSTAT_END_WRITE_CONVERT     (XFSSTAT_END_TAIL_PUSHING+2)
+       __uint32_t              xs_xstrat_quick;
+       __uint32_t              xs_xstrat_split;
+# define XFSSTAT_END_READ_WRITE_OPS    (XFSSTAT_END_WRITE_CONVERT+2)
+       __uint32_t              xs_write_calls;
+       __uint32_t              xs_read_calls;
+# define XFSSTAT_END_ATTRIBUTE_OPS     (XFSSTAT_END_READ_WRITE_OPS+4)
+       __uint32_t              xs_attr_get;
+       __uint32_t              xs_attr_set;
+       __uint32_t              xs_attr_remove;
+       __uint32_t              xs_attr_list;
+# define XFSSTAT_END_INODE_CLUSTER     (XFSSTAT_END_ATTRIBUTE_OPS+3)
+       __uint32_t              xs_iflush_count;
+       __uint32_t              xs_icluster_flushcnt;
+       __uint32_t              xs_icluster_flushinode;
+# define XFSSTAT_END_VNODE_OPS         (XFSSTAT_END_INODE_CLUSTER+8)
+       __uint32_t              vn_active;      /* # vnodes not on free lists */
+       __uint32_t              vn_alloc;       /* # times vn_alloc called */
+       __uint32_t              vn_get;         /* # times vn_get called */
+       __uint32_t              vn_hold;        /* # times vn_hold called */
+       __uint32_t              vn_rele;        /* # times vn_rele called */
+       __uint32_t              vn_reclaim;     /* # times vn_reclaim called */
+       __uint32_t              vn_remove;      /* # times vn_remove called */
+       __uint32_t              vn_free;        /* # times vn_free called */
+#define XFSSTAT_END_BUF                        (XFSSTAT_END_VNODE_OPS+9)
+       __uint32_t              pb_get;
+       __uint32_t              pb_create;
+       __uint32_t              pb_get_locked;
+       __uint32_t              pb_get_locked_waited;
+       __uint32_t              pb_busy_locked;
+       __uint32_t              pb_miss_locked;
+       __uint32_t              pb_page_retries;
+       __uint32_t              pb_page_found;
+       __uint32_t              pb_get_read;
+/* Extra precision counters */
+       __uint64_t              xs_xstrat_bytes;
+       __uint64_t              xs_write_bytes;
+       __uint64_t              xs_read_bytes;
+};
+
+DECLARE_PER_CPU(struct xfsstats, xfsstats);
+
+/* We don't disable preemption; we're not too worried about
+ * poking the wrong CPU's stat for now. */
+#define XFS_STATS_INC(count)           (__get_cpu_var(xfsstats).count++)
+#define XFS_STATS_DEC(count)           (__get_cpu_var(xfsstats).count--)
+#define XFS_STATS_ADD(count, inc)      (__get_cpu_var(xfsstats).count += (inc))
+
+extern void xfs_init_procfs(void);
+extern void xfs_cleanup_procfs(void);
+
+
+#else  /* !CONFIG_PROC_FS */
+
+# define XFS_STATS_INC(count)
+# define XFS_STATS_DEC(count)
+# define XFS_STATS_ADD(count, inc)
+
+static __inline void xfs_init_procfs(void) { };
+static __inline void xfs_cleanup_procfs(void) { };
+
+#endif /* !CONFIG_PROC_FS */
+
+#endif /* __XFS_STATS_H__ */
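
The per-CPU xfsstats copies above let each CPU bump its own counters without locking; a reader then sums across the copies, as xfs_read_xfsstats() does. A rough user-space analogue using one slot per thread instead of per CPU is sketched below (build with -lpthread; all names are invented).

#include <pthread.h>
#include <stdio.h>

#define NTHREADS 4

/* One counter slot per thread, analogous to the per-CPU xfsstats copies. */
static unsigned long write_calls[NTHREADS];

static void *worker(void *arg)
{
        int slot = (int)(long)arg;
        int i;

        /* Each thread bumps only its own slot, so no locking is needed. */
        for (i = 0; i < 100000; i++)
                write_calls[slot]++;
        return NULL;
}

int main(void)
{
        pthread_t tid[NTHREADS];
        unsigned long total = 0;
        int i;

        for (i = 0; i < NTHREADS; i++)
                pthread_create(&tid[i], NULL, worker, (void *)(long)i);
        for (i = 0; i < NTHREADS; i++)
                pthread_join(tid[i], NULL);

        /* The reader sums the slots, like xfs_read_xfsstats does per CPU. */
        for (i = 0; i < NTHREADS; i++)
                total += write_calls[i];

        printf("write_calls total: %lu\n", total);
        return 0;
}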
diff --git a/fs/xfs/linux/xfs_super.c b/fs/xfs/linux/xfs_super.c
new file mode 100644 (file)
index 0000000..bbaf61b
--- /dev/null
@@ -0,0 +1,850 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_clnt.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_cap.h"
+#include "xfs_mac.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_version.h"
+
+#include <linux/namei.h>
+#include <linux/init.h>
+#include <linux/mount.h>
+#include <linux/suspend.h>
+
+STATIC struct quotactl_ops linvfs_qops;
+STATIC struct super_operations linvfs_sops;
+STATIC struct export_operations linvfs_export_ops;
+STATIC kmem_cache_t * linvfs_inode_cachep;
+
+STATIC struct xfs_mount_args *
+xfs_args_allocate(
+       struct super_block      *sb)
+{
+       struct xfs_mount_args   *args;
+
+       args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP);
+       args->logbufs = args->logbufsize = -1;
+       strncpy(args->fsname, sb->s_id, MAXNAMELEN);
+
+       /* Copy the already-parsed mount(2) flags we're interested in */
+       if (sb->s_flags & MS_NOATIME)
+               args->flags |= XFSMNT_NOATIME;
+
+       /* Default to 32 bit inodes on Linux all the time */
+       args->flags |= XFSMNT_32BITINODES;
+
+       return args;
+}
+
+__uint64_t
+xfs_max_file_offset(
+       unsigned int            blockshift)
+{
+       unsigned int            pagefactor = 1;
+       unsigned int            bitshift = BITS_PER_LONG - 1;
+
+       /* Figure out the maximum file size; on Linux this can depend on
+        * the filesystem blocksize (on 32 bit platforms).
+        * __block_prepare_write does this in an [unsigned] long...
+        *      page->index << (PAGE_CACHE_SHIFT - bbits)
+        * So, for page sized blocks (4K on 32 bit platforms),
+        * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
+        *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
+        * but for smaller blocksizes it is less (bbits = log2 bsize).
+        * Note1: get_block_t takes a long (implicit cast from above)
+        * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
+        * can optionally convert the [unsigned] long from above into
+        * an [unsigned] long long.
+        */
+
+#if BITS_PER_LONG == 32
+# if defined(CONFIG_LBD)
+       ASSERT(sizeof(sector_t) == 8);
+       pagefactor = PAGE_CACHE_SIZE;
+       bitshift = BITS_PER_LONG;
+# else
+       pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
+# endif
+#endif
+
+       return (((__uint64_t)pagefactor) << bitshift) - 1;
+}
+
+STATIC __inline__ void
+xfs_set_inodeops(
+       struct inode            *inode)
+{
+       vnode_t                 *vp = LINVFS_GET_VP(inode);
+
+       if (vp->v_type == VNON) {
+               make_bad_inode(inode);
+       } else if (S_ISREG(inode->i_mode)) {
+               inode->i_op = &linvfs_file_inode_operations;
+               inode->i_fop = &linvfs_file_operations;
+               inode->i_mapping->a_ops = &linvfs_aops;
+       } else if (S_ISDIR(inode->i_mode)) {
+               inode->i_op = &linvfs_dir_inode_operations;
+               inode->i_fop = &linvfs_dir_operations;
+       } else if (S_ISLNK(inode->i_mode)) {
+               inode->i_op = &linvfs_symlink_inode_operations;
+               if (inode->i_blocks)
+                       inode->i_mapping->a_ops = &linvfs_aops;
+       } else {
+               inode->i_op = &linvfs_file_inode_operations;
+               init_special_inode(inode, inode->i_mode, inode->i_rdev);
+       }
+}
+
+STATIC __inline__ void
+xfs_revalidate_inode(
+       xfs_mount_t             *mp,
+       vnode_t                 *vp,
+       xfs_inode_t             *ip)
+{
+       struct inode            *inode = LINVFS_GET_IP(vp);
+
+       inode->i_mode   = (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type);
+       inode->i_nlink  = ip->i_d.di_nlink;
+       inode->i_uid    = ip->i_d.di_uid;
+       inode->i_gid    = ip->i_d.di_gid;
+       if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) {
+               inode->i_rdev = 0;
+       } else {
+               xfs_dev_t dev = ip->i_df.if_u2.if_rdev;
+               inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
+       }
+       inode->i_blksize = PAGE_CACHE_SIZE;
+       inode->i_generation = ip->i_d.di_gen;
+       i_size_write(inode, ip->i_d.di_size);
+       inode->i_blocks =
+               XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
+       inode->i_atime.tv_sec   = ip->i_d.di_atime.t_sec;
+       inode->i_atime.tv_nsec  = ip->i_d.di_atime.t_nsec;
+       inode->i_mtime.tv_sec   = ip->i_d.di_mtime.t_sec;
+       inode->i_mtime.tv_nsec  = ip->i_d.di_mtime.t_nsec;
+       inode->i_ctime.tv_sec   = ip->i_d.di_ctime.t_sec;
+       inode->i_ctime.tv_nsec  = ip->i_d.di_ctime.t_nsec;
+       if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+               inode->i_flags |= S_IMMUTABLE;
+       else
+               inode->i_flags &= ~S_IMMUTABLE;
+       if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
+               inode->i_flags |= S_APPEND;
+       else
+               inode->i_flags &= ~S_APPEND;
+       if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
+               inode->i_flags |= S_SYNC;
+       else
+               inode->i_flags &= ~S_SYNC;
+       if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
+               inode->i_flags |= S_NOATIME;
+       else
+               inode->i_flags &= ~S_NOATIME;
+       vp->v_flag &= ~VMODIFIED;
+}
+
+void
+xfs_initialize_vnode(
+       bhv_desc_t              *bdp,
+       vnode_t                 *vp,
+       bhv_desc_t              *inode_bhv,
+       int                     unlock)
+{
+       xfs_inode_t             *ip = XFS_BHVTOI(inode_bhv);
+       struct inode            *inode = LINVFS_GET_IP(vp);
+
+       if (!inode_bhv->bd_vobj) {
+               vp->v_vfsp = bhvtovfs(bdp);
+               bhv_desc_init(inode_bhv, ip, vp, &xfs_vnodeops);
+               bhv_insert(VN_BHV_HEAD(vp), inode_bhv);
+       }
+
+       vp->v_type = IFTOVT(ip->i_d.di_mode);
+
+       /* If we have been called during the new inode create process,
+        * we are too early to fill in the Linux inode, so bail out.
+        */
+       if (vp->v_type == VNON)
+               return;
+
+       xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
+
+       /* For new inodes we need to set the ops vectors,
+        * and unlock the inode.
+        */
+       if (unlock && (inode->i_state & I_NEW)) {
+               xfs_set_inodeops(inode);
+               unlock_new_inode(inode);
+       }
+}
+
+void
+xfs_flush_inode(
+       xfs_inode_t     *ip)
+{
+       struct inode    *inode = LINVFS_GET_IP(XFS_ITOV(ip));
+
+       filemap_flush(inode->i_mapping);
+}
+
+void
+xfs_flush_device(
+       xfs_inode_t     *ip)
+{
+       sync_blockdev(XFS_ITOV(ip)->v_vfsp->vfs_super->s_bdev);
+       xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
+}
+
+int
+xfs_blkdev_get(
+       xfs_mount_t             *mp,
+       const char              *name,
+       struct block_device     **bdevp)
+{
+       int                     error = 0;
+
+       *bdevp = open_bdev_excl(name, 0, mp);
+       if (IS_ERR(*bdevp)) {
+               error = PTR_ERR(*bdevp);
+               printk("XFS: Invalid device [%s], error=%d\n", name, error);
+       }
+
+       return -error;
+}
+
+void
+xfs_blkdev_put(
+       struct block_device     *bdev)
+{
+       if (bdev)
+               close_bdev_excl(bdev);
+}
+
+
+STATIC struct inode *
+linvfs_alloc_inode(
+       struct super_block      *sb)
+{
+       vnode_t                 *vp;
+
+       vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_cachep, 
+                kmem_flags_convert(KM_SLEEP));
+       if (!vp)
+               return NULL;
+       return LINVFS_GET_IP(vp);
+}
+
+STATIC void
+linvfs_destroy_inode(
+       struct inode            *inode)
+{
+       kmem_cache_free(linvfs_inode_cachep, LINVFS_GET_VP(inode));
+}
+
+STATIC void
+init_once(
+       void                    *data,
+       kmem_cache_t            *cachep,
+       unsigned long           flags)
+{
+       vnode_t                 *vp = (vnode_t *)data;
+
+       if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+           SLAB_CTOR_CONSTRUCTOR)
+               inode_init_once(LINVFS_GET_IP(vp));
+}
+
+STATIC int
+init_inodecache( void )
+{
+       linvfs_inode_cachep = kmem_cache_create("linvfs_icache",
+                               sizeof(vnode_t), 0,
+                               SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
+                               init_once, NULL);
+
+       if (linvfs_inode_cachep == NULL)
+               return -ENOMEM;
+       return 0;
+}
+
+STATIC void
+destroy_inodecache( void )
+{
+       if (kmem_cache_destroy(linvfs_inode_cachep))
+               printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__);
+}
+
+/*
+ * Attempt to flush the inode. This will actually fail
+ * if the inode is pinned, but we dirty the inode again
+ * at the point when it is unpinned after a log write,
+ * since that is when the inode itself becomes flushable.
+ */
+STATIC void
+linvfs_write_inode(
+       struct inode            *inode,
+       int                     sync)
+{
+       vnode_t                 *vp = LINVFS_GET_VP(inode);
+       int                     error, flags = FLUSH_INODE;
+
+       if (vp) {
+               vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+               if (sync)
+                       flags |= FLUSH_SYNC;
+               VOP_IFLUSH(vp, flags, error);
+       }
+}
+
+STATIC void
+linvfs_clear_inode(
+       struct inode            *inode)
+{
+       vnode_t                 *vp = LINVFS_GET_VP(inode);
+
+       if (vp) {
+               vn_rele(vp);
+               vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+               /*
+                * Do all our cleanup, and remove this vnode.
+                */
+               vn_remove(vp);
+       }
+}
+
+
+#define SYNCD_FLAGS    (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR)
+
+STATIC int
+xfssyncd(
+       void                    *arg)
+{
+       vfs_t                   *vfsp = (vfs_t *) arg;
+       int                     error;
+
+       daemonize("xfssyncd");
+
+       vfsp->vfs_sync_task = current;
+       wmb();
+       wake_up(&vfsp->vfs_wait_sync_task);
+
+       for (;;) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(xfs_syncd_interval);
+               /* swsusp */
+               if (current->flags & PF_FREEZE)
+                       refrigerator(PF_FREEZE);
+               if (vfsp->vfs_flag & VFS_UMOUNT)
+                       break;
+               if (vfsp->vfs_flag & VFS_RDONLY)
+                       continue;
+               VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error);
+       }
+
+       vfsp->vfs_sync_task = NULL;
+       wmb();
+       wake_up(&vfsp->vfs_wait_sync_task);
+
+       return 0;
+}
+
+STATIC int
+linvfs_start_syncd(
+       vfs_t                   *vfsp)
+{
+       int                     pid;
+
+       pid = kernel_thread(xfssyncd, (void *) vfsp,
+                       CLONE_VM | CLONE_FS | CLONE_FILES);
+       if (pid < 0)
+               return -pid;
+       wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task);
+       return 0;
+}
+
+STATIC void
+linvfs_stop_syncd(
+       vfs_t                   *vfsp)
+{
+       vfsp->vfs_flag |= VFS_UMOUNT;
+       wmb();
+
+       wake_up_process(vfsp->vfs_sync_task);
+       wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task);
+}
+
+STATIC void
+linvfs_put_super(
+       struct super_block      *sb)
+{
+       vfs_t                   *vfsp = LINVFS_GET_VFS(sb);
+       int                     error;
+
+       linvfs_stop_syncd(vfsp);
+       VFS_SYNC(vfsp, SYNC_ATTR|SYNC_DELWRI, NULL, error);
+       if (!error)
+               VFS_UNMOUNT(vfsp, 0, NULL, error);
+       if (error) {
+               printk("XFS unmount got error %d\n", error);
+               printk("%s: vfsp/0x%p left dangling!\n", __FUNCTION__, vfsp);
+               return;
+       }
+
+       vfs_deallocate(vfsp);
+}
+
+STATIC void
+linvfs_write_super(
+       struct super_block      *sb)
+{
+       vfs_t                   *vfsp = LINVFS_GET_VFS(sb);
+       int                     error;
+
+       if (sb->s_flags & MS_RDONLY) {
+               sb->s_dirt = 0; /* paranoia */
+               return;
+       }
+       /* Push the log and superblock a little */
+       VFS_SYNC(vfsp, SYNC_FSDATA, NULL, error);
+       sb->s_dirt = 0;
+}
+
+STATIC int
+linvfs_sync_super(
+       struct super_block      *sb,
+       int                     wait)
+{
+       vfs_t           *vfsp = LINVFS_GET_VFS(sb);
+       int             error;
+       int             flags = SYNC_FSDATA;
+
+       if (wait)
+               flags |= SYNC_WAIT;
+
+       VFS_SYNC(vfsp, flags, NULL, error);
+       sb->s_dirt = 0;
+
+       return -error;
+}
+
+STATIC int
+linvfs_statfs(
+       struct super_block      *sb,
+       struct kstatfs          *statp)
+{
+       vfs_t                   *vfsp = LINVFS_GET_VFS(sb);
+       int                     error;
+
+       VFS_STATVFS(vfsp, statp, NULL, error);
+       return -error;
+}
+
+STATIC int
+linvfs_remount(
+       struct super_block      *sb,
+       int                     *flags,
+       char                    *options)
+{
+       vfs_t                   *vfsp = LINVFS_GET_VFS(sb);
+       struct xfs_mount_args   *args = xfs_args_allocate(sb);
+       int                     error;
+
+       VFS_PARSEARGS(vfsp, options, args, 1, error);
+       if (!error)
+               VFS_MNTUPDATE(vfsp, flags, args, error);
+       kmem_free(args, sizeof(*args));
+       return -error;
+}
+
+STATIC void
+linvfs_freeze_fs(
+       struct super_block      *sb)
+{
+       VFS_FREEZE(LINVFS_GET_VFS(sb));
+}
+
+STATIC struct dentry *
+linvfs_get_parent(
+       struct dentry           *child)
+{
+       int                     error;
+       vnode_t                 *vp, *cvp;
+       struct dentry           *parent;
+       struct inode            *ip = NULL;
+       struct dentry           dotdot;
+
+       dotdot.d_name.name = "..";
+       dotdot.d_name.len = 2;
+       dotdot.d_inode = 0;
+
+       cvp = NULL;
+       vp = LINVFS_GET_VP(child->d_inode);
+       VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error);
+
+       if (!error) {
+               ASSERT(cvp);
+               ip = LINVFS_GET_IP(cvp);
+               if (!ip) {
+                       VN_RELE(cvp);
+                       return ERR_PTR(-EACCES);
+               }
+       }
+       if (error)
+               return ERR_PTR(-error);
+       parent = d_alloc_anon(ip);
+       if (!parent) {
+               VN_RELE(cvp);
+               parent = ERR_PTR(-ENOMEM);
+       }
+       return parent;
+}
+
+STATIC struct dentry *
+linvfs_get_dentry(
+       struct super_block      *sb,
+       void                    *data)
+{
+       vnode_t                 *vp;
+       struct inode            *inode;
+       struct dentry           *result;
+       xfs_fid2_t              xfid;
+       vfs_t                   *vfsp = LINVFS_GET_VFS(sb);
+       int                     error;
+
+       xfid.fid_len = sizeof(xfs_fid2_t) - sizeof(xfid.fid_len);
+       xfid.fid_pad = 0;
+       xfid.fid_gen = ((__u32 *)data)[1];
+       xfid.fid_ino = ((__u32 *)data)[0];
+
+       VFS_VGET(vfsp, &vp, (fid_t *)&xfid, error);
+       if (error || vp == NULL)
+               return ERR_PTR(-ESTALE) ;
+
+       inode = LINVFS_GET_IP(vp);
+       result = d_alloc_anon(inode);
+       if (!result) {
+               iput(inode);
+               return ERR_PTR(-ENOMEM);
+       }
+       return result;
+}
+
+STATIC int
+linvfs_show_options(
+       struct seq_file         *m,
+       struct vfsmount         *mnt)
+{
+       struct vfs              *vfsp = LINVFS_GET_VFS(mnt->mnt_sb);
+       int                     error;
+
+       VFS_SHOWARGS(vfsp, m, error);
+       return error;
+}
+
+STATIC int
+linvfs_getxstate(
+       struct super_block      *sb,
+       struct fs_quota_stat    *fqs)
+{
+       struct vfs              *vfsp = LINVFS_GET_VFS(sb);
+       int                     error;
+
+       VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error);
+       return -error;
+}
+
+STATIC int
+linvfs_setxstate(
+       struct super_block      *sb,
+       unsigned int            flags,
+       int                     op)
+{
+       struct vfs              *vfsp = LINVFS_GET_VFS(sb);
+       int                     error;
+
+       VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error);
+       return -error;
+}
+
+STATIC int
+linvfs_getxquota(
+       struct super_block      *sb,
+       int                     type,
+       qid_t                   id,
+       struct fs_disk_quota    *fdq)
+{
+       struct vfs              *vfsp = LINVFS_GET_VFS(sb);
+       int                     error, getmode;
+
+       getmode = (type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETQUOTA;
+       VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error);
+       return -error;
+}
+
+STATIC int
+linvfs_setxquota(
+       struct super_block      *sb,
+       int                     type,
+       qid_t                   id,
+       struct fs_disk_quota    *fdq)
+{
+       struct vfs              *vfsp = LINVFS_GET_VFS(sb);
+       int                     error, setmode;
+
+       setmode = (type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETQLIM;
+       VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error);
+       return -error;
+}
+
+STATIC int
+linvfs_fill_super(
+       struct super_block      *sb,
+       void                    *data,
+       int                     silent)
+{
+       vnode_t                 *rootvp;
+       struct vfs              *vfsp = vfs_allocate();
+       struct xfs_mount_args   *args = xfs_args_allocate(sb);
+       struct kstatfs          statvfs;
+       int                     error, error2;
+
+       vfsp->vfs_super = sb;
+       LINVFS_SET_VFS(sb, vfsp);
+       if (sb->s_flags & MS_RDONLY)
+               vfsp->vfs_flag |= VFS_RDONLY;
+       bhv_insert_all_vfsops(vfsp);
+
+       VFS_PARSEARGS(vfsp, (char *)data, args, 0, error);
+       if (error) {
+               bhv_remove_all_vfsops(vfsp, 1);
+               goto fail_vfsop;
+       }
+
+       sb_min_blocksize(sb, BBSIZE);
+       sb->s_export_op = &linvfs_export_ops;
+       sb->s_qcop = &linvfs_qops;
+       sb->s_op = &linvfs_sops;
+
+       VFS_MOUNT(vfsp, args, NULL, error);
+       if (error) {
+               bhv_remove_all_vfsops(vfsp, 1);
+               goto fail_vfsop;
+       }
+
+       VFS_STATVFS(vfsp, &statvfs, NULL, error);
+       if (error)
+               goto fail_unmount;
+
+       sb->s_dirt = 1;
+       sb->s_magic = statvfs.f_type;
+       sb->s_blocksize = statvfs.f_bsize;
+       sb->s_blocksize_bits = ffs(statvfs.f_bsize) - 1;
+       sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
+       set_posix_acl_flag(sb);
+
+       VFS_ROOT(vfsp, &rootvp, error);
+       if (error)
+               goto fail_unmount;
+
+       sb->s_root = d_alloc_root(LINVFS_GET_IP(rootvp));
+       if (!sb->s_root) {
+               error = ENOMEM;
+               goto fail_vnrele;
+       }
+       if (is_bad_inode(sb->s_root->d_inode)) {
+               error = EINVAL;
+               goto fail_vnrele;
+       }
+       if ((error = linvfs_start_syncd(vfsp)))
+               goto fail_vnrele;
+       vn_trace_exit(rootvp, __FUNCTION__, (inst_t *)__return_address);
+
+       kmem_free(args, sizeof(*args));
+       return 0;
+
+fail_vnrele:
+       if (sb->s_root) {
+               dput(sb->s_root);
+               sb->s_root = NULL;
+       } else {
+               VN_RELE(rootvp);
+       }
+
+fail_unmount:
+       VFS_UNMOUNT(vfsp, 0, NULL, error2);
+
+fail_vfsop:
+       vfs_deallocate(vfsp);
+       kmem_free(args, sizeof(*args));
+       return -error;
+}
+
+STATIC struct super_block *
+linvfs_get_sb(
+       struct file_system_type *fs_type,
+       int                     flags,
+       const char              *dev_name,
+       void                    *data)
+{
+       return get_sb_bdev(fs_type, flags, dev_name, data, linvfs_fill_super);
+}
+
+
+STATIC struct export_operations linvfs_export_ops = {
+       .get_parent             = linvfs_get_parent,
+       .get_dentry             = linvfs_get_dentry,
+};
+
+STATIC struct super_operations linvfs_sops = {
+       .alloc_inode            = linvfs_alloc_inode,
+       .destroy_inode          = linvfs_destroy_inode,
+       .write_inode            = linvfs_write_inode,
+       .clear_inode            = linvfs_clear_inode,
+       .put_super              = linvfs_put_super,
+       .write_super            = linvfs_write_super,
+       .sync_fs                = linvfs_sync_super,
+       .write_super_lockfs     = linvfs_freeze_fs,
+       .statfs                 = linvfs_statfs,
+       .remount_fs             = linvfs_remount,
+       .show_options           = linvfs_show_options,
+};
+
+STATIC struct quotactl_ops linvfs_qops = {
+       .get_xstate             = linvfs_getxstate,
+       .set_xstate             = linvfs_setxstate,
+       .get_xquota             = linvfs_getxquota,
+       .set_xquota             = linvfs_setxquota,
+};
+
+STATIC struct file_system_type xfs_fs_type = {
+       .owner                  = THIS_MODULE,
+       .name                   = "xfs",
+       .get_sb                 = linvfs_get_sb,
+       .kill_sb                = kill_block_super,
+       .fs_flags               = FS_REQUIRES_DEV,
+};
+
+
+STATIC int __init
+init_xfs_fs( void )
+{
+       int                     error;
+       struct sysinfo          si;
+       static char             message[] __initdata = KERN_INFO \
+               XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n";
+
+       printk(message);
+
+       si_meminfo(&si);
+       xfs_physmem = si.totalram;
+
+       ktrace_init(64);
+
+       error = init_inodecache();
+       if (error < 0)
+               goto undo_inodecache;
+
+       error = pagebuf_init();
+       if (error < 0)
+               goto undo_pagebuf;
+
+       vn_init();
+       xfs_init();
+       uuid_init();
+       vfs_initdmapi();
+       vfs_initquota();
+
+       error = register_filesystem(&xfs_fs_type);
+       if (error)
+               goto undo_register;
+       return 0;
+
+undo_register:
+       pagebuf_terminate();
+
+undo_pagebuf:
+       destroy_inodecache();
+
+undo_inodecache:
+       return error;
+}
+
+STATIC void __exit
+exit_xfs_fs( void )
+{
+       vfs_exitquota();
+       vfs_exitdmapi();
+       unregister_filesystem(&xfs_fs_type);
+       xfs_cleanup();
+       pagebuf_terminate();
+       destroy_inodecache();
+       ktrace_uninit();
+}
+
+module_init(init_xfs_fs);
+module_exit(exit_xfs_fs);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
+MODULE_LICENSE("GPL");
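
The comment in xfs_max_file_offset() walks through how the page-cache index arithmetic bounds the maximum file size on 32-bit machines. As a quick sanity check of that arithmetic, here is a stand-alone sketch that redoes the calculation for a hypothetical 32-bit box with 4K pages and no CONFIG_LBD; the ~8 TiB figure for page-sized blocks matches the comment.

#include <stdio.h>
#include <stdint.h>

/*
 * Re-run the arithmetic: pagefactor = PAGE_SIZE >> (PAGE_SHIFT - blockshift),
 * bitshift = BITS_PER_LONG - 1 = 31 on the assumed 32-bit box.
 */
static uint64_t max_file_offset(unsigned int blockshift)
{
        unsigned int page_shift = 12;            /* 4K pages assumed */
        unsigned int page_size = 1u << page_shift;
        unsigned int bitshift = 31;              /* BITS_PER_LONG - 1 */
        unsigned int pagefactor = page_size >> (page_shift - blockshift);

        return ((uint64_t)pagefactor << bitshift) - 1;
}

int main(void)
{
        /* 4K blocks: wraps just under 8 TiB, matching the comment. */
        printf("4K blocks:   %llu bytes\n",
               (unsigned long long)max_file_offset(12));
        /* 512-byte blocks: a proportionally smaller limit (~1 TiB). */
        printf("512B blocks: %llu bytes\n",
               (unsigned long long)max_file_offset(9));
        return 0;
}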
diff --git a/fs/xfs/linux/xfs_super.h b/fs/xfs/linux/xfs_super.h
new file mode 100644 (file)
index 0000000..5576269
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPER_H__
+#define __XFS_SUPER_H__
+
+#ifdef CONFIG_XFS_DMAPI
+# define vfs_insertdmapi(vfs)  vfs_insertops(vfsp, &xfs_dmops)
+# define vfs_initdmapi()       dmapi_init()
+# define vfs_exitdmapi()       dmapi_uninit()
+#else
+# define vfs_insertdmapi(vfs)  do { } while (0)
+# define vfs_initdmapi()       do { } while (0)
+# define vfs_exitdmapi()       do { } while (0)
+#endif
+
+#ifdef CONFIG_XFS_QUOTA
+# define vfs_insertquota(vfs)  vfs_insertops(vfsp, &xfs_qmops)
+extern void xfs_qm_init(void);
+extern void xfs_qm_exit(void);
+# define vfs_initquota()       xfs_qm_init()
+# define vfs_exitquota()       xfs_qm_exit()
+#else
+# define vfs_insertquota(vfs)  do { } while (0)
+# define vfs_initquota()       do { } while (0)
+# define vfs_exitquota()       do { } while (0)
+#endif
+
+#ifdef CONFIG_XFS_POSIX_ACL
+# define XFS_ACL_STRING                "ACLs, "
+# define set_posix_acl_flag(sb)        ((sb)->s_flags |= MS_POSIXACL)
+#else
+# define XFS_ACL_STRING
+# define set_posix_acl_flag(sb)        do { } while (0)
+#endif
+
+#ifdef CONFIG_XFS_SECURITY
+# define XFS_SECURITY_STRING   "security attributes, "
+# define ENOSECURITY           0
+#else
+# define XFS_SECURITY_STRING
+# define ENOSECURITY           EOPNOTSUPP
+#endif
+
+#ifdef CONFIG_XFS_RT
+# define XFS_REALTIME_STRING   "realtime, "
+#else
+# define XFS_REALTIME_STRING
+#endif
+
+#if XFS_BIG_BLKNOS
+# if XFS_BIG_INUMS
+#  define XFS_BIGFS_STRING     "large block/inode numbers, "
+# else
+#  define XFS_BIGFS_STRING     "large block numbers, "
+# endif
+#else
+# define XFS_BIGFS_STRING
+#endif
+
+#ifdef CONFIG_XFS_TRACE
+# define XFS_TRACE_STRING      "tracing, "
+#else
+# define XFS_TRACE_STRING
+#endif
+
+#ifdef XFSDEBUG
+# define XFS_DBG_STRING                "debug"
+#else
+# define XFS_DBG_STRING                "no debug"
+#endif
+
+#define XFS_BUILD_OPTIONS      XFS_ACL_STRING \
+                               XFS_SECURITY_STRING \
+                               XFS_REALTIME_STRING \
+                               XFS_BIGFS_STRING \
+                               XFS_TRACE_STRING \
+                               XFS_DBG_STRING /* DBG must be last */
+
+#define LINVFS_GET_VFS(s) \
+       (vfs_t *)((s)->s_fs_info)
+#define LINVFS_SET_VFS(s, vfsp) \
+       ((s)->s_fs_info = vfsp)
+
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_buftarg;
+struct block_device;
+
+extern __uint64_t xfs_max_file_offset(unsigned int);
+
+extern void xfs_initialize_vnode(bhv_desc_t *, vnode_t *, bhv_desc_t *, int);
+
+extern void xfs_flush_inode(struct xfs_inode *);
+extern void xfs_flush_device(struct xfs_inode *);
+
+extern int  xfs_blkdev_get(struct xfs_mount *, const char *,
+                               struct block_device **);
+extern void xfs_blkdev_put(struct block_device *);
+
+#endif /* __XFS_SUPER_H__ */
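
XFS_BUILD_OPTIONS above is assembled purely by the preprocessor: each feature contributes either a "name, " string literal or an empty macro, and C's adjacent string literal concatenation glues the pieces together at compile time. A stand-alone sketch of the same trick, with made-up feature names, is shown below.

#include <stdio.h>

#define DEMO_HAS_ACLS 1
/* #define DEMO_HAS_RT 1 */

#ifdef DEMO_HAS_ACLS
# define DEMO_ACL_STRING "ACLs, "
#else
# define DEMO_ACL_STRING
#endif

#ifdef DEMO_HAS_RT
# define DEMO_RT_STRING "realtime, "
#else
# define DEMO_RT_STRING
#endif

/* Adjacent literals collapse into one string; empty macros drop out. */
#define DEMO_BUILD_OPTIONS DEMO_ACL_STRING DEMO_RT_STRING "debug"

int main(void)
{
        /* Prints "ACLs, debug" with the defines chosen above. */
        printf("build options: %s\n", DEMO_BUILD_OPTIONS);
        return 0;
}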
diff --git a/fs/xfs/linux/xfs_sysctl.c b/fs/xfs/linux/xfs_sysctl.c
new file mode 100644 (file)
index 0000000..b9a97c9
--- /dev/null
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2001-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_rw.h"
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+
+
+static struct ctl_table_header *xfs_table_header;
+
+
+#ifdef CONFIG_PROC_FS
+STATIC int
+xfs_stats_clear_proc_handler(
+       ctl_table       *ctl,
+       int             write,
+       struct file     *filp,
+       void            *buffer,
+       size_t          *lenp)
+{
+       int             c, ret, *valp = ctl->data;
+       __uint32_t      vn_active;
+
+       ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp);
+
+       if (!ret && write && *valp) {
+               printk("XFS Clearing xfsstats\n");
+               for (c = 0; c < NR_CPUS; c++) {
+                       if (!cpu_possible(c)) continue;
+                       preempt_disable();
+                       /* save vn_active, it's a universal truth! */
+                       vn_active = per_cpu(xfsstats, c).vn_active;
+                       memset(&per_cpu(xfsstats, c), 0,
+                              sizeof(struct xfsstats));
+                       per_cpu(xfsstats, c).vn_active = vn_active;
+                       preempt_enable();
+               }
+               xfs_stats_clear = 0;
+       }
+
+       return ret;
+}
+#endif /* CONFIG_PROC_FS */
+
+STATIC ctl_table xfs_table[] = {
+       {XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val,
+       sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+       &sysctl_intvec, NULL, 
+       &xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max},
+
+       {XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val,
+       sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+       &sysctl_intvec, NULL,
+       &xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max},
+
+       {XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val,
+       sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+       &sysctl_intvec, NULL, 
+       &xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max},
+
+       {XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val,
+       sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+       &sysctl_intvec, NULL, 
+       &xfs_params.panic_mask.min, &xfs_params.panic_mask.max},
+
+       {XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val,
+       sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+       &sysctl_intvec, NULL, 
+       &xfs_params.error_level.min, &xfs_params.error_level.max},
+
+       {XFS_SYNC_INTERVAL, "sync_interval", &xfs_params.sync_interval.val,
+       sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+       &sysctl_intvec, NULL, 
+       &xfs_params.sync_interval.min, &xfs_params.sync_interval.max},
+
+       {XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val,
+       sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+       &sysctl_intvec, NULL,
+       &xfs_params.inherit_sync.min, &xfs_params.inherit_sync.max},
+
+       {XFS_INHERIT_NODUMP, "inherit_nodump", &xfs_params.inherit_nodump.val,
+       sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+       &sysctl_intvec, NULL,
+       &xfs_params.inherit_nodump.min, &xfs_params.inherit_nodump.max},
+
+       {XFS_INHERIT_NOATIME, "inherit_noatime", &xfs_params.inherit_noatim.val,
+       sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+       &sysctl_intvec, NULL,
+       &xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max},
+
+       {XFS_FLUSH_INTERVAL, "flush_interval", &xfs_params.flush_interval.val,
+       sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+       &sysctl_intvec, NULL,
+       &xfs_params.flush_interval.min, &xfs_params.flush_interval.max},
+
+       {XFS_AGE_BUFFER, "age_buffer", &xfs_params.age_buffer.val,
+       sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+       &sysctl_intvec, NULL,
+       &xfs_params.age_buffer.min, &xfs_params.age_buffer.max},
+
+       /* please keep this the last entry */
+#ifdef CONFIG_PROC_FS
+       {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val,
+       sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler,
+       &sysctl_intvec, NULL, 
+       &xfs_params.stats_clear.min, &xfs_params.stats_clear.max},
+#endif /* CONFIG_PROC_FS */
+
+       {0}
+};
+
+STATIC ctl_table xfs_dir_table[] = {
+       {FS_XFS, "xfs", NULL, 0, 0555, xfs_table},
+       {0}
+};
+
+STATIC ctl_table xfs_root_table[] = {
+       {CTL_FS, "fs",  NULL, 0, 0555, xfs_dir_table},
+       {0}
+};
+
+void
+xfs_sysctl_register(void)
+{
+       xfs_table_header = register_sysctl_table(xfs_root_table, 1);
+}
+
+void
+xfs_sysctl_unregister(void)
+{
+       if (xfs_table_header)
+               unregister_sysctl_table(xfs_table_header);
+}
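The table above wires these tunables into the generic sysctl tree under fs.xfs, so with procfs enabled they should surface as /proc/sys/fs/xfs/<name> with mode 0644, and writing 1 to stats_clear invokes the handler above (which zeroes the per-cpu counters and then resets the flag). A minimal userspace sketch of poking two of them; the paths assume the standard sysctl-to-procfs mapping, and the example is editorial illustration only, not part of the patch:

#include <stdio.h>

/* Read fs.xfs.error_level, then request a statistics reset via fs.xfs.stats_clear
 * (the write needs privileges and a kernel carrying this XFS code). */
int main(void)
{
        char buf[32];
        FILE *f = fopen("/proc/sys/fs/xfs/error_level", "r");

        if (f && fgets(buf, sizeof(buf), f))
                printf("error_level = %s", buf);
        if (f)
                fclose(f);

        f = fopen("/proc/sys/fs/xfs/stats_clear", "w");
        if (f) {
                fputs("1\n", f);
                fclose(f);
        }
        return 0;
}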
diff --git a/fs/xfs/linux/xfs_sysctl.h b/fs/xfs/linux/xfs_sysctl.h
new file mode 100644
index 0000000..0532d40
--- /dev/null
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2001-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef __XFS_SYSCTL_H__
+#define __XFS_SYSCTL_H__
+
+#include <linux/sysctl.h>
+
+/*
+ * Tunable xfs parameters
+ */
+
+typedef struct xfs_sysctl_val {
+       int min;
+       int val;
+       int max;
+} xfs_sysctl_val_t;
+
+typedef struct xfs_param {
+       xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/
+       xfs_sysctl_val_t sgid_inherit;  /* Inherit S_ISGID bit if the process
+                                        * is not a member of the parent
+                                        * dir's group */
+       xfs_sysctl_val_t symlink_mode;  /* Link creat mode affected by umask */
+       xfs_sysctl_val_t panic_mask;    /* bitmask to cause panic on errors. */
+       xfs_sysctl_val_t error_level;   /* Degree of reporting for problems  */
+       xfs_sysctl_val_t sync_interval; /* time between sync calls           */
+       xfs_sysctl_val_t stats_clear;   /* Reset all XFS statistics to zero. */
+       xfs_sysctl_val_t inherit_sync;  /* Inherit the "sync" inode flag. */
+       xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */
+       xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */
+       xfs_sysctl_val_t flush_interval;/* interval between runs of the
+                                        * delwri flush daemon.  */
+       xfs_sysctl_val_t age_buffer;    /* time for buffer to age before
+                                        * we flush it.  */
+} xfs_param_t;
+
+/*
+ * xfs_error_level:
+ *
+ * How much error reporting will be done when internal problems are
+ * encountered.  These problems normally return an EFSCORRUPTED to their
+ * caller, with no other information reported.
+ *
+ * 0   No error reports
+ * 1   Report EFSCORRUPTED errors that will cause a filesystem shutdown
+ * 5   Report all EFSCORRUPTED errors (all of the above errors, plus any
+ *     additional errors that are known to not cause shutdowns)
+ *
+ * xfs_panic_mask bit 0x8 turns the error reports into panics
+ */
+
+enum {
+       XFS_RESTRICT_CHOWN = 3,
+       XFS_SGID_INHERIT = 4,
+       XFS_SYMLINK_MODE = 5,
+       XFS_PANIC_MASK = 6,
+       XFS_ERRLEVEL = 7,
+       XFS_SYNC_INTERVAL = 8,
+       XFS_STATS_CLEAR = 12,
+       XFS_INHERIT_SYNC = 13,
+       XFS_INHERIT_NODUMP = 14,
+       XFS_INHERIT_NOATIME = 15,
+       XFS_FLUSH_INTERVAL = 16,
+       XFS_AGE_BUFFER = 17,
+};
+
+extern xfs_param_t     xfs_params;
+
+#ifdef CONFIG_SYSCTL
+extern void xfs_sysctl_register(void);
+extern void xfs_sysctl_unregister(void);
+#else
+# define xfs_sysctl_register()         do { } while (0)
+# define xfs_sysctl_unregister()       do { } while (0)
+#endif /* CONFIG_SYSCTL */
+
+#endif /* __XFS_SYSCTL_H__ */
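The comment block above documents the reporting contract for error_level and for bit 0x8 of panic_mask. Below is a standalone restatement of that contract as code; it is not the kernel's error path, levels 2 through 4 are treated like level 1 as an assumption, and causes_shutdown is a hypothetical input:

#include <stdio.h>

#define PANIC_ON_REPORTS        0x8     /* the xfs_panic_mask bit described above */

/* Would an EFSCORRUPTED-style error be reported at this error_level? */
static int would_report(int error_level, int causes_shutdown)
{
        if (error_level == 0)
                return 0;               /* 0: no error reports */
        if (error_level >= 5)
                return 1;               /* 5: report everything */
        return causes_shutdown;         /* 1 (and, by assumption, 2-4): shutdown-causing only */
}

int main(void)
{
        int level = 1, panic_mask = PANIC_ON_REPORTS, causes_shutdown = 1;

        if (!would_report(level, causes_shutdown))
                printf("silent\n");
        else if (panic_mask & PANIC_ON_REPORTS)
                printf("would panic\n");
        else
                printf("would report\n");
        return 0;
}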
diff --git a/fs/xfs/linux/xfs_version.h b/fs/xfs/linux/xfs_version.h
new file mode 100644
index 0000000..96f9639
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2001-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Dummy file that can contain a timestamp to put into the
+ * XFS init string, to help users keep track of what they're
+ * running
+ */
+
+#ifndef __XFS_VERSION_H__
+#define __XFS_VERSION_H__
+
+#define XFS_VERSION_STRING "SGI XFS"
+
+#endif /* __XFS_VERSION_H__ */
diff --git a/fs/xfs/linux/xfs_vfs.c b/fs/xfs/linux/xfs_vfs.c
new file mode 100644
index 0000000..2b75ccc
--- /dev/null
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_macros.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_clnt.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_imap.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_quota.h"
+
+int
+vfs_mount(
+       struct bhv_desc         *bdp,
+       struct xfs_mount_args   *args,
+       struct cred             *cr)
+{
+       struct bhv_desc         *next = bdp;
+
+       ASSERT(next);
+       while (! (bhvtovfsops(next))->vfs_mount)
+               next = BHV_NEXT(next);
+       return ((*bhvtovfsops(next)->vfs_mount)(next, args, cr));
+}
+
+int
+vfs_parseargs(
+       struct bhv_desc         *bdp,
+       char                    *s,
+       struct xfs_mount_args   *args,
+       int                     f)
+{
+       struct bhv_desc         *next = bdp;
+
+       ASSERT(next);
+       while (! (bhvtovfsops(next))->vfs_parseargs)
+               next = BHV_NEXT(next);
+       return ((*bhvtovfsops(next)->vfs_parseargs)(next, s, args, f));
+}
+
+int
+vfs_showargs(
+       struct bhv_desc         *bdp,
+       struct seq_file         *m)
+{
+       struct bhv_desc         *next = bdp;
+
+       ASSERT(next);
+       while (! (bhvtovfsops(next))->vfs_showargs)
+               next = BHV_NEXT(next);
+       return ((*bhvtovfsops(next)->vfs_showargs)(next, m));
+}
+
+int
+vfs_unmount(
+       struct bhv_desc         *bdp,
+       int                     fl,
+       struct cred             *cr)
+{
+       struct bhv_desc         *next = bdp;
+
+       ASSERT(next);
+       while (! (bhvtovfsops(next))->vfs_unmount)
+               next = BHV_NEXT(next);
+       return ((*bhvtovfsops(next)->vfs_unmount)(next, fl, cr));
+}
+
+int
+vfs_mntupdate(
+       struct bhv_desc         *bdp,
+       int                     *fl,
+       struct xfs_mount_args   *args)
+{
+       struct bhv_desc         *next = bdp;
+
+       ASSERT(next);
+       while (! (bhvtovfsops(next))->vfs_mntupdate)
+               next = BHV_NEXT(next);
+       return ((*bhvtovfsops(next)->vfs_mntupdate)(next, fl, args));
+}
+
+int
+vfs_root(
+       struct bhv_desc         *bdp,
+       struct vnode            **vpp)
+{
+       struct bhv_desc         *next = bdp;
+
+       ASSERT(next);
+       while (! (bhvtovfsops(next))->vfs_root)
+               next = BHV_NEXT(next);
+       return ((*bhvtovfsops(next)->vfs_root)(next, vpp));
+}
+
+int
+vfs_statvfs(
+       struct bhv_desc         *bdp,
+       xfs_statfs_t            *sp,
+       struct vnode            *vp)
+{
+       struct bhv_desc         *next = bdp;
+
+       ASSERT(next);
+       while (! (bhvtovfsops(next))->vfs_statvfs)
+               next = BHV_NEXT(next);
+       return ((*bhvtovfsops(next)->vfs_statvfs)(next, sp, vp));
+}
+
+int
+vfs_sync(
+       struct bhv_desc         *bdp,
+       int                     fl,
+       struct cred             *cr)
+{
+       struct bhv_desc         *next = bdp;
+
+       ASSERT(next);
+       while (! (bhvtovfsops(next))->vfs_sync)
+               next = BHV_NEXT(next);
+       return ((*bhvtovfsops(next)->vfs_sync)(next, fl, cr));
+}
+
+int
+vfs_vget(
+       struct bhv_desc         *bdp,
+       struct vnode            **vpp,
+       struct fid              *fidp)
+{
+       struct bhv_desc         *next = bdp;
+
+       ASSERT(next);
+       while (! (bhvtovfsops(next))->vfs_vget)
+               next = BHV_NEXT(next);
+       return ((*bhvtovfsops(next)->vfs_vget)(next, vpp, fidp));
+}
+
+int
+vfs_dmapiops(
+       struct bhv_desc         *bdp,
+       caddr_t                 addr)
+{
+       struct bhv_desc         *next = bdp;
+
+       ASSERT(next);
+       while (! (bhvtovfsops(next))->vfs_dmapiops)
+               next = BHV_NEXT(next);
+       return ((*bhvtovfsops(next)->vfs_dmapiops)(next, addr));
+}
+
+int
+vfs_quotactl(
+       struct bhv_desc         *bdp,
+       int                     cmd,
+       int                     id,
+       caddr_t                 addr)
+{
+       struct bhv_desc         *next = bdp;
+
+       ASSERT(next);
+       while (! (bhvtovfsops(next))->vfs_quotactl)
+               next = BHV_NEXT(next);
+       return ((*bhvtovfsops(next)->vfs_quotactl)(next, cmd, id, addr));
+}
+
+void
+vfs_init_vnode(
+       struct bhv_desc         *bdp,
+       struct vnode            *vp,
+       struct bhv_desc         *bp,
+       int                     unlock)
+{
+       struct bhv_desc         *next = bdp;
+
+       ASSERT(next);
+       while (! (bhvtovfsops(next))->vfs_init_vnode)
+               next = BHV_NEXT(next);
+       ((*bhvtovfsops(next)->vfs_init_vnode)(next, vp, bp, unlock));
+}
+
+void
+vfs_force_shutdown(
+       struct bhv_desc         *bdp,
+       int                     fl,
+       char                    *file,
+       int                     line)
+{
+       struct bhv_desc         *next = bdp;
+
+       ASSERT(next);
+       while (! (bhvtovfsops(next))->vfs_force_shutdown)
+               next = BHV_NEXT(next);
+       ((*bhvtovfsops(next)->vfs_force_shutdown)(next, fl, file, line));
+}
+
+void
+vfs_freeze(
+       struct bhv_desc         *bdp)
+{
+       struct bhv_desc         *next = bdp;
+
+       ASSERT(next);
+       while (! (bhvtovfsops(next))->vfs_freeze)
+               next = BHV_NEXT(next);
+       ((*bhvtovfsops(next)->vfs_freeze)(next));
+}
+
+vfs_t *
+vfs_allocate( void )
+{
+       struct vfs              *vfsp;
+
+       vfsp = kmem_zalloc(sizeof(vfs_t), KM_SLEEP);
+       bhv_head_init(VFS_BHVHEAD(vfsp), "vfs");
+       init_waitqueue_head(&vfsp->vfs_wait_sync_task);
+       return vfsp;
+}
+
+void
+vfs_deallocate(
+       struct vfs              *vfsp)
+{
+       bhv_head_destroy(VFS_BHVHEAD(vfsp));
+       kmem_free(vfsp, sizeof(vfs_t));
+}
+
+void
+vfs_insertops(
+       struct vfs              *vfsp,
+       struct bhv_vfsops       *vfsops)
+{
+       struct bhv_desc         *bdp;
+
+       bdp = kmem_alloc(sizeof(struct bhv_desc), KM_SLEEP);
+       bhv_desc_init(bdp, NULL, vfsp, vfsops);
+       bhv_insert(&vfsp->vfs_bh, bdp);
+}
+
+void
+vfs_insertbhv(
+       struct vfs              *vfsp,
+       struct bhv_desc         *bdp,
+       struct vfsops           *vfsops,
+       void                    *mount)
+{
+       bhv_desc_init(bdp, mount, vfsp, vfsops);
+       bhv_insert_initial(&vfsp->vfs_bh, bdp);
+}
+
+void
+bhv_remove_vfsops(
+       struct vfs              *vfsp,
+       int                     pos)
+{
+       struct bhv_desc         *bhv;
+
+       bhv = bhv_lookup_range(&vfsp->vfs_bh, pos, pos);
+       if (!bhv)
+               return;
+       bhv_remove(&vfsp->vfs_bh, bhv);
+       kmem_free(bhv, sizeof(*bhv));
+}
+
+void
+bhv_remove_all_vfsops(
+       struct vfs              *vfsp,
+       int                     freebase)
+{
+       struct xfs_mount        *mp;
+
+       bhv_remove_vfsops(vfsp, VFS_POSITION_QM);
+       bhv_remove_vfsops(vfsp, VFS_POSITION_DM);
+       if (!freebase)
+               return;
+       mp = XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfsp), &xfs_vfsops));
+       VFS_REMOVEBHV(vfsp, &mp->m_bhv);
+       xfs_mount_free(mp, 0);
+}
+
+void
+bhv_insert_all_vfsops(
+       struct vfs              *vfsp)
+{
+       struct xfs_mount        *mp;
+
+       mp = xfs_mount_init();
+       vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp);
+       vfs_insertdmapi(vfsp);
+       vfs_insertquota(vfsp);
+}
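Every vfs_* wrapper in this file has the same shape: start at the given behavior descriptor, walk BHV_NEXT until some layer actually implements the operation, then call through that layer's ops table. The following is a self-contained toy model of the dispatch pattern, not the kernel's bhv_desc machinery; toy_bhv, toy_ops and the statvfs stand-in are invented for the illustration:

#include <stdio.h>
#include <assert.h>

struct toy_ops {
        int (*op_statvfs)(void);        /* NULL means "not implemented at this layer" */
};

struct toy_bhv {
        struct toy_bhv  *next;          /* next (lower) layer in the chain */
        struct toy_ops  *ops;
};

/* Walk the chain until a layer provides op_statvfs, then dispatch to it. */
static int toy_statvfs(struct toy_bhv *bhv)
{
        assert(bhv != NULL);
        while (!bhv->ops->op_statvfs)
                bhv = bhv->next;
        return bhv->ops->op_statvfs();
}

static int base_statvfs(void) { return 42; }

int main(void)
{
        struct toy_ops quota_ops = { NULL };            /* stacked layer, no statvfs of its own */
        struct toy_ops base_ops  = { base_statvfs };    /* bottom layer implements it */
        struct toy_bhv base  = { NULL,  &base_ops };
        struct toy_bhv quota = { &base, &quota_ops };

        printf("%d\n", toy_statvfs(&quota));    /* falls through the quota layer: prints 42 */
        return 0;
}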
diff --git a/fs/xfs/linux/xfs_vfs.h b/fs/xfs/linux/xfs_vfs.h
new file mode 100644
index 0000000..dc1cd19
--- /dev/null
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_VFS_H__
+#define __XFS_VFS_H__
+
+#include <linux/vfs.h>
+#include "xfs_fs.h"
+
+struct fid;
+struct cred;
+struct vnode;
+struct kstatfs;
+struct seq_file;
+struct super_block;
+struct xfs_mount_args;
+
+typedef struct kstatfs xfs_statfs_t;
+
+typedef struct vfs {
+       u_int                   vfs_flag;       /* flags */
+       xfs_fsid_t              vfs_fsid;       /* file system ID */
+       xfs_fsid_t              *vfs_altfsid;   /* An ID fixed for life of FS */
+       bhv_head_t              vfs_bh;         /* head of vfs behavior chain */
+       struct super_block      *vfs_super;     /* Linux superblock structure */
+       struct task_struct      *vfs_sync_task;
+       wait_queue_head_t       vfs_wait_sync_task;
+} vfs_t;
+
+#define vfs_fbhv               vfs_bh.bh_first /* 1st on vfs behavior chain */
+
+#define bhvtovfs(bdp)          ( (struct vfs *)BHV_VOBJ(bdp) )
+#define bhvtovfsops(bdp)       ( (struct vfsops *)BHV_OPS(bdp) )
+#define VFS_BHVHEAD(vfs)       ( &(vfs)->vfs_bh )
+#define VFS_REMOVEBHV(vfs, bdp)        ( bhv_remove(VFS_BHVHEAD(vfs), bdp) )
+
+#define VFS_POSITION_BASE      BHV_POSITION_BASE       /* chain bottom */
+#define VFS_POSITION_TOP       BHV_POSITION_TOP        /* chain top */
+#define VFS_POSITION_INVALID   BHV_POSITION_INVALID    /* invalid pos. num */
+
+typedef enum {
+       VFS_BHV_UNKNOWN,        /* not specified */
+       VFS_BHV_XFS,            /* xfs */
+       VFS_BHV_DM,             /* data migration */
+       VFS_BHV_QM,             /* quota manager */
+       VFS_BHV_IO,             /* IO path */
+       VFS_BHV_END             /* housekeeping end-of-range */
+} vfs_bhv_t;
+
+#define VFS_POSITION_XFS       (BHV_POSITION_BASE)
+#define VFS_POSITION_DM                (VFS_POSITION_BASE+10)
+#define VFS_POSITION_QM                (VFS_POSITION_BASE+20)
+#define VFS_POSITION_IO                (VFS_POSITION_BASE+30)
+
+#define VFS_RDONLY             0x0001  /* read-only vfs */
+#define VFS_GRPID              0x0002  /* group-ID assigned from directory */
+#define VFS_DMI                        0x0004  /* filesystem has the DMI enabled */
+#define VFS_UMOUNT             0x0008  /* unmount in progress */
+#define VFS_END                        0x0008  /* max flag */
+
+#define SYNC_ATTR              0x0001  /* sync attributes */
+#define SYNC_CLOSE             0x0002  /* close file system down */
+#define SYNC_DELWRI            0x0004  /* look at delayed writes */
+#define SYNC_WAIT              0x0008  /* wait for i/o to complete */
+#define SYNC_BDFLUSH           0x0010  /* BDFLUSH is calling -- don't block */
+#define SYNC_FSDATA            0x0020  /* flush fs data (e.g. superblocks) */
+#define SYNC_REFCACHE          0x0040  /* prune some of the nfs ref cache */
+#define SYNC_REMOUNT           0x0080  /* remount readonly, no dummy LRs */
+
+typedef int    (*vfs_mount_t)(bhv_desc_t *,
+                               struct xfs_mount_args *, struct cred *);
+typedef int    (*vfs_parseargs_t)(bhv_desc_t *, char *,
+                               struct xfs_mount_args *, int);
+typedef        int     (*vfs_showargs_t)(bhv_desc_t *, struct seq_file *);
+typedef int    (*vfs_unmount_t)(bhv_desc_t *, int, struct cred *);
+typedef int    (*vfs_mntupdate_t)(bhv_desc_t *, int *,
+                               struct xfs_mount_args *);
+typedef int    (*vfs_root_t)(bhv_desc_t *, struct vnode **);
+typedef int    (*vfs_statvfs_t)(bhv_desc_t *, xfs_statfs_t *, struct vnode *);
+typedef int    (*vfs_sync_t)(bhv_desc_t *, int, struct cred *);
+typedef int    (*vfs_vget_t)(bhv_desc_t *, struct vnode **, struct fid *);
+typedef int    (*vfs_dmapiops_t)(bhv_desc_t *, caddr_t);
+typedef int    (*vfs_quotactl_t)(bhv_desc_t *, int, int, caddr_t);
+typedef void   (*vfs_init_vnode_t)(bhv_desc_t *,
+                               struct vnode *, bhv_desc_t *, int);
+typedef void   (*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int);
+typedef void   (*vfs_freeze_t)(bhv_desc_t *);
+
+typedef struct vfsops {
+       bhv_position_t          vf_position;    /* behavior chain position */
+       vfs_mount_t             vfs_mount;      /* mount file system */
+       vfs_parseargs_t         vfs_parseargs;  /* parse mount options */
+       vfs_showargs_t          vfs_showargs;   /* unparse mount options */
+       vfs_unmount_t           vfs_unmount;    /* unmount file system */
+       vfs_mntupdate_t         vfs_mntupdate;  /* update file system options */
+       vfs_root_t              vfs_root;       /* get root vnode */
+       vfs_statvfs_t           vfs_statvfs;    /* file system statistics */
+       vfs_sync_t              vfs_sync;       /* flush files */
+       vfs_vget_t              vfs_vget;       /* get vnode from fid */
+       vfs_dmapiops_t          vfs_dmapiops;   /* data migration */
+       vfs_quotactl_t          vfs_quotactl;   /* disk quota */
+       vfs_init_vnode_t        vfs_init_vnode; /* initialize a new vnode */
+       vfs_force_shutdown_t    vfs_force_shutdown;     /* crash and burn */
+       vfs_freeze_t            vfs_freeze;     /* freeze fs for snapshot */
+} vfsops_t;
+
+/*
+ * VFS's.  Operates on vfs structure pointers (starts at bhv head).
+ */
+#define VHEAD(v)                       ((v)->vfs_fbhv)
+#define VFS_MOUNT(v, ma,cr, rv)                ((rv) = vfs_mount(VHEAD(v), ma,cr))
+#define VFS_PARSEARGS(v, o,ma,f, rv)   ((rv) = vfs_parseargs(VHEAD(v), o,ma,f))
+#define VFS_SHOWARGS(v, m, rv)         ((rv) = vfs_showargs(VHEAD(v), m))
+#define VFS_UNMOUNT(v, f, cr, rv)      ((rv) = vfs_unmount(VHEAD(v), f,cr))
+#define VFS_MNTUPDATE(v, fl, args, rv) ((rv) = vfs_mntupdate(VHEAD(v), fl, args))
+#define VFS_ROOT(v, vpp, rv)           ((rv) = vfs_root(VHEAD(v), vpp))
+#define VFS_STATVFS(v, sp,vp, rv)      ((rv) = vfs_statvfs(VHEAD(v), sp,vp))
+#define VFS_SYNC(v, flag,cr, rv)       ((rv) = vfs_sync(VHEAD(v), flag,cr))
+#define VFS_VGET(v, vpp,fidp, rv)      ((rv) = vfs_vget(VHEAD(v), vpp,fidp))
+#define VFS_DMAPIOPS(v, p, rv)         ((rv) = vfs_dmapiops(VHEAD(v), p))
+#define VFS_QUOTACTL(v, c,id,p, rv)    ((rv) = vfs_quotactl(VHEAD(v), c,id,p))
+#define VFS_INIT_VNODE(v, vp,b,ul)     ( vfs_init_vnode(VHEAD(v), vp,b,ul) )
+#define VFS_FORCE_SHUTDOWN(v, fl,f,l)  ( vfs_force_shutdown(VHEAD(v), fl,f,l) )
+#define VFS_FREEZE(v)                  ( vfs_freeze(VHEAD(v)) )
+
+/*
+ * PVFS's.  Operates on behavior descriptor pointers.
+ */
+#define PVFS_MOUNT(b, ma,cr, rv)       ((rv) = vfs_mount(b, ma,cr))
+#define PVFS_PARSEARGS(b, o,ma,f, rv)  ((rv) = vfs_parseargs(b, o,ma,f))
+#define PVFS_SHOWARGS(b, m, rv)                ((rv) = vfs_showargs(b, m))
+#define PVFS_UNMOUNT(b, f,cr, rv)      ((rv) = vfs_unmount(b, f,cr))
+#define PVFS_MNTUPDATE(b, fl, args, rv)        ((rv) = vfs_mntupdate(b, fl, args))
+#define PVFS_ROOT(b, vpp, rv)          ((rv) = vfs_root(b, vpp))
+#define PVFS_STATVFS(b, sp,vp, rv)     ((rv) = vfs_statvfs(b, sp,vp))
+#define PVFS_SYNC(b, flag,cr, rv)      ((rv) = vfs_sync(b, flag,cr))
+#define PVFS_VGET(b, vpp,fidp, rv)     ((rv) = vfs_vget(b, vpp,fidp))
+#define PVFS_DMAPIOPS(b, p, rv)                ((rv) = vfs_dmapiops(b, p))
+#define PVFS_QUOTACTL(b, c,id,p, rv)   ((rv) = vfs_quotactl(b, c,id,p))
+#define PVFS_INIT_VNODE(b, vp,b2,ul)   ( vfs_init_vnode(b, vp,b2,ul) )
+#define PVFS_FORCE_SHUTDOWN(b, fl,f,l) ( vfs_force_shutdown(b, fl,f,l) )
+#define PVFS_FREEZE(b)                 ( vfs_freeze(b) )
+
+extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *);
+extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int);
+extern int vfs_showargs(bhv_desc_t *, struct seq_file *);
+extern int vfs_unmount(bhv_desc_t *, int, struct cred *);
+extern int vfs_mntupdate(bhv_desc_t *, int *, struct xfs_mount_args *);
+extern int vfs_root(bhv_desc_t *, struct vnode **);
+extern int vfs_statvfs(bhv_desc_t *, xfs_statfs_t *, struct vnode *);
+extern int vfs_sync(bhv_desc_t *, int, struct cred *);
+extern int vfs_vget(bhv_desc_t *, struct vnode **, struct fid *);
+extern int vfs_dmapiops(bhv_desc_t *, caddr_t);
+extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t);
+extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int);
+extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int);
+extern void vfs_freeze(bhv_desc_t *);
+
+typedef struct bhv_vfsops {
+       struct vfsops           bhv_common;
+       void *                  bhv_custom;
+} bhv_vfsops_t;
+
+#define vfs_bhv_lookup(v, id)  ( bhv_lookup_range(&(v)->vfs_bh, (id), (id)) )
+#define vfs_bhv_custom(b)      ( ((bhv_vfsops_t *)BHV_OPS(b))->bhv_custom )
+#define vfs_bhv_set_custom(b,o)        ( (b)->bhv_custom = (void *)(o))
+#define vfs_bhv_clr_custom(b)  ( (b)->bhv_custom = NULL )
+
+extern vfs_t *vfs_allocate(void);
+extern void vfs_deallocate(vfs_t *);
+extern void vfs_insertops(vfs_t *, bhv_vfsops_t *);
+extern void vfs_insertbhv(vfs_t *, bhv_desc_t *, vfsops_t *, void *);
+
+extern void bhv_insert_all_vfsops(struct vfs *);
+extern void bhv_remove_all_vfsops(struct vfs *, int);
+extern void bhv_remove_vfsops(struct vfs *, int);
+
+#endif /* __XFS_VFS_H__ */
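The VFS_* and PVFS_* macro families above differ only in their starting point (VHEAD(v), the first behavior on the chain, versus an explicit behavior descriptor), and both hand status back through an rv out-parameter rather than a return value. A toy restatement of that calling convention; the stub function and the TOY_ROOT name are invented for the example:

#include <stdio.h>

static int stub_root(int obj, int *out) /* stands in for vfs_root(VHEAD(v), vpp) */
{
        *out = obj * 2;
        return 0;
}

/* Same shape as VFS_ROOT(v, vpp, rv): the macro assigns the status into rv. */
#define TOY_ROOT(v, vpp, rv)    ((rv) = stub_root((v), (vpp)))

int main(void)
{
        int root, error;

        TOY_ROOT(21, &root, error);
        printf("error=%d root=%d\n", error, root);
        return 0;
}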
diff --git a/fs/xfs/linux/xfs_vnode.c b/fs/xfs/linux/xfs_vnode.c
new file mode 100644
index 0000000..9240efb
--- /dev/null
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+
+
+uint64_t vn_generation;                /* vnode generation number */
+spinlock_t vnumber_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Dedicated vnode inactive/reclaim sync semaphores.
+ * Prime number of hash buckets since address is used as the key.
+ */
+#define NVSYNC                  37
+#define vptosync(v)             (&vsync[((unsigned long)v) % NVSYNC])
+sv_t vsync[NVSYNC];
+
+/*
+ * Translate stat(2) file types to vnode types and vice versa.
+ * Aware of numeric order of S_IFMT and vnode type values.
+ */
+enum vtype iftovt_tab[] = {
+       VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
+       VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
+};
+
+u_short vttoif_tab[] = {
+       0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK
+};
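The two tables just above encode the mapping between S_IFMT file types and vnode types in both directions; IFTOVT in xfs_vnode.h further below indexes iftovt_tab by (mode & S_IFMT) >> 12. A small standalone check of that round trip, with the tables copied verbatim and local copies of the macros:

#include <stdio.h>
#include <sys/stat.h>

enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK };

static enum vtype iftovt_tab[] = {
        VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
        VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
};
static unsigned short vttoif_tab[] = {
        0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK
};

#define IFTOVT(mode)    (iftovt_tab[((mode) & S_IFMT) >> 12])
#define VTTOIF(indx)    (vttoif_tab[(int)(indx)])

int main(void)
{
        /* S_IFDIR is 0040000, so (S_IFDIR >> 12) == 4 and slot 4 is VDIR. */
        printf("IFTOVT(S_IFDIR) == VDIR: %d\n", IFTOVT(S_IFDIR) == VDIR);
        printf("round trip for regular files: %d\n",
               VTTOIF(IFTOVT(S_IFREG | 0644)) == S_IFREG);
        return 0;
}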
+
+
+void
+vn_init(void)
+{
+       register sv_t *svp;
+       register int i;
+
+       for (svp = vsync, i = 0; i < NVSYNC; i++, svp++)
+               init_sv(svp, SV_DEFAULT, "vsy", i);
+}
+
+/*
+ * Clean a vnode of filesystem-specific data and prepare it for reuse.
+ */
+STATIC int
+vn_reclaim(
+       struct vnode    *vp)
+{
+       int             error;
+
+       XFS_STATS_INC(vn_reclaim);
+       vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address);
+
+       /*
+        * Only make the VOP_RECLAIM call if there are behaviors
+        * to call.
+        */
+       if (vp->v_fbhv) {
+               VOP_RECLAIM(vp, error);
+               if (error)
+                       return -error;
+       }
+       ASSERT(vp->v_fbhv == NULL);
+
+       VN_LOCK(vp);
+       vp->v_flag &= (VRECLM|VWAIT);
+       VN_UNLOCK(vp, 0);
+
+       vp->v_type = VNON;
+       vp->v_fbhv = NULL;
+
+#ifdef XFS_VNODE_TRACE
+       ktrace_free(vp->v_trace);
+       vp->v_trace = NULL;
+#endif
+
+       return 0;
+}
+
+STATIC void
+vn_wakeup(
+       struct vnode    *vp)
+{
+       VN_LOCK(vp);
+       if (vp->v_flag & VWAIT)
+               sv_broadcast(vptosync(vp));
+       vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED);
+       VN_UNLOCK(vp, 0);
+}
+
+int
+vn_wait(
+       struct vnode    *vp)
+{
+       VN_LOCK(vp);
+       if (vp->v_flag & (VINACT | VRECLM)) {
+               vp->v_flag |= VWAIT;
+               sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
+               return 1;
+       }
+       VN_UNLOCK(vp, 0);
+       return 0;
+}
+
+struct vnode *
+vn_initialize(
+       struct inode    *inode)
+{
+       struct vnode    *vp = LINVFS_GET_VP(inode);
+
+       XFS_STATS_INC(vn_active);
+       XFS_STATS_INC(vn_alloc);
+
+       vp->v_flag = VMODIFIED;
+       spinlock_init(&vp->v_lock, "v_lock");
+
+       spin_lock(&vnumber_lock);
+       if (!++vn_generation)   /* v_number shouldn't be zero */
+               vn_generation++;
+       vp->v_number = vn_generation;
+       spin_unlock(&vnumber_lock);
+
+       ASSERT(VN_CACHED(vp) == 0);
+
+       /* Initialize the first behavior and the behavior chain head. */
+       vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode");
+
+#ifdef XFS_VNODE_TRACE
+       vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP);
+       printk("Allocated VNODE_TRACE at 0x%p\n", vp->v_trace);
+#endif /* XFS_VNODE_TRACE */
+
+       vn_trace_exit(vp, "vn_initialize", (inst_t *)__return_address);
+       return vp;
+}
+
+/*
+ * Get a reference on a vnode.
+ */
+vnode_t *
+vn_get(
+       struct vnode    *vp,
+       vmap_t          *vmap)
+{
+       struct inode    *inode;
+
+       XFS_STATS_INC(vn_get);
+       inode = LINVFS_GET_IP(vp);
+       if (inode->i_state & I_FREEING)
+               return NULL;
+
+       inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino);
+       if (!inode)     /* Inode not present */
+               return NULL;
+
+       vn_trace_exit(vp, "vn_get", (inst_t *)__return_address);
+
+       return vp;
+}
+
+/*
+ * Revalidate the Linux inode from the vnode.
+ */
+int
+vn_revalidate(
+       struct vnode    *vp)
+{
+       struct inode    *inode;
+       vattr_t         va;
+       int             error;
+
+       vn_trace_entry(vp, "vn_revalidate", (inst_t *)__return_address);
+       ASSERT(vp->v_fbhv != NULL);
+
+       va.va_mask = XFS_AT_STAT|XFS_AT_XFLAGS;
+       VOP_GETATTR(vp, &va, 0, NULL, error);
+       if (!error) {
+               inode = LINVFS_GET_IP(vp);
+               inode->i_mode       = VTTOIF(va.va_type) | va.va_mode;
+               inode->i_nlink      = va.va_nlink;
+               inode->i_uid        = va.va_uid;
+               inode->i_gid        = va.va_gid;
+               inode->i_blocks     = va.va_nblocks;
+               inode->i_mtime      = va.va_mtime;
+               inode->i_ctime      = va.va_ctime;
+               inode->i_atime      = va.va_atime;
+               if (va.va_xflags & XFS_XFLAG_IMMUTABLE)
+                       inode->i_flags |= S_IMMUTABLE;
+               else
+                       inode->i_flags &= ~S_IMMUTABLE;
+               if (va.va_xflags & XFS_XFLAG_APPEND)
+                       inode->i_flags |= S_APPEND;
+               else
+                       inode->i_flags &= ~S_APPEND;
+               if (va.va_xflags & XFS_XFLAG_SYNC)
+                       inode->i_flags |= S_SYNC;
+               else
+                       inode->i_flags &= ~S_SYNC;
+               if (va.va_xflags & XFS_XFLAG_NOATIME)
+                       inode->i_flags |= S_NOATIME;
+               else
+                       inode->i_flags &= ~S_NOATIME;
+               VUNMODIFY(vp);
+       }
+       return -error;
+}
+
+/*
+ * Purge a vnode from the cache.
+ * At this point the vnode is guaranteed to have no references (vn_count == 0).
+ * The caller has to make sure that there is no way someone could
+ * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock).
+ */
+void
+vn_purge(
+       struct vnode    *vp,
+       vmap_t          *vmap)
+{
+       vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address);
+
+again:
+       /*
+        * Check whether vp has already been reclaimed since our caller
+        * sampled its version while holding a filesystem cache lock that
+        * its VOP_RECLAIM function acquires.
+        */
+       VN_LOCK(vp);
+       if (vp->v_number != vmap->v_number) {
+               VN_UNLOCK(vp, 0);
+               return;
+       }
+
+       /*
+        * If vp is being reclaimed or inactivated, wait until it is inert,
+        * then proceed.  Can't assume that vnode is actually reclaimed
+        * just because the reclaimed flag is asserted -- a vn_alloc
+        * reclaim can fail.
+        */
+       if (vp->v_flag & (VINACT | VRECLM)) {
+               ASSERT(vn_count(vp) == 0);
+               vp->v_flag |= VWAIT;
+               sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
+               goto again;
+       }
+
+       /*
+        * Another process could have raced in and gotten this vnode...
+        */
+       if (vn_count(vp) > 0) {
+               VN_UNLOCK(vp, 0);
+               return;
+       }
+
+       XFS_STATS_DEC(vn_active);
+       vp->v_flag |= VRECLM;
+       VN_UNLOCK(vp, 0);
+
+       /*
+        * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells
+        * vp's filesystem to flush and invalidate all cached resources.
+        * When vn_reclaim returns, vp should have no private data,
+        * either in a system cache or attached to v_data.
+        */
+       if (vn_reclaim(vp) != 0)
+               panic("vn_purge: cannot reclaim");
+
+       /*
+        * Wakeup anyone waiting for vp to be reclaimed.
+        */
+       vn_wakeup(vp);
+}
+
+/*
+ * Add a reference to a referenced vnode.
+ */
+struct vnode *
+vn_hold(
+       struct vnode    *vp)
+{
+       struct inode    *inode;
+
+       XFS_STATS_INC(vn_hold);
+
+       VN_LOCK(vp);
+       inode = igrab(LINVFS_GET_IP(vp));
+       ASSERT(inode);
+       VN_UNLOCK(vp, 0);
+
+       return vp;
+}
+
+/*
+ *  Call VOP_INACTIVE on last reference.
+ */
+void
+vn_rele(
+       struct vnode    *vp)
+{
+       int             vcnt;
+       int             cache;
+
+       XFS_STATS_INC(vn_rele);
+
+       VN_LOCK(vp);
+
+       vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address);
+       vcnt = vn_count(vp);
+
+       /*
+        * Since we always get called from put_inode we know
+        * that i_count won't be decremented after we
+        * return.
+        */
+       if (!vcnt) {
+               /*
+                * As soon as we turn this on, no one can find us in vn_get
+                * until we turn off VINACT or VRECLM.
+                */
+               vp->v_flag |= VINACT;
+               VN_UNLOCK(vp, 0);
+
+               /*
+                * Do not make the VOP_INACTIVE call if there
+                * are no behaviors attached to the vnode to call.
+                */
+               if (vp->v_fbhv)
+                       VOP_INACTIVE(vp, NULL, cache);
+
+               VN_LOCK(vp);
+               if (vp->v_flag & VWAIT)
+                       sv_broadcast(vptosync(vp));
+
+               vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED);
+       }
+
+       VN_UNLOCK(vp, 0);
+
+       vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address);
+}
+
+/*
+ * Finish the removal of a vnode.
+ */
+void
+vn_remove(
+       struct vnode    *vp)
+{
+       vmap_t          vmap;
+
+       /* Make sure we don't do this to the same vnode twice */
+       if (!(vp->v_fbhv))
+               return;
+
+       XFS_STATS_INC(vn_remove);
+       vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address);
+
+       /*
+        * After the following purge the vnode
+        * will no longer exist.
+        */
+       VMAP(vp, vmap);
+       vn_purge(vp, &vmap);
+}
+
+
+#ifdef XFS_VNODE_TRACE
+
+#define KTRACE_ENTER(vp, vk, s, line, ra)                      \
+       ktrace_enter(   (vp)->v_trace,                          \
+/*  0 */               (void *)(__psint_t)(vk),                \
+/*  1 */               (void *)(s),                            \
+/*  2 */               (void *)(__psint_t) line,               \
+/*  3 */               (void *)(vn_count(vp)), \
+/*  4 */               (void *)(ra),                           \
+/*  5 */               (void *)(__psunsigned_t)(vp)->v_flag,   \
+/*  6 */               (void *)(__psint_t)smp_processor_id(),  \
+/*  7 */               (void *)(__psint_t)(current->pid),      \
+/*  8 */               (void *)__return_address,               \
+/*  9 */               0, 0, 0, 0, 0, 0, 0)
+
+/*
+ * Vnode tracing code.
+ */
+void
+vn_trace_entry(vnode_t *vp, char *func, inst_t *ra)
+{
+       KTRACE_ENTER(vp, VNODE_KTRACE_ENTRY, func, 0, ra);
+}
+
+void
+vn_trace_exit(vnode_t *vp, char *func, inst_t *ra)
+{
+       KTRACE_ENTER(vp, VNODE_KTRACE_EXIT, func, 0, ra);
+}
+
+void
+vn_trace_hold(vnode_t *vp, char *file, int line, inst_t *ra)
+{
+       KTRACE_ENTER(vp, VNODE_KTRACE_HOLD, file, line, ra);
+}
+
+void
+vn_trace_ref(vnode_t *vp, char *file, int line, inst_t *ra)
+{
+       KTRACE_ENTER(vp, VNODE_KTRACE_REF, file, line, ra);
+}
+
+void
+vn_trace_rele(vnode_t *vp, char *file, int line, inst_t *ra)
+{
+       KTRACE_ENTER(vp, VNODE_KTRACE_RELE, file, line, ra);
+}
+#endif /* XFS_VNODE_TRACE */
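One detail from the top of this file worth spelling out: the inactive/reclaim wait channel for a vnode is chosen by hashing its address into a prime-sized array (NVSYNC == 37), which spreads pointers that otherwise share low-order alignment bits. A standalone, index-only illustration of that bucket selection; the sv_t wait machinery itself is not modeled:

#include <stdio.h>
#include <stdlib.h>

#define NVSYNC          37                              /* prime bucket count, as in vsync[] */
#define vptosync(v)     (((unsigned long)(v)) % NVSYNC)

int main(void)
{
        int counts[NVSYNC] = { 0 };
        int i;

        /* Allocations are intentionally leaked; only their addresses matter here. */
        for (i = 0; i < 1000; i++)
                counts[vptosync(malloc(64))]++;

        for (i = 0; i < NVSYNC; i++)
                printf("bucket %2d: %d\n", i, counts[i]);
        return 0;
}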
diff --git a/fs/xfs/linux/xfs_vnode.h b/fs/xfs/linux/xfs_vnode.h
new file mode 100644
index 0000000..af0b65f
--- /dev/null
@@ -0,0 +1,651 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ *
+ * Portions Copyright (c) 1989, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef __XFS_VNODE_H__
+#define __XFS_VNODE_H__
+
+struct uio;
+struct file;
+struct vattr;
+struct xfs_iomap;
+struct attrlist_cursor_kern;
+
+/*
+ * Vnode types.  VNON means no type.
+ */
+enum vtype     { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK };
+
+typedef xfs_ino_t vnumber_t;
+typedef struct dentry vname_t;
+typedef bhv_head_t vn_bhv_head_t;
+
+/*
+ * MP locking protocols:
+ *     v_flag, v_vfsp                          VN_LOCK/VN_UNLOCK
+ *     v_type                                  read-only or fs-dependent
+ */
+typedef struct vnode {
+       __u32           v_flag;                 /* vnode flags (see below) */
+       enum vtype      v_type;                 /* vnode type */
+       struct vfs      *v_vfsp;                /* ptr to containing VFS */
+       vnumber_t       v_number;               /* in-core vnode number */
+       vn_bhv_head_t   v_bh;                   /* behavior head */
+       spinlock_t      v_lock;                 /* VN_LOCK/VN_UNLOCK */
+       struct inode    v_inode;                /* Linux inode */
+#ifdef XFS_VNODE_TRACE
+       struct ktrace   *v_trace;               /* trace header structure    */
+#endif
+} vnode_t;
+
+#define v_fbhv                 v_bh.bh_first          /* first behavior */
+#define v_fops                 v_bh.bh_first->bd_ops  /* first behavior ops */
+
+#define VNODE_POSITION_BASE    BHV_POSITION_BASE       /* chain bottom */
+#define VNODE_POSITION_TOP     BHV_POSITION_TOP        /* chain top */
+#define VNODE_POSITION_INVALID BHV_POSITION_INVALID    /* invalid pos. num */
+
+typedef enum {
+       VN_BHV_UNKNOWN,         /* not specified */
+       VN_BHV_XFS,             /* xfs */
+       VN_BHV_DM,              /* data migration */
+       VN_BHV_QM,              /* quota manager */
+       VN_BHV_IO,              /* IO path */
+       VN_BHV_END              /* housekeeping end-of-range */
+} vn_bhv_t;
+
+#define VNODE_POSITION_XFS     (VNODE_POSITION_BASE)
+#define VNODE_POSITION_DM      (VNODE_POSITION_BASE+10)
+#define VNODE_POSITION_QM      (VNODE_POSITION_BASE+20)
+#define VNODE_POSITION_IO      (VNODE_POSITION_BASE+30)
+
+/*
+ * Macros for dealing with the behavior descriptor inside of the vnode.
+ */
+#define BHV_TO_VNODE(bdp)      ((vnode_t *)BHV_VOBJ(bdp))
+#define BHV_TO_VNODE_NULL(bdp) ((vnode_t *)BHV_VOBJNULL(bdp))
+
+#define VN_BHV_HEAD(vp)                        ((bhv_head_t *)(&((vp)->v_bh)))
+#define vn_bhv_head_init(bhp,name)     bhv_head_init(bhp,name)
+#define vn_bhv_remove(bhp,bdp)         bhv_remove(bhp,bdp)
+#define vn_bhv_lookup(bhp,ops)         bhv_lookup(bhp,ops)
+#define vn_bhv_lookup_unlocked(bhp,ops) bhv_lookup_unlocked(bhp,ops)
+
+/*
+ * Vnode to Linux inode mapping.
+ */
+#define LINVFS_GET_VP(inode)   ((vnode_t *)list_entry(inode, vnode_t, v_inode))
+#define LINVFS_GET_IP(vp)      (&(vp)->v_inode)
+
+/*
+ * Convert between vnode types and inode formats (since POSIX.1
+ * defines mode word of stat structure in terms of inode formats).
+ */
+extern enum vtype      iftovt_tab[];
+extern u_short         vttoif_tab[];
+#define IFTOVT(mode)   (iftovt_tab[((mode) & S_IFMT) >> 12])
+#define VTTOIF(indx)   (vttoif_tab[(int)(indx)])
+#define MAKEIMODE(indx, mode)  (int)(VTTOIF(indx) | (mode))
+
+
+/*
+ * Vnode flags.
+ */
+#define VINACT                0x1      /* vnode is being inactivated   */
+#define VRECLM                0x2      /* vnode is being reclaimed     */
+#define VWAIT                 0x4      /* waiting for VINACT/VRECLM to end */
+#define VMODIFIED             0x8      /* XFS inode state possibly differs */
+                                       /* from the Linux inode state.  */
+
+/*
+ * Values for the VOP_RWLOCK and VOP_RWUNLOCK flags parameter.
+ */
+typedef enum vrwlock {
+       VRWLOCK_NONE,
+       VRWLOCK_READ,
+       VRWLOCK_WRITE,
+       VRWLOCK_WRITE_DIRECT,
+       VRWLOCK_TRY_READ,
+       VRWLOCK_TRY_WRITE
+} vrwlock_t;
+
+/*
+ * Return values for VOP_INACTIVE.  A return value of
+ * VN_INACTIVE_NOCACHE implies that the file system behavior
+ * has disassociated its state and bhv_desc_t from the vnode.
+ */
+#define        VN_INACTIVE_CACHE       0
+#define        VN_INACTIVE_NOCACHE     1
+
+/*
+ * Values for the cmd code given to VOP_VNODE_CHANGE.
+ */
+typedef enum vchange {
+       VCHANGE_FLAGS_FRLOCKS           = 0,
+       VCHANGE_FLAGS_ENF_LOCKING       = 1,
+       VCHANGE_FLAGS_TRUNCATED         = 2,
+       VCHANGE_FLAGS_PAGE_DIRTY        = 3,
+       VCHANGE_FLAGS_IOEXCL_COUNT      = 4
+} vchange_t;
+
+
+typedef int    (*vop_open_t)(bhv_desc_t *, struct cred *);
+typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
+                               const struct iovec *, unsigned int,
+                               loff_t *, int, struct cred *);
+typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
+                               const struct iovec *, unsigned int,
+                               loff_t *, int, struct cred *);
+typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
+                               loff_t *, int, size_t, read_actor_t,
+                               void *, struct cred *);
+typedef int    (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *,
+                               int, unsigned int, unsigned long);
+typedef int    (*vop_getattr_t)(bhv_desc_t *, struct vattr *, int,
+                               struct cred *);
+typedef int    (*vop_setattr_t)(bhv_desc_t *, struct vattr *, int,
+                               struct cred *);
+typedef int    (*vop_access_t)(bhv_desc_t *, int, struct cred *);
+typedef int    (*vop_lookup_t)(bhv_desc_t *, vname_t *, vnode_t **,
+                               int, vnode_t *, struct cred *);
+typedef int    (*vop_create_t)(bhv_desc_t *, vname_t *, struct vattr *,
+                               vnode_t **, struct cred *);
+typedef int    (*vop_remove_t)(bhv_desc_t *, vname_t *, struct cred *);
+typedef int    (*vop_link_t)(bhv_desc_t *, vnode_t *, vname_t *,
+                               struct cred *);
+typedef int    (*vop_rename_t)(bhv_desc_t *, vname_t *, vnode_t *, vname_t *,
+                               struct cred *);
+typedef int    (*vop_mkdir_t)(bhv_desc_t *, vname_t *, struct vattr *,
+                               vnode_t **, struct cred *);
+typedef int    (*vop_rmdir_t)(bhv_desc_t *, vname_t *, struct cred *);
+typedef int    (*vop_readdir_t)(bhv_desc_t *, struct uio *, struct cred *,
+                               int *);
+typedef int    (*vop_symlink_t)(bhv_desc_t *, vname_t *, struct vattr *,
+                               char *, vnode_t **, struct cred *);
+typedef int    (*vop_readlink_t)(bhv_desc_t *, struct uio *, int,
+                               struct cred *);
+typedef int    (*vop_fsync_t)(bhv_desc_t *, int, struct cred *,
+                               xfs_off_t, xfs_off_t);
+typedef int    (*vop_inactive_t)(bhv_desc_t *, struct cred *);
+typedef int    (*vop_fid2_t)(bhv_desc_t *, struct fid *);
+typedef int    (*vop_release_t)(bhv_desc_t *);
+typedef int    (*vop_rwlock_t)(bhv_desc_t *, vrwlock_t);
+typedef void   (*vop_rwunlock_t)(bhv_desc_t *, vrwlock_t);
+typedef int    (*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int,
+                               struct xfs_iomap *, int *);
+typedef int    (*vop_reclaim_t)(bhv_desc_t *);
+typedef int    (*vop_attr_get_t)(bhv_desc_t *, char *, char *, int *, int,
+                               struct cred *);
+typedef        int     (*vop_attr_set_t)(bhv_desc_t *, char *, char *, int, int,
+                               struct cred *);
+typedef        int     (*vop_attr_remove_t)(bhv_desc_t *, char *, int, struct cred *);
+typedef        int     (*vop_attr_list_t)(bhv_desc_t *, char *, int, int,
+                               struct attrlist_cursor_kern *, struct cred *);
+typedef void   (*vop_link_removed_t)(bhv_desc_t *, vnode_t *, int);
+typedef void   (*vop_vnode_change_t)(bhv_desc_t *, vchange_t, __psint_t);
+typedef void   (*vop_ptossvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
+typedef void   (*vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
+typedef int    (*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t,
+                               uint64_t, int);
+typedef int    (*vop_iflush_t)(bhv_desc_t *, int);
+
+
+typedef struct vnodeops {
+       bhv_position_t  vn_position;    /* position within behavior chain */
+       vop_open_t              vop_open;
+       vop_read_t              vop_read;
+       vop_write_t             vop_write;
+       vop_sendfile_t          vop_sendfile;
+       vop_ioctl_t             vop_ioctl;
+       vop_getattr_t           vop_getattr;
+       vop_setattr_t           vop_setattr;
+       vop_access_t            vop_access;
+       vop_lookup_t            vop_lookup;
+       vop_create_t            vop_create;
+       vop_remove_t            vop_remove;
+       vop_link_t              vop_link;
+       vop_rename_t            vop_rename;
+       vop_mkdir_t             vop_mkdir;
+       vop_rmdir_t             vop_rmdir;
+       vop_readdir_t           vop_readdir;
+       vop_symlink_t           vop_symlink;
+       vop_readlink_t          vop_readlink;
+       vop_fsync_t             vop_fsync;
+       vop_inactive_t          vop_inactive;
+       vop_fid2_t              vop_fid2;
+       vop_rwlock_t            vop_rwlock;
+       vop_rwunlock_t          vop_rwunlock;
+       vop_bmap_t              vop_bmap;
+       vop_reclaim_t           vop_reclaim;
+       vop_attr_get_t          vop_attr_get;
+       vop_attr_set_t          vop_attr_set;
+       vop_attr_remove_t       vop_attr_remove;
+       vop_attr_list_t         vop_attr_list;
+       vop_link_removed_t      vop_link_removed;
+       vop_vnode_change_t      vop_vnode_change;
+       vop_ptossvp_t           vop_tosspages;
+       vop_pflushinvalvp_t     vop_flushinval_pages;
+       vop_pflushvp_t          vop_flush_pages;
+       vop_release_t           vop_release;
+       vop_iflush_t            vop_iflush;
+} vnodeops_t;
+
+/*
+ * VOP's.
+ */
+#define _VOP_(op, vp)  (*((vnodeops_t *)(vp)->v_fops)->op)
+
+#define VOP_READ(vp,file,iov,segs,offset,ioflags,cr,rv)                        \
+       rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
+#define VOP_WRITE(vp,file,iov,segs,offset,ioflags,cr,rv)               \
+       rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
+#define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv)              \
+       rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr)
+#define VOP_BMAP(vp,of,sz,rw,b,n,rv)                                   \
+       rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n)
+#define VOP_OPEN(vp, cr, rv)                                           \
+       rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr)
+#define VOP_GETATTR(vp, vap, f, cr, rv)                                        \
+       rv = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr)
+#define        VOP_SETATTR(vp, vap, f, cr, rv)                                 \
+       rv = _VOP_(vop_setattr, vp)((vp)->v_fbhv, vap, f, cr)
+#define        VOP_ACCESS(vp, mode, cr, rv)                                    \
+       rv = _VOP_(vop_access, vp)((vp)->v_fbhv, mode, cr)
+#define        VOP_LOOKUP(vp,d,vpp,f,rdir,cr,rv)                               \
+       rv = _VOP_(vop_lookup, vp)((vp)->v_fbhv,d,vpp,f,rdir,cr)
+#define VOP_CREATE(dvp,d,vap,vpp,cr,rv)                                        \
+       rv = _VOP_(vop_create, dvp)((dvp)->v_fbhv,d,vap,vpp,cr)
+#define VOP_REMOVE(dvp,d,cr,rv)                                                \
+       rv = _VOP_(vop_remove, dvp)((dvp)->v_fbhv,d,cr)
+#define        VOP_LINK(tdvp,fvp,d,cr,rv)                                      \
+       rv = _VOP_(vop_link, tdvp)((tdvp)->v_fbhv,fvp,d,cr)
+#define        VOP_RENAME(fvp,fnm,tdvp,tnm,cr,rv)                              \
+       rv = _VOP_(vop_rename, fvp)((fvp)->v_fbhv,fnm,tdvp,tnm,cr)
+#define        VOP_MKDIR(dp,d,vap,vpp,cr,rv)                                   \
+       rv = _VOP_(vop_mkdir, dp)((dp)->v_fbhv,d,vap,vpp,cr)
+#define        VOP_RMDIR(dp,d,cr,rv)                                           \
+       rv = _VOP_(vop_rmdir, dp)((dp)->v_fbhv,d,cr)
+#define        VOP_READDIR(vp,uiop,cr,eofp,rv)                                 \
+       rv = _VOP_(vop_readdir, vp)((vp)->v_fbhv,uiop,cr,eofp)
+#define        VOP_SYMLINK(dvp,d,vap,tnm,vpp,cr,rv)                            \
+       rv = _VOP_(vop_symlink, dvp) ((dvp)->v_fbhv,d,vap,tnm,vpp,cr)
+#define        VOP_READLINK(vp,uiop,fl,cr,rv)                                  \
+       rv = _VOP_(vop_readlink, vp)((vp)->v_fbhv,uiop,fl,cr)
+#define        VOP_FSYNC(vp,f,cr,b,e,rv)                                       \
+       rv = _VOP_(vop_fsync, vp)((vp)->v_fbhv,f,cr,b,e)
+#define VOP_INACTIVE(vp, cr, rv)                                       \
+       rv = _VOP_(vop_inactive, vp)((vp)->v_fbhv, cr)
+#define VOP_RELEASE(vp, rv)                                            \
+       rv = _VOP_(vop_release, vp)((vp)->v_fbhv)
+#define VOP_FID2(vp, fidp, rv)                                         \
+       rv = _VOP_(vop_fid2, vp)((vp)->v_fbhv, fidp)
+#define VOP_RWLOCK(vp,i)                                               \
+       (void)_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i)
+#define VOP_RWLOCK_TRY(vp,i)                                           \
+       _VOP_(vop_rwlock, vp)((vp)->v_fbhv, i)
+#define VOP_RWUNLOCK(vp,i)                                             \
+       (void)_VOP_(vop_rwunlock, vp)((vp)->v_fbhv, i)
+#define VOP_FRLOCK(vp,c,fl,flags,offset,fr,rv)                         \
+       rv = _VOP_(vop_frlock, vp)((vp)->v_fbhv,c,fl,flags,offset,fr)
+#define VOP_RECLAIM(vp, rv)                                            \
+       rv = _VOP_(vop_reclaim, vp)((vp)->v_fbhv)
+#define VOP_ATTR_GET(vp, name, val, vallenp, fl, cred, rv)             \
+       rv = _VOP_(vop_attr_get, vp)((vp)->v_fbhv,name,val,vallenp,fl,cred)
+#define        VOP_ATTR_SET(vp, name, val, vallen, fl, cred, rv)               \
+       rv = _VOP_(vop_attr_set, vp)((vp)->v_fbhv,name,val,vallen,fl,cred)
+#define        VOP_ATTR_REMOVE(vp, name, flags, cred, rv)                      \
+       rv = _VOP_(vop_attr_remove, vp)((vp)->v_fbhv,name,flags,cred)
+#define        VOP_ATTR_LIST(vp, buf, buflen, fl, cursor, cred, rv)            \
+       rv = _VOP_(vop_attr_list, vp)((vp)->v_fbhv,buf,buflen,fl,cursor,cred)
+#define VOP_LINK_REMOVED(vp, dvp, linkzero)                            \
+       (void)_VOP_(vop_link_removed, vp)((vp)->v_fbhv, dvp, linkzero)
+#define VOP_VNODE_CHANGE(vp, cmd, val)                                 \
+       (void)_VOP_(vop_vnode_change, vp)((vp)->v_fbhv,cmd,val)
+/*
+ * These are page cache functions that now go thru VOPs.
+ * 'last' parameter is unused and left in for IRIX compatibility
+ */
+#define VOP_TOSS_PAGES(vp, first, last, fiopt)                         \
+       _VOP_(vop_tosspages, vp)((vp)->v_fbhv,first, last, fiopt)
+/*
+ * 'last' parameter is unused and left in for IRIX compatibility
+ */
+#define VOP_FLUSHINVAL_PAGES(vp, first, last, fiopt)                   \
+       _VOP_(vop_flushinval_pages, vp)((vp)->v_fbhv,first,last,fiopt)
+/*
+ * 'last' parameter is unused and left in for IRIX compatibility
+ */
+#define VOP_FLUSH_PAGES(vp, first, last, flags, fiopt, rv)             \
+       rv = _VOP_(vop_flush_pages, vp)((vp)->v_fbhv,first,last,flags,fiopt)
+#define VOP_IOCTL(vp, inode, filp, fl, cmd, arg, rv)                   \
+       rv = _VOP_(vop_ioctl, vp)((vp)->v_fbhv,inode,filp,fl,cmd,arg)
+#define VOP_IFLUSH(vp, flags, rv)                                      \
+       rv = _VOP_(vop_iflush, vp)((vp)->v_fbhv, flags)
+
+/*
+ * Flags for read/write calls - same values as IRIX
+ */
+#define IO_ISDIRECT    0x00004         /* bypass page cache */
+#define IO_INVIS       0x00020         /* don't update inode timestamps */
+
+/*
+ * Flags for VOP_IFLUSH call
+ */
+#define FLUSH_SYNC             1       /* wait for flush to complete   */
+#define FLUSH_INODE            2       /* flush the inode itself       */
+#define FLUSH_LOG              4       /* force the last log entry for
+                                        * this inode out to disk       */
+
+/*
+ * Flush/Invalidate options for VOP_TOSS_PAGES, VOP_FLUSHINVAL_PAGES and
+ *     VOP_FLUSH_PAGES.
+ */
+#define FI_NONE                        0       /* none */
+#define FI_REMAPF              1       /* Do a remapf prior to the operation */
+#define FI_REMAPF_LOCKED       2       /* Do a remapf prior to the operation.
+                                          Prevent VM access to the pages until
+                                          the operation completes. */
+
+/*
+ * Vnode attributes.  va_mask indicates those attributes the caller
+ * wants to set or extract.
+ */
+typedef struct vattr {
+       int             va_mask;        /* bit-mask of attributes present */
+       enum vtype      va_type;        /* vnode type (for create) */
+       mode_t          va_mode;        /* file access mode and type */
+       nlink_t         va_nlink;       /* number of references to file */
+       uid_t           va_uid;         /* owner user id */
+       gid_t           va_gid;         /* owner group id */
+       xfs_ino_t       va_nodeid;      /* file id */
+       xfs_off_t       va_size;        /* file size in bytes */
+       u_long          va_blocksize;   /* blocksize preferred for i/o */
+       struct timespec va_atime;       /* time of last access */
+       struct timespec va_mtime;       /* time of last modification */
+       struct timespec va_ctime;       /* time file changed */
+       u_int           va_gen;         /* generation number of file */
+       xfs_dev_t       va_rdev;        /* device the special file represents */
+       __int64_t       va_nblocks;     /* number of blocks allocated */
+       u_long          va_xflags;      /* random extended file flags */
+       u_long          va_extsize;     /* file extent size */
+       u_long          va_nextents;    /* number of extents in file */
+       u_long          va_anextents;   /* number of attr extents in file */
+       int             va_projid;      /* project id */
+} vattr_t;
+
+/*
+ * setattr or getattr attributes
+ */
+#define XFS_AT_TYPE            0x00000001
+#define XFS_AT_MODE            0x00000002
+#define XFS_AT_UID             0x00000004
+#define XFS_AT_GID             0x00000008
+#define XFS_AT_FSID            0x00000010
+#define XFS_AT_NODEID          0x00000020
+#define XFS_AT_NLINK           0x00000040
+#define XFS_AT_SIZE            0x00000080
+#define XFS_AT_ATIME           0x00000100
+#define XFS_AT_MTIME           0x00000200
+#define XFS_AT_CTIME           0x00000400
+#define XFS_AT_RDEV            0x00000800
+#define XFS_AT_BLKSIZE         0x00001000
+#define XFS_AT_NBLOCKS         0x00002000
+#define XFS_AT_VCODE           0x00004000
+#define XFS_AT_MAC             0x00008000
+#define XFS_AT_UPDATIME                0x00010000
+#define XFS_AT_UPDMTIME                0x00020000
+#define XFS_AT_UPDCTIME                0x00040000
+#define XFS_AT_ACL             0x00080000
+#define XFS_AT_CAP             0x00100000
+#define XFS_AT_INF             0x00200000
+#define XFS_AT_XFLAGS          0x00400000
+#define XFS_AT_EXTSIZE         0x00800000
+#define XFS_AT_NEXTENTS                0x01000000
+#define XFS_AT_ANEXTENTS       0x02000000
+#define XFS_AT_PROJID          0x04000000
+#define XFS_AT_SIZE_NOPERM     0x08000000
+#define XFS_AT_GENCOUNT                0x10000000
+
+#define XFS_AT_ALL     (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
+               XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
+               XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
+               XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\
+               XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\
+               XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT)
+
+#define XFS_AT_STAT    (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
+               XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
+               XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
+               XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_PROJID)
+
+#define XFS_AT_TIMES   (XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME)
+
+#define XFS_AT_UPDTIMES        (XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME)
+
+#define XFS_AT_NOSET   (XFS_AT_NLINK|XFS_AT_RDEV|XFS_AT_FSID|XFS_AT_NODEID|\
+               XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
+               XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)
+
+/*
+ *  Modes.
+ */
+#define VSUID  S_ISUID         /* set user id on execution */
+#define VSGID  S_ISGID         /* set group id on execution */
+#define VSVTX  S_ISVTX         /* save swapped text even after use */
+#define VREAD  S_IRUSR         /* read, write, execute permissions */
+#define VWRITE S_IWUSR
+#define VEXEC  S_IXUSR
+
+#define MODEMASK S_IALLUGO     /* mode bits plus permission bits */
+
+/*
+ * Check whether mandatory file locking is enabled: a regular file with
+ * the setgid bit set but group-execute permission clear.
+ */
+#define MANDLOCK(vp, mode)     \
+       ((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
+
+extern void    vn_init(void);
+extern int     vn_wait(struct vnode *);
+extern vnode_t *vn_initialize(struct inode *);
+
+/*
+ * Acquiring and invalidating vnodes:
+ *
+ *     if (vn_get(vp, version, 0))
+ *             ...;
+ *     vn_purge(vp, version);
+ *
+ * vn_get and vn_purge must be called with vmap_t arguments, sampled
+ * while a lock that the vnode's VOP_RECLAIM function acquires is
+ * held, to ensure that the vnode sampled with the lock held isn't
+ * recycled (VOP_RECLAIMed) or deallocated between the release of the lock
+ * and the subsequent vn_get or vn_purge.
+ */
+
+/*
+ * vnode_map structures _must_ match vn_epoch and vnode structure sizes.
+ */
+typedef struct vnode_map {
+       vfs_t           *v_vfsp;
+       vnumber_t       v_number;               /* in-core vnode number */
+       xfs_ino_t       v_ino;                  /* inode #      */
+} vmap_t;
+
+#define VMAP(vp, vmap) {(vmap).v_vfsp   = (vp)->v_vfsp,        \
+                        (vmap).v_number = (vp)->v_number,      \
+                        (vmap).v_ino    = (vp)->v_inode.i_ino; }
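+/*
+ * Illustrative sketch of the pattern described above (the surrounding
+ * lock and error handling are the caller's, not part of this header):
+ *
+ *	vmap_t	vmap;
+ *
+ *	VMAP(vp, vmap);			sample while the lock is held
+ *	... release the lock ...
+ *	vp = vn_get(vp, &vmap);		NULL means it was recycled
+ */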
+
+extern void    vn_purge(struct vnode *, vmap_t *);
+extern vnode_t *vn_get(struct vnode *, vmap_t *);
+extern int     vn_revalidate(struct vnode *);
+extern void    vn_remove(struct vnode *);
+
+static inline int vn_count(struct vnode *vp)
+{
+       return atomic_read(&LINVFS_GET_IP(vp)->i_count);
+}
+
+/*
+ * Vnode reference counting functions (and macros for compatibility).
+ */
+extern vnode_t *vn_hold(struct vnode *);
+extern void    vn_rele(struct vnode *);
+
+#if defined(XFS_VNODE_TRACE)
+#define VN_HOLD(vp)            \
+       ((void)vn_hold(vp),     \
+         vn_trace_hold(vp, __FILE__, __LINE__, (inst_t *)__return_address))
+#define VN_RELE(vp)            \
+         (vn_trace_rele(vp, __FILE__, __LINE__, (inst_t *)__return_address), \
+          iput(LINVFS_GET_IP(vp)))
+#else
+#define VN_HOLD(vp)            ((void)vn_hold(vp))
+#define VN_RELE(vp)            (iput(LINVFS_GET_IP(vp)))
+#endif
+
+/*
+ * Vname handling macros.
+ */
+#define VNAME(dentry)          ((char *) (dentry)->d_name.name)
+#define VNAMELEN(dentry)       ((dentry)->d_name.len)
+#define VNAME_TO_VNODE(dentry) (LINVFS_GET_VP((dentry)->d_inode))
+
+/*
+ * Vnode spinlock manipulation.
+ */
+#define VN_LOCK(vp)            mutex_spinlock(&(vp)->v_lock)
+#define VN_UNLOCK(vp, s)       mutex_spinunlock(&(vp)->v_lock, s)
+#define VN_FLAGSET(vp,b)       vn_flagset(vp,b)
+#define VN_FLAGCLR(vp,b)       vn_flagclr(vp,b)
+
+static __inline__ void vn_flagset(struct vnode *vp, uint flag)
+{
+       spin_lock(&vp->v_lock);
+       vp->v_flag |= flag;
+       spin_unlock(&vp->v_lock);
+}
+
+static __inline__ void vn_flagclr(struct vnode *vp, uint flag)
+{
+       spin_lock(&vp->v_lock);
+       vp->v_flag &= ~flag;
+       spin_unlock(&vp->v_lock);
+}
+
+/*
+ * Update modify/access/change times on the vnode
+ */
+#define VN_MTIMESET(vp, tvp)   (LINVFS_GET_IP(vp)->i_mtime = *(tvp))
+#define VN_ATIMESET(vp, tvp)   (LINVFS_GET_IP(vp)->i_atime = *(tvp))
+#define VN_CTIMESET(vp, tvp)   (LINVFS_GET_IP(vp)->i_ctime = *(tvp))
+
+/*
+ * Some useful predicates.
+ */
+#define VN_MAPPED(vp)  mapping_mapped(LINVFS_GET_IP(vp)->i_mapping)
+#define VN_CACHED(vp)  (LINVFS_GET_IP(vp)->i_mapping->nrpages)
+#define VN_DIRTY(vp)   mapping_tagged(LINVFS_GET_IP(vp)->i_mapping, \
+                                       PAGECACHE_TAG_DIRTY)
+#define VMODIFY(vp)    VN_FLAGSET(vp, VMODIFIED)
+#define VUNMODIFY(vp)  VN_FLAGCLR(vp, VMODIFIED)
+
+/*
+ * Flags to VOP_SETATTR/VOP_GETATTR.
+ */
+#define        ATTR_UTIME      0x01    /* non-default utime(2) request */
+#define        ATTR_DMI        0x08    /* invocation from a DMI function */
+#define        ATTR_LAZY       0x80    /* set/get attributes lazily */
+#define        ATTR_NONBLOCK   0x100   /* return EAGAIN if operation would block */
+
+/*
+ * Flags to VOP_FSYNC and VOP_RECLAIM.
+ */
+#define FSYNC_NOWAIT   0       /* asynchronous flush */
+#define FSYNC_WAIT     0x1     /* synchronous fsync or forced reclaim */
+#define FSYNC_INVAL    0x2     /* flush and invalidate cached data */
+#define FSYNC_DATA     0x4     /* synchronous fsync of data only */
+
+/*
+ * Tracking vnode activity.
+ */
+#if defined(XFS_VNODE_TRACE)
+
+#define        VNODE_TRACE_SIZE        16              /* number of trace entries */
+#define        VNODE_KTRACE_ENTRY      1
+#define        VNODE_KTRACE_EXIT       2
+#define        VNODE_KTRACE_HOLD       3
+#define        VNODE_KTRACE_REF        4
+#define        VNODE_KTRACE_RELE       5
+
+extern void vn_trace_entry(struct vnode *, char *, inst_t *);
+extern void vn_trace_exit(struct vnode *, char *, inst_t *);
+extern void vn_trace_hold(struct vnode *, char *, int, inst_t *);
+extern void vn_trace_ref(struct vnode *, char *, int, inst_t *);
+extern void vn_trace_rele(struct vnode *, char *, int, inst_t *);
+
+#define        VN_TRACE(vp)            \
+       vn_trace_ref(vp, __FILE__, __LINE__, (inst_t *)__return_address)
+#else
+#define        vn_trace_entry(a,b,c)
+#define        vn_trace_exit(a,b,c)
+#define        vn_trace_hold(a,b,c,d)
+#define        vn_trace_ref(a,b,c,d)
+#define        vn_trace_rele(a,b,c,d)
+#define        VN_TRACE(vp)
+#endif
+
+#endif /* __XFS_VNODE_H__ */
diff --git a/include/asm-alpha/rmap.h b/include/asm-alpha/rmap.h
new file mode 100644 (file)
index 0000000..08b2236
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _ALPHA_RMAP_H
+#define _ALPHA_RMAP_H
+
+/* nothing to see, move along */
+#include <asm-generic/rmap.h>
+
+#endif
diff --git a/include/asm-arm/arch-cl7500/ide.h b/include/asm-arm/arch-cl7500/ide.h
new file mode 100644 (file)
index 0000000..78f97a3
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * linux/include/asm-arm/arch-cl7500/ide.h
+ *
+ * Copyright (c) 1997 Russell King
+ *
+ * Modifications:
+ *  29-07-1998 RMK     Major re-work of IDE architecture specific code
+ */
+#include <asm/irq.h>
+#include <asm/arch/hardware.h>
+
+/*
+ * Set up a hw structure for a specified data port, control port and IRQ.
+ * This should follow whatever the default interface uses.
+ */
+static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
+                                      unsigned long ctrl_port, int *irq)
+{
+       unsigned long reg = data_port;
+       int i;
+
+       memset(hw, 0, sizeof(*hw));
+
+       for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
+               hw->io_ports[i] = reg;
+               reg += 1;
+       }
+       if (ctrl_port) {
+               hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
+       } else {
+               hw->io_ports[IDE_CONTROL_OFFSET] = data_port + 0x206;
+       }
+       if (irq != NULL)
+               *irq = 0;
+       hw->io_ports[IDE_IRQ_OFFSET] = 0;
+}
+
+/*
+ * This registers the standard ports for this architecture with the IDE
+ * driver.
+ */
+static __inline__ void
+ide_init_default_hwifs(void)
+{
+       hw_regs_t hw;
+
+       ide_init_hwif_ports(&hw, ISASLOT_IO + 0x1f0, ISASLOT_IO + 0x3f6, NULL);
+       hw.irq = IRQ_ISA_14;
+	ide_register_hw(&hw, NULL);
+}
diff --git a/include/asm-arm/arch-cl7500/keyboard.h b/include/asm-arm/arch-cl7500/keyboard.h
new file mode 100644 (file)
index 0000000..660b31a
--- /dev/null
@@ -0,0 +1,16 @@
+/*
+ * linux/include/asm-arm/arch-cl7500/keyboard.h
+ *  from linux/include/asm-arm/arch-rpc/keyboard.h
+ *
+ * Keyboard driver definitions for CL7500 architecture
+ *
+ * Copyright (C) 1998-2001 Russell King
+ */
+#include <asm/irq.h>
+#define NR_SCANCODES 128
+
+extern int ps2kbd_init_hw(void);
+
+#define kbd_disable_irq()      disable_irq(IRQ_KEYBOARDRX)
+#define kbd_enable_irq()       enable_irq(IRQ_KEYBOARDRX)
+#define kbd_init_hw()          ps2kbd_init_hw()
diff --git a/include/asm-arm/arch-clps711x/keyboard.h b/include/asm-arm/arch-clps711x/keyboard.h
new file mode 100644 (file)
index 0000000..30ab219
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * linux/include/asm-arm/arch-clps711x/keyboard.h
+ *
+ * Copyright (C) 1998-2001 Russell King
+ */
+#include <asm/mach-types.h>
+
+#define NR_SCANCODES 128
+
+#define kbd_disable_irq()      do { } while (0)
+#define kbd_enable_irq()       do { } while (0)
+
+/*
+ * EDB7211 keyboard driver
+ */
+extern void edb7211_kbd_init_hw(void);
+extern void clps711x_kbd_init_hw(void);
+
+static inline void kbd_init_hw(void)
+{
+       if (machine_is_edb7211())
+               edb7211_kbd_init_hw();
+
+       if (machine_is_autcpu12())
+               clps711x_kbd_init_hw();
+}
diff --git a/include/asm-arm/arch-ebsa110/ide.h b/include/asm-arm/arch-ebsa110/ide.h
new file mode 100644 (file)
index 0000000..35eff5c
--- /dev/null
@@ -0,0 +1 @@
+/* no ide */
diff --git a/include/asm-arm/arch-ebsa285/ide.h b/include/asm-arm/arch-ebsa285/ide.h
new file mode 100644 (file)
index 0000000..09c0310
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ *  linux/include/asm-arm/arch-ebsa285/ide.h
+ *
+ *  Copyright (C) 1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Modifications:
+ *   29-07-1998        RMK     Major re-work of IDE architecture specific code
+ */
+#include <asm/irq.h>
+
+/*
+ * Set up a hw structure for a specified data port, control port and IRQ.
+ * This should follow whatever the default interface uses.
+ */
+static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
+                                      unsigned long ctrl_port, int *irq)
+{
+       unsigned long reg = data_port;
+       int i;
+
+       for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
+               hw->io_ports[i] = reg;
+               reg += 1;
+       }
+       hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
+       if (irq)
+               *irq = 0;
+}
+
+/*
+ * This registers the standard ports for this architecture with the IDE
+ * driver.
+ */
+static __inline__ void ide_init_default_hwifs(void)
+{
+#if 0
+       hw_regs_t hw;
+
+	memset(&hw, 0, sizeof(hw));
+
+       ide_init_hwif_ports(&hw, 0x1f0, 0x3f6, NULL);
+       hw.irq = IRQ_HARDDISK;
+       ide_register_hw(&hw);
+#endif
+}
diff --git a/include/asm-arm/arch-iop3xx/ide.h b/include/asm-arm/arch-iop3xx/ide.h
new file mode 100644 (file)
index 0000000..c2b0265
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * include/asm-arm/arch-iop3xx/ide.h
+ *
+ * Generic IDE functions for IOP310 systems
+ *
+ * Author: Deepak Saxena <dsaxena@mvista.com>
+ *
+ * Copyright 2001 MontaVista Software Inc.
+ *
+ * 09/26/2001 - Sharon Baartmans
+ *     Fixed so it actually works.
+ */
+
+#ifndef _ASM_ARCH_IDE_H_
+#define _ASM_ARCH_IDE_H_
+
+/*
+ * Set up a hw structure for a specified data port, control port and IRQ.
+ * This should follow whatever the default interface uses.
+ */
+static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
+                                      unsigned long ctrl_port, int *irq)
+{
+       unsigned long reg = data_port;
+       int i;
+       int regincr = 1;
+
+       memset(hw, 0, sizeof(*hw));
+
+       for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
+               hw->io_ports[i] = reg;
+               reg += regincr;
+       }
+
+       hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
+
+       if (irq) *irq = 0;
+}
+
+/*
+ * This registers the standard ports for this architecture with the IDE
+ * driver.
+ */
+static __inline__ void ide_init_default_hwifs(void)
+{
+       /* There are no standard ports */
+}
+
+#endif
diff --git a/include/asm-arm/arch-l7200/ide.h b/include/asm-arm/arch-l7200/ide.h
new file mode 100644 (file)
index 0000000..62ee12a
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * linux/include/asm-arm/arch-l7200/ide.h
+ *
+ * Copyright (c) 2000 Steve Hill (sjhill@cotw.com)
+ *
+ * Changelog:
+ *  03-29-2000 SJH     Created file placeholder
+ */
+#include <asm/irq.h>
+
+/*
+ * Set up a hw structure for a specified data port, control port and IRQ.
+ * This should follow whatever the default interface uses.
+ */
+static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
+                                      unsigned long ctrl_port, int *irq)
+{
+}
+
+/*
+ * This registers the standard ports for this architecture with the IDE
+ * driver.
+ */
+static __inline__ void
+ide_init_default_hwifs(void)
+{
+}
diff --git a/include/asm-arm/arch-l7200/keyboard.h b/include/asm-arm/arch-l7200/keyboard.h
new file mode 100644 (file)
index 0000000..6628bd3
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ *  linux/include/asm-arm/arch-l7200/keyboard.h
+ *
+ *  Keyboard driver definitions for LinkUp Systems L7200 architecture
+ *
+ *  Copyright (C) 2000 Scott A McConnell (samcconn@cotw.com)
+ *                     Steve Hill (sjhill@cotw.com)
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License. See the file COPYING in the main directory of this archive for
+ *  more details.
+ *
+ * Changelog:
+ *   07-18-2000        SAM     Created file
+ *   07-28-2000        SJH     Complete rewrite
+ */
+
+#include <asm/irq.h>
+
+#error This needs fixing --rmk
+
+/*
+ * Layout of L7200 keyboard registers
+ */
+struct KBD_Port {       
+       unsigned int KBDR;
+       unsigned int KBDMR;
+       unsigned int KBSBSR;
+       unsigned int Reserved;
+       unsigned int KBKSR;
+};
+
+#define KBD_BASE        IO_BASE_2 + 0x4000
+#define l7200kbd_hwregs ((volatile struct KBD_Port *) (KBD_BASE))
+
+extern void l7200kbd_init_hw(void);
+extern int l7200kbd_translate(unsigned char scancode, unsigned char *keycode,
+                             char raw_mode);
+
+#define kbd_setkeycode(sc,kc)          (-EINVAL)
+#define kbd_getkeycode(sc)             (-EINVAL)
+
+#define kbd_translate(sc, kcp, rm)      ({ *(kcp) = (sc); 1; })
+#define kbd_unexpected_up(kc)           (0200)
+#define kbd_leds(leds)                  do {} while (0)
+#define kbd_init_hw()                   l7200kbd_init_hw()
+#define kbd_sysrq_xlate                 ((unsigned char *)NULL)
+#define kbd_disable_irq()               disable_irq(IRQ_GCTC2)
+#define kbd_enable_irq()                enable_irq(IRQ_GCTC2)
+
+#define SYSRQ_KEY      13
diff --git a/include/asm-arm/arch-nexuspci/ide.h b/include/asm-arm/arch-nexuspci/ide.h
new file mode 100644 (file)
index 0000000..5514808
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * linux/include/asm-arm/arch-nexuspci/ide.h
+ *
+ * Copyright (c) 1998 Russell King
+ *
+ * Modifications:
+ *  29-07-1998 RMK     Major re-work of IDE architecture specific code
+ */
+#include <asm/irq.h>
+
+/*
+ * Set up a hw structure for a specified data port, control port and IRQ.
+ * This should follow whatever the default interface uses.
+ */
+static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
+                                      unsigned long ctrl_port, int *irq)
+{
+       unsigned long reg = data_port;
+       int i;
+
+       for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
+               hw->io_ports[i] = reg;
+               reg += 1;
+       }
+       hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
+       if (irq)
+               *irq = 0;
+}
+
+/*
+ * This registers the standard ports for this architecture with the IDE
+ * driver.
+ */
+static __inline__ void ide_init_default_hwifs(void)
+{
+       /* There are no standard ports */
+}
diff --git a/include/asm-arm/arch-pxa/ide.h b/include/asm-arm/arch-pxa/ide.h
new file mode 100644 (file)
index 0000000..a9efdce
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * linux/include/asm-arm/arch-pxa/ide.h
+ *
+ * Author:     George Davis
+ * Created:    Jan 10, 2002
+ * Copyright:  MontaVista Software Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *
+ * Originally based upon linux/include/asm-arm/arch-sa1100/ide.h
+ *
+ */
+
+#include <asm/irq.h>
+#include <asm/hardware.h>
+#include <asm/mach-types.h>
+
+
+/*
+ * Set up a hw structure for a specified data port, control port and IRQ.
+ * This should follow whatever the default interface uses.
+ */
+static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
+                                      unsigned long ctrl_port, int *irq)
+{
+       unsigned long reg = data_port;
+       int i;
+       int regincr = 1;
+
+       memset(hw, 0, sizeof(*hw));
+
+       for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
+               hw->io_ports[i] = reg;
+               reg += regincr;
+       }
+
+       hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
+
+       if (irq)
+               *irq = 0;
+}
+
+
+/*
+ * Register the standard ports for this architecture with the IDE driver.
+ */
+static __inline__ void
+ide_init_default_hwifs(void)
+{
+       /* Nothing to declare... */
+}
diff --git a/include/asm-arm/arch-pxa/keyboard.h b/include/asm-arm/arch-pxa/keyboard.h
new file mode 100644 (file)
index 0000000..7bec317
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ *  linux/include/asm-arm/arch-pxa/keyboard.h
+ *
+ *  This file contains the architecture specific keyboard definitions
+ */
+
+#ifndef _PXA_KEYBOARD_H
+#define _PXA_KEYBOARD_H
+
+#include <asm/mach-types.h>
+#include <asm/hardware.h>
+
+extern struct kbd_ops_struct *kbd_ops;
+
+#define kbd_disable_irq()	do { } while (0)
+#define kbd_enable_irq()	do { } while (0)
+
+extern int sa1111_kbd_init_hw(void);
+
+static inline void kbd_init_hw(void)
+{
+       if (machine_is_lubbock())
+               sa1111_kbd_init_hw();
+}
+
+
+#endif  /* _PXA_KEYBOARD_H */
+
diff --git a/include/asm-arm/arch-rpc/ide.h b/include/asm-arm/arch-rpc/ide.h
new file mode 100644 (file)
index 0000000..92c7030
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ *  linux/include/asm-arm/arch-rpc/ide.h
+ *
+ *  Copyright (C) 1997 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Modifications:
+ *   29-07-1998        RMK     Major re-work of IDE architecture specific code
+ */
+#include <asm/irq.h>
+
+/*
+ * Set up a hw structure for a specified data port, control port and IRQ.
+ * This should follow whatever the default interface uses.
+ */
+static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
+                                      unsigned long ctrl_port, int *irq)
+{
+       unsigned long reg = data_port;
+       int i;
+
+       memset(hw, 0, sizeof(*hw));
+
+       for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
+               hw->io_ports[i] = reg;
+               reg += 1;
+       }
+       hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
+       if (irq)
+               *irq = 0;
+}
+
+/*
+ * This registers the standard ports for this architecture with the IDE
+ * driver.
+ */
+static __inline__ void
+ide_init_default_hwifs(void)
+{
+       hw_regs_t hw;
+
+       ide_init_hwif_ports(&hw, 0x1f0, 0x3f6, NULL);
+       hw.irq = IRQ_HARDDISK;
+       ide_register_hw(&hw, NULL);
+}
diff --git a/include/asm-arm/arch-s3c2410/ide.h b/include/asm-arm/arch-s3c2410/ide.h
new file mode 100644 (file)
index 0000000..de651e7
--- /dev/null
@@ -0,0 +1,49 @@
+/* linux/include/asm-arm/arch-s3c2410/ide.h
+ *
+ *  Copyright (C) 1997 Russell King
+ *  Copyright (C) 2003 Simtec Electronics
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Modifications:
+ *   29-07-1998        RMK     Major re-work of IDE architecture specific code
+ *   16-05-2003 BJD    Changed to work with BAST IDE ports
+ *   04-09-2003 BJD    Modifications for V2.6
+ */
+
+#ifndef __ASM_ARCH_IDE_H
+#define __ASM_ARCH_IDE_H
+
+#include <asm/irq.h>
+
+/*
+ * Set up a hw structure for a specified data port, control port and IRQ.
+ * This should follow whatever the default interface uses.
+ */
+
+static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
+                                      unsigned long ctrl_port, int *irq)
+{
+       unsigned long reg = data_port;
+       int i;
+
+       memset(hw, 0, sizeof(*hw));
+
+       for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
+               hw->io_ports[i] = reg;
+               reg += 1;
+       }
+       hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
+       if (irq)
+               *irq = 0;
+}
+
+/* We initialise our IDE devices from the main IDE core, due to problems
+ * with doing it in this function.
+ */
+
+#define ide_init_default_hwifs() do { } while(0)
+
+#endif /* __ASM_ARCH_IDE_H */
diff --git a/include/asm-arm/arch-sa1100/keyboard.h b/include/asm-arm/arch-sa1100/keyboard.h
new file mode 100644 (file)
index 0000000..3dacd71
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ *  linux/include/asm-arm/arch-sa1100/keyboard.h
+ *  Created 16 Dec 1999 by Nicolas Pitre <nico@cam.org>
+ *  This file contains the SA1100 architecture specific keyboard definitions
+ */
+#ifndef _SA1100_KEYBOARD_H
+#define _SA1100_KEYBOARD_H
+
+#include <linux/config.h>
+#include <asm/mach-types.h>
+
+extern void gc_kbd_init_hw(void);
+extern void smartio_kbd_init_hw(void);
+
+static inline void kbd_init_hw(void)
+{
+       if (machine_is_graphicsclient())
+               gc_kbd_init_hw();
+       if (machine_is_adsbitsy())
+               smartio_kbd_init_hw();
+}
+
+#endif  /* _SA1100_KEYBOARD_H */
diff --git a/include/asm-arm/arch-shark/ide.h b/include/asm-arm/arch-shark/ide.h
new file mode 100644 (file)
index 0000000..f6a99b2
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * linux/include/asm-arm/arch-shark/ide.h
+ *
+ * by Alexander Schulz
+ *
+ * derived from:
+ * linux/include/asm-arm/arch-ebsa285/ide.h
+ * Copyright (c) 1998 Russell King
+ */
+
+#include <asm/irq.h>
+
+/*
+ * Set up a hw structure for a specified data port, control port and IRQ.
+ * This should follow whatever the default interface uses.
+ */
+static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
+                                      unsigned long ctrl_port, int *irq)
+{
+       unsigned long reg = data_port;
+       int i;
+
+       memset(hw, 0, sizeof(*hw));
+
+       for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
+               hw->io_ports[i] = reg;
+               reg += 1;
+       }
+       hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
+       if (irq)
+               *irq = 0;
+}
+
+/*
+ * This registers the standard ports for this architecture with the IDE
+ * driver.
+ */
+static __inline__ void
+ide_init_default_hwifs(void)
+{
+       hw_regs_t hw;
+
+       ide_init_hwif_ports(&hw, 0x1f0, 0x3f6, NULL);
+       hw.irq = 14;
+       ide_register_hw(&hw,NULL);
+}
+
diff --git a/include/asm-arm/arch-shark/keyboard.h b/include/asm-arm/arch-shark/keyboard.h
new file mode 100644 (file)
index 0000000..52b5ed6
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * linux/include/asm-arm/arch-shark/keyboard.h
+ * by Alexander Schulz
+ * 
+ * Derived from linux/include/asm-arm/arch-ebsa285/keyboard.h
+ * (C) 1998 Russell King
+ * (C) 1998 Phil Blundell
+ */
+#include <linux/config.h>
+#include <linux/ioport.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/system.h>
+
+#define KEYBOARD_IRQ                   IRQ_ISA_KEYBOARD
+#define NR_SCANCODES                   128
+
+#define kbd_disable_irq()              do { } while (0)
+#define kbd_enable_irq()               do { } while (0)
+
+extern int pckbd_setkeycode(unsigned int scancode, unsigned int keycode);
+extern int pckbd_getkeycode(unsigned int scancode);
+extern int pckbd_translate(unsigned char scancode, unsigned char *keycode,
+                          char raw_mode);
+extern char pckbd_unexpected_up(unsigned char keycode);
+extern void pckbd_leds(unsigned char leds);
+extern void pckbd_init_hw(void);
+extern unsigned char pckbd_sysrq_xlate[128];
+
+static inline void kbd_init_hw(void)
+{
+               k_setkeycode    = pckbd_setkeycode;
+               k_getkeycode    = pckbd_getkeycode;
+               k_translate     = pckbd_translate;
+               k_unexpected_up = pckbd_unexpected_up;
+               k_leds          = pckbd_leds;
+#ifdef CONFIG_MAGIC_SYSRQ
+               k_sysrq_key     = 0x54;
+               k_sysrq_xlate   = pckbd_sysrq_xlate;
+#endif
+               pckbd_init_hw();
+}
+
+/*
+ * PC Keyboard specifics
+ */
+
+/* resource allocation */
+#define kbd_request_region() request_region(0x60, 16, "keyboard")
+#define kbd_request_irq(handler) request_irq(KEYBOARD_IRQ, handler, 0, \
+                                             "keyboard", NULL)
+
+/* How to access the keyboard macros on this platform.  */
+#define kbd_read_input() inb(KBD_DATA_REG)
+#define kbd_read_status() inb(KBD_STATUS_REG)
+#define kbd_write_output(val) outb(val, KBD_DATA_REG)
+#define kbd_write_command(val) outb(val, KBD_CNTL_REG)
+
+/* Some stoneage hardware needs delays after some operations.  */
+#define kbd_pause() do { } while(0)
+
+/*
+ * Machine specific bits for the PS/2 driver
+ */
+#define aux_request_irq(hand, dev_id)                                  \
+       request_irq(AUX_IRQ, hand, SA_SHIRQ, "PS/2 Mouse", dev_id)
+
+#define aux_free_irq(dev_id) free_irq(AUX_IRQ, dev_id)
diff --git a/include/asm-arm/arch-tbox/ide.h b/include/asm-arm/arch-tbox/ide.h
new file mode 100644 (file)
index 0000000..d66e67c
--- /dev/null
@@ -0,0 +1,3 @@
+/*
+ * linux/include/asm-arm/arch-tbox/ide.h
+ */
diff --git a/include/asm-arm/rmap.h b/include/asm-arm/rmap.h
new file mode 100644 (file)
index 0000000..bb9ee93
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _ARM_RMAP_H
+#define _ARM_RMAP_H
+
+#include <asm-generic/rmap.h>
+
+#endif /* _ARM_RMAP_H */
diff --git a/include/asm-arm26/rmap.h b/include/asm-arm26/rmap.h
new file mode 100644 (file)
index 0000000..6d5b6e0
--- /dev/null
@@ -0,0 +1,66 @@
+#ifndef _ARM_RMAP_H
+#define _ARM_RMAP_H
+
+/*
+ * linux/include/asm-arm26/proc-armv/rmap.h
+ *
+ * Architecture-dependent parts of the reverse mapping code.
+ *
+ * ARM is different since hardware page tables are smaller than
+ * the page size and Linux uses a "duplicate" one with extra info.
+ * For rmap this means that the first 2 kB of a page are the hardware
+ * page tables and the last 2 kB are the software page tables.
+ */
+
+static inline void pgtable_add_rmap(struct page *page, struct mm_struct * mm, unsigned long address)
+{
+        page->mapping = (void *)mm;
+        page->index = address & ~((PTRS_PER_PTE * PAGE_SIZE) - 1);
+        inc_page_state(nr_page_table_pages);
+}
+
+static inline void pgtable_remove_rmap(struct page *page)
+{
+        page->mapping = NULL;
+        page->index = 0;
+        dec_page_state(nr_page_table_pages);
+}
+
+static inline struct mm_struct * ptep_to_mm(pte_t * ptep)
+{
+       struct page * page = virt_to_page(ptep);
+        return (struct mm_struct *)page->mapping;
+}
+
+/* The page table takes half of the page */
+#define PTE_MASK  ((PAGE_SIZE / 2) - 1)
+
+static inline unsigned long ptep_to_address(pte_t * ptep)
+{
+        struct page * page = virt_to_page(ptep);
+        unsigned long low_bits;
+
+        low_bits = ((unsigned long)ptep & PTE_MASK) * PTRS_PER_PTE;
+        return page->index + low_bits;
+}
+/* FIXME: are the following correct? */
+static inline pte_addr_t ptep_to_paddr(pte_t *ptep)
+{
+        return (pte_addr_t)ptep;
+}
+
+static inline pte_t *rmap_ptep_map(pte_addr_t pte_paddr)
+{
+        return (pte_t *)pte_paddr;
+}
+
+static inline void rmap_ptep_unmap(pte_t *pte)
+{
+        return;
+}
+
+
+//#include <asm-generic/rmap.h>
+
+#endif /* _ARM_RMAP_H */
diff --git a/include/asm-cris/rmap.h b/include/asm-cris/rmap.h
new file mode 100644 (file)
index 0000000..c5bf2a8
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _CRIS_RMAP_H
+#define _CRIS_RMAP_H
+
+/* nothing to see, move along :) */
+#include <asm-generic/rmap.h>
+
+#endif
diff --git a/include/asm-generic/rmap.h b/include/asm-generic/rmap.h
new file mode 100644 (file)
index 0000000..f743d9f
--- /dev/null
@@ -0,0 +1,91 @@
+#ifndef _GENERIC_RMAP_H
+#define _GENERIC_RMAP_H
+/*
+ * linux/include/asm-generic/rmap.h
+ *
+ * Architecture-dependent parts of the reverse mapping code.
+ * This version should work for most architectures with a
+ * 'normal' page table layout.
+ *
+ * We use the struct page of the page table page to find out
+ * the process and full address of a page table entry:
+ * - page->mapping points to the process' mm_struct
+ * - page->index has the high bits of the address
+ * - the lower bits of the address are calculated from the
+ *   offset of the page table entry within the page table page
+ *
+ * For CONFIG_HIGHPTE, we need to represent the address of a pte in a
+ * scalar pte_addr_t.  The pfn of the pte's page is shifted left by PAGE_SHIFT
+ * bits and is then ORed with the byte offset of the pte within its page.
+ *
+ * For CONFIG_HIGHMEM4G, the pte_addr_t is 32 bits.  20 for the pfn, 12 for
+ * the offset.
+ *
+ * For CONFIG_HIGHMEM64G, the pte_addr_t is 64 bits.  52 for the pfn, 12 for
+ * the offset.
+ */
+#include <linux/mm.h>
+
+static inline void pgtable_add_rmap(struct page * page, struct mm_struct * mm, unsigned long address)
+{
+#ifdef BROKEN_PPC_PTE_ALLOC_ONE
+       /* OK, so PPC calls pte_alloc() before mem_map[] is setup ... ;( */
+       extern int mem_init_done;
+
+       if (!mem_init_done)
+               return;
+#endif
+       page->mapping = (void *)mm;
+       page->index = address & ~((PTRS_PER_PTE * PAGE_SIZE) - 1);
+       inc_page_state(nr_page_table_pages);
+}
+
+static inline void pgtable_remove_rmap(struct page * page)
+{
+       page->mapping = NULL;
+       page->index = 0;
+       dec_page_state(nr_page_table_pages);
+}
+
+static inline struct mm_struct * ptep_to_mm(pte_t * ptep)
+{
+       struct page * page = kmap_atomic_to_page(ptep);
+       return (struct mm_struct *) page->mapping;
+}
+
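+/*
+ * Recover the virtual address mapped by a pte: page->index supplies the
+ * high bits, and the pte's index within its page table page (byte offset
+ * divided by sizeof(pte_t)), scaled by PAGE_SIZE, supplies the low bits.
+ */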
+static inline unsigned long ptep_to_address(pte_t * ptep)
+{
+       struct page * page = kmap_atomic_to_page(ptep);
+       unsigned long low_bits;
+       low_bits = ((unsigned long)ptep & (PTRS_PER_PTE*sizeof(pte_t) - 1))
+                       * (PAGE_SIZE/sizeof(pte_t));
+       return page->index + low_bits;
+}
+
+#ifdef CONFIG_HIGHPTE
+static inline pte_addr_t ptep_to_paddr(pte_t *ptep)
+{
+       pte_addr_t paddr;
+       paddr = ((pte_addr_t)page_to_pfn(kmap_atomic_to_page(ptep))) << PAGE_SHIFT;
+       return paddr + (pte_addr_t)((unsigned long)ptep & ~PAGE_MASK);
+}
+#else
+static inline pte_addr_t ptep_to_paddr(pte_t *ptep)
+{
+       return (pte_addr_t)ptep;
+}
+#endif
+
+#ifndef CONFIG_HIGHPTE
+static inline pte_t *rmap_ptep_map(pte_addr_t pte_paddr)
+{
+       return (pte_t *)pte_paddr;
+}
+
+static inline void rmap_ptep_unmap(pte_t *pte)
+{
+       return;
+}
+#endif
+
+#endif /* _GENERIC_RMAP_H */
diff --git a/include/asm-h8300/aki3068net/machine-depend.h b/include/asm-h8300/aki3068net/machine-depend.h
new file mode 100644 (file)
index 0000000..510b86b
--- /dev/null
@@ -0,0 +1,29 @@
+/* AE-3068 board-dependent header */
+
+/* TIMER rate define */
+#ifdef H8300_TIMER_DEFINE
+#define H8300_TIMER_COUNT_DATA 20000*10/8192
+#define H8300_TIMER_FREQ 20000*1000/8192
+#endif
+
+/* AE-3068 RTL8019AS Config */
+#ifdef H8300_NE_DEFINE
+
+#define NE2000_ADDR            0x200000
+#define NE2000_IRQ              5
+#define        NE2000_BYTE             volatile unsigned short
+
+#define WCRL                    0xfee023
+#define MAR0A                   0xffff20
+#define ETCR0A                  0xffff24
+#define DTCR0A                  0xffff27
+#define MAR0B                   0xffff28
+#define DTCR0B                  0xffff2f
+
+#define H8300_INIT_NE()                  \
+do {                                     \
+       wordlength = 1;                  \
+        outb_p(0x48, ioaddr + EN0_DCFG); \
+} while(0)
+
+#endif
diff --git a/include/asm-h8300/edosk2674/machine-depend.h b/include/asm-h8300/edosk2674/machine-depend.h
new file mode 100644 (file)
index 0000000..1e98b40
--- /dev/null
@@ -0,0 +1,70 @@
+/* EDOSK2674 board-dependent header */
+
+/* TIMER rate define */
+#ifdef H8300_TIMER_DEFINE
+#define H8300_TIMER_COUNT_DATA 33000*10/8192
+#define H8300_TIMER_FREQ 33000*1000/8192
+#endif
+
+/* EDOSK-2674R SMSC network controller: target-dependent implementation */
+#ifdef H8300_SMSC_DEFINE
+
+#define SMSC_BASE 0xf80000
+#define SMSC_IRQ 16
+
+/* sorry quick hack */
+#if defined(outw)
+# undef outw
+#endif
+#define outw(d,a) edosk2674_smsc_outw(d,(volatile unsigned short *)(a))
+#if defined(inw)
+# undef inw
+#endif
+#define inw(a) edosk2674_smsc_inw((volatile unsigned short *)(a))
+#if defined(outsw)
+# undef outsw
+#endif
+#define outsw(a,p,l) edosk2674_smsc_outsw((volatile unsigned short *)(a),p,l)
+#if defined(insw)
+# undef insw
+#endif
+#define insw(a,p,l) edosk2674_smsc_insw((volatile unsigned short *)(a),p,l)
+
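+/*
+ * The inw/outw replacements byte-swap each 16-bit access; the string
+ * variants below copy whole buffers one halfword at a time, unswapped.
+ */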
+static inline void edosk2674_smsc_outw(
+       unsigned short d,
+       volatile unsigned short *a
+       )
+{
+       *a = (d >> 8) | (d << 8);
+}
+
+static inline unsigned short edosk2674_smsc_inw(
+       volatile unsigned short *a
+       )
+{
+       unsigned short d;
+       d = *a;
+       return (d >> 8) | (d << 8);
+}
+
+static inline void edosk2674_smsc_outsw(
+       volatile unsigned short *a,
+       unsigned short *p,
+       unsigned long l
+       )
+{
+       for (; l != 0; --l, p++)
+               *a = *p;
+}
+
+static inline void edosk2674_smsc_insw(
+       volatile unsigned short *a,
+       unsigned short *p,
+       unsigned long l
+       )
+{
+       for (; l != 0; --l, p++)
+               *p = *a;
+}
+
+#endif
diff --git a/include/asm-h8300/generic/machine-depend.h b/include/asm-h8300/generic/machine-depend.h
new file mode 100644 (file)
index 0000000..2d78096
--- /dev/null
@@ -0,0 +1,17 @@
+/* machine-dependent header */
+
+/* TIMER rate define */
+#ifdef H8300_TIMER_DEFINE
+#include <linux/config.h>
+#if defined(CONFIG_H83007) || defined(CONFIG_H83068) || defined(CONFIG_H8S2678)
+#define H8300_TIMER_COUNT_DATA CONFIG_CPU_CLOCK*10/8192
+#define H8300_TIMER_FREQ CONFIG_CPU_CLOCK*1000/8192
+#endif
+
+#if defined(CONFIG_H8_3002) || defined(CONFIG_H83048)
+#define H8300_TIMER_COUNT_DATA  CONFIG_CPU_CLOCK*10/8
+#define H8300_TIMER_FREQ CONFIG_CPU_CLOCK*1000/8
+#endif
+
+#endif
+
diff --git a/include/asm-h8300/generic/timer_rate.h b/include/asm-h8300/generic/timer_rate.h
new file mode 100644 (file)
index 0000000..0f6f419
--- /dev/null
@@ -0,0 +1,15 @@
+#include <linux/config.h>
+
+#if defined(CONFIG_H83007) || defined(CONFIG_H83068) || defined(CONFIG_H8S2678)
+#define H8300_TIMER_COUNT_DATA CONFIG_CPU_CLOCK*10/8192
+#define H8300_TIMER_FREQ CONFIG_CPU_CLOCK*1000/8192
+#endif
+
+#if defined(CONFIG_H8_3002) || defined(CONFIG_H83048)
+#define H8300_TIMER_COUNT_DATA  CONFIG_CPU_CLOCK*10/8
+#define H8300_TIMER_FREQ CONFIG_CPU_CLOCK*1000/8
+#endif
+
+#if !defined(H8300_TIMER_COUNT_DATA)
+#error illegal configuration
+#endif
diff --git a/include/asm-h8300/h8300_smsc.h b/include/asm-h8300/h8300_smsc.h
new file mode 100644 (file)
index 0000000..f8fa7f9
--- /dev/null
@@ -0,0 +1,20 @@
+/****************************************************************************/
+
+/*
+ *	h8300_smsc.h -- SMSC in H8/300H and H8S Evaluation Board.
+ *      
+ *     (C) Copyright 2003, Yoshinori Sato <ysato@users.sourceforge.jp>
+ */
+
+/****************************************************************************/
+#ifndef        h8300smsc_h
+#define        h8300smsc_h
+/****************************************************************************/
+
+/* Is such a description OK? */
+#define H8300_SMSC_DEFINE
+#include <asm/machine-depend.h>
+#undef  H8300_SMSC_DEFINE
+
+/****************************************************************************/
+#endif /* h8300smsc_h */
diff --git a/include/asm-h8300/h8max/machine-depend.h b/include/asm-h8300/h8max/machine-depend.h
new file mode 100644 (file)
index 0000000..e87d22e
--- /dev/null
@@ -0,0 +1,100 @@
+/* H8MAX board-dependent header */
+
+/* TIMER rate define */
+#ifdef H8300_TIMER_DEFINE
+#define H8300_TIMER_COUNT_DATA 25000*10/8192
+#define H8300_TIMER_FREQ 25000*1000/8192
+#endif
+
+/* H8MAX RTL8019AS Config */
+#ifdef H8300_NE_DEFINE
+
+#define NE2000_ADDR            0x800600
+#define NE2000_IRQ              4
+#define NE2000_IRQ_VECTOR      (12 + NE2000_IRQ)
+#define        NE2000_BYTE             volatile unsigned short
+
+/* sorry quick hack */
+#if defined(outb)
+# undef outb
+#endif
+#define outb(d,a)               h8max_outb((d),(a) - NE2000_ADDR)
+#if defined(inb)
+# undef inb
+#endif
+#define inb(a)                  h8max_inb((a) - NE2000_ADDR)
+#if defined(outb_p)
+# undef outb_p
+#endif
+#define outb_p(d,a)             h8max_outb((d),(a) - NE2000_ADDR)
+#if defined(inb_p)
+# undef inb_p
+#endif
+#define inb_p(a)                h8max_inb((a) - NE2000_ADDR)
+#if defined(outsw)
+# undef outsw
+#endif
+#define outsw(a,p,l)            h8max_outsw((a) - NE2000_ADDR,(unsigned short *)p,l)
+#if defined(insw)
+# undef insw
+#endif
+#define insw(a,p,l)             h8max_insw((a) - NE2000_ADDR,(unsigned short *)p,l)
+#if defined(outsb)
+# undef outsb
+#endif
+#define outsb(a,p,l)            h8max_outsb((a) - NE2000_ADDR,(unsigned char *)p,l)
+#if defined(insb)
+# undef insb
+#endif
+#define insb(a,p,l)             h8max_insb((a) - NE2000_ADDR,(unsigned char *)p,l)
+
+#define H8300_INIT_NE()                  \
+do {                                     \
+       wordlength = 2;                  \
+       h8max_outb(0x49, ioaddr + EN0_DCFG); \
+       SA_prom[14] = SA_prom[15] = 0x57;\
+} while(0)
+
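+/*
+ * Register access helpers: the RTL8019AS registers appear at every other
+ * address here (offset << 1), and byte-wide reads are taken from the odd
+ * byte of each 16-bit location.
+ */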
+static inline void h8max_outb(unsigned char d,unsigned char a)
+{
+       *(unsigned short *)(NE2000_ADDR + (a << 1)) = d;
+}
+
+static inline unsigned char h8max_inb(unsigned char a)
+{
+       return *(unsigned char *)(NE2000_ADDR + (a << 1) +1);
+}
+
+static inline void h8max_outsw(unsigned char a,unsigned short *p,unsigned long l)
+{
+       unsigned short d;
+       for (; l != 0; --l, p++) {
+               d = (((*p) >> 8) & 0xff) | ((*p) << 8);
+               *(unsigned short *)(NE2000_ADDR + (a << 1)) = d;
+       }
+}
+
+static inline void h8max_insw(unsigned char a,unsigned short *p,unsigned long l)
+{
+       unsigned short d;
+       for (; l != 0; --l, p++) {
+               d = *(unsigned short *)(NE2000_ADDR + (a << 1));
+               *p = (d << 8)|((d >> 8) & 0xff);
+       }
+}
+
+static inline void h8max_outsb(unsigned char a,unsigned char *p,unsigned long l)
+{
+       for (; l != 0; --l, p++) {
+               *(unsigned short *)(NE2000_ADDR + (a << 1)) = *p;
+       }
+}
+
+static inline void h8max_insb(unsigned char a,unsigned char *p,unsigned long l)
+{
+       for (; l != 0; --l, p++) {
+               *p = *((unsigned char *)(NE2000_ADDR + (a << 1))+1);
+       }
+}
+
+#endif
diff --git a/include/asm-i386/rmap.h b/include/asm-i386/rmap.h
new file mode 100644 (file)
index 0000000..353afee
--- /dev/null
@@ -0,0 +1,21 @@
+#ifndef _I386_RMAP_H
+#define _I386_RMAP_H
+
+/* nothing to see, move along */
+#include <asm-generic/rmap.h>
+
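+/*
+ * With CONFIG_HIGHPTE the page table pages may live in highmem, so
+ * mapping a pte_addr_t means kmapping its page (KM_PTE2) and adding the
+ * stored byte offset.
+ */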
+#ifdef CONFIG_HIGHPTE
+static inline pte_t *rmap_ptep_map(pte_addr_t pte_paddr)
+{
+       unsigned long pfn = (unsigned long)(pte_paddr >> PAGE_SHIFT);
+       unsigned long off = ((unsigned long)pte_paddr) & ~PAGE_MASK;
+       return (pte_t *)((char *)kmap_atomic(pfn_to_page(pfn), KM_PTE2) + off);
+}
+
+static inline void rmap_ptep_unmap(pte_t *pte)
+{
+       kunmap_atomic(pte, KM_PTE2);
+}
+#endif
+
+#endif
diff --git a/include/asm-ia64/rmap.h b/include/asm-ia64/rmap.h
new file mode 100644 (file)
index 0000000..179c565
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _ASM_IA64_RMAP_H
+#define _ASM_IA64_RMAP_H
+
+/* nothing to see, move along */
+#include <asm-generic/rmap.h>
+
+#endif /* _ASM_IA64_RMAP_H */
diff --git a/include/asm-m68k/rmap.h b/include/asm-m68k/rmap.h
new file mode 100644 (file)
index 0000000..85119e4
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _M68K_RMAP_H
+#define _M68K_RMAP_H
+
+/* nothing to see, move along */
+#include <asm-generic/rmap.h>
+
+#endif
diff --git a/include/asm-m68knommu/rmap.h b/include/asm-m68knommu/rmap.h
new file mode 100644 (file)
index 0000000..b3664cc
--- /dev/null
@@ -0,0 +1,2 @@
+/* Do not need anything here */
+
diff --git a/include/asm-mips/rmap.h b/include/asm-mips/rmap.h
new file mode 100644 (file)
index 0000000..c9efd7b
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef __ASM_RMAP_H
+#define __ASM_RMAP_H
+
+/* nothing to see, move along */
+#include <asm-generic/rmap.h>
+
+#endif /* __ASM_RMAP_H */
diff --git a/include/asm-parisc/rmap.h b/include/asm-parisc/rmap.h
new file mode 100644 (file)
index 0000000..4ea8eb4
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _PARISC_RMAP_H
+#define _PARISC_RMAP_H
+
+/* nothing to see, move along */
+#include <asm-generic/rmap.h>
+
+#endif
diff --git a/include/asm-ppc/rmap.h b/include/asm-ppc/rmap.h
new file mode 100644 (file)
index 0000000..50556b5
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _PPC_RMAP_H
+#define _PPC_RMAP_H
+
+/* PPC calls pte_alloc() before mem_map[] is setup ... */
+#define BROKEN_PPC_PTE_ALLOC_ONE
+
+#include <asm-generic/rmap.h>
+
+#endif
diff --git a/include/asm-ppc64/rmap.h b/include/asm-ppc64/rmap.h
new file mode 100644 (file)
index 0000000..cf58a01
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _PPC64_RMAP_H
+#define _PPC64_RMAP_H
+
+/* PPC64 calls pte_alloc() before mem_map[] is setup ... */
+#define BROKEN_PPC_PTE_ALLOC_ONE
+
+#include <asm-generic/rmap.h>
+
+#endif
diff --git a/include/asm-s390/rmap.h b/include/asm-s390/rmap.h
new file mode 100644 (file)
index 0000000..43d6a87
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _S390_RMAP_H
+#define _S390_RMAP_H
+
+/* nothing to see, move along */
+#include <asm-generic/rmap.h>
+
+#endif
diff --git a/include/asm-sh/rmap.h b/include/asm-sh/rmap.h
new file mode 100644 (file)
index 0000000..31db8cc
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _SH_RMAP_H
+#define _SH_RMAP_H
+
+/* nothing to see, move along */
+#include <asm-generic/rmap.h>
+
+#endif
diff --git a/include/asm-sparc/rmap.h b/include/asm-sparc/rmap.h
new file mode 100644 (file)
index 0000000..06063cf
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _SPARC_RMAP_H
+#define _SPARC_RMAP_H
+
+/* nothing to see, move along */
+#include <asm-generic/rmap.h>
+
+#endif
diff --git a/include/asm-sparc64/rmap.h b/include/asm-sparc64/rmap.h
new file mode 100644 (file)
index 0000000..681849b
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _SPARC64_RMAP_H
+#define _SPARC64_RMAP_H
+
+/* nothing to see, move along */
+#include <asm-generic/rmap.h>
+
+#endif
diff --git a/include/asm-um/rmap.h b/include/asm-um/rmap.h
new file mode 100644 (file)
index 0000000..a244d48
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef __UM_RMAP_H
+#define __UM_RMAP_H
+
+#include "asm/arch/rmap.h"
+
+#endif
diff --git a/include/asm-v850/rmap.h b/include/asm-v850/rmap.h
new file mode 100644 (file)
index 0000000..c0ebee6
--- /dev/null
@@ -0,0 +1 @@
+/* Do not need anything here */
diff --git a/include/asm-x86_64/rmap.h b/include/asm-x86_64/rmap.h
new file mode 100644 (file)
index 0000000..24c1783
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _X8664_RMAP_H
+#define _X8664_RMAP_H
+
+/* nothing to see, move along */
+#include <asm-generic/rmap.h>
+
+#endif
diff --git a/include/linux/ninline.h b/include/linux/ninline.h
new file mode 100644 (file)
index 0000000..d3f7525
--- /dev/null
@@ -0,0 +1,151 @@
+#ifndef _NX_INLINE_H
+#define _NX_INLINE_H
+
+
+// #define NX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include "vserver/network.h"
+
+#if defined(NX_DEBUG)
+#define nxdprintk(x...) printk("nxd: " x)
+#else
+#define nxdprintk(x...)
+#endif
+
+
+void free_nx_info(struct nx_info *);
+
+extern int proc_pid_nx_info(struct task_struct *, char *);
+
+
+#define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__)
+
+static __inline__ struct nx_info *__get_nx_info(struct nx_info *nxi, const char *_file, int _line)
+{
+       if (!nxi)
+               return NULL;
+       nxdprintk("get_nx_info(%p[%d.%d])\t%s:%d\n",
+               nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_refcount):0,
+               _file, _line);
+       atomic_inc(&nxi->nx_refcount);
+       return nxi;
+}
+
+#define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__)
+
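+/*
+ * Drop a reference; the final put unhooks the nx_info from the global
+ * list under nxlist_lock and frees it.
+ */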
+static __inline__ void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
+{
+       if (!nxi)
+               return;
+       nxdprintk("put_nx_info(%p[%d.%d])\t%s:%d\n",
+               nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_refcount):0,
+               _file, _line);
+       if (atomic_dec_and_lock(&nxi->nx_refcount, &nxlist_lock)) {
+               list_del(&nxi->nx_list);
+               spin_unlock(&nxlist_lock);
+               free_nx_info(nxi);
+       }
+}
+
+
+#define set_nx_info(p,i) __set_nx_info(p,i,__FILE__,__LINE__)
+
+static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi,
+       const char *_file, int _line)
+{
+       BUG_ON(*nxp);
+       if (!nxi)
+               return;
+       nxdprintk("set_nx_info(%p[#%d.%d])\t%s:%d\n",
+               nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_refcount):0,
+               _file, _line);
+       *nxp = __get_nx_info(nxi, _file, _line);
+}
+
+#define        clr_nx_info(p)  __clr_nx_info(p,__FILE__,__LINE__)
+
+static inline void __clr_nx_info(struct nx_info **nxp,
+       const char *_file, int _line)
+{
+       struct nx_info *nxo = *nxp;
+
+       if (!nxo)
+               return;
+       nxdprintk("clr_nx_info(%p[#%d.%d])\t%s:%d\n",
+               nxo, nxo?nxo->nx_id:0, nxo?atomic_read(&nxo->nx_refcount):0,
+               _file, _line);
+       *nxp = NULL;
+       wmb();
+       __put_nx_info(nxo, _file, _line);
+}
+
+
+#define task_get_nx_info(i)    __task_get_nx_info(i,__FILE__,__LINE__)
+
+static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p,
+       const char *_file, int _line)
+{
+       struct nx_info *nxi;
+       
+       task_lock(p);
+       nxi = __get_nx_info(p->nx_info, _file, _line);
+       task_unlock(p);
+       return nxi;
+}
+
+#define nx_verify_info(p,i)    \
+       __nx_verify_info((p)->nx_info,i,__FILE__,__LINE__)
+
+static __inline__ void __nx_verify_info(
+       struct nx_info *ipa, struct nx_info *ipb,
+       const char *_file, int _line)
+{
+       if (ipa == ipb)
+               return;
+       printk(KERN_ERR "ip bad assumption (%p==%p) at %s:%d\n",
+               ipa, ipb, _file, _line);
+}
+
+
+#define nx_task_nid(t) ((t)->nid)
+
+#define nx_current_nid() nx_task_nid(current)
+
+#define nx_check(c,m)  __nx_check(nx_current_nid(),c,m)
+
+#define nx_weak_check(c,m)     ((m) ? nx_check(c,m) : 1)
+
+#undef nxdprintk
+#define nxdprintk(x...)
+
+
+#define __nx_flags(v,m,f)      (((v) & (m)) ^ (f))
+
+#define        __nx_task_flags(t,m,f) \
+       (((t) && ((t)->nx_info)) ? \
+               __nx_flags((t)->nx_info->nx_flags,(m),(f)) : 0)
+
+#define nx_current_flags() \
+       ((current->nx_info) ? current->nx_info->nx_flags : 0)
+
+#define nx_flags(m,f)  __nx_flags(nx_current_flags(),(m),(f))
+
+
+#define nx_current_ncaps() \
+       ((current->nx_info) ? current->nx_info->nx_ncaps : 0)
+
+#define nx_ncaps(c)    (nx_current_ncaps() & (c))
+
+
+
+#define        sock_nx_init(s)  do {           \
+       (s)->sk_nid = 0;                \
+       (s)->sk_nx_info = NULL;         \
+       } while (0)
+
+
+
+#endif
diff --git a/include/linux/vinline.h b/include/linux/vinline.h
new file mode 100644 (file)
index 0000000..07bb369
--- /dev/null
@@ -0,0 +1,462 @@
+#ifndef _VX_INLINE_H
+#define _VX_INLINE_H
+
+
+// #define VX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include "vserver/context.h"
+#include "vserver/limit.h"
+#include "vserver/cvirt.h"
+
+#if defined(VX_DEBUG)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+
+
+void free_vx_info(struct vx_info *);
+
+extern int proc_pid_vx_info(struct task_struct *, char *);
+
+
+#define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__)
+
+static __inline__ struct vx_info *__get_vx_info(struct vx_info *vxi,
+       const char *_file, int _line)
+{
+       if (!vxi)
+               return NULL;
+       vxdprintk("get_vx_info(%p[#%d.%d])\t%s:%d\n",
+               vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_refcount):0,
+               _file, _line);
+       atomic_inc(&vxi->vx_refcount);
+       return vxi;
+}
+
+#define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__)
+
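+/*
+ * Drop a reference; the final put unhooks the vx_info from the global
+ * list under vxlist_lock and frees it.
+ */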
+static __inline__ void __put_vx_info(struct vx_info *vxi, const char *_file, int _line)
+{
+       if (!vxi)
+               return;
+       vxdprintk("put_vx_info(%p[#%d.%d])\t%s:%d\n",
+               vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_refcount):0,
+               _file, _line);
+       if (atomic_dec_and_lock(&vxi->vx_refcount, &vxlist_lock)) {
+               list_del(&vxi->vx_list);
+               spin_unlock(&vxlist_lock);
+               free_vx_info(vxi);
+       }
+}
+
+#define set_vx_info(p,i) __set_vx_info(p,i,__FILE__,__LINE__)
+
+static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
+       const char *_file, int _line)
+{
+       BUG_ON(*vxp);
+       if (!vxi)
+               return;
+       vxdprintk("set_vx_info(%p[#%d.%d])\t%s:%d\n",
+               vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_refcount):0,
+               _file, _line);
+       *vxp = __get_vx_info(vxi, _file, _line);
+}
+
+#define        clr_vx_info(p)  __clr_vx_info(p,__FILE__,__LINE__)
+
+static inline void __clr_vx_info(struct vx_info **vxp,
+       const char *_file, int _line)
+{
+       struct vx_info *vxo = *vxp;
+
+       vxdprintk("clr_vx_info(%p[#%d.%d])\t%s:%d\n",
+               vxo, vxo?vxo->vx_id:0, vxo?atomic_read(&vxo->vx_refcount):0,
+               _file, _line);
+       *vxp = NULL;
+       wmb();
+       __put_vx_info(vxo, _file, _line);
+}
+
+
+#define task_get_vx_info(i)    __task_get_vx_info(i,__FILE__,__LINE__)
+
+static __inline__ struct vx_info *__task_get_vx_info(struct task_struct *p,
+       const char *_file, int _line)
+{
+       struct vx_info *vxi;
+       
+       task_lock(p);
+       vxi = __get_vx_info(p->vx_info, _file, _line);
+       task_unlock(p);
+       return vxi;
+}
+
+
+#define vx_verify_info(p,i)    \
+       __vx_verify_info((p)->vx_info,i,__FILE__,__LINE__)
+
+static __inline__ void __vx_verify_info(
+       struct vx_info *vxa, struct vx_info *vxb,
+       const char *_file, int _line)
+{
+       if (vxa == vxb)
+               return;
+       printk(KERN_ERR "vx bad assumption (%p==%p) at %s:%d\n",
+               vxa, vxb, _file, _line);
+}
+
+
+#define vx_task_xid(t) ((t)->xid)
+
+#define vx_current_xid() vx_task_xid(current)
+
+#define vx_check(c,m)  __vx_check(vx_current_xid(),c,m)
+
+#define vx_weak_check(c,m)     ((m) ? vx_check(c,m) : 1)
+
+
+/*
+ * check current context for ADMIN/WATCH and
+ * optionally against the supplied argument
+ */
+static __inline__ int __vx_check(xid_t cid, xid_t id, unsigned int mode)
+{
+       if (mode & VX_ARG_MASK) {
+               if ((mode & VX_IDENT) &&
+                       (id == cid))
+                       return 1;
+       }
+       if (mode & VX_ATR_MASK) {
+               if ((mode & VX_DYNAMIC) &&
+                       (id >= MIN_D_CONTEXT) &&
+                       (id <= MAX_S_CONTEXT))
+                       return 1;
+               if ((mode & VX_STATIC) &&
+                       (id > 1) && (id < MIN_D_CONTEXT))
+                       return 1;
+       }
+       return (((mode & VX_ADMIN) && (cid == 0)) ||
+               ((mode & VX_WATCH) && (cid == 1)));
+}
+
+
+#define __vx_flags(v,m,f)      (((v) & (m)) ^ (f))
+
+#define        __vx_task_flags(t,m,f) \
+       (((t) && ((t)->vx_info)) ? \
+               __vx_flags((t)->vx_info->vx_flags,(m),(f)) : 0)
+
+#define vx_current_flags() \
+       ((current->vx_info) ? current->vx_info->vx_flags : 0)
+
+#define vx_flags(m,f)  __vx_flags(vx_current_flags(),(m),(f))
+
+
+#define vx_current_ccaps() \
+       ((current->vx_info) ? current->vx_info->vx_ccaps : 0)
+
+#define vx_ccaps(c)    (vx_current_ccaps() & (c))
+
+#define vx_current_bcaps() \
+       (((current->vx_info) && !vx_flags(VXF_STATE_SETUP, 0)) ? \
+       current->vx_info->vx_bcaps : cap_bset)
+
+
+#define VX_DEBUG_ACC_RSS   0
+#define VX_DEBUG_ACC_VM    0
+#define VX_DEBUG_ACC_VML   0
+
+#undef vxdprintk
+#if    (VX_DEBUG_ACC_RSS) || (VX_DEBUG_ACC_VM) || (VX_DEBUG_ACC_VML)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+#define vx_acc_page(m, d, v, r) \
+       __vx_acc_page(&(m->v), m->mm_vx_info, r, d, __FILE__, __LINE__)
+
+static inline void __vx_acc_page(unsigned long *v, struct vx_info *vxi,
+                int res, int dir, char *file, int line)
+{
+        if (v) {
+                if (dir > 0)
+                        ++(*v);
+                else
+                        --(*v);
+        }
+        if (vxi) {
+                if (dir > 0)
+                        atomic_inc(&vxi->limit.res[res]);
+                else
+                        atomic_dec(&vxi->limit.res[res]);
+        }
+}
+
+
+#define vx_acc_pages(m, p, v, r) \
+       __vx_acc_pages(&(m->v), m->mm_vx_info, r, p, __FILE__, __LINE__)
+
+static inline void __vx_acc_pages(unsigned long *v, struct vx_info *vxi,
+                int res, int pages, char *file, int line)
+{
+        if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) ||
+               (res == RLIMIT_AS && VX_DEBUG_ACC_VM) ||
+               (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML))
+               vxdprintk("vx_acc_pages  [%5d,%2d]: %5d += %5d in %s:%d\n",
+                       (vxi?vxi->vx_id:-1), res,
+                       (vxi?atomic_read(&vxi->limit.res[res]):0),
+                       pages, file, line);
+        if (pages == 0)
+                return;
+        if (v)
+                *v += pages;
+        if (vxi)
+                atomic_add(pages, &vxi->limit.res[res]);
+}
+
+
+
+#define vx_acc_vmpage(m,d)     vx_acc_page(m, d, total_vm,  RLIMIT_AS)
+#define vx_acc_vmlpage(m,d)    vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK)
+#define vx_acc_rsspage(m,d)    vx_acc_page(m, d, rss,      RLIMIT_RSS)
+
+#define vx_acc_vmpages(m,p)    vx_acc_pages(m, p, total_vm,  RLIMIT_AS)
+#define vx_acc_vmlpages(m,p)   vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK)
+#define vx_acc_rsspages(m,p)   vx_acc_pages(m, p, rss,       RLIMIT_RSS)
+
+#define vx_pages_add(s,r,p)    __vx_acc_pages(0, s, r, p, __FILE__, __LINE__)
+#define vx_pages_sub(s,r,p)    vx_pages_add(s, r, -(p))
+
+#define vx_vmpages_inc(m)      vx_acc_vmpage(m, 1)
+#define vx_vmpages_dec(m)      vx_acc_vmpage(m,-1)
+#define vx_vmpages_add(m,p)    vx_acc_vmpages(m, p)
+#define vx_vmpages_sub(m,p)    vx_acc_vmpages(m,-(p))
+
+#define vx_vmlocked_inc(m)     vx_acc_vmlpage(m, 1)
+#define vx_vmlocked_dec(m)     vx_acc_vmlpage(m,-1)
+#define vx_vmlocked_add(m,p)   vx_acc_vmlpages(m, p)
+#define vx_vmlocked_sub(m,p)   vx_acc_vmlpages(m,-(p))
+
+#define vx_rsspages_inc(m)     vx_acc_rsspage(m, 1)
+#define vx_rsspages_dec(m)     vx_acc_rsspage(m,-1)
+#define vx_rsspages_add(m,p)   vx_acc_rsspages(m, p)
+#define vx_rsspages_sub(m,p)   vx_acc_rsspages(m,-(p))
+
+
+
+#define vx_pages_avail(m, p, r) \
+        __vx_pages_avail((m)->mm_vx_info, (r), (p), __FILE__, __LINE__)
+
+static inline int __vx_pages_avail(struct vx_info *vxi,
+                int res, int pages, char *file, int line)
+{
+        if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) ||
+                (res == RLIMIT_AS && VX_DEBUG_ACC_VM) ||
+                (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML))
+                printk("vx_pages_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n",
+                        (vxi?vxi->vx_id:-1), res,
+                       (vxi?vxi->limit.rlim[res]:1),
+                        (vxi?atomic_read(&vxi->limit.res[res]):0),
+                       pages, file, line);
+        if (!vxi)
+                return 1;
+        if (vxi->limit.rlim[res] == RLIM_INFINITY)
+                return 1;
+        if (atomic_read(&vxi->limit.res[res]) + pages < vxi->limit.rlim[res])
+                return 1;
+        return 0;
+}
+
+#define vx_vmpages_avail(m,p)  vx_pages_avail(m, p, RLIMIT_AS)
+#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK)
+#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS)
+
+/* file limits */
+
+#define VX_DEBUG_ACC_FILE      0
+#define VX_DEBUG_ACC_OPENFD    0
+
+#undef vxdprintk
+#if    (VX_DEBUG_ACC_FILE) || (VX_DEBUG_ACC_OPENFD)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+
+#define vx_acc_cres(v,d,r) \
+       __vx_acc_cres((v), (r), (d), __FILE__, __LINE__)
+
+static inline void __vx_acc_cres(struct vx_info *vxi,
+       int res, int dir, char *file, int line)
+{
+        if (vxi) {
+       if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) ||
+                       (res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD))
+       printk("vx_acc_cres[%5d,%2d]: %5d%s in %s:%d\n",
+                        (vxi?vxi->vx_id:-1), res,
+                        (vxi?atomic_read(&vxi->limit.res[res]):0),
+                       (dir>0)?"++":"--", file, line);
+                if (dir > 0)
+                        atomic_inc(&vxi->limit.res[res]);
+                else
+                        atomic_dec(&vxi->limit.res[res]);
+        }
+}
+
+#define vx_files_inc(f)        vx_acc_cres(current->vx_info, 1, RLIMIT_NOFILE)
+#define vx_files_dec(f)        vx_acc_cres(current->vx_info,-1, RLIMIT_NOFILE)
+
+#define vx_openfd_inc(f) vx_acc_cres(current->vx_info, 1, RLIMIT_OPENFD)
+#define vx_openfd_dec(f) vx_acc_cres(current->vx_info,-1, RLIMIT_OPENFD)
+
+#define vx_cres_avail(v,n,r) \
+        __vx_cres_avail((v), (r), (n), __FILE__, __LINE__)
+
+static inline int __vx_cres_avail(struct vx_info *vxi,
+                int res, int num, char *file, int line)
+{
+       if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) ||
+               (res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD))
+                printk("vx_cres_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n",
+                        (vxi?vxi->vx_id:-1), res,
+                       (vxi?vxi->limit.rlim[res]:1),
+                        (vxi?atomic_read(&vxi->limit.res[res]):0),
+                       num, file, line);
+        if (!vxi)
+                return 1;
+        if (vxi->limit.rlim[res] == RLIM_INFINITY)
+                return 1;
+        if (vxi->limit.rlim[res] < atomic_read(&vxi->limit.res[res]) + num)
+                return 0;
+        return 1;
+}
+
+#define vx_files_avail(n) \
+       vx_cres_avail(current->vx_info, (n), RLIMIT_NOFILE)
+
+#define vx_openfd_avail(n) \
+       vx_cres_avail(current->vx_info, (n), RLIMIT_OPENFD)
+
+/* socket limits */
+
+#define vx_sock_inc(f) vx_acc_cres(current->vx_info, 1, VLIMIT_SOCK)
+#define vx_sock_dec(f) vx_acc_cres(current->vx_info,-1, VLIMIT_SOCK)
+
+#define vx_sock_avail(n) \
+       vx_cres_avail(current->vx_info, (n), VLIMIT_SOCK)
+
+/* procfs ioctls */
+
+#define        FIOC_GETXFLG    _IOR('x', 5, long)
+#define        FIOC_SETXFLG    _IOW('x', 6, long)
+
+/* utsname virtualization */
+
+static inline struct new_utsname *vx_new_utsname(void)
+{
+       if (current->vx_info)
+               return &current->vx_info->cvirt.utsname;
+       return &system_utsname;
+}
+
+#define vx_new_uts(x)          ((vx_new_utsname())->x)
+
+/* generic flag merging */
+
+#define        vx_mask_flags(v,f,m)    (((v) & ~(m)) | ((f) & (m)))
+
+#define        vx_mask_mask(v,f,m)     (((v) & ~(m)) | ((v) & (f) & (m)))
+
+
+/* socket accounting */
+
+#include <linux/socket.h>
+
+static inline int vx_sock_type(int family)
+{
+       int type = 4;
+
+       if (family > 0 && family < 3)
+               type = family;
+       else if (family == PF_INET6)
+               type = 3;
+       return type;
+}
+
+#define vx_acc_sock(v,f,p,s) \
+       __vx_acc_sock((v), (f), (p), (s), __FILE__, __LINE__)
+
+static inline void __vx_acc_sock(struct vx_info *vxi,
+       int family, int pos, int size, char *file, int line)
+{
+        if (vxi) {
+               int type = vx_sock_type(family);
+
+               atomic_inc(&vxi->cacct.sock[type][pos].count);
+               atomic_add(size, &vxi->cacct.sock[type][pos].total);
+        }
+}
+
+#define vx_sock_recv(sk,s) \
+       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, (s))
+#define vx_sock_send(sk,s) \
+       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, (s))
+#define vx_sock_fail(sk,s) \
+       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, (s))
+
+
+#define        sock_vx_init(s)  do {           \
+       (s)->sk_xid = 0;                \
+       (s)->sk_vx_info = NULL;         \
+       } while (0)
+
+
+/* pid faking stuff */
+
+
+#define vx_map_tgid(v,p) \
+       __vx_map_tgid((v), (p), __FILE__, __LINE__)
+
+static inline int __vx_map_tgid(struct vx_info *vxi, int pid,
+       char *file, int line)
+{
+       if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) {
+               vxdprintk("vx_map_tgid: %p/%llx: %d -> %d in %s:%d\n",
+                       vxi, vxi->vx_flags, pid,
+                       (pid == vxi->vx_initpid)?1:pid,
+                       file, line);
+               if (pid == vxi->vx_initpid)
+                       return 1;
+       }
+       return pid;
+}
+
+#define vx_rmap_tgid(v,p) \
+       __vx_rmap_tgid((v), (p), __FILE__, __LINE__)
+
+static inline int __vx_rmap_tgid(struct vx_info *vxi, int pid,
+       char *file, int line)
+{
+       if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) {
+               vxdprintk("vx_rmap_tgid: %p/%llx: %d -> %d in %s:%d\n",
+                       vxi, vxi->vx_flags, pid,
+                       (pid == 1)?vxi->vx_initpid:pid,
+                       file, line);
+               if ((pid == 1) && vxi->vx_initpid)
+                       return vxi->vx_initpid;
+       }
+       return pid;
+}
+
+#undef vxdprintk
+#define vxdprintk(x...)
+
+#endif
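
For reference, the mode semantics of __vx_check() above can be exercised outside the kernel. The following minimal userspace model (the vx_check_model name is ours, not from the patch) copies the mode bits and context boundaries from vserver/context.h in this patch; it only illustrates which (context, id, mode) combinations pass and is not kernel code.

/* Userspace model of __vx_check() from vinline.h; constants copied from
 * linux/vserver/context.h. Illustration only. */
#include <stdio.h>

typedef unsigned int xid_t;

#define MAX_S_CONTEXT  65535
#define MIN_D_CONTEXT  49152

#define VX_ADMIN       0x0001
#define VX_WATCH       0x0002
#define VX_IDENT       0x0010
#define VX_ARG_MASK    0x00F0
#define VX_DYNAMIC     0x0100
#define VX_STATIC      0x0200
#define VX_ATR_MASK    0x0F00

static int vx_check_model(xid_t cid, xid_t id, unsigned int mode)
{
	if ((mode & VX_ARG_MASK) && (mode & VX_IDENT) && (id == cid))
		return 1;
	if (mode & VX_ATR_MASK) {
		if ((mode & VX_DYNAMIC) &&
		    (id >= MIN_D_CONTEXT) && (id <= MAX_S_CONTEXT))
			return 1;
		if ((mode & VX_STATIC) && (id > 1) && (id < MIN_D_CONTEXT))
			return 1;
	}
	return ((mode & VX_ADMIN) && (cid == 0)) ||
	       ((mode & VX_WATCH) && (cid == 1));
}

int main(void)
{
	/* the host context (xid 0) passes ADMIN checks against any id */
	printf("%d\n", vx_check_model(0, 42, VX_ADMIN));	/* 1 */
	/* a guest (xid 42) only matches its own id with VX_IDENT */
	printf("%d\n", vx_check_model(42, 42, VX_IDENT));	/* 1 */
	printf("%d\n", vx_check_model(42, 7, VX_IDENT));	/* 0 */
	/* dynamic ids live in [49152, 65535] */
	printf("%d\n", vx_check_model(0, 50000, VX_DYNAMIC));	/* 1 */
	return 0;
}
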
diff --git a/include/linux/vserver.h b/include/linux/vserver.h
new file mode 100644 (file)
index 0000000..2c39ebb
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _LINUX_VSERVER_H
+#define _LINUX_VSERVER_H
+
+#include <linux/vserver/context.h>
+#include <linux/vserver/network.h>
+#include <linux/vinline.h>
+#include <linux/ninline.h>
+
+#endif
diff --git a/include/linux/vserver/context.h b/include/linux/vserver/context.h
new file mode 100644 (file)
index 0000000..7692603
--- /dev/null
@@ -0,0 +1,176 @@
+#ifndef _VX_CONTEXT_H
+#define _VX_CONTEXT_H
+
+#include <linux/types.h>
+
+#define MAX_S_CONTEXT  65535   /* Arbitrary limit */
+#define MIN_D_CONTEXT  49152   /* dynamic contexts start here */
+
+#define VX_DYNAMIC_ID  ((uint32_t)-1)          /* id for dynamic context */
+
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#define _VX_INFO_DEF_
+#include "cvirt.h"
+#include "limit.h"
+#include "sched.h"
+#undef _VX_INFO_DEF_
+
+struct vx_info {
+       struct list_head vx_list;               /* linked list of contexts */
+       xid_t vx_id;                            /* context id */
+       atomic_t vx_refcount;                   /* refcount */
+       struct vx_info *vx_parent;              /* parent context */
+
+       struct namespace *vx_namespace;         /* private namespace */
+       struct fs_struct *vx_fs;                /* private namespace fs */
+       uint64_t vx_flags;                      /* VX_INFO_xxx */
+       uint64_t vx_bcaps;                      /* bounding caps (system) */
+       uint64_t vx_ccaps;                      /* context caps (vserver) */
+
+       pid_t vx_initpid;                       /* PID of fake init process */
+
+       struct _vx_limit limit;                 /* vserver limits */
+       struct _vx_sched sched;                 /* vserver scheduler */
+       struct _vx_cvirt cvirt;                 /* virtual/bias stuff */
+       struct _vx_cacct cacct;                 /* context accounting */
+
+       char vx_name[65];                       /* vserver name */
+};
+
+
+extern spinlock_t vxlist_lock;
+extern struct list_head vx_infos;
+
+
+#define VX_ADMIN       0x0001
+#define VX_WATCH       0x0002
+#define VX_DUMMY       0x0008
+
+#define VX_IDENT       0x0010
+#define VX_EQUIV       0x0020
+#define VX_PARENT      0x0040
+#define VX_CHILD       0x0080
+
+#define VX_ARG_MASK    0x00F0
+
+#define VX_DYNAMIC     0x0100
+#define VX_STATIC      0x0200
+
+#define VX_ATR_MASK    0x0F00
+
+
+void free_vx_info(struct vx_info *);
+
+extern struct vx_info *find_vx_info(int);
+extern struct vx_info *find_or_create_vx_info(int);
+extern int vx_info_id_valid(int);
+
+extern int vx_migrate_task(struct task_struct *, struct vx_info *);
+
+#endif /* __KERNEL__ */
+
+#include "switch.h"
+
+/* vinfo commands */
+
+#define VCMD_task_xid          VC_CMD(VINFO, 1, 0)
+#define VCMD_task_nid          VC_CMD(VINFO, 2, 0)
+
+#ifdef __KERNEL__
+extern int vc_task_xid(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_vx_info           VC_CMD(VINFO, 5, 0)
+#define VCMD_nx_info           VC_CMD(VINFO, 6, 0)
+
+struct  vcmd_vx_info_v0 {
+       uint32_t xid;
+       uint32_t initpid;
+       /* more to come */      
+};
+
+#ifdef __KERNEL__
+extern int vc_vx_info(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_ctx_create                VC_CMD(VPROC, 1, 0)
+#define VCMD_ctx_migrate       VC_CMD(PROCMIG, 1, 0)
+
+#ifdef __KERNEL__
+extern int vc_ctx_create(uint32_t, void __user *);
+extern int vc_ctx_migrate(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_get_cflags                VC_CMD(FLAGS, 1, 0)
+#define VCMD_set_cflags                VC_CMD(FLAGS, 2, 0)
+
+struct  vcmd_ctx_flags_v0 {
+       uint64_t flagword;
+       uint64_t mask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_cflags(uint32_t, void __user *);
+extern int vc_set_cflags(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VXF_INFO_LOCK          0x00000001
+#define VXF_INFO_SCHED         0x00000002
+#define VXF_INFO_NPROC         0x00000004
+#define VXF_INFO_PRIVATE       0x00000008
+
+#define VXF_INFO_INIT          0x00000010
+#define VXF_INFO_HIDE          0x00000020
+#define VXF_INFO_ULIMIT                0x00000040
+#define VXF_INFO_NSPACE                0x00000080
+
+#define VXF_SCHED_HARD         0x00000100
+#define VXF_SCHED_PRIO         0x00000200
+#define VXF_SCHED_PAUSE                0x00000400
+
+#define VXF_VIRT_MEM           0x00010000
+#define VXF_VIRT_UPTIME                0x00020000
+#define VXF_VIRT_CPU           0x00040000
+
+#define VXF_HIDE_MOUNT         0x01000000
+#define VXF_HIDE_NETIF         0x02000000
+
+#define VXF_STATE_SETUP                (1ULL<<32)
+#define VXF_STATE_INIT         (1ULL<<33)
+
+#define        VXF_FORK_RSS            (1ULL<<48)
+
+#define VXF_ONE_TIME           (0x0003ULL<<32)
+
+#define VCMD_get_ccaps         VC_CMD(FLAGS, 3, 0)
+#define VCMD_set_ccaps         VC_CMD(FLAGS, 4, 0)
+
+struct  vcmd_ctx_caps_v0 {
+       uint64_t bcaps;
+       uint64_t ccaps;
+       uint64_t cmask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_ccaps(uint32_t, void __user *);
+extern int vc_set_ccaps(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VXC_SET_UTSNAME                0x00000001
+#define VXC_SET_RLIMIT         0x00000002
+
+#define VXC_ICMP_PING          0x00000100
+
+#define VXC_SECURE_MOUNT       0x00010000
+
+
+#endif /* _VX_CONTEXT_H */
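
VXF_STATE_SETUP and VXF_STATE_INIT above are grouped into VXF_ONE_TIME, which the flag commands treat specially: once such a bit has been cleared it cannot be set again. The standalone sketch below reuses the vx_mask_flags()/vx_mask_mask() helpers from vinline.h and the flag values from this header; the clear/re-set scenario itself is only illustrative, not the actual vc_set_cflags() call path.

/* Standalone illustration of the VXF_ONE_TIME handling; helpers copied
 * from vinline.h, flag values from vserver/context.h. */
#include <stdio.h>
#include <stdint.h>

#define VXF_STATE_SETUP	(1ULL << 32)
#define VXF_STATE_INIT	(1ULL << 33)
#define VXF_ONE_TIME	(0x0003ULL << 32)

#define vx_mask_flags(v, f, m)	(((v) & ~(m)) | ((f) & (m)))
#define vx_mask_mask(v, f, m)	(((v) & ~(m)) | ((v) & (f) & (m)))

int main(void)
{
	uint64_t flags = VXF_STATE_SETUP | VXF_STATE_INIT;	/* as set by alloc_vx_info() */
	uint64_t req_mask = ~0ULL;				/* caller asks to touch everything */

	/* clearing VXF_STATE_SETUP works while it is still set ... */
	uint64_t mask = vx_mask_mask(req_mask, flags, VXF_ONE_TIME);
	flags = vx_mask_flags(flags, flags & ~VXF_STATE_SETUP, mask);
	printf("setup bit now: %d\n", !!(flags & VXF_STATE_SETUP));	/* 0 */

	/* ... but once cleared, the bit is masked out and cannot be re-set */
	mask = vx_mask_mask(req_mask, flags, VXF_ONE_TIME);
	flags = vx_mask_flags(flags, flags | VXF_STATE_SETUP, mask);
	printf("setup bit now: %d\n", !!(flags & VXF_STATE_SETUP));	/* still 0 */
	return 0;
}
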
diff --git a/include/linux/vserver/cvirt.h b/include/linux/vserver/cvirt.h
new file mode 100644 (file)
index 0000000..ba3a253
--- /dev/null
@@ -0,0 +1,133 @@
+#if    defined(__KERNEL__) && defined(_VX_INFO_DEF_)
+
+#include <linux/utsname.h>
+#include <linux/rwsem.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <asm/atomic.h>
+
+/* context sub struct */
+
+struct _vx_cvirt {
+       int max_threads;
+
+       unsigned int bias_cswtch;
+       struct timespec bias_idle;
+       struct timespec bias_tp;
+       uint64_t bias_jiffies;
+
+       struct new_utsname utsname;
+};
+
+struct sock_acc {
+       atomic_t count;
+       atomic_t total;
+};
+
+struct _vx_cacct {
+       atomic_t nr_threads;
+       int nr_running;
+
+       unsigned long total_forks;
+
+       struct sock_acc sock[5][3];
+};
+
+
+static inline long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
+{
+       return atomic_read(&cacct->sock[type][pos].count);
+}
+
+
+static inline long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
+{
+       return atomic_read(&cacct->sock[type][pos].total);
+}
+
+
+extern uint64_t vx_idle_jiffies(void);
+
+static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
+{
+       uint64_t idle_jiffies = vx_idle_jiffies();
+
+       // new->virt.bias_cswtch = kstat.context_swtch;
+       cvirt->bias_jiffies = get_jiffies_64();
+
+       jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle);
+       do_posix_clock_monotonic_gettime(&cvirt->bias_tp);
+
+       down_read(&uts_sem);
+       cvirt->utsname = system_utsname;
+       up_read(&uts_sem);
+}
+
+static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
+{
+       return;
+}
+
+static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
+{
+       int i,j;
+
+       atomic_set(&cacct->nr_threads, 1);
+       for (i=0; i<5; i++) {
+               for (j=0; j<3; j++) {
+                       atomic_set(&cacct->sock[i][j].count, 0);
+                       atomic_set(&cacct->sock[i][j].total, 0);
+               }
+       }
+}
+
+static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
+{
+       return;
+}
+
+static inline int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
+{
+       int length = 0;
+       return length;
+}
+
+static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
+{
+       int i,j, length = 0;
+       static char *type[] = { "UNSPEC", "UNIX", "INET", "INET6", "OTHER" };
+
+       for (i=0; i<5; i++) {
+               length += sprintf(buffer + length,
+                       "%s:", type[i]);
+               for (j=0; j<3; j++) {
+                       length += sprintf(buffer + length,
+                               "\t%12lu/%-12lu"
+                               ,vx_sock_count(cacct, i, j)
+                               ,vx_sock_total(cacct, i, j)
+                               );
+               }       
+               buffer[length++] = '\n';
+       }
+       return length;
+}
+
+#else  /* _VX_INFO_DEF_ */
+#ifndef _VX_CVIRT_H
+#define _VX_CVIRT_H
+
+#include "switch.h"
+
+/*  cvirt vserver commands */
+
+
+#ifdef __KERNEL__
+
+struct timespec;
+
+void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle);
+
+#endif /* __KERNEL__ */
+
+#endif /* _VX_CVIRT_H */
+#endif
diff --git a/include/linux/vserver/inode.h b/include/linux/vserver/inode.h
new file mode 100644 (file)
index 0000000..aa8852f
--- /dev/null
@@ -0,0 +1,67 @@
+#ifndef _VX_INODE_H
+#define _VX_INODE_H
+
+#include "switch.h"
+
+/*  inode vserver commands */
+
+#define VCMD_get_iattr_v0      VC_CMD(INODE, 1, 0)
+#define VCMD_set_iattr_v0      VC_CMD(INODE, 2, 0)
+
+#define VCMD_get_iattr         VC_CMD(INODE, 1, 1)
+#define VCMD_set_iattr         VC_CMD(INODE, 2, 1)
+
+struct  vcmd_ctx_iattr_v0 {
+       /* device handle in id */
+       uint64_t ino;
+       uint32_t xid;
+       uint32_t flags;
+       uint32_t mask;
+};
+
+struct  vcmd_ctx_iattr_v1 {
+       const char __user *name;
+       uint32_t xid;
+       uint32_t flags;
+       uint32_t mask;
+};
+
+
+#define IATTR_XID      0x01000000
+
+#define IATTR_ADMIN    0x00000001
+#define IATTR_WATCH    0x00000002
+#define IATTR_HIDE     0x00000004
+#define IATTR_FLAGS    0x00000007
+
+#define IATTR_BARRIER  0x00010000
+#define IATTR_IUNLINK  0x00020000
+#define IATTR_IMMUTABLE        0x00040000
+
+
+#ifdef CONFIG_PROC_SECURE
+#define IATTR_PROC_DEFAULT     ( IATTR_ADMIN | IATTR_HIDE )
+#define IATTR_PROC_SYMLINK     ( IATTR_ADMIN )
+#else
+#define IATTR_PROC_DEFAULT     ( IATTR_ADMIN )
+#define IATTR_PROC_SYMLINK     ( IATTR_ADMIN )
+#endif
+
+#ifdef __KERNEL__
+
+#define vx_hide_check(c,m)      (((m) & IATTR_HIDE) ? vx_check(c,m) : 1)
+
+extern int vc_get_iattr_v0(uint32_t, void __user *);
+extern int vc_set_iattr_v0(uint32_t, void __user *);
+
+extern int vc_get_iattr(uint32_t, void __user *);
+extern int vc_set_iattr(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+/* inode ioctls */
+
+#define FIOC_GETXFLG   _IOR('x', 5, long)
+#define FIOC_SETXFLG   _IOW('x', 6, long)
+
+#endif /* _VX_INODE_H */
diff --git a/include/linux/vserver/legacy.h b/include/linux/vserver/legacy.h
new file mode 100644 (file)
index 0000000..1372c0f
--- /dev/null
@@ -0,0 +1,54 @@
+#ifndef _VX_LEGACY_H
+#define _VX_LEGACY_H
+
+#include "switch.h"
+#include "network.h"
+
+/*  compatibility vserver commands */
+
+#define VCMD_new_s_context     VC_CMD(COMPAT, 1, 1)
+#define VCMD_set_ipv4root      VC_CMD(COMPAT, 2, 3)
+
+#define VCMD_create_context    VC_CMD(VSETUP, 1, 0)
+
+/*  compatibility vserver arguments */
+
+struct  vcmd_new_s_context_v1 {
+       uint32_t remove_cap;
+       uint32_t flags;
+};
+
+struct  vcmd_set_ipv4root_v3 {
+       /* number of pairs in id */
+       uint32_t broadcast;
+       struct {
+               uint32_t ip;
+               uint32_t mask;
+       } nx_mask_pair[NB_IPV4ROOT];
+};
+
+
+#define VX_INFO_LOCK           1       /* Can't request a new vx_id */
+#define VX_INFO_NPROC          4       /* Limit number of processes in a context */
+#define VX_INFO_PRIVATE                8       /* No one can join this security context */
+#define VX_INFO_INIT           16      /* This process wants to become the */
+                                       /* logical process 1 of the security */
+                                       /* context */
+#define VX_INFO_HIDEINFO       32      /* Hide some information in /proc */
+#define VX_INFO_ULIMIT         64      /* Use the ulimits of the current */
+                                       /* process as the global limits */
+                                       /* of the context */
+#define VX_INFO_NAMESPACE      128     /* save private namespace */
+
+       
+#define NB_S_CONTEXT   16
+
+#define NB_IPV4ROOT    16
+
+
+#ifdef __KERNEL__
+extern int vc_new_s_context(uint32_t, void __user *);
+extern int vc_set_ipv4root(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_LEGACY_H */
diff --git a/include/linux/vserver/limit.h b/include/linux/vserver/limit.h
new file mode 100644 (file)
index 0000000..27496c1
--- /dev/null
@@ -0,0 +1,117 @@
+#if    defined(__KERNEL__) && defined(_VX_INFO_DEF_)
+
+#include <asm/atomic.h>
+#include <asm/resource.h>
+
+/* context sub struct */
+
+#define        RLIMIT_OPENFD   12
+
+#define NUM_RLIMITS    17      /* must cover VLIMIT_SOCK below */
+
+#define VLIMIT_SOCK    16
+
+
+struct _vx_limit {
+       atomic_t ticks;
+
+       unsigned long rlim[NUM_RLIMITS];        /* Per context limit */
+       atomic_t res[NUM_RLIMITS];              /* Current value */
+};
+
+static inline void vx_info_init_limit(struct _vx_limit *limit)
+{
+       int lim;
+
+       for (lim=0; lim<NUM_RLIMITS; lim++) {
+               limit->rlim[lim] = RLIM_INFINITY;
+               atomic_set(&limit->res[lim], 0);
+       }
+}
+
+extern unsigned int vx_debug_limit;
+
+static inline void vx_info_exit_limit(struct _vx_limit *limit)
+{
+       int lim, value;
+
+       for (lim=0; lim<NUM_RLIMITS; lim++) {
+               value = atomic_read(&limit->res[lim]);
+               if (value && vx_debug_limit)
+                       printk("!!! limit: %p[%d] = %d on exit.\n",
+                               limit, lim, value);
+       }
+}
+
+
+static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
+{
+       return sprintf(buffer,
+               "PROC:\t%8d/%ld\n"
+               "VM:\t%8d/%ld\n"
+               "VML:\t%8d/%ld\n"               
+               "RSS:\t%8d/%ld\n"
+               "FILES:\t%8d/%ld\n"
+               "OFD:\t%8d/%ld\n"
+               ,atomic_read(&limit->res[RLIMIT_NPROC])
+               ,limit->rlim[RLIMIT_NPROC]
+               ,atomic_read(&limit->res[RLIMIT_AS])
+               ,limit->rlim[RLIMIT_AS]
+               ,atomic_read(&limit->res[RLIMIT_MEMLOCK])
+               ,limit->rlim[RLIMIT_MEMLOCK]
+               ,atomic_read(&limit->res[RLIMIT_RSS])
+               ,limit->rlim[RLIMIT_RSS]
+               ,atomic_read(&limit->res[RLIMIT_NOFILE])
+               ,limit->rlim[RLIMIT_NOFILE]
+               ,atomic_read(&limit->res[RLIMIT_OPENFD])
+               ,limit->rlim[RLIMIT_OPENFD]
+               );
+}
+
+#else  /* _VX_INFO_DEF_ */
+#ifndef _VX_LIMIT_H
+#define _VX_LIMIT_H
+
+#include "switch.h"
+
+/*  rlimit vserver commands */
+
+#define VCMD_get_rlimit                VC_CMD(RLIMIT, 1, 0)
+#define VCMD_set_rlimit                VC_CMD(RLIMIT, 2, 0)
+#define VCMD_get_rlimit_mask   VC_CMD(RLIMIT, 3, 0)
+
+struct  vcmd_ctx_rlimit_v0 {
+       uint32_t id;
+       uint64_t minimum;
+       uint64_t softlimit;
+       uint64_t maximum;
+};
+
+struct  vcmd_ctx_rlimit_mask_v0 {
+       uint32_t minimum;
+       uint32_t softlimit;
+       uint32_t maximum;
+};
+
+#define CRLIM_UNSET            (0ULL)
+#define CRLIM_INFINITY         (~0ULL)
+#define CRLIM_KEEP             (~1ULL)
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+
+extern int vc_get_rlimit(uint32_t, void __user *);
+extern int vc_set_rlimit(uint32_t, void __user *);
+extern int vc_get_rlimit_mask(uint32_t, void __user *);
+
+struct sysinfo;
+
+void vx_vsi_meminfo(struct sysinfo *);
+void vx_vsi_swapinfo(struct sysinfo *);
+
+
+#endif /* __KERNEL__ */
+
+#endif /* _VX_LIMIT_H */
+#endif
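
The rlim[]/res[] pair above is what __vx_cres_avail() in vinline.h consults before an allocation is charged. Below is a small userspace model of that check (the *_model names are ours; RLIM_INFINITY is taken as ~0UL, its usual value), for illustration only.

/* Userspace model of the rlim[]/res[] check done by __vx_cres_avail(). */
#include <stdio.h>

#define RLIM_INFINITY	(~0UL)		/* as on most architectures */

struct limit_model {
	unsigned long rlim;	/* per context limit */
	long res;		/* current value     */
};

static int cres_avail_model(const struct limit_model *l, int num)
{
	if (l->rlim == RLIM_INFINITY)
		return 1;				/* unlimited */
	return l->rlim >= (unsigned long)(l->res + num);
}

int main(void)
{
	struct limit_model files = { 1024, 1020 };

	printf("%d\n", cres_avail_model(&files, 4));	/* 1: exactly at the limit */
	printf("%d\n", cres_avail_model(&files, 5));	/* 0: would exceed it */

	files.rlim = RLIM_INFINITY;
	printf("%d\n", cres_avail_model(&files, 5000));	/* 1 */
	return 0;
}
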
diff --git a/include/linux/vserver/namespace.h b/include/linux/vserver/namespace.h
new file mode 100644 (file)
index 0000000..140fc79
--- /dev/null
@@ -0,0 +1,55 @@
+#ifndef _VX_NAMESPACE_H
+#define _VX_NAMESPACE_H
+
+#include <linux/types.h>
+
+       
+/* virtual host info names */
+
+#define VCMD_vx_set_vhi_name   VC_CMD(VHOST, 1, 0)
+#define VCMD_vx_get_vhi_name   VC_CMD(VHOST, 2, 0)
+
+struct  vcmd_vx_vhi_name_v0 {
+       uint32_t field;
+       char name[65];
+};
+
+
+enum vx_vhi_name_field {
+       VHIN_CONTEXT=0,
+       VHIN_SYSNAME,
+       VHIN_NODENAME,
+       VHIN_RELEASE,
+       VHIN_VERSION,
+       VHIN_MACHINE,
+       VHIN_DOMAINNAME,
+};
+
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+
+extern int vc_set_vhi_name(uint32_t, void __user *);
+extern int vc_get_vhi_name(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_enter_namespace   VC_CMD(PROCALT, 1, 0)
+#define VCMD_cleanup_namespace VC_CMD(PROCALT, 2, 0)
+#define VCMD_set_namespace     VC_CMD(PROCALT, 3, 0)
+
+#ifdef __KERNEL__
+
+struct vx_info;
+struct namespace;
+struct fs_struct;
+
+extern int vx_set_namespace(struct vx_info *, struct namespace *, struct fs_struct *);
+
+extern int vc_enter_namespace(uint32_t, void __user *);
+extern int vc_cleanup_namespace(uint32_t, void __user *);
+extern int vc_set_namespace(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_NAMESPACE_H */
diff --git a/include/linux/vserver/network.h b/include/linux/vserver/network.h
new file mode 100644 (file)
index 0000000..b3c39b0
--- /dev/null
@@ -0,0 +1,142 @@
+#ifndef _VX_NETWORK_H
+#define _VX_NETWORK_H
+
+#define MAX_N_CONTEXT  65535   /* Arbitrary limit */
+
+#define IP_DYNAMIC_ID  ((uint32_t)-1)          /* id for dynamic context */
+
+#define NB_IPV4ROOT    16
+
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/utsname.h>
+#include <asm/resource.h>
+#include <asm/atomic.h>
+
+
+struct nx_info {
+       struct list_head nx_list;       /* linked list of nxinfos */
+       nid_t nx_id;                    /* vnet id */
+       atomic_t nx_refcount;
+
+       uint64_t nx_flags;              /* network flag word */
+       uint64_t nx_ncaps;              /* network capabilities */
+
+       int nbipv4;
+       __u32 ipv4[NB_IPV4ROOT];        /* Process can only bind to these IPs */
+                                       /* The first one is used to connect */
+                                       /* and to bind any service */
+                                       /* The others must be used explicitly */
+       __u32 mask[NB_IPV4ROOT];        /* Netmask for each ipv4 */
+                                       /* Used to select the proper source */
+                                       /* address for sockets */
+       __u32 v4_bcast;                 /* Broadcast address to receive UDP  */
+
+       char nx_name[65];               /* network context name */
+};
+
+
+extern spinlock_t nxlist_lock;
+extern struct list_head nx_infos;
+
+
+void free_nx_info(struct nx_info *);
+struct nx_info *create_nx_info(void);
+
+extern struct nx_info *find_nx_info(int);
+extern int nx_info_id_valid(int);
+
+struct in_ifaddr;
+struct net_device;
+
+int ifa_in_nx_info(struct in_ifaddr *, struct nx_info *);
+int dev_in_nx_info(struct net_device *, struct nx_info *);
+
+
+#endif /* __KERNEL__ */
+
+#include "switch.h"
+
+/* vinfo commands */
+
+#define VCMD_task_nid          VC_CMD(VINFO, 2, 0)
+
+#ifdef __KERNEL__
+extern int vc_task_nid(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_nx_info           VC_CMD(VINFO, 6, 0)
+
+struct  vcmd_nx_info_v0 {
+       uint32_t nid;
+       /* more to come */      
+};
+
+#ifdef __KERNEL__
+extern int vc_nx_info(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_net_create                VC_CMD(VNET, 1, 0)
+#define VCMD_net_migrate       VC_CMD(NETMIG, 1, 0)
+
+#define VCMD_net_add           VC_CMD(NETALT, 1, 0)
+#define VCMD_net_remove                VC_CMD(NETALT, 2, 0)
+
+struct  vcmd_net_nx_v0 {
+       uint16_t type;
+       uint16_t count;
+       uint32_t ip[4];
+       uint32_t mask[4];
+       /* more to come */      
+};
+
+//     IPN_TYPE_IPV4   
+
+
+#ifdef __KERNEL__
+extern int vc_net_create(uint32_t, void __user *);
+extern int vc_net_migrate(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_get_nflags                VC_CMD(FLAGS, 5, 0)
+#define VCMD_set_nflags                VC_CMD(FLAGS, 6, 0)
+
+struct  vcmd_net_flags_v0 {
+       uint64_t flagword;
+       uint64_t mask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_nflags(uint32_t, void __user *);
+extern int vc_set_nflags(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define IPF_STATE_SETUP                (1ULL<<32)
+
+
+#define IPF_ONE_TIME           (0x0001ULL<<32)
+
+#define VCMD_get_ncaps         VC_CMD(FLAGS, 7, 0)
+#define VCMD_set_ncaps         VC_CMD(FLAGS, 8, 0)
+
+struct  vcmd_net_caps_v0 {
+       uint64_t ncaps;
+       uint64_t cmask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_ncaps(uint32_t, void __user *);
+extern int vc_set_ncaps(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define IPC_WOSSNAME           0x00000001
+
+
+#endif /* _VX_NETWORK_H */
diff --git a/include/linux/vserver/sched.h b/include/linux/vserver/sched.h
new file mode 100644 (file)
index 0000000..d1a2068
--- /dev/null
@@ -0,0 +1,139 @@
+#if    defined(__KERNEL__) && defined(_VX_INFO_DEF_)
+
+#include <linux/spinlock.h>
+#include <linux/jiffies.h>
+#include <asm/atomic.h>
+#include <asm/param.h>
+#include <asm/cpumask.h>
+
+/* context sub struct */
+
+struct _vx_sched {
+       spinlock_t tokens_lock; /* lock for this structure */
+
+       int fill_rate;          /* Fill rate: add X tokens... */
+       int interval;           /* Divisor:   per Y jiffies   */
+       atomic_t tokens;        /* number of CPU tokens in this context */
+       int tokens_min;         /* Limit:     minimum for unhold */
+       int tokens_max;         /* Limit:     no more than N tokens */
+       uint32_t jiffies;       /* add an integral multiple of Y to this */
+
+       uint64_t ticks;         /* token tick events */
+       cpumask_t cpus_allowed; /* cpu mask for context */
+};
+
+static inline void vx_info_init_sched(struct _vx_sched *sched)
+{
+        /* scheduling; hard code starting values as constants */
+        sched->fill_rate       = 1;
+        sched->interval                = 4;
+        sched->tokens_min      = HZ >> 4;
+        sched->tokens_max      = HZ >> 1;
+        sched->jiffies         = jiffies;
+        sched->tokens_lock     = SPIN_LOCK_UNLOCKED;
+
+        atomic_set(&sched->tokens, HZ >> 2);
+       sched->cpus_allowed     = CPU_MASK_ALL;
+}
+
+static inline void vx_info_exit_sched(struct _vx_sched *sched)
+{
+       return;
+}
+
+static inline int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
+{
+       return sprintf(buffer,
+               "Ticks:\t%16lld\n"
+               "Token:\t\t%8d\n"
+               "FillRate:\t%8d\n"
+               "Interval:\t%8d\n"              
+               "TokensMin:\t%8d\n"
+               "TokensMax:\t%8d\n"
+               ,sched->ticks
+               ,atomic_read(&sched->tokens)
+               ,sched->fill_rate
+               ,sched->interval
+               ,sched->tokens_min
+               ,sched->tokens_max
+               );
+}
+
+
+#else  /* _VX_INFO_DEF_ */
+#ifndef _VX_SCHED_H
+#define _VX_SCHED_H
+
+#include "switch.h"
+
+/*  sched vserver commands */
+
+#define VCMD_set_sched         VC_CMD(SCHED, 1, 2)
+
+struct  vcmd_set_sched_v2 {
+       int32_t fill_rate;
+       int32_t interval;
+       int32_t tokens;
+       int32_t tokens_min;
+       int32_t tokens_max;
+       uint64_t cpu_mask;
+};
+
+#define SCHED_KEEP             (-2)
+
+#ifdef __KERNEL__
+
+extern int vc_set_sched_v1(uint32_t, void __user *);
+extern int vc_set_sched(uint32_t, void __user *);
+
+
+#define VAVAVOOM_RATIO         50
+
+#include "context.h"
+
+
+/* scheduling stuff */
+
+int effective_vavavoom(struct task_struct *, int);
+
+int vx_tokens_recalc(struct vx_info *);
+
+/* new stuff ;) */
+
+static inline int vx_tokens_avail(struct vx_info *vxi)
+{
+       return atomic_read(&vxi->sched.tokens);
+}
+
+static inline void vx_consume_token(struct vx_info *vxi)
+{
+       atomic_dec(&vxi->sched.tokens);
+}
+
+static inline int vx_need_resched(struct task_struct *p)
+{
+#ifdef CONFIG_VSERVER_HARDCPU
+       struct vx_info *vxi = p->vx_info;
+
+       if (vxi) {
+               int tokens;
+
+               p->time_slice--;
+               if (atomic_read(&vxi->vx_refcount) < 1)
+                       printk("need_resched: p=%p, s=%ld, ref=%d, id=%d/%d\n",
+                               p, p->state, atomic_read(&vxi->vx_refcount),
+                               vxi->vx_id, p->xid);
+               if ((tokens = vx_tokens_avail(vxi)) > 0)
+                       vx_consume_token(vxi);
+               return ((p->time_slice == 0) || (tokens < 1));
+       }
+#endif
+       p->time_slice--;
+       return (p->time_slice == 0);
+}
+
+
+#endif /* __KERNEL__ */
+
+#endif /* _VX_SCHED_H */
+#endif
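
The fields of struct _vx_sched above describe a token bucket: fill_rate tokens are added per interval jiffies (the jiffies field notes "an integral multiple of Y"), the count is capped at tokens_max, and a context on hold needs at least tokens_min to run again. vx_tokens_recalc() itself is not part of this hunk, so the refill rule in the sketch below is an assumption about how those fields fit together, not the kernel implementation; the *_model names and HZ value are ours.

/* Hypothetical userspace sketch of the token-bucket refill suggested by
 * struct _vx_sched; the real vx_tokens_recalc() lives elsewhere. */
#include <stdio.h>

struct sched_model {
	int fill_rate;		/* add fill_rate tokens ...   */
	int interval;		/* ... every interval jiffies */
	int tokens;
	int tokens_min;
	int tokens_max;
	unsigned long jiffies;	/* last refill time, in jiffies */
};

/* assumed refill rule: integral multiples of interval only */
static int tokens_recalc_model(struct sched_model *s, unsigned long now)
{
	unsigned long delta = now - s->jiffies;
	long refill = (long)(delta / s->interval) * s->fill_rate;

	s->tokens += refill;
	if (s->tokens > s->tokens_max)
		s->tokens = s->tokens_max;
	s->jiffies += (delta / s->interval) * s->interval;

	/* a held context may only run again above tokens_min */
	return s->tokens >= s->tokens_min;
}

int main(void)
{
	/* HZ assumed to be 1000 for this example */
	struct sched_model s = { 1, 4, 0, 1000 >> 4, 1000 >> 1, 0 };

	printf("runnable after 100 jiffies: %d (tokens=%d)\n",
	       tokens_recalc_model(&s, 100), s.tokens);	/* 0, 25 */
	printf("runnable after 400 jiffies: %d (tokens=%d)\n",
	       tokens_recalc_model(&s, 400), s.tokens);	/* 1, 100 */
	return 0;
}
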
diff --git a/include/linux/vserver/signal.h b/include/linux/vserver/signal.h
new file mode 100644 (file)
index 0000000..3911127
--- /dev/null
@@ -0,0 +1,19 @@
+#ifndef _VX_SIGNAL_H
+#define _VX_SIGNAL_H
+
+#include "switch.h"
+
+/*  context signalling */
+
+#define VCMD_ctx_kill          VC_CMD(PROCTRL, 1, 0)
+
+struct  vcmd_ctx_kill_v0 {
+       int32_t pid;
+       int32_t sig;
+};
+
+#ifdef __KERNEL__
+extern int vc_ctx_kill(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_SIGNAL_H */
diff --git a/include/linux/vserver/switch.h b/include/linux/vserver/switch.h
new file mode 100644 (file)
index 0000000..5fef690
--- /dev/null
@@ -0,0 +1,95 @@
+#ifndef _VX_SWITCH_H
+#define _VX_SWITCH_H
+
+#include <linux/types.h>
+
+#define VC_CATEGORY(c)         (((c) >> 24) & 0x3F)
+#define VC_COMMAND(c)          (((c) >> 16) & 0xFF)
+#define VC_VERSION(c)          ((c) & 0xFFF)
+
+#define VC_CMD(c,i,v)          ((((VC_CAT_ ## c) & 0x3F) << 24) \
+                               | (((i) & 0xFF) << 16) | ((v) & 0xFFF))
+
+/*
+
+  Syscall Matrix V2.6
+
+         |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL|
+         |STATS  |DESTROY|ALTER  |CHANGE |LIMIT  |TEST   | |       |       |
+         |INFO   |SETUP  |       |MOVE   |       |       | |       |       |
+  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
+  SYSTEM |VERSION|VSETUP |VHOST  |       |       |       | |DEVICES|       |
+  HOST   |     00|     01|     02|     03|     04|     05| |     06|     07|
+  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
+  CPU    |       |VPROC  |PROCALT|PROCMIG|PROCTRL|       | |SCHED. |       |
+  PROCESS|     08|     09|     10|     11|     12|     13| |     14|     15|
+  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
+  MEMORY |       |       |       |       |       |       | |SWAP   |       |
+         |     16|     17|     18|     19|     20|     21| |     22|     23|
+  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
+  NETWORK|       |VNET   |NETALT |NETMIG |NETCTL |       | |SERIAL |       |
+         |     24|     25|     26|     27|     28|     29| |     30|     31|
+  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
+  DISK   |       |       |       |       |       |       | |INODE  |       |
+  VFS    |     32|     33|     34|     35|     36|     37| |     38|     39|
+  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
+  OTHER  |       |       |       |       |       |       | |VINFO  |       |
+         |     40|     41|     42|     43|     44|     45| |     46|     47|
+  =======+=======+=======+=======+=======+=======+=======+ +=======+=======+
+  SPECIAL|       |       |       |       |FLAGS  |       | |       |       |
+         |     48|     49|     50|     51|     52|     53| |     54|     55|
+  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
+  SPECIAL|       |       |       |       |RLIMIT |SYSCALL| |       |COMPAT |
+         |     56|     57|     58|     59|     60|TEST 61| |     62|     63|
+  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
+
+*/
+
+#define VC_CAT_VERSION         0
+
+#define VC_CAT_VSETUP          1
+#define VC_CAT_VHOST           2
+       
+#define VC_CAT_VPROC           9
+#define VC_CAT_PROCALT         10
+#define VC_CAT_PROCMIG         11
+#define VC_CAT_PROCTRL         12
+
+#define VC_CAT_SCHED           14
+
+#define VC_CAT_VNET            25
+#define VC_CAT_NETALT          26
+#define VC_CAT_NETMIG          27
+#define VC_CAT_NETCTRL         28
+
+#define VC_CAT_INODE           38
+
+#define VC_CAT_VINFO           46
+
+#define VC_CAT_FLAGS           52
+#define VC_CAT_RLIMIT          60
+
+#define VC_CAT_SYSTEST         61
+#define VC_CAT_COMPAT          63
+       
+/*  interface version */
+
+#define VCI_VERSION            0x00010016
+
+
+/*  query version */
+
+#define VCMD_get_version       VC_CMD(VERSION, 0, 0)
+
+
+#ifdef __KERNEL__
+
+#include <linux/errno.h>
+
+#define ENOTSUP                -EOPNOTSUPP
+
+#else  /* __KERNEL__ */
+#define __user
+#endif /* __KERNEL__ */
+
+#endif /* _VX_SWITCH_H */
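
To make the matrix above concrete, here is a standalone encode/decode round trip that reuses the VC_CMD/VC_CATEGORY/VC_COMMAND/VC_VERSION macros verbatim; VCMD_ctx_kill from vserver/signal.h (category PROCTRL, command 1, version 0) serves as the example command.

/* Encode/decode example for the command word layout defined above. */
#include <stdio.h>
#include <stdint.h>

#define VC_CATEGORY(c)	(((c) >> 24) & 0x3F)
#define VC_COMMAND(c)	(((c) >> 16) & 0xFF)
#define VC_VERSION(c)	((c) & 0xFFF)

#define VC_CMD(c, i, v)	((((VC_CAT_ ## c) & 0x3F) << 24) \
			| (((i) & 0xFF) << 16) | ((v) & 0xFFF))

#define VC_CAT_PROCTRL	12

int main(void)
{
	uint32_t cmd = VC_CMD(PROCTRL, 1, 0);	/* VCMD_ctx_kill */

	printf("cmd      = 0x%08x\n", cmd);		/* 0x0c010000 */
	printf("category = %u\n", VC_CATEGORY(cmd));	/* 12 (PROCTRL) */
	printf("command  = %u\n", VC_COMMAND(cmd));	/* 1 */
	printf("version  = %u\n", VC_VERSION(cmd));	/* 0 */
	return 0;
}
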
diff --git a/include/linux/vserver/xid.h b/include/linux/vserver/xid.h
new file mode 100644 (file)
index 0000000..ba52c25
--- /dev/null
@@ -0,0 +1,94 @@
+#ifndef _LINUX_XID_H_
+#define _LINUX_XID_H_
+
+#ifdef CONFIG_INOXID_NONE
+
+#define MAX_UID                0xFFFFFFFF
+#define MAX_GID                0xFFFFFFFF
+
+#define INOXID_XID(uid, gid, xid)      (0)
+
+#define XIDINO_UID(uid, xid)           (uid)
+#define XIDINO_GID(gid, xid)           (gid)
+
+#endif
+
+
+#ifdef CONFIG_INOXID_GID16
+
+#define MAX_UID                0xFFFFFFFF
+#define MAX_GID                0x0000FFFF
+
+#define INOXID_XID(uid, gid, xid)      (((gid) >> 16) & 0xFFFF)
+
+#define XIDINO_UID(uid, xid)           (uid)
+#define XIDINO_GID(gid, xid)           (((gid) & 0xFFFF) | ((xid) << 16))
+
+
+#endif
+
+
+#ifdef CONFIG_INOXID_GID24
+
+#define MAX_UID                0x00FFFFFF
+#define MAX_GID                0x00FFFFFF
+
+#define INOXID_XID(uid, gid, xid)      ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF))
+
+#define XIDINO_UID(uid, xid)           (((uid) & 0xFFFFFF) | (((xid) & 0xFF00) << 16))
+#define XIDINO_GID(gid, xid)           (((gid) & 0xFFFFFF) | (((xid) & 0x00FF) << 24))
+
+#endif
+
+
+#ifdef CONFIG_INOXID_GID32
+
+#define MAX_UID                0xFFFFFFFF
+#define MAX_GID                0xFFFFFFFF
+
+#define INOXID_XID(uid, gid, xid)      (xid)
+
+#define XIDINO_UID(uid, xid)           (uid)
+#define XIDINO_GID(gid, xid)           (gid)
+
+#endif
+
+
+#ifdef CONFIG_INOXID_RUNTIME
+
+#define MAX_UID                0xFFFFFFFF
+#define MAX_GID                0xFFFFFFFF
+
+#define INOXID_XID(uid, gid, xid)      (0)
+
+#define XIDINO_UID(uid, xid)           (uid)
+#define XIDINO_GID(gid, xid)           (gid)
+
+#endif
+
+
+#define INOXID_UID(uid, gid)           ((uid) & MAX_UID)
+#define INOXID_GID(uid, gid)           ((gid) & MAX_GID)
+
+static inline uid_t vx_map_uid(uid_t uid)
+{
+       if ((uid > MAX_UID) && (uid != -1))
+               uid = -2;
+       return (uid & MAX_UID);
+}
+
+static inline gid_t vx_map_gid(gid_t gid)
+{
+       if ((gid > MAX_GID) && (gid != -1))
+               gid = -2;
+       return (gid & MAX_GID);
+}
+
+
+#ifdef CONFIG_VSERVER_LEGACY           
+#define FIOC_GETXID    _IOR('x', 1, long)
+#define FIOC_SETXID    _IOW('x', 2, long)
+#define FIOC_SETXIDJ   _IOW('x', 3, long)
+#endif
+
+#endif /* _LINUX_XID_H_ */
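
As a worked example of the CONFIG_INOXID_GID24 scheme, the round trip below uses the macros above verbatim: the high byte of the xid is stored in bits 24-31 of the uid, the low byte in bits 24-31 of the gid, and both halves are recombined on the way back. Standalone illustration only; the chosen uid/gid/xid values are arbitrary.

/* Round trip through the CONFIG_INOXID_GID24 macros defined above. */
#include <stdio.h>
#include <stdint.h>

#define INOXID_XID(uid, gid, xid)	((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF))

#define XIDINO_UID(uid, xid)		(((uid) & 0xFFFFFF) | (((xid) & 0xFF00) << 16))
#define XIDINO_GID(gid, xid)		(((gid) & 0xFFFFFF) | (((xid) & 0x00FF) << 24))

int main(void)
{
	uint32_t uid = 1000, gid = 100, xid = 0x1234;

	/* store: xid high byte lands in uid bits 24-31, low byte in gid bits 24-31 */
	uint32_t disk_uid = XIDINO_UID(uid, xid);	/* 0x120003e8 */
	uint32_t disk_gid = XIDINO_GID(gid, xid);	/* 0x34000064 */

	/* load: both halves are recombined into the context id */
	printf("xid back = 0x%04x\n", INOXID_XID(disk_uid, disk_gid, 0));	/* 0x1234 */
	printf("uid back = %u, gid back = %u\n",
	       disk_uid & 0xFFFFFF, disk_gid & 0xFFFFFF);			/* 1000, 100 */
	return 0;
}
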
diff --git a/kernel/vserver/Kconfig b/kernel/vserver/Kconfig
new file mode 100644 (file)
index 0000000..635d8d4
--- /dev/null
@@ -0,0 +1,72 @@
+#
+# Linux VServer configuration
+#
+
+menu "Linux VServer"
+
+config VSERVER_LEGACY
+       bool    "Enable Legacy Kernel API"
+       default y
+       help
+         This enables the legacy API used in vs1.xx, which allows
+         older tools to be used (for migration purposes).
+
+config PROC_SECURE
+       bool    "Enable Proc Security"
+       depends on PROC_FS
+       default y
+       help
+         Hide proc entries by default for xid>1
+
+config VSERVER_HARDCPU
+       bool    "Enable Hard CPU Limits"
+       depends on EXPERIMENTAL
+       default n
+       help
+         Activate the Hard CPU Limits
+
+choice
+       prompt  "Persistent Inode Context Tagging"
+       default INOXID_GID24
+       help
+         This adds persistent context information to filesystems
+         mounted with the tagxid option. Tagging is a requirement
+         for per context disk limits and per context quota.
+
+
+config INOXID_NONE
+       bool    "Disabled"
+       help
+         no context information is stored for inodes
+
+config INOXID_GID16
+       bool    "UID32/GID16"
+       help
+         reduces the GID to 16 bits, but leaves the UID at 32 bits.
+
+config INOXID_GID24
+       bool    "UID24/GID24"
+       help
+         uses the upper 8 bits of both UID and GID for XID tagging,
+         which leaves 24 bits each for UID and GID; this should be
+         more than sufficient for normal use.
+
+config INOXID_GID32
+       bool    "UID32/GID32"
+       help
+         this uses otherwise reserved inode fields in the on-disk
+         representation, which limits its use to a few
+         filesystems (currently ext2 and ext3)
+
+config INOXID_MAGIC
+       bool    "Runtime"
+       depends on EXPERIMENTAL
+       help
+         inodes are tagged when first accessed; this doesn't
+         require any persistent information, but might give
+         inconsistent results for mixed access.
+
+endchoice
+
+endmenu
+
diff --git a/kernel/vserver/Makefile b/kernel/vserver/Makefile
new file mode 100644 (file)
index 0000000..c035a77
--- /dev/null
@@ -0,0 +1,12 @@
+#
+# Makefile for the Linux vserver routines.
+#
+
+
+obj-y          += vserver.o
+
+vserver-y      := switch.o context.o namespace.o sched.o network.o inode.o \
+                  limit.o cvirt.o signal.o proc.o sysctl.o init.o
+
+vserver-$(CONFIG_VSERVER_LEGACY) += legacy.o
+
diff --git a/kernel/vserver/context.c b/kernel/vserver/context.c
new file mode 100644 (file)
index 0000000..538834c
--- /dev/null
@@ -0,0 +1,558 @@
+/*
+ *  linux/kernel/vserver/context.c
+ *
+ *  Virtual Server: Context Support
+ *
+ *  Copyright (C) 2003-2004  Herbert Pötzl
+ *
+ *  V0.01  context helper
+ *  V0.02  vx_ctx_kill syscall command
+ *  V0.03  replaced context_info calls
+ *  V0.04  redesign of struct (de)alloc
+ *  V0.05  rlimit basic implementation
+ *  V0.06  task_xid and info commands
+ *  V0.07  context flags and caps
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/vserver/context.h>
+#include <linux/vserver/legacy.h>
+#include <linux/vinline.h>
+#include <linux/kernel_stat.h>
+#include <linux/namespace.h>
+
+#include <asm/errno.h>
+
+
+/*  system functions */
+
+
+LIST_HEAD(vx_infos);
+
+spinlock_t vxlist_lock
+       __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+
+
+/*
+ *     struct vx_info allocation and deallocation
+ */
+
+static struct vx_info *alloc_vx_info(int id)
+{
+       struct vx_info *new = NULL;
+       
+       vxdprintk("alloc_vx_info(%d)\n", id);
+       /* would this benefit from a slab cache? */
+       new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
+       if (!new)
+               return 0;
+
+       memset (new, 0, sizeof(struct vx_info));
+       new->vx_id = id;
+       INIT_LIST_HEAD(&new->vx_list);
+       /* rest of init goes here */
+       
+       vx_info_init_limit(&new->limit);
+       vx_info_init_sched(&new->sched);
+       vx_info_init_cvirt(&new->cvirt);
+       vx_info_init_cacct(&new->cacct);
+
+       new->vx_flags = VXF_STATE_SETUP|VXF_STATE_INIT;
+       new->vx_bcaps = CAP_INIT_EFF_SET;
+       new->vx_ccaps = 0;
+
+       vxdprintk("alloc_vx_info(%d) = %p\n", id, new);
+       return new;
+}
+
+void free_vx_info(struct vx_info *vxi)
+{
+       vxdprintk("free_vx_info(%p)\n", vxi);
+       if (vxi->vx_namespace)
+               put_namespace(vxi->vx_namespace);
+       if (vxi->vx_fs)
+               put_fs_struct(vxi->vx_fs);
+       
+       vx_info_exit_limit(&vxi->limit);
+       vx_info_exit_sched(&vxi->sched);
+       vx_info_exit_cvirt(&vxi->cvirt);
+       vx_info_exit_cacct(&vxi->cacct);
+       
+       BUG_ON(atomic_read(&vxi->vx_refcount));
+       vxi->vx_id = -1;
+
+       kfree(vxi);
+}
+
+
+/*
+ *     struct vx_info search by id
+ *     assumes vxlist_lock is held
+ */
+
+static __inline__ struct vx_info *__find_vx_info(int id)
+{
+       struct vx_info *vxi;
+
+       list_for_each_entry(vxi, &vx_infos, vx_list)
+               if (vxi->vx_id == id)
+                       return vxi;
+       return 0;
+}
+
+
+/*
+ *     struct vx_info ref stuff
+ */
+
+struct vx_info *find_vx_info(int id)
+{
+       struct vx_info *vxi;
+       
+       if (id < 0) {
+               vxi = current->vx_info;
+               get_vx_info(vxi);
+       } else {
+               spin_lock(&vxlist_lock);
+               if ((vxi = __find_vx_info(id)))
+                       get_vx_info(vxi);
+               spin_unlock(&vxlist_lock);
+       }
+       return vxi;
+}
+
+/*
+ *     verify that id is a valid xid
+ */
+
+int vx_info_id_valid(int id)
+{
+       int valid;
+
+       spin_lock(&vxlist_lock);
+       valid = (__find_vx_info(id) != NULL);
+       spin_unlock(&vxlist_lock);
+       return valid;
+}
+
+
+/*
+ *     dynamic context id ...
+ */
+
+static __inline__ xid_t __vx_dynamic_id(void)
+{
+       static xid_t seq = MAX_S_CONTEXT;
+       xid_t barrier = seq;
+       
+       do {
+               if (++seq > MAX_S_CONTEXT)
+                       seq = MIN_D_CONTEXT;
+               if (!__find_vx_info(seq))
+                       return seq;
+       } while (barrier != seq);
+       return 0;
+}
+
+static struct vx_info * __foc_vx_info(int id, int *err)
+{
+       struct vx_info *new, *vxi = NULL;
+       
+       vxdprintk("foc_vx_info(%d)\n", id);
+       if (!(new = alloc_vx_info(id))) {
+               *err = -ENOMEM;
+               return NULL;
+       }
+
+       /* dirty hack until Spectator becomes a cap */
+       if (id == 0 || id == 1) {
+               *err = -EBUSY;
+               free_vx_info(new);
+               return NULL;
+       }
+
+       spin_lock(&vxlist_lock);
+
+       /* dynamic context requested */
+       if (id == VX_DYNAMIC_ID) {
+               id = __vx_dynamic_id();
+               if (!id) {
+                       printk(KERN_ERR "no dynamic context available.\n");
+                       *err = -EAGAIN;
+                       goto out_unlock;
+               }
+               new->vx_id = id;
+       }
+       /* existing context requested */
+       else if ((vxi = __find_vx_info(id))) {
+               /* context in setup is not available */
+               if (vxi->vx_flags & VXF_STATE_SETUP) {
+                       vxdprintk("foc_vx_info(%d) = %p (not available)\n", id, vxi);
+                       vxi = NULL;
+                       *err = -EBUSY;
+               } else {
+                       vxdprintk("foc_vx_info(%d) = %p (found)\n", id, vxi);
+                       get_vx_info(vxi);
+                       *err = 0;
+               }
+               goto out_unlock;
+       }
+
+       /* new context requested */
+       vxdprintk("foc_vx_info(%d) = %p (new)\n", id, new);
+       atomic_set(&new->vx_refcount, 1);
+       list_add(&new->vx_list, &vx_infos);
+       vxi = new, new = NULL;
+       *err = 1;
+
+out_unlock:
+       spin_unlock(&vxlist_lock);
+       if (new)
+               free_vx_info(new);
+       return vxi;
+}
+
+
+struct vx_info *find_or_create_vx_info(int id)
+{
+       int err;
+
+       return __foc_vx_info(id, &err);
+}
+
+
+int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
+{
+       struct user_struct *new_user, *old_user;
+       
+       if (!p || !vxi)
+               BUG();
+       new_user = alloc_uid(vxi->vx_id, p->uid);
+       if (!new_user)
+               return -ENOMEM;
+
+       old_user = p->user;
+       if (new_user != old_user) {
+               atomic_inc(&new_user->processes);
+               atomic_dec(&old_user->processes);
+               p->user = new_user;
+       }
+       free_uid(old_user);
+       return 0;
+}
+
+void vx_mask_bcaps(struct task_struct *p)
+{
+       struct vx_info *vxi = p->vx_info;
+
+       p->cap_effective &= vxi->vx_bcaps;
+       p->cap_inheritable &= vxi->vx_bcaps;
+       p->cap_permitted &= vxi->vx_bcaps;
+}
+
+
+#include <linux/file.h>
+
+static inline int vx_nofiles_task(struct task_struct *tsk)
+{
+       struct files_struct *files = tsk->files;
+       const unsigned long *obptr, *cbptr;
+       int count, total;
+
+       spin_lock(&files->file_lock);
+       obptr = files->open_fds->fds_bits;
+       cbptr = files->close_on_exec->fds_bits;
+       count = files->max_fds / (sizeof(unsigned long) * 8);
+       for (total = 0; count > 0; count--) {
+               if (*obptr)
+                       total += hweight_long(*obptr);
+               obptr++;
+       /*      if (*cbptr)
+                       total += hweight_long(*cbptr);
+               cbptr++; */
+       }
+       spin_unlock(&files->file_lock);
+       return total;
+}
+
+static inline int vx_openfd_task(struct task_struct *tsk)
+{
+       struct files_struct *files = tsk->files;
+       const unsigned long *bptr;
+       int count, total;
+
+       spin_lock(&files->file_lock);
+       bptr = files->open_fds->fds_bits;
+       count = files->max_fds / (sizeof(unsigned long) * 8);
+       for (total = 0; count > 0; count--) {
+               if (*bptr)
+                       total += hweight_long(*bptr);
+               bptr++;
+       }
+       spin_unlock(&files->file_lock);
+       return total;
+}
+
+/*
+ *     migrate task to new context
+ *     gets vxi, puts old_vxi on change
+ */
+
+int vx_migrate_task(struct task_struct *p, struct vx_info *vxi)
+{
+       struct vx_info *old_vxi = task_get_vx_info(p);
+       int ret = 0;
+       
+       if (!p || !vxi)
+               BUG();
+
+       vxdprintk("vx_migrate_task(%p,%p[#%d.%d])\n", p, vxi,
+               vxi->vx_id, atomic_read(&vxi->vx_refcount));
+       if (old_vxi == vxi)
+               goto out;
+
+       if (!(ret = vx_migrate_user(p, vxi))) {
+               task_lock(p);
+               if (old_vxi) {
+                       atomic_dec(&old_vxi->cacct.nr_threads);
+                       atomic_dec(&old_vxi->limit.res[RLIMIT_NPROC]);
+               }               
+               atomic_inc(&vxi->cacct.nr_threads);
+               atomic_inc(&vxi->limit.res[RLIMIT_NPROC]);
+               atomic_add(vx_nofiles_task(p), &vxi->limit.res[RLIMIT_NOFILE]);
+               atomic_add(vx_openfd_task(p), &vxi->limit.res[RLIMIT_OPENFD]);
+               set_vx_info(&p->vx_info, vxi);
+               p->xid = vxi->vx_id;
+               vx_mask_bcaps(p);
+               task_unlock(p);
+
+               put_vx_info(old_vxi);
+       }
+out:
+       put_vx_info(old_vxi);
+       return ret;
+}
+
+int vx_set_init(struct vx_info *vxi, struct task_struct *p)
+{
+       if (!vxi)
+               return -EINVAL;
+        if (vxi->vx_initpid)
+                return -EPERM;
+
+        vxi->vx_initpid = p->tgid;
+       return 0;
+}
+
+
+/* vserver syscall commands below here */
+
+/* task xid and vx_info functions */
+
+#include <asm/uaccess.h>
+
+
+int vc_task_xid(uint32_t id, void __user *data)
+{
+        xid_t xid;
+
+        if (id) {
+                struct task_struct *tsk;
+
+                if (!vx_check(0, VX_ADMIN|VX_WATCH))
+                        return -EPERM;
+
+                read_lock(&tasklist_lock);
+                tsk = find_task_by_pid(id);
+                xid = (tsk) ? tsk->xid : -ESRCH;
+                read_unlock(&tasklist_lock);
+        }
+        else
+                xid = current->xid;
+        return xid;
+}
+
+
+int vc_vx_info(uint32_t id, void __user *data)
+{
+       struct vx_info *vxi;
+       struct vcmd_vx_info_v0 vc_data;
+
+       if (!vx_check(0, VX_ADMIN))
+               return -ENOSYS;
+       if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
+               return -EPERM;
+
+       vxi = find_vx_info(id);
+       if (!vxi)
+               return -ESRCH;
+
+       vc_data.xid = vxi->vx_id;
+       vc_data.initpid = vxi->vx_initpid;
+       put_vx_info(vxi);
+
+       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+               return -EFAULT;
+       return 0;
+}
+
+
+/* context functions */
+
+int vc_ctx_create(uint32_t xid, void __user *data)
+{
+        // int ret = -ENOMEM;
+       struct vx_info *new_vxi;
+       int ret;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if ((xid >= MIN_D_CONTEXT) && (xid != VX_DYNAMIC_ID))
+               return -EINVAL;
+
+       if (xid < 1)
+               return -EINVAL;
+
+       new_vxi = __foc_vx_info(xid, &ret);
+       if (!new_vxi)
+               return ret;
+       if (!(new_vxi->vx_flags & VXF_STATE_SETUP)) {
+               ret = -EEXIST;
+               goto out_put;
+       }
+
+       ret = new_vxi->vx_id;
+       vx_migrate_task(current, new_vxi);
+out_put:
+       put_vx_info(new_vxi);
+       return ret;
+}
+
+
+int vc_ctx_migrate(uint32_t id, void __user *data)
+{
+       struct vx_info *vxi;
+       
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       /* dirty hack until Spectator becomes a cap */
+       if (id == 1) {
+               current->xid = 1;
+               return 0;
+       }
+
+       vxi = find_vx_info(id);
+       if (!vxi)
+               return -ESRCH;
+       vx_migrate_task(current, vxi);
+       put_vx_info(vxi);
+       return 0;
+}
+
+
+int vc_get_cflags(uint32_t id, void __user *data)
+{
+       struct vx_info *vxi;
+       struct vcmd_ctx_flags_v0 vc_data;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       vxi = find_vx_info(id);
+       if (!vxi)
+               return -ESRCH;
+
+       vc_data.flagword = vxi->vx_flags;
+
+       // vc_data.mask = ~0UL;
+       /* special STATE flag handling */
+       vc_data.mask = vx_mask_flags(~0UL, vxi->vx_flags, VXF_ONE_TIME);
+
+       put_vx_info(vxi);
+
+       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+               return -EFAULT;
+       return 0;
+}
+
+int vc_set_cflags(uint32_t id, void __user *data)
+{
+       struct vx_info *vxi;
+       struct vcmd_ctx_flags_v0 vc_data;
+       uint64_t mask, trigger;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+
+       vxi = find_vx_info(id);
+       if (!vxi)
+               return -ESRCH;
+
+       /* special STATE flag handling */
+       mask = vx_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
+       trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);
+
+       if (trigger & VXF_STATE_SETUP)
+               vx_mask_bcaps(current);
+       if (trigger & VXF_STATE_INIT)
+               if (vxi == current->vx_info)
+                       vx_set_init(vxi, current);
+
+       vxi->vx_flags = vx_mask_flags(vxi->vx_flags,
+               vc_data.flagword, mask);
+       put_vx_info(vxi);
+       return 0;
+}
+
+int vc_get_ccaps(uint32_t id, void __user *data)
+{
+       struct vx_info *vxi;
+       struct vcmd_ctx_caps_v0 vc_data;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       vxi = find_vx_info(id);
+       if (!vxi)
+               return -ESRCH;
+
+       vc_data.bcaps = vxi->vx_bcaps;
+       vc_data.ccaps = vxi->vx_ccaps;
+       vc_data.cmask = ~0UL;
+       put_vx_info(vxi);
+
+       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+               return -EFAULT;
+       return 0;
+}
+
+int vc_set_ccaps(uint32_t id, void __user *data)
+{
+       struct vx_info *vxi;
+       struct vcmd_ctx_caps_v0 vc_data;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+
+       vxi = find_vx_info(id);
+       if (!vxi)
+               return -ESRCH;
+
+       vxi->vx_bcaps &= vc_data.bcaps;
+       vxi->vx_ccaps = vx_mask_flags(vxi->vx_ccaps,
+               vc_data.ccaps, vc_data.cmask);
+       put_vx_info(vxi);
+       return 0;
+}
+
+#include <linux/module.h>
+
+EXPORT_SYMBOL_GPL(free_vx_info);
+EXPORT_SYMBOL_GPL(vxlist_lock);
+
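
For orientation, the context commands above (vc_ctx_create, vc_ctx_migrate) are reached from userspace through the single vserver syscall; its number, __NR_vserver, is exported by proc.c later in this patch and only exists on a tree carrying it. A minimal sketch of creating a dynamic context and migrating into it could look like the following; VCMD_ctx_create and the VX_DYNAMIC_ID stand-in are placeholders, since the real command encodings live in switch.h, which is outside this excerpt.

    /* Hedged userspace sketch -- VCMD_ctx_create and DYNAMIC_XID are
     * placeholders; the real encodings come from switch.h, and
     * __NR_vserver exists only on a tree carrying this patch. */
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #define VCMD_ctx_create 0x0101      /* assumed command encoding */
    #define DYNAMIC_XID     (~0U)       /* assumed stand-in for VX_DYNAMIC_ID */

    int main(void)
    {
            /* ask for a fresh dynamic context; the kernel migrates the
             * caller into it and returns the new xid (see vc_ctx_create) */
            long xid = syscall(__NR_vserver, VCMD_ctx_create, DYNAMIC_XID, NULL);

            if (xid < 0)
                    perror("vserver ctx_create");
            else
                    printf("now running in context %ld\n", xid);
            return 0;
    }

On success the return value is the freshly assigned xid, mirroring the ret = new_vxi->vx_id path in vc_ctx_create above.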
diff --git a/kernel/vserver/cvirt.c b/kernel/vserver/cvirt.c
new file mode 100644 (file)
index 0000000..2b5c81e
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ *  linux/kernel/vserver/cvirt.c
+ *
+ *  Virtual Server: Context Virtualization
+ *
+ *  Copyright (C) 2004  Herbert Pötzl
+ *
+ *  V0.01  broken out from limit.c
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/vserver/cvirt.h>
+#include <linux/vserver/context.h>
+#include <linux/vserver/switch.h>
+#include <linux/vinline.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+
+void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle)
+{
+       struct vx_info *vxi = current->vx_info;
+
+       set_normalized_timespec(uptime,
+               uptime->tv_sec - vxi->cvirt.bias_tp.tv_sec,
+               uptime->tv_nsec - vxi->cvirt.bias_tp.tv_nsec);
+       if (!idle)
+               return;
+       set_normalized_timespec(idle,
+               idle->tv_sec - vxi->cvirt.bias_idle.tv_sec,
+               idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec);
+       return;
+}
+
+uint64_t vx_idle_jiffies(void)
+{
+       return init_task.utime + init_task.stime;
+}
+
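
vx_vsi_uptime above virtualizes uptime by subtracting the bias recorded when the context was set up and renormalizing the result. A self-contained illustration of the same arithmetic, with a local helper standing in for the kernel's set_normalized_timespec(), is sketched below:

    #include <stdio.h>

    struct ts { long sec; long nsec; };

    /* renormalize so that 0 <= nsec < 1e9, mirroring set_normalized_timespec() */
    static void normalize(struct ts *t, long sec, long nsec)
    {
            while (nsec >= 1000000000L) { nsec -= 1000000000L; sec++; }
            while (nsec < 0)            { nsec += 1000000000L; sec--; }
            t->sec = sec;
            t->nsec = nsec;
    }

    int main(void)
    {
            struct ts host  = { 5000, 200000000 };  /* host uptime           */
            struct ts bias  = { 4200, 700000000 };  /* recorded at ctx start */
            struct ts guest;

            normalize(&guest, host.sec - bias.sec, host.nsec - bias.nsec);
            printf("guest uptime: %ld.%09ld\n", guest.sec, guest.nsec);
            return 0;
    }

Here the guest sees 799.500000000 seconds of uptime regardless of how long the host itself has been running.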
diff --git a/kernel/vserver/init.c b/kernel/vserver/init.c
new file mode 100644 (file)
index 0000000..8afd1fc
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ *  linux/kernel/vserver/init.c
+ *
+ *  Virtual Server Init
+ *
+ *  Copyright (C) 2004  Herbert Pötzl
+ *
+ *  V0.01  basic structure
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/vserver.h>
+// #include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+int    vserver_register_sysctl(void);
+void   vserver_unregister_sysctl(void);
+
+
+static int __init init_vserver(void)
+{
+       int ret = 0;
+
+       vserver_register_sysctl();
+       return ret;
+}
+
+
+static void __exit exit_vserver(void)
+{
+
+       vserver_unregister_sysctl();
+       return;
+}
+
+
+module_init(init_vserver);
+module_exit(exit_vserver);
+
diff --git a/kernel/vserver/inode.c b/kernel/vserver/inode.c
new file mode 100644 (file)
index 0000000..87e2849
--- /dev/null
@@ -0,0 +1,220 @@
+/*
+ *  linux/kernel/vserver/inode.c
+ *
+ *  Virtual Server: File System Support
+ *
+ *  Copyright (C) 2004  Herbert Pötzl
+ *
+ *  V0.01  separated from vcontext V0.05
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/vinline.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/namei.h>
+#include <linux/vserver/inode.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+
+static int __vc_get_iattr(struct inode *in, uint32_t *xid, uint32_t *flags, uint32_t *mask)
+{
+       if (!in || !in->i_sb)
+               return -ESRCH;
+
+       *flags = IATTR_XID
+               | (IS_BARRIER(in) ? IATTR_BARRIER : 0)
+               | (IS_IUNLINK(in) ? IATTR_IUNLINK : 0)
+               | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0);     
+       *mask = IATTR_IUNLINK | IATTR_IMMUTABLE;
+
+       if (S_ISDIR(in->i_mode))
+               *mask |= IATTR_BARRIER;
+
+       if (in->i_sb->s_flags & MS_TAGXID) {
+               *xid = in->i_xid;
+               *mask |= IATTR_XID;
+       }
+
+       if (in->i_sb->s_magic == PROC_SUPER_MAGIC) {
+               struct proc_dir_entry *entry = PROC_I(in)->pde;
+               
+               // check for specific inodes ?
+               if (entry)
+                       *mask |= IATTR_FLAGS;
+               if (entry)
+                       *flags |= (entry->vx_flags & IATTR_FLAGS);      
+               else
+                       *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS);
+       }
+       return 0;
+}
+
+int vc_get_iattr(uint32_t id, void __user *data)
+{
+       struct nameidata nd;
+       struct vcmd_ctx_iattr_v1 vc_data;
+       int ret;
+
+       if (!vx_check(0, VX_ADMIN))
+               return -ENOSYS;
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+
+       ret = user_path_walk_link(vc_data.name, &nd);
+       if (!ret) {
+               ret = __vc_get_iattr(nd.dentry->d_inode,
+                       &vc_data.xid, &vc_data.flags, &vc_data.mask);
+               path_release(&nd);
+       }
+
+       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+               ret = -EFAULT;
+       return ret;
+}
+
+static int __vc_set_iattr(struct dentry *de, uint32_t *xid, uint32_t *flags, uint32_t *mask)
+{
+       struct inode *in = de->d_inode;
+       int error = 0, is_proc = 0;
+
+       if (!in || !in->i_sb)
+               return -ESRCH;
+
+       is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC);
+       if ((*mask & IATTR_FLAGS) && !is_proc)
+               return -EINVAL;
+       if ((*mask & IATTR_XID) && !(in->i_sb->s_flags & MS_TAGXID))
+               return -EINVAL;
+
+       down(&in->i_sem);
+       if (*mask & IATTR_XID)
+               in->i_xid = *xid;
+
+       if (*mask & IATTR_FLAGS) {
+               struct proc_dir_entry *entry = PROC_I(in)->pde;
+               unsigned int iflags = PROC_I(in)->vx_flags;
+
+               iflags = (iflags & ~(*mask & IATTR_FLAGS))
+                       | (*flags & IATTR_FLAGS);
+               PROC_I(in)->vx_flags = iflags;
+               if (entry)
+                       entry->vx_flags = iflags;
+       }
+       
+       if (*mask & (IATTR_BARRIER | IATTR_IUNLINK | IATTR_IMMUTABLE)) {
+               struct iattr attr;
+
+               attr.ia_valid = ATTR_ATTR_FLAG;
+               attr.ia_attr_flags =
+                       (IS_IMMUTABLE(in) ? ATTR_FLAG_IMMUTABLE : 0) |
+                       (IS_IUNLINK(in) ? ATTR_FLAG_IUNLINK : 0) |
+                       (IS_BARRIER(in) ? ATTR_FLAG_BARRIER : 0);
+
+               if (*mask & IATTR_IMMUTABLE) {
+                       if (*flags & IATTR_IMMUTABLE)
+                               attr.ia_attr_flags |= ATTR_FLAG_IMMUTABLE;
+                       else
+                               attr.ia_attr_flags &= ~ATTR_FLAG_IMMUTABLE;
+               }
+               if (*mask & IATTR_IUNLINK) {
+                       if (*flags & IATTR_IUNLINK)
+                               attr.ia_attr_flags |= ATTR_FLAG_IUNLINK;
+                       else
+                               attr.ia_attr_flags &= ~ATTR_FLAG_IUNLINK;
+               }
+               if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) {
+                       if (*flags & IATTR_BARRIER)
+                               attr.ia_attr_flags |= ATTR_FLAG_BARRIER;
+                       else
+                               attr.ia_attr_flags &= ~ATTR_FLAG_BARRIER;
+               }
+               if (in->i_op && in->i_op->setattr)
+                       error = in->i_op->setattr(de, &attr);
+               else {
+                       error = inode_change_ok(in, &attr);
+                       if (!error)
+                               error = inode_setattr(in, &attr);
+               }
+       }
+               
+       mark_inode_dirty(in);
+       up(&in->i_sem);
+	return error;
+}
+
+int vc_set_iattr(uint32_t id, void __user *data)
+{
+       struct nameidata nd;
+       struct vcmd_ctx_iattr_v1 vc_data;
+       int ret;
+
+       if (!capable(CAP_SYS_ADMIN) || !capable(CAP_LINUX_IMMUTABLE))
+               return -EPERM;
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+
+       ret = user_path_walk_link(vc_data.name, &nd);
+       if (!ret) {
+               ret = __vc_set_iattr(nd.dentry,
+                       &vc_data.xid, &vc_data.flags, &vc_data.mask);
+               path_release(&nd);
+       }
+
+       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+               ret = -EFAULT;
+       return ret;
+}
+
+
+#ifdef CONFIG_VSERVER_LEGACY           
+#include <linux/proc_fs.h>
+
+#define PROC_DYNAMIC_FIRST 0xF0000000UL
+
+int vx_proc_ioctl(struct inode * inode, struct file * filp,
+       unsigned int cmd, unsigned long arg)
+{
+       struct proc_dir_entry *entry;
+       int error = 0;
+       int flags;
+
+       if (inode->i_ino < PROC_DYNAMIC_FIRST)
+               return -ENOTTY;
+
+       entry = PROC_I(inode)->pde;
+
+       switch(cmd) {
+       case FIOC_GETXFLG: {
+               /* fixme: if stealth, return -ENOTTY */
+               error = -EPERM;
+               flags = entry->vx_flags;
+               if (capable(CAP_CONTEXT))
+                       error = put_user(flags, (int *) arg);
+               break;
+       }
+       case FIOC_SETXFLG: {
+               /* fixme: if stealth, return -ENOTTY */
+               error = -EPERM;
+               if (!capable(CAP_CONTEXT))
+                       break;
+               error = -EROFS;
+               if (IS_RDONLY(inode))
+                       break;
+               error = -EFAULT;
+               if (get_user(flags, (int *) arg))
+                       break;
+               error = 0;
+               entry->vx_flags = flags;
+               break;
+       }
+       default:
+               return -ENOTTY;
+       }
+       return error;
+}
+#endif
+
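
The legacy FIOC_GETXFLG/FIOC_SETXFLG path above is an ordinary ioctl issued against a /proc entry. A hedged userspace sketch follows; the header that actually defines FIOC_GETXFLG belongs to this patch but is not part of the excerpt, so the include path is an assumption, and the kernel handler additionally requires CAP_CONTEXT.

    /* Hedged sketch: the include path for FIOC_GETXFLG is assumed, and the
     * kernel handler above additionally requires CAP_CONTEXT. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/vserver/inode.h>    /* assumed home of FIOC_GETXFLG */

    int main(int argc, char *argv[])
    {
            int flags, fd;

            fd = open(argc > 1 ? argv[1] : "/proc/uptime", O_RDONLY);
            if (fd < 0)
                    return 1;
            if (ioctl(fd, FIOC_GETXFLG, &flags) == 0)
                    printf("vx_flags: %#x\n", flags);
            else
                    perror("FIOC_GETXFLG");
            close(fd);
            return 0;
    }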
diff --git a/kernel/vserver/legacy.c b/kernel/vserver/legacy.c
new file mode 100644 (file)
index 0000000..a620ae3
--- /dev/null
@@ -0,0 +1,161 @@
+/*
+ *  linux/kernel/vserver/legacy.c
+ *
+ *  Virtual Server: Legacy Functions
+ *
+ *  Copyright (C) 2001-2003  Jacques Gelinas
+ *  Copyright (C) 2003-2004  Herbert Pötzl
+ *
+ *  V0.01  broken out from vcontext.c V0.05
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/vserver/legacy.h>
+#include <linux/vserver/context.h>
+#include <linux/vserver/namespace.h>
+#include <linux/vserver.h>
+#include <linux/sched.h>
+#include <linux/namespace.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+
+
+static int vx_set_initpid(struct vx_info *vxi, int pid)
+{
+       if (vxi->vx_initpid)
+               return -EPERM;
+
+       vxi->vx_initpid = pid;
+       return 0;
+}
+
+int vc_new_s_context(uint32_t ctx, void __user *data)
+{
+       int ret = -ENOMEM;
+       struct vcmd_new_s_context_v1 vc_data;
+       struct vx_info *new_vxi;
+
+       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+
+       /* legacy hack, will be removed soon */
+       if (ctx == -2) {
+               /* assign flags and initpid */
+               if (!current->vx_info)
+                       return -EINVAL;
+               ret = 0;
+               if (vc_data.flags & VX_INFO_INIT)
+                       ret = vx_set_initpid(current->vx_info, current->tgid);
+               if (ret == 0) {
+                       /* We keep the same vx_id, but lower the capabilities */
+                       current->vx_info->vx_bcaps &= (~vc_data.remove_cap);
+                       // current->cap_bset &= (~vc_data.remove_cap);
+                       ret = vx_current_xid();
+                       current->vx_info->vx_flags |= vc_data.flags;
+               }
+               return ret;
+       }
+       
+       if (!vx_check(0, VX_ADMIN) ||
+               !capable(CAP_SYS_ADMIN) || vx_flags(VX_INFO_LOCK, 0))
+               return -EPERM;
+
+       /* ugly hack for Spectator */
+       if (ctx == 1) {
+               current->xid = 1;
+               return 0;
+       }
+
+       if (((ctx > MAX_S_CONTEXT) && (ctx != VX_DYNAMIC_ID)) ||
+               (ctx == 0))
+               return -EINVAL;
+               
+       if ((ctx == VX_DYNAMIC_ID) || (ctx < MIN_D_CONTEXT))
+               new_vxi = find_or_create_vx_info(ctx);
+       else
+               new_vxi = find_vx_info(ctx);
+
+       if (!new_vxi)
+               return -EINVAL;
+       new_vxi->vx_flags &= ~(VXF_STATE_SETUP|VXF_STATE_INIT);
+       
+       ret = vx_migrate_task(current, new_vxi);
+       if (ret == 0) {
+               current->vx_info->vx_bcaps &= (~vc_data.remove_cap);
+               // current->cap_bset &= (~vc_data.remove_cap);
+               new_vxi->vx_flags |= vc_data.flags;
+               if (vc_data.flags & VX_INFO_INIT)
+                       vx_set_initpid(new_vxi, current->tgid);
+               if (vc_data.flags & VX_INFO_NAMESPACE)
+                       vx_set_namespace(new_vxi,
+                               current->namespace, current->fs);
+               if (vc_data.flags & VX_INFO_NPROC)
+                       new_vxi->limit.rlim[RLIMIT_NPROC] =
+                               current->rlim[RLIMIT_NPROC].rlim_max;
+               ret = new_vxi->vx_id;
+       }
+       put_vx_info(new_vxi);
+       return ret;
+}
+
+
+
+/*  set ipv4 root (syscall) */
+
+int vc_set_ipv4root(uint32_t nbip, void __user *data)
+{
+       int i, err = -EPERM;
+       struct vcmd_set_ipv4root_v3 vc_data;
+       struct nx_info *new_nxi, *nxi = current->nx_info;
+
+       if (nbip < 0 || nbip > NB_IPV4ROOT)
+               return -EINVAL;
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+
+       if (!nxi || nxi->ipv4[0] == 0 || capable(CAP_NET_ADMIN))
+               // We are allowed to change everything
+               err = 0;
+       else if (nxi) {
+               int found = 0;
+               
+               // We are allowed to select a subset of the currently
+               // installed IP numbers. No new one allowed
+               // We can't change the broadcast address though
+               for (i=0; i<nbip; i++) {
+                       int j;
+                       __u32 nxip = vc_data.nx_mask_pair[i].ip;
+                       for (j=0; j<nxi->nbipv4; j++) {
+                               if (nxip == nxi->ipv4[j]) {
+                                       found++;
+                                       break;
+                               }
+                       }
+               }
+               if ((found == nbip) &&
+                       (vc_data.broadcast == nxi->v4_bcast))
+                       err = 0;
+       }
+       if (err)
+               return err;
+
+       new_nxi = create_nx_info();
+       if (!new_nxi)
+               return -EINVAL;
+
+       new_nxi->nbipv4 = nbip;
+       for (i=0; i<nbip; i++) {
+               new_nxi->ipv4[i] = vc_data.nx_mask_pair[i].ip;
+               new_nxi->mask[i] = vc_data.nx_mask_pair[i].mask;
+       }
+       new_nxi->v4_bcast = vc_data.broadcast;
+       current->nx_info = new_nxi;
+       current->nid = new_nxi->nx_id;
+       put_nx_info(nxi);
+       return 0;
+}
+
+
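
The permission check in vc_set_ipv4root reduces to a subset test: an unprivileged caller may only keep addresses already assigned to its network context, and must not change the broadcast address. The standalone sketch below reproduces that found == nbip test, with plain arrays standing in for struct nx_info and the vcmd payload:

    #include <stdio.h>

    /* 1 if every requested address is already in the current set --
     * the same "found == nbip" test used by vc_set_ipv4root above */
    static int is_subset(const unsigned int *req, int nreq,
                         const unsigned int *cur, int ncur)
    {
            int i, j, found = 0;

            for (i = 0; i < nreq; i++)
                    for (j = 0; j < ncur; j++)
                            if (req[i] == cur[j]) {
                                    found++;
                                    break;
                            }
            return found == nreq;
    }

    int main(void)
    {
            unsigned int assigned[] = { 0x0100007f, 0x0101a8c0 };  /* 127.0.0.1, 192.168.1.1 */
            unsigned int request[]  = { 0x0101a8c0 };              /* keep only 192.168.1.1  */

            printf("allowed: %d\n", is_subset(request, 1, assigned, 2));
            return 0;
    }

With CAP_NET_ADMIN the kernel skips this test entirely, as the early err = 0 branch above shows.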
diff --git a/kernel/vserver/limit.c b/kernel/vserver/limit.c
new file mode 100644 (file)
index 0000000..5bd2fdc
--- /dev/null
@@ -0,0 +1,149 @@
+/*
+ *  linux/kernel/vserver/limit.c
+ *
+ *  Virtual Server: Context Limits
+ *
+ *  Copyright (C) 2004  Herbert Pötzl
+ *
+ *  V0.01  broken out from vcontext V0.05
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/vserver/limit.h>
+#include <linux/vserver/context.h>
+#include <linux/vserver/switch.h>
+#include <linux/vinline.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+
+static int is_valid_rlimit(int id)
+{
+       int valid = 0;
+
+       switch (id) {
+               case RLIMIT_NPROC:
+               case RLIMIT_AS:
+               case RLIMIT_RSS:
+               case RLIMIT_MEMLOCK:
+               case RLIMIT_NOFILE:
+                       valid = 1;
+                       break;
+       }
+       return valid;
+}
+
+static inline uint64_t vc_get_rlim(struct vx_info *vxi, int id)
+{
+       unsigned long limit;
+
+       limit = vxi->limit.rlim[id];
+       if (limit == RLIM_INFINITY)
+               return CRLIM_INFINITY;
+       return limit;   
+}
+
+int vc_get_rlimit(uint32_t id, void __user *data)
+{
+       struct vx_info *vxi;
+       struct vcmd_ctx_rlimit_v0 vc_data;
+
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+       if (!is_valid_rlimit(vc_data.id))
+               return -ENOTSUPP;
+               
+       vxi = find_vx_info(id);
+       if (!vxi)
+               return -ESRCH;
+
+       vc_data.maximum = vc_get_rlim(vxi, vc_data.id);
+       vc_data.minimum = CRLIM_UNSET;
+       vc_data.softlimit = CRLIM_UNSET;
+       put_vx_info(vxi);
+
+       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+               return -EFAULT;
+       return 0;
+}
+
+int vc_set_rlimit(uint32_t id, void __user *data)
+{
+       struct vx_info *vxi;
+       struct vcmd_ctx_rlimit_v0 vc_data;
+
+       if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
+               return -EPERM;
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+       if (!is_valid_rlimit(vc_data.id))
+               return -ENOTSUPP;
+
+       vxi = find_vx_info(id);
+       if (!vxi)
+               return -ESRCH;
+
+       if (vc_data.maximum != CRLIM_KEEP)
+               vxi->limit.rlim[vc_data.id] = vc_data.maximum;
+       printk("setting [%d] = %d\n", vc_data.id, (int)vc_data.maximum);
+       put_vx_info(vxi);
+
+       return 0;
+}
+
+int vc_get_rlimit_mask(uint32_t id, void __user *data)
+{
+	static struct vcmd_ctx_rlimit_mask_v0 mask = {
+		/* minimum */
+		0,
+		/* softlimit */
+		0,
+		/* maximum */
+		(1 << RLIMIT_NPROC) |
+		(1 << RLIMIT_NOFILE) |
+		(1 << RLIMIT_MEMLOCK) |
+		(1 << RLIMIT_AS) |
+		(1 << RLIMIT_RSS)
+	};
+
+       if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
+               return -EPERM;
+       if (copy_to_user(data, &mask, sizeof(mask)))
+                return -EFAULT;
+       return 0;
+}
+
+
+void vx_vsi_meminfo(struct sysinfo *val)
+{
+       struct vx_info *vxi = current->vx_info;
+       unsigned long v;
+
+       v = vxi->limit.rlim[RLIMIT_RSS];
+       if (v != RLIM_INFINITY)
+               val->totalram = min(val->totalram, v);
+       v = atomic_read(&vxi->limit.res[RLIMIT_RSS]);
+       val->freeram = (v < val->totalram) ? val->totalram - v : 0;
+       val->bufferram = 0;
+        val->totalhigh = 0;
+        val->freehigh = 0;
+       return;
+}
+
+void vx_vsi_swapinfo(struct sysinfo *val)
+{
+       struct vx_info *vxi = current->vx_info;
+       unsigned long w,v;
+
+       v = vxi->limit.rlim[RLIMIT_RSS];
+       w = vxi->limit.rlim[RLIMIT_AS];
+       if (w != RLIM_INFINITY)
+               val->totalswap = min(val->totalswap, w -
+               ((v != RLIM_INFINITY) ? v : 0));
+       w = atomic_read(&vxi->limit.res[RLIMIT_AS]);
+       val->freeswap = (w < val->totalswap) ? val->totalswap - w : 0;
+       return;
+}
+
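
vc_get_rlimit and vc_set_rlimit above exchange a small id/minimum/softlimit/maximum record with userspace, of which only id and maximum are consulted on the set path. A hedged sketch of capping RLIMIT_NPROC for a context follows; the struct mirror, the command number and the example xid are assumptions standing in for definitions in the vserver headers, and __NR_vserver again assumes a patched tree.

    /* Hedged userspace sketch -- the struct layout mirrors how
     * vc_set_rlimit reads its argument above; VCMD_set_rlimit and
     * the xid are placeholders, not values from the real headers. */
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/resource.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    struct ctx_rlimit_v0 {              /* assumed mirror of vcmd_ctx_rlimit_v0 */
            uint32_t id;
            uint64_t minimum;
            uint64_t softlimit;
            uint64_t maximum;
    };

    #define VCMD_set_rlimit 0x0201      /* placeholder command encoding */

    int main(void)
    {
            struct ctx_rlimit_v0 lim = {
                    .id        = RLIMIT_NPROC,
                    .minimum   = 0,     /* not consulted by vc_set_rlimit */
                    .softlimit = 0,     /* not consulted either */
                    .maximum   = 256,   /* cap the context at 256 tasks */
            };

            if (syscall(__NR_vserver, VCMD_set_rlimit, 42 /* xid */, &lim) < 0)
                    perror("vserver set_rlimit");
            return 0;
    }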
diff --git a/kernel/vserver/namespace.c b/kernel/vserver/namespace.c
new file mode 100644 (file)
index 0000000..2c76c6f
--- /dev/null
@@ -0,0 +1,195 @@
+/*
+ *  linux/kernel/vserver/namespace.c
+ *
+ *  Virtual Server: Context Namespace Support
+ *
+ *  Copyright (C) 2003-2004  Herbert Pötzl
+ *
+ *  V0.01  broken out from context.c 0.07
+ *  V0.02  added task locking for namespace
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/utsname.h>
+#include <linux/vserver/namespace.h>
+#include <linux/vinline.h>
+#include <linux/namespace.h>
+#include <linux/dcache.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+
+/* virtual host info names */
+
+static char * vx_vhi_name(struct vx_info *vxi, int id)
+{
+       switch (id) {
+               case VHIN_CONTEXT:
+                       return vxi->vx_name;
+               case VHIN_SYSNAME:
+                       return vxi->cvirt.utsname.sysname;
+               case VHIN_NODENAME:
+                       return vxi->cvirt.utsname.nodename;
+               case VHIN_RELEASE:
+                       return vxi->cvirt.utsname.release;
+               case VHIN_VERSION:
+                       return vxi->cvirt.utsname.version;
+               case VHIN_MACHINE:
+                       return vxi->cvirt.utsname.machine;
+               case VHIN_DOMAINNAME:
+                       return vxi->cvirt.utsname.domainname;
+               default:
+                       return NULL;
+       }
+       return NULL;
+}
+
+int vc_set_vhi_name(uint32_t id, void __user *data)
+{
+       struct vx_info *vxi;
+       struct vcmd_vx_vhi_name_v0 vc_data;
+       char *name;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+       
+       vxi = find_vx_info(id);
+       if (!vxi)
+               return -ESRCH;
+       
+       name = vx_vhi_name(vxi, vc_data.field);
+       if (name)
+               memcpy(name, vc_data.name, 65);
+       put_vx_info(vxi);
+       return (name ? 0 : -EFAULT);
+}
+
+int vc_get_vhi_name(uint32_t id, void __user *data)
+{
+       struct vx_info *vxi;
+       struct vcmd_vx_vhi_name_v0 vc_data;
+       char *name;
+
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+
+       vxi = find_vx_info(id);
+       if (!vxi)
+               return -ESRCH;
+
+       name = vx_vhi_name(vxi, vc_data.field);
+       if (!name)
+               goto out_put;
+                       
+       memcpy(vc_data.name, name, 65);
+	if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+		name = NULL;	/* report -EFAULT via the common exit below */
+out_put:
+       put_vx_info(vxi);
+       return (name ? 0 : -EFAULT);
+}
+
+/* namespace functions */
+
+#include <linux/namespace.h>
+
+int vx_set_namespace(struct vx_info *vxi, struct namespace *ns, struct fs_struct *fs)
+{
+       struct fs_struct *fs_copy;
+
+       if (vxi->vx_namespace)
+               return -EPERM;
+       if (!ns || !fs)
+               return -EINVAL;
+
+       fs_copy = copy_fs_struct(fs);
+       if (!fs_copy)
+               return -ENOMEM;
+
+       get_namespace(ns);
+       vxi->vx_namespace = ns;
+       vxi->vx_fs = fs_copy;
+       return 0;
+}
+
+int vc_enter_namespace(uint32_t id, void *data)
+{
+       struct vx_info *vxi;
+       struct fs_struct *old_fs, *fs;
+       struct namespace *old_ns;
+       int ret = 0;
+
+       if (!vx_check(0, VX_ADMIN))
+               return -ENOSYS;
+
+       vxi = find_vx_info(id);
+       if (!vxi)
+               return -ESRCH;
+
+       ret = -EINVAL;
+       if (!vxi->vx_namespace)
+               goto out_put;
+
+       ret = -ENOMEM;
+       fs = copy_fs_struct(vxi->vx_fs);
+       if (!fs)
+               goto out_put;
+
+       ret = 0;
+       task_lock(current);
+       old_ns = current->namespace;
+       old_fs = current->fs;
+       get_namespace(vxi->vx_namespace);
+       current->namespace = vxi->vx_namespace; 
+       current->fs = fs;
+       task_unlock(current);
+
+       put_namespace(old_ns);
+       put_fs_struct(old_fs);
+out_put:
+       put_vx_info(vxi);
+       return ret;
+}
+
+int vc_cleanup_namespace(uint32_t id, void *data)
+{
+       down_write(&current->namespace->sem);
+       // spin_lock(&dcache_lock);
+       spin_lock(&vfsmount_lock);
+       umount_unused(current->namespace->root, current->fs);
+       spin_unlock(&vfsmount_lock);
+       // spin_unlock(&dcache_lock);
+       up_write(&current->namespace->sem);
+       return 0;
+}
+
+int vc_set_namespace(uint32_t id, void __user *data)
+{
+       struct fs_struct *fs;
+       struct namespace *ns;
+       struct vx_info *vxi;
+       int ret;
+
+       if (vx_check(0, VX_ADMIN|VX_WATCH))
+               return -ENOSYS;
+
+       task_lock(current);
+       vxi = get_vx_info(current->vx_info);
+       fs = current->fs;
+       atomic_inc(&fs->count);
+       ns = current->namespace;
+       get_namespace(current->namespace);
+       task_unlock(current);
+
+       ret = vx_set_namespace(vxi, ns, fs);
+
+       put_namespace(ns);
+       put_fs_struct(fs);
+       put_vx_info(vxi);
+       return ret;
+}
+
diff --git a/kernel/vserver/network.c b/kernel/vserver/network.c
new file mode 100644 (file)
index 0000000..479a19b
--- /dev/null
@@ -0,0 +1,513 @@
+/*
+ *  linux/kernel/vserver/network.c
+ *
+ *  Virtual Server: Network Support
+ *
+ *  Copyright (C) 2003-2004  Herbert Pötzl
+ *
+ *  V0.01  broken out from vcontext V0.05
+ *  V0.02  cleaned up implementation
+ *  V0.03  added equiv nx commands
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/vserver/network.h>
+#include <linux/ninline.h>
+
+#include <asm/errno.h>
+
+
+LIST_HEAD(nx_infos);
+
+spinlock_t nxlist_lock
+       __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+
+
+/*
+		if (c < 0 || c > 9)
+ */
+
+static struct nx_info *alloc_nx_info(void)
+{
+       struct nx_info *new = NULL;
+       
+       nxdprintk("alloc_nx_info()\n");
+       /* would this benefit from a slab cache? */
+       new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
+       if (!new)
+               return 0;
+       
+       memset (new, 0, sizeof(struct nx_info));
+       /* rest of init goes here */
+       
+       nxdprintk("alloc_nx_info() = %p\n", new);
+       return new;
+}
+
+void free_nx_info(struct nx_info *nxi)
+{
+       nxdprintk("free_nx_info(%p)\n", nxi);
+       kfree(nxi);
+}
+
+struct nx_info *create_nx_info(void)
+{
+       struct nx_info *new;
+       static int gnid = 1;
+       
+       nxdprintk("create_nx_info()\n");
+       if (!(new = alloc_nx_info()))
+               return 0;
+
+       spin_lock(&nxlist_lock);
+
+       /* new ip info */
+       atomic_set(&new->nx_refcount, 1);
+       new->nx_id = gnid++;
+       list_add(&new->nx_list, &nx_infos);
+
+       spin_unlock(&nxlist_lock);
+       return new;
+}
+
+
+/*
+ *     struct nx_info search by id
+ *     assumes nxlist_lock is held
+ */
+
+static __inline__ struct nx_info *__find_nx_info(int id)
+{
+       struct nx_info *nxi;
+
+       list_for_each_entry(nxi, &nx_infos, nx_list)
+               if (nxi->nx_id == id)
+                       return nxi;
+       return 0;
+}
+
+
+/*
+ *     struct nx_info ref stuff
+ */
+
+struct nx_info *find_nx_info(int id)
+{
+       struct nx_info *nxi;
+       
+       if (id < 0) {
+               nxi = current->nx_info;
+               get_nx_info(nxi);
+       } else {
+               spin_lock(&nxlist_lock);
+               if ((nxi = __find_nx_info(id)))
+                       get_nx_info(nxi);
+               spin_unlock(&nxlist_lock);
+       }
+       return nxi;
+}
+
+/*
+ *      verify that id is a valid nid
+ */
+
+int nx_info_id_valid(int id)
+{
+       int valid;
+       
+       spin_lock(&nxlist_lock);
+       valid = (__find_nx_info(id) != NULL);
+       spin_unlock(&nxlist_lock);
+       return valid;
+}
+
+
+/*
+ *     dynamic context id ...
+ */
+
+static __inline__ nid_t __nx_dynamic_id(void)
+{
+       static nid_t seq = MAX_N_CONTEXT;
+       nid_t barrier = seq;
+       
+       do {
+               if (++seq > MAX_N_CONTEXT)
+                       seq = MIN_D_CONTEXT;
+               if (!__find_nx_info(seq))
+                       return seq;
+       } while (barrier != seq);
+       return 0;
+}
+
+static struct nx_info * __foc_nx_info(int id, int *err)
+{
+       struct nx_info *new, *nxi = NULL;
+       
+       nxdprintk("foc_nx_info(%d)\n", id);
+       // if (!(new = alloc_nx_info(id))) {
+       if (!(new = alloc_nx_info())) {
+               *err = -ENOMEM;
+               return NULL;
+       }
+
+       spin_lock(&nxlist_lock);
+
+       /* dynamic context requested */
+       if (id == IP_DYNAMIC_ID) {
+               id = __nx_dynamic_id();
+               if (!id) {
+                       printk(KERN_ERR "no dynamic context available.\n");
+                       goto out_unlock;
+               }
+               new->nx_id = id;
+       }
+       /* existing context requested */
+       else if ((nxi = __find_nx_info(id))) {
+               /* context in setup is not available */
+               if (nxi->nx_flags & VXF_STATE_SETUP) {
+                       nxdprintk("foc_nx_info(%d) = %p (not available)\n", id, nxi);
+                       nxi = NULL;
+                       *err = -EBUSY;
+               } else {
+                       nxdprintk("foc_nx_info(%d) = %p (found)\n", id, nxi);
+                       get_nx_info(nxi);
+                       *err = 0;
+               }
+               goto out_unlock;
+       }
+
+       /* new context requested */
+       nxdprintk("foc_nx_info(%d) = %p (new)\n", id, new);
+       atomic_set(&new->nx_refcount, 1);
+       list_add(&new->nx_list, &nx_infos);
+       nxi = new, new = NULL;
+       *err = 1;
+
+out_unlock:
+       spin_unlock(&nxlist_lock);
+       if (new)
+               free_nx_info(new);
+       return nxi;
+}
+
+
+struct nx_info *find_or_create_nx_info(int id)
+{
+       int err;
+
+       return __foc_nx_info(id, &err);
+}
+
+/*
+ *     migrate task to new network
+ */
+
+int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
+{
+	struct nx_info *old_nxi;
+	int ret = 0;
+
+	if (!p || !nxi)
+		BUG();
+	old_nxi = task_get_nx_info(p);
+	nxdprintk("nx_migrate_task(%p,%p[#%d.%d])\n", p, nxi,
+               nxi->nx_id, atomic_read(&nxi->nx_refcount));
+       if (old_nxi == nxi)
+               goto out;
+
+       task_lock(p);
+       set_nx_info(&p->nx_info, nxi);
+       p->nid = nxi->nx_id;
+       task_unlock(p);
+
+       put_nx_info(old_nxi);
+out:
+       put_nx_info(old_nxi);
+       return ret;
+}
+
+
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+
+static inline int __addr_in_nx_info(u32 addr, struct nx_info *nxi)
+{
+       int i, nbip;
+
+       nbip = nxi->nbipv4;
+       for (i=0; i<nbip; i++)
+               if (nxi->ipv4[i] == addr)
+                       return 1;
+       return 0;
+}
+
+int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
+{
+       if (!nxi)
+               return 1;
+       
+       return __addr_in_nx_info(ifa->ifa_address, nxi);
+}
+
+int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
+{
+       struct in_device *in_dev = __in_dev_get(dev);
+       struct in_ifaddr **ifap = NULL;
+       struct in_ifaddr *ifa = NULL;
+
+       if (!nxi)
+               return 1;
+       if (!in_dev)
+               return 0;
+
+       for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
+               ifap = &ifa->ifa_next) {
+               if (__addr_in_nx_info(ifa->ifa_address, nxi))
+                       return 1;
+       }
+       return 0;
+}
+
+
+
+
+/* vserver syscall commands below here */
+
+/* task nid and nx_info functions */
+
+#include <asm/uaccess.h>
+
+
+int vc_task_nid(uint32_t id, void __user *data)
+{
+        nid_t nid;
+
+        if (id) {
+                struct task_struct *tsk;
+
+                if (!vx_check(0, VX_ADMIN|VX_WATCH))
+                        return -EPERM;
+
+                read_lock(&tasklist_lock);
+                tsk = find_task_by_pid(id);
+                nid = (tsk) ? tsk->nid : -ESRCH;
+                read_unlock(&tasklist_lock);
+        }
+        else
+                nid = current->nid;
+        return nid;
+}
+
+
+int vc_nx_info(uint32_t id, void __user *data)
+{
+       struct nx_info *nxi;
+       struct vcmd_nx_info_v0 vc_data;
+
+       if (!vx_check(0, VX_ADMIN))
+               return -ENOSYS;
+       if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
+               return -EPERM;
+
+       nxi = find_nx_info(id);
+       if (!nxi)
+               return -ESRCH;
+
+       vc_data.nid = nxi->nx_id;
+       put_nx_info(nxi);
+
+       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+               return -EFAULT;
+       return 0;
+}
+
+
+/* network functions */
+
+int vc_net_create(uint32_t nid, void __user *data)
+{
+        // int ret = -ENOMEM;
+       struct nx_info *new_nxi;
+       int ret;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if ((nid >= MIN_D_CONTEXT) && (nid != VX_DYNAMIC_ID))
+               return -EINVAL;
+
+       if (nid < 1)
+               return -EINVAL;
+
+       new_nxi = __foc_nx_info(nid, &ret);
+       if (!new_nxi)
+               return ret;
+       if (!(new_nxi->nx_flags & VXF_STATE_SETUP)) {
+               ret = -EEXIST;
+               goto out_put;
+       }
+
+       ret = new_nxi->nx_id;
+       nx_migrate_task(current, new_nxi);
+out_put:
+       put_nx_info(new_nxi);
+       return ret;
+}
+
+
+int vc_net_migrate(uint32_t id, void __user *data)
+{
+       struct nx_info *nxi;
+       
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       nxi = find_nx_info(id);
+       if (!nxi)
+               return -ESRCH;
+       nx_migrate_task(current, nxi);
+       put_nx_info(nxi);
+       return 0;
+}
+
+int vc_net_add(uint32_t id, void __user *data)
+{
+       struct nx_info *nxi;
+       struct vcmd_net_nx_v0 vc_data;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+
+       nxi = find_nx_info(id);
+       if (!nxi)
+               return -ESRCH;
+
+       // add ip to net context here
+       put_nx_info(nxi);
+       return 0;
+}
+
+int vc_net_remove(uint32_t id, void __user *data)
+{
+       struct nx_info *nxi;
+       struct vcmd_net_nx_v0 vc_data;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+
+       nxi = find_nx_info(id);
+       if (!nxi)
+               return -ESRCH;
+
+       // rem ip from net context here
+       put_nx_info(nxi);
+       return 0;
+}
+
+
+
+int vc_get_nflags(uint32_t id, void __user *data)
+{
+       struct nx_info *nxi;
+       struct vcmd_net_flags_v0 vc_data;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       nxi = find_nx_info(id);
+       if (!nxi)
+               return -ESRCH;
+
+       vc_data.flagword = nxi->nx_flags;
+
+       // vc_data.mask = ~0UL;
+       /* special STATE flag handling */
+       vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, IPF_ONE_TIME);
+
+       put_nx_info(nxi);
+
+       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+               return -EFAULT;
+       return 0;
+}
+
+int vc_set_nflags(uint32_t id, void __user *data)
+{
+       struct nx_info *nxi;
+       struct vcmd_net_flags_v0 vc_data;
+       uint64_t mask, trigger;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+
+       nxi = find_nx_info(id);
+       if (!nxi)
+               return -ESRCH;
+
+       /* special STATE flag handling */
+       mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, IPF_ONE_TIME);
+       trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
+       // if (trigger & IPF_STATE_SETUP)
+
+       nxi->nx_flags = vx_mask_flags(nxi->nx_flags,
+               vc_data.flagword, mask);
+       put_nx_info(nxi);
+       return 0;
+}
+
+int vc_get_ncaps(uint32_t id, void __user *data)
+{
+       struct nx_info *nxi;
+       struct vcmd_net_caps_v0 vc_data;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       nxi = find_nx_info(id);
+       if (!nxi)
+               return -ESRCH;
+
+       vc_data.ncaps = nxi->nx_ncaps;
+       vc_data.cmask = ~0UL;
+       put_nx_info(nxi);
+
+       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+               return -EFAULT;
+       return 0;
+}
+
+int vc_set_ncaps(uint32_t id, void __user *data)
+{
+       struct nx_info *nxi;
+       struct vcmd_net_caps_v0 vc_data;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+
+       nxi = find_nx_info(id);
+       if (!nxi)
+               return -ESRCH;
+
+       nxi->nx_ncaps = vx_mask_flags(nxi->nx_ncaps,
+               vc_data.ncaps, vc_data.cmask);
+       put_nx_info(nxi);
+       return 0;
+}
+
+
+#include <linux/module.h>
+
+EXPORT_SYMBOL_GPL(free_nx_info);
+EXPORT_SYMBOL_GPL(nxlist_lock);
+
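
__nx_dynamic_id above hands out dynamic network ids round-robin inside the [MIN_D_CONTEXT, MAX_N_CONTEXT] window, wrapping once and giving up when every id is taken. The same cursor-and-wrap pattern is easy to demonstrate in isolation; the tiny window and the used[] array below are stand-ins for the real bounds and for __find_nx_info():

    #include <stdio.h>

    #define MIN_ID  49152               /* assumed stand-in for MIN_D_CONTEXT */
    #define MAX_ID  49155               /* tiny window so the demo wraps fast */

    static int used[MAX_ID + 1];        /* stand-in for __find_nx_info() */

    /* round-robin cursor mirroring __nx_dynamic_id(): advance, wrap at the
     * top of the window, give up after one full lap */
    static unsigned int dynamic_id(void)
    {
            static unsigned int seq = MAX_ID;
            unsigned int barrier = seq;

            do {
                    if (++seq > MAX_ID)
                            seq = MIN_ID;
                    if (!used[seq])
                            return seq;
            } while (barrier != seq);
            return 0;                   /* window exhausted */
    }

    int main(void)
    {
            int i;

            for (i = 0; i < 6; i++) {
                    unsigned int id = dynamic_id();

                    if (!id) {
                            puts("no dynamic id available");
                            break;
                    }
                    used[id] = 1;
                    printf("allocated %u\n", id);
            }
            return 0;
    }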
diff --git a/kernel/vserver/proc.c b/kernel/vserver/proc.c
new file mode 100644 (file)
index 0000000..42bc182
--- /dev/null
@@ -0,0 +1,905 @@
+/*
+ *  linux/kernel/vserver/proc.c
+ *
+ *  Virtual Context Support
+ *
+ *  Copyright (C) 2003-2004  Herbert Pötzl
+ *
+ *  V0.01  basic structure
+ *  V0.02  adaptation vs1.3.0
+ *  V0.03  proc permissions
+ *  V0.04  locking/generic
+ *  V0.05  next generation procfs
+ *  V0.06  inode validation
+ *  V0.07  generic rewrite vid
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/vserver.h>
+
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+
+
+static struct proc_dir_entry *proc_virtual;
+
+static struct proc_dir_entry *proc_vnet;
+
+
+enum vid_directory_inos {
+       PROC_XID_INO = 32,
+       PROC_XID_INFO,
+       PROC_XID_STATUS,
+       PROC_XID_LIMIT,
+       PROC_XID_SCHED,
+       PROC_XID_CVIRT,
+       PROC_XID_CACCT,
+
+       PROC_NID_INO = 64,
+       PROC_NID_INFO,
+       PROC_NID_STATUS,
+};
+
+#define        PROC_VID_MASK   0x60
+
+
+/* first the actual feeds */
+
+
+static int proc_virtual_info(int vid, char *buffer)
+{
+       return sprintf(buffer,
+               "VCIVersion:\t%04x:%04x\n"
+               "VCISyscall:\t%d\n"
+               ,VCI_VERSION >> 16
+               ,VCI_VERSION & 0xFFFF
+               ,__NR_vserver
+               );
+}
+
+
+int proc_xid_info (int vid, char *buffer)
+{
+       struct vx_info *vxi;
+       int length;
+
+       vxi = find_vx_info(vid);
+       if (!vxi)
+               return 0;
+       length = sprintf(buffer,
+               "ID:\t%d\n"
+               "Info:\t%p\n"
+               "Init:\t%d\n"
+               ,vxi->vx_id
+               ,vxi
+               ,vxi->vx_initpid
+               );
+       put_vx_info(vxi);
+       return length;
+}
+
+int proc_xid_status (int vid, char *buffer)
+{
+       struct vx_info *vxi;
+       int length;
+
+       vxi = find_vx_info(vid);
+       if (!vxi)
+               return 0;
+       length = sprintf(buffer,
+               "RefC:\t%d\n"           
+               "Flags:\t%016llx\n"
+               "BCaps:\t%016llx\n"
+               "CCaps:\t%016llx\n"
+               "Ticks:\t%d\n"          
+               ,atomic_read(&vxi->vx_refcount)
+               ,vxi->vx_flags
+               ,vxi->vx_bcaps
+               ,vxi->vx_ccaps
+               ,atomic_read(&vxi->limit.ticks)
+               );
+       put_vx_info(vxi);
+       return length;
+}
+
+int proc_xid_limit (int vid, char *buffer)
+{
+       struct vx_info *vxi;
+       int length;
+
+       vxi = find_vx_info(vid);
+       if (!vxi)
+               return 0;
+       length = vx_info_proc_limit(&vxi->limit, buffer);
+       put_vx_info(vxi);
+       return length;
+}
+
+int proc_xid_sched (int vid, char *buffer)
+{
+       struct vx_info *vxi;
+       int length;
+
+       vxi = find_vx_info(vid);
+       if (!vxi)
+               return 0;
+       length = vx_info_proc_sched(&vxi->sched, buffer);
+       put_vx_info(vxi);
+       return length;
+}
+
+int proc_xid_cvirt (int vid, char *buffer)
+{
+       struct vx_info *vxi;
+       int length;
+
+       vxi = find_vx_info(vid);
+       if (!vxi)
+               return 0;
+       length = vx_info_proc_cvirt(&vxi->cvirt, buffer);
+       put_vx_info(vxi);
+       return length;
+}
+
+int proc_xid_cacct (int vid, char *buffer)
+{
+       struct vx_info *vxi;
+       int length;
+
+       vxi = find_vx_info(vid);
+       if (!vxi)
+               return 0;
+       length = vx_info_proc_cacct(&vxi->cacct, buffer);
+       put_vx_info(vxi);
+       return length;
+}
+
+
+static int proc_vnet_info(int vid, char *buffer)
+{
+       return sprintf(buffer,
+               "VCIVersion:\t%04x:%04x\n"
+               "VCISyscall:\t%d\n"
+               ,VCI_VERSION >> 16
+               ,VCI_VERSION & 0xFFFF
+               ,__NR_vserver
+               );
+}
+
+#define        atoquad(a) \
+       (((a)>>0) & 0xff), (((a)>>8) & 0xff), \
+       (((a)>>16) & 0xff), (((a)>>24) & 0xff)
+
+int proc_nid_info (int vid, char *buffer)
+{
+       struct nx_info *nxi;
+       int length, i;
+
+       nxi = find_nx_info(vid);
+       if (!nxi)
+               return 0;
+       length = sprintf(buffer,
+               "ID:\t%d\n"
+               "Info:\t%p\n"
+               ,nxi->nx_id
+               ,nxi
+               );
+       for (i=0; i<nxi->nbipv4; i++) {
+               length += sprintf(buffer + length,
+                       "%d:\t%d.%d.%d.%d/%d.%d.%d.%d\n", i,
+                       atoquad(nxi->ipv4[i]),
+                       atoquad(nxi->mask[i]));
+       }
+       put_nx_info(nxi);
+       return length;
+}
+
+int proc_nid_status (int vid, char *buffer)
+{
+       struct nx_info *nxi;
+       int length;
+
+       nxi = find_nx_info(vid);
+       if (!nxi)
+               return 0;
+       length = sprintf(buffer,
+               "RefC:\t%d\n"           
+               ,atomic_read(&nxi->nx_refcount)
+               );
+       put_nx_info(nxi);
+       return length;
+}
+
+/* here the inode helpers */
+
+
+
+#define fake_ino(id,ino) (((id)<<16)|(ino))
+
+#define        inode_vid(i)    ((i)->i_ino >> 16)
+#define        inode_type(i)   ((i)->i_ino & 0xFFFF)
+
+#define MAX_MULBY10    ((~0U-9)/10)
+
+
+static struct inode *proc_vid_make_inode(struct super_block * sb,
+       int vid, int ino)
+{
+       struct inode *inode = new_inode(sb);
+
+       if (!inode)
+               goto out;
+
+       inode->i_mtime = inode->i_atime =
+               inode->i_ctime = CURRENT_TIME;
+       inode->i_ino = fake_ino(vid, ino);
+
+       inode->i_uid = 0;
+       inode->i_gid = 0;
+       // inode->i_xid = xid;
+out:
+       return inode;
+}
+
+static int proc_vid_revalidate(struct dentry * dentry, struct nameidata *nd)
+{
+       struct inode * inode = dentry->d_inode;
+       int vid, valid=0;
+
+       vid = inode_vid(inode);
+       switch (inode_type(inode) & PROC_VID_MASK) {
+               case PROC_XID_INO:
+                       valid = vx_info_id_valid(vid);
+                       break;
+               case PROC_NID_INO:
+                       valid = nx_info_id_valid(vid);
+                       break;
+       }       
+       if (valid)
+               return 1;
+       d_drop(dentry);
+       return 0;
+}
+
+/*
+static int proc_vid_delete_dentry(struct dentry * dentry)
+{
+        return 1;
+}
+*/
+
+
+#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
+
+static ssize_t proc_vid_info_read(struct file * file, char * buf,
+                         size_t count, loff_t *ppos)
+{
+       struct inode * inode = file->f_dentry->d_inode;
+       unsigned long page;
+       ssize_t length;
+       ssize_t end;
+       int vid;
+
+       if (count > PROC_BLOCK_SIZE)
+               count = PROC_BLOCK_SIZE;
+       if (!(page = __get_free_page(GFP_KERNEL)))
+               return -ENOMEM;
+
+       vid = inode_vid(inode);
+       length = PROC_I(inode)->op.proc_vid_read(vid, (char*)page);
+
+       if (length < 0) {
+               free_page(page);
+               return length;
+       }
+       /* Static 4kB (or whatever) block capacity */
+       if (*ppos >= length) {
+               free_page(page);
+               return 0;
+       }
+       if (count + *ppos > length)
+               count = length - *ppos;
+       end = count + *ppos;
+       copy_to_user(buf, (char *) page + *ppos, count);
+       *ppos = end;
+       free_page(page);
+       return count;
+}
+
+
+
+
+
+/* here comes the lower level (vid) */
+
+static struct file_operations proc_vid_info_file_operations = {
+       read:           proc_vid_info_read,
+};
+
+static struct dentry_operations proc_vid_dentry_operations = {
+       d_revalidate:   proc_vid_revalidate,
+//     d_delete:       proc_vid_delete_dentry,
+};
+
+
+struct vid_entry {
+       int type;
+       int len;
+       char *name;
+       mode_t mode;
+};
+
+#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
+
+static struct vid_entry vx_base_stuff[] = {
+       E(PROC_XID_INFO,        "info",         S_IFREG|S_IRUGO),
+       E(PROC_XID_STATUS,      "status",       S_IFREG|S_IRUGO),
+       E(PROC_XID_LIMIT,       "limit",        S_IFREG|S_IRUGO),
+       E(PROC_XID_SCHED,       "sched",        S_IFREG|S_IRUGO),
+       E(PROC_XID_CVIRT,       "cvirt",        S_IFREG|S_IRUGO),
+       E(PROC_XID_CACCT,       "cacct",        S_IFREG|S_IRUGO),
+       {0,0,NULL,0}
+};
+
+static struct vid_entry vn_base_stuff[] = {
+       E(PROC_NID_INFO,        "info",         S_IFREG|S_IRUGO),
+       E(PROC_NID_STATUS,      "status",       S_IFREG|S_IRUGO),
+       {0,0,NULL,0}
+};
+
+
+
+static struct dentry *proc_vid_lookup(struct inode *dir,
+       struct dentry *dentry, struct nameidata *nd)
+{
+       struct inode *inode;
+       struct vid_entry *p;
+       int error;
+
+       error = -ENOENT;
+       inode = NULL;
+
+       switch (inode_type(dir)) {
+               case PROC_XID_INO:
+                       p = vx_base_stuff;      
+                       break;
+               case PROC_NID_INO:
+                       p = vn_base_stuff;      
+                       break;
+               default:
+                       goto out;
+       }
+
+       for (; p->name; p++) {
+               if (p->len != dentry->d_name.len)
+                       continue;
+               if (!memcmp(dentry->d_name.name, p->name, p->len))
+                       break;
+       }
+       if (!p->name)
+               goto out;
+
+       error = -EINVAL;
+       inode = proc_vid_make_inode(dir->i_sb, inode_vid(dir), p->type);
+       if (!inode)
+               goto out;
+
+       switch(p->type) {
+               case PROC_XID_INFO:
+                       PROC_I(inode)->op.proc_vid_read = proc_xid_info;
+                       break;
+               case PROC_XID_STATUS:
+                       PROC_I(inode)->op.proc_vid_read = proc_xid_status;
+                       break;
+               case PROC_XID_LIMIT:
+                       PROC_I(inode)->op.proc_vid_read = proc_xid_limit;
+                       break;
+               case PROC_XID_SCHED:
+                       PROC_I(inode)->op.proc_vid_read = proc_xid_sched;
+                       break;
+               case PROC_XID_CVIRT:
+                       PROC_I(inode)->op.proc_vid_read = proc_xid_cvirt;
+                       break;
+               case PROC_XID_CACCT:
+                       PROC_I(inode)->op.proc_vid_read = proc_xid_cacct;
+                       break;
+
+               case PROC_NID_INFO:
+                       PROC_I(inode)->op.proc_vid_read = proc_nid_info;
+                       break;
+               case PROC_NID_STATUS:
+                       PROC_I(inode)->op.proc_vid_read = proc_nid_status;
+                       break;
+               
+               default:
+			printk("procfs: impossible type (%d)\n", p->type);
+                       iput(inode);
+                       return ERR_PTR(-EINVAL);
+       }
+       inode->i_mode = p->mode;
+//     inode->i_op = &proc_vid_info_inode_operations;
+       inode->i_fop = &proc_vid_info_file_operations;
+       inode->i_nlink = 1;
+       inode->i_flags|=S_IMMUTABLE;
+       
+       dentry->d_op = &proc_vid_dentry_operations;
+       d_add(dentry, inode);
+       error = 0;
+out:
+       return ERR_PTR(error);
+}
+
+
+static int proc_vid_readdir(struct file * filp,
+       void * dirent, filldir_t filldir)
+{
+       int i, size;
+       struct inode *inode = filp->f_dentry->d_inode;
+       struct vid_entry *p;
+       
+       i = filp->f_pos;
+       switch (i) {
+               case 0:
+                       if (filldir(dirent, ".", 1, i,
+                               inode->i_ino, DT_DIR) < 0)
+                               return 0;
+                       i++;
+                       filp->f_pos++;
+                       /* fall through */
+               case 1:
+                       if (filldir(dirent, "..", 2, i,
+                               PROC_ROOT_INO, DT_DIR) < 0)
+                               return 0;
+                       i++;
+                       filp->f_pos++;
+                       /* fall through */
+               default:
+                       i -= 2;
+                       switch (inode_type(inode)) {
+                               case PROC_XID_INO:
+                                       size = sizeof(vx_base_stuff);
+                                       p = vx_base_stuff + i;  
+                                       break;
+                               case PROC_NID_INO:
+                                       size = sizeof(vn_base_stuff);
+                                       p = vn_base_stuff + i;  
+                                       break;
+                               default:
+                                       return 1;
+                       }
+                       if (i >= size/sizeof(struct vid_entry))
+                               return 1;
+                       while (p->name) {
+                               if (filldir(dirent, p->name, p->len,
+                                       filp->f_pos, fake_ino(inode_vid(inode),
+                                       p->type), p->mode >> 12) < 0)
+                                       return 0;
+                               filp->f_pos++;
+                               p++;
+                       }
+       }
+       return 1;
+}
+
+
+
+
+/* now the upper level (virtual) */
+
+static struct file_operations proc_vid_file_operations = {
+       read:           generic_read_dir,
+       readdir:        proc_vid_readdir,
+};
+
+static struct inode_operations proc_vid_inode_operations = {
+       lookup:         proc_vid_lookup,
+};
+
+
+
+static __inline__ int atovid(const char *str, int len)
+{
+       int vid, c;
+
+       vid = 0;
+       while (len-- > 0) {
+               c = *str - '0';
+               str++;
+               if (c > 9)
+                       return -1;
+               if (vid >= MAX_MULBY10)
+                       return -1;
+               vid *= 10;
+               vid += c;
+               if (!vid)
+                       return -1;
+       }
+       return vid;
+}
+
+
+struct dentry *proc_virtual_lookup(struct inode *dir,
+       struct dentry * dentry, struct nameidata *nd)
+{
+       int xid, len, ret;
+       struct vx_info *vxi;
+       const char *name;
+       struct inode *inode;
+
+       name = dentry->d_name.name;
+       len = dentry->d_name.len;
+       ret = -ENOMEM;
+
+       if (len == 7 && !memcmp(name, "current", 7)) {
+               inode = new_inode(dir->i_sb);
+               if (!inode)
+                       goto out;
+               inode->i_mtime = inode->i_atime =
+                       inode->i_ctime = CURRENT_TIME;
+               inode->i_ino = fake_ino(1, PROC_XID_INO);
+               inode->i_mode = S_IFLNK|S_IRWXUGO;
+               inode->i_uid = inode->i_gid = 0;
+               inode->i_size = 64;
+//             inode->i_op = &proc_current_inode_operations;
+               d_add(dentry, inode);
+               return NULL;
+       }
+       if (len == 4 && !memcmp(name, "info", 4)) {
+               inode = proc_vid_make_inode(dir->i_sb, 0, PROC_XID_INFO);
+               if (!inode)
+                       goto out;
+               inode->i_fop = &proc_vid_info_file_operations;
+               PROC_I(inode)->op.proc_vid_read = proc_virtual_info;
+               inode->i_mode = S_IFREG|S_IRUGO;
+//             inode->i_size = 64;
+//             inode->i_op = &proc_current_inode_operations;
+               d_add(dentry, inode);
+               return NULL;
+       }
+
+       ret = -ENOENT;
+       xid = atovid(name, len);
+       if (xid < 0)
+               goto out;
+       vxi = find_vx_info(xid);
+       if (!vxi)
+               goto out;
+
+       inode = NULL;
+       if (vx_check(xid, VX_ADMIN|VX_WATCH|VX_IDENT))
+               inode = proc_vid_make_inode(dir->i_sb,
+                       vxi->vx_id, PROC_XID_INO);
+       if (!inode)
+               goto out_release;
+
+       inode->i_mode = S_IFDIR|S_IRUGO;
+       inode->i_op = &proc_vid_inode_operations;
+       inode->i_fop = &proc_vid_file_operations;
+       inode->i_nlink = 2;
+       inode->i_flags|=S_IMMUTABLE;
+
+       dentry->d_op = &proc_vid_dentry_operations;
+       d_add(dentry, inode);
+       ret = 0;
+       
+out_release:
+       put_vx_info(vxi);
+out:
+       return ERR_PTR(ret);
+}
+
+
+struct dentry *proc_vnet_lookup(struct inode *dir,
+       struct dentry * dentry, struct nameidata *nd)
+{
+       int nid, len, ret;
+       struct nx_info *nxi;
+       const char *name;
+       struct inode *inode;
+
+       name = dentry->d_name.name;
+       len = dentry->d_name.len;
+       ret = -ENOMEM;
+       if (len == 7 && !memcmp(name, "current", 7)) {
+               inode = new_inode(dir->i_sb);
+               if (!inode)
+                       goto out;
+               inode->i_mtime = inode->i_atime =
+                       inode->i_ctime = CURRENT_TIME;
+               inode->i_ino = fake_ino(1, PROC_NID_INO);
+               inode->i_mode = S_IFLNK|S_IRWXUGO;
+               inode->i_uid = inode->i_gid = 0;
+               inode->i_size = 64;
+//             inode->i_op = &proc_current_inode_operations;
+               d_add(dentry, inode);
+               return NULL;
+       }
+       if (len == 4 && !memcmp(name, "info", 4)) {
+               inode = proc_vid_make_inode(dir->i_sb, 0, PROC_NID_INFO);
+               if (!inode)
+                       goto out;
+               inode->i_fop = &proc_vid_info_file_operations;
+               PROC_I(inode)->op.proc_vid_read = proc_vnet_info;
+               inode->i_mode = S_IFREG|S_IRUGO;
+//             inode->i_size = 64;
+//             inode->i_op = &proc_current_inode_operations;
+               d_add(dentry, inode);
+               return NULL;
+       }
+
+       ret = -ENOENT;
+       nid = atovid(name, len);
+       if (nid < 0)
+               goto out;
+       nxi = find_nx_info(nid);
+       if (!nxi)
+               goto out;
+
+       inode = NULL;
+       if (1)
+               inode = proc_vid_make_inode(dir->i_sb,
+                       nxi->nx_id, PROC_NID_INO);
+       if (!inode)
+               goto out_release;
+
+       inode->i_mode = S_IFDIR|S_IRUGO;
+       inode->i_op = &proc_vid_inode_operations;
+       inode->i_fop = &proc_vid_file_operations;
+       inode->i_nlink = 2;
+               inode->i_flags |= S_IMMUTABLE;
+
+       dentry->d_op = &proc_vid_dentry_operations;
+       d_add(dentry, inode);
+       ret = 0;
+       
+out_release:
+       put_nx_info(nxi);
+out:
+       return ERR_PTR(ret);
+}
+
+
+
+
+#define PROC_NUMBUF 10
+#define PROC_MAXVIDS 32
+
+
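+/*
+ * gather up to PROC_MAXVIDS context ids for readdir, starting at
+ * the index'th entry of the global context list
+ */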
+static int get_xid_list(int index, unsigned int *xids)
+{
+       struct vx_info *p;
+       int nr_xids = 0;
+
+       index--;
+       spin_lock(&vxlist_lock);
+       list_for_each_entry(p, &vx_infos, vx_list) {
+               int xid = p->vx_id;
+
+               if (--index >= 0)
+                       continue;
+               xids[nr_xids] = xid;
+               if (++nr_xids >= PROC_MAXVIDS)
+                       break;
+       }
+       spin_unlock(&vxlist_lock);
+       return nr_xids;
+}
+
+int proc_virtual_readdir(struct file * filp,
+       void * dirent, filldir_t filldir)
+{
+       unsigned int xid_array[PROC_MAXVIDS];
+       char buf[PROC_NUMBUF];
+       unsigned int nr = filp->f_pos-3;
+       unsigned int nr_xids, i;
+       ino_t ino;
+
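+       /* slots 0-3 are ".", "..", "info" and (optionally) "current";
+          numeric context entries follow from f_pos 4 onwards */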
+       switch ((long)filp->f_pos) {
+               case 0:
+                       ino = fake_ino(0, PROC_XID_INO);
+                       if (filldir(dirent, ".", 1,
+                               filp->f_pos, ino, DT_DIR) < 0)
+                               return 0;
+                       filp->f_pos++;
+                       /* fall through */
+               case 1:
+                       ino = filp->f_dentry->d_parent->d_inode->i_ino;
+                       if (filldir(dirent, "..", 2,
+                               filp->f_pos, ino, DT_DIR) < 0)
+                               return 0;
+                       filp->f_pos++;
+                       /* fall through */
+               case 2:
+                       ino = fake_ino(0, PROC_XID_INFO);
+                       if (filldir(dirent, "info", 4,
+                               filp->f_pos, ino, DT_LNK) < 0)
+                               return 0;
+                       filp->f_pos++;
+                       /* fall through */
+               case 3:
+                       if (current->xid > 1) {
+                               ino = fake_ino(1, PROC_XID_INO);
+                               if (filldir(dirent, "current", 7,
+                                       filp->f_pos, ino, DT_LNK) < 0)
+                                       return 0;
+                       }
+                       filp->f_pos++;
+       }
+
+       nr_xids = get_xid_list(nr, xid_array);
+
+       for (i = 0; i < nr_xids; i++) {
+               int xid = xid_array[i];
+               ino_t ino = fake_ino(xid, PROC_XID_INO);
+               unsigned long j = PROC_NUMBUF;
+
+               do buf[--j] = '0' + (xid % 10); while (xid/=10);
+
+               if (filldir(dirent, buf+j, PROC_NUMBUF-j,
+                       filp->f_pos, ino, DT_DIR) < 0)
+                       break;
+               filp->f_pos++;
+       }
+       return 0;
+}
+
+
+static struct file_operations proc_virtual_dir_operations = {
+       read:           generic_read_dir,
+       readdir:        proc_virtual_readdir,
+};
+
+static struct inode_operations proc_virtual_dir_inode_operations = {
+       lookup:         proc_virtual_lookup,
+};
+
+
+
+static int get_nid_list(int index, unsigned int *nids)
+{
+       struct nx_info *p;
+       int nr_nids = 0;
+
+       index--;
+       spin_lock(&nxlist_lock);
+       list_for_each_entry(p, &nx_infos, nx_list) {
+               int nid = p->nx_id;
+
+               if (--index >= 0)
+                       continue;
+               nids[nr_nids] = nid;
+               if (++nr_nids >= PROC_MAXVIDS)
+                       break;
+       }
+       spin_unlock(&nxlist_lock);
+       return nr_nids;
+}
+
+int proc_vnet_readdir(struct file * filp,
+       void * dirent, filldir_t filldir)
+{
+       unsigned int nid_array[PROC_MAXVIDS];
+       char buf[PROC_NUMBUF];
+       unsigned int nr = filp->f_pos-3;
+       unsigned int nr_nids, i;
+       ino_t ino;
+
+       switch ((long)filp->f_pos) {
+               case 0:
+                       ino = fake_ino(0, PROC_NID_INO);
+                       if (filldir(dirent, ".", 1,
+                               filp->f_pos, ino, DT_DIR) < 0)
+                               return 0;
+                       filp->f_pos++;
+                       /* fall through */
+               case 1:
+                       ino = filp->f_dentry->d_parent->d_inode->i_ino;
+                       if (filldir(dirent, "..", 2,
+                               filp->f_pos, ino, DT_DIR) < 0)
+                               return 0;
+                       filp->f_pos++;
+                       /* fall through */
+               case 2:
+                       ino = fake_ino(0, PROC_NID_INFO);
+                       if (filldir(dirent, "info", 4,
+                               filp->f_pos, ino, DT_LNK) < 0)
+                               return 0;
+                       filp->f_pos++;
+                       /* fall through */
+               case 3:
+                       if (current->xid > 1) {
+                               ino = fake_ino(1, PROC_NID_INO);
+                               if (filldir(dirent, "current", 7,
+                                       filp->f_pos, ino, DT_LNK) < 0)
+                                       return 0;
+                       }
+                       filp->f_pos++;
+       }
+
+       nr_nids = get_nid_list(nr, nid_array);
+
+       for (i = 0; i < nr_nids; i++) {
+               int nid = nid_array[i];
+               ino_t ino = fake_ino(nid, PROC_NID_INO);
+               unsigned long j = PROC_NUMBUF;
+
+               do buf[--j] = '0' + (nid % 10); while (nid/=10);
+
+               if (filldir(dirent, buf+j, PROC_NUMBUF-j,
+                       filp->f_pos, ino, DT_DIR) < 0)
+                       break;
+               filp->f_pos++;
+       }
+       return 0;
+}
+
+
+static struct file_operations proc_vnet_dir_operations = {
+       read:           generic_read_dir,
+       readdir:        proc_vnet_readdir,
+};
+
+static struct inode_operations proc_vnet_dir_inode_operations = {
+       lookup:         proc_vnet_lookup,
+};
+
+
+
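+/* create the top-level /proc/virtual and /proc/vnet directories */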
+void proc_vx_init(void)
+{
+       struct proc_dir_entry *ent;
+
+       ent = proc_mkdir("virtual", 0);
+       if (ent) {
+               ent->proc_fops = &proc_virtual_dir_operations;
+               ent->proc_iops = &proc_virtual_dir_inode_operations;
+       }
+       proc_virtual = ent;
+
+       ent = proc_mkdir("vnet", 0);
+       if (ent) {
+               ent->proc_fops = &proc_vnet_dir_operations;
+               ent->proc_iops = &proc_vnet_dir_inode_operations;
+       }
+       proc_vnet = ent;
+}
+
+
+
+
+/* per pid info */
+
+
+char *task_vx_info(struct task_struct *p, char *buffer)
+{
+       return buffer + sprintf(buffer,
+               "XID:\t%d\n"
+               ,p->xid);
+}
+
+int proc_pid_vx_info(struct task_struct *p, char *buffer)
+{
+       char * orig = buffer;
+
+       buffer = task_vx_info(p, buffer);
+       return buffer - orig;
+}
+
+char *task_nx_info(struct task_struct *p, char *buffer)
+{
+       return buffer + sprintf(buffer,
+               "NID:\t%d\n"
+               ,p->nid);
+}
+
+int proc_pid_nx_info(struct task_struct *p, char *buffer)
+{
+       char * orig = buffer;
+
+       buffer = task_nx_info(p, buffer);
+       return buffer - orig;
+}
+
diff --git a/kernel/vserver/sched.c b/kernel/vserver/sched.c
new file mode 100644 (file)
index 0000000..a75195a
--- /dev/null
@@ -0,0 +1,162 @@
+/*
+ *  linux/kernel/vserver/sched.c
+ *
+ *  Virtual Server: Scheduler Support
+ *
+ *  Copyright (C) 2004  Herbert Pötzl
+ *
+ *  V0.01  adapted Sam Vilain's version to 2.6.3
+ *  V0.02  removed legacy interface
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/vinline.h>
+#include <linux/vserver/context.h>
+#include <linux/vserver/sched.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+
+/*
+ * recalculate the context's scheduling tokens
+ *
+ * ret > 0 : number of tokens available
+ * ret = 0 : context is paused
+ * ret < 0 : number of jiffies until new tokens arrive
+ *
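+ * e.g. with fill_rate 2 and interval HZ a context gains two tokens per
+ * elapsed second, up to tokens_max (illustrative numbers only)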
+ */
+int vx_tokens_recalc(struct vx_info *vxi)
+{
+       long delta, tokens = 0;
+
+       if (__vx_flags(vxi->vx_flags, VXF_SCHED_PAUSE, 0))
+               /* we are paused */
+               return 0;
+
+       delta = jiffies - vxi->sched.jiffies;
+
+       if (delta >= vxi->sched.interval) {
+               /* lockdown scheduler info */
+               spin_lock(&vxi->sched.tokens_lock);
+
+               /* calc integral token part */
+               delta = jiffies - vxi->sched.jiffies;
+               tokens = delta / vxi->sched.interval;
+               delta = tokens * vxi->sched.interval;
+               tokens *= vxi->sched.fill_rate;
+
+               atomic_add(tokens, &vxi->sched.tokens);
+               vxi->sched.jiffies += delta;
+               tokens = atomic_read(&vxi->sched.tokens);
+       
+               if (tokens > vxi->sched.tokens_max) {
+                       tokens = vxi->sched.tokens_max;
+                       atomic_set(&vxi->sched.tokens, tokens);
+               }
+               spin_unlock(&vxi->sched.tokens_lock);
+       } else {
+               /* no new tokens */
+               if ((tokens = vx_tokens_avail(vxi)) < vxi->sched.tokens_min) {
+                       /* report how long (in jiffies) until enough tokens arrive */
+                       if (vxi->sched.tokens_min == 0)
+                               return delta - vxi->sched.interval;
+                       return delta - vxi->sched.interval *
+                               vxi->sched.tokens_min / vxi->sched.fill_rate;
+               }
+       }
+       /* we have some tokens left */
+       return tokens;
+}
+
+/*
+ * effective_prio - return the priority that is based on the static
+ * priority but is modified by bonuses/penalties.
+ *
+ * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
+ * into a -4 ... 0 ... +4 bonus/penalty range.
+ *
+ * Additionally, we scale another amount based on the number of
+ * CPU tokens currently held by the context, if the process is
+ * part of a context (and the appropriate SCHED flag is set).
+ * This ranges from -5 ... 0 ... +15, quadratically.
+ *
+ * So, the total bonus is -9 .. 0 .. +19
+ * We use ~50% of the full 0...39 priority range so that:
+ *
+ * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
+ * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
+ *    unless that context is far exceeding its CPU allocation.
+ *
+ * Both properties are important to certain workloads.
+ */
+int effective_vavavoom(task_t *p, int max_prio)
+{
+       struct vx_info *vxi = p->vx_info;
+       int vavavoom, max;
+
+       /* lots of tokens = lots of vavavoom
+        *      no tokens = no vavavoom      */
+       if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) {
+               max = vxi->sched.tokens_max;
+               vavavoom = max - vavavoom;
+               max = max * max;
+               vavavoom = max_prio * VAVAVOOM_RATIO / 100
+                       * (vavavoom*vavavoom - (max >> 2)) / max;
+               /*  alternative, geometric mapping
+               vavavoom = -( MAX_USER_PRIO*VAVAVOOM_RATIO/100 * vavavoom
+                       / vxi->sched.tokens_max -
+                       MAX_USER_PRIO*VAVAVOOM_RATIO/100/2); */
+       } else
+               vavavoom = 0;
+       /* vavavoom = ( MAX_USER_PRIO*VAVAVOOM_RATIO/100*tokens_left(p) -
+               MAX_USER_PRIO*VAVAVOOM_RATIO/100/2); */
+
+       return vavavoom;
+}
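+/*
+ * with max_prio * VAVAVOOM_RATIO / 100 == 20 the formula above yields
+ * about -5 for a full token bucket and +15 for an empty one, matching
+ * the -5 ... +15 range described above (illustrative value only)
+ */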
+
+
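+/*
+ * update the per-context token bucket parameters; any field set to
+ * SCHED_KEEP is left unchanged.  A caller would typically fill the
+ * structure along these lines (illustrative values only):
+ *
+ *     struct vcmd_set_sched_v2 sched = {
+ *             .fill_rate  = 1,
+ *             .interval   = HZ,
+ *             .tokens     = SCHED_KEEP,
+ *             .tokens_min = SCHED_KEEP,
+ *             .tokens_max = 500,
+ *     };
+ */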
+int vc_set_sched(uint32_t xid, void __user *data)
+{
+       struct vcmd_set_sched_v2 vc_data;
+       struct vx_info *vxi;
+
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+       
+       vxi = find_vx_info(xid);
+       if (!vxi)
+               return -EINVAL;
+
+       spin_lock(&vxi->sched.tokens_lock);
+
+       if (vc_data.interval != SCHED_KEEP)
+               vxi->sched.interval = vc_data.interval;
+       if (vc_data.fill_rate != SCHED_KEEP)
+               vxi->sched.fill_rate = vc_data.fill_rate;
+       if (vc_data.tokens_min != SCHED_KEEP)
+               vxi->sched.tokens_min = vc_data.tokens_min;
+       if (vc_data.tokens_max != SCHED_KEEP)
+               vxi->sched.tokens_max = vc_data.tokens_max;
+       if (vc_data.tokens != SCHED_KEEP)
+               atomic_set(&vxi->sched.tokens, vc_data.tokens);
+
+       /* Sanity check the resultant values */
+       if (vxi->sched.fill_rate <= 0)
+               vxi->sched.fill_rate = 1;
+       if (vxi->sched.interval <= 0)
+               vxi->sched.interval = HZ;
+       if (vxi->sched.tokens_max == 0)
+               vxi->sched.tokens_max = 1;
+       if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
+               atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
+       if (vxi->sched.tokens_min > vxi->sched.tokens_max)
+               vxi->sched.tokens_min = vxi->sched.tokens_max;
+
+       spin_unlock(&vxi->sched.tokens_lock);
+       put_vx_info(vxi);
+       return 0;
+}
+
diff --git a/kernel/vserver/signal.c b/kernel/vserver/signal.c
new file mode 100644 (file)
index 0000000..464ea1b
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ *  linux/kernel/vserver/signal.c
+ *
+ *  Virtual Server: Signal Support
+ *
+ *  Copyright (C) 2003-2004  Herbert Pötzl
+ *
+ *  V0.01  broken out from vcontext V0.05
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+#include <linux/vinline.h>
+#include <linux/vserver/signal.h>
+
+
+int vc_ctx_kill(uint32_t id, void __user *data)
+{
+       int retval, count=0;
+       struct vcmd_ctx_kill_v0 vc_data;
+       struct siginfo info;
+       struct task_struct *p;
+       struct vx_info *vxi;
+
+       if (!vx_check(0, VX_ADMIN))
+               return -ENOSYS;
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+       
+       info.si_signo = vc_data.sig;
+       info.si_errno = 0;
+       info.si_code = SI_USER;
+       info.si_pid = current->pid;
+       info.si_uid = current->uid;
+
+       vxi = find_vx_info(id);
+       if (!vxi)
+               return -ESRCH;
+
+       retval = -ESRCH;
+       read_lock(&tasklist_lock);
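+       /* pid 0 or -1: signal every task in the context (-1 spares the
+          context's init process); otherwise signal the thread group
+          leader of the given pid */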
+       switch (vc_data.pid) {
+       case -1:
+       case  0:
+               for_each_process(p) {
+                       int err = 0;
+
+                       if (vx_task_xid(p) != id || p->pid <= 1 ||
+                               (vc_data.pid && vxi->vx_initpid == p->pid) ||
+                               !thread_group_leader(p))
+                               continue;
+
+                       err = send_sig_info(vc_data.sig, &info, p);
+                       ++count;
+                       if (err != -EPERM)
+                               retval = err;
+               }
+               break;
+               
+       default:
+               p = find_task_by_pid(vc_data.pid);
+               if (p) {
+                       if (!thread_group_leader(p)) {
+                               struct task_struct *tg;
+                       
+                               tg = find_task_by_pid(p->tgid);
+                               if (tg)
+                                       p = tg;
+                       }
+                       if ((id == -1) || (vx_task_xid(p) == id))
+                               retval = send_sig_info(vc_data.sig, &info, p);
+               }
+               break;
+       }
+       read_unlock(&tasklist_lock);
+       put_vx_info(vxi);
+       return retval;
+}
+
+
diff --git a/kernel/vserver/switch.c b/kernel/vserver/switch.c
new file mode 100644 (file)
index 0000000..90fee14
--- /dev/null
@@ -0,0 +1,170 @@
+/*
+ *  linux/kernel/vserver/switch.c
+ *
+ *  Virtual Server: Syscall Switch
+ *
+ *  Copyright (C) 2003-2004  Herbert Pötzl
+ *
+ *  V0.01  syscall switch
+ *  V0.02  added signal to context
+ *  V0.03  added rlimit functions
+ *  V0.04  added iattr, task/xid functions
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/errno.h>
+
+#include <linux/vserver/switch.h>
+#include <linux/vinline.h>
+
+
+static inline int
+vc_get_version(uint32_t id)
+{
+       return VCI_VERSION;
+}
+
+
+#include <linux/vserver/legacy.h>
+#include <linux/vserver/context.h>
+#include <linux/vserver/network.h>
+#include <linux/vserver/namespace.h>
+#include <linux/vserver/sched.h>
+#include <linux/vserver/limit.h>
+#include <linux/vserver/inode.h>
+#include <linux/vserver/signal.h>
+
+
+extern unsigned int vx_debug_switch;
+
+
+extern asmlinkage long
+sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
+{
+
+       if (vx_debug_switch)
+               printk( "vc: VCMD_%02d_%d[%d], %d\n",
+                       VC_CATEGORY(cmd), VC_COMMAND(cmd),
+                       VC_VERSION(cmd), id);
+
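+       /* these commands are dispatched before any permission check */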
+       switch (cmd) {
+       case VCMD_get_version:
+               return vc_get_version(id);
+
+#ifdef CONFIG_VSERVER_LEGACY           
+       case VCMD_new_s_context:
+               return vc_new_s_context(id, data);
+       case VCMD_set_ipv4root:
+               return vc_set_ipv4root(id, data);
+#endif
+
+       case VCMD_task_xid:
+               return vc_task_xid(id, data);
+       case VCMD_vx_info:
+               return vc_vx_info(id, data);
+
+       case VCMD_task_nid:
+               return vc_task_nid(id, data);
+       case VCMD_nx_info:
+               return vc_nx_info(id, data);
+
+       case VCMD_set_namespace:
+               return vc_set_namespace(id, data);
+       case VCMD_cleanup_namespace:
+               return vc_cleanup_namespace(id, data);
+       }
+
+       /* those are allowed while in setup too */
+       if (!vx_check(0, VX_ADMIN|VX_WATCH) &&
+               !vx_flags(VXF_STATE_SETUP,0))
+               return -EPERM;
+
+#ifdef CONFIG_VSERVER_LEGACY
+       switch (cmd) {
+       case VCMD_set_cflags:
+       case VCMD_set_ccaps:
+               if (vx_check(0, VX_WATCH))
+                       return 0;
+       }
+#endif
+
+       switch (cmd) {
+       case VCMD_get_rlimit:
+               return vc_get_rlimit(id, data);
+       case VCMD_set_rlimit:
+               return vc_set_rlimit(id, data);
+       case VCMD_get_rlimit_mask:
+               return vc_get_rlimit_mask(id, data);
+               
+       case VCMD_vx_get_vhi_name:
+               return vc_get_vhi_name(id, data);
+       case VCMD_vx_set_vhi_name:
+               return vc_set_vhi_name(id, data);
+
+       case VCMD_set_cflags:
+               return vc_set_cflags(id, data);
+       case VCMD_get_cflags:
+               return vc_get_cflags(id, data);
+
+       case VCMD_set_ccaps:
+               return vc_set_ccaps(id, data);
+       case VCMD_get_ccaps:
+               return vc_get_ccaps(id, data);
+
+       case VCMD_set_nflags:
+               return vc_set_nflags(id, data);
+       case VCMD_get_nflags:
+               return vc_get_nflags(id, data);
+
+       case VCMD_set_ncaps:
+               return vc_set_ncaps(id, data);
+       case VCMD_get_ncaps:
+               return vc_get_ncaps(id, data);
+
+       case VCMD_set_sched:
+               return vc_set_sched(id, data);
+       }
+
+       /* below here only with VX_ADMIN */
+       if (!vx_check(0, VX_ADMIN|VX_WATCH))
+               return -EPERM;
+
+       switch (cmd) {
+       case VCMD_ctx_kill:
+               return vc_ctx_kill(id, data);
+
+#ifdef CONFIG_VSERVER_LEGACY           
+       case VCMD_create_context:
+               return vc_ctx_create(id, data);
+#endif
+
+       case VCMD_get_iattr:
+               return vc_get_iattr(id, data);
+       case VCMD_set_iattr:
+               return vc_set_iattr(id, data);
+
+       case VCMD_enter_namespace:
+               return vc_enter_namespace(id, data);
+
+       case VCMD_ctx_create:
+#ifdef CONFIG_VSERVER_LEGACY           
+               if (id == 1) {
+                       current->xid = 1;
+                       return 1;
+               }
+#endif
+               return vc_ctx_create(id, data);
+       case VCMD_ctx_migrate:
+               return vc_ctx_migrate(id, data);
+
+       case VCMD_net_create:
+               return vc_net_create(id, data);
+       case VCMD_net_migrate:
+               return vc_net_migrate(id, data);
+
+       }
+       return -ENOSYS;
+}
+
diff --git a/kernel/vserver/sysctl.c b/kernel/vserver/sysctl.c
new file mode 100644 (file)
index 0000000..562fc0e
--- /dev/null
@@ -0,0 +1,150 @@
+/*
+ *  linux/kernel/vserver/sysctl.c
+ *
+ *  Virtual Context Support
+ *
+ *  Copyright (C) 2004  Herbert Pötzl
+ *
+ *  V0.01  basic structure
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/vserver.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/sysctl.h>
+#include <linux/fs.h>
+
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+
+
+#define CTL_VSERVER    4242    /* unused? */
+
+enum {
+        CTL_DEBUG_SWITCH = 1,
+        CTL_DEBUG_LIMIT,
+};
+
+
+unsigned int vx_debug_switch = 0;
+unsigned int vx_debug_limit = 0;
+
+
+static struct ctl_table_header *vserver_table_header;
+static ctl_table vserver_table[];
+
+
+void vserver_register_sysctl(void)
+{
+       if (!vserver_table_header) {
+               vserver_table_header = register_sysctl_table(vserver_table, 1);
+#ifdef CONFIG_PROC_FS
+//             if (vserver_table[0].de)
+//                     vserver_table[0].de->owner = THIS_MODULE;
+#endif
+       }
+                       
+}
+
+void vserver_unregister_sysctl(void)
+{
+       if (vserver_table_header) {
+               unregister_sysctl_table(vserver_table_header);
+               vserver_table_header = NULL;
+       }
+}
+
+
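+/*
+ * sysctl handler for the debug_* entries below: parses an unsigned
+ * decimal value on write and prints the current value on read
+ */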
+static int proc_dodebug(ctl_table *table, int write,
+       struct file *file, void *buffer, size_t *lenp)
+{
+       char            tmpbuf[20], *p, c;
+       unsigned int    value;
+       size_t          left, len;
+
+       if ((file->f_pos && !write) || !*lenp) {
+               *lenp = 0;
+               return 0;
+       }
+
+       left = *lenp;
+
+       if (write) {
+               if (!access_ok(VERIFY_READ, buffer, left))
+                       return -EFAULT;
+               p = (char *) buffer;
+               while (left && __get_user(c, p) >= 0 && isspace(c))
+                       left--, p++;
+               if (!left)
+                       goto done;
+
+               if (left > sizeof(tmpbuf) - 1)
+                       return -EINVAL;
+               if (copy_from_user(tmpbuf, p, left))
+                       return -EFAULT;
+               tmpbuf[left] = '\0';
+
+               for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--)
+                       value = 10 * value + (*p - '0');
+               if (*p && !isspace(*p))
+                       return -EINVAL;
+               while (left && isspace(*p))
+                       left--, p++;
+               *(unsigned int *) table->data = value;
+       } else {
+               if (!access_ok(VERIFY_WRITE, buffer, left))
+                       return -EFAULT;
+               len = sprintf(tmpbuf, "%d", *(unsigned int *) table->data);
+               if (len > left)
+                       len = left;
+               if (__copy_to_user(buffer, tmpbuf, len))
+                       return -EFAULT;
+               if ((left -= len) > 0) {
+                       if (put_user('\n', (char *)buffer + len))
+                               return -EFAULT;
+                       left--;
+               }
+       }
+
+done:
+       *lenp -= left;
+       file->f_pos += *lenp;
+       return 0;
+}
+       
+
+
+static ctl_table debug_table[] = {
+        {
+                .ctl_name       = CTL_DEBUG_SWITCH,
+                .procname       = "debug_switch",
+                .data           = &vx_debug_switch,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dodebug
+        },
+        {
+                .ctl_name       = CTL_DEBUG_LIMIT,
+                .procname       = "debug_limit",
+                .data           = &vx_debug_limit,
+                .maxlen         = sizeof(int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dodebug
+        },
+        { .ctl_name = 0 }
+};
+
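+/*
+ * top-level "vserver" directory; the entries above appear as
+ * /proc/sys/vserver/debug_switch and /proc/sys/vserver/debug_limit
+ */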
+static ctl_table vserver_table[] = {
+        {
+                .ctl_name       = CTL_VSERVER,
+                .procname       = "vserver",
+                .mode           = 0555,
+                .child          = debug_table
+        },
+        { .ctl_name = 0 }
+};
+
diff --git a/net/bluetooth/syms.c b/net/bluetooth/syms.c
new file mode 100644 (file)
index 0000000..20d8101
--- /dev/null
@@ -0,0 +1,84 @@
+/* 
+   BlueZ - Bluetooth protocol stack for Linux
+   Copyright (C) 2000-2001 Qualcomm Incorporated
+
+   Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/*
+ * Bluetooth symbols.
+ *
+ * $Id: syms.c,v 1.1 2002/03/08 21:06:59 maxk Exp $
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+/* HCI Core */
+EXPORT_SYMBOL(hci_alloc_dev);
+EXPORT_SYMBOL(hci_free_dev);
+EXPORT_SYMBOL(hci_register_dev);
+EXPORT_SYMBOL(hci_unregister_dev);
+EXPORT_SYMBOL(hci_suspend_dev);
+EXPORT_SYMBOL(hci_resume_dev);
+
+EXPORT_SYMBOL(hci_register_proto);
+EXPORT_SYMBOL(hci_unregister_proto);
+
+EXPORT_SYMBOL(hci_get_route);
+EXPORT_SYMBOL(hci_connect);
+EXPORT_SYMBOL(hci_dev_get);
+EXPORT_SYMBOL(hci_conn_auth);
+EXPORT_SYMBOL(hci_conn_encrypt);
+
+EXPORT_SYMBOL(hci_send_acl);
+EXPORT_SYMBOL(hci_send_sco);
+EXPORT_SYMBOL(hci_send_cmd);
+EXPORT_SYMBOL(hci_si_event);
+
+/* Bluetooth lib */
+EXPORT_SYMBOL(bt_dump);
+EXPORT_SYMBOL(baswap);
+EXPORT_SYMBOL(batostr);
+EXPORT_SYMBOL(bt_err);
+
+/* Bluetooth sockets */
+EXPORT_SYMBOL(bt_sock_register);
+EXPORT_SYMBOL(bt_sock_unregister);
+EXPORT_SYMBOL(bt_sock_alloc);
+EXPORT_SYMBOL(bt_sock_link);
+EXPORT_SYMBOL(bt_sock_unlink);
+EXPORT_SYMBOL(bt_sock_recvmsg);
+EXPORT_SYMBOL(bt_sock_poll);
+EXPORT_SYMBOL(bt_accept_enqueue);
+EXPORT_SYMBOL(bt_accept_dequeue);
+EXPORT_SYMBOL(bt_sock_wait_state);
+
+EXPORT_SYMBOL(proc_bt);
diff --git a/sound/pci/ice1712/prodigy.c b/sound/pci/ice1712/prodigy.c
new file mode 100644 (file)
index 0000000..eee13e6
--- /dev/null
@@ -0,0 +1,663 @@
+/*
+ *   ALSA driver for ICEnsemble VT1724 (Envy24HT)
+ *
+ *   Lowlevel functions for AudioTrak Prodigy 7.1 (and possibly 192) cards
+ *      Copyright (c) 2003 Dimitromanolakis Apostolos <apostol@cs.utoronto.ca>
+ *     based on the aureon.c code (c) 2003 by Takashi Iwai <tiwai@suse.de>
+ *
+ *   version 0.82: Stable / not all features work yet (no communication with AC97 secondary)
+ *       added 64x/128x oversampling switch (should be 64x only for 96 kHz)
+ *       fixed some recording labels (still need to check the rest)
+ *       recording is working probably thanks to correct wm8770 initialization
+ *
+ *   version 0.5: Initial release:
+ *           working: analog output, mixer, headphone amplifier switch
+ *       not working: pretty much everything else, at least I could verify that
+ *                    we have no digital output, no capture, pretty bad clicks and poops
+ *                    on mixer switch and other cool stuff.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *
+ * NOTES:
+ *
+ *
+ *
+ * - we reuse the akm4xxx_t record for storing the wm8770 codec data.
+ *   both wm and akm codecs are pretty similar, so we can integrate
+ *   both controls in the future, once wm codecs are reused in
+ *   many boards.
+ *
+ * - writing over SPI is implemented but reading is not yet.
+ *   the SPDIF-in channel status, etc. can be read from CS chip.
+ *
+ * - DAC digital volumes are not implemented in the mixer.
+ *   if they show better response than DAC analog volumes, we can use them
+ *   instead.
+ *
+ * - Prodigy boards are equipped with an AC97 STAC9744 chip, too.  It is used to do
+ *   the analog mixing but is not easily controllable (it's not connected
+ *   directly to the envy24ht chip), so let's leave it as it is.
+ *
+ */
+
+#define REVISION 0.82b
+
+#include <sound/driver.h>
+#include <asm/io.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <sound/core.h>
+
+#include "ice1712.h"
+#include "envy24ht.h"
+#include "prodigy.h"
+
+
+static int prodigy_set_headphone_amp(ice1712_t *ice, int enable)
+{
+       unsigned int tmp, tmp2;
+
+       tmp2 = tmp = snd_ice1712_gpio_read(ice);
+       if (enable)
+               tmp |= PRODIGY_HP_AMP_EN;
+       else
+               tmp &= ~ PRODIGY_HP_AMP_EN;
+       if (tmp != tmp2) {
+               snd_ice1712_gpio_write(ice, tmp);
+               return 1;
+       }
+       return 0;
+}
+
+
+static int prodigy_get_headphone_amp(ice1712_t *ice)
+{
+       unsigned int tmp = snd_ice1712_gpio_read(ice);
+
+       return (tmp & PRODIGY_HP_AMP_EN) != 0;
+}
+
+
+/*
+ * write data in the SPI mode
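+ * (bit-banged MSB first over the card's GPIO pins; 'cs' selects the chip)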
+ */
+static void prodigy_spi_write(ice1712_t *ice, unsigned int cs, unsigned int data, int bits)
+{
+       unsigned int tmp;
+       int i;
+
+       tmp = snd_ice1712_gpio_read(ice);
+
+       snd_ice1712_gpio_set_mask(ice, ~(PRODIGY_WM_RW|PRODIGY_WM_DATA|PRODIGY_WM_CLK|
+                                        PRODIGY_WM_CS|PRODIGY_CS8415_CS|PRODIGY_HP_AMP_EN));
+       tmp |= PRODIGY_WM_RW;
+       tmp &= ~cs;
+       snd_ice1712_gpio_write(ice, tmp);
+       udelay(1);
+
+       for (i = bits - 1; i >= 0; i--) {
+               tmp &= ~PRODIGY_WM_CLK;
+               snd_ice1712_gpio_write(ice, tmp);
+               udelay(1);
+               if (data & (1 << i))
+                       tmp |= PRODIGY_WM_DATA;
+               else
+                       tmp &= ~PRODIGY_WM_DATA;
+               snd_ice1712_gpio_write(ice, tmp);
+               udelay(1);
+               tmp |= PRODIGY_WM_CLK;
+               snd_ice1712_gpio_write(ice, tmp);
+               udelay(1);
+       }
+
+       tmp &= ~PRODIGY_WM_CLK;
+       tmp |= cs;
+       snd_ice1712_gpio_write(ice, tmp);
+       udelay(1);
+       tmp |= PRODIGY_WM_CLK;
+       snd_ice1712_gpio_write(ice, tmp);
+       udelay(1);
+}
+
+
+/*
+ * get the current register value of WM codec
+ */
+static unsigned short wm_get(ice1712_t *ice, int reg)
+{
+       reg <<= 1;
+       return ((unsigned short)ice->akm[0].images[reg] << 8) |
+               ice->akm[0].images[reg + 1];
+}
+
+/*
+ * set the register value of WM codec and remember it
+ */
+static void wm_put(ice1712_t *ice, int reg, unsigned short val)
+{
+       prodigy_spi_write(ice, PRODIGY_WM_CS, (reg << 9) | (val & 0x1ff), 16);
+       reg <<= 1;
+       ice->akm[0].images[reg] = val >> 8;
+       ice->akm[0].images[reg + 1] = val;
+}
+
+
+/*********************************
+ ********* Controls section ******
+ *********************************/
+
+#define PRODIGY_CON_HPAMP \
+        {                                            \
+                .iface = SNDRV_CTL_ELEM_IFACE_MIXER,      \
+                .name =  "Headphone Amplifier", \
+                .info =  prodigy_hpamp_info,         \
+                .get =   prodigy_hpamp_get, \
+                .put =   prodigy_hpamp_put  \
+        }
+
+static int prodigy_hpamp_info(snd_kcontrol_t *k, snd_ctl_elem_info_t *uinfo)
+{
+       static char *texts[2] = {
+               "Off", "On"
+       };
+
+       uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+       uinfo->count = 1;
+       uinfo->value.enumerated.items = 2;
+
+       if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items)
+               uinfo->value.enumerated.item = uinfo->value.enumerated.items - 1;
+       strcpy(uinfo->value.enumerated.name, texts[uinfo->value.enumerated.item]);
+
+        return 0;
+}
+
+
+static int prodigy_hpamp_get(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+{
+       ice1712_t *ice = snd_kcontrol_chip(kcontrol);
+
+       ucontrol->value.integer.value[0] = prodigy_get_headphone_amp(ice);
+       return 0;
+}
+
+
+static int prodigy_hpamp_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+{
+       ice1712_t *ice = snd_kcontrol_chip(kcontrol);
+
+       return prodigy_set_headphone_amp(ice,ucontrol->value.integer.value[0]);
+}
+
+
+
+#define PRODIGY_CON_DEEMP \
+        {                                            \
+                .iface = SNDRV_CTL_ELEM_IFACE_MIXER,      \
+                .name =  "DAC De-emphasis", \
+                .info =  prodigy_deemp_info,         \
+                .get =   prodigy_deemp_get, \
+                .put =   prodigy_deemp_put  \
+        }
+
+static int prodigy_deemp_info(snd_kcontrol_t *k, snd_ctl_elem_info_t *uinfo)
+{
+       static char *texts[2] = { "Off", "On" };
+
+       uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+       uinfo->count = 1;
+       uinfo->value.enumerated.items = 2;
+
+       if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items)
+               uinfo->value.enumerated.item = uinfo->value.enumerated.items - 1;
+       strcpy(uinfo->value.enumerated.name, texts[uinfo->value.enumerated.item]);
+
+        return 0;
+}
+
+static int prodigy_deemp_get(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+{
+       ice1712_t *ice = snd_kcontrol_chip(kcontrol);
+       ucontrol->value.integer.value[0] = (wm_get(ice, 0x15) & 0xf) == 0xf;
+       return 0;
+}
+
+static int prodigy_deemp_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+{
+       ice1712_t *ice = snd_kcontrol_chip(kcontrol);
+       int temp, temp2;
+       temp2 = temp = wm_get(ice, 0x15);
+       temp = (temp & ~0xf) | ((ucontrol->value.integer.value[0])*0xf);
+       if (temp != temp2) {
+               wm_put(ice,0x15,temp);
+               return 1;
+       }
+       return 0;
+}
+
+
+#define PRODIGY_CON_OVERSAMPLING \
+        {                                            \
+                .iface = SNDRV_CTL_ELEM_IFACE_MIXER,      \
+                .name =  "ADC Oversampling", \
+                .info =  prodigy_oversampling_info,         \
+                .get =   prodigy_oversampling_get, \
+                .put =   prodigy_oversampling_put  \
+        }
+
+static int prodigy_oversampling_info(snd_kcontrol_t *k, snd_ctl_elem_info_t *uinfo)
+{
+       static char *texts[2] = { "128x", "64x" };
+
+       uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+       uinfo->count = 1;
+       uinfo->value.enumerated.items = 2;
+
+       if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items)
+               uinfo->value.enumerated.item = uinfo->value.enumerated.items - 1;
+       strcpy(uinfo->value.enumerated.name, texts[uinfo->value.enumerated.item]);
+
+        return 0;
+}
+
+static int prodigy_oversampling_get(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+{
+       ice1712_t *ice = snd_kcontrol_chip(kcontrol);
+       ucontrol->value.integer.value[0] = (wm_get(ice, 0x17) & 0x8) == 0x8;
+       return 0;
+}
+
+static int prodigy_oversampling_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+{
+       int temp, temp2;
+       ice1712_t *ice = snd_kcontrol_chip(kcontrol);
+
+       temp2 = temp = wm_get(ice, 0x17);
+
+       if( ucontrol->value.integer.value[0] ) {
+               temp |= 0x8;
+       } else {
+               temp &= ~0x8;
+       }
+
+       if (temp != temp2) {
+               wm_put(ice,0x17,temp);
+               return 1;
+       }
+       return 0;
+}
+
+
+
+
+/*
+ * DAC volume attenuation mixer control
+ */
+static int wm_dac_vol_info(snd_kcontrol_t *kcontrol, snd_ctl_elem_info_t *uinfo)
+{
+       uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+       uinfo->count = 1;
+       uinfo->value.integer.min = 0;           /* mute */
+       uinfo->value.integer.max = 101;         /* 0dB */
+       return 0;
+}
+
+static int wm_dac_vol_get(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+{
+       ice1712_t *ice = snd_kcontrol_chip(kcontrol);
+       int idx;
+       unsigned short vol;
+
+       down(&ice->gpio_mutex);
+       if (kcontrol->private_value)
+               idx = WM_DAC_MASTER_ATTEN;
+       else
+               idx  = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id) + WM_DAC_ATTEN;
+       vol = wm_get(ice, idx) & 0x7f;
+       if (vol <= 0x1a)
+               ucontrol->value.integer.value[0] = 0;
+       else
+               ucontrol->value.integer.value[0] = vol - 0x1a;
+       up(&ice->gpio_mutex);
+
+       return 0;
+}
+
+static int wm_dac_vol_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+{
+       ice1712_t *ice = snd_kcontrol_chip(kcontrol);
+       int idx;
+       unsigned short ovol, nvol;
+       int change;
+
+       snd_ice1712_save_gpio_status(ice);
+       if (kcontrol->private_value)
+               idx = WM_DAC_MASTER_ATTEN;
+       else
+               idx  = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id) + WM_DAC_ATTEN;
+       nvol = ucontrol->value.integer.value[0] + 0x1a;
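+       /* control values 0..101 map onto register values 0x1a (mute) .. 0x7f (0dB) */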
+       ovol = wm_get(ice, idx) & 0x7f;
+       change = (ovol != nvol);
+       if (change) {
+               if (nvol <= 0x1a && ovol <= 0x1a)
+                       change = 0;
+               else
+                       wm_put(ice, idx, nvol | 0x180); /* update on zero detect */
+       }
+       snd_ice1712_restore_gpio_status(ice);
+       return change;
+}
+
+/*
+ * ADC gain mixer control
+ */
+static int wm_adc_vol_info(snd_kcontrol_t *kcontrol, snd_ctl_elem_info_t *uinfo)
+{
+       uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+       uinfo->count = 1;
+       uinfo->value.integer.min = 0;           /* -12dB */
+       uinfo->value.integer.max = 0x1f;        /* 19dB */
+       return 0;
+}
+
+static int wm_adc_vol_get(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+{
+       ice1712_t *ice = snd_kcontrol_chip(kcontrol);
+       int idx;
+       unsigned short vol;
+
+       down(&ice->gpio_mutex);
+       idx  = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id) + WM_ADC_GAIN;
+       vol = wm_get(ice, idx) & 0x1f;
+       ucontrol->value.integer.value[0] = vol;
+       up(&ice->gpio_mutex);
+       return 0;
+}
+
+static int wm_adc_vol_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+{
+       ice1712_t *ice = snd_kcontrol_chip(kcontrol);
+       int idx;
+       unsigned short ovol, nvol;
+       int change;
+
+       snd_ice1712_save_gpio_status(ice);
+       idx  = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id) + WM_ADC_GAIN;
+       nvol = ucontrol->value.integer.value[0];
+       ovol = wm_get(ice, idx) & 0x1f;
+       change = (ovol != nvol);
+       if (change)
+               wm_put(ice, idx, nvol);
+       snd_ice1712_restore_gpio_status(ice);
+       return change;
+}
+
+/*
+ * ADC input mux mixer control
+ */
+static int wm_adc_mux_info(snd_kcontrol_t *kcontrol, snd_ctl_elem_info_t *uinfo)
+{
+       static char *texts[] = {
+               "CD Left",
+               "CD Right",
+               "Line Left",
+               "Line Right",
+               "Aux Left",
+               "Aux Right",
+               "Mic Left",
+               "Mic Right",
+       };
+       uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+       uinfo->count = 2;
+       uinfo->value.enumerated.items = 8;
+       if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items)
+               uinfo->value.enumerated.item = uinfo->value.enumerated.items - 1;
+       strcpy(uinfo->value.enumerated.name, texts[uinfo->value.enumerated.item]);
+       return 0;
+}
+
+static int wm_adc_mux_get(snd_kcontrol_t * kcontrol, snd_ctl_elem_value_t *ucontrol)
+{
+       ice1712_t *ice = snd_kcontrol_chip(kcontrol);
+       unsigned short val;
+
+       down(&ice->gpio_mutex);
+       val = wm_get(ice, WM_ADC_MUX);
+       ucontrol->value.integer.value[0] = val & 7;
+       ucontrol->value.integer.value[1] = (val >> 4) & 7;
+       up(&ice->gpio_mutex);
+       return 0;
+}
+
+static int wm_adc_mux_put(snd_kcontrol_t * kcontrol, snd_ctl_elem_value_t *ucontrol)
+{
+       ice1712_t *ice = snd_kcontrol_chip(kcontrol);
+       unsigned short oval, nval;
+       int change;
+
+       snd_ice1712_save_gpio_status(ice);
+       oval = wm_get(ice, WM_ADC_MUX);
+       nval = oval & ~0x77;
+       nval |= ucontrol->value.integer.value[0] & 7;
+       nval |= (ucontrol->value.integer.value[1] & 7) << 4;
+       change = (oval != nval);
+       if (change)
+               wm_put(ice, WM_ADC_MUX, nval);
+       snd_ice1712_restore_gpio_status(ice);
+       return change;
+}
+
+/*
+ * mixers
+ */
+
+static snd_kcontrol_new_t prodigy71_dac_control __devinitdata = {
+       .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+       .name = "DAC Volume",
+       .count = 8,
+       .info = wm_dac_vol_info,
+       .get = wm_dac_vol_get,
+       .put = wm_dac_vol_put,
+};
+
+static snd_kcontrol_new_t wm_controls[] __devinitdata = {
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Master Playback Volume",
+               .info = wm_dac_vol_info,
+               .get = wm_dac_vol_get,
+               .put = wm_dac_vol_put,
+               .private_value = 1,
+       },
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "ADC Volume",
+               .count = 2,
+               .info = wm_adc_vol_info,
+               .get = wm_adc_vol_get,
+               .put = wm_adc_vol_put,
+       },
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Capture Route",
+               .info = wm_adc_mux_info,
+               .get = wm_adc_mux_get,
+               .put = wm_adc_mux_put,
+       },
+       PRODIGY_CON_HPAMP ,
+       PRODIGY_CON_DEEMP ,
+       PRODIGY_CON_OVERSAMPLING
+};
+
+
+static int __devinit prodigy_add_controls(ice1712_t *ice)
+{
+       unsigned int i;
+       int err;
+
+       err = snd_ctl_add(ice->card, snd_ctl_new1(&prodigy71_dac_control, ice));
+       if (err < 0)
+               return err;
+
+       for (i = 0; i < ARRAY_SIZE(wm_controls); i++) {
+               err = snd_ctl_add(ice->card, snd_ctl_new1(&wm_controls[i], ice));
+               if (err < 0)
+                       return err;
+       }
+       return 0;
+}
+
+
+/*
+ * initialize the chip
+ */
+static int __devinit prodigy_init(ice1712_t *ice)
+{
+       static unsigned short wm_inits[] = {
+
+               /* These come first to reduce init pop noise */
+               0x1b, 0x000,            /* ADC Mux */
+               0x1c, 0x009,            /* Out Mux1 */
+               0x1d, 0x009,            /* Out Mux2 */
+
+               0x18, 0x000,            /* All power-up */
+
+               0x16, 0x022,            /* I2S, normal polarity, 24bit, high-pass on */
+               0x17, 0x006,            /* 128fs, slave mode */
+
+               0x00, 0,                /* DAC1 analog mute */
+               0x01, 0,                /* DAC2 analog mute */
+               0x02, 0,                /* DAC3 analog mute */
+               0x03, 0,                /* DAC4 analog mute */
+               0x04, 0,                /* DAC5 analog mute */
+               0x05, 0,                /* DAC6 analog mute */
+               0x06, 0,                /* DAC7 analog mute */
+               0x07, 0,                /* DAC8 analog mute */
+               0x08, 0x100,            /* master analog mute */
+
+               0x09, 0x7f,             /* DAC1 digital full */
+               0x0a, 0x7f,             /* DAC2 digital full */
+               0x0b, 0x7f,             /* DAC3 digital full */
+               0x0c, 0x7f,             /* DAC4 digital full */
+               0x0d, 0x7f,             /* DAC5 digital full */
+               0x0e, 0x7f,             /* DAC6 digital full */
+               0x0f, 0x7f,             /* DAC7 digital full */
+               0x10, 0x7f,             /* DAC8 digital full */
+               0x11, 0x1FF,            /* master digital full */
+
+               0x12, 0x000,            /* phase normal */
+               0x13, 0x090,            /* unmute DAC L/R */
+               0x14, 0x000,            /* all unmute */
+               0x15, 0x000,            /* no deemphasis, no ZFLG */
+
+               0x19, 0x000,            /* -12dB ADC/L */
+               0x1a, 0x000             /* -12dB ADC/R */
+
+       };
+
+       static unsigned short cs_inits[] = {
+               0x0441, /* RUN */
+               0x0100, /* no mute */
+               0x0200, /* */
+               0x0600, /* slave, 24bit */
+       };
+
+       unsigned int tmp;
+       unsigned int i;
+
+       printk(KERN_INFO "ice1724: AudioTrak Prodigy 7.1 driver rev. 0.82b\n");
+       printk(KERN_INFO "ice1724:   This driver is in beta stage. For success/failure reporting contact\n");
+       printk(KERN_INFO "ice1724:   Apostolos Dimitromanolakis <apostol@cs.utoronto.ca>\n");
+
+       ice->num_total_dacs = 8;
+       ice->num_total_adcs = 8;
+
+       /* to remember the register values */
+       ice->akm = snd_kcalloc(sizeof(akm4xxx_t), GFP_KERNEL);
+       if (! ice->akm)
+               return -ENOMEM;
+       ice->akm_codecs = 1;
+
+       snd_ice1712_gpio_set_dir(ice, 0xbfffff); /* fix this for the time being */
+
+       /* reset the wm codec as the SPI mode */
+       snd_ice1712_save_gpio_status(ice);
+       snd_ice1712_gpio_set_mask(ice,~( PRODIGY_WM_RESET|PRODIGY_WM_CS|
+               PRODIGY_CS8415_CS|PRODIGY_HP_AMP_EN ));
+
+       tmp = snd_ice1712_gpio_read(ice);
+       tmp &= ~PRODIGY_WM_RESET;
+       snd_ice1712_gpio_write(ice, tmp);
+       udelay(1);
+       tmp |= PRODIGY_WM_CS | PRODIGY_CS8415_CS;
+       snd_ice1712_gpio_write(ice, tmp);
+       udelay(1);
+       tmp |= PRODIGY_WM_RESET;
+       snd_ice1712_gpio_write(ice, tmp);
+       udelay(1);
+
+       /* initialize WM8770 codec */
+       for (i = 0; i < ARRAY_SIZE(wm_inits); i += 2)
+               wm_put(ice, wm_inits[i], wm_inits[i+1]);
+
+       /* initialize CS8415A codec */
+       for (i = 0; i < ARRAY_SIZE(cs_inits); i++)
+               prodigy_spi_write(ice, PRODIGY_CS8415_CS,
+                                cs_inits[i] | 0x200000, 24);
+
+
+       prodigy_set_headphone_amp(ice, 1);
+
+       snd_ice1712_restore_gpio_status(ice);
+
+       return 0;
+}
+
+/*
+ * Prodigy boards don't provide the EEPROM data except for the vendor IDs,
+ * hence the driver needs to set it up properly.
+ */
+
+static unsigned char prodigy71_eeprom[] __devinitdata = {
+       0x2b,   /* SYSCONF: clock 512, mpu401, spdif-in/ADC, 4DACs */
+       0x80,   /* ACLINK: I2S */
+       0xf8,   /* I2S: vol, 96k, 24bit, 192k */
+       0xc3,   /* SPDIF: out-en, out-int, spdif-in */
+       0xff,   /* GPIO_DIR */
+       0xff,   /* GPIO_DIR1 */
+       0xbf,   /* GPIO_DIR2 */
+       0x00,   /* GPIO_MASK */
+       0x00,   /* GPIO_MASK1 */
+       0x00,   /* GPIO_MASK2 */
+       0x00,   /* GPIO_STATE */
+       0x00,   /* GPIO_STATE1 */
+       0x00,   /* GPIO_STATE2 */
+};
+
+/* entry point */
+struct snd_ice1712_card_info snd_vt1724_prodigy_cards[] __devinitdata = {
+       {
+               .subvendor = VT1724_SUBDEVICE_PRODIGY71,
+               .name = "Audiotrak Prodigy 7.1",
+               .chip_init = prodigy_init,
+               .build_controls = prodigy_add_controls,
+               .eeprom_size = sizeof(prodigy71_eeprom),
+               .eeprom_data = prodigy71_eeprom,
+       },
+       { } /* terminator */
+};
diff --git a/sound/pci/ice1712/prodigy.h b/sound/pci/ice1712/prodigy.h
new file mode 100644 (file)
index 0000000..1ff29fe
--- /dev/null
@@ -0,0 +1,67 @@
+#ifndef __SOUND_PRODIGY_H
+#define __SOUND_PRODIGY_H
+
+/*
+ *   ALSA driver for VIA VT1724 (Envy24HT)
+ *
+ *   Lowlevel functions for Terratec PRODIGY cards
+ *
+ *     Copyright (c) 2003 Takashi Iwai <tiwai@suse.de>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ */      
+
+#define  PRODIGY_DEVICE_DESC          "{AudioTrak,Prodigy 7.1},"
+
+#define VT1724_SUBDEVICE_PRODIGY71     0x33495345      /* PRODIGY 7.1 */
+
+extern struct snd_ice1712_card_info  snd_vt1724_prodigy_cards[];
+
+/* GPIO bits */
+#define PRODIGY_CS8415_CS      (1 << 23)
+#define PRODIGY_CS8415_CDTO    (1 << 22)
+#define PRODIGY_WM_RESET       (1 << 20)
+#define PRODIGY_WM_CLK         (1 << 19)
+#define PRODIGY_WM_DATA                (1 << 18)
+#define PRODIGY_WM_RW          (1 << 17)
+#define PRODIGY_AC97_RESET     (1 << 16)
+#define PRODIGY_DIGITAL_SEL1   (1 << 15)
+// #define PRODIGY_HP_SEL              (1 << 14)
+#define PRODIGY_WM_CS          (1 << 12)
+
+#define PRODIGY_HP_AMP_EN      (1 << 14)
+
+
+/* WM8770 registers */
+#define WM_DAC_ATTEN           0x00    /* DAC1-8 analog attenuation */
+#define WM_DAC_MASTER_ATTEN    0x08    /* DAC master analog attenuation */
+#define WM_DAC_DIG_ATTEN       0x09    /* DAC1-8 digital attenuation */
+#define WM_DAC_DIG_MATER_ATTEN 0x11    /* DAC master digital attenuation */
+#define WM_PHASE_SWAP          0x12    /* DAC phase */
+#define WM_DAC_CTRL1           0x13    /* DAC control bits */
+#define WM_MUTE                        0x14    /* mute controls */
+#define WM_DAC_CTRL2           0x15    /* de-emphasis and zero-flag */
+#define WM_INT_CTRL            0x16    /* interface control */
+#define WM_MASTER              0x17    /* master clock and mode */
+#define WM_POWERDOWN           0x18    /* power-down controls */
+#define WM_ADC_GAIN            0x19    /* ADC gain L(19)/R(1a) */
+#define WM_ADC_MUX             0x1b    /* input MUX */
+#define WM_OUT_MUX1            0x1c    /* output MUX */
+#define WM_OUT_MUX2            0x1e    /* output MUX */
+#define WM_RESET               0x1f    /* software reset */
+
+
+#endif /* __SOUND_PRODIGY_H */