From b76fcd5f0c655b6e3e9bf534594357025421c66a Mon Sep 17 00:00:00 2001 From: Planet-Lab Support Date: Wed, 2 Jun 2004 20:45:38 +0000 Subject: [PATCH] This commit was manufactured by cvs2svn to create branch 'vserver'. --- Documentation/arm/SA1100/PCMCIA | 374 +++ Documentation/arm/XScale/ADIFCC/80200EVB | 110 + Documentation/arm/XScale/IOP3XX/IQ80310 | 247 ++ Documentation/arm/XScale/IOP3XX/IQ80321 | 215 ++ Documentation/arm/XScale/IOP3XX/aau.txt | 178 + Documentation/arm/XScale/IOP3XX/dma.txt | 214 ++ Documentation/arm/XScale/IOP3XX/message.txt | 110 + Documentation/arm/XScale/IOP3XX/pmon.txt | 71 + Documentation/arm/XScale/cache-lock.txt | 123 + Documentation/arm/XScale/pmu.txt | 168 + Documentation/arm/XScale/tlb-lock.txt | 64 + arch/arm/mach-omap/innovator1510.c | 99 + arch/arm/mach-omap/innovator1610.c | 91 + arch/arm/mach-omap/irq.h | 172 + arch/arm/mach-omap/omap-generic.c | 77 + arch/arm/mach-omap/omap-perseus2.c | 116 + arch/i386/mach-es7000/es7000.c | 279 ++ arch/i386/mach-es7000/setup.c | 106 + arch/i386/mach-es7000/topology.c | 64 + arch/ia64/kernel/perfmon_hpsim.h | 75 + arch/ppc/mm/cachemap.c | 174 + arch/ppc/ocp/Makefile | 6 + arch/ppc/ocp/ocp-driver.c | 195 ++ arch/ppc/ocp/ocp-probe.c | 113 + arch/ppc/ocp/ocp.c | 109 + drivers/char/dz.c | 1540 +++++++++ drivers/char/dz.h | 230 ++ drivers/char/sh-sci.c | 1646 ++++++++++ drivers/char/sh-sci.h | 478 +++ drivers/i2c/busses/i2c-ixp42x.c | 176 + drivers/ide/pci/cmd640.h | 32 + drivers/ide/ppc/swarm.c | 101 + drivers/net/auto_irq.c | 68 + drivers/net/rcif.h | 292 ++ drivers/net/rclanmtl.c | 2029 ++++++++++++ drivers/net/rclanmtl.h | 701 ++++ drivers/net/rcpci45.c | 1049 ++++++ drivers/net/wan/comx-hw-comx.c | 1450 +++++++++ drivers/net/wan/comx-hw-locomx.c | 496 +++ drivers/net/wan/comx-hw-mixcom.c | 960 ++++++ drivers/net/wan/comx-hw-munich.c | 2854 +++++++++++++++++ drivers/net/wan/comx-proto-fr.c | 1014 ++++++ drivers/net/wan/comx-proto-lapb.c | 551 ++++ drivers/net/wan/comx-proto-ppp.c | 269 ++ 
drivers/net/wan/comx.c | 1128 +++++++ drivers/net/wan/comx.h | 232 ++ drivers/net/wan/comxhw.h | 113 + drivers/net/wan/falc-lh.h | 102 + drivers/net/wan/hscx.h | 103 + drivers/net/wan/mixcom.h | 35 + drivers/net/wan/munich32x.h | 191 ++ drivers/pcmcia/sa1100.h | 164 + drivers/pcmcia/sa11xx_core.c | 971 ++++++ drivers/pcmcia/sa11xx_core.h | 121 + drivers/scsi/pcmcia/qlogic_core.c | 2 + drivers/scsi/qlogicfas.h | 124 + drivers/usb/core/driverfs.c | 229 ++ fs/intermezzo/Makefile | 11 + fs/intermezzo/cache.c | 207 ++ fs/intermezzo/dcache.c | 342 ++ fs/intermezzo/dir.c | 1333 ++++++++ fs/intermezzo/ext_attr.c | 197 ++ fs/intermezzo/file.c | 534 +++ fs/intermezzo/fileset.c | 674 ++++ fs/intermezzo/inode.c | 179 ++ fs/intermezzo/intermezzo_fs.h | 923 ++++++ fs/intermezzo/intermezzo_idl.h | 304 ++ fs/intermezzo/intermezzo_journal.h | 24 + fs/intermezzo/intermezzo_kml.h | 260 ++ fs/intermezzo/intermezzo_lib.h | 162 + fs/intermezzo/intermezzo_psdev.h | 55 + fs/intermezzo/intermezzo_upcall.h | 146 + fs/intermezzo/journal.c | 2452 ++++++++++++++ fs/intermezzo/journal_ext2.c | 90 + fs/intermezzo/journal_ext3.c | 283 ++ fs/intermezzo/journal_obdfs.c | 193 ++ fs/intermezzo/journal_reiserfs.c | 140 + fs/intermezzo/journal_tmpfs.c | 107 + fs/intermezzo/journal_xfs.c | 161 + fs/intermezzo/kml.c | 194 ++ fs/intermezzo/kml_decode.c | 1016 ++++++ fs/intermezzo/kml_reint.c | 647 ++++ fs/intermezzo/kml_setup.c | 58 + fs/intermezzo/kml_unpack.c | 712 ++++ fs/intermezzo/kml_utils.c | 43 + fs/intermezzo/methods.c | 493 +++ fs/intermezzo/presto.c | 736 +++++ fs/intermezzo/psdev.c | 647 ++++ fs/intermezzo/replicator.c | 290 ++ fs/intermezzo/super.c | 407 +++ fs/intermezzo/sysctl.c | 368 +++ fs/intermezzo/upcall.c | 559 ++++ fs/intermezzo/vfs.c | 2416 ++++++++++++++ fs/xfs/linux/kmem.h | 197 ++ fs/xfs/linux/mrlock.h | 106 + fs/xfs/linux/mutex.h | 53 + fs/xfs/linux/sema.h | 67 + fs/xfs/linux/spin.h | 74 + fs/xfs/linux/sv.h | 89 + fs/xfs/linux/time.h | 51 + fs/xfs/linux/xfs_aops.c | 1276 
++++++++ fs/xfs/linux/xfs_buf.c | 1811 +++++++++++ fs/xfs/linux/xfs_buf.h | 594 ++++ fs/xfs/linux/xfs_cred.h | 50 + fs/xfs/linux/xfs_file.c | 546 ++++ fs/xfs/linux/xfs_fs_subr.c | 124 + fs/xfs/linux/xfs_fs_subr.h | 49 + fs/xfs/linux/xfs_globals.c | 72 + fs/xfs/linux/xfs_globals.h | 44 + fs/xfs/linux/xfs_ioctl.c | 1236 +++++++ fs/xfs/linux/xfs_iops.c | 708 ++++ fs/xfs/linux/xfs_iops.h | 51 + fs/xfs/linux/xfs_linux.h | 367 +++ fs/xfs/linux/xfs_lrw.c | 1028 ++++++ fs/xfs/linux/xfs_lrw.h | 116 + fs/xfs/linux/xfs_stats.c | 132 + fs/xfs/linux/xfs_stats.h | 164 + fs/xfs/linux/xfs_super.c | 850 +++++ fs/xfs/linux/xfs_super.h | 129 + fs/xfs/linux/xfs_sysctl.c | 163 + fs/xfs/linux/xfs_sysctl.h | 107 + fs/xfs/linux/xfs_version.h | 44 + fs/xfs/linux/xfs_vfs.c | 327 ++ fs/xfs/linux/xfs_vfs.h | 206 ++ fs/xfs/linux/xfs_vnode.c | 442 +++ fs/xfs/linux/xfs_vnode.h | 651 ++++ include/asm-alpha/rmap.h | 7 + include/asm-arm/arch-cl7500/ide.h | 50 + include/asm-arm/arch-cl7500/keyboard.h | 16 + include/asm-arm/arch-clps711x/keyboard.h | 26 + include/asm-arm/arch-ebsa110/ide.h | 1 + include/asm-arm/arch-ebsa285/ide.h | 49 + include/asm-arm/arch-iop3xx/ide.h | 49 + include/asm-arm/arch-l7200/ide.h | 27 + include/asm-arm/arch-l7200/keyboard.h | 51 + include/asm-arm/arch-nexuspci/ide.h | 37 + include/asm-arm/arch-pxa/ide.h | 54 + include/asm-arm/arch-pxa/keyboard.h | 28 + include/asm-arm/arch-rpc/ide.h | 48 + include/asm-arm/arch-s3c2410/ide.h | 49 + include/asm-arm/arch-sa1100/keyboard.h | 23 + include/asm-arm/arch-shark/ide.h | 47 + include/asm-arm/arch-shark/keyboard.h | 68 + include/asm-arm/arch-tbox/ide.h | 3 + include/asm-arm/rmap.h | 6 + include/asm-arm26/rmap.h | 66 + include/asm-cris/rmap.h | 7 + include/asm-generic/rmap.h | 91 + include/asm-h8300/aki3068net/machine-depend.h | 29 + include/asm-h8300/edosk2674/machine-depend.h | 70 + include/asm-h8300/generic/machine-depend.h | 17 + include/asm-h8300/generic/timer_rate.h | 15 + include/asm-h8300/h8300_smsc.h | 20 + 
include/asm-h8300/h8max/machine-depend.h | 100 + include/asm-i386/rmap.h | 21 + include/asm-ia64/rmap.h | 7 + include/asm-m68k/rmap.h | 7 + include/asm-m68knommu/rmap.h | 2 + include/asm-mips/rmap.h | 7 + include/asm-parisc/rmap.h | 7 + include/asm-ppc/rmap.h | 9 + include/asm-ppc64/rmap.h | 9 + include/asm-s390/rmap.h | 7 + include/asm-sh/rmap.h | 7 + include/asm-sparc/rmap.h | 7 + include/asm-sparc64/rmap.h | 7 + include/asm-um/rmap.h | 6 + include/asm-v850/rmap.h | 1 + include/asm-x86_64/rmap.h | 7 + include/linux/ninline.h | 151 + include/linux/vinline.h | 462 +++ include/linux/vserver.h | 9 + include/linux/vserver/context.h | 176 + include/linux/vserver/cvirt.h | 133 + include/linux/vserver/inode.h | 67 + include/linux/vserver/legacy.h | 54 + include/linux/vserver/limit.h | 117 + include/linux/vserver/namespace.h | 55 + include/linux/vserver/network.h | 142 + include/linux/vserver/sched.h | 139 + include/linux/vserver/signal.h | 19 + include/linux/vserver/switch.h | 95 + include/linux/vserver/xid.h | 94 + kernel/vserver/Kconfig | 72 + kernel/vserver/Makefile | 12 + kernel/vserver/context.c | 558 ++++ kernel/vserver/cvirt.c | 41 + kernel/vserver/init.c | 42 + kernel/vserver/inode.c | 220 ++ kernel/vserver/legacy.c | 161 + kernel/vserver/limit.c | 149 + kernel/vserver/namespace.c | 195 ++ kernel/vserver/network.c | 513 +++ kernel/vserver/proc.c | 905 ++++++ kernel/vserver/sched.c | 162 + kernel/vserver/signal.c | 85 + kernel/vserver/switch.c | 170 + kernel/vserver/sysctl.c | 150 + net/bluetooth/syms.c | 84 + sound/pci/ice1712/prodigy.c | 663 ++++ sound/pci/ice1712/prodigy.h | 67 + 201 files changed, 59415 insertions(+) create mode 100644 Documentation/arm/SA1100/PCMCIA create mode 100644 Documentation/arm/XScale/ADIFCC/80200EVB create mode 100644 Documentation/arm/XScale/IOP3XX/IQ80310 create mode 100644 Documentation/arm/XScale/IOP3XX/IQ80321 create mode 100644 Documentation/arm/XScale/IOP3XX/aau.txt create mode 100644 Documentation/arm/XScale/IOP3XX/dma.txt 
create mode 100644 Documentation/arm/XScale/IOP3XX/message.txt create mode 100644 Documentation/arm/XScale/IOP3XX/pmon.txt create mode 100644 Documentation/arm/XScale/cache-lock.txt create mode 100644 Documentation/arm/XScale/pmu.txt create mode 100644 Documentation/arm/XScale/tlb-lock.txt create mode 100644 arch/arm/mach-omap/innovator1510.c create mode 100644 arch/arm/mach-omap/innovator1610.c create mode 100644 arch/arm/mach-omap/irq.h create mode 100644 arch/arm/mach-omap/omap-generic.c create mode 100644 arch/arm/mach-omap/omap-perseus2.c create mode 100644 arch/i386/mach-es7000/es7000.c create mode 100644 arch/i386/mach-es7000/setup.c create mode 100644 arch/i386/mach-es7000/topology.c create mode 100644 arch/ia64/kernel/perfmon_hpsim.h create mode 100644 arch/ppc/mm/cachemap.c create mode 100644 arch/ppc/ocp/Makefile create mode 100644 arch/ppc/ocp/ocp-driver.c create mode 100644 arch/ppc/ocp/ocp-probe.c create mode 100644 arch/ppc/ocp/ocp.c create mode 100644 drivers/char/dz.c create mode 100644 drivers/char/dz.h create mode 100644 drivers/char/sh-sci.c create mode 100644 drivers/char/sh-sci.h create mode 100644 drivers/i2c/busses/i2c-ixp42x.c create mode 100644 drivers/ide/pci/cmd640.h create mode 100644 drivers/ide/ppc/swarm.c create mode 100644 drivers/net/auto_irq.c create mode 100644 drivers/net/rcif.h create mode 100644 drivers/net/rclanmtl.c create mode 100644 drivers/net/rclanmtl.h create mode 100644 drivers/net/rcpci45.c create mode 100644 drivers/net/wan/comx-hw-comx.c create mode 100644 drivers/net/wan/comx-hw-locomx.c create mode 100644 drivers/net/wan/comx-hw-mixcom.c create mode 100644 drivers/net/wan/comx-hw-munich.c create mode 100644 drivers/net/wan/comx-proto-fr.c create mode 100644 drivers/net/wan/comx-proto-lapb.c create mode 100644 drivers/net/wan/comx-proto-ppp.c create mode 100644 drivers/net/wan/comx.c create mode 100644 drivers/net/wan/comx.h create mode 100644 drivers/net/wan/comxhw.h create mode 100644 drivers/net/wan/falc-lh.h 
create mode 100644 drivers/net/wan/hscx.h create mode 100644 drivers/net/wan/mixcom.h create mode 100644 drivers/net/wan/munich32x.h create mode 100644 drivers/pcmcia/sa1100.h create mode 100644 drivers/pcmcia/sa11xx_core.c create mode 100644 drivers/pcmcia/sa11xx_core.h create mode 100644 drivers/scsi/pcmcia/qlogic_core.c create mode 100644 drivers/scsi/qlogicfas.h create mode 100644 drivers/usb/core/driverfs.c create mode 100644 fs/intermezzo/Makefile create mode 100644 fs/intermezzo/cache.c create mode 100644 fs/intermezzo/dcache.c create mode 100644 fs/intermezzo/dir.c create mode 100644 fs/intermezzo/ext_attr.c create mode 100644 fs/intermezzo/file.c create mode 100644 fs/intermezzo/fileset.c create mode 100644 fs/intermezzo/inode.c create mode 100644 fs/intermezzo/intermezzo_fs.h create mode 100644 fs/intermezzo/intermezzo_idl.h create mode 100644 fs/intermezzo/intermezzo_journal.h create mode 100644 fs/intermezzo/intermezzo_kml.h create mode 100644 fs/intermezzo/intermezzo_lib.h create mode 100644 fs/intermezzo/intermezzo_psdev.h create mode 100644 fs/intermezzo/intermezzo_upcall.h create mode 100644 fs/intermezzo/journal.c create mode 100644 fs/intermezzo/journal_ext2.c create mode 100644 fs/intermezzo/journal_ext3.c create mode 100644 fs/intermezzo/journal_obdfs.c create mode 100644 fs/intermezzo/journal_reiserfs.c create mode 100644 fs/intermezzo/journal_tmpfs.c create mode 100644 fs/intermezzo/journal_xfs.c create mode 100644 fs/intermezzo/kml.c create mode 100644 fs/intermezzo/kml_decode.c create mode 100644 fs/intermezzo/kml_reint.c create mode 100644 fs/intermezzo/kml_setup.c create mode 100644 fs/intermezzo/kml_unpack.c create mode 100644 fs/intermezzo/kml_utils.c create mode 100644 fs/intermezzo/methods.c create mode 100644 fs/intermezzo/presto.c create mode 100644 fs/intermezzo/psdev.c create mode 100644 fs/intermezzo/replicator.c create mode 100644 fs/intermezzo/super.c create mode 100644 fs/intermezzo/sysctl.c create mode 100644 
fs/intermezzo/upcall.c create mode 100644 fs/intermezzo/vfs.c create mode 100644 fs/xfs/linux/kmem.h create mode 100644 fs/xfs/linux/mrlock.h create mode 100644 fs/xfs/linux/mutex.h create mode 100644 fs/xfs/linux/sema.h create mode 100644 fs/xfs/linux/spin.h create mode 100644 fs/xfs/linux/sv.h create mode 100644 fs/xfs/linux/time.h create mode 100644 fs/xfs/linux/xfs_aops.c create mode 100644 fs/xfs/linux/xfs_buf.c create mode 100644 fs/xfs/linux/xfs_buf.h create mode 100644 fs/xfs/linux/xfs_cred.h create mode 100644 fs/xfs/linux/xfs_file.c create mode 100644 fs/xfs/linux/xfs_fs_subr.c create mode 100644 fs/xfs/linux/xfs_fs_subr.h create mode 100644 fs/xfs/linux/xfs_globals.c create mode 100644 fs/xfs/linux/xfs_globals.h create mode 100644 fs/xfs/linux/xfs_ioctl.c create mode 100644 fs/xfs/linux/xfs_iops.c create mode 100644 fs/xfs/linux/xfs_iops.h create mode 100644 fs/xfs/linux/xfs_linux.h create mode 100644 fs/xfs/linux/xfs_lrw.c create mode 100644 fs/xfs/linux/xfs_lrw.h create mode 100644 fs/xfs/linux/xfs_stats.c create mode 100644 fs/xfs/linux/xfs_stats.h create mode 100644 fs/xfs/linux/xfs_super.c create mode 100644 fs/xfs/linux/xfs_super.h create mode 100644 fs/xfs/linux/xfs_sysctl.c create mode 100644 fs/xfs/linux/xfs_sysctl.h create mode 100644 fs/xfs/linux/xfs_version.h create mode 100644 fs/xfs/linux/xfs_vfs.c create mode 100644 fs/xfs/linux/xfs_vfs.h create mode 100644 fs/xfs/linux/xfs_vnode.c create mode 100644 fs/xfs/linux/xfs_vnode.h create mode 100644 include/asm-alpha/rmap.h create mode 100644 include/asm-arm/arch-cl7500/ide.h create mode 100644 include/asm-arm/arch-cl7500/keyboard.h create mode 100644 include/asm-arm/arch-clps711x/keyboard.h create mode 100644 include/asm-arm/arch-ebsa110/ide.h create mode 100644 include/asm-arm/arch-ebsa285/ide.h create mode 100644 include/asm-arm/arch-iop3xx/ide.h create mode 100644 include/asm-arm/arch-l7200/ide.h create mode 100644 include/asm-arm/arch-l7200/keyboard.h create mode 100644 
include/asm-arm/arch-nexuspci/ide.h create mode 100644 include/asm-arm/arch-pxa/ide.h create mode 100644 include/asm-arm/arch-pxa/keyboard.h create mode 100644 include/asm-arm/arch-rpc/ide.h create mode 100644 include/asm-arm/arch-s3c2410/ide.h create mode 100644 include/asm-arm/arch-sa1100/keyboard.h create mode 100644 include/asm-arm/arch-shark/ide.h create mode 100644 include/asm-arm/arch-shark/keyboard.h create mode 100644 include/asm-arm/arch-tbox/ide.h create mode 100644 include/asm-arm/rmap.h create mode 100644 include/asm-arm26/rmap.h create mode 100644 include/asm-cris/rmap.h create mode 100644 include/asm-generic/rmap.h create mode 100644 include/asm-h8300/aki3068net/machine-depend.h create mode 100644 include/asm-h8300/edosk2674/machine-depend.h create mode 100644 include/asm-h8300/generic/machine-depend.h create mode 100644 include/asm-h8300/generic/timer_rate.h create mode 100644 include/asm-h8300/h8300_smsc.h create mode 100644 include/asm-h8300/h8max/machine-depend.h create mode 100644 include/asm-i386/rmap.h create mode 100644 include/asm-ia64/rmap.h create mode 100644 include/asm-m68k/rmap.h create mode 100644 include/asm-m68knommu/rmap.h create mode 100644 include/asm-mips/rmap.h create mode 100644 include/asm-parisc/rmap.h create mode 100644 include/asm-ppc/rmap.h create mode 100644 include/asm-ppc64/rmap.h create mode 100644 include/asm-s390/rmap.h create mode 100644 include/asm-sh/rmap.h create mode 100644 include/asm-sparc/rmap.h create mode 100644 include/asm-sparc64/rmap.h create mode 100644 include/asm-um/rmap.h create mode 100644 include/asm-v850/rmap.h create mode 100644 include/asm-x86_64/rmap.h create mode 100644 include/linux/ninline.h create mode 100644 include/linux/vinline.h create mode 100644 include/linux/vserver.h create mode 100644 include/linux/vserver/context.h create mode 100644 include/linux/vserver/cvirt.h create mode 100644 include/linux/vserver/inode.h create mode 100644 include/linux/vserver/legacy.h create mode 100644 
include/linux/vserver/limit.h create mode 100644 include/linux/vserver/namespace.h create mode 100644 include/linux/vserver/network.h create mode 100644 include/linux/vserver/sched.h create mode 100644 include/linux/vserver/signal.h create mode 100644 include/linux/vserver/switch.h create mode 100644 include/linux/vserver/xid.h create mode 100644 kernel/vserver/Kconfig create mode 100644 kernel/vserver/Makefile create mode 100644 kernel/vserver/context.c create mode 100644 kernel/vserver/cvirt.c create mode 100644 kernel/vserver/init.c create mode 100644 kernel/vserver/inode.c create mode 100644 kernel/vserver/legacy.c create mode 100644 kernel/vserver/limit.c create mode 100644 kernel/vserver/namespace.c create mode 100644 kernel/vserver/network.c create mode 100644 kernel/vserver/proc.c create mode 100644 kernel/vserver/sched.c create mode 100644 kernel/vserver/signal.c create mode 100644 kernel/vserver/switch.c create mode 100644 kernel/vserver/sysctl.c create mode 100644 net/bluetooth/syms.c create mode 100644 sound/pci/ice1712/prodigy.c create mode 100644 sound/pci/ice1712/prodigy.h diff --git a/Documentation/arm/SA1100/PCMCIA b/Documentation/arm/SA1100/PCMCIA new file mode 100644 index 000000000..5eb5d3ab3 --- /dev/null +++ b/Documentation/arm/SA1100/PCMCIA @@ -0,0 +1,374 @@ +Kernel Low-Level PCMCIA Interface Documentation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +John G Dorsey +Updated: 30 June, 2000 + + +Note: this interface has not been finalized! +See also: http://www.cs.cmu.edu/~wearable/software/pcmcia-arm.html + + +Introduction + +Early versions of PCMCIA Card Services for StrongARM were designed to +permit a single socket driver to run on a variety of SA-1100 boards by +using a userland configuration process. During the conversion to the 2.3 +kernel series, all of the configuration has moved into sub-drivers in the +kernel proper (see linux/drivers/pcmcia/sa1100*). 
This document describes +the low-level interface between those sub-drivers and the sa1100 socket +driver module. + +Presently, there are six operations which must be provided by the +board-specific code. Only functions whose implementation is likely to +differ across board designs are required at this level. Some examples +include: + + - configuring card detect lines to generate interrupts + - sensing the legal voltage levels for inserted cards + - asserting the reset signal for a card + +Functions which are assumed to be the same across all designs are +performed within the generic socket driver itself. Some examples of these +kinds of operations include: + + - configuring memory access times based on the core clock frequency + - reads/writes on memory, byte swizzling, ... + +The current implementation allows the specific per-board set of low-level +operations to be determined at run time. For each specific board, the +following structure should be filled in: + + struct pcmcia_low_level { + int (*init)(struct pcmcia_init *); + int (*shutdown)(void); + int (*socket_state)(struct pcmcia_state_array *); + int (*get_irq_info)(struct pcmcia_irq_info *); + int (*configure_socket)(const struct pcmcia_configure *); + }; + +The component functions are described in detail below. Using the +machine_is_*() tests, the pointer `pcmcia_low_level' should be assigned to +the location of the table for your board. + + +0. 
init(struct pcmcia_init *init) + +This operation has three responsibilities: + + - perform any board-specific initialization tasks + - associate the given handler with any interrupt-generating signals + such as card detection, or battery voltage detection + - set up any necessary edge detection for card ready signals + +Argument passing for this operation is implemented by the following +structure: + + struct pcmcia_init { + void (*handler)(int irq, void *dev, struct pt_regs *regs); + struct pcmcia_maps *maps; + }; + +Here, `handler' is provided by the socket driver, and `maps' must be +modified if the default mapping isn't appropriate. This operation should +return one of two values: + + - the highest-numbered socket available, plus one + - a negative number, indicating an error in configuration + +Note that the former case is _not_ the same as "the number of sockets +available." In particular, if your design uses SA-1100 slot "one" but +not slot "zero," you MUST report "2" to the socket driver. + + +1. shutdown(void) + +This operation takes no arguments, and will be called during cleanup for +the socket driver module. Any state associated with the socket controller, +including allocated data structures, reserved IRQs, etc. should be +released in this routine. + +The return value for this operation is not examined. + + +2. socket_state(struct pcmcia_state_array *state_array) + +This operation will be invoked from the interrupt handler which was set up +in the earlier call to init(). Note, however, that it should not include +any side effects which would be inappropriate if the operation were to +occur when no interrupt is pending. (An extra invocation of this operation +currently takes place to initialize state in the socket driver.) 
+ +Argument passing for this operation is handled by a structure which +contains an array of the following type: + + struct pcmcia_state { + unsigned detect: 1, + ready: 1, + bvd1: 1, + bvd2: 1, + wrprot: 1, + vs_3v: 1, + vs_Xv: 1; + }; + +Upon return from the operation, a struct pcmcia_state should be filled in +for each socket available in the hardware. For every array element (up to +`size' in the struct pcmcia_state_saarray) which does not correspond to an +available socket, zero the element bits. (This includes element [0] if +socket zero is not used.) + +Regardless of how the various signals are routed to the SA-1100, the bits +in struct pcmcia_state always have the following semantics: + + detect - 1 if a card is fully inserted, 0 otherwise + ready - 1 if the card ready signal is asserted, 0 otherwise + bvd1 - the value of the Battery Voltage Detect 1 signal + bvd2 - the value of the Battery Voltage Detect 2 signal + wrprot - 1 if the card is write-protected, 0 otherwise + vs_3v - 1 if the card must be operated at 3.3V, 0 otherwise + vs_Xv - 1 if the card must be operated at X.XV, 0 otherwise + +A note about the BVD signals: if your board does not make both lines +directly observable to the processor, just return reasonable values. The +standard interpretation of the BVD signals is: + + BVD1 BVD2 + + 0 x battery is dead + 1 0 battery warning + 1 1 battery ok + +Regarding the voltage sense flags (vs_3v, vs_Xv), these bits should be set +based on a sampling of the Voltage Sense pins, if available. The standard +interpretation of the VS signals (for a "low-voltage" socket) is: + + VS1 VS2 + + 0 0 X.XV, else 3.3V, else none + 0 1 3.3V, else none + 1 0 X.XV, else none + 1 1 5V, else none + +More information about the BVD and VS conventions is available in chapter +5 of "PCMCIA System Architecture," 2nd ed., by Don Anderson. 
+ +This operation should return 1 if an IRQ is actually pending for the +socket controller, 0 if no IRQ is pending (but no error condition exists, +such as an undersized state array), or -1 on any error. + + +3. get_irq_info(struct pcmcia_irq_info *info) + +This operation obtains the IRQ assignment which is legal for the given +socket. An argument of the following type is passed: + + struct pcmcia_irq_info { + unsigned int sock; + unsigned int irq ; + }; + +The `sock' field contains the socket index being queried. The `irq' field +should contain the IRQ number corresponding to the card ready signal from +the device. + +This operation should return 0 on success, or -1 on any error. + + +4. configure_socket(const struct pcmcia_configure *configure) + +This operation allows the caller to apply power to the socket, issue a +reset, or enable various outputs. The argument is of the following type: + + struct pcmcia_configure { + unsigned sock: 8, + vcc: 8, + vpp: 8, + output: 1, + speaker: 1, + reset: 1; + }; + +The `sock' field contains the index of the socket to be configured. The +`vcc' and `vpp' fields contain the voltages to be applied for Vcc and Vpp, +respectively, in units of 0.1V. (Note that vpp==120 indicates that +programming voltage should be applied.) + +The two output enables, `output' and `speaker', refer to the card data +signal enable and the card speaker enable, respectively. The `reset' bit, +when set, indicates that the card reset should be asserted. + +This operation should return 0 on success, or -1 on any error. + + +Board-Specific Notes + +The following information is known about various SA-11x0 board designs +which may be used as reference while adding support to the kernel. + + +Carnegie Mellon Itsy/Cue (http://www.cs.cmu.edu/~wearable/itsy/) + + Itsy Chip Select 3 (CS3) Interface + ("ITSY MEMORY/PCMCIA ADD-ON BOARD with BATTERY and CHARGER CIRCUITRY," + memo dated 5-20-99, from Tim Manns to Richard Martin, et. 
al) + + Read: + ABVD2 (SS)D0 A slot, Battery Voltage Detect + ABVD1 (SS)D1 + AVSS2 (SS)D2 A slot, Voltage Sense + AVSS1 (SS)D3 + GND (SS)D4 + GND (SS)D5 + GND (SS)D6 + GND (SS)D7 + + BBVD2 (SS)D8 B slot, Battery Voltage Detect + BBVD1 (SS)D9 + BVSS2 (SS)D10 B slot, Voltage Sense + BVSS1 (SS)D11 + GND (SS)D12 + GND (SS)D13 + GND (SS)D14 + GND (SS)D15 + + Write: + (SS)D0 A_VPP_VCC LTC1472 VPPEN1 + (SS)D1 A_VPP_PGM LTC1472 VPPEN0 + (SS)D2 A_VCC_3 LTC1472 VCCEN0 + (SS)D3 A_VCC_5 LTC1472 VCCEN1 + (SS)D4 RESET (A SLOT) + (SS)D5 GND + (SS)D6 GND + (SS)D7 GND + + (SS)D8 B_VPP_VCC LTC1472 VPPEN1 + (SS)D9 B_VPP_PGM LTC1472 VPPEN0 + (SS)D10 B_VCC_3 LTC1472 VCCEN0 + (SS)D11 B_VCC_5 LTC1472 VCCEN1 + (SS)D12 RESET (B SLOT) + (SS)D13 GND + (SS)D14 GND + (SS)D15 GND + + GPIO pin assignments are as follows: (from schematics) + + GPIO 10 Slot 0 Card Detect + GPIO 11 Slot 1 Card Detect + GPIO 12 Slot 0 Ready/Interrupt + GPIO 13 Slot 1 Ready/Interrupt + + + +Intel SA-1100 Multimedia Board (http://developer.intel.com/design/strong/) + + CPLD Registers + SA-1100 Multimedia Development Board with Companion SA-1101 Development + Board User's Guide, p.4-42 + + This SA-1100/1101 development package uses only one GPIO pin (24) to + signal changes in card status, and requires software to inspect a + PCMCIA status register to determine the source. 
+ + Read: (PCMCIA Power Sense Register - 0x19400000) + S0VS1 0 Slot 0 voltage sense + S0VS2 1 + S0BVD1 2 Slot 0 battery voltage sense + S0BVD2 3 + S1VS1 4 Slot 1 voltage sense + S1VS2 5 + S1BVD1 6 Slot 1 battery voltage sense + S1BVD2 7 + + Read/Write: (PCMCIA Power Control Register - 0x19400002) + S0VPP0 0 Slot 0 Vpp + S0VPP1 1 + S0VCC0 2 Slot 0 Vcc + S0VCC1 3 + S1VPP0 4 Slot 1 Vpp + S1VPP1 5 + S1VCC0 6 Slot 1 Vcc + S1VCC1 7 + + Read: (PCMCIA Status Register - 0x19400004) + S0CD1 0 Slot 0 Card Detect 1 + S0RDY 1 Slot 0 Ready/Interrupt + S0STSCHG 2 Slot 0 Status Change + S0Reset 3 Slot 0 Reset (RW) + S1CD1 4 Slot 1 Card Detect 1 + S1RDY 5 Slot 1 Ready/Interrupt + S1STSCHG 6 Slot 1 Status Change + S1Reset 7 Slot 1 Reset (RW) + + + +Intel SA-1100 Evaluation Platform (http://developer.intel.com/design/strong/) + + Brutus I/O Pins and Chipselect Register + pcmcia-brutus.c, by Ivo Clarysse + (What's the official reference for this info?) + + This SA-1100 development board uses more GPIO pins than say, the Itsy + or the SA-1100/1101 multimedia package. The pin assignments are as + follows: + + GPIO 2 Slot 0 Battery Voltage Detect 1 + GPIO 3 Slot 0 Ready/Interrupt + GPIO 4 Slot 0 Card Detect + GPIO 5 Slot 1 Battery Voltage Detect 1 + GPIO 6 Slot 1 Ready/Interrupt + GPIO 7 Slot 1 Card Detect + + Like the Itsy, Brutus uses a chipselect register in static memory + bank 3 for the other signals, such as voltage sense or reset: + + Read: + P0_VS1 8 Slot 0 Voltage Sense + P0_VS2 9 + P0_STSCHG 10 Slot 0 Status Change + P1_VS1 12 Slot 1 Voltage Sense + P1_VS2 13 + P1_STSCHG 14 Slot 1 Status Change + + Read/Write: + P0_ 16 Slot 0 MAX1600EAI control line + P0_ 17 Slot 0 MAX1600EAI control line + P0_ 18 Slot 0 MAX1600EAI control line + P0_ 19 Slot 0 MAX1600EAI control line + P0_ 20 Slot 0 12V + P0_ 21 Slot 0 Vpp to Vcc (CONFIRM?) 
+ P0_ 22 Slot 0 enable fan-out drivers & xcvrs + P0_SW_RST 23 Slot 0 Reset + P1_ 24 Slot 1 MAX1600EAI control line + P1_ 25 Slot 1 MAX1600EAI control line + P1_ 26 Slot 1 MAX1600EAI control line + P1_ 27 Slot 1 MAX1600EAI control line + P1_ 28 Slot 1 12V + P1_ 29 Slot 1 Vpp to Vcc (CONFIRM?) + P1_ 30 Slot 1 enable fan-out drivers & xcvrs + P1_SW_RST 31 Slot 1 Reset + + For each slot, the bits labelled "MAX1600EAI" should (apparently) + be written with the value 0101 for Vcc 3.3V, and 1001 for Vcc 5V. + + + +Intel SA-1110 Development Platform (http://developer.intel.com/design/strong/) + + GPIO Pin Descriptions and Board Control Register + SA-1110 Microprocessor Development Board User's Guide, p.4-7, 4-10 + + The Assabet board contains only a single Compact Flash slot, + attached to slot 1 on the SA-1110. Card detect, ready, and BVD + signals are routed through GPIO, with power and reset placed in a + control register. Note that the CF bus must be enabled before use. + + GPIO 21 Slot 1 Compact Flash interrupt + GPIO 22 Slot 1 card detect (CD1 NOR CD2) + GPIO 24 Slot 1 Battery Voltage Detect 2 + GPIO 25 Slot 1 Battery Voltage Detect 1 + + Write-only: (Board Control Register - 0x12000000) + CF_PWR 0 CF bus power (3.3V) + CF_RST 1 CF reset + CF_Bus_On 7 CF bus enable + diff --git a/Documentation/arm/XScale/ADIFCC/80200EVB b/Documentation/arm/XScale/ADIFCC/80200EVB new file mode 100644 index 000000000..3762de418 --- /dev/null +++ b/Documentation/arm/XScale/ADIFCC/80200EVB @@ -0,0 +1,110 @@ + +Board Overview +----------------------------- + +This is an beta release of the Xscale Linux port to the ADI 80200EVB +evaluation board. + +The 80200EVB is an evaluation platform for ADI Engineering's high-performance +80200FCC chipset for the Intel 80200 XScale CPU. The 80200FCC is an open +source FPGA based system that contains a PCI unit and a high performance +memory controller. + +In addition to the 80200FCC, the board also contains a 16C550 UART, and 4MB +of flash. 
+ +The board is still under development and currently only the UART is functional +as the PCI bits have not been programmed into the FPGA. + +For more information on the board, see http://www.adiengineering.com + +Port Status +----------------------------- + +Supported: + +- Onboard UART (Polled operation only) +- Cache/TLB locking on 80200 CPU + +TODO: + +- PCI when hardware supports it + +Building the Kernel +----------------------------- +change Linux makefile +make adi_evb_config +make oldconfig +make zImage + +Loading Linux +----------------------------- + +Before you can use Linux on the ADI board, you need to grab the following: + +ADI 80200EVB Monitor: + ftp://source.mvista.com/pub/xscale/ADI_EVB/monitor.srec + +ADI JFFS2 Image: + ftp://source.mvista.com/pub/xscale/ADI_EVB/adi.jffs2 + +Once you've got the Cygnus prompt, type in the following command: + + load + +On another terminal window: + + cat monitor.srec > /dev/ttyS0 + +(replace ttyS0 with the serial port you are using) + +Once completed, just type 'go' at the cygmon prompt and you should see: + + MontaVista IQ80310 Monitor Version 0.1 + monitor> + +Type 'b 115200' at the prompt and change your terminal speed to 115200 + +The first thing to do is to upload and burn the jffs2 filesystem image +onto the boards 4MB of flash: + + monitor> u c1000000 + Uploading file at 0xc1000000 + Now send file with ymodem + +Do as the monitor says and transfer the file adi.jffs2. Once complete, +the following will copy the jffs2 image to location 0x80000 in the flash. + + monitor> f 8000 c1000000 200000 + Erasing sector 0x00080000 + Writing sector 0x00080000 with data at 0xC1000000 + Erasing sector 0x000A0000 + Writing sector 0x000A0000 with data at 0xC1020000 + Erasing sector 0x000C0000 + ... + +Now use the same command as above to upload your zImage to location c1000000. +When you've done that, type 'j c1000000' to run Linux. Login as +root and you're all set to go. 
+ +Misc Notes +----------------------------- + +The current version of the HW does not have an onboard timer, so the 80200 +PMU is not available for general use as it is being used for a timer source. + +By default, the MTD driver reserves the first 512K for bootloaders and +the remaining 3.5MB for the filesystem. You can edit drivers/mtd/map/adi_evb.c +to change this as needed for your application. + +Contributors +----------------------------- + +Thanks to ADI Engineering for providing the hardware for development + +Deepak Saxena - Initial port + +----------------------------- +Enjoy. If you have any problem please contact Deepak Saxena +dsaxena@mvista.com + diff --git a/Documentation/arm/XScale/IOP3XX/IQ80310 b/Documentation/arm/XScale/IOP3XX/IQ80310 new file mode 100644 index 000000000..5312a5742 --- /dev/null +++ b/Documentation/arm/XScale/IOP3XX/IQ80310 @@ -0,0 +1,247 @@ + +Board Overview +----------------------------- + +The Cyclone IQ80310 board is an evaluation platform for Intel's 80200 Xscale +CPU and 80312 Intelligent I/O chipset (collectively called IOP310 chipset). + +The 80312 contains dual PCI hoses (called the ATUs), a PCI-to-PCI bridge, +three DMA channels (1 on secondary PCI, one on primary PCI ), I2C, I2O +messaging unit, XOR unit for RAID operations, a bus performance monitoring +unit, and a memory controller with ECC features. 
+ +For more information on the board, see http://developer.intel.com/iio + +Port Status +----------------------------- + +Supported: + +- MTD/JFFS/JFFS2 +- NFS root +- RAMDISK root +- 2ndary PCI slots +- Onboard ethernet +- Serial ports (ttyS0/S1) +- Cache/TLB locking on 80200 CPU +- Performance monitoring unit on 80200 CPU +- 80200 Performance Monitoring Unit +- Acting as a system controller on Cyclone 80303BP PCI backplane +- DMA engines (EXPERIMENTAL) +- 80312 Bus Performance Monitor (EXPERIMENTAL) +- Application Accelerator Unit (XOR engine for RAID) (EXPERIMENTAL) +- Messaging Unit (EXPERIMENTAL) + +TODO: +- I2C + +Building the Kernel +----------------------------- +make iq80310_config +make oldconfig +make zImage + +This will build an image setup for BOOTP/NFS root support. To change this, +just run make menuconfig and disable nfs root or add a "root=" option. + +Preparing the Hardware +----------------------------- + +This document assumes you're using a Rev D or newer board running +Redboot as the bootloader. Note that the version of RedBoot provided +with the boards has a major issue and you need to replace it with the +latest RedBoot. You can grab the source from the ECOS CVS or you can +get a prebuilt image and burn it in using FRU at: + + ftp://source.mvista.com/pub/xscale/iq80310/redboot.bin + +Make sure you do an 'fis init' command once you boot with the new +RedBoot image. + + + +Downloading Linux +----------------------------- + +Assuming you have your development system setup to act as a bootp/dhcp +server and running tftp: + + RedBoot> load -r -b 0xa1008000 /tftpboot/zImage.xs + Raw file loaded 0xa1008000-0xa1094bd8 + +If you're not using dhcp/tftp, you can use y-modem instead: + + RedBoot> load -r -b 0xa1008000 -m y + +Note that on Rev D. of the board, tftp does not work due to intermittent +interrupt issues, so you need to download using ymodem. 
+ +Once the download is completed: + + RedBoot> go 0xa1008000 + +Root Devices +----------------------------- + +A kernel is not useful without a root filesystem, and you have several +choices with this board: NFS root, RAMDISK, or JFFS/JFFS2. For development +purposes, it is suggested that you use NFS root for easy access to various +tools. Once you're ready to deploy, probably want to utilize JFFS/JFFS2 on +the flash device. + +MTD on the IQ80310 +----------------------------- + +Linux on the IQ80310 supports RedBoot FIS paritioning if it is enabled. +Out of the box, once you've done 'fis init' on RedBoot, you will get +the following partitioning scheme: + + root@192.168.0.14:~# cat /proc/mtd + dev: size erasesize name + mtd0: 00040000 00020000 "RedBoot" + mtd1: 00040000 00020000 "RedBoot[backup]" + mtd2: 0075f000 00020000 "unallocated space" + mtd3: 00001000 00020000 "RedBoot config" + mtd4: 00020000 00020000 "FIS directory" + +To create an FIS directory, you need to use the fis command in RedBoot. +As an example, you can burn the kernel into the flash once it's downloaded: + + RedBoot> fis create -b 0xa1008000 -l 0x8CBAC -r 0xa1008000 -f 0x80000 kernel + ... Erase from 0x00080000-0x00120000: ..... + ... Program from 0xa1008000-0xa1094bac at 0x00080000: ..... + ... Unlock from 0x007e0000-0x00800000: . + ... Erase from 0x007e0000-0x00800000: . + ... Program from 0xa1fdf000-0xa1fff000 at 0x007e0000: . + ... Lock from 0x007e0000-0x00800000: . 
+ + RedBoot> fis list + Name FLASH addr Mem addr Length Entry point + RedBoot 0x00000000 0x00000000 0x00040000 0x00000000 + RedBoot[backup] 0x00040000 0x00040000 0x00040000 0x00000000 + RedBoot config 0x007DF000 0x007DF000 0x00001000 0x00000000 + FIS directory 0x007E0000 0x007E0000 0x00020000 0x00000000 + kernel 0x00080000 0xA1008000 0x000A0000 0x00000000 + +This leads to the following Linux MTD setup: + + mtroot@192.168.0.14:~# cat /proc/mtd + dev: size erasesize name + mtd0: 00040000 00020000 "RedBoot" + mtd1: 00040000 00020000 "RedBoot[backup]" + mtd2: 000a0000 00020000 "kernel" + mtd3: 006bf000 00020000 "unallocated space" + mtd4: 00001000 00020000 "RedBoot config" + mtd5: 00020000 00020000 "FIS directory" + +Note that there is not a 1:1 mapping to the number of RedBoot paritions to +MTD partitions as unused space also gets allocated into MTD partitions. + +As an aside, the -r option when creating the Kernel entry allows you to +simply do an 'fis load kernel' to copy the image from flash into memory. +You can then do an 'fis go 0xa1008000' to start Linux. + +If you choose to use static partitioning instead of the RedBoot partioning: + + /dev/mtd0 0x00000000 - 0x0007ffff: Boot Monitor (512k) + /dev/mtd1 0x00080000 - 0x0011ffff: Kernel Image (640K) + /dev/mtd2 0x00120000 - 0x0071ffff: File System (6M) + /dev/mtd3 0x00720000 - 0x00800000: RedBoot Reserved (896K) + +To use a JFFS1/2 root FS, you need to donwload the JFFS image using either +tftp or ymodem, and then copy it to flash: + + RedBoot> load -r -b 0xa1000000 /tftpboot/jffs.img + Raw file loaded 0xa1000000-0xa1600000 + RedBoot> fis create -b 0xa1000000 -l 0x600000 -f 0x120000 jffs + ... Erase from 0x00120000-0x00720000: .................................. + ... Program from 0xa1000000-0xa1600000 at 0x00120000: .................. + ...................... + ... Unlock from 0x007e0000-0x00800000: . + ... Erase from 0x007e0000-0x00800000: . + ... Program from 0xa1fdf000-0xa1fff000 at 0x007e0000: . + ... 
Lock from 0x007e0000-0x00800000: . + RedBoot> fis list + Name FLASH addr Mem addr Length Entry point + RedBoot 0x00000000 0x00000000 0x00040000 0x00000000 + RedBoot[backup] 0x00040000 0x00040000 0x00040000 0x00000000 + RedBoot config 0x007DF000 0x007DF000 0x00001000 0x00000000 + FIS directory 0x007E0000 0x007E0000 0x00020000 0x00000000 + kernel 0x00080000 0xA1008000 0x000A0000 0xA1008000 + jffs 0x00120000 0x00120000 0x00600000 0x00000000 + +This looks like this in Linux: + + root@192.168.0.14:~# cat /proc/mtd + dev: size erasesize name + mtd0: 00040000 00020000 "RedBoot" + mtd1: 00040000 00020000 "RedBoot[backup]" + mtd2: 000a0000 00020000 "kernel" + mtd3: 00600000 00020000 "jffs" + mtd4: 000bf000 00020000 "unallocated space" + mtd5: 00001000 00020000 "RedBoot config" + mtd6: 00020000 00020000 "FIS directory" + +You need to boot the kernel once and watch the boot messages to see how the +JFFS RedBoot partition mapped into the MTD partition scheme. + +You can grab a pre-built JFFS image to use as a root file system at: + + ftp://source.mvista.com/pub/xscale/iq80310/jffs.img + +For detailed info on using MTD and creating a JFFS image go to: + + http://www.linux-mtd.infradead.org. + +For details on using RedBoot's FIS commands, type 'fis help' or consult +your RedBoot manual. + +Contributors +----------------------------- + +Thanks to Intel Corporation for providing the hardware. + +John Clark - Initial discovery of RedBoot issues +Dave Jiang - IRQ demux fixes, AAU, DMA, MU +Nicolas Pitre - Initial port, cleanup, debugging +Matt Porter - PCI subsystem development, debugging +Tim Sanders - Initial PCI code +Mark Salter - RedBoot fixes +Deepak Saxena - Cleanup, debug, cache lock, PMU + +----------------------------- +Enjoy. + +If you have any problems please contact Deepak Saxena + +A few notes from rmk +----------------------------- + +These are notes of my initial experience getting the IQ80310 Rev D up and +running. 
In total, it has taken many hours to work out what's going on... +The version of redboot used is: + + RedBoot(tm) bootstrap and debug environment, version UNKNOWN - built 14:58:21, Aug 15 2001 + + +1. I've had a corrupted download of the redboot.bin file from Montavista's + FTP site. It would be a good idea if there were md5sums, sum or gpg + signatures available to ensure the integrity of the downloaded files. + The result of this was an apparantly 100% dead card. + +2. RedBoot Intel EtherExpress Pro 100 driver seems to be very unstable - + I've had it take out the whole of a 100mbit network for several minutes. + The Hub indiates ZERO activity, despite machines attempting to communicate. + Further to this, while tftping the kernel, the transfer will stall regularly, + and might even drop the link LED. + +3. There appears to be a bug in the Intel Documentation Pack that comes with + the IQ80310 board. Serial port 1, which is the socket next to the LEDs + is address 0xfe810000, not 0xfe800000. + + Note that RedBoot uses either serial port 1 OR serial port 2, so if you + have your console connected to the wrong port, you'll see redboot messages + but not kernel boot messages. + +4. Trying to use fconfig to setup a boot script fails - it hangs when trying + to erase the flash. diff --git a/Documentation/arm/XScale/IOP3XX/IQ80321 b/Documentation/arm/XScale/IOP3XX/IQ80321 new file mode 100644 index 000000000..e3253279d --- /dev/null +++ b/Documentation/arm/XScale/IOP3XX/IQ80321 @@ -0,0 +1,215 @@ + +Board Overview +----------------------------- + +The Worcester IQ80321 board is an evaluation platform for Intel's 80321 Xscale +CPU (sometimes called IOP321 chipset). + +The 80321 contains a single PCI hose (called the ATUs), a PCI-to-PCI bridge, +two DMA channels, I2C, I2O messaging unit, XOR unit for RAID operations, +a bus performance monitoring unit, and a memory controller with ECC features. 
+ +For more information on the board, see http://developer.intel.com/iio + +Port Status +----------------------------- + +Supported: + +- MTD/JFFS/JFFS2 root +- NFS root +- RAMDISK root +- Serial port (ttyS0) +- Cache/TLB locking on 80321 CPU +- Performance monitoring unit on 80321 CPU + +TODO: + +- DMA engines +- I2C +- 80321 Bus Performance Monitor +- Application Accelerator Unit (XOR engine for RAID) +- I2O Messaging Unit +- I2C unit +- SSP + +Building the Kernel +----------------------------- +make iq80321_config +make oldconfig +make zImage + +This will build an image setup for BOOTP/NFS root support. To change this, +just run make menuconfig and disable nfs root or add a "root=" option. + +Preparing the Hardware +----------------------------- + +Make sure you do an 'fis init' command once you boot with the new +RedBoot image. + +Downloading Linux +----------------------------- + +Assuming you have your development system setup to act as a bootp/dhcp +server and running tftp: + +NOTE: The 80321 board uses a different default memory map than the 80310. + + RedBoot> load -r -b 0x01008000 -m y + +Once the download is completed: + + RedBoot> go 0x01008000 + +There is a version of RedBoot floating around that has DHCP support, but +I've never been able to cleanly transfer a kernel image and have it run. + +Root Devices +----------------------------- + +A kernel is not useful without a root filesystem, and you have several +choices with this board: NFS root, RAMDISK, or JFFS/JFFS2. For development +purposes, it is suggested that you use NFS root for easy access to various +tools. Once you're ready to deploy, probably want to utilize JFFS/JFFS2 on +the flash device. + +MTD on the IQ80321 +----------------------------- + +Linux on the IQ80321 supports RedBoot FIS paritioning if it is enabled. 
+Out of the box, once you've done 'fis init' on RedBoot, you will get +the following partitioning scheme: + + root@192.168.0.14:~# cat /proc/mtd + dev: size erasesize name + mtd0: 00040000 00020000 "RedBoot" + mtd1: 00040000 00020000 "RedBoot[backup]" + mtd2: 0075f000 00020000 "unallocated space" + mtd3: 00001000 00020000 "RedBoot config" + mtd4: 00020000 00020000 "FIS directory" + +To create an FIS directory, you need to use the fis command in RedBoot. +As an example, you can burn the kernel into the flash once it's downloaded: + + RedBoot> fis create -b 0x01008000 -l 0x8CBAC -r 0x01008000 -f 0x80000 kernel + ... Erase from 0x00080000-0x00120000: ..... + ... Program from 0x01008000-0x01094bac at 0x00080000: ..... + ... Unlock from 0x007e0000-0x00800000: . + ... Erase from 0x007e0000-0x00800000: . + ... Program from 0x01fdf000-0x01fff000 at 0x007e0000: . + ... Lock from 0x007e0000-0x00800000: . + + RedBoot> fis list + Name FLASH addr Mem addr Length Entry point + RedBoot 0x00000000 0x00000000 0x00040000 0x00000000 + RedBoot[backup] 0x00040000 0x00040000 0x00040000 0x00000000 + RedBoot config 0x007DF000 0x007DF000 0x00001000 0x00000000 + FIS directory 0x007E0000 0x007E0000 0x00020000 0x00000000 + kernel 0x00080000 0x01008000 0x000A0000 0x00000000 + +This leads to the following Linux MTD setup: + + mtroot@192.168.0.14:~# cat /proc/mtd + dev: size erasesize name + mtd0: 00040000 00020000 "RedBoot" + mtd1: 00040000 00020000 "RedBoot[backup]" + mtd2: 000a0000 00020000 "kernel" + mtd3: 006bf000 00020000 "unallocated space" + mtd4: 00001000 00020000 "RedBoot config" + mtd5: 00020000 00020000 "FIS directory" + +Note that there is not a 1:1 mapping to the number of RedBoot paritions to +MTD partitions as unused space also gets allocated into MTD partitions. + +As an aside, the -r option when creating the Kernel entry allows you to +simply do an 'fis load kernel' to copy the image from flash into memory. +You can then do an 'fis go 0x01008000' to start Linux. 
+ +If you choose to use static partitioning instead of the RedBoot partioning: + + /dev/mtd0 0x00000000 - 0x0007ffff: Boot Monitor (512k) + /dev/mtd1 0x00080000 - 0x0011ffff: Kernel Image (640K) + /dev/mtd2 0x00120000 - 0x0071ffff: File System (6M) + /dev/mtd3 0x00720000 - 0x00800000: RedBoot Reserved (896K) + +To use a JFFS1/2 root FS, you need to donwload the JFFS image using either +tftp or ymodem, and then copy it to flash: + + RedBoot> load -r -b 0x01000000 /tftpboot/jffs.img + Raw file loaded 0x01000000-0x01600000 + RedBoot> fis create -b 0x01000000 -l 0x600000 -f 0x120000 jffs + ... Erase from 0x00120000-0x00720000: .................................. + ... Program from 0x01000000-0x01600000 at 0x00120000: .................. + ...................... + ... Unlock from 0x007e0000-0x00800000: . + ... Erase from 0x007e0000-0x00800000: . + ... Program from 0x01fdf000-0x01fff000 at 0x007e0000: . + ... Lock from 0x007e0000-0x00800000: . + RedBoot> fis list + Name FLASH addr Mem addr Length Entry point + RedBoot 0x00000000 0x00000000 0x00040000 0x00000000 + RedBoot[backup] 0x00040000 0x00040000 0x00040000 0x00000000 + RedBoot config 0x007DF000 0x007DF000 0x00001000 0x00000000 + FIS directory 0x007E0000 0x007E0000 0x00020000 0x00000000 + kernel 0x00080000 0x01008000 0x000A0000 0x01008000 + jffs 0x00120000 0x00120000 0x00600000 0x00000000 + +This looks like this in Linux: + + root@192.168.0.14:~# cat /proc/mtd + dev: size erasesize name + mtd0: 00040000 00020000 "RedBoot" + mtd1: 00040000 00020000 "RedBoot[backup]" + mtd2: 000a0000 00020000 "kernel" + mtd3: 00600000 00020000 "jffs" + mtd4: 000bf000 00020000 "unallocated space" + mtd5: 00001000 00020000 "RedBoot config" + mtd6: 00020000 00020000 "FIS directory" + +You need to boot the kernel once and watch the boot messages to see how the +JFFS RedBoot partition mapped into the MTD partition scheme. 
+ +You can grab a pre-built JFFS image to use as a root file system at: + + ftp://source.mvista.com/pub/xscale/iq80310/jffs.img + +For detailed info on using MTD and creating a JFFS image go to: + + http://www.linux-mtd.infradead.org. + +For details on using RedBoot's FIS commands, type 'fis help' or consult +your RedBoot manual. + +BUGS and ISSUES +----------------------------- + +* As shipped from Intel, pre-production boards have two issues: + +- The on board ethernet is disabled; S8E1-2 is off. You will need to turn it on. + +- The PCIXCAPs are configured for a 100Mhz clock, but the clock selected is + actually only 66Mhz. This causes the wrong PLL multiplier to be used and the + board only runs at 400Mhz instead of 600Mhz. The way to observe this is to + use an independent clock to time a "sleep 10" command from the prompt. If it + takes 15 seconds instead of 10, you are running at 400Mhz. + +- The experimental IOP310 drivers for the AAU, DMA, etc. are not supported yet. + +Contributors +----------------------------- +The port to the IQ80321 was performed by: + +Rory Bolt - Initial port, debugging. + +This port was based on the IQ80310 port with the following contributors: + +Nicolas Pitre - Initial port, cleanup, debugging +Matt Porter - PCI subsystem development, debugging +Tim Sanders - Initial PCI code +Deepak Saxena - Cleanup, debug, cache lock, PMU + +The port is currently maintained by Deepak Saxena + +----------------------------- +Enjoy. diff --git a/Documentation/arm/XScale/IOP3XX/aau.txt b/Documentation/arm/XScale/IOP3XX/aau.txt new file mode 100644 index 000000000..e3852ccbf --- /dev/null +++ b/Documentation/arm/XScale/IOP3XX/aau.txt @@ -0,0 +1,178 @@ +Support functions for the Intel 80310 AAU +=========================================== + +Dave Jiang +Last updated: 09/18/2001 + +The Intel 80312 companion chip in the 80310 chipset contains an AAU. The +AAU is capable of processing up to 8 data block sources and performing XOR +operations on them.
This unit is typically used to accelerate XOR +operations utilized by RAID storage device drivers such as RAID 5. This +API is designed to provide a set of functions to take advantage of the +AAU. The AAU can also be used to transfer data blocks and used as a memory +copier. The AAU transfers memory faster than the operation performed by +using CPU copy; therefore it is recommended to use the AAU for memory copy. + +------------------ +int aau_request(u32 *aau_context, const char *device_id); +This function allows the user to acquire control of the AAU. The +function will return a context of AAU to the user and allocate +an interrupt for the AAU. The user must pass the context as a parameter to +various AAU API calls. + +int aau_queue_buffer(u32 aau_context, aau_head_t *listhead); +This function starts the AAU operation. The user must create an SGL +header with an SGL attached. The format is presented below. The SGL is +built from kernel memory. + +/* hardware descriptor */ +typedef struct _aau_desc +{ + u32 NDA; /* next descriptor address [READONLY] */ + u32 SAR[AAU_SAR_GROUP]; /* src addrs */ + u32 DAR; /* destination addr */ + u32 BC; /* byte count */ + u32 DC; /* descriptor control */ + u32 SARE[AAU_SAR_GROUP]; /* extended src addrs */ +} aau_desc_t; + +/* user SGL format */ +typedef struct _aau_sgl +{ + aau_desc_t aau_desc; /* AAU HW Desc */ + u32 status; /* status of SGL [READONLY] */ + struct _aau_sgl *next; /* pointer to next SG [READONLY] */ + void *dest; /* destination addr */ + void *src[AAU_SAR_GROUP]; /* source addr[4] */ + void *ext_src[AAU_SAR_GROUP]; /* ext src addr[4] */ + u32 total_src; /* total number of source */ +} aau_sgl_t; + +/* header for user SGL */ +typedef struct _aau_head +{ + u32 total; /* total descriptors allocated */ + u32 status; /* SGL status */ + aau_sgl_t *list; /* ptr to head of list */ + aau_callback_t callback; /* callback func ptr */ +} aau_head_t; + + +The function will call aau_start() and start the AAU after it
queues +the SGL to the processing queue. When the function will either +a. Sleep on the wait queue aau->wait_q if no callback has been provided, or +b. Continue and then call the provided callback function when DMA interrupt + has been triggered. + +int aau_suspend(u32 aau_context); +Stops/Suspends the AAU operation + +int aau_free(u32 aau_context); +Frees the ownership of AAU. Called when no longer need AAU service. + +aau_sgl_t * aau_get_buffer(u32 aau_context, int num_buf); +This function obtains an AAU SGL for the user. User must specify the number +of descriptors to be allocated in the chain that is returned. + +void aau_return_buffer(u32 aau_context, aau_sgl_t *list); +This function returns all SGL back to the API after user is done. + +int aau_memcpy(void *dest, void *src, u32 size); +This function is a short cut for user to do memory copy utilizing the AAU for +better large block memory copy vs using the CPU. This is similar to using +typical memcpy() call. + +* User is responsible for the source address(es) and the destination address. + The source and destination should all be cached memory. 
+ + + +void aau_test() +{ + u32 aau; + char dev_id[] = "AAU"; + int size = 2; + int err = 0; + aau_head_t *head; + aau_sgl_t *list; + u32 i; + u32 result = 0; + void *src, *dest; + + printk("Starting AAU test\n"); + if((err = aau_request(&aau, dev_id))<0) + { + printk("test - AAU request failed: %d\n", err); + return; + } + else + { + printk("test - AAU request successful\n"); + } + + head = kmalloc(sizeof(aau_head_t), GFP_KERNEL); + head->total = size; + head->status = 0; + head->callback = NULL; + + list = aau_get_buffer(aau, size); + if(!list) + { + printk("Can't get buffers\n"); + return; + } + head->list = list; + + src = kmalloc(1024, GFP_KERNEL); + dest = kmalloc(1024, GFP_KERNEL); + + while(list) + { + list->status = 0; + list->aau_desc->SAR[0] = (u32)src; + list->aau_desc->DAR = (u32)dest; + list->aau_desc->BC = 1024; + + /* see iop310-aau.h for more DCR commands */ + list->aau_desc->DC = AAU_DCR_WRITE | AAU_DCR_BLKCTRL_1_DF; + if(!list->next) + { + list->aau_desc->DC = AAU_DCR_IE; + break; + } + list = list->next; + } + + printk("test- Queueing buffer for AAU operation\n"); + err = aau_queue_buffer(aau, head); + if(err >= 0) + { + printk("AAU Queue Buffer is done...\n"); + } + else + { + printk("AAU Queue Buffer failed...: %d\n", err); + } + + + +#if 1 + printk("freeing the AAU\n"); + aau_return_buffer(aau, head->list); + aau_free(aau); + kfree(src); + kfree(dest); + kfree((void *)head); +#endif +} + +All Disclaimers apply. Use this at your own discretion. Neither Intel nor I +will be responsible if anything goes wrong. 
=) + + +TODO +____ +* Testing +* Do zero-size AAU transfer/channel at init + so all we have to do is chaining + diff --git a/Documentation/arm/XScale/IOP3XX/dma.txt b/Documentation/arm/XScale/IOP3XX/dma.txt new file mode 100644 index 000000000..50c7f99e4 --- /dev/null +++ b/Documentation/arm/XScale/IOP3XX/dma.txt @@ -0,0 +1,214 @@ +Support functions for the Intel 80310 DMA channels +================================================== + +Dave Jiang +Last updated: 09/18/2001 + +The Intel 80310 XScale chipset provides 3 DMA channels via the 80312 I/O +companion chip. Two of them reside on the primary PCI bus and one on the +secondary PCI bus. + +The DMA API provided is not compatible with the generic interface in the +ARM tree unfortunately due to how the 80312 DMACs work. Hopefully some time +in the near future a software interface can be done to bridge the differences. +The DMA API has been modeled after Nicholas Pitre's SA11x0 DMA API therefore +they will look somewhat similar. + + +80310 DMA API +------------- + +int dma_request(dmach_t channel, const char *device_id); + +This function will attempt to allocate the channel depending on what the +user requests: + +IOP310_DMA_P0: PCI Primary 1 +IOP310_DMA_P1: PCI Primary 2 +IOP310_DMA_S0: PCI Secondary 1 +/*EOF*/ + +Once the user allocates the DMA channel it is owned until released. Although +other users can also use the same DMA channel, no new resources will be +allocated. The function will return the allocated channel number if successful.
+ +int dma_queue_buffer(dmach_t channel, dma_sghead_t *listhead); + +The user will construct a SGL in the form of below: +/* + * Scattered Gather DMA List for user + */ +typedef struct _dma_desc +{ + u32 NDAR; /* next descriptor adress [READONLY] */ + u32 PDAR; /* PCI address */ + u32 PUADR; /* upper PCI address */ + u32 LADR; /* local address */ + u32 BC; /* byte count */ + u32 DC; /* descriptor control */ +} dma_desc_t; + +typedef struct _dma_sgl +{ + dma_desc_t dma_desc; /* DMA descriptor */ + u32 status; /* descriptor status [READONLY] */ + u32 data; /* user defined data */ + struct _dma_sgl *next; /* next descriptor [READONLY] */ +} dma_sgl_t; + +/* dma sgl head */ +typedef struct _dma_head +{ + u32 total; /* total elements in SGL */ + u32 status; /* status of sgl */ + u32 mode; /* read or write mode */ + dma_sgl_t *list; /* pointer to list */ + dma_callback_t callback; /* callback function */ +} dma_head_t; + + +The user shall allocate user SGL elements by calling the function: +dma_get_buffer(). This function will give the user an SGL element. The user +is responsible for creating the SGL head however. The user is also +responsible for allocating the memory for DMA data. 
The following code segment +shows how a DMA operation can be performed: + +#include + +void dma_test(void) +{ + char dev_id[] = "Primary 0"; + dma_head_t *sgl_head = NULL; + dma_sgl_t *sgl = NULL; + int err = 0; + int channel = -1; + u32 *test_ptr = 0; + DECLARE_WAIT_QUEUE_HEAD(wait_q); + + + *(IOP310_ATUCR) = (IOP310_ATUCR_PRIM_OUT_ENAB | + IOP310_ATUCR_DIR_ADDR_ENAB); + + channel = dma_request(IOP310_DMA_P0, dev_id); + + sgl_head = (dma_head_t *)kmalloc(sizeof(dma_head_t), GFP_KERNEL); + sgl_head->callback = NULL; /* no callback created */ + sgl_head->total = 2; /* allocating 2 DMA descriptors */ + sgl_head->mode = (DMA_MOD_WRITE); + sgl_head->status = 0; + + /* now we get the two descriptors */ + sgl = dma_get_buffer(channel, 2); + + /* we set the header to point to the list we allocated */ + sgl_head->list = sgl; + + /* allocate 1k of DMA data */ + sgl->data = (u32)kmalloc(1024, GFP_KERNEL); + + /* Local address is physical */ + sgl->dma_desc.LADR = (u32)virt_to_phys(sgl->data); + + /* write to arbitrary location over the PCI bus */ + sgl->dma_desc.PDAR = 0x00600000; + sgl->dma_desc.PUADR = 0; + sgl->dma_desc.BC = 1024; + + /* set write & invalidate PCI command */ + sgl->dma_desc.DC = DMA_DCR_PCI_MWI; + sgl->status = 0; + + /* set a pattern */ + memset(sgl->data, 0xFF, 1024); + + /* User's responsibility to keep buffers cached coherent */ + cpu_dcache_clean(sgl->data, sgl->data + 1024); + + sgl = sgl->next; + + sgl->data = (u32)kmalloc(1024, GFP_KERNEL); + sgl->dma_desc.LADR = (u32)virt_to_phys(sgl->data); + sgl->dma_desc.PDAR = 0x00610000; + sgl->dma_desc.PUADR = 0; + sgl->dma_desc.BC = 1024; + + /* second descriptor has interrupt flag enabled */ + sgl->dma_desc.DC = (DMA_DCR_PCI_MWI | DMA_DCR_IE); + + /* must set end of chain flag */ + sgl->status = DMA_END_CHAIN; /* DO NOT FORGET THIS!!!! 
*/ + + memset(sgl->data, 0x0f, 1024); + /* User's responsibility to keep buffers cached coherent */ + cpu_dcache_clean(sgl->data, sgl->data + 1024); + + /* queuing the buffer, this function will sleep since no callback */ + err = dma_queue_buffer(channel, sgl_head); + + /* now we are woken from DMA complete */ + + /* do data operations here */ + + /* free DMA data if necessary */ + + /* return the descriptors */ + dma_return_buffer(channel, sgl_head->list); + + /* free the DMA */ + dma_free(channel); + + kfree((void *)sgl_head); +} + + +dma_sgl_t * dma_get_buffer(dmach_t channel, int buf_num); + +This call allocates DMA descriptors for the user. + + +void dma_return_buffer(dmach_t channel, dma_sgl_t *list); + +This call returns the allocated descriptors back to the API. + + +int dma_suspend(dmach_t channel); + +This call suspends any DMA transfer on the given channel. + + + +int dma_resume(dmach_t channel); + +This call resumes a DMA transfer which would have been stopped through +dma_suspend(). + + +int dma_flush_all(dmach_t channel); + +This completely flushes all queued buffers and on-going DMA transfers on a +given channel. This is called when DMA channel errors have occurred. + + +void dma_free(dmach_t channel); + +This clears all activities on a given DMA channel and releases it for future +requests. + + + +Buffer Allocation +----------------- +It is the user's responsibility to allocate, free, and keep track of the +allocated DMA data memory. Upon calling dma_queue_buffer() the user must +relinquish the control of the buffers to the kernel and not change the +state of the buffers that it has passed to the kernel. The user will regain +the control of the buffers when it has been woken up by the bottom half of +the DMA interrupt handler. The user can allocate cached buffers or non-cached +via pci_alloc_consistent(). It is the user's responsibility to ensure that +the data is cache coherent. 
+ +*Reminder* +The user is responsible for ensuring the ATU is set up properly for DMA transfers. + +All Disclaimers apply. Use this at your own discretion. Neither Intel nor I +will be responsible if anything goes wrong. diff --git a/Documentation/arm/XScale/IOP3XX/message.txt b/Documentation/arm/XScale/IOP3XX/message.txt new file mode 100644 index 000000000..480d13e7a --- /dev/null +++ b/Documentation/arm/XScale/IOP3XX/message.txt @@ -0,0 +1,110 @@ +Support functions for the Intel 80310 MU +=========================================== + +Dave Jiang +Last updated: 10/11/2001 + +The messaging unit of the IOP310 contains 4 components and is utilized for +passing messages between the PCI agents on the primary bus and the Intel(R) +80200 CPU. The four components are: +Messaging Component +Doorbell Component +Circular Queues Component +Index Registers Component + +Messaging Component: +Contains 4 32bit registers, 2 in and 2 out. Writing to the registers asserts an +interrupt on the PCI bus or to the 80200 depending on whether the message is incoming or outgoing. + +int mu_msg_request(u32 *mu_context); +Request the usage of Messaging Component. mu_context is written back by the +API. The MU context is passed to other Messaging calls as a parameter. + +int mu_msg_set_callback(u32 mu_context, u8 reg, mu_msg_cb_t func); +Set up the callback function for incoming messages. Callback can be set up for +outbound 0, 1, or both outbound registers. + +int mu_msg_post(u32 mu_context, u32 val, u8 reg); +Posting a message in the val parameter. The reg parameter denotes whether +to use register 0, 1. + +int mu_msg_free(u32 mu_context, u8 mode); +Free the usage of messaging component. mode can be specified soft or hard. In +hard mode all resources are unallocated. + +Doorbell Component: +The doorbell registers contain 1 inbound and 1 outbound. Depending on the bits +being set different interrupts are asserted. + +int mu_db_request(u32 *mu_context); +Request the usage of the doorbell register.
+ +int mu_db_set_callback(u32 mu_context, mu_db_cb_t func); +Setting up the inbound callback. + +void mu_db_ring(u32 mu_context, u32 mask); +Write to the outbound db register with mask. + +int mu_db_free(u32 mu_context); +Free the usage of doorbell component. + +Circular Queues Component: +The circular queue component has 4 circular queues. Inbound post, inbound free, +outbound post, outbound free. These queues are used to pass messages. + +int mu_cq_request(u32 *mu_context, u32 q_size); +Request the usage of the queue. See code comment header for q_size. It tells +the API how big of queues to setup. + +int mu_cq_inbound_init(u32 mu_context, mfa_list_t *list, u32 size, + mu_cq_cb_t func); +Init inbound queues. The user must provide a list of free message frames to +be put in inbound free queue and the callback function to handle the inbound +messages. + +int mu_cq_enable(u32 mu_context); +Enables the circular queues mechanism. Called once all the setup functions +are called. + +u32 mu_cq_get_frame(u32 mu_context); +Obtain the address of an outbound free frame for the user. + +int mu_cq_post_frame(u32 mu_context, u32 mfa); +The user can post the frame once getting the frame and put information in the +frame. + +int mu_cq_free(u32 mu_context); +Free the usage of circular queues mechanism. + +Index Registers Component: +The index register provides the mechanism to receive inbound messages. + +int mu_ir_request(u32 *mu_context); +Request of Index Register component usage. + +int mu_ir_set_callback(u32 mu_context, mu_ir_cb_t callback); +Setting up callback for inbound messages. The callback will receive the +value of the register that IAR offsets to. + +int mu_ir_free(u32 mu_context); +Free the usage of Index Registers component. + +void mu_set_irq_threshold(u32 mu_context, int thresh); +Setup the IRQ threshold before relinquish processing in IRQ space. Default +is set at 10 loops. 
+ + +*NOTE: Example of host driver that utilize the MU can be found in the Linux I2O +driver. Specifically i2o_pci and some functions of i2o_core. The I2O driver +only utilize the circular queues mechanism. The other 3 components are simple +enough that they can be easily setup. The MU API provides no flow control for +the messaging mechanism. Flow control of the messaging needs to be established +by a higher layer of software on the IOP or the host driver. + +All Disclaimers apply. Use this at your own discretion. Neither Intel nor I +will be responsible if anything goes wrong. =) + + +TODO +____ + diff --git a/Documentation/arm/XScale/IOP3XX/pmon.txt b/Documentation/arm/XScale/IOP3XX/pmon.txt new file mode 100644 index 000000000..7978494a9 --- /dev/null +++ b/Documentation/arm/XScale/IOP3XX/pmon.txt @@ -0,0 +1,71 @@ + +Intel's XScale Microarchitecture 80312 companion processor provides a +Performance Monitoring Unit (PMON) that can be utilized to provide +information that can be useful for fine tuning of code. This text +file describes the API that's been developed for use by Linux kernel +programmers. Note that to get the most usage out of the PMON, +I highly reccomend getting the XScale reference manual from Intel[1] +and looking at chapter 12. + +To use the PMON, you must #include in your +source file. + +Since there's only one PMON, only one user can currently use the PMON +at a given time. To claim the PMON for usage, call iop310_pmon_claim() which +returns an identifier. When you are done using the PMON, call +iop310_pmon_release() with the id you were given earlier. + +The PMON consists of 14 registers that can be used for performance measurements. +By combining different statistics, you can derive complex performance metrics. + +To start the PMON, just call iop310_pmon_start(mode). 
Mode tells the PMON what +statistics to capture and can each be one of: + + IOP310_PMU_MODE0 + Performance Monitoring Disabled + + IOP310_PMU_MODE1 + Primary PCI bus and internal agents (bridge, dma Ch0, dam Ch1, patu) + + IOP310_PMU_MODE2 + Secondary PCI bus and internal agents (bridge, dma Ch0, dam Ch1, patu) + + IOP310_PMU_MODE3 + Secondary PCI bus and internal agents (external masters 0..2 and Intel + 80312 I/O companion chip) + + IOP310_PMU_MODE4 + Secondary PCI bus and internal agents (external masters 3..5 and Intel + 80312 I/O companion chip) + + IOP310_PMU_MODE5 + Intel 80312 I/O companion chip internal bus, DMA Channels and Application + Accelerator + + IOP310_PMU_MODE6 + Intel 80312 I/O companion chip internal bus, PATU, SATU and Intel 80200 + processor + + IOP310_PMU_MODE7 + Intel 80312 I/O companion chip internal bus, Primary PCI bus, Secondary + PCI bus and Secondary PCI agents (external masters 0..5 & Intel 80312 I/O + companion chip) + +To get the results back, call iop310_pmon_stop(&results) where results is +defined as follows: + +typedef struct _iop310_pmon_result +{ + u32 timestamp; /* Global Time Stamp Register */ + u32 timestamp_overflow; /* Time Stamp overflow count */ + u32 event_count[14]; /* Programmable Event Counter + Registers 1-14 */ + u32 event_overflow[14]; /* Overflow counter for PECR1-14 */ +} iop310_pmon_res_t; + + +-- +This code is still under development, so please feel free to send patches, +questions, comments, etc to me. + +Deepak Saxena diff --git a/Documentation/arm/XScale/cache-lock.txt b/Documentation/arm/XScale/cache-lock.txt new file mode 100644 index 000000000..9728c94f1 --- /dev/null +++ b/Documentation/arm/XScale/cache-lock.txt @@ -0,0 +1,123 @@ + +Intel's XScale Microarchitecture provides support for locking of data +and instructions into the appropriate caches. This file provides +an overview of the API that has been developed to take advantage of this +feature from kernel space. 
Note that there is NO support for user space +cache locking. + +For example usage of this code, grab: + + ftp://source.mvista.com/pub/xscale/cache-test.c + +If you have any questions, comments, patches, etc, please contact me. + +Deepak Saxena + +API DESCRIPTION + + +I. Header File + + #include + +II. Cache Capability Discovery + + SYNOPSIS + + int cache_query(u8 cache_type, + struct cache_capabilities *pcache); + + struct cache_capabilities + { + u32 flags; /* Flags defining capabilities */ + u32 cache_size; /* Cache size in K (1024 bytes) */ + u32 max_lock; /* Maximum lockable region in K */ + } + + /* + * Flags + */ + + /* + * Bit 0: Cache lockability + * Bits 1-31: Reserved for future use + */ + #define CACHE_LOCKABLE 0x00000001 /* Cache can be locked */ + + /* + * Cache Types + */ + #define ICACHE 0x00 + #define DCACHE 0x01 + + DESCRIPTION + + This function fills out the pcache capability identifier for the + requested cache. cache_type is either DCACHE or ICACHE. This + function is not very useful at the moment as all XScale CPU's + have the same size Cache, but is is provided for future XScale + based processors that may have larger cache sizes. + + RETURN VALUE + + This function returns 0 if no error occurs, otherwise it returns + a negative, errno compatible value. + + -EIO Unknown hardware error + +III. Cache Locking + + SYNOPSIS + + int cache_lock(void *addr, u32 len, u8 cache_type, const char *desc); + + DESCRIPTION + + This function locks a physically contigous portion of memory starting + at the virtual address pointed to by addr into the cache referenced + by cache_type. + + The address of the data/instruction that is to be locked must be + aligned on a cache line boundary (L1_CACHE_ALIGNEMENT). + + The desc parameter is an optional (pass NULL if not used) human readable + descriptor of the locked memory region that is used by the cache + management code to build the /proc/cache_locks table. 
+
+ Note that this function does not check whether the address is valid
+ or not before locking it into the cache. That duty is up to the
+ caller. Also, it does not check for duplicate or overlapping
+ entries.
+
+ RETURN VALUE
+
+ If the function is successful in locking the entry into cache, a
+ zero is returned.
+
+ If an error occurs, an appropriate error value is returned.
+
+ -EINVAL The memory address provided was not cache line aligned
+ -ENOMEM Could not allocate memory to complete operation
+ -ENOSPC Not enough space left on cache to lock in requested region
+ -EIO Unknown error
+
+IV. Cache Unlocking
+
+ SYNOPSIS
+
+ int cache_unlock(void *addr)
+
+ DESCRIPTION
+
+ This function unlocks a portion of memory that was previously locked
+ into either the I or D cache.
+
+ RETURN VALUE
+
+ If the entry is cleanly unlocked from the cache, a 0 is returned.
+ In the case of an error, an appropriate error is returned.
+
+ -ENOENT No entry with given address associated with this cache
+ -EIO Unknown error
+
+
diff --git a/Documentation/arm/XScale/pmu.txt b/Documentation/arm/XScale/pmu.txt
new file mode 100644
index 000000000..508575d65
--- /dev/null
+++ b/Documentation/arm/XScale/pmu.txt
@@ -0,0 +1,168 @@
+
+Intel's XScale Microarchitecture processors provide a Performance
+Monitoring Unit (PMU) that can be utilized to provide information
+that can be useful for fine tuning of code. This text file describes
+the API that's been developed for use by Linux kernel programmers.
+When I have some extra time on my hand, I will extend the code to
+provide support for user mode performance monitoring (which is
+probably much more useful). Note that to get the most usage out
+of the PMU, I highly recommend getting the XScale reference manual
+from Intel and looking at chapter 12.
+
+To use the PMU, you must #include in your source file.
+
+Since there's only one PMU, only one user can currently use the PMU
+at a given time.
To claim the PMU for usage, call pmu_claim() which
+returns an identifier. When you are done using the PMU, call
+pmu_release() with the identifier that you were given by pmu_claim.
+
+In addition, the PMU can only be used on XScale based systems that
+provide an external timer. Systems that the PMU is currently supported
+on are:
+
+ - Cyclone IQ80310
+
+Before delving into how to use the PMU code, let's do a quick overview
+of the PMU itself. The PMU consists of three registers that can be
+used for performance measurements. The first is the CCNT register which
+provides the number of clock cycles elapsed since the PMU was started.
+The next two registers, PMN0 and PMN1, are each user programmable to
+provide 1 of 20 different performance statistics. By combining different
+statistics, you can derive complex performance metrics.
+
+To start the PMU, just call pmu_start(pmn0, pmn1). pmn0 and pmn1 tell
+the PMU what statistics to capture and can each be one of:
+
+EVT_ICACHE_MISS
+ Instruction fetches requiring access to external memory
+
+EVT_ICACHE_NO_DELIVER
+ Instruction cache could not deliver an instruction. Either an
+ ICACHE miss or an instruction TLB miss.
+
+EVT_ICACHE_DATA_STALL
+ Stall in execution due to a data dependency. This counter is
+ incremented each cycle in which the condition is present.
+
+EVT_ITLB_MISS
+ Instruction TLB miss
+
+EVT_DTLB_MISS
+ Data TLB miss
+
+EVT_BRANCH
+ A branch instruction was executed and it may or may not have
+ changed program flow
+
+EVT_BRANCH_MISS
+ A branch (B or BL instructions only) was mispredicted
+
+EVT_INSTRUCTION
+ An instruction was executed
+
+EVT_DCACHE_FULL_STALL
+ Stall because data cache buffers are full. Incremented on every
+ cycle in which condition is present.
+
+EVT_DCACHE_FULL_STALL_CONTIG
+ Stall because data cache buffers are full. Incremented on every
+ cycle in which condition is contiguous.
+
+EVT_DCACHE_ACCESS
+ Data cache access (data fetch)
+
+EVT_DCACHE_MISS
+ Data cache miss
+
+EVT_DCACHE_WRITE_BACK
+ Data cache write back. This counter is incremented for every
+ 1/2 line (four words) that are written back.
+
+EVT_PC_CHANGED
+ Software changed the PC. This is incremented only when the
+ software changes the PC and there is no mode change. For example,
+ a MOV instruction that targets the PC would increment the counter.
+ An SWI would not as it triggers a mode change.
+
+EVT_BCU_REQUEST
+ The Bus Control Unit (BCU) received a request from the core
+
+EVT_BCU_FULL
+ The BCU request queue is full. A high value for this event means
+ that the BCU is often waiting for transactions to complete on the
+ external bus.
+
+EVT_BCU_DRAIN
+ The BCU queues were drained due to either a Drain Write Buffer
+ command or an I/O transaction for a page that was marked as
+ uncacheable and unbufferable.
+
+EVT_BCU_ECC_NO_ELOG
+ The BCU detected an ECC error on the memory bus but no ELOG
+ register was available to log the errors.
+
+EVT_BCU_1_BIT_ERR
+ The BCU detected a 1-bit error while reading from the bus.
+
+EVT_RMW
+ An RMW cycle occurred due to narrow write on ECC protected memory.
+
+To get the results back, call pmu_stop(&results) where results is defined
+as a struct pmu_results:
+
+ struct pmu_results
+ {
+ u32 ccnt; /* Clock Counter Register */
+ u32 ccnt_of; /* CCNT overflow count */
+ u32 pmn0; /* Performance Counter Register 0 */
+ u32 pmn0_of;
+ u32 pmn1; /* Performance Counter Register 1 */
+ u32 pmn1_of;
+ };
+
+Pretty simple huh? Following are some examples of how to get some commonly
+wanted numbers out of the PMU data. Note that since you will be dividing
+things, this isn't super useful from the kernel and you need to printk the
+data out to syslog. See [1] for more examples.
+
+Instruction Cache Efficiency
+
+ pmu_start(EVT_INSTRUCTION, EVT_ICACHE_MISS);
+ ...
+
+ pmu_stop(&results);
+
+ icache_miss_rate = results.pmn1 / results.pmn0;
+ cycles_per_instruction = results.ccnt / results.pmn0;
+
+Data Cache Efficiency
+
+ pmu_start(EVT_DCACHE_ACCESS, EVT_DCACHE_MISS);
+ ...
+ pmu_stop(&results);
+
+ dcache_miss_rate = results.pmn1 / results.pmn0;
+
+Instruction Fetch Latency
+
+ pmu_start(EVT_ICACHE_NO_DELIVER, EVT_ICACHE_MISS);
+ ...
+ pmu_stop(&results);
+
+ average_stall_waiting_for_instruction_fetch =
+ results.pmn0 / results.pmn1;
+
+ percent_stall_cycles_due_to_instruction_fetch =
+ results.pmn0 / results.ccnt;
+
+
+ToDo:
+
+- Add support for usermode PMU usage. This might require hooking into
+ the scheduler so that we pause the PMU when the task that requested
+ statistics is scheduled out.
+
+--
+This code is still under development, so please feel free to send patches,
+questions, comments, etc to me.
+
+Deepak Saxena
+
diff --git a/Documentation/arm/XScale/tlb-lock.txt b/Documentation/arm/XScale/tlb-lock.txt
new file mode 100644
index 000000000..1ba3e11d0
--- /dev/null
+++ b/Documentation/arm/XScale/tlb-lock.txt
@@ -0,0 +1,64 @@
+
+Intel's XScale Microarchitecture provides support for locking of TLB
+entries in both the instruction and data TLBs. This file provides
+an overview of the API that has been developed to take advantage of this
+feature from kernel space. Note that there is NO support for user space.
+
+In general, this feature should be used in conjunction with locking
+data or instructions into the appropriate caches. See the file
+cache-lock.txt in this directory.
+
+If you have any questions, comments, patches, etc, please contact me.
+
+Deepak Saxena
+
+
+API DESCRIPTION
+
+I. Header file
+
+ #include
+
+II. Locking an entry into the TLB
+
+ SYNOPSIS
+
+ xscale_tlb_lock(u8 tlb_type, u32 addr);
+
+ /*
+ * TLB types
+ */
+ #define ITLB 0x0
+ #define DTLB 0x1
+
+ DESCRIPTION
+
+ This function locks the virtual to physical mapping for virtual
+ address addr into the requested TLB.
+
+ RETURN VALUE
+
+ If the entry is properly locked into the TLB, a 0 is returned.
+ In case of an error, an appropriate error is returned.
+
+ -ENOSPC No more entries left in the TLB
+ -EIO Unknown error
+
+III. Unlocking an entry from a TLB
+
+ SYNOPSIS
+
+ xscale_tlb_unlock(u8 tlb_type, u32 addr);
+
+ DESCRIPTION
+
+ This function unlocks the entry for virtual address addr from the
+ specified TLB.
+
+ RETURN VALUE
+
+ If the TLB entry is properly unlocked, a 0 is returned.
+ In case of an error, an appropriate error is returned.
+
+ -ENOENT No entry for given address in specified TLB
+
diff --git a/arch/arm/mach-omap/innovator1510.c b/arch/arm/mach-omap/innovator1510.c
new file mode 100644
index 000000000..1309f9664
--- /dev/null
+++ b/arch/arm/mach-omap/innovator1510.c
@@ -0,0 +1,99 @@
+/*
+ * linux/arch/arm/mach-omap/innovator1510.c
+ *
+ * Board specific inits for OMAP-1510 Innovator
+ *
+ * Copyright (C) 2001 RidgeRun, Inc.
+ * Author: Greg Lonnon
+ *
+ * Copyright (C) 2002 MontaVista Software, Inc.
+ *
+ * Separated FPGA interrupts from innovator1510.c and cleaned up for 2.6
+ * Copyright (C) 2004 Nokia Corporation by Tony Lindgren
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */ + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "common.h" + +extern int omap_gpio_init(void); + +void innovator_init_irq(void) +{ + omap_init_irq(); + omap_gpio_init(); + fpga_init_irq(); +} + +static struct resource smc91x_resources[] = { + [0] = { + .start = OMAP1510P1_FPGA_ETHR_START, /* Physical */ + .end = OMAP1510P1_FPGA_ETHR_START + 16, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = INT_ETHER, + .end = INT_ETHER, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device smc91x_device = { + .name = "smc91x", + .id = 0, + .num_resources = ARRAY_SIZE(smc91x_resources), + .resource = smc91x_resources, +}; + +static struct platform_device *devices[] __initdata = { + &smc91x_device, +}; + +static void __init innovator_init(void) +{ + if (!machine_is_innovator()) + return; + + (void) platform_add_devices(devices, ARRAY_SIZE(devices)); +} + +/* Only FPGA needs to be mapped here. All others are done with ioremap */ +static struct map_desc innovator_io_desc[] __initdata = { +{ OMAP1510P1_FPGA_BASE, OMAP1510P1_FPGA_START, OMAP1510P1_FPGA_SIZE, + MT_DEVICE }, +}; + +static void __init innovator_map_io(void) +{ + omap_map_io(); + iotable_init(innovator_io_desc, ARRAY_SIZE(innovator_io_desc)); + + /* Dump the Innovator FPGA rev early - useful info for support. 
*/ + printk("Innovator FPGA Rev %d.%d Board Rev %d\n", + fpga_read(OMAP1510P1_FPGA_REV_HIGH), + fpga_read(OMAP1510P1_FPGA_REV_LOW), + fpga_read(OMAP1510P1_FPGA_BOARD_REV)); +} + +MACHINE_START(INNOVATOR, "TI-Innovator/OMAP1510") + MAINTAINER("MontaVista Software, Inc.") + BOOT_MEM(0x10000000, 0xe0000000, 0xe0000000) + BOOT_PARAMS(0x10000100) + MAPIO(innovator_map_io) + INITIRQ(innovator_init_irq) + INIT_MACHINE(innovator_init) +MACHINE_END diff --git a/arch/arm/mach-omap/innovator1610.c b/arch/arm/mach-omap/innovator1610.c new file mode 100644 index 000000000..4081735b0 --- /dev/null +++ b/arch/arm/mach-omap/innovator1610.c @@ -0,0 +1,91 @@ +/* + * linux/arch/arm/mach-omap/innovator1610.c + * + * This file contains Innovator-specific code. + * + * Copyright (C) 2002 MontaVista Software, Inc. + * + * Copyright (C) 2001 RidgeRun, Inc. + * Author: Greg Lonnon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common.h" + +void +innovator_init_irq(void) +{ + omap_init_irq(); +} + +static struct resource smc91x_resources[] = { + [0] = { + .start = OMAP1610_ETHR_START, /* Physical */ + .end = OMAP1610_ETHR_START + SZ_4K, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 0, /* Really GPIO 0 */ + .end = 0, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device smc91x_device = { + .name = "smc91x", + .id = 0, + .num_resources = ARRAY_SIZE(smc91x_resources), + .resource = smc91x_resources, +}; + +static struct platform_device *devices[] __initdata = { + &smc91x_device, +}; + +static void __init innovator_init(void) +{ + if (!machine_is_innovator()) + return; + + (void) platform_add_devices(devices, ARRAY_SIZE(devices)); +} + +static struct map_desc innovator_io_desc[] __initdata = { +{ OMAP1610_ETHR_BASE, OMAP1610_ETHR_START, OMAP1610_ETHR_SIZE,MT_DEVICE }, +{ OMAP1610_NOR_FLASH_BASE, OMAP1610_NOR_FLASH_START, OMAP1610_NOR_FLASH_SIZE, + MT_DEVICE }, +}; + +static void __init innovator_map_io(void) +{ + omap_map_io(); + iotable_init(innovator_io_desc, ARRAY_SIZE(innovator_io_desc)); +} + +MACHINE_START(INNOVATOR, "TI-Innovator/OMAP1610") + MAINTAINER("MontaVista Software, Inc.") + BOOT_MEM(0x10000000, 0xe0000000, 0xe0000000) + BOOT_PARAMS(0x10000100) + MAPIO(innovator_map_io) + INITIRQ(innovator_init_irq) + INIT_MACHINE(innovator_init) +MACHINE_END + diff --git a/arch/arm/mach-omap/irq.h b/arch/arm/mach-omap/irq.h new file mode 100644 index 000000000..8e1aa7810 --- /dev/null +++ b/arch/arm/mach-omap/irq.h @@ -0,0 +1,172 @@ +/* + * linux/arch/arm/mach-omap/irq.h + * + * OMAP specific interrupt bank definitions + * + * Copyright (C) 2004 Nokia Corporation + * Written by Tony Lindgren + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public 
License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#define OMAP_IRQ_TYPE710 1 +#define OMAP_IRQ_TYPE730 2 +#define OMAP_IRQ_TYPE1510 3 +#define OMAP_IRQ_TYPE1610 4 +#define OMAP_IRQ_TYPE1710 5 + +#define MAX_NR_IRQ_BANKS 4 + +#define BANK_NR_IRQS 32 + +struct omap_irq_desc { + unsigned int cpu_type; + unsigned int start_irq; + unsigned long level_map; + unsigned long base_reg; + unsigned long mask_reg; + unsigned long ack_reg; + struct irqchip *handler; +}; + +struct omap_irq_bank { + unsigned int start_irq; + unsigned long level_map; + unsigned long base_reg; + unsigned long mask_reg; + unsigned long ack_reg; + struct irqchip *handler; +}; + +static void omap_offset_ack_irq(unsigned int irq); +static void omap_offset_mask_irq(unsigned int irq); +static void omap_offset_unmask_irq(unsigned int irq); +static void omap_offset_mask_ack_irq(unsigned int irq); + +/* NOTE: These will not work if irq bank offset != 0x100 */ +#define IRQ_TO_BANK(irq) (irq >> 5) +#define IRQ_BIT(irq) (irq & 0x1f) +#define BANK_OFFSET(bank) ((bank - 1) * 0x100) + +static struct irqchip omap_offset_irq = { + .ack = omap_offset_mask_ack_irq, + .mask = omap_offset_mask_irq, + .unmask = omap_offset_unmask_irq, +}; + +/* + * OMAP-730 interrupt banks + */ +static struct omap_irq_desc omap730_bank0_irqs __initdata = { + .cpu_type = OMAP_IRQ_TYPE730, + .start_irq = 0, + .level_map = 0xb3f8e22f, + .base_reg = OMAP_IH1_BASE, + .mask_reg = OMAP_IH1_BASE + IRQ_MIR, + .ack_reg = OMAP_IH1_BASE + IRQ_CONTROL_REG, + .handler = &omap_offset_irq, /* IH2 regs at 0x100 offsets */ +}; + +static struct omap_irq_desc omap730_bank1_irqs __initdata = { + .cpu_type = OMAP_IRQ_TYPE730, + .start_irq = 32, + .level_map = 0xfdb9c1f2, + .base_reg = OMAP_IH2_BASE, + .mask_reg = OMAP_IH2_BASE + IRQ_MIR, + .ack_reg = OMAP_IH2_BASE + IRQ_CONTROL_REG, + .handler = &omap_offset_irq, /* IH2 regs at 0x100 offsets */ +}; + +static struct omap_irq_desc omap730_bank2_irqs __initdata = { + .cpu_type = OMAP_IRQ_TYPE730, + .start_irq = 64, + .level_map = 0x800040f3, + .base_reg 
= OMAP_IH2_BASE + 0x100, + .mask_reg = OMAP_IH2_BASE + 0x100 + IRQ_MIR, + .ack_reg = OMAP_IH2_BASE + IRQ_CONTROL_REG, /* Not replicated */ + .handler = &omap_offset_irq, /* IH2 regs at 0x100 offsets */ +}; + +/* + * OMAP-1510 interrupt banks + */ +static struct omap_irq_desc omap1510_bank0_irqs __initdata = { + .cpu_type = OMAP_IRQ_TYPE1510, + .start_irq = 0, + .level_map = 0xb3febfff, + .base_reg = OMAP_IH1_BASE, + .mask_reg = OMAP_IH1_BASE + IRQ_MIR, + .ack_reg = OMAP_IH1_BASE + IRQ_CONTROL_REG, + .handler = &omap_offset_irq, /* IH2 regs at 0x100 offsets */ +}; + +static struct omap_irq_desc omap1510_bank1_irqs __initdata = { + .cpu_type = OMAP_IRQ_TYPE1510, + .start_irq = 32, + .level_map = 0xffbfffed, + .base_reg = OMAP_IH2_BASE, + .mask_reg = OMAP_IH2_BASE + IRQ_MIR, + .ack_reg = OMAP_IH2_BASE + IRQ_CONTROL_REG, + .handler = &omap_offset_irq, /* IH2 regs at 0x100 offsets */ +}; + +/* + * OMAP-1610 interrupt banks + */ +static struct omap_irq_desc omap1610_bank0_irqs __initdata = { + .cpu_type = OMAP_IRQ_TYPE1610, + .start_irq = 0, + .level_map = 0xb3fefe8f, + .base_reg = OMAP_IH1_BASE, + .mask_reg = OMAP_IH1_BASE + IRQ_MIR, + .ack_reg = OMAP_IH1_BASE + IRQ_CONTROL_REG, + .handler = &omap_offset_irq, /* IH2 regs at 0x100 offsets */ +}; + +static struct omap_irq_desc omap1610_bank1_irqs __initdata = { + .cpu_type = OMAP_IRQ_TYPE1610, + .start_irq = 32, + .level_map = 0xfffff7ff, + .base_reg = OMAP_IH2_BASE, + .mask_reg = OMAP_IH2_BASE + IRQ_MIR, + .ack_reg = OMAP_IH2_BASE + IRQ_CONTROL_REG, + .handler = &omap_offset_irq, /* IH2 regs at 0x100 offsets */ +}; + +static struct omap_irq_desc omap1610_bank2_irqs __initdata = { + .cpu_type = OMAP_IRQ_TYPE1610, + .start_irq = 64, + .level_map = 0xffffffff, + .base_reg = OMAP_IH2_BASE + 0x100, + .mask_reg = OMAP_IH2_BASE + 0x100 + IRQ_MIR, + .ack_reg = OMAP_IH2_BASE + IRQ_CONTROL_REG, /* Not replicated */ + .handler = &omap_offset_irq, /* IH2 regs at 0x100 offsets */ +}; + +static struct omap_irq_desc omap1610_bank3_irqs 
__initdata = { + .cpu_type = OMAP_IRQ_TYPE1610, + .start_irq = 96, + .level_map = 0xffffffff, + .base_reg = OMAP_IH2_BASE + 0x200, + .mask_reg = OMAP_IH2_BASE + 0x200 + IRQ_MIR, + .ack_reg = OMAP_IH2_BASE + IRQ_CONTROL_REG, /* Not replicated */ + .handler = &omap_offset_irq, /* IH2 regs at 0x100 offsets */ +}; diff --git a/arch/arm/mach-omap/omap-generic.c b/arch/arm/mach-omap/omap-generic.c new file mode 100644 index 000000000..982830dcd --- /dev/null +++ b/arch/arm/mach-omap/omap-generic.c @@ -0,0 +1,77 @@ +/* + * linux/arch/arm/mach-omap/generic.c + * + * Modified from innovator.c + * + * Code for generic OMAP board. Should work on many OMAP systems where + * the device drivers take care of all the necessary hardware initialization. + * Do not put any board specific code to this file; create a new machine + * type if you need custom low-level initializations. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "common.h" + +static void __init omap_generic_init_irq(void) +{ + omap_init_irq(); +} + +/* + * Muxes the serial ports on + */ +static void __init omap_early_serial_init(void) +{ + omap_cfg_reg(UART1_TX); + omap_cfg_reg(UART1_RTS); + + omap_cfg_reg(UART2_TX); + omap_cfg_reg(UART2_RTS); + + omap_cfg_reg(UART3_TX); + omap_cfg_reg(UART3_RX); +} + +static void __init omap_generic_init(void) +{ + if (!machine_is_omap_generic()) + return; + + /* + * Make sure the serial ports are muxed on at this point. + * You have to mux them off in device drivers later on + * if not needed. 
+ */ + if (cpu_is_omap1510()) { + omap_early_serial_init(); + } +} + +static void __init omap_generic_map_io(void) +{ + omap_map_io(); +} + +MACHINE_START(OMAP_GENERIC, "Generic OMAP-1510/1610") + MAINTAINER("Tony Lindgren ") + BOOT_MEM(0x10000000, 0xe0000000, 0xe0000000) + BOOT_PARAMS(0x10000100) + MAPIO(omap_generic_map_io) + INITIRQ(omap_generic_init_irq) + INIT_MACHINE(omap_generic_init) +MACHINE_END diff --git a/arch/arm/mach-omap/omap-perseus2.c b/arch/arm/mach-omap/omap-perseus2.c new file mode 100644 index 000000000..ec05093c9 --- /dev/null +++ b/arch/arm/mach-omap/omap-perseus2.c @@ -0,0 +1,116 @@ +/* + * linux/arch/arm/mach-omap/omap-perseus2.c + * + * Modified from omap-generic.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#include "common.h" + +void omap_perseus2_init_irq(void) +{ + omap_init_irq(); +} + +static struct resource smc91x_resources[] = { + [0] = { + .start = OMAP730_FPGA_ETHR_START, /* Physical */ + .end = OMAP730_FPGA_ETHR_START + SZ_4K, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 0, + .end = 0, + .flags = INT_ETHER, + }, +}; + +static struct platform_device smc91x_device = { + .name = "smc91x", + .id = 0, + .num_resources = ARRAY_SIZE(smc91x_resources), + .resource = smc91x_resources, +}; + +static struct platform_device *devices[] __initdata = { + &smc91x_device, +}; + +static void __init omap_perseus2_init(void) +{ + if (!machine_is_omap_perseus2()) + return; + + (void) platform_add_devices(devices, ARRAY_SIZE(devices)); +} + +/* Only FPGA needs to be mapped here. 
All others are done with ioremap */ +static struct map_desc omap_perseus2_io_desc[] __initdata = { + {OMAP730_FPGA_BASE, OMAP730_FPGA_START, OMAP730_FPGA_SIZE, + MT_DEVICE}, +}; + +static void __init omap_perseus2_map_io(void) +{ + omap_map_io(); + iotable_init(omap_perseus2_io_desc, + ARRAY_SIZE(omap_perseus2_io_desc)); + + /* Early, board-dependent init */ + + /* + * Hold GSM Reset until needed + */ + *DSP_M_CTL &= ~1; + + /* + * UARTs -> done automagically by 8250 driver + */ + + /* + * CSx timings, GPIO Mux ... setup + */ + + /* Flash: CS0 timings setup */ + *((volatile __u32 *) OMAP_FLASH_CFG_0) = 0x0000fff3; + *((volatile __u32 *) OMAP_FLASH_ACFG_0) = 0x00000088; + + /* + * Ethernet support trough the debug board + * CS1 timings setup + */ + *((volatile __u32 *) OMAP_FLASH_CFG_1) = 0x0000fff3; + *((volatile __u32 *) OMAP_FLASH_ACFG_1) = 0x00000000; + + /* + * Configure MPU_EXT_NIRQ IO in IO_CONF9 register, + * It is used as the Ethernet controller interrupt + */ + *((volatile __u32 *) PERSEUS2_IO_CONF_9) &= 0x1FFFFFFF; +} + +MACHINE_START(OMAP_PERSEUS2, "OMAP730 Perseus2") + MAINTAINER("Kevin Hilman ") + BOOT_MEM(0x10000000, 0xe0000000, 0xe0000000) + BOOT_PARAMS(0x10000100) + MAPIO(omap_perseus2_map_io) + INITIRQ(omap_perseus2_init_irq) + INIT_MACHINE(omap_perseus2_init) +MACHINE_END diff --git a/arch/i386/mach-es7000/es7000.c b/arch/i386/mach-es7000/es7000.c new file mode 100644 index 000000000..defe41e6c --- /dev/null +++ b/arch/i386/mach-es7000/es7000.c @@ -0,0 +1,279 @@ +/* + * Written by: Garry Forsgren, Unisys Corporation + * Natalie Protasevich, Unisys Corporation + * This file contains the code to configure and interface + * with Unisys ES7000 series hardware system manager. + * + * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Unisys Corporation, Township Line & Union Meeting + * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: + * + * http://www.unisys.com + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "es7000.h" + +/* + * ES7000 Globals + */ + +volatile unsigned long *psai = NULL; +struct mip_reg *mip_reg; +struct mip_reg *host_reg; +int mip_port; +unsigned long mip_addr, host_addr; + +/* + * Parse the OEM Table + */ + +void __init +parse_unisys_oem (char *oemptr, int oem_entries) +{ + int i; + int success = 0; + unsigned char type, size; + unsigned long val; + char *tp = NULL; + struct psai *psaip = NULL; + struct mip_reg_info *mi; + struct mip_reg *host, *mip; + + tp = oemptr; + + tp += 8; + + for (i=0; i <= oem_entries; i++) { + type = *tp++; + size = *tp++; + tp -= 2; + switch (type) { + case MIP_REG: + mi = (struct mip_reg_info *)tp; + val = MIP_RD_LO(mi->host_reg); + host_addr = val; + host = (struct mip_reg *)val; + host_reg = __va(host); + val = MIP_RD_LO(mi->mip_reg); + mip_addr = val; + mip = (struct mip_reg *)val; + mip_reg = __va(mip); + Dprintk("es7000_mipcfg: host_reg = 0x%lx \n", + (unsigned long)host_reg); + Dprintk("es7000_mipcfg: mip_reg = 0x%lx \n", + (unsigned long)mip_reg); + success++; + break; + case MIP_PSAI_REG: + psaip = (struct psai *)tp; + if (tp != NULL) { + if (psaip->addr) + psai = __va(psaip->addr); + else + psai = NULL; + success++; + } + break; + default: + break; + } + if (i == 6) break; + tp += 
size; + } + + if (success < 2) { + printk("\nNo ES7000 found.\n"); + es7000_plat = 0; + } else { + printk("\nEnabling ES7000 specific features...\n"); + es7000_plat = 1; + } + return; +} + +int __init +find_unisys_acpi_oem_table(unsigned long *oem_addr, int *length) +{ + struct acpi_table_rsdp *rsdp = NULL; + unsigned long rsdp_phys = 0; + struct acpi_table_header *header = NULL; + int i; + struct acpi_table_sdt sdt; + + rsdp_phys = acpi_find_rsdp(); + rsdp = __va(rsdp_phys); + if (rsdp->rsdt_address) { + struct acpi_table_rsdt *mapped_rsdt = NULL; + sdt.pa = rsdp->rsdt_address; + + header = (struct acpi_table_header *) + __acpi_map_table(sdt.pa, sizeof(struct acpi_table_header)); + if (!header) + return -ENODEV; + + sdt.count = (header->length - sizeof(struct acpi_table_header)) >> 3; + mapped_rsdt = (struct acpi_table_rsdt *) + __acpi_map_table(sdt.pa, header->length); + if (!mapped_rsdt) + return -ENODEV; + + header = &mapped_rsdt->header; + + for (i = 0; i < sdt.count; i++) + sdt.entry[i].pa = (unsigned long) mapped_rsdt->entry[i]; + }; + for (i = 0; i < sdt.count; i++) { + + header = (struct acpi_table_header *) + __acpi_map_table(sdt.entry[i].pa, + sizeof(struct acpi_table_header)); + if (!header) + continue; + if (!strncmp((char *) &header->signature, "OEM1", 4)) { + if (!strncmp((char *) &header->oem_id, "UNISYS", 6)) { + void *addr; + struct oem_table *t; + acpi_table_print(header, sdt.entry[i].pa); + t = (struct oem_table *) __acpi_map_table(sdt.entry[i].pa, header->length); + addr = (void *) __acpi_map_table(t->OEMTableAddr, t->OEMTableSize); + *length = header->length; + *oem_addr = (unsigned long) addr; + return 0; + } + } + } + printk("ES7000: did not find Unisys ACPI OEM table!\n"); + return -1; +} + +static void +es7000_spin(int n) +{ + int i = 0; + + while (i++ < n) + rep_nop(); +} + +static int __init +es7000_mip_write(struct mip_reg *mip_reg) +{ + int status = 0; + int spin; + + spin = MIP_SPIN; + while (((unsigned long long)host_reg->off_38 & + 
(unsigned long long)MIP_VALID) != 0) { + if (--spin <= 0) { + printk("es7000_mip_write: Timeout waiting for Host Valid Flag"); + return -1; + } + es7000_spin(MIP_SPIN); + } + + memcpy(host_reg, mip_reg, sizeof(struct mip_reg)); + outb(1, mip_port); + + spin = MIP_SPIN; + + while (((unsigned long long)mip_reg->off_38 & + (unsigned long long)MIP_VALID) == 0) { + if (--spin <= 0) { + printk("es7000_mip_write: Timeout waiting for MIP Valid Flag"); + return -1; + } + es7000_spin(MIP_SPIN); + } + + status = ((unsigned long long)mip_reg->off_0 & + (unsigned long long)0xffff0000000000) >> 48; + mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 & + (unsigned long long)~MIP_VALID); + return status; +} + +int +es7000_start_cpu(int cpu, unsigned long eip) +{ + unsigned long vect = 0, psaival = 0; + + if (psai == NULL) + return -1; + + vect = ((unsigned long)__pa(eip)/0x1000) << 16; + psaival = (0x1000000 | vect | cpu); + + while (*psai & 0x1000000) + ; + + *psai = psaival; + + return 0; + +} + +int +es7000_stop_cpu(int cpu) +{ + int startup; + + if (psai == NULL) + return -1; + + startup= (0x1000000 | cpu); + + while ((*psai & 0xff00ffff) != startup) + ; + + startup = (*psai & 0xff0000) >> 16; + *psai &= 0xffffff; + + return 0; + +} + +void __init +es7000_sw_apic() +{ + if (es7000_plat) { + int mip_status; + struct mip_reg es7000_mip_reg; + + printk("ES7000: Enabling APIC mode.\n"); + memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); + es7000_mip_reg.off_0 = MIP_SW_APIC; + es7000_mip_reg.off_38 = (MIP_VALID); + while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) + printk("es7000_sw_apic: command failed, status = %x\n", + mip_status); + return; + } +} diff --git a/arch/i386/mach-es7000/setup.c b/arch/i386/mach-es7000/setup.c new file mode 100644 index 000000000..4caed0e43 --- /dev/null +++ b/arch/i386/mach-es7000/setup.c @@ -0,0 +1,106 @@ +/* + * Machine specific setup for es7000 + */ + +#include +#include +#include +#include +#include +#include +#include + 
+/** + * pre_intr_init_hook - initialisation prior to setting up interrupt vectors + * + * Description: + * Perform any necessary interrupt initialisation prior to setting up + * the "ordinary" interrupt call gates. For legacy reasons, the ISA + * interrupts should be initialised here if the machine emulates a PC + * in any way. + **/void __init pre_intr_init_hook(void) +{ + init_ISA_irqs(); +} + +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ +static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL}; + +/** + * intr_init_hook - post gate setup interrupt initialisation + * + * Description: + * Fill in any interrupts that may have been left out by the general + * init_IRQ() routine. interrupts having to do with the machine rather + * than the devices on the I/O bus (like APIC interrupts in intel MP + * systems) are started here. + **/ +void __init intr_init_hook(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + apic_intr_init(); +#endif + + if (!acpi_ioapic) + setup_irq(2, &irq2); +} + +/** + * pre_setup_arch_hook - hook called prior to any setup_arch() execution + * + * Description: + * generally used to activate any machine specific identification + * routines that may be needed before setup_arch() runs. On VISWS + * this is used to get the board revision and type. + **/ +void __init pre_setup_arch_hook(void) +{ +} + +/** + * trap_init_hook - initialise system specific traps + * + * Description: + * Called as the final act of trap_init(). Used in VISWS to initialise + * the various board specific APIC traps. + **/ +void __init trap_init_hook(void) +{ +} + +static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL}; + +/** + * time_init_hook - do any specific initialisations for the system timer. 
+ * + * Description: + * Must plug the system timer interrupt source at HZ into the IRQ listed + * in irq_vectors.h:TIMER_IRQ + **/ +void __init time_init_hook(void) +{ + setup_irq(0, &irq0); +} + +#ifdef CONFIG_MCA +/** + * mca_nmi_hook - hook into MCA specific NMI chain + * + * Description: + * The MCA (Microchannel Arcitecture) has an NMI chain for NMI sources + * along the MCA bus. Use this to hook into that chain if you will need + * it. + **/ +void __init mca_nmi_hook(void) +{ + /* If I recall correctly, there's a whole bunch of other things that + * we can do to check for NMI problems, but that's all I know about + * at the moment. + */ + + printk("NMI generated from unknown source!\n"); +} + +#endif diff --git a/arch/i386/mach-es7000/topology.c b/arch/i386/mach-es7000/topology.c new file mode 100644 index 000000000..e96d8910a --- /dev/null +++ b/arch/i386/mach-es7000/topology.c @@ -0,0 +1,64 @@ +/* + * arch/i386/mach-generic/topology.c - Populate driverfs with topology information + * + * Written by: Matthew Dobson, IBM Corporation + * Original Code: Paul Dorwin, IBM Corporation, Patrick Mochel, OSDL + * + * Copyright (C) 2002, IBM Corp. + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Send feedback to + */ +#include +#include +#include + +struct i386_cpu cpu_devices[NR_CPUS]; + +#ifdef CONFIG_NUMA +#include +#include + +struct i386_node node_devices[MAX_NUMNODES]; + +static int __init topology_init(void) +{ + int i; + + for (i = 0; i < num_online_nodes(); i++) + arch_register_node(i); + for (i = 0; i < NR_CPUS; i++) + if (cpu_possible(i)) arch_register_cpu(i); + return 0; +} + +#else /* !CONFIG_NUMA */ + +static int __init topology_init(void) +{ + int i; + + for (i = 0; i < NR_CPUS; i++) + if (cpu_possible(i)) arch_register_cpu(i); + return 0; +} + +#endif /* CONFIG_NUMA */ + +subsys_initcall(topology_init); diff --git a/arch/ia64/kernel/perfmon_hpsim.h b/arch/ia64/kernel/perfmon_hpsim.h new file mode 100644 index 000000000..9c6fe7fc1 --- /dev/null +++ b/arch/ia64/kernel/perfmon_hpsim.h @@ -0,0 +1,75 @@ +/* + * This file contains the HP SKI Simulator PMU register description tables + * and pmc checkers used by perfmon.c. + * + * Copyright (C) 2002-2003 Hewlett Packard Co + * Stephane Eranian + * + * File mostly contributed by Ian Wienand + * + * This file is included as a dummy template so the kernel does not + * try to initalize registers the simulator can't handle. + * + * Note the simulator does not (currently) implement these registers, i.e., + * they do not count anything. But you can read/write them. 
+ */ + +#define RDEP(x) (1UL<<(x)) + +#ifndef CONFIG_IA64_HP_SIM +#error "This file should only be included for the HP Simulator" +#endif + +static pfm_reg_desc_t pfm_hpsim_pmc_desc[PMU_MAX_PMCS]={ +/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL, 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL, 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL, 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL, 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc8 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc9 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc10 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(10), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc11 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(11), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc12 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(12), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc13 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(13), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc14 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(14), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc15 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(15), 0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ 
+}; + +static pfm_reg_desc_t pfm_hpsim_pmd_desc[PMU_MAX_PMDS]={ +/* pmd0 */ { PFM_REG_BUFFER, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmd1 */ { PFM_REG_BUFFER, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmd2 */ { PFM_REG_BUFFER, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmd3 */ { PFM_REG_BUFFER, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, +/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, +/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, +/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, +/* pmd8 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(8),0UL, 0UL, 0UL}}, +/* pmd9 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(9),0UL, 0UL, 0UL}}, +/* pmd10 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, +/* pmd11 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, +/* pmd12 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd13 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(13),0UL, 0UL, 0UL}}, +/* pmd14 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(14),0UL, 0UL, 0UL}}, +/* pmd15 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(15),0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! 
+ */ +static pmu_config_t pmu_conf={ + .pmu_name = "hpsim", + .pmu_family = 0x7, /* ski emulator reports as Itanium */ + .enabled = 0, + .ovfl_val = (1UL << 32) - 1, + .num_ibrs = 0, /* does not use */ + .num_dbrs = 0, /* does not use */ + .pmd_desc = pfm_hpsim_pmd_desc, + .pmc_desc = pfm_hpsim_pmc_desc +}; diff --git a/arch/ppc/mm/cachemap.c b/arch/ppc/mm/cachemap.c new file mode 100644 index 000000000..2033eec9b --- /dev/null +++ b/arch/ppc/mm/cachemap.c @@ -0,0 +1,174 @@ +/* + * PowerPC version derived from arch/arm/mm/consistent.c + * Copyright (C) 2001 Dan Malek (dmalek@jlc.net) + * + * arch/ppc/mm/cachemap.c + * + * Copyright (C) 2000 Russell King + * + * Consistent memory allocators. Used for DMA devices that want to + * share uncached memory with the processor core. The function return + * is the virtual address and 'dma_handle' is the physical address. + * Mostly stolen from the ARM port, with some changes for PowerPC. + * -- Dan + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int map_page(unsigned long va, phys_addr_t pa, int flags); + +/* This function will allocate the requested contiguous pages and + * map them into the kernel's vmalloc() space. This is done so we + * get unique mapping for these pages, outside of the kernel's 1:1 + * virtual:physical mapping. This is necessary so we can cover large + * portions of the kernel with single large page TLB entries, and + * still get unique uncached pages for consistent DMA. 
+ */ +void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle) +{ + int order, err; + struct page *page, *free, *end; + phys_addr_t pa; + unsigned long flags, offset; + struct vm_struct *area = NULL; + unsigned long va = 0; + + BUG_ON(in_interrupt()); + + /* Only allocate page size areas */ + size = PAGE_ALIGN(size); + order = get_order(size); + + free = page = alloc_pages(gfp, order); + if (! page) + return NULL; + + pa = page_to_phys(page); + *dma_handle = page_to_bus(page); + end = page + (1 << order); + + /* + * we need to ensure that there are no cachelines in use, + * or worse dirty in this area. + */ + invalidate_dcache_range((unsigned long)page_address(page), + (unsigned long)page_address(page) + size); + + /* + * alloc_pages() expects the block to be handled as a unit, so + * it only sets the page count on the first page. We set the + * counts on each page so they can be freed individually + */ + for (; page < end; page++) + set_page_count(page, 1); + + + /* Allocate some common virtual space to map the new pages*/ + area = get_vm_area(size, VM_ALLOC); + if (! area) + goto out; + + va = (unsigned long) area->addr; + + flags = _PAGE_KERNEL | _PAGE_NO_CACHE; + + for (offset = 0; offset < size; offset += PAGE_SIZE) { + err = map_page(va+offset, pa+offset, flags); + if (err) { + vfree((void *)va); + va = 0; + goto out; + } + + free++; + } + + out: + /* Free pages which weren't mapped */ + for (; free < end; free++) { + __free_page(free); + } + + return (void *)va; +} + +/* + * free page(s) as defined by the above mapping. + */ +void consistent_free(void *vaddr) +{ + BUG_ON(in_interrupt()); + vfree(vaddr); +} + +/* + * make an area consistent. 
+ */ +void consistent_sync(void *vaddr, size_t size, int direction) +{ + unsigned long start = (unsigned long)vaddr; + unsigned long end = start + size; + + switch (direction) { + case DMA_NONE: + BUG(); + case DMA_FROM_DEVICE: /* invalidate only */ + invalidate_dcache_range(start, end); + break; + case DMA_TO_DEVICE: /* writeback only */ + clean_dcache_range(start, end); + break; + case DMA_BIDIRECTIONAL: /* writeback and invalidate */ + flush_dcache_range(start, end); + break; + } +} + +/* + * consistent_sync_page make a page are consistent. identical + * to consistent_sync, but takes a struct page instead of a virtual address + */ + +void consistent_sync_page(struct page *page, unsigned long offset, + size_t size, int direction) +{ + unsigned long start; + + start = (unsigned long)page_address(page) + offset; + consistent_sync((void *)start, size, direction); +} + +EXPORT_SYMBOL(consistent_sync_page); diff --git a/arch/ppc/ocp/Makefile b/arch/ppc/ocp/Makefile new file mode 100644 index 000000000..f669ee042 --- /dev/null +++ b/arch/ppc/ocp/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for the linux kernel. +# + +obj-y := ocp.o ocp-driver.o ocp-probe.o + diff --git a/arch/ppc/ocp/ocp-driver.c b/arch/ppc/ocp/ocp-driver.c new file mode 100644 index 000000000..9f6bb3f42 --- /dev/null +++ b/arch/ppc/ocp/ocp-driver.c @@ -0,0 +1,195 @@ +/* + * FILE NAME: ocp-driver.c + * + * BRIEF MODULE DESCRIPTION: + * driver callback, id matching and registration + * Based on drivers/pci/pci-driver, Copyright (c) 1997--1999 Martin Mares + * + * Maintained by: Armin + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include + +/* + * Registration of OCP drivers and handling of hot-pluggable devices. 
+ */ + +static int +ocp_device_probe(struct device *dev) +{ + int error = 0; + struct ocp_driver *drv; + struct ocp_device *ocp_dev; + + drv = to_ocp_driver(dev->driver); + ocp_dev = to_ocp_dev(dev); + + if (drv->probe) { + error = drv->probe(ocp_dev); + DBG("probe return code %d\n", error); + if (error >= 0) { + ocp_dev->driver = drv; + error = 0; + } + } + return error; +} + +static int +ocp_device_remove(struct device *dev) +{ + struct ocp_device *ocp_dev = to_ocp_dev(dev); + + if (ocp_dev->driver) { + if (ocp_dev->driver->remove) + ocp_dev->driver->remove(ocp_dev); + ocp_dev->driver = NULL; + } + return 0; +} + +static int +ocp_device_suspend(struct device *dev, u32 state, u32 level) +{ + struct ocp_device *ocp_dev = to_ocp_dev(dev); + + int error = 0; + + if (ocp_dev->driver) { + if (level == SUSPEND_SAVE_STATE && ocp_dev->driver->save_state) + error = ocp_dev->driver->save_state(ocp_dev, state); + else if (level == SUSPEND_POWER_DOWN + && ocp_dev->driver->suspend) + error = ocp_dev->driver->suspend(ocp_dev, state); + } + return error; +} + +static int +ocp_device_resume(struct device *dev, u32 level) +{ + struct ocp_device *ocp_dev = to_ocp_dev(dev); + + if (ocp_dev->driver) { + if (level == RESUME_POWER_ON && ocp_dev->driver->resume) + ocp_dev->driver->resume(ocp_dev); + } + return 0; +} + +/** + * ocp_bus_match - Works out whether an OCP device matches any + * of the IDs listed for a given OCP driver. + * @dev: the generic device struct for the OCP device + * @drv: the generic driver struct for the OCP driver + * + * Used by a driver to check whether a OCP device present in the + * system is in its list of supported devices. Returns 1 for a + * match, or 0 if there is no match. 
+ */ +static int +ocp_bus_match(struct device *dev, struct device_driver *drv) +{ + struct ocp_device *ocp_dev = to_ocp_dev(dev); + struct ocp_driver *ocp_drv = to_ocp_driver(drv); + const struct ocp_device_id *ids = ocp_drv->id_table; + + if (!ids) + return 0; + + while (ids->vendor || ids->device) { + if ((ids->vendor == OCP_ANY_ID + || ids->vendor == ocp_dev->vendor) + && (ids->device == OCP_ANY_ID + || ids->device == ocp_dev->device)) { + DBG("Bus match -vendor:%x device:%x\n", ids->vendor, + ids->device); + return 1; + } + ids++; + } + return 0; +} + +struct bus_type ocp_bus_type = { + .name = "ocp", + .match = ocp_bus_match, +}; + +static int __init +ocp_driver_init(void) +{ + return bus_register(&ocp_bus_type); +} + +postcore_initcall(ocp_driver_init); + +/** + * ocp_register_driver - register a new ocp driver + * @drv: the driver structure to register + * + * Adds the driver structure to the list of registered drivers + * Returns the number of ocp devices which were claimed by the driver + * during registration. The driver remains registered even if the + * return value is zero. + */ +int +ocp_register_driver(struct ocp_driver *drv) +{ + int count = 0; + + /* initialize common driver fields */ + drv->driver.name = drv->name; + drv->driver.bus = &ocp_bus_type; + drv->driver.probe = ocp_device_probe; + drv->driver.resume = ocp_device_resume; + drv->driver.suspend = ocp_device_suspend; + drv->driver.remove = ocp_device_remove; + + /* register with core */ + count = driver_register(&drv->driver); + return count ? count : 1; +} + +/** + * ocp_unregister_driver - unregister a ocp driver + * @drv: the driver structure to unregister + * + * Deletes the driver structure from the list of registered OCP drivers, + * gives it a chance to clean up by calling its remove() function for + * each device it was responsible for, and marks those devices as + * driverless. 
+ */ + +void +ocp_unregister_driver(struct ocp_driver *drv) +{ + driver_unregister(&drv->driver); +} + +EXPORT_SYMBOL(ocp_register_driver); +EXPORT_SYMBOL(ocp_unregister_driver); +EXPORT_SYMBOL(ocp_bus_type); diff --git a/arch/ppc/ocp/ocp-probe.c b/arch/ppc/ocp/ocp-probe.c new file mode 100644 index 000000000..bb4aff7a6 --- /dev/null +++ b/arch/ppc/ocp/ocp-probe.c @@ -0,0 +1,113 @@ +/* + * FILE NAME: ocp-probe.c + * + * BRIEF MODULE DESCRIPTION: + * Device scanning & bus set routines + * Based on drivers/pci/probe, Copyright (c) 1997--1999 Martin Mares + * + * Maintained by: Armin + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include +#include + +LIST_HEAD(ocp_devices); +struct device *ocp_bus; + +static struct ocp_device * __devinit +ocp_setup_dev(struct ocp_def *odef, unsigned int index) +{ + struct ocp_device *dev; + + dev = kmalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return NULL; + memset(dev, 0, sizeof(*dev)); + + dev->vendor = odef->vendor; + dev->device = odef->device; + dev->num = ocp_get_num(dev->device); + dev->paddr = odef->paddr; + dev->irq = odef->irq; + dev->pm = odef->pm; + dev->current_state = 4; + + sprintf(dev->name, "OCP device %04x:%04x", dev->vendor, dev->device); + + DBG("%s %s 0x%lx irq:%d pm:0x%lx \n", dev->slot_name, dev->name, + (unsigned long) dev->paddr, dev->irq, dev->pm); + + /* now put in global tree */ + sprintf(dev->dev.bus_id, "%d", index); + dev->dev.parent = ocp_bus; + dev->dev.bus = &ocp_bus_type; + device_register(&dev->dev); + + return dev; +} + +static struct device * __devinit ocp_alloc_primary_bus(void) +{ + struct device *b; + + b = kmalloc(sizeof(struct device), GFP_KERNEL); + if (b == NULL) + return NULL; + memset(b, 0, sizeof(struct device)); + strcpy(b->bus_id, "ocp"); + + device_register(b); + + return b; +} + +void __devinit ocp_setup_devices(struct ocp_def *odef) +{ + int index; + struct ocp_device *dev; + + if (ocp_bus == NULL) + ocp_bus = ocp_alloc_primary_bus(); + for (index = 0; odef->vendor != OCP_VENDOR_INVALID; ++index, ++odef) { + dev = ocp_setup_dev(odef, index); + if (dev != NULL) + list_add_tail(&dev->global_list, &ocp_devices); + } +} + +extern struct ocp_def core_ocp[]; + +static int __init +ocparch_init(void) +{ + ocp_setup_devices(core_ocp); + return 0; +} + +subsys_initcall(ocparch_init); + +EXPORT_SYMBOL(ocp_devices); diff --git a/arch/ppc/ocp/ocp.c b/arch/ppc/ocp/ocp.c new file mode 100644 index 000000000..8df60d79f --- /dev/null +++ b/arch/ppc/ocp/ocp.c @@ -0,0 +1,109 @@ +/* + * ocp.c + * + * The is drived from pci.c + * + * Current Maintainer + * Armin Kuster 
akuster@dslextreme.com + * Jan, 2002 + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * ocp_get_num - This determines how many OCP devices of a given + * device are registered + * @device: OCP device such as HOST, PCI, GPT, UART, OPB, IIC, GPIO, EMAC, ZMII, + * + * The routine returns the number that devices which is registered + */ +unsigned int ocp_get_num(unsigned int device) +{ + unsigned int count = 0; + struct ocp_device *ocp; + struct list_head *ocp_l; + + list_for_each(ocp_l, &ocp_devices) { + ocp = list_entry(ocp_l, struct ocp_device, global_list); + if (device == ocp->device) + count++; + } + return count; +} + +/** + * ocp_get_dev - get ocp driver pointer for ocp device and instance of it + * @device: OCP device such as PCI, GPT, UART, OPB, IIC, GPIO, EMAC, ZMII + * @dev_num: ocp device number whos paddr you want + * + * The routine returns ocp device pointer + * in list based on device and instance of that device + * + */ +struct ocp_device * +ocp_get_dev(unsigned int device, int dev_num) +{ + struct ocp_device *ocp; + struct list_head *ocp_l; + int count = 0; + + list_for_each(ocp_l, &ocp_devices) { + ocp = list_entry(ocp_l, struct ocp_device, global_list); + if (device == ocp->device) { + if (dev_num == count) + return ocp; + count++; + } + } + return NULL; +} + +EXPORT_SYMBOL(ocp_get_dev); +EXPORT_SYMBOL(ocp_get_num); + +#ifdef CONFIG_PM +int ocp_generic_suspend(struct ocp_device *pdev, u32 state) +{ + ocp_force_power_off(pdev); + return 0; +} + +int ocp_generic_resume(struct ocp_device *pdev) +{ + ocp_force_power_on(pdev); +} + +EXPORT_SYMBOL(ocp_generic_suspend); +EXPORT_SYMBOL(ocp_generic_resume); +#endif /* CONFIG_PM */ diff --git a/drivers/char/dz.c b/drivers/char/dz.c new file mode 100644 index 000000000..23630030e --- /dev/null +++ b/drivers/char/dz.c @@ -0,0 +1,1540 @@ +/* + * dz.c: Serial port driver for DECStations equiped + * with the DZ chipset. + * + * Copyright (C) 1998 Olivier A. D. 
Lebaillif + * + * Email: olivier.lebaillif@ifrsys.com + * + * [31-AUG-98] triemer + * Changed IRQ to use Harald's dec internals interrupts.h + * removed base_addr code - moving address assignment to setup.c + * Changed name of dz_init to rs_init to be consistent with tc code + * [13-NOV-98] triemer fixed code to receive characters + * after patches by harald to irq code. + * [09-JAN-99] triemer minor fix for schedule - due to removal of timeout + * field from "current" - somewhere between 2.1.121 and 2.1.131 +Qua Jun 27 15:02:26 BRT 2001 + * [27-JUN-2001] Arnaldo Carvalho de Melo - cleanups + * + * Parts (C) 1999 David Airlie, airlied@linux.ie + * [07-SEP-99] Bugfixes + */ + +/* #define DEBUG_DZ 1 */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for definition of SERIAL */ + +/* for definition of struct console */ +#ifdef CONFIG_SERIAL_CONSOLE +#define CONSOLE_LINE (3) +#endif /* ifdef CONFIG_SERIAL_CONSOLE */ +#if defined(CONFIG_SERIAL_CONSOLE) || defined(DEBUG_DZ) +#include +#endif /* if defined(CONFIG_SERIAL_CONSOLE) || defined(DEBUG_DZ) */ + +#include +#include + +#include +#include +#include +#include +#include + +#ifdef DEBUG_DZ +#include +#include +#include + +extern int (*prom_printf) (char *,...); +#endif + + + +#include "dz.h" + +#define DZ_INTR_DEBUG 1 + +DECLARE_TASK_QUEUE(tq_serial); + +static struct dz_serial *lines[4]; +static unsigned char tmp_buffer[256]; + + + +#ifdef DEBUG_DZ +/* + * debugging code to send out chars via prom + */ +static void debug_console( const char *s,int count) +{ + unsigned i; + + for (i = 0; i < count; i++) { + if (*s == 10) + prom_printf("%c", 13); + prom_printf("%c", *s++); + } +} +#endif + +/* + * ------------------------------------------------------------ + * dz_in () and dz_out () + * + * These routines are used to access the registers of the DZ + * chip, hiding relocation differences between implementation. 
+ * ------------------------------------------------------------ + */ + +static inline unsigned short dz_in (struct dz_serial *info, unsigned offset) +{ + volatile u16 *addr = (volatile u16 *)(info->port + offset); + + return *addr; +} + +static inline void dz_out (struct dz_serial *info, unsigned offset, + unsigned short value) +{ + volatile u16 *addr = (volatile u16 *)(info->port + offset); + *addr = value; +} + +/* + * ------------------------------------------------------------ + * rs_stop () and rs_start () + * + * These routines are called before setting or resetting + * tty->stopped. They enable or disable transmitter interrupts, + * as necessary. + * ------------------------------------------------------------ + */ + +static void dz_stop (struct tty_struct *tty) +{ + struct dz_serial *info; + unsigned short mask, tmp; + + if (!tty) + return; + + info = (struct dz_serial *)tty->driver_data; + + mask = 1 << info->line; + tmp = dz_in (info, DZ_TCR); /* read the TX flag */ + + tmp &= ~mask; /* clear the TX flag */ + dz_out (info, DZ_TCR, tmp); +} + +static void dz_start (struct tty_struct *tty) +{ + struct dz_serial *info = (struct dz_serial *)tty->driver_data; + unsigned short mask, tmp; + + mask = 1 << info->line; + tmp = dz_in (info, DZ_TCR); /* read the TX flag */ + + tmp |= mask; /* set the TX flag */ + dz_out (info, DZ_TCR, tmp); +} + +/* + * ------------------------------------------------------------ + * Here starts the interrupt handling routines. All of the + * following subroutines are declared as inline and are folded + * into dz_interrupt. They were separated out for readability's + * sake. + * + * Note: rs_interrupt() is a "fast" interrupt, which means that it + * runs with interrupts turned off. People who may want to modify + * rs_interrupt() should try to keep the interrupt handler as fast as + * possible. 
After you are done making modifications, it is not a bad + * idea to do: + * + * gcc -S -DKERNEL -Wall -Wstrict-prototypes -O6 -fomit-frame-pointer dz.c + * + * and look at the resulting assemble code in serial.s. + * + * ------------------------------------------------------------ + */ + +/* + * ------------------------------------------------------------ + * dz_sched_event () + * + * This routine is used by the interrupt handler to schedule + * processing in the software interrupt portion of the driver. + * ------------------------------------------------------------ + */ +static inline void dz_sched_event (struct dz_serial *info, int event) +{ + info->event |= 1 << event; + queue_task(&info->tqueue, &tq_serial); + mark_bh(SERIAL_BH); +} + +/* + * ------------------------------------------------------------ + * receive_char () + * + * This routine deals with inputs from any lines. + * ------------------------------------------------------------ + */ +static inline void receive_chars (struct dz_serial *info_in) +{ + struct dz_serial *info; + struct tty_struct *tty = 0; + struct async_icount *icount; + int ignore = 0; + unsigned short status, tmp; + unsigned char ch; + + /* + * This code is going to be a problem... the call to tty_flip_buffer + * is going to need to be rethought... + */ + do { + status = dz_in (info_in, DZ_RBUF); + info = lines[LINE(status)]; + + /* punt so we don't get duplicate characters */ + if (!(status & DZ_DVAL)) + goto ignore_char; + + ch = UCHAR(status); /* grab the char */ + +#if 0 + if (info->is_console) { + if (ch == 0) + return; /* it's a break ... 
*/ + } +#endif + + tty = info->tty; /* now tty points to the proper dev */ + icount = &info->icount; + + if (!tty) + break; + if (tty->flip.count >= TTY_FLIPBUF_SIZE) break; + + *tty->flip.char_buf_ptr = ch; + *tty->flip.flag_buf_ptr = 0; + icount->rx++; + + /* keep track of the statistics */ + if (status & (DZ_OERR | DZ_FERR | DZ_PERR)) { + if (status & DZ_PERR) /* parity error */ + icount->parity++; + else if (status & DZ_FERR) /* frame error */ + icount->frame++; + if (status & DZ_OERR) /* overrun error */ + icount->overrun++; + + /* + * Check to see if we should ignore the character and + * mask off conditions that should be ignored + */ + + if (status & info->ignore_status_mask) { + if (++ignore > 100) + break; + goto ignore_char; + } + + /* mask off the error conditions we want to ignore */ + tmp = status & info->read_status_mask; + + if (tmp & DZ_PERR) { + *tty->flip.flag_buf_ptr = TTY_PARITY; +#ifdef DEBUG_DZ + debug_console("PERR\n",5); +#endif /* DEBUG_DZ */ + } else if (tmp & DZ_FERR) { + *tty->flip.flag_buf_ptr = TTY_FRAME; +#ifdef DEBUG_DZ + debug_console("FERR\n",5); +#endif /* DEBUG_DZ */ + } if (tmp & DZ_OERR) { +#ifdef DEBUG_DZ + debug_console("OERR\n",5); +#endif /* DEBUG_DZ */ + if (tty->flip.count < TTY_FLIPBUF_SIZE) { + tty->flip.count++; + tty->flip.flag_buf_ptr++; + tty->flip.char_buf_ptr++; + *tty->flip.flag_buf_ptr = TTY_OVERRUN; + } + } + } + tty->flip.flag_buf_ptr++; + tty->flip.char_buf_ptr++; + tty->flip.count++; +ignore_char: + ; + } while (status & DZ_DVAL); + + if (tty) + tty_flip_buffer_push(tty); +} + +/* + * ------------------------------------------------------------ + * transmit_char () + * + * This routine deals with outputs to any lines. 
 * ------------------------------------------------------------
 */
static inline void transmit_chars (struct dz_serial *info)
{
    unsigned char tmp;

    if (info->x_char) {          /* XON/XOFF chars take priority */
        dz_out(info, DZ_TDR, info->x_char);
        info->icount.tx++;
        info->x_char = 0;
        return;
    }

    /* if nothing to do or stopped or hardware stopped */
    if ((info->xmit_cnt <= 0) || info->tty->stopped ||
        info->tty->hw_stopped) {
        dz_stop(info->tty);
        return;
    }

    /*
     * If something to do ... (remember the dz has no output fifo so we go
     * one char at a time :-<
     */
    tmp = (unsigned short) info->xmit_buf[info->xmit_tail++];
    dz_out(info, DZ_TDR, tmp);
    info->xmit_tail = info->xmit_tail & (DZ_XMIT_SIZE - 1);  /* ring wrap */
    info->icount.tx++;

    /* wake sleeping writers well before the buffer runs dry */
    if (--info->xmit_cnt < WAKEUP_CHARS)
        dz_sched_event(info, DZ_EVENT_WRITE_WAKEUP);

    /* Are we done */
    if (info->xmit_cnt <= 0)
        dz_stop(info->tty);
}

/*
 * ------------------------------------------------------------
 * check_modem_status ()
 *
 * Only valid for the MODEM line, duh!
 * ------------------------------------------------------------
 */
static inline void check_modem_status (struct dz_serial *info)
{
    unsigned short status;

    /* if not the modem line just return */
    if (info->line != DZ_MODEM)
        return;

    status = dz_in(info, DZ_MSR);

    /* it's easy, since DSR2 is the only bit in the register */
    if (status)
        info->icount.dsr++;
}

/*
 * ------------------------------------------------------------
 * dz_interrupt ()
 *
 * this is the main interrupt routine for the DZ chip.
 * It deals with the multiple ports.
 * ------------------------------------------------------------
 */
static void dz_interrupt (int irq, void *dev, struct pt_regs *regs)
{
    struct dz_serial *info;
    unsigned short status;

    /* get the reason why we just got an irq */
    status = dz_in((struct dz_serial *)dev, DZ_CSR);
    info = lines[LINE(status)];   /* re-arrange info the proper port */

    if (status & DZ_RDONE)
        receive_chars(info);      /* the receive function */

    if (status & DZ_TRDY)
        transmit_chars (info);
}

/*
 * -------------------------------------------------------------------
 * Here ends the DZ interrupt routines.
 * -------------------------------------------------------------------
 */

/*
 * This routine is used to handle the "bottom half" processing for the
 * serial driver, known also the "software interrupt" processing.
 * This processing is done at the kernel interrupt level, after the
 * rs_interrupt() has returned, BUT WITH INTERRUPTS TURNED ON.  This
 * is where time-consuming activities which can not be done in the
 * interrupt driver proper are done; the interrupt driver schedules
 * them using rs_sched_event(), and they get done here.
 */
static void do_serial_bh (void)
{
    run_task_queue (&tq_serial);
}

/*
 * Bottom-half worker queued by dz_sched_event(): performs the
 * write-wakeup notification to the line discipline and to any
 * sleeper on the tty's write wait queue.
 */
static void do_softint (void *private_data)
{
    struct dz_serial *info = (struct dz_serial *) private_data;
    struct tty_struct *tty = info->tty;

    if (!tty)
        return;

    if (test_and_clear_bit(DZ_EVENT_WRITE_WAKEUP, &info->event)) {
        if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
            tty->ldisc.write_wakeup)
            (tty->ldisc.write_wakeup) (tty);
        wake_up_interruptible (&tty->write_wait);
    }
}

/*
 * -------------------------------------------------------------------
 * This routine is called from the scheduler tqueue when the interrupt
 * routine has signalled that a hangup has occurred.
The path of
 * hangup processing is:
 *
 *      serial interrupt routine -> (scheduler tqueue) ->
 *      do_serial_hangup() -> tty->hangup() -> rs_hangup()
 * -------------------------------------------------------------------
 */
static void do_serial_hangup (void *private_data)
{
    struct dz_serial *info = (struct dz_serial *) private_data;
    struct tty_struct *tty = info->tty;

    if (!tty)
        return;

    tty_hangup(tty);
}

/*
 * -------------------------------------------------------------------
 * startup ()
 *
 * various initialization tasks: allocate the transmit buffer,
 * enable receive/transmit interrupts and line scanning on the chip,
 * and program the line speed.  Returns 0 on success, -ENODEV if the
 * port has no base address, -ENOMEM if the buffer page cannot be
 * allocated.  Idempotent: a second call on an initialized port is a
 * no-op.
 * -------------------------------------------------------------------
 */
static int startup (struct dz_serial *info)
{
    unsigned long page, flags;
    unsigned short tmp;

    if (info->is_initialized)
        return 0;

    save_and_cli(flags);          /* guard against the DZ interrupt */

    if (!info->port) {
        if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags);
        restore_flags(flags);
        return -ENODEV;
    }

    if (!info->xmit_buf) {
        /* one zeroed page serves as the transmit ring buffer */
        page = get_zeroed_page(GFP_KERNEL);
        if (!page) {
            restore_flags (flags);
            return -ENOMEM;
        }
        info->xmit_buf = (unsigned char *)page;
    }

    if (info->tty)
        clear_bit(TTY_IO_ERROR, &info->tty->flags);

    /* enable the interrupt and the scanning */
    tmp = dz_in(info, DZ_CSR);
    tmp |= (DZ_RIE | DZ_TIE | DZ_MSE);
    dz_out(info, DZ_CSR, tmp);

    info->xmit_cnt = info->xmit_head = info->xmit_tail = 0;

    change_speed(info);           /* set up the speed */

    /*
     * Clear the line transmitter buffer.  I can't figure out why I need
     * to do this - but its necessary - in order for the console portion
     * and the interrupt portion to live happily side by side.
     */

    info->is_initialized = 1;

    restore_flags(flags);

    return 0;
}

/*
 * -------------------------------------------------------------------
 * shutdown ()
 *
 * This routine will shutdown a serial port; interrupts are disabled, and
 * DTR is dropped if the hangup on close termio flag is on.
 * -------------------------------------------------------------------
 */
static void shutdown (struct dz_serial *info)
{
    unsigned long flags;
    unsigned short tmp;

    if (!info->is_initialized)
        return;

    save_and_cli(flags);

    dz_stop (info->tty);

    info->cflags &= ~DZ_CREAD;    /* turn off receive enable flag */
    dz_out(info, DZ_LPR, info->cflags);

    if (info->xmit_buf) {         /* free Tx buffer */
        free_page((unsigned long)info->xmit_buf);
        info->xmit_buf = 0;
    }

    /* drop DTR if the termios ask for hangup-on-close */
    if (!info->tty || (info->tty->termios->c_cflag & HUPCL)) {
        tmp = dz_in(info, DZ_TCR);
        if (tmp & DZ_MODEM_DTR) {
            tmp &= ~DZ_MODEM_DTR;
            dz_out(info, DZ_TCR, tmp);
        }
    }

    if (info->tty)
        set_bit (TTY_IO_ERROR, &info->tty->flags);

    info->is_initialized = 0;

    restore_flags (flags);
}

/*
 * -------------------------------------------------------------------
 * change_speed ()
 *
 * Translate the current termios settings (character size, stop bits,
 * parity, baud rate) into a DZ Line Parameter Register value and
 * program it, then recompute the read/ignore status masks.
 * -------------------------------------------------------------------
 */
static void change_speed (struct dz_serial *info)
{
    unsigned long flags;
    unsigned cflag;
    int baud;

    if (!info->tty || !info->tty->termios)
        return;

    save_and_cli(flags);

    /* the low bits of the LPR select the line being programmed */
    info->cflags = info->line;

    cflag = info->tty->termios->c_cflag;

    switch (cflag & CSIZE) {
    case CS5:
        info->cflags |= DZ_CS5;
        break;
    case CS6:
        info->cflags |= DZ_CS6;
        break;
    case CS7:
        info->cflags |= DZ_CS7;
        break;
    case CS8:
    default:
        info->cflags |= DZ_CS8;
    }

    if (cflag & CSTOPB)
        info->cflags |= DZ_CSTOPB;
    if (cflag & PARENB)
        info->cflags |= DZ_PARENB;
    if (cflag & PARODD)
        info->cflags |= DZ_PARODD;

    baud = tty_get_baud_rate(info->tty);
    switch (baud) {
    case 50:
        info->cflags |= DZ_B50;
        break;
    case 75:
        info->cflags |= DZ_B75;
        break;
    case 110:
        info->cflags |= DZ_B110;
        break;
    case 134:
        info->cflags |= DZ_B134;
        break;
    case 150:
        info->cflags |= DZ_B150;
        break;
    case 300:
        info->cflags |= DZ_B300;
        break;
    case 600:
        info->cflags |= DZ_B600;
        break;
    case 1200:
        info->cflags |= DZ_B1200;
        break;
    case 1800:
        info->cflags |= DZ_B1800;
        break;
    case 2000:
        info->cflags |= DZ_B2000;
        break;
    case 2400:
        info->cflags |= DZ_B2400;
        break;
    case 3600:
        info->cflags |= DZ_B3600;
        break;
    case 4800:
        info->cflags |= DZ_B4800;
        break;
    case 7200:
        info->cflags |= DZ_B7200;
        break;
    case 9600:
    default:
        info->cflags |= DZ_B9600;
    }

    info->cflags |= DZ_RXENAB;
    dz_out(info, DZ_LPR, info->cflags);

    /* setup accept flag */
    info->read_status_mask = DZ_OERR;
    if (I_INPCK(info->tty))
        info->read_status_mask |= (DZ_FERR | DZ_PERR);

    /* characters to ignore */
    info->ignore_status_mask = 0;
    if (I_IGNPAR(info->tty))
        info->ignore_status_mask |= (DZ_FERR | DZ_PERR);

    restore_flags(flags);
}

/*
 * -------------------------------------------------------------------
 * dz_flush_chars ()
 *
 * Kick the transmitter: (re)start output if there is anything queued
 * and the line is not flow-stopped.
 * -------------------------------------------------------------------
 */
static void dz_flush_chars (struct tty_struct *tty)
{
    struct dz_serial *info = (struct dz_serial *)tty->driver_data;
    unsigned long flags;

    if (info->xmit_cnt <= 0 || tty->stopped || tty->hw_stopped ||
        !info->xmit_buf)
        return;

    save_and_cli(flags);
    dz_start (info->tty);
    restore_flags(flags);
}


/*
 * -------------------------------------------------------------------
 * dz_write ()
 *
 * main output routine.
 * -------------------------------------------------------------------
 * Copies up to `count` bytes from `buf` (kernel or user space,
 * selected by `from_user`) into the per-port transmit ring buffer
 * and starts the transmitter.  Returns the number of bytes queued,
 * or -EFAULT if a user-space copy faults before anything is queued.
 */
static int dz_write (struct tty_struct *tty, int from_user,
                     const unsigned char *buf, int count)
{
    struct dz_serial *info = (struct dz_serial *)tty->driver_data;
    unsigned long flags;
    int c, ret = 0;

    if (!tty )
        return ret;
    if (!info->xmit_buf)
        return ret;
    if (!tmp_buf)
        tmp_buf = tmp_buffer;   /* NOTE(review): tmp_buffer is declared
                                 * outside this file section -- verify */

    if (from_user) {
        /* tmp_buf is shared between ports; serialize its use */
        down (&tmp_buf_sem);
        while (1) {
            /* limit each pass to free space AND to the span up to
             * the ring-buffer wrap point */
            c = MIN(count, MIN(DZ_XMIT_SIZE - info->xmit_cnt - 1,
                               DZ_XMIT_SIZE - info->xmit_head));
            if (c <= 0)
                break;

            /* stage through tmp_buf: copy_from_user may sleep, so it
             * must happen before interrupts are disabled */
            c -= copy_from_user (tmp_buf, buf, c);
            if (!c) {
                if (!ret)
                    ret = -EFAULT;
                break;
            }

            save_and_cli(flags);

            /* recompute: the interrupt may have drained the buffer
             * while we were copying */
            c = MIN(c, MIN(DZ_XMIT_SIZE - info->xmit_cnt - 1,
                           DZ_XMIT_SIZE - info->xmit_head));
            memcpy(info->xmit_buf + info->xmit_head, tmp_buf, c);
            info->xmit_head = ((info->xmit_head + c) &
                               (DZ_XMIT_SIZE - 1));
            info->xmit_cnt += c;
            restore_flags(flags);

            buf += c;
            count -= c;
            ret += c;
        }
        up(&tmp_buf_sem);
    } else {
        while (1) {
            save_and_cli(flags);

            c = MIN(count, MIN(DZ_XMIT_SIZE - info->xmit_cnt - 1,
                               DZ_XMIT_SIZE - info->xmit_head));
            if (c <= 0) {
                restore_flags (flags);
                break;
            }
            memcpy(info->xmit_buf + info->xmit_head, buf, c);
            info->xmit_head = ((info->xmit_head + c) &
                               (DZ_XMIT_SIZE-1));
            info->xmit_cnt += c;
            restore_flags(flags);

            buf += c;
            count -= c;
            ret += c;
        }
    }

    /* anything queued and not flow-stopped: start transmitting */
    if (info->xmit_cnt) {
        if (!tty->stopped) {
            if (!tty->hw_stopped) {
                dz_start (info->tty);
            }
        }
    }

    return ret;
}

/*
 * -------------------------------------------------------------------
 * dz_write_room ()
 *
 * compute the amount of space available for writing.
+ * ------------------------------------------------------------------- + */ +static int dz_write_room (struct tty_struct *tty) +{ + struct dz_serial *info = (struct dz_serial *)tty->driver_data; + int ret; + + ret = DZ_XMIT_SIZE - info->xmit_cnt - 1; + if (ret < 0) + ret = 0; + + return ret; +} + +/* + * ------------------------------------------------------------------- + * dz_chars_in_buffer () + * + * compute the amount of char left to be transmitted + * ------------------------------------------------------------------- + */ +static int dz_chars_in_buffer (struct tty_struct *tty) +{ + struct dz_serial *info = (struct dz_serial *)tty->driver_data; + + return info->xmit_cnt; +} + +/* + * ------------------------------------------------------------------- + * dz_flush_buffer () + * + * Empty the output buffer + * ------------------------------------------------------------------- + */ +static void dz_flush_buffer (struct tty_struct *tty) +{ + struct dz_serial *info = (struct dz_serial *)tty->driver_data; + + cli(); + info->xmit_cnt = info->xmit_head = info->xmit_tail = 0; + sti(); + + wake_up_interruptible (&tty->write_wait); + + if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && + tty->ldisc.write_wakeup) + tty->ldisc.write_wakeup(tty); +} + +/* + * ------------------------------------------------------------ + * dz_throttle () and dz_unthrottle () + * + * This routine is called by the upper-layer tty layer to signal that + * incoming characters should be throttled (or not). 
 * ------------------------------------------------------------
 */
static void dz_throttle (struct tty_struct *tty)
{
    struct dz_serial *info = (struct dz_serial *)tty->driver_data;

    /* with software flow control, queue an XOFF for the next Tx slot */
    if (I_IXOFF(tty))
        info->x_char = STOP_CHAR(tty);
}

static void dz_unthrottle (struct tty_struct *tty)
{
    struct dz_serial *info = (struct dz_serial *)tty->driver_data;

    if (I_IXOFF(tty)) {
        /* cancel a pending XOFF, otherwise queue an XON */
        if (info->x_char)
            info->x_char = 0;
        else
            info->x_char = START_CHAR(tty);
    }
}

/* Queue a high-priority character (sent ahead of buffered output). */
static void dz_send_xchar (struct tty_struct *tty, char ch)
{
    struct dz_serial *info = (struct dz_serial *)tty->driver_data;

    info->x_char = ch;

    if (ch)
        dz_start(info->tty);
}

/*
 * ------------------------------------------------------------
 * rs_ioctl () and friends
 * ------------------------------------------------------------
 */

/* TIOCGSERIAL: copy this port's settings out to user space. */
static int get_serial_info(struct dz_serial *info,
                           struct serial_struct *retinfo)
{
    struct serial_struct tmp;

    if (!retinfo)
        return -EFAULT;

    memset (&tmp, 0, sizeof(tmp));

    tmp.type = info->type;
    tmp.line = info->line;
    tmp.port = info->port;
    tmp.irq = SERIAL;
    tmp.flags = info->flags;
    tmp.baud_base = info->baud_base;
    tmp.close_delay = info->close_delay;
    tmp.closing_wait = info->closing_wait;

    return copy_to_user(retinfo, &tmp, sizeof(*retinfo)) ? -EFAULT : 0;
}

/*
 * TIOCSSERIAL: adopt user-supplied settings.  Requires CAP_SYS_ADMIN
 * and an exclusively-open port; re-runs startup() afterwards.
 */
static int set_serial_info (struct dz_serial *info,
                            struct serial_struct *new_info)
{
    struct serial_struct new_serial;
    struct dz_serial old_info;
    int retval = 0;

    if (!new_info)
        return -EFAULT;

    if (copy_from_user(&new_serial, new_info, sizeof(new_serial)))
        return -EFAULT;

    /* NOTE(review): snapshot is taken but never restored or used */
    old_info = *info;

    if (!capable(CAP_SYS_ADMIN))
        return -EPERM;

    if (info->count > 1)
        return -EBUSY;

    /*
     * OK, past this point, all the error checking has been done.
     * At this point, we start making changes.....
     */

    info->baud_base = new_serial.baud_base;
    info->type = new_serial.type;
    info->close_delay = new_serial.close_delay;
    info->closing_wait = new_serial.closing_wait;

    retval = startup(info);

    return retval;
}

/*
 * get_lsr_info - get line status register info
 *
 * Purpose: Let user call ioctl() to get info when the UART physically
 *          is emptied.  On bus types like RS485, the transmitter must
 *          release the bus after transmitting.  This must be done when
 *          the transmit shift register is empty, not be done when the
 *          transmit holding register is empty.  This functionality
 *          allows an RS485 driver to be written in user space.
 *
 * NOTE(review): this reads the DZ_LPR (line parameter) register --
 * confirm that is the intended status source on this chip.
 */
static int get_lsr_info (struct dz_serial *info, unsigned int *value)
{
    unsigned short status = dz_in (info, DZ_LPR);

    return put_user (status, value);
}

/*
 * This routine sends a break character out the serial port, holding
 * the per-line break bit in the TCR for `duration` jiffies.
 */
static void send_break (struct dz_serial *info, int duration)
{
    unsigned long flags;
    unsigned short tmp, mask;

    if (!info->port)
        return;

    mask = 1 << info->line;
    tmp = dz_in (info, DZ_TCR);
    tmp |= mask;

    current->state = TASK_INTERRUPTIBLE;

    save_and_cli(flags);
    dz_out(info, DZ_TCR, tmp);       /* assert break */
    schedule_timeout(duration);      /* sleep for the break interval */
    tmp &= ~mask;
    dz_out(info, DZ_TCR, tmp);       /* release break */
    restore_flags(flags);
}

/*
 * Per-port ioctl dispatcher: break control, soft-carrier flag, and
 * the serial_struct get/set calls.  Unknown commands fall through to
 * the tty core via -ENOIOCTLCMD.
 */
static int dz_ioctl(struct tty_struct *tty, struct file *file,
                    unsigned int cmd, unsigned long arg)
{
    int error;
    struct dz_serial * info = (struct dz_serial *)tty->driver_data;
    int retval;

    /* most commands are refused while the port is in an error state */
    if (cmd != TIOCGSERIAL && cmd != TIOCSSERIAL &&
        cmd != TIOCSERCONFIG && cmd != TIOCSERGWILD &&
        cmd != TIOCSERSWILD && cmd != TIOCSERGSTRUCT) {
        if (tty->flags & (1 << TTY_IO_ERROR))
            return -EIO;
    }

    switch (cmd) {
    case TCSBRK:    /* SVID version: non-zero arg --> no break */
        retval = tty_check_change(tty);
        if (retval)
            return retval;
        tty_wait_until_sent(tty, 0);
        if (!arg)
            send_break(info, HZ/4);    /* 1/4 second */
        return 0;

    case TCSBRKP:   /* support for POSIX tcsendbreak() */
        retval = tty_check_change(tty);
        if (retval)
            return retval;
        tty_wait_until_sent(tty, 0);
        send_break(info, arg ? arg*(HZ/10) : HZ/4);
        return 0;

    case TIOCGSOFTCAR:
        return put_user(C_CLOCAL(tty) ? 1 : 0, (unsigned long *)arg);

    case TIOCSSOFTCAR:
        if (get_user (arg, (unsigned long *)arg))
            return -EFAULT;

        tty->termios->c_cflag = (tty->termios->c_cflag & ~CLOCAL) |
                                (arg ? CLOCAL : 0);
        return 0;

    case TIOCGSERIAL:
        return get_serial_info(info, (struct serial_struct *)arg);

    case TIOCSSERIAL:
        return set_serial_info(info, (struct serial_struct *) arg);

    case TIOCSERGETLSR:    /* Get line status register */
        return get_lsr_info (info, (unsigned int *)arg);

    case TIOCSERGSTRUCT:
        return copy_to_user((struct dz_serial *)arg, info,
                            sizeof(struct dz_serial)) ? -EFAULT : 0;

    default:
        return -ENOIOCTLCMD;
    }

    return 0;
}

/*
 * Re-program the line when the termios change; restart output if
 * CRTSCTS was just turned off while the line was hardware-stopped.
 */
static void dz_set_termios (struct tty_struct *tty,
                            struct termios *old_termios)
{
    struct dz_serial *info = (struct dz_serial *)tty->driver_data;

    if (tty->termios->c_cflag == old_termios->c_cflag)
        return;

    change_speed (info);

    if ((old_termios->c_cflag & CRTSCTS) &&
        !(tty->termios->c_cflag & CRTSCTS)) {
        tty->hw_stopped = 0;
        dz_start(tty);
    }
}

/*
 * ------------------------------------------------------------
 * dz_close()
 *
 * This routine is called when the serial port gets closed.  First, we
 * wait for the last remaining data to be sent.  Then, we turn off
 * the transmit enable and receive enable flags.
 * ------------------------------------------------------------
 */
static void dz_close(struct tty_struct *tty, struct file *filp)
{
    struct dz_serial * info = (struct dz_serial *)tty->driver_data;
    unsigned long flags;

    if (!info)
        return;

    save_and_cli(flags);

    if (tty_hung_up_p(filp)) {
        restore_flags(flags);
        return;
    }

    if ((tty->count == 1) && (info->count != 1)) {
        /*
         * Uh, oh.  tty->count is 1, which means that the tty structure
         * will be freed.  Info->count should always be one in these
         * conditions.  If it's greater than one, we've got real
         * problems, since it means the serial port won't be shutdown.
         */
        printk("dz_close: bad serial port count; tty->count is 1, "
               "info->count is %d\n", info->count);
        info->count = 1;
    }

    if (--info->count < 0) {
        /* NOTE(review): "ds_close" below is almost certainly a typo
         * for "dz_close" -- message text left untouched here */
        printk("ds_close: bad serial port count for ttyS%02d: %d\n",
               info->line, info->count);
        info->count = 0;
    }

    /* not the last close: just drop our reference */
    if (info->count) {
        restore_flags(flags);
        return;
    }
    info->flags |= DZ_CLOSING;
    /*
     * Now we wait for the transmit buffer to clear; and we notify the line
     * discipline to only process XON/XOFF characters.
     */
    tty->closing = 1;

    if (info->closing_wait != DZ_CLOSING_WAIT_NONE)
        tty_wait_until_sent(tty, info->closing_wait);

    /*
     * At this point we stop accepting input.  To do this, we disable the
     * receive line status interrupts.
     */
    shutdown(info);

    if (tty->driver->flush_buffer)
        tty->driver->flush_buffer (tty);
    if (tty->ldisc.flush_buffer)
        tty->ldisc.flush_buffer (tty);
    tty->closing = 0;
    info->event = 0;
    info->tty = 0;

    /* restore the default line discipline if one was pushed */
    if (tty->ldisc.num != ldiscs[N_TTY].num) {
        if (tty->ldisc.close)
            tty->ldisc.close(tty);
        tty->ldisc = ldiscs[N_TTY];
        tty->termios->c_line = N_TTY;
        if (tty->ldisc.open)
            tty->ldisc.open(tty);
    }
    if (info->blocked_open) {
        if (info->close_delay) {
            current->state = TASK_INTERRUPTIBLE;
            schedule_timeout(info->close_delay);
        }
        wake_up_interruptible(&info->open_wait);
    }

    info->flags &= ~(DZ_NORMAL_ACTIVE | DZ_CLOSING);
    wake_up_interruptible(&info->close_wait);

    restore_flags(flags);
}

/*
 * dz_hangup () --- called by tty_hangup() when a hangup is signaled.
 */
static void dz_hangup (struct tty_struct *tty)
{
    struct dz_serial *info = (struct dz_serial *) tty->driver_data;

    dz_flush_buffer(tty);
    shutdown(info);
    info->event = 0;
    info->count = 0;
    info->flags &= ~DZ_NORMAL_ACTIVE;
    info->tty = 0;
    wake_up_interruptible(&info->open_wait);
}

/*
 * ------------------------------------------------------------
 * rs_open() and friends
 * ------------------------------------------------------------
 */

/*
 * Block the opener until the port becomes usable: not closing, and
 * (unless CLOCAL or O_NONBLOCK) carrier is ready.  Returns 0 once
 * the port may be used, -EAGAIN/-ERESTARTSYS on hangup or signal.
 */
static int block_til_ready(struct tty_struct *tty, struct file *filp,
                           struct dz_serial *info)
{
    DECLARE_WAITQUEUE(wait, current);
    int retval;
    int do_clocal = 0;

    /*
     * If the device is in the middle of being closed, then block
     * until it's done, and then try again.
     */
    if (info->flags & DZ_CLOSING) {
        interruptible_sleep_on(&info->close_wait);
        return -EAGAIN;
    }

    /*
     * If non-blocking mode is set, or the port is not enabled, then make
     * the check up front and then exit.
     */
    if ((filp->f_flags & O_NONBLOCK) ||
        (tty->flags & (1 << TTY_IO_ERROR))) {
        info->flags |= DZ_NORMAL_ACTIVE;

        return 0;
    }

    if (tty->termios->c_cflag & CLOCAL)
        do_clocal = 1;

    /*
     * Block waiting for the carrier detect and the line to become free
     * (i.e., not in use by the callout).  While we are in this loop,
     * info->count is dropped by one, so that dz_close() knows when to
     * free things.  We restore it upon exit, either normal or abnormal.
     */
    retval = 0;
    add_wait_queue(&info->open_wait, &wait);

    info->count--;
    info->blocked_open++;
    while (1) {
        set_current_state(TASK_INTERRUPTIBLE);
        if (tty_hung_up_p (filp) || !(info->is_initialized)) {
            retval = -EAGAIN;
            break;
        }
        if (!(info->flags & DZ_CLOSING) && do_clocal)
            break;
        if (signal_pending(current)) {
            retval = -ERESTARTSYS;
            break;
        }
        schedule();
    }

    current->state = TASK_RUNNING;
    remove_wait_queue (&info->open_wait, &wait);
    if (!tty_hung_up_p(filp))
        info->count++;
    info->blocked_open--;

    if (retval)
        return retval;
    info->flags |= DZ_NORMAL_ACTIVE;
    return 0;
}

/*
 * This routine is called whenever a serial port is opened.  It
 * enables interrupts for a serial port.  It also performs the
 * serial-specific initialization for the tty structure.
 */
static int dz_open (struct tty_struct *tty, struct file *filp)
{
    struct dz_serial *info;
    int retval, line;

    line = tty->index;

    /*
     * The dz lines for the mouse/keyboard must be opened using their
     * respective drivers.
     */
    if ((line < 0) || (line >= DZ_NB_PORT))
        return -ENODEV;

    if ((line == DZ_KEYBOARD) || (line == DZ_MOUSE))
        return -ENODEV;

    info = lines[line];
    info->count++;

    tty->driver_data = info;
    info->tty = tty;

    /*
     * Start up serial port
     */
    retval = startup (info);
    if (retval)
        return retval;

    retval = block_til_ready (tty, filp, info);
    if (retval)
        return retval;

    return 0;
}

static void show_serial_version (void)
{
    printk("%s%s\n", dz_name, dz_version);
}

static struct tty_driver *serial_driver;

/* driver entry points handed to the tty core */
static struct tty_operations serial_ops = {
    .open = dz_open,
    .close = dz_close,
    .write = dz_write,
    .flush_chars = dz_flush_chars,
    .write_room = dz_write_room,
    .chars_in_buffer = dz_chars_in_buffer,
    .flush_buffer = dz_flush_buffer,
    .ioctl = dz_ioctl,
    .throttle = dz_throttle,
    .unthrottle = dz_unthrottle,
    .send_xchar = dz_send_xchar,
    .set_termios = dz_set_termios,
    .stop = dz_stop,
    .start = dz_start,
    .hangup = dz_hangup,
};

/*
 * Driver initialization: register the tty driver, set up the four
 * per-line dz_serial structures, reset the chip (unless it is already
 * serving as the console) and claim the shared DZ interrupt.
 */
int __init dz_init(void)
{
    int i, flags;
    struct dz_serial *info;

    serial_driver = alloc_tty_driver(DZ_NB_PORT);
    if (!serial_driver)
        return -ENOMEM;

    /* Setup base handler, and timer table. */
    init_bh(SERIAL_BH, do_serial_bh);

    show_serial_version();

    serial_driver->owner = THIS_MODULE;
    serial_driver->devfs_name = "tts/";
    serial_driver->name = "ttyS";
    serial_driver->major = TTY_MAJOR;
    serial_driver->minor_start = 64;
    serial_driver->type = TTY_DRIVER_TYPE_SERIAL;
    serial_driver->subtype = SERIAL_TYPE_NORMAL;
    serial_driver->init_termios = tty_std_termios;
    serial_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL |
                                          CLOCAL;
    serial_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_NO_DEVFS;
    tty_set_operations(serial_driver, &serial_ops);

    if (tty_register_driver(serial_driver))
        panic("Couldn't register serial driver\n");

    save_flags(flags); cli();
    for (i=0; i < DZ_NB_PORT; i++) {
        info = &multi[i];
        lines[i] = info;
        info->magic = SERIAL_MAGIC;

        /* base address depends on the DECstation model */
        if ((mips_machtype == MACH_DS23100) ||
            (mips_machtype == MACH_DS5100))
            info->port = (unsigned long) KN01_DZ11_BASE;
        else
            info->port = (unsigned long) KN02_DZ11_BASE;

        info->line = i;
        info->tty = 0;
        info->close_delay = 50;
        info->closing_wait = 3000;
        info->x_char = 0;
        info->event = 0;
        info->count = 0;
        info->blocked_open = 0;
        info->tqueue.routine = do_softint;
        info->tqueue.data = info;
        info->tqueue_hangup.routine = do_serial_hangup;
        info->tqueue_hangup.data = info;
        init_waitqueue_head(&info->open_wait);
        init_waitqueue_head(&info->close_wait);

        /*
         * If we are pointing to address zero then punt - not correctly
         * set up in setup.c to handle this.
         */
        if (! info->port)
            return 0;

        printk("ttyS%02d at 0x%08x (irq = %d)\n", info->line,
               info->port, SERIAL);

        tty_register_device(serial_driver, info->line, NULL);
    }

    /* Reset the chip (skipped when it is already the console) */
#ifndef CONFIG_SERIAL_CONSOLE
    {
        int tmp;
        dz_out(info, DZ_CSR, DZ_CLR);
        while ((tmp = dz_in(info,DZ_CSR)) & DZ_CLR);
        wbflush();

        /* Enable scanning */
        dz_out(info, DZ_CSR, DZ_MSE);
    }
#endif

    /*
     * Order matters here... the trick is that flags is updated... in
     * request_irq - to immediately obliterate it is unwise.
     */
    restore_flags(flags);

    if (request_irq(SERIAL, dz_interrupt, SA_INTERRUPT, "DZ", lines[0]))
        panic("Unable to register DZ interrupt\n");

    return 0;
}

#ifdef CONFIG_SERIAL_CONSOLE
/*
 * Busy-wait until the transmitter is ready (bounded by `loops`),
 * then write one character to the console line.
 */
static void dz_console_put_char (unsigned char ch)
{
    unsigned long flags;
    int loops = 2500;
    unsigned short tmp = ch;
    /*
     * this code sends stuff out to serial device - spinning its wheels
     * and waiting.
     */

    /* force the issue - point it at lines[3] */
    dz_console = &multi[CONSOLE_LINE];

    save_and_cli(flags);

    /* spin our wheels */
    while (((dz_in(dz_console, DZ_CSR) & DZ_TRDY) != DZ_TRDY) && loops--)
        ;

    /* Actually transmit the character. */
    dz_out(dz_console, DZ_TDR, tmp);

    restore_flags(flags);
}

/*
 * -------------------------------------------------------------------
 * dz_console_print ()
 *
 * dz_console_print is registered for printk.
 * The console must be locked when we get here.
 * -------------------------------------------------------------------
 */
static void dz_console_print (struct console *cons,
                              const char *str,
                              unsigned int count)
{
#ifdef DEBUG_DZ
    prom_printf((char *)str);
#endif
    while (count--) {
        if (*str == '\n')
            dz_console_put_char('\r');   /* LF -> CRLF on the wire */
        dz_console_put_char(*str++);
    }
}

static struct tty_driver *dz_console_device(struct console *c, int *index)
{
    *index = c->index;
    return serial_driver;
}

/*
 * Parse the console= options (baud, parity, bits), reset the chip
 * and program the console line.  Returns 0.
 */
static int __init dz_console_setup(struct console *co, char *options)
{
    int baud = 9600;
    int bits = 8;
    int parity = 'n';
    int cflag = CREAD | HUPCL | CLOCAL;
    char *s;
    unsigned short mask,tmp;

    if (options) {
        baud = simple_strtoul(options, NULL, 10);
        s = options;
        while (*s >= '0' && *s <= '9')
            s++;
        if (*s)
            parity = *s++;
        if (*s)
            bits = *s - '0';
    }

    /*
     * Now construct a cflag setting.
     */
    switch (baud) {
    case 1200:
        cflag |= DZ_B1200;
        break;
    case 2400:
        cflag |= DZ_B2400;
        break;
    case 4800:
        cflag |= DZ_B4800;
        break;
    case 9600:
    default:
        cflag |= DZ_B9600;
        break;
    }
    switch (bits) {
    case 7:
        cflag |= DZ_CS7;
        break;
    default:
    case 8:
        cflag |= DZ_CS8;
        break;
    }
    switch (parity) {
    case 'o':
    case 'O':
        cflag |= DZ_PARODD;
        break;
    case 'e':
    case 'E':
        cflag |= DZ_PARENB;
        break;
    }
    co->cflag = cflag;

    /* TOFIX: force to console line */
    dz_console = &multi[CONSOLE_LINE];
    if ((mips_machtype == MACH_DS23100) || (mips_machtype == MACH_DS5100))
        dz_console->port = KN01_DZ11_BASE;
    else
        dz_console->port = KN02_DZ11_BASE;
    dz_console->line = CONSOLE_LINE;

    /* master reset, then wait for it to complete */
    dz_out(dz_console, DZ_CSR, DZ_CLR);
    while ((tmp = dz_in(dz_console,DZ_CSR)) & DZ_CLR)
        ;

    /* enable scanning */
    dz_out(dz_console, DZ_CSR, DZ_MSE);

    /* Set up flags...
     * NOTE(review): the line is hard-wired to 9600 8N1 + DZ_PARENB
     * here regardless of the parsed cflag above -- verify intent. */
    dz_console->cflags = 0;
    dz_console->cflags |= DZ_B9600;
    dz_console->cflags |= DZ_CS8;
    dz_console->cflags |= DZ_PARENB;
    dz_out(dz_console, DZ_LPR, dz_console->cflags);

    mask = 1 << dz_console->line;
    tmp = dz_in (dz_console, DZ_TCR);    /* read the TX flag */
    if (!(tmp & mask)) {
        tmp |= mask;                     /* set the TX flag */
        dz_out (dz_console, DZ_TCR, tmp);
    }

    return 0;
}

static struct console dz_sercons = {
    .name = "ttyS",
    .write = dz_console_print,
    .device = dz_console_device,
    .setup = dz_console_setup,
    .flags = CON_CONSDEV | CON_PRINTBUFFER,
    .index = CONSOLE_LINE,
};

void __init dz_serial_console_init(void)
{
    register_console(&dz_sercons);
}

#endif /* ifdef CONFIG_SERIAL_CONSOLE */

MODULE_LICENSE("GPL");
diff --git a/drivers/char/dz.h b/drivers/char/dz.h
new file mode 100644
index 000000000..989f927a4
--- /dev/null
+++ b/drivers/char/dz.h
@@ -0,0 +1,230 @@
/*
 * dz.h: Serial port driver for DECStations equipped
 *       with the DZ chipset.
 *
 * Copyright (C) 1998 Olivier A. D.
Lebaillif + * + * Email: olivier.lebaillif@ifrsys.com + * + */ +#ifndef DZ_SERIAL_H +#define DZ_SERIAL_H + +/* + * Definitions for the Control and Status Received. + */ +#define DZ_TRDY 0x8000 /* Transmitter empty */ +#define DZ_TIE 0x4000 /* Transmitter Interrupt Enable */ +#define DZ_RDONE 0x0080 /* Receiver data ready */ +#define DZ_RIE 0x0040 /* Receive Interrupt Enable */ +#define DZ_MSE 0x0020 /* Master Scan Enable */ +#define DZ_CLR 0x0010 /* Master reset */ +#define DZ_MAINT 0x0008 /* Loop Back Mode */ + +/* + * Definitions for the Received buffer. + */ +#define DZ_RBUF_MASK 0x00FF /* Data Mask in the Receive Buffer */ +#define DZ_LINE_MASK 0x0300 /* Line Mask in the Receive Buffer */ +#define DZ_DVAL 0x8000 /* Valid Data indicator */ +#define DZ_OERR 0x4000 /* Overrun error indicator */ +#define DZ_FERR 0x2000 /* Frame error indicator */ +#define DZ_PERR 0x1000 /* Parity error indicator */ + +#define LINE(x) (x & DZ_LINE_MASK) >> 8 /* Get the line number from the input buffer */ +#define UCHAR(x) (unsigned char)(x & DZ_RBUF_MASK) + +/* + * Definitions for the Transmit Register. + */ +#define DZ_LINE_KEYBOARD 0x0001 +#define DZ_LINE_MOUSE 0x0002 +#define DZ_LINE_MODEM 0x0004 +#define DZ_LINE_PRINTER 0x0008 + +#define DZ_MODEM_DTR 0x0400 /* DTR for the modem line (2) */ + +/* + * Definitions for the Modem Status Register. + */ +#define DZ_MODEM_DSR 0x0200 /* DSR for the modem line (2) */ + +/* + * Definitions for the Transmit Data Register. + */ +#define DZ_BRK0 0x0100 /* Break assertion for line 0 */ +#define DZ_BRK1 0x0200 /* Break assertion for line 1 */ +#define DZ_BRK2 0x0400 /* Break assertion for line 2 */ +#define DZ_BRK3 0x0800 /* Break assertion for line 3 */ + +/* + * Definitions for the Line Parameter Register. 
+ */ +#define DZ_KEYBOARD 0x0000 /* line 0 = keyboard */ +#define DZ_MOUSE 0x0001 /* line 1 = mouse */ +#define DZ_MODEM 0x0002 /* line 2 = modem */ +#define DZ_PRINTER 0x0003 /* line 3 = printer */ + +#define DZ_CSIZE 0x0018 /* Number of bits per byte (mask) */ +#define DZ_CS5 0x0000 /* 5 bits per byte */ +#define DZ_CS6 0x0008 /* 6 bits per byte */ +#define DZ_CS7 0x0010 /* 7 bits per byte */ +#define DZ_CS8 0x0018 /* 8 bits per byte */ + +#define DZ_CSTOPB 0x0020 /* 2 stop bits instead of one */ + +#define DZ_PARENB 0x0040 /* Parity enable */ +#define DZ_PARODD 0x0080 /* Odd parity instead of even */ + +#define DZ_CBAUD 0x0E00 /* Baud Rate (mask) */ +#define DZ_B50 0x0000 +#define DZ_B75 0x0100 +#define DZ_B110 0x0200 +#define DZ_B134 0x0300 +#define DZ_B150 0x0400 +#define DZ_B300 0x0500 +#define DZ_B600 0x0600 +#define DZ_B1200 0x0700 +#define DZ_B1800 0x0800 +#define DZ_B2000 0x0900 +#define DZ_B2400 0x0A00 +#define DZ_B3600 0x0B00 +#define DZ_B4800 0x0C00 +#define DZ_B7200 0x0D00 +#define DZ_B9600 0x0E00 + +#define DZ_CREAD 0x1000 /* Enable receiver */ +#define DZ_RXENAB 0x1000 /* enable receive char */ +/* + * Addresses for the DZ registers + */ +#define DZ_CSR 0x00 /* Control and Status Register */ +#define DZ_RBUF 0x08 /* Receive Buffer */ +#define DZ_LPR 0x08 /* Line Parameters Register */ +#define DZ_TCR 0x10 /* Transmitter Control Register */ +#define DZ_MSR 0x18 /* Modem Status Register */ +#define DZ_TDR 0x18 /* Transmit Data Register */ + + +#define DZ_NB_PORT 4 + +#define DZ_XMIT_SIZE 4096 /* buffer size */ +#define WAKEUP_CHARS DZ_XMIT_SIZE/4 + +#define DZ_EVENT_WRITE_WAKEUP 0 + +#ifndef MIN +#define MIN(a,b) ((a) < (b) ? 
(a) : (b)) + +#define DZ_INITIALIZED 0x80000000 /* Serial port was initialized */ +#define DZ_CALLOUT_ACTIVE 0x40000000 /* Call out device is active */ +#define DZ_NORMAL_ACTIVE 0x20000000 /* Normal device is active */ +#define DZ_BOOT_AUTOCONF 0x10000000 /* Autoconfigure port on bootup */ +#define DZ_CLOSING 0x08000000 /* Serial port is closing */ +#define DZ_CTS_FLOW 0x04000000 /* Do CTS flow control */ +#define DZ_CHECK_CD 0x02000000 /* i.e., CLOCAL */ + +#define DZ_CLOSING_WAIT_INF 0 +#define DZ_CLOSING_WAIT_NONE 65535 + +#define DZ_SPLIT_TERMIOS 0x0008 /* Separate termios for dialin/callout */ +#define DZ_SESSION_LOCKOUT 0x0100 /* Lock out cua opens based on session */ +#define DZ_PGRP_LOCKOUT 0x0200 /* Lock out cua opens based on pgrp */ + +struct dz_serial { + unsigned port; /* base address for the port */ + int type; + int flags; + int baud_base; + int blocked_open; + unsigned short close_delay; + unsigned short closing_wait; + unsigned short line; /* port/line number */ + unsigned short cflags; /* line configuration flag */ + unsigned short x_char; /* xon/xoff character */ + unsigned short read_status_mask; /* mask for read condition */ + unsigned short ignore_status_mask; /* mask for ignore condition */ + unsigned long event; /* mask used in BH */ + unsigned char *xmit_buf; /* Transmit buffer */ + int xmit_head; /* Position of the head */ + int xmit_tail; /* Position of the tail */ + int xmit_cnt; /* Count of the chars in the buffer */ + int count; /* indicates how many times it has been opened */ + int magic; + + struct async_icount icount; /* keep track of things ... 
*/ + struct tty_struct *tty; /* tty associated */ + struct tq_struct tqueue; /* Queue for BH */ + struct tq_struct tqueue_hangup; + wait_queue_head_t open_wait; + wait_queue_head_t close_wait; + + unsigned char is_console; /* flag indicating a serial console */ + unsigned char is_initialized; +}; + +static struct dz_serial multi[DZ_NB_PORT]; /* Four serial lines in the DZ chip */ +static struct dz_serial *dz_console; + +/* + * tmp_buf is used as a temporary buffer by serial_write. We need to + * lock it in case the copy_from_user blocks while swapping in a page, + * and some other program tries to do a serial write at the same time. + * Since the lock will only come under contention when the system is + * swapping and available memory is low, it makes sense to share one + * buffer across all the serial ports, since it significantly saves + * memory if large numbers of serial ports are open. + */ +static unsigned char *tmp_buf; +static DECLARE_MUTEX(tmp_buf_sem); + +static char *dz_name = "DECstation DZ serial driver version "; +static char *dz_version = "1.02"; + +static inline unsigned short dz_in (struct dz_serial *, unsigned); +static inline void dz_out (struct dz_serial *, unsigned, unsigned short); + +static inline void dz_sched_event (struct dz_serial *, int); +static inline void receive_chars (struct dz_serial *); +static inline void transmit_chars (struct dz_serial *); +static inline void check_modem_status (struct dz_serial *); + +static void dz_stop (struct tty_struct *); +static void dz_start (struct tty_struct *); +static void dz_interrupt (int, void *, struct pt_regs *); +static void do_serial_bh (void); +static void do_softint (void *); +static void do_serial_hangup (void *); +static void change_speed (struct dz_serial *); +static void dz_flush_chars (struct tty_struct *); +static void dz_console_print (struct console *, const char *, unsigned int); +static void dz_flush_buffer (struct tty_struct *); +static void dz_throttle (struct tty_struct *); 
+static void dz_unthrottle (struct tty_struct *); +static void dz_send_xchar (struct tty_struct *, char); +static void shutdown (struct dz_serial *); +static void send_break (struct dz_serial *, int); +static void dz_set_termios (struct tty_struct *, struct termios *); +static void dz_close (struct tty_struct *, struct file *); +static void dz_hangup (struct tty_struct *); +static void show_serial_version (void); + +static int dz_write (struct tty_struct *, int, const unsigned char *, int); +static int dz_write_room (struct tty_struct *); +static int dz_chars_in_buffer (struct tty_struct *); +static int startup (struct dz_serial *); +static int get_serial_info (struct dz_serial *, struct serial_struct *); +static int set_serial_info (struct dz_serial *, struct serial_struct *); +static int get_lsr_info (struct dz_serial *, unsigned int *); +static int dz_ioctl (struct tty_struct *, struct file *, unsigned int, unsigned long); +static int block_til_ready (struct tty_struct *, struct file *, struct dz_serial *); +static int dz_open (struct tty_struct *, struct file *); + +#ifdef MODULE +int init_module (void) +void cleanup_module (void) +#endif + +#endif + +#endif /* DZ_SERIAL_H */ diff --git a/drivers/char/sh-sci.c b/drivers/char/sh-sci.c new file mode 100644 index 000000000..d3894a6f9 --- /dev/null +++ b/drivers/char/sh-sci.c @@ -0,0 +1,1646 @@ +/* $Id: sh-sci.c,v 1.16 2004/02/10 17:04:17 lethal Exp $ + * + * linux/drivers/char/sh-sci.c + * + * SuperH on-chip serial module support. (SCI with no FIFO / with FIFO) + * Copyright (C) 1999, 2000 Niibe Yutaka + * Copyright (C) 2000 Sugioka Toshinobu + * Modified to support multiple serial ports. Stuart Menefy (May 2000). + * Modified to support SH7760 SCIF. Paul Mundt (Oct 2003). + * Modified to support H8/300 Series. Yoshinori Sato (Feb 2004). 
+ * + * TTY code is based on sx.c (Specialix SX driver) by: + * + * (C) 1998 R.E.Wolff@BitWizard.nl + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(CONFIG_SERIAL_CONSOLE) || defined(CONFIG_SH_KGDB_CONSOLE) +#include +#endif +#ifdef CONFIG_CPU_FREQ +#include +#include +#endif + +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_SH_STANDARD_BIOS +#include +#endif + +#include "sh-sci.h" + +#ifdef CONFIG_SH_KGDB +#include + +int kgdb_sci_setup(void); +static int kgdb_get_char(struct sci_port *port); +static void kgdb_put_char(struct sci_port *port, char c); +static void kgdb_handle_error(struct sci_port *port); +static struct sci_port *kgdb_sci_port; + +#ifdef CONFIG_SH_KGDB_CONSOLE +static struct console kgdbcons; +void __init kgdb_console_init(void); +#endif /* CONFIG_SH_KGDB_CONSOLE */ + +#endif /* CONFIG_SH_KGDB */ + +#ifdef CONFIG_SERIAL_CONSOLE +static struct console sercons; +static struct sci_port* sercons_port=0; +static int sercons_baud; +#ifdef CONFIG_MAGIC_SYSRQ +#include +static int break_pressed; +#endif /* CONFIG_MAGIC_SYSRQ */ +#endif /* CONFIG_SERIAL_CONSOLE */ + +/* Function prototypes */ +static void sci_init_pins_sci(struct sci_port* port, unsigned int cflag); +#ifndef SCI_ONLY +static void sci_init_pins_scif(struct sci_port* port, unsigned int cflag); +#if defined(CONFIG_CPU_SH3) +static void sci_init_pins_irda(struct sci_port* port, unsigned int cflag); +#endif +#endif +static void sci_disable_tx_interrupts(void *ptr); +static void sci_enable_tx_interrupts(void *ptr); +static void sci_disable_rx_interrupts(void *ptr); +static void sci_enable_rx_interrupts(void *ptr); +static int sci_get_CD(void *ptr); +static void sci_shutdown_port(void *ptr); +static int sci_set_real_termios(void *ptr); +static void sci_hungup(void *ptr); +static void sci_close(void *ptr); 
+static int sci_chars_in_buffer(void *ptr); +static int sci_request_irq(struct sci_port *port); +static void sci_free_irq(struct sci_port *port); +static int sci_init_drivers(void); + +static struct tty_driver *sci_driver; + +static struct sci_port sci_ports[SCI_NPORTS] = SCI_INIT; + +static int sci_debug = 0; + +#ifdef MODULE +MODULE_PARM(sci_debug, "i"); +#endif + +#define dprintk(x...) do { if (sci_debug) printk(x); } while(0) + +#ifdef CONFIG_SERIAL_CONSOLE +static void put_char(struct sci_port *port, char c) +{ + unsigned long flags; + unsigned short status; + + local_irq_save(flags); + + do + status = sci_in(port, SCxSR); + while (!(status & SCxSR_TDxE(port))); + + sci_out(port, SCxTDR, c); + sci_in(port, SCxSR); /* Dummy read */ + sci_out(port, SCxSR, SCxSR_TDxE_CLEAR(port)); + + local_irq_restore(flags); +} +#endif + +#if defined(CONFIG_SH_STANDARD_BIOS) || defined(CONFIG_SH_KGDB) + +static void handle_error(struct sci_port *port) +{ /* Clear error flags */ + sci_out(port, SCxSR, SCxSR_ERROR_CLEAR(port)); +} + +static int get_char(struct sci_port *port) +{ + unsigned long flags; + unsigned short status; + int c; + + local_irq_save(flags); + do { + status = sci_in(port, SCxSR); + if (status & SCxSR_ERRORS(port)) { + handle_error(port); + continue; + } + } while (!(status & SCxSR_RDxF(port))); + c = sci_in(port, SCxRDR); + sci_in(port, SCxSR); /* Dummy read */ + sci_out(port, SCxSR, SCxSR_RDxF_CLEAR(port)); + local_irq_restore(flags); + + return c; +} + +/* Taken from sh-stub.c of GDB 4.18 */ +static const char hexchars[] = "0123456789abcdef"; + +static __inline__ char highhex(int x) +{ + return hexchars[(x >> 4) & 0xf]; +} + +static __inline__ char lowhex(int x) +{ + return hexchars[x & 0xf]; +} + +#endif /* CONFIG_SH_STANDARD_BIOS || CONFIG_SH_KGDB */ + +/* + * Send the packet in buffer. The host gets one chance to read it. + * This routine does not wait for a positive acknowledge. 
+ */ + +#ifdef CONFIG_SERIAL_CONSOLE +static void put_string(struct sci_port *port, const char *buffer, int count) +{ + int i; + const unsigned char *p = buffer; + +#if defined(CONFIG_SH_STANDARD_BIOS) || defined(CONFIG_SH_KGDB) + int checksum; + int usegdb=0; + +#ifdef CONFIG_SH_STANDARD_BIOS + /* This call only does a trap the first time it is + * called, and so is safe to do here unconditionally + */ + usegdb |= sh_bios_in_gdb_mode(); +#endif +#ifdef CONFIG_SH_KGDB + usegdb |= (kgdb_in_gdb_mode && (port == kgdb_sci_port)); +#endif + + if (usegdb) { + /* $#. */ + do { + unsigned char c; + put_char(port, '$'); + put_char(port, 'O'); /* 'O'utput to console */ + checksum = 'O'; + + for (i=0; ibase - SMR0) >> 3; + /* set DDR regs */ + H8300_GPIO_DDR(h8300_sci_pins[ch].port,h8300_sci_pins[ch].rx,H8300_GPIO_INPUT); + H8300_GPIO_DDR(h8300_sci_pins[ch].port,h8300_sci_pins[ch].tx,H8300_GPIO_OUTPUT); + /* tx mark output*/ + H8300_SCI_DR(ch) |= h8300_sci_pins[ch].tx; +} + +#if defined(__H8300S__) +enum {sci_disable,sci_enable}; + +static void h8300_sci_enable(struct sci_port* port, unsigned int ctrl) +{ + volatile unsigned char *mstpcrl=(volatile unsigned char *)MSTPCRL; + int ch = (port->base - SMR0) >> 3; + unsigned char mask = 1 << (ch+1); + if (ctrl == sci_disable) + *mstpcrl |= mask; + else + *mstpcrl &= ~mask; +} +#endif +#endif + +static void sci_setsignals(struct sci_port *port, int dtr, int rts) +{ + /* This routine is used for seting signals of: DTR, DCD, CTS/RTS */ + /* We use SCIF's hardware for CTS/RTS, so don't need any for that. */ + /* If you have signals for DTR and DCD, please implement here. 
*/ + ; +} + +static int sci_getsignals(struct sci_port *port) +{ + /* This routine is used for geting signals of: DTR, DCD, DSR, RI, + and CTS/RTS */ + + return TIOCM_DTR|TIOCM_RTS|TIOCM_DSR; +/* + (((o_stat & OP_DTR)?TIOCM_DTR:0) | + ((o_stat & OP_RTS)?TIOCM_RTS:0) | + ((i_stat & IP_CTS)?TIOCM_CTS:0) | + ((i_stat & IP_DCD)?TIOCM_CAR:0) | + ((i_stat & IP_DSR)?TIOCM_DSR:0) | + ((i_stat & IP_RI) ?TIOCM_RNG:0) +*/ +} + +static void sci_set_baud(struct sci_port *port, int baud) +{ + int t; + + switch (baud) { + case 0: + t = -1; + break; + case 2400: + t = BPS_2400; + break; + case 4800: + t = BPS_4800; + break; + case 9600: + t = BPS_9600; + break; + case 19200: + t = BPS_19200; + break; + case 38400: + t = BPS_38400; + break; + case 57600: + t = BPS_57600; + break; + default: + printk(KERN_INFO "sci: unsupported baud rate: %d, using 115200 instead.\n", baud); + case 115200: + t = BPS_115200; + break; + } + + if (t > 0) { + sci_setsignals (port, 1, -1); + if(t >= 256) { + sci_out(port, SCSMR, (sci_in(port, SCSMR) & ~3) | 1); + t >>= 2; + } else { + sci_out(port, SCSMR, sci_in(port, SCSMR) & ~3); + } + sci_out(port, SCBRR, t); + udelay((1000000+(baud-1)) / baud); /* Wait one bit interval */ + } else { + sci_setsignals (port, 0, -1); + } +} + +static void sci_set_termios_cflag(struct sci_port *port, int cflag, int baud) +{ + unsigned int status; + unsigned int smr_val; + + do + status = sci_in(port, SCxSR); + while (!(status & SCxSR_TEND(port))); + + sci_out(port, SCSCR, 0x00); /* TE=0, RE=0, CKE1=0 */ + +#if !defined(SCI_ONLY) + if (port->type == PORT_SCIF) { + sci_out(port, SCFCR, SCFCR_RFRST | SCFCR_TFRST); + } +#endif + + smr_val = sci_in(port, SCSMR) & 3; + if ((cflag & CSIZE) == CS7) + smr_val |= 0x40; + if (cflag & PARENB) + smr_val |= 0x20; + if (cflag & PARODD) + smr_val |= 0x30; + if (cflag & CSTOPB) + smr_val |= 0x08; + sci_out(port, SCSMR, smr_val); + sci_set_baud(port, baud); + + port->init_pins(port, cflag); + sci_out(port, SCSCR, SCSCR_INIT(port)); +} + 
+static int sci_set_real_termios(void *ptr) +{ + struct sci_port *port = ptr; + + if (port->old_cflag != port->gs.tty->termios->c_cflag) { + port->old_cflag = port->gs.tty->termios->c_cflag; + sci_set_termios_cflag(port, port->old_cflag, port->gs.baud); + sci_enable_rx_interrupts(port); + } + + return 0; +} + +/* ********************************************************************** * + * the interrupt related routines * + * ********************************************************************** */ + +/* + * This routine is used by the interrupt handler to schedule + * processing in the software interrupt portion of the driver. + */ +static inline void sci_sched_event(struct sci_port *port, int event) +{ + port->event |= 1 << event; + schedule_work(&port->tqueue); +} + +static void sci_transmit_chars(struct sci_port *port) +{ + int count, i; + int txroom; + unsigned long flags; + unsigned short status; + unsigned short ctrl; + unsigned char c; + + status = sci_in(port, SCxSR); + if (!(status & SCxSR_TDxE(port))) { + local_irq_save(flags); + ctrl = sci_in(port, SCSCR); + if (port->gs.xmit_cnt == 0) { + ctrl &= ~SCI_CTRL_FLAGS_TIE; + port->gs.flags &= ~GS_TX_INTEN; + } else + ctrl |= SCI_CTRL_FLAGS_TIE; + sci_out(port, SCSCR, ctrl); + local_irq_restore(flags); + return; + } + + while (1) { + count = port->gs.xmit_cnt; +#if !defined(SCI_ONLY) + if (port->type == PORT_SCIF) { + txroom = 16 - (sci_in(port, SCFDR)>>8); + } else { + txroom = (sci_in(port, SCxSR) & SCI_TDRE)?1:0; + } +#else + txroom = (sci_in(port, SCxSR) & SCI_TDRE)?1:0; +#endif + if (count > txroom) + count = txroom; + + /* Don't copy past the end of the source buffer */ + if (count > SERIAL_XMIT_SIZE - port->gs.xmit_tail) + count = SERIAL_XMIT_SIZE - port->gs.xmit_tail; + + /* If for one reason or another, we can't copy more data, we're done! 
*/ + if (count == 0) + break; + + for (i=0; igs.xmit_buf[port->gs.xmit_tail + i]; + sci_out(port, SCxTDR, c); + } + sci_out(port, SCxSR, SCxSR_TDxE_CLEAR(port)); + + port->icount.tx += count; + + /* Update the kernel buffer end */ + port->gs.xmit_tail = (port->gs.xmit_tail + count) & (SERIAL_XMIT_SIZE-1); + + /* This one last. (this is essential) + It would allow others to start putting more data into the buffer! */ + port->gs.xmit_cnt -= count; + } + + if (port->gs.xmit_cnt <= port->gs.wakeup_chars) + sci_sched_event(port, SCI_EVENT_WRITE_WAKEUP); + + local_irq_save(flags); + ctrl = sci_in(port, SCSCR); + if (port->gs.xmit_cnt == 0) { + ctrl &= ~SCI_CTRL_FLAGS_TIE; + port->gs.flags &= ~GS_TX_INTEN; + } else { +#if !defined(SCI_ONLY) + if (port->type == PORT_SCIF) { + sci_in(port, SCxSR); /* Dummy read */ + sci_out(port, SCxSR, SCxSR_TDxE_CLEAR(port)); + } +#endif + ctrl |= SCI_CTRL_FLAGS_TIE; + } + sci_out(port, SCSCR, ctrl); + local_irq_restore(flags); +} + +/* On SH3, SCIF may read end-of-break as a space->mark char */ +#define STEPFN(c) ({int __c=(c); (((__c-1)|(__c)) == -1); }) + +static inline void sci_receive_chars(struct sci_port *port, + struct pt_regs *regs) +{ + int i, count; + struct tty_struct *tty; + int copied=0; + unsigned short status; + + status = sci_in(port, SCxSR); + if (!(status & SCxSR_RDxF(port))) + return; + + tty = port->gs.tty; + while (1) { +#if !defined(SCI_ONLY) + if (port->type == PORT_SCIF) { + count = sci_in(port, SCFDR)&0x001f; + } else { + count = (sci_in(port, SCxSR)&SCxSR_RDxF(port))?1:0; + } +#else + count = (sci_in(port, SCxSR)&SCxSR_RDxF(port))?1:0; +#endif + + /* Don't copy more bytes than there is room for in the buffer */ + if (tty->flip.count + count > TTY_FLIPBUF_SIZE) + count = TTY_FLIPBUF_SIZE - tty->flip.count; + + /* If for any reason we can't copy more data, we're done! 
*/ + if (count == 0) + break; + + if (port->type == PORT_SCI) { + tty->flip.char_buf_ptr[0] = sci_in(port, SCxRDR); + tty->flip.flag_buf_ptr[0] = TTY_NORMAL; + } else { + for (i=0; ibreak_flag) { + if ((c == 0) && + (status & SCxSR_FER(port))) { + count--; i--; + continue; + } + /* Nonzero => end-of-break */ + dprintk("scif: debounce<%02x>\n", c); + port->break_flag = 0; + if (STEPFN(c)) { + count--; i--; + continue; + } + } +#endif /* __SH3__ */ +#if defined(CONFIG_SERIAL_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) + if (break_pressed && (port == sercons_port)) { + if (c != 0 && + time_before(jiffies, + break_pressed + HZ*5)) { + handle_sysrq(c, regs, NULL); + break_pressed = 0; + count--; i--; + continue; + } else if (c != 0) { + break_pressed = 0; + } + } +#endif /* CONFIG_SERIAL_CONSOLE && CONFIG_MAGIC_SYSRQ */ + + /* Store data and status */ + tty->flip.char_buf_ptr[i] = c; + if (status&SCxSR_FER(port)) { + tty->flip.flag_buf_ptr[i] = TTY_FRAME; + dprintk("sci: frame error\n"); + } else if (status&SCxSR_PER(port)) { + tty->flip.flag_buf_ptr[i] = TTY_PARITY; + dprintk("sci: parity error\n"); + } else { + tty->flip.flag_buf_ptr[i] = TTY_NORMAL; + } + } + } + + sci_in(port, SCxSR); /* dummy read */ + sci_out(port, SCxSR, SCxSR_RDxF_CLEAR(port)); + + /* Update the kernel buffer end */ + tty->flip.count += count; + tty->flip.char_buf_ptr += count; + tty->flip.flag_buf_ptr += count; + + copied += count; + port->icount.rx += count; + } + + if (copied) + /* Tell the rest of the system the news. New characters! 
*/ + tty_flip_buffer_push(tty); + else { + sci_in(port, SCxSR); /* dummy read */ + sci_out(port, SCxSR, SCxSR_RDxF_CLEAR(port)); + } +} + +static inline int sci_handle_errors(struct sci_port *port) +{ + int copied = 0; + unsigned short status = sci_in(port, SCxSR); + struct tty_struct *tty = port->gs.tty; + + if (status&SCxSR_ORER(port) && tty->flip.countflip.flag_buf_ptr++ = TTY_OVERRUN; + dprintk("sci: overrun error\n"); + } + + if (status&SCxSR_FER(port) && tty->flip.countflip.flag_buf_ptr++ = TTY_BREAK; + dprintk("sci: BREAK detected\n"); + } + else { + /* frame error */ + copied++; + *tty->flip.flag_buf_ptr++ = TTY_FRAME; + dprintk("sci: frame error\n"); + } + } + + if (status&SCxSR_PER(port) && tty->flip.countflip.flag_buf_ptr++ = TTY_PARITY; + dprintk("sci: parity error\n"); + } + + if (copied) { + tty->flip.count += copied; + tty_flip_buffer_push(tty); + } + + return copied; +} + +static inline int sci_handle_breaks(struct sci_port *port) +{ + int copied = 0; + unsigned short status = sci_in(port, SCxSR); + struct tty_struct *tty = port->gs.tty; + + if (status&SCxSR_BRK(port) && tty->flip.countbreak_flag) + goto break_continue; + port->break_flag = 1; +#endif +#if defined(CONFIG_SERIAL_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) + if (port == sercons_port) { + if (break_pressed == 0) { + break_pressed = jiffies; + dprintk("sci: implied sysrq\n"); + goto break_continue; + } + /* Double break implies a real break */ + break_pressed = 0; + } +#endif + /* Notify of BREAK */ + copied++; + *tty->flip.flag_buf_ptr++ = TTY_BREAK; + dprintk("sci: BREAK detected\n"); + } + break_continue: + +#if defined(CONFIG_CPU_SUBTYPE_SH7750) || defined(CONFIG_CPU_SUBTYPE_ST40STB1) || \ + defined(CONFIG_CPU_SUBTYPE_SH7760) + /* XXX: Handle SCIF overrun error */ + if (port->type == PORT_SCIF && (sci_in(port, SCLSR) & SCIF_ORER) != 0) { + sci_out(port, SCLSR, 0); + if(tty->flip.countflip.flag_buf_ptr++ = TTY_OVERRUN; + dprintk("sci: overrun error\n"); + } + } +#endif + + if (copied) { 
+ tty->flip.count += copied; + tty_flip_buffer_push(tty); + } + + return copied; +} + +static irqreturn_t sci_rx_interrupt(int irq, void *ptr, struct pt_regs *regs) +{ + struct sci_port *port = ptr; + + if (port->gs.flags & GS_ACTIVE) + if (!(port->gs.flags & SCI_RX_THROTTLE)) { + sci_receive_chars(port, regs); + return IRQ_HANDLED; + + } + sci_disable_rx_interrupts(port); + + return IRQ_HANDLED; +} + +static irqreturn_t sci_tx_interrupt(int irq, void *ptr, struct pt_regs *regs) +{ + struct sci_port *port = ptr; + + if (port->gs.flags & GS_ACTIVE) + sci_transmit_chars(port); + else { + sci_disable_tx_interrupts(port); + } + + return IRQ_HANDLED; +} + +static irqreturn_t sci_er_interrupt(int irq, void *ptr, struct pt_regs *regs) +{ + struct sci_port *port = ptr; + + /* Handle errors */ + if (port->type == PORT_SCI) { + if(sci_handle_errors(port)) { + /* discard character in rx buffer */ + sci_in(port, SCxSR); + sci_out(port, SCxSR, SCxSR_RDxF_CLEAR(port)); + } + } + else + sci_rx_interrupt(irq, ptr, regs); + + sci_out(port, SCxSR, SCxSR_ERROR_CLEAR(port)); + + /* Kick the transmission */ + sci_tx_interrupt(irq, ptr, regs); + + return IRQ_HANDLED; +} + +#if !defined(SCI_ONLY) +static irqreturn_t sci_br_interrupt(int irq, void *ptr, struct pt_regs *regs) +{ + struct sci_port *port = ptr; + + /* Handle BREAKs */ + sci_handle_breaks(port); + sci_out(port, SCxSR, SCxSR_BREAK_CLEAR(port)); + + return IRQ_HANDLED; +} +#endif + +static void do_softint(void *private_) +{ + struct sci_port *port = (struct sci_port *) private_; + struct tty_struct *tty; + + tty = port->gs.tty; + if (!tty) + return; + + if (test_and_clear_bit(SCI_EVENT_WRITE_WAKEUP, &port->event)) { + if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && + tty->ldisc.write_wakeup) + (tty->ldisc.write_wakeup)(tty); + wake_up_interruptible(&tty->write_wait); + } +} + +/* ********************************************************************** * + * Here are the routines that actually * + * interface with the 
generic_serial driver * + * ********************************************************************** */ + +static void sci_disable_tx_interrupts(void *ptr) +{ + struct sci_port *port = ptr; + unsigned long flags; + unsigned short ctrl; + + /* Clear TIE (Transmit Interrupt Enable) bit in SCSCR */ + local_irq_save(flags); + ctrl = sci_in(port, SCSCR); + ctrl &= ~SCI_CTRL_FLAGS_TIE; + sci_out(port, SCSCR, ctrl); + local_irq_restore(flags); +} + +static void sci_enable_tx_interrupts(void *ptr) +{ + struct sci_port *port = ptr; + + disable_irq(port->irqs[SCIx_TXI_IRQ]); + sci_transmit_chars(port); + enable_irq(port->irqs[SCIx_TXI_IRQ]); +} + +static void sci_disable_rx_interrupts(void * ptr) +{ + struct sci_port *port = ptr; + unsigned long flags; + unsigned short ctrl; + + /* Clear RIE (Receive Interrupt Enable) bit in SCSCR */ + local_irq_save(flags); + ctrl = sci_in(port, SCSCR); + ctrl &= ~SCI_CTRL_FLAGS_RIE; + sci_out(port, SCSCR, ctrl); + local_irq_restore(flags); +} + +static void sci_enable_rx_interrupts(void * ptr) +{ + struct sci_port *port = ptr; + unsigned long flags; + unsigned short ctrl; + + /* Set RIE (Receive Interrupt Enable) bit in SCSCR */ + local_irq_save(flags); + ctrl = sci_in(port, SCSCR); + ctrl |= SCI_CTRL_FLAGS_RIE; + sci_out(port, SCSCR, ctrl); + local_irq_restore(flags); +} + +static int sci_get_CD(void * ptr) +{ + /* If you have signal for CD (Carrier Detect), please change here. */ + return 1; +} + +static int sci_chars_in_buffer(void * ptr) +{ + struct sci_port *port = ptr; + +#if !defined(SCI_ONLY) + if (port->type == PORT_SCIF) { + return (sci_in(port, SCFDR) >> 8) + ((sci_in(port, SCxSR) & SCxSR_TEND(port))? 0: 1); + } else { + return (sci_in(port, SCxSR) & SCxSR_TEND(port))? 0: 1; + } +#else + return (sci_in(port, SCxSR) & SCxSR_TEND(port))? 
0: 1; +#endif +} + +static void sci_shutdown_port(void * ptr) +{ + struct sci_port *port = ptr; + + port->gs.flags &= ~ GS_ACTIVE; + if (port->gs.tty && port->gs.tty->termios->c_cflag & HUPCL) + sci_setsignals(port, 0, 0); + sci_free_irq(port); +#if defined(__H8300S__) + h8300_sci_enable(port,sci_disable); +#endif +} + +/* ********************************************************************** * + * Here are the routines that actually * + * interface with the rest of the system * + * ********************************************************************** */ + +static int sci_open(struct tty_struct * tty, struct file * filp) +{ + struct sci_port *port; + int retval, line; + + line = tty->index; + + if ((line < 0) || (line >= SCI_NPORTS)) + return -ENODEV; + + port = &sci_ports[line]; + + tty->driver_data = port; + port->gs.tty = tty; + port->gs.count++; + + port->event = 0; + INIT_WORK(&port->tqueue, do_softint, port); + +#if defined(__H8300S__) + h8300_sci_enable(port,sci_enable); +#endif + + /* + * Start up serial port + */ + retval = gs_init_port(&port->gs); + if (retval) { + goto failed_1; + } + + port->gs.flags |= GS_ACTIVE; + sci_setsignals(port, 1,1); + + if (port->gs.count == 1) { + retval = sci_request_irq(port); + } + + retval = gs_block_til_ready(port, filp); + + if (retval) { + goto failed_3; + } + +#ifdef CONFIG_SERIAL_CONSOLE + if (sercons.cflag && sercons.index == line) { + tty->termios->c_cflag = sercons.cflag; + port->gs.baud = sercons_baud; + sercons.cflag = 0; + sci_set_real_termios(port); + } +#endif + +#ifdef CONFIG_SH_KGDB_CONSOLE + if (kgdbcons.cflag && kgdbcons.index == line) { + tty->termios->c_cflag = kgdbcons.cflag; + port->gs.baud = kgdb_baud; + sercons.cflag = 0; + sci_set_real_termios(port); + } +#endif + + sci_enable_rx_interrupts(port); + + return 0; + +failed_3: + sci_free_irq(port); +failed_1: + port->gs.count--; + return retval; +} + +static void sci_hungup(void *ptr) +{ + return; +} + +static void sci_close(void *ptr) +{ + return; 
+} + +static int sci_tiocmget(struct tty_struct *tty, struct file *file) +{ + struct sci_port *port = tty->driver_data; + return sci_getsignals(port); +} + +static int sci_tiocmset(struct tty_struct *tty, struct file *file, + unsigned int set, unsigned int clear) +{ + struct sci_port *port = tty->driver_data; + int rts = -1, dtr = -1; + + if (set & TIOCM_RTS) + rts = 1; + if (set & TIOCM_DTR) + dtr = 1; + if (clear & TIOCM_RTS) + rts = 0; + if (clear & TIOCM_DTR) + dtr = 0; + + sci_setsignals(port, dtr, rts); + return 0; +} + +static int sci_ioctl(struct tty_struct * tty, struct file * filp, + unsigned int cmd, unsigned long arg) +{ + int rc; + struct sci_port *port = tty->driver_data; + int ival; + + rc = 0; + switch (cmd) { + case TIOCGSOFTCAR: + rc = put_user(((tty->termios->c_cflag & CLOCAL) ? 1 : 0), + (unsigned int __user *) arg); + break; + case TIOCSSOFTCAR: + if ((rc = get_user(ival, (unsigned int __user *) arg)) == 0) + tty->termios->c_cflag = + (tty->termios->c_cflag & ~CLOCAL) | + (ival ? CLOCAL : 0); + break; + case TIOCGSERIAL: + if ((rc = verify_area(VERIFY_WRITE, (void __user *) arg, + sizeof(struct serial_struct))) == 0) + rc = gs_getserial(&port->gs, (struct serial_struct *) arg); + break; + case TIOCSSERIAL: + if ((rc = verify_area(VERIFY_READ, (void __user *) arg, + sizeof(struct serial_struct))) == 0) + rc = gs_setserial(&port->gs, + (struct serial_struct *) arg); + break; + default: + rc = -ENOIOCTLCMD; + break; + } + + return rc; +} + +static void sci_throttle(struct tty_struct * tty) +{ + struct sci_port *port = (struct sci_port *)tty->driver_data; + + /* If the port is using any type of input flow + * control then throttle the port. 
+ */ + if ((tty->termios->c_cflag & CRTSCTS) || (I_IXOFF(tty)) ) + port->gs.flags |= SCI_RX_THROTTLE; +} + +static void sci_unthrottle(struct tty_struct * tty) +{ + struct sci_port *port = (struct sci_port *)tty->driver_data; + + /* Always unthrottle even if flow control is not enabled on + * this port in case we disabled flow control while the port + * was throttled + */ + port->gs.flags &= ~SCI_RX_THROTTLE; + sci_enable_rx_interrupts(port); + return; +} + +#ifdef CONFIG_PROC_FS +static int sci_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int i; + struct sci_port *port; + int len = 0; + + len += sprintf(page, "sciinfo:0.1\n"); + for (i = 0; i < SCI_NPORTS && len < 4000; i++) { + port = &sci_ports[i]; + len += sprintf(page+len, "%d: uart:%s address: %08x", i, + (port->type == PORT_SCI) ? "SCI" : "SCIF", + port->base); + len += sprintf(page+len, " baud:%d", port->gs.baud); + len += sprintf(page+len, " tx:%d rx:%d", + port->icount.tx, port->icount.rx); + + if (port->icount.frame) + len += sprintf(page+len, " fe:%d", port->icount.frame); + if (port->icount.parity) + len += sprintf(page+len, " pe:%d", port->icount.parity); + if (port->icount.brk) + len += sprintf(page+len, " brk:%d", port->icount.brk); + if (port->icount.overrun) + len += sprintf(page+len, " oe:%d", port->icount.overrun); + len += sprintf(page+len, "\n"); + } + return len; +} +#endif + +#ifdef CONFIG_CPU_FREQ +/* + * Here we define a transistion notifier so that we can update all of our + * ports' baud rate when the peripheral clock changes. + */ + +static int sci_notifier(struct notifier_block *self, unsigned long phase, void *p) +{ + struct cpufreq_freqs *freqs = p; + int i; + + if (phase == CPUFREQ_POSTCHANGE) { + for (i = 0; i < SCI_NPORTS; i++) { + /* + * This will force a baud rate change in hardware. 
+ */ + if (sci_ports[i].gs.tty != NULL) { + sci_set_baud(&sci_ports[i], sci_ports[i].gs.baud); + } + } + printk("%s: got a postchange notification for cpu %d (old %d, new %d)\n", + __FUNCTION__, freqs->cpu, freqs->old, freqs->new); + } + + return NOTIFY_OK; +} + +static struct notifier_block sci_nb = { &sci_notifier, NULL, 0 }; +#endif /* CONFIG_CPU_FREQ */ + +static struct tty_operations sci_ops = { + .open = sci_open, + .close = gs_close, + .write = gs_write, + .put_char = gs_put_char, + .flush_chars = gs_flush_chars, + .write_room = gs_write_room, + .chars_in_buffer = gs_chars_in_buffer, + .flush_buffer = gs_flush_buffer, + .ioctl = sci_ioctl, + .throttle = sci_throttle, + .unthrottle = sci_unthrottle, + .set_termios = gs_set_termios, + .stop = gs_stop, + .start = gs_start, + .hangup = gs_hangup, +#ifdef CONFIG_PROC_FS + .read_proc = sci_read_proc, +#endif + .tiocmget = sci_tiocmget, + .tiocmset = sci_tiocmset, +}; + +/* ********************************************************************** * + * Here are the initialization routines. 
* + * ********************************************************************** */ + +static int sci_init_drivers(void) +{ + int error; + struct sci_port *port; + sci_driver = alloc_tty_driver(SCI_NPORTS); + if (!sci_driver) + return -ENOMEM; + + sci_driver->owner = THIS_MODULE; + sci_driver->driver_name = "sci"; + sci_driver->name = "ttySC"; + sci_driver->devfs_name = "ttsc/"; + sci_driver->major = SCI_MAJOR; + sci_driver->minor_start = SCI_MINOR_START; + sci_driver->type = TTY_DRIVER_TYPE_SERIAL; + sci_driver->subtype = SERIAL_TYPE_NORMAL; + sci_driver->init_termios = tty_std_termios; + sci_driver->init_termios.c_cflag = + B9600 | CS8 | CREAD | HUPCL | CLOCAL | CRTSCTS; + sci_driver->flags = TTY_DRIVER_REAL_RAW; + tty_set_operations(sci_driver, &sci_ops); + if ((error = tty_register_driver(sci_driver))) { + printk(KERN_ERR "sci: Couldn't register SCI driver, error = %d\n", + error); + put_tty_driver(sci_driver); + return 1; + } + + for (port = &sci_ports[0]; port < &sci_ports[SCI_NPORTS]; port++) { + port->gs.magic = SCI_MAGIC; + port->gs.close_delay = HZ/2; + port->gs.closing_wait = 30 * HZ; + port->gs.rd = &sci_real_driver; + init_waitqueue_head(&port->gs.open_wait); + init_waitqueue_head(&port->gs.close_wait); + port->old_cflag = 0; + port->icount.cts = port->icount.dsr = + port->icount.rng = port->icount.dcd = 0; + port->icount.rx = port->icount.tx = 0; + port->icount.frame = port->icount.parity = 0; + port->icount.overrun = port->icount.brk = 0; + } + +#ifdef CONFIG_CPU_FREQ + /* Setup transition notifier */ + if (cpufreq_register_notifier(&sci_nb, CPUFREQ_TRANSITION_NOTIFIER) < 0) { + printk(KERN_ERR "sci: Unable to register CPU frequency notifier\n"); + return 1; + } + printk("sci: CPU frequency notifier registered\n"); +#endif + return 0; +} + +static int sci_request_irq(struct sci_port *port) +{ + int i; +#if !defined(SCI_ONLY) + irqreturn_t (*handlers[4])(int irq, void *p, struct pt_regs *regs) = { + sci_er_interrupt, sci_rx_interrupt, sci_tx_interrupt, + 
sci_br_interrupt, + }; +#else + void (*handlers[3])(int irq, void *ptr, struct pt_regs *regs) = { + sci_er_interrupt, sci_rx_interrupt, sci_tx_interrupt, + }; +#endif + for (i=0; i<(sizeof(handlers)/sizeof(handlers[0])); i++) { + if (!port->irqs[i]) continue; + if (request_irq(port->irqs[i], handlers[i], SA_INTERRUPT, + "sci", port)) { + printk(KERN_ERR "sci: Cannot allocate irq.\n"); + return -ENODEV; + } + } + return 0; +} + +static void sci_free_irq(struct sci_port *port) +{ + int i; + + for (i=0; i<4; i++) { + if (!port->irqs[i]) continue; + free_irq(port->irqs[i], port); + } +} + +static char banner[] __initdata = + KERN_INFO "SuperH SCI(F) driver initialized\n"; + +int __init sci_init(void) +{ + struct sci_port *port; + int j; + + printk("%s", banner); + + for (j=0; jbase, + (port->type == PORT_SCI) ? "SCI" : "SCIF"); + } + + sci_init_drivers(); + +#ifdef CONFIG_SH_STANDARD_BIOS + sh_bios_gdb_detach(); +#endif + return 0; /* Return -EIO when not detected */ +} + +module_init(sci_init); + +#ifdef MODULE +#undef func_enter +#undef func_exit + +void cleanup_module(void) +{ + tty_unregister_driver(sci_driver); + put_tty_driver(sci_driver); +} + +#include "generic_serial.c" +#endif + +#ifdef CONFIG_SERIAL_CONSOLE +/* + * Print a string to the serial port trying not to disturb + * any possible real use of the port... + */ +static void serial_console_write(struct console *co, const char *s, + unsigned count) +{ + put_string(sercons_port, s, count); +} + +static struct tty_driver *serial_console_device(struct console *c, int *index) +{ + *index = c->index; + return sci_driver; +} + +/* + * Setup initial baud/bits/parity. We do two things here: + * - construct a cflag setting for the first rs_open() + * - initialize the serial port + * Return non-zero if we didn't find a serial port. 
+ */ +static int __init serial_console_setup(struct console *co, char *options) +{ + int baud = 9600; + int bits = 8; + int parity = 'n'; + int cflag = CREAD | HUPCL | CLOCAL; + char *s; + + sercons_port = &sci_ports[co->index]; + + if (options) { + baud = simple_strtoul(options, NULL, 10); + s = options; + while(*s >= '0' && *s <= '9') + s++; + if (*s) parity = *s++; + if (*s) bits = *s - '0'; + } + + /* + * Now construct a cflag setting. + */ + switch (baud) { + case 19200: + cflag |= B19200; + break; + case 38400: + cflag |= B38400; + break; + case 57600: + cflag |= B57600; + break; + case 115200: + cflag |= B115200; + break; + case 9600: + default: + cflag |= B9600; + baud = 9600; + break; + } + switch (bits) { + case 7: + cflag |= CS7; + break; + default: + case 8: + cflag |= CS8; + break; + } + switch (parity) { + case 'o': case 'O': + cflag |= PARODD; + break; + case 'e': case 'E': + cflag |= PARENB; + break; + } + + co->cflag = cflag; + sercons_baud = baud; + +#if defined(__H8300S__) + h8300_sci_enable(sercons_port,sci_enable); +#endif + sci_set_termios_cflag(sercons_port, cflag, baud); + sercons_port->old_cflag = cflag; + + return 0; +} + +static struct console sercons = { + .name = "ttySC", + .write = serial_console_write, + .device = serial_console_device, + .setup = serial_console_setup, + .flags = CON_PRINTBUFFER, + .index = -1, +}; + +/* + * Register console. + */ + +#ifdef CONFIG_SH_EARLY_PRINTK +extern void sh_console_unregister (void); +#endif + +static int __init sci_console_init(void) +{ + register_console(&sercons); +#ifdef CONFIG_SH_EARLY_PRINTK + /* Now that the real console is available, unregister the one we + * used while first booting. 
+ */ + sh_console_unregister(); +#endif + return 0; +} +console_initcall(sci_console_init); + +#endif /* CONFIG_SERIAL_CONSOLE */ + + +#ifdef CONFIG_SH_KGDB + +/* Initialise the KGDB serial port */ +int kgdb_sci_setup(void) +{ + int cflag = CREAD | HUPCL | CLOCAL; + + if ((kgdb_portnum < 0) || (kgdb_portnum >= SCI_NPORTS)) + return -1; + + kgdb_sci_port = &sci_ports[kgdb_portnum]; + + switch (kgdb_baud) { + case 115200: + cflag |= B115200; + break; + case 57600: + cflag |= B57600; + break; + case 38400: + cflag |= B38400; + break; + case 19200: + cflag |= B19200; + break; + case 9600: + default: + cflag |= B9600; + kgdb_baud = 9600; + break; + } + + switch (kgdb_bits) { + case '7': + cflag |= CS7; + break; + default: + case '8': + cflag |= CS8; + break; + } + + switch (kgdb_parity) { + case 'O': + cflag |= PARODD; + break; + case 'E': + cflag |= PARENB; + break; + } + + kgdb_cflag = cflag; + sci_set_termios_cflag(kgdb_sci_port, kgdb_cflag, kgdb_baud); + + /* Set up the interrupt for BREAK from GDB */ + /* Commented out for now since it may not be possible yet... + request_irq(kgdb_sci_port->irqs[0], kgdb_break_interrupt, + SA_INTERRUPT, "sci", kgdb_sci_port); + sci_enable_rx_interrupts(kgdb_sci_port); + */ + + /* Setup complete: initialize function pointers */ + kgdb_getchar = kgdb_sci_getchar; + kgdb_putchar = kgdb_sci_putchar; + + return 0; +} + +#ifdef CONFIG_SH_KGDB_CONSOLE + +/* Create a console device */ +static kdev_t kgdb_console_device(struct console *c) +{ + return MKDEV(SCI_MAJOR, SCI_MINOR_START + c->index); +} + +/* Set up the KGDB console */ +static int __init kgdb_console_setup(struct console *co, char *options) +{ + /* NB we ignore 'options' because we've already done the setup */ + co->cflag = kgdb_cflag; + + return 0; +} + +/* Register the KGDB console so we get messages (d'oh!) 
*/ +static struct console kgdbcons; + +void __init kgdb_console_init(void) +{ + register_console(&kgdbcons); +} + +/* The console structure for KGDB */ +static struct console kgdbcons = { + name:"ttySC", + write:kgdb_console_write, + device:kgdb_console_device, + wait_key:serial_console_wait_key, + setup:kgdb_console_setup, + flags:CON_PRINTBUFFER | CON_ENABLED, + index:-1, +}; + +#endif /* CONFIG_SH_KGDB_CONSOLE */ + +#endif /* CONFIG_SH_KGDB */ diff --git a/drivers/char/sh-sci.h b/drivers/char/sh-sci.h new file mode 100644 index 000000000..5d07cd107 --- /dev/null +++ b/drivers/char/sh-sci.h @@ -0,0 +1,478 @@ +/* $Id: sh-sci.h,v 1.7 2004/02/10 17:04:17 lethal Exp $ + * + * linux/drivers/char/sh-sci.h + * + * SuperH on-chip serial module support. (SCI with no FIFO / with FIFO) + * Copyright (C) 1999, 2000 Niibe Yutaka + * Copyright (C) 2000 Greg Banks + * Modified to support multiple serial ports. Stuart Menefy (May 2000). + * Modified to support SH7760 SCIF. Paul Mundt (Oct 2003). + * Modified to support H8/300 Series Yoshinori Sato (Feb 2004).
+ * + */ +#include + +#if defined(__H8300H__) || defined(__H8300S__) +#include +#if defined(CONFIG_H83007) || defined(CONFIG_H83068) +#include +#endif +#if defined(CONFIG_H8S2678) +#include +#endif +#endif + +/* Values for sci_port->type */ +#define PORT_SCI 0 +#define PORT_SCIF 1 +#define PORT_IRDA 1 /* XXX: temporary assignment */ + +/* Offsets into the sci_port->irqs array */ +#define SCIx_ERI_IRQ 0 +#define SCIx_RXI_IRQ 1 +#define SCIx_TXI_IRQ 2 + +/* ERI, RXI, TXI, BRI */ +#define SCI_IRQS { 23, 24, 25, 0 } +#define SH3_SCIF_IRQS { 56, 57, 59, 58 } +#define SH3_IRDA_IRQS { 52, 53, 55, 54 } +#define SH4_SCIF_IRQS { 40, 41, 43, 42 } +#define STB1_SCIF1_IRQS {23, 24, 26, 25 } +#define SH7760_SCIF0_IRQS { 52, 53, 55, 54 } +#define SH7760_SCIF1_IRQS { 72, 73, 75, 74 } +#define SH7760_SCIF2_IRQS { 76, 77, 79, 78 } +#define H8300H_SCI_IRQS0 {52, 53, 54, 0 } +#define H8300H_SCI_IRQS1 {56, 57, 58, 0 } +#define H8300H_SCI_IRQS2 {60, 61, 62, 0 } +#define H8S_SCI_IRQS0 {88, 89, 90, 0 } +#define H8S_SCI_IRQS1 {92, 93, 94, 0 } +#define H8S_SCI_IRQS2 {96, 97, 98, 0 } + +#if defined(CONFIG_CPU_SUBTYPE_SH7708) +# define SCI_NPORTS 1 +# define SCI_INIT { \ + { {}, PORT_SCI, 0xfffffe80, SCI_IRQS, sci_init_pins_sci } \ +} +# define SCSPTR 0xffffff7c /* 8 bit */ +# define SCSCR_INIT(port) 0x30 /* TIE=0,RIE=0,TE=1,RE=1 */ +# define SCI_ONLY +#elif defined(CONFIG_CPU_SUBTYPE_SH7707) || defined(CONFIG_CPU_SUBTYPE_SH7709) +# define SCI_NPORTS 3 +# define SCI_INIT { \ + { {}, PORT_SCI, 0xfffffe80, SCI_IRQS, sci_init_pins_sci }, \ + { {}, PORT_SCIF, 0xA4000150, SH3_SCIF_IRQS, sci_init_pins_scif }, \ + { {}, PORT_SCIF, 0xA4000140, SH3_IRDA_IRQS, sci_init_pins_irda } \ +} +# define SCPCR 0xA4000116 /* 16 bit SCI and SCIF */ +# define SCPDR 0xA4000136 /* 8 bit SCI and SCIF */ +# define SCSCR_INIT(port) 0x30 /* TIE=0,RIE=0,TE=1,RE=1 */ +# define SCI_AND_SCIF +#elif defined(CONFIG_CPU_SUBTYPE_SH7750) || defined(CONFIG_CPU_SUBTYPE_SH7751) +# define SCI_NPORTS 2 +# define SCI_INIT { \ + { {}, 
PORT_SCI, 0xffe00000, SCI_IRQS, sci_init_pins_sci }, \ + { {}, PORT_SCIF, 0xFFE80000, SH4_SCIF_IRQS, sci_init_pins_scif } \ +} +# define SCSPTR1 0xffe0001c /* 8 bit SCI */ +# define SCSPTR2 0xFFE80020 /* 16 bit SCIF */ +# define SCIF_ORER 0x0001 /* overrun error bit */ +# define SCSCR_INIT(port) (((port)->type == PORT_SCI) ? \ + 0x30 /* TIE=0,RIE=0,TE=1,RE=1 */ : \ + 0x38 /* TIE=0,RIE=0,TE=1,RE=1,REIE=1 */ ) +# define SCI_AND_SCIF +#elif defined(CONFIG_CPU_SUBTYPE_SH7760) +# define SCI_NPORTS 3 +# define SCI_INIT { \ + { {}, PORT_SCIF, 0xfe600000, SH7760_SCIF0_IRQS, sci_init_pins_scif }, \ + { {}, PORT_SCIF, 0xfe610000, SH7760_SCIF1_IRQS, sci_init_pins_scif }, \ + { {}, PORT_SCIF, 0xfe620000, SH7760_SCIF2_IRQS, sci_init_pins_scif } \ +} +# define SCSPTR0 0xfe600024 /* 16 bit SCIF */ +# define SCSPTR1 0xfe610024 /* 16 bit SCIF */ +# define SCSPTR2 0xfe620024 /* 16 bit SCIF */ +# define SCIF_ORER 0x0001 /* overrun error bit */ +# define SCSCR_INIT(port) 0x38 /* TIE=0,RIE=0,TE=1,RE=1,REIE=1 */ +# define SCIF_ONLY +#elif defined(CONFIG_CPU_SUBTYPE_ST40STB1) +# define SCI_NPORTS 2 +# define SCI_INIT { \ + { {}, PORT_SCIF, 0xffe00000, STB1_SCIF1_IRQS, sci_init_pins_scif }, \ + { {}, PORT_SCIF, 0xffe80000, SH4_SCIF_IRQS, sci_init_pins_scif } \ +} +# define SCSPTR1 0xffe00020 /* 16 bit SCIF */ +# define SCSPTR2 0xffe80020 /* 16 bit SCIF */ +# define SCIF_ORER 0x0001 /* overrun error bit */ +# define SCSCR_INIT(port) 0x38 /* TIE=0,RIE=0,TE=1,RE=1,REIE=1 */ +# define SCIF_ONLY +#elif defined(CONFIG_H83007) || defined(CONFIG_H83068) +# define SCI_NPORTS 3 +# define SCI_INIT { \ + { {}, PORT_SCI, 0x00ffffb0, H8300H_SCI_IRQS0, sci_init_pins_sci }, \ + { {}, PORT_SCI, 0x00ffffb8, H8300H_SCI_IRQS1, sci_init_pins_sci }, \ + { {}, PORT_SCI, 0x00ffffc0, H8300H_SCI_IRQS2, sci_init_pins_sci } \ +} +# define SCSCR_INIT(port) 0x30 /* TIE=0,RIE=0,TE=1,RE=1 */ +# define SCI_ONLY +# define H8300_SCI_DR(ch) *(volatile char *)(P1DR + h8300_sci_pins[ch].port) +#elif defined(CONFIG_H8S2678)
+# define SCI_NPORTS 3 +# define SCI_INIT { \ + { {}, PORT_SCI, 0x00ffff78, H8S_SCI_IRQS0, sci_init_pins_sci }, \ + { {}, PORT_SCI, 0x00ffff80, H8S_SCI_IRQS1, sci_init_pins_sci }, \ + { {}, PORT_SCI, 0x00ffff88, H8S_SCI_IRQS2, sci_init_pins_sci } \ +} +# define SCSCR_INIT(port) 0x30 /* TIE=0,RIE=0,TE=1,RE=1 */ +# define SCI_ONLY +# define H8300_SCI_DR(ch) *(volatile char *)(P1DR + h8300_sci_pins[ch].port) +#else +# error CPU subtype not defined +#endif + +/* SCSCR */ +#define SCI_CTRL_FLAGS_TIE 0x80 /* all */ +#define SCI_CTRL_FLAGS_RIE 0x40 /* all */ +#define SCI_CTRL_FLAGS_TE 0x20 /* all */ +#define SCI_CTRL_FLAGS_RE 0x10 /* all */ +/* SCI_CTRL_FLAGS_REIE 0x08 * 7750 SCIF */ +/* SCI_CTRL_FLAGS_MPIE 0x08 * 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */ +/* SCI_CTRL_FLAGS_TEIE 0x04 * 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */ +/* SCI_CTRL_FLAGS_CKE1 0x02 * all */ +/* SCI_CTRL_FLAGS_CKE0 0x01 * 7707 SCI/SCIF, 7708 SCI, 7709 SCI/SCIF, 7750 SCI */ + +/* SCxSR SCI */ +#define SCI_TDRE 0x80 /* 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */ +#define SCI_RDRF 0x40 /* 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */ +#define SCI_ORER 0x20 /* 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */ +#define SCI_FER 0x10 /* 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */ +#define SCI_PER 0x08 /* 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */ +#define SCI_TEND 0x04 /* 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */ +/* SCI_MPB 0x02 * 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */ +/* SCI_MPBT 0x01 * 7707 SCI, 7708 SCI, 7709 SCI, 7750 SCI */ + +#define SCI_ERRORS ( SCI_PER | SCI_FER | SCI_ORER) + +/* SCxSR SCIF */ +#define SCIF_ER 0x0080 /* 7707 SCIF, 7709 SCIF, 7750 SCIF */ +#define SCIF_TEND 0x0040 /* 7707 SCIF, 7709 SCIF, 7750 SCIF */ +#define SCIF_TDFE 0x0020 /* 7707 SCIF, 7709 SCIF, 7750 SCIF */ +#define SCIF_BRK 0x0010 /* 7707 SCIF, 7709 SCIF, 7750 SCIF */ +#define SCIF_FER 0x0008 /* 7707 SCIF, 7709 SCIF, 7750 SCIF */ +#define SCIF_PER 0x0004 /* 7707 SCIF, 7709 SCIF, 7750 SCIF */ +#define SCIF_RDF 0x0002 /* 7707 SCIF, 7709 
SCIF, 7750 SCIF */ +#define SCIF_DR 0x0001 /* 7707 SCIF, 7709 SCIF, 7750 SCIF */ + +#define SCIF_ERRORS ( SCIF_PER | SCIF_FER | SCIF_ER | SCIF_BRK) + +#if defined(SCI_ONLY) +# define SCxSR_TEND(port) SCI_TEND +# define SCxSR_ERRORS(port) SCI_ERRORS +# define SCxSR_RDxF(port) SCI_RDRF +# define SCxSR_TDxE(port) SCI_TDRE +# define SCxSR_ORER(port) SCI_ORER +# define SCxSR_FER(port) SCI_FER +# define SCxSR_PER(port) SCI_PER +# define SCxSR_BRK(port) 0x00 +# define SCxSR_RDxF_CLEAR(port) 0xbc +# define SCxSR_ERROR_CLEAR(port) 0xc4 +# define SCxSR_TDxE_CLEAR(port) 0x78 +# define SCxSR_BREAK_CLEAR(port) 0xc4 +#elif defined(SCIF_ONLY) +# define SCxSR_TEND(port) SCIF_TEND +# define SCxSR_ERRORS(port) SCIF_ERRORS +# define SCxSR_RDxF(port) SCIF_RDF +# define SCxSR_TDxE(port) SCIF_TDFE +# define SCxSR_ORER(port) 0x0000 +# define SCxSR_FER(port) SCIF_FER +# define SCxSR_PER(port) SCIF_PER +# define SCxSR_BRK(port) SCIF_BRK +# define SCxSR_RDxF_CLEAR(port) 0x00fc +# define SCxSR_ERROR_CLEAR(port) 0x0073 +# define SCxSR_TDxE_CLEAR(port) 0x00df +# define SCxSR_BREAK_CLEAR(port) 0x00e3 +#else +# define SCxSR_TEND(port) (((port)->type == PORT_SCI) ? SCI_TEND : SCIF_TEND) +# define SCxSR_ERRORS(port) (((port)->type == PORT_SCI) ? SCI_ERRORS : SCIF_ERRORS) +# define SCxSR_RDxF(port) (((port)->type == PORT_SCI) ? SCI_RDRF : SCIF_RDF) +# define SCxSR_TDxE(port) (((port)->type == PORT_SCI) ? SCI_TDRE : SCIF_TDFE) +# define SCxSR_ORER(port) (((port)->type == PORT_SCI) ? SCI_ORER : 0x0000) +# define SCxSR_FER(port) (((port)->type == PORT_SCI) ? SCI_FER : SCIF_FER) +# define SCxSR_PER(port) (((port)->type == PORT_SCI) ? SCI_PER : SCIF_PER) +# define SCxSR_BRK(port) (((port)->type == PORT_SCI) ? 0x00 : SCIF_BRK) +# define SCxSR_RDxF_CLEAR(port) (((port)->type == PORT_SCI) ? 0xbc : 0x00fc) +# define SCxSR_ERROR_CLEAR(port) (((port)->type == PORT_SCI) ? 0xc4 : 0x0073) +# define SCxSR_TDxE_CLEAR(port) (((port)->type == PORT_SCI) ? 
0x78 : 0x00df) +# define SCxSR_BREAK_CLEAR(port) (((port)->type == PORT_SCI) ? 0xc4 : 0x00e3) +#endif + +/* SCFCR */ +#define SCFCR_RFRST 0x0002 +#define SCFCR_TFRST 0x0004 +#define SCFCR_MCE 0x0008 + +#define SCI_MAJOR 204 +#define SCI_MINOR_START 8 + +/* Generic serial flags */ +#define SCI_RX_THROTTLE 0x0000001 + +#define SCI_MAGIC 0xbabeface + +/* + * Events are used to schedule things to happen at timer-interrupt + * time, instead of at rs interrupt time. + */ +#define SCI_EVENT_WRITE_WAKEUP 0 + +struct sci_port { + struct gs_port gs; + int type; + unsigned int base; + unsigned char irqs[4]; /* ERI, RXI, TXI, BRI */ + void (*init_pins)(struct sci_port* port, unsigned int cflag); + unsigned int old_cflag; + struct async_icount icount; + struct work_struct tqueue; + unsigned long event; + int break_flag; +}; + +#define SCI_IN(size, offset) \ + unsigned int addr = port->base + (offset); \ + if ((size) == 8) { \ + return ctrl_inb(addr); \ + } else { \ + return ctrl_inw(addr); \ + } +#define SCI_OUT(size, offset, value) \ + unsigned int addr = port->base + (offset); \ + if ((size) == 8) { \ + ctrl_outb(value, addr); \ + } else { \ + ctrl_outw(value, addr); \ + } + +#define CPU_SCIx_FNS(name, sci_offset, sci_size, scif_offset, scif_size)\ + static inline unsigned int sci_##name##_in(struct sci_port* port) \ + { \ + if (port->type == PORT_SCI) { \ + SCI_IN(sci_size, sci_offset) \ + } else { \ + SCI_IN(scif_size, scif_offset); \ + } \ + } \ + static inline void sci_##name##_out(struct sci_port* port, unsigned int value) \ + { \ + if (port->type == PORT_SCI) { \ + SCI_OUT(sci_size, sci_offset, value) \ + } else { \ + SCI_OUT(scif_size, scif_offset, value); \ + } \ + } + +#define CPU_SCIF_FNS(name, scif_offset, scif_size) \ + static inline unsigned int sci_##name##_in(struct sci_port* port) \ + { \ + SCI_IN(scif_size, scif_offset); \ + } \ + static inline void sci_##name##_out(struct sci_port* port, unsigned int value) \ + { \ + SCI_OUT(scif_size, scif_offset, value); \ 
+ } + +#define CPU_SCI_FNS(name, sci_offset, sci_size) \ + static inline unsigned int sci_##name##_in(struct sci_port* port) \ + { \ + SCI_IN(sci_size, sci_offset); \ + } \ + static inline void sci_##name##_out(struct sci_port* port, unsigned int value) \ + { \ + SCI_OUT(sci_size, sci_offset, value); \ + } + +#ifdef CONFIG_CPU_SH3 +#define SCIx_FNS(name, sh3_sci_offset, sh3_sci_size, sh4_sci_offset, sh4_sci_size, \ + sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size, \ + h8_sci_offset, h8_sci_size) \ + CPU_SCIx_FNS(name, sh3_sci_offset, sh3_sci_size, sh3_scif_offset, sh3_scif_size) +#define SCIF_FNS(name, sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size) \ + CPU_SCIF_FNS(name, sh3_scif_offset, sh3_scif_size) +#elif defined(__H8300H__) || defined(__H8300S__) +#define SCIx_FNS(name, sh3_sci_offset, sh3_sci_size, sh4_sci_offset, sh4_sci_size, \ + sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size, \ + h8_sci_offset, h8_sci_size) \ + CPU_SCI_FNS(name, h8_sci_offset, h8_sci_size) +#define SCIF_FNS(name, sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size) +#else +#define SCIx_FNS(name, sh3_sci_offset, sh3_sci_size, sh4_sci_offset, sh4_sci_size, \ + sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size, \ + h8_sci_offset, h8_sci_size) \ + CPU_SCIx_FNS(name, sh4_sci_offset, sh4_sci_size, sh4_scif_offset, sh4_scif_size) +#define SCIF_FNS(name, sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size) \ + CPU_SCIF_FNS(name, sh4_scif_offset, sh4_scif_size) +#endif + +/* reg SCI/SH3 SCI/SH4 SCIF/SH3 SCIF/SH4 SCI/H8*/ +/* name off sz off sz off sz off sz off sz*/ +SCIx_FNS(SCSMR, 0x00, 8, 0x00, 8, 0x00, 8, 0x00, 16, 0x00, 8) +SCIx_FNS(SCBRR, 0x02, 8, 0x04, 8, 0x02, 8, 0x04, 8, 0x01, 8) +SCIx_FNS(SCSCR, 0x04, 8, 0x08, 8, 0x04, 8, 0x08, 16, 0x02, 8) +SCIx_FNS(SCxTDR, 0x06, 8, 0x0c, 8, 0x06, 8, 0x0C, 8, 0x03, 8) +SCIx_FNS(SCxSR, 0x08, 8, 0x10, 8, 0x08, 16, 0x10, 16, 0x04, 8) +SCIx_FNS(SCxRDR, 0x0a, 8, 0x14, 8, 0x0A, 
8, 0x14, 8, 0x05, 8) +SCIF_FNS(SCFCR, 0x0c, 8, 0x18, 16) +SCIF_FNS(SCFDR, 0x0e, 16, 0x1C, 16) +SCIF_FNS(SCLSR, 0, 0, 0x24, 16) + +#define sci_in(port, reg) sci_##reg##_in(port) +#define sci_out(port, reg, value) sci_##reg##_out(port, value) + +/* H8/300 series SCI pins assignment */ +#if defined(__H8300H__) || defined(__H8300S__) +static const struct __attribute__((packed)) +{ + int port; /* GPIO port no */ + unsigned short rx,tx; /* GPIO bit no */ +} h8300_sci_pins[] = +{ +#if defined(CONFIG_H83007) || defined(CONFIG_H83068) + { /* SCI0 */ + .port = H8300_GPIO_P9, + .rx = H8300_GPIO_B2, + .tx = H8300_GPIO_B0, + }, + { /* SCI1 */ + .port = H8300_GPIO_P9, + .rx = H8300_GPIO_B3, + .tx = H8300_GPIO_B1, + }, + { /* SCI2 */ + .port = H8300_GPIO_PB, + .rx = H8300_GPIO_B7, + .tx = H8300_GPIO_B6, + } +#elif defined(CONFIG_H8S2678) + { /* SCI0 */ + .port = H8300_GPIO_P3, + .rx = H8300_GPIO_B2, + .tx = H8300_GPIO_B0, + }, + { /* SCI1 */ + .port = H8300_GPIO_P3, + .rx = H8300_GPIO_B3, + .tx = H8300_GPIO_B1, + }, + { /* SCI2 */ + .port = H8300_GPIO_P5, + .rx = H8300_GPIO_B1, + .tx = H8300_GPIO_B0, + } +#endif +}; +#endif + +#if defined(CONFIG_CPU_SUBTYPE_SH7708) +static inline int sci_rxd_in(struct sci_port *port) +{ + if (port->base == 0xfffffe80) + return ctrl_inb(SCSPTR)&0x01 ? 1 : 0; /* SCI */ + return 1; +} +#elif defined(CONFIG_CPU_SUBTYPE_SH7707) || defined(CONFIG_CPU_SUBTYPE_SH7709) +static inline int sci_rxd_in(struct sci_port *port) +{ + if (port->base == 0xfffffe80) + return ctrl_inb(SCPDR)&0x01 ? 1 : 0; /* SCI */ + if (port->base == 0xa4000150) + return ctrl_inb(SCPDR)&0x10 ? 1 : 0; /* SCIF */ + if (port->base == 0xa4000140) + return ctrl_inb(SCPDR)&0x04 ? 1 : 0; /* IRDA */ + return 1; +} +#elif defined(CONFIG_CPU_SUBTYPE_SH7750) || defined(CONFIG_CPU_SUBTYPE_SH7751) +static inline int sci_rxd_in(struct sci_port *port) +{ +#ifndef SCIF_ONLY + if (port->base == 0xffe00000) + return ctrl_inb(SCSPTR1)&0x01 ? 
1 : 0; /* SCI */ +#endif +#ifndef SCI_ONLY + if (port->base == 0xffe80000) + return ctrl_inw(SCSPTR2)&0x0001 ? 1 : 0; /* SCIF */ +#endif + return 1; +} +#elif defined(CONFIG_CPU_SUBTYPE_SH7760) +static inline int sci_rxd_in(struct sci_port *port) +{ + if (port->base == 0xfe600000) + return ctrl_inw(SCSPTR0) & 0x0001 ? 1 : 0; /* SCIF */ + if (port->base == 0xfe610000) + return ctrl_inw(SCSPTR1) & 0x0001 ? 1 : 0; /* SCIF */ + if (port->base == 0xfe620000) + return ctrl_inw(SCSPTR2) & 0x0001 ? 1 : 0; /* SCIF */ + return 1; +} +#elif defined(CONFIG_CPU_SUBTYPE_ST40STB1) +static inline int sci_rxd_in(struct sci_port *port) +{ + if (port->base == 0xffe00000) + return ctrl_inw(SCSPTR1)&0x0001 ? 1 : 0; /* SCIF */ + else + return ctrl_inw(SCSPTR2)&0x0001 ? 1 : 0; /* SCIF */ + +} +#elif defined(__H8300H__) || defined(__H8300S__) +static inline int sci_rxd_in(struct sci_port *port) +{ + int ch = (port->base - SMR0) >> 3; + return (H8300_SCI_DR(ch) & h8300_sci_pins[ch].rx) ? 1 : 0; +} +#endif + +/* + * Values for the BitRate Register (SCBRR) + * + * The values are actually divisors for a frequency which can + * be internal to the SH3 (14.7456MHz) or derived from an external + * clock source. This driver assumes the internal clock is used; + * to support using an external clock source, config options or + * possibly command-line options would need to be added. + * + * Also, to support speeds below 2400 (why?) the lower 2 bits of + * the SCSMR register would also need to be set to non-zero values. + * + * -- Greg Banks 27Feb2000 + * + * Answer: The SCBRR register is only eight bits, and the value in + * it gets larger with lower baud rates. At around 2400 (depending on + * the peripheral module clock) you run out of bits. However the + * lower two bits of SCSMR allow the module clock to be divided down, + * scaling the value which is needed in SCBRR. + * + * -- Stuart Menefy - 23 May 2000 + * + * I meant, why would anyone bother with bitrates below 2400.
+ * + * -- Greg Banks - 7Jul2000 + * + * You "speedist"! How will I use my 110bps ASR-33 teletype with paper + * tape reader as a console! + * + * -- Mitch Davis - 15 Jul 2000 + */ + +#define PCLK (current_cpu_data.module_clock) + +#if !defined(__H8300H__) && !defined(__H8300S__) +#define SCBRR_VALUE(bps) ((PCLK+16*bps)/(32*bps)-1) +#else +#define SCBRR_VALUE(bps) (((CONFIG_CPU_CLOCK*1000/32)/bps)-1) +#endif +#define BPS_2400 SCBRR_VALUE(2400) +#define BPS_4800 SCBRR_VALUE(4800) +#define BPS_9600 SCBRR_VALUE(9600) +#define BPS_19200 SCBRR_VALUE(19200) +#define BPS_38400 SCBRR_VALUE(38400) +#define BPS_57600 SCBRR_VALUE(57600) +#define BPS_115200 SCBRR_VALUE(115200) +#define BPS_230400 SCBRR_VALUE(230400) + diff --git a/drivers/i2c/busses/i2c-ixp42x.c b/drivers/i2c/busses/i2c-ixp42x.c new file mode 100644 index 000000000..59fcb70fd --- /dev/null +++ b/drivers/i2c/busses/i2c-ixp42x.c @@ -0,0 +1,176 @@ +/* + * drivers/i2c/i2c-adap-ixp42x.c + * + * Intel's IXP42x XScale NPU chipsets (IXP420, 421, 422, 425) do not have + * an on board I2C controller but provide 16 GPIO pins that are often + * used to create an I2C bus. This driver provides an i2c_adapter + * interface that plugs in under algo_bit and drives the GPIO pins + * as instructed by the alogorithm driver. + * + * Author: Deepak Saxena + * + * Copyright (c) 2003-2004 MontaVista Software Inc. + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + * + * NOTE: Since different platforms will use different GPIO pins for + * I2C, this driver uses an IXP42x-specific platform_data + * pointer to pass the GPIO numbers to the driver. This + * allows us to support all the different IXP42x platforms + * w/o having to put #ifdefs in this driver. 
+ * + * See arch/arm/mach-ixp42x/ixdp425.c for an example of building a + * device list and filling in the ixp42x_i2c_pins data structure + * that is passed as the platform_data to this driver. + */ + +#include +#include +#include +#include +#include +#include + +#include /* Pick up IXP42x-specific bits */ + +static inline int ixp42x_scl_pin(void *data) +{ + return ((struct ixp42x_i2c_pins*)data)->scl_pin; +} + +static inline int ixp42x_sda_pin(void *data) +{ + return ((struct ixp42x_i2c_pins*)data)->sda_pin; +} + +static void ixp42x_bit_setscl(void *data, int val) +{ + gpio_line_set(ixp42x_scl_pin(data), 0); + gpio_line_config(ixp42x_scl_pin(data), + val ? IXP425_GPIO_IN : IXP425_GPIO_OUT ); +} + +static void ixp42x_bit_setsda(void *data, int val) +{ + gpio_line_set(ixp42x_sda_pin(data), 0); + gpio_line_config(ixp42x_sda_pin(data), + val ? IXP425_GPIO_IN : IXP425_GPIO_OUT ); +} + +static int ixp42x_bit_getscl(void *data) +{ + int scl; + + gpio_line_config(ixp42x_scl_pin(data), IXP425_GPIO_IN ); + gpio_line_get(ixp42x_scl_pin(data), &scl); + + return scl; +} + +static int ixp42x_bit_getsda(void *data) +{ + int sda; + + gpio_line_config(ixp42x_sda_pin(data), IXP425_GPIO_IN ); + gpio_line_get(ixp42x_sda_pin(data), &sda); + + return sda; +} + +struct ixp42x_i2c_data { + struct ixp42x_i2c_pins *gpio_pins; + struct i2c_adapter adapter; + struct i2c_algo_bit_data algo_data; +}; + +static int ixp42x_i2c_remove(struct device *dev) +{ + struct platform_device *plat_dev = to_platform_device(dev); + struct ixp42x_i2c_data *drv_data = dev_get_drvdata(&plat_dev->dev); + + dev_set_drvdata(&plat_dev->dev, NULL); + + i2c_bit_del_bus(&drv_data->adapter); + + kfree(drv_data); + + return 0; +} + +static int ixp42x_i2c_probe(struct device *dev) +{ + int err; + struct platform_device *plat_dev = to_platform_device(dev); + struct ixp42x_i2c_pins *gpio = plat_dev->dev.platform_data; + struct ixp42x_i2c_data *drv_data = + kmalloc(sizeof(struct ixp42x_i2c_data), GFP_KERNEL); + + 
if(!drv_data) + return -ENOMEM; + + memzero(drv_data, sizeof(struct ixp42x_i2c_data)); + drv_data->gpio_pins = gpio; + + /* + * We could make a lot of these structures static, but + * certain platforms may have multiple GPIO-based I2C + * buses for various device domains, so we need per-device + * algo_data->data. + */ + drv_data->algo_data.data = gpio; + drv_data->algo_data.setsda = ixp42x_bit_setsda; + drv_data->algo_data.setscl = ixp42x_bit_setscl; + drv_data->algo_data.getsda = ixp42x_bit_getsda; + drv_data->algo_data.getscl = ixp42x_bit_getscl; + drv_data->algo_data.udelay = 10; + drv_data->algo_data.mdelay = 10; + drv_data->algo_data.timeout = 100; + + drv_data->adapter.id = I2C_HW_B_IXP425, + drv_data->adapter.algo_data = &drv_data->algo_data, + + drv_data->adapter.dev.parent = &plat_dev->dev; + + gpio_line_config(gpio->scl_pin, IXP425_GPIO_IN); + gpio_line_config(gpio->sda_pin, IXP425_GPIO_IN); + gpio_line_set(gpio->scl_pin, 0); + gpio_line_set(gpio->sda_pin, 0); + + if ((err = i2c_bit_add_bus(&drv_data->adapter)) != 0) { + printk(KERN_ERR "ERROR: Could not install %s\n", dev->bus_id); + + kfree(drv_data); + return err; + } + + dev_set_drvdata(&plat_dev->dev, drv_data); + + return 0; +} + +static struct device_driver ixp42x_i2c_driver = { + .name = "IXP42X-I2C", + .bus = &platform_bus_type, + .probe = ixp42x_i2c_probe, + .remove = ixp42x_i2c_remove, +}; + +static int __init ixp42x_i2c_init(void) +{ + return driver_register(&ixp42x_i2c_driver); +} + +static void __exit ixp42x_i2c_exit(void) +{ + driver_unregister(&ixp42x_i2c_driver); +} + +module_init(ixp42x_i2c_init); +module_exit(ixp42x_i2c_exit); + +MODULE_DESCRIPTION("GPIO-based I2C driver for IXP42x systems"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Deepak Saxena "); + diff --git a/drivers/ide/pci/cmd640.h b/drivers/ide/pci/cmd640.h new file mode 100644 index 000000000..28b6e0452 --- /dev/null +++ b/drivers/ide/pci/cmd640.h @@ -0,0 +1,32 @@ +#ifndef CMD640_H +#define CMD640_H + +#include +#include
+#include + +#define IDE_IGNORE ((void *)-1) + +static ide_pci_device_t cmd640_chipsets[] __initdata = { + { + .vendor = PCI_VENDOR_ID_CMD, + .device = PCI_DEVICE_ID_CMD_640, + .name = "CMD640", + .init_setup = NULL, + .init_chipset = NULL, + .init_iops = NULL, + .init_hwif = IDE_IGNORE, + .init_dma = NULL, + .channels = 2, + .autodma = NODMA, + .enablebits = {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, + .bootable = ON_BOARD, + .extra = 0 + },{ + .vendor = 0, + .device = 0, + .bootable = EOL, + } +}; + +#endif /* CMD640_H */ diff --git a/drivers/ide/ppc/swarm.c b/drivers/ide/ppc/swarm.c new file mode 100644 index 000000000..d54a55525 --- /dev/null +++ b/drivers/ide/ppc/swarm.c @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2001 Broadcom Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +/* Derived loosely from ide-pmac.c, so: + * + * Copyright (C) 1998 Paul Mackerras.
+ * Copyright (C) 1995-1998 Mark Lord + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define __IDE_SWARM_C + +#include + +void __init swarm_ide_probe(void) +{ + int i; + ide_hwif_t *hwif; + /* + * Find the first untaken slot in hwifs + */ + for (i = 0; i < MAX_HWIFS; i++) { + if (!ide_hwifs[i].io_ports[IDE_DATA_OFFSET]) { + break; + } + } + if (i == MAX_HWIFS) { + printk("No space for SWARM onboard IDE driver in ide_hwifs[]. Not enabled.\n"); + return; + } + + /* Set up our stuff */ + hwif = &ide_hwifs[i]; + hwif->hw.io_ports[IDE_DATA_OFFSET] = SWARM_IDE_REG(0x1f0); + hwif->hw.io_ports[IDE_ERROR_OFFSET] = SWARM_IDE_REG(0x1f1); + hwif->hw.io_ports[IDE_NSECTOR_OFFSET] = SWARM_IDE_REG(0x1f2); + hwif->hw.io_ports[IDE_SECTOR_OFFSET] = SWARM_IDE_REG(0x1f3); + hwif->hw.io_ports[IDE_LCYL_OFFSET] = SWARM_IDE_REG(0x1f4); + hwif->hw.io_ports[IDE_HCYL_OFFSET] = SWARM_IDE_REG(0x1f5); + hwif->hw.io_ports[IDE_SELECT_OFFSET] = SWARM_IDE_REG(0x1f6); + hwif->hw.io_ports[IDE_STATUS_OFFSET] = SWARM_IDE_REG(0x1f7); + hwif->hw.io_ports[IDE_CONTROL_OFFSET] = SWARM_IDE_REG(0x3f6); + hwif->hw.io_ports[IDE_IRQ_OFFSET] = SWARM_IDE_REG(0x3f7); +// hwif->hw->ack_intr = swarm_ide_ack_intr; + hwif->hw.irq = SWARM_IDE_INT; +#if 0 + hwif->iops = swarm_iops; +#else + hwif->OUTB = hwif->OUTBP = swarm_outb; + hwif->OUTW = hwif->OUTWP = swarm_outw; + hwif->OUTL = hwif->OUTLP = swarm_outl; + hwif->OUTSW = hwif->OUTSWP = swarm_outsw; + hwif->OUTSL = hwif->OUTSLP = swarm_outsl; + hwif->INB = hwif->INBP = swarm_inb; + hwif->INW = hwif->INWP = swarm_inw; + hwif->INL = hwif->INLP = swarm_inl; + hwif->INSW = hwif->INSWP = swarm_insw; + hwif->INSL = hwif->INSLP = swarm_insl; +#endif +#if 0 + hwif->pioops = swarm_pio_ops; +#else + hwif->ata_input_data = swarm_ata_input_data; + hwif->ata_output_data = swarm_ata_output_data; + hwif->atapi_input_bytes = swarm_atapi_input_bytes; + hwif->atapi_output_bytes = swarm_atapi_output_bytes; +#endif + 
memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports)); + hwif->irq = hwif->hw.irq; + printk("SWARM onboard IDE configured as device %i\n", i); + +#ifndef HWIF_PROBE_CLASSIC_METHOD + probe_hwif_init(hwif->index); +#endif /* HWIF_PROBE_CLASSIC_METHOD */ + +} + diff --git a/drivers/net/auto_irq.c b/drivers/net/auto_irq.c new file mode 100644 index 000000000..96ddc77b7 --- /dev/null +++ b/drivers/net/auto_irq.c @@ -0,0 +1,68 @@ +/* auto_irq.c: Auto-configure IRQ lines for linux. */ +/* + Written 1994 by Donald Becker. + + The author may be reached as becker@scyld.com + + This code is a general-purpose IRQ line detector for devices with + jumpered IRQ lines. If you can make the device raise an IRQ (and + that IRQ line isn't already being used), these routines will tell + you what IRQ line it's using -- perfect for those oh-so-cool boot-time + device probes! + + To use this, first call autoirq_setup(timeout). TIMEOUT is how many + 'jiffies' (1/100 sec.) to detect other devices that have active IRQ lines, + and can usually be zero at boot. 'autoirq_setup()' returns the bit + vector of nominally-available IRQ lines (lines may be physically in-use, + but not yet registered to a device). + Next, set up your device to trigger an interrupt. + Finally call autoirq_report(TIMEOUT) to find out which IRQ line was + most recently active. The TIMEOUT should usually be zero, but may + be set to the number of jiffies to wait for a slow device to raise an IRQ. + + The idea of using the setup timeout to filter out bogus IRQs came from + the serial driver. 
+*/ + + +#ifdef version +static const char *version= +"auto_irq.c:v1.11 Donald Becker (becker@scyld.com)"; +#endif + +#include +#include +#include +#include +#include +#include +#include + +static unsigned long irqs; + +void autoirq_setup(int waittime) +{ + irqs = probe_irq_on(); +} + +#define BUSY_LOOP_UNTIL(j) while ((long)(jiffies-(j)) < 0) ; +int autoirq_report(int waittime) +{ + unsigned long delay = jiffies + waittime; + BUSY_LOOP_UNTIL(delay) + return probe_irq_off(irqs); +} + +EXPORT_SYMBOL(autoirq_setup); +EXPORT_SYMBOL(autoirq_report); + + +/* + * Local variables: + * compile-command: "gcc -DKERNEL -Wall -O6 -fomit-frame-pointer -I/usr/src/linux/net/tcp -c auto_irq.c" + * version-control: t + * kept-new-versions: 5 + * c-indent-level: 4 + * tab-width: 4 + * End: + */ diff --git a/drivers/net/rcif.h b/drivers/net/rcif.h new file mode 100644 index 000000000..85ff8615c --- /dev/null +++ b/drivers/net/rcif.h @@ -0,0 +1,292 @@ +/* +** ************************************************************************* +** +** +** R C I F . H +** +** +** RedCreek InterFace include file. +** +** --------------------------------------------------------------------- +** --- Copyright (c) 1998-1999, RedCreek Communications Inc. --- +** --- All rights reserved. --- +** --------------------------------------------------------------------- +** +** File Description: +** +** Header file private ioctl commands. +** +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. + +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. 
+ +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +** ************************************************************************* +*/ + +#ifndef RCIF_H +#define RCIF_H + +/* The following protocol revision # should be incremented every time + a new protocol or new structures are used in this file. */ +int USER_PROTOCOL_REV = 2; /* used to track different protocol revisions */ + +/* define a single TCB & buffer */ +typedef struct { /* a single buffer */ + U32 context; /* context */ + U32 scount; /* segment count */ + U32 size; /* segment size */ + U32 addr; /* segment physical address */ +} __attribute__ ((packed)) + singleB, *psingleB; +typedef struct { /* a single TCB */ + /* + ** +-----------------------+ + ** | 1 | one buffer in the TCB + ** +-----------------------+ + ** | | user's buffer reference + ** +-----------------------+ + ** | 1 | one segment buffer + ** +-----------------------+ _ + ** | | size \ + ** +-----------------------+ \ segment descriptor + ** | | physical address of buffer / + ** +-----------------------+ _/ + */ + U32 bcount; /* buffer count */ + singleB b; /* buffer */ + +} __attribute__ ((packed)) + singleTCB, *psingleTCB; + +/* + When adding new entries, please add all 5 related changes, since + it helps keep everything consistent: + 1) User structure entry + 2) User data entry + 3) Structure short-cut entry + 4) Data short-cut entry + 5) Command identifier entry + + For Example ("GETSPEED"): + 1) struct RCgetspeed_tag { U32 LinkSpeedCode; } RCgetspeed; + 2) struct RCgetspeed_tag *getspeed; + 3) #define RCUS_GETSPEED data.RCgetspeed; + 4) #define RCUD_GETSPEED _RC_user_data.getspeed + 5) #define RCUC_GETSPEED 0x02 + + Notes for the "GETSPEED" entry, above: + 1) RCgetspeed - RC{name} + RCgetspeed_tag - RC{name}_tag + LinkSpeedCode - create any structure format desired (not too large, + since 
memory will be unioned with all other entries) + 2) RCgetspeed_tag - RC{name}_tag chosen in #1 + getspeed - arbitrary name (ptr to structure in #1) + 3) RCUS_GETSPEED - RCUS_{NAME} ("NAME" & "name" do not have to the same) + data.RCgetspeed - data.RC{name} ("RC{name}" from #1) + 4) RCUD_GETSPEED - _RC_user_data.getspeed ("getspeed" from #2) + 5) RCUC_GETSPEED - unique hex identifier entry. +*/ + +typedef struct RC_user_tag RCuser_struct; + +/* 1) User structure entry */ +struct RC_user_tag { + int cmd; + union { + /* GETINFO structure */ + struct RCgetinfo_tag { + unsigned long int mem_start; + unsigned long int mem_end; + unsigned long int base_addr; + unsigned char irq; + unsigned char dma; + unsigned char port; + } RCgetinfo; /* <---- RCgetinfo */ + + /* GETSPEED structure */ + struct RCgetspeed_tag { + U32 LinkSpeedCode; + } RCgetspeed; /* <---- RCgetspeed */ + + /* SETSPEED structure */ + struct RCsetspeed_tag { + U16 LinkSpeedCode; + } RCsetspeed; /* <---- RCsetspeed */ + + /* GETPROM structure */ + struct RCgetprom_tag { + U32 PromMode; + } RCgetprom; /* <---- RCgetprom */ + + /* SETPROM structure */ + struct RCsetprom_tag { + U16 PromMode; + } RCsetprom; /* <---- RCsetprom */ + + /* GETBROADCAST structure */ + struct RCgetbroadcast_tag { + U32 BroadcastMode; + } RCgetbroadcast; /* <---- RCgetbroadcast */ + + /* SETBROADCAST structure */ + struct RCsetbroadcast_tag { + U16 BroadcastMode; + } RCsetbroadcast; /* <---- RCsetbroadcast */ + + /* GETFIRMWAREVER structure */ +#define FirmStringLen 80 + struct RCgetfwver_tag { + U8 FirmString[FirmStringLen]; + } RCgetfwver; /* <---- RCgetfwver */ + + /* GETIPANDMASK structure */ + struct RCgetipnmask_tag { + U32 IpAddr; + U32 NetMask; + } RCgetipandmask; /* <---- RCgetipandmask */ + + /* SETIPANDMASK structure */ + struct RCsetipnmask_tag { + U32 IpAddr; + U32 NetMask; + } RCsetipandmask; /* <---- RCsetipandmask */ + + /* GETMAC structure */ +#define MAC_SIZE 10 + struct RCgetmac_tag { + U8 mac[MAC_SIZE]; + } 
RCgetmac; /* <---- RCgetmac */ + + /* SETMAC structure */ + struct RCsetmac_tag { + U8 mac[MAC_SIZE]; + } RCsetmac; /* <---- RCsetmac */ + + /* GETLINKSTATUS structure */ + struct RCgetlnkstatus_tag { + U32 ReturnStatus; + } RCgetlnkstatus; /* <---- RCgetlnkstatus */ + + /* GETLINKSTATISTICS structure */ + struct RCgetlinkstats_tag { + RCLINKSTATS StatsReturn; + } RCgetlinkstats; /* <---- RCgetlinkstats */ + + /* DEFAULT structure (when no command was recognized) */ + struct RCdefault_tag { + int rc; + } RCdefault; /* <---- RCdefault */ + + } data; + +}; /* struct RC_user_tag { ... } */ + +/* 2) User data entry */ +/* RCUD = RedCreek User Data */ +union RC_user_data_tag { /* structure tags used are taken from RC_user_tag structure above */ + struct RCgetinfo_tag *getinfo; + struct RCgetspeed_tag *getspeed; + struct RCgetprom_tag *getprom; + struct RCgetbroadcast_tag *getbroadcast; + struct RCgetfwver_tag *getfwver; + struct RCgetipnmask_tag *getipandmask; + struct RCgetmac_tag *getmac; + struct RCgetlnkstatus_tag *getlinkstatus; + struct RCgetlinkstats_tag *getlinkstatistics; + struct RCdefault_tag *rcdefault; + struct RCsetspeed_tag *setspeed; + struct RCsetprom_tag *setprom; + struct RCsetbroadcast_tag *setbroadcast; + struct RCsetipnmask_tag *setipandmask; + struct RCsetmac_tag *setmac; +} _RC_user_data; /* declare as a global, so the defines below will work */ + +/* 3) Structure short-cut entry */ +/* define structure short-cuts *//* structure names are taken from RC_user_tag structure above */ +#define RCUS_GETINFO data.RCgetinfo; +#define RCUS_GETSPEED data.RCgetspeed; +#define RCUS_GETPROM data.RCgetprom; +#define RCUS_GETBROADCAST data.RCgetbroadcast; +#define RCUS_GETFWVER data.RCgetfwver; +#define RCUS_GETIPANDMASK data.RCgetipandmask; +#define RCUS_GETMAC data.RCgetmac; +#define RCUS_GETLINKSTATUS data.RCgetlnkstatus; +#define RCUS_GETLINKSTATISTICS data.RCgetlinkstats; +#define RCUS_DEFAULT data.RCdefault; +#define RCUS_SETSPEED data.RCsetspeed; 
+#define RCUS_SETPROM data.RCsetprom; +#define RCUS_SETBROADCAST data.RCsetbroadcast; +#define RCUS_SETIPANDMASK data.RCsetipandmask; +#define RCUS_SETMAC data.RCsetmac; + +/* 4) Data short-cut entry */ +/* define data short-cuts *//* pointer names are from RC_user_data_tag union (just below RC_user_tag) */ +#define RCUD_GETINFO _RC_user_data.getinfo +#define RCUD_GETSPEED _RC_user_data.getspeed +#define RCUD_GETPROM _RC_user_data.getprom +#define RCUD_GETBROADCAST _RC_user_data.getbroadcast +#define RCUD_GETFWVER _RC_user_data.getfwver +#define RCUD_GETIPANDMASK _RC_user_data.getipandmask +#define RCUD_GETMAC _RC_user_data.getmac +#define RCUD_GETLINKSTATUS _RC_user_data.getlinkstatus +#define RCUD_GETLINKSTATISTICS _RC_user_data.getlinkstatistics +#define RCUD_DEFAULT _RC_user_data.rcdefault +#define RCUD_SETSPEED _RC_user_data.setspeed +#define RCUD_SETPROM _RC_user_data.setprom +#define RCUD_SETBROADCAST _RC_user_data.setbroadcast +#define RCUD_SETIPANDMASK _RC_user_data.setipandmask +#define RCUD_SETMAC _RC_user_data.setmac + +/* 5) Command identifier entry */ +/* define command identifiers */ +#define RCUC_GETINFO 0x01 +#define RCUC_GETSPEED 0x02 +#define RCUC_GETFWVER 0x03 +#define RCUC_GETIPANDMASK 0x04 +#define RCUC_GETMAC 0x05 +#define RCUC_GETLINKSTATUS 0x06 +#define RCUC_GETLINKSTATISTICS 0x07 +#define RCUC_GETPROM 0x14 +#define RCUC_GETBROADCAST 0x15 +#define RCUC_DEFAULT 0xff +#define RCUC_SETSPEED 0x08 +#define RCUC_SETIPANDMASK 0x09 +#define RCUC_SETMAC 0x0a +#define RCUC_SETPROM 0x16 +#define RCUC_SETBROADCAST 0x17 + +/* define ioctl commands to use, when talking to RC 45/PCI driver */ +#define RCU_PROTOCOL_REV SIOCDEVPRIVATE +#define RCU_COMMAND SIOCDEVPRIVATE+1 + +/* + Intended use for the above defines is shown below (GETINFO, as this example): + + RCuser_struct RCuser; // declare RCuser structure + struct ifreq ifr; // declare an interface request structure + + RCuser.cmd = RCUC_GETINFO; // set user command to GETINFO + ifr->ifr_data = 
(caddr_t) &RCuser; // set point to user structure + + sock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); // get a socket + ioctl(sock, RCU_COMMAND, &ifr); // do ioctl on socket + + RCUD_GETINFO = &RCuser.RCUS_GETINFO; // set data pointer for GETINFO + + // print results + printf("memory 0x%lx-0x%lx, base address 0x%x, irq 0x%x\n", + RCUD_GETINFO->mem_start, RCUD_GETINFO->mem_end, + RCUD_GETINFO->base_addr, RCUD_GETINFO->irq); +*/ + +#endif /* RCIF_H */ diff --git a/drivers/net/rclanmtl.c b/drivers/net/rclanmtl.c new file mode 100644 index 000000000..14bd88ab2 --- /dev/null +++ b/drivers/net/rclanmtl.c @@ -0,0 +1,2029 @@ +/* +** ************************************************************************* +** +** +** R C L A N M T L . C $Revision: 6 $ +** +** +** RedCreek I2O LAN Message Transport Layer program module. +** +** --------------------------------------------------------------------- +** --- Copyright (c) 1997-1999, RedCreek Communications Inc. --- +** --- All rights reserved. --- +** --------------------------------------------------------------------- +** +** File Description: +** +** Host side I2O (Intelligent I/O) LAN message transport layer. +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. + +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. + +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +** +** 1998-1999, LAN API was modified and enhanced by Alice Hennessy. 
+** +** Sometime in 1997, LAN API was written from scratch by Wendell Nichols. +** ************************************************************************* +*/ + +#define DEBUG 1 + +#define RC_LINUX_MODULE +#include "rclanmtl.h" + + /* RedCreek LAN device Target ID */ +#define RC_LAN_TARGET_ID 0x10 + /* RedCreek's OSM default LAN receive Initiator */ +#define DEFAULT_RECV_INIT_CONTEXT 0xA17 + +/* +** I2O message structures +*/ + +#define I2O_TID_SZ 12 +#define I2O_FUNCTION_SZ 8 + +/* Transaction Reply Lists (TRL) Control Word structure */ + +#define I2O_TRL_FLAGS_SINGLE_FIXED_LENGTH 0x00 +#define I2O_TRL_FLAGS_SINGLE_VARIABLE_LENGTH 0x40 +#define I2O_TRL_FLAGS_MULTIPLE_FIXED_LENGTH 0x80 + +/* LAN Class specific functions */ + +#define I2O_LAN_PACKET_SEND 0x3B +#define I2O_LAN_SDU_SEND 0x3D +#define I2O_LAN_RECEIVE_POST 0x3E +#define I2O_LAN_RESET 0x35 +#define I2O_LAN_SHUTDOWN 0x37 + +/* Private Class specfic function */ +#define I2O_PRIVATE 0xFF + +/* I2O Executive Function Codes. */ + +#define I2O_EXEC_ADAPTER_ASSIGN 0xB3 +#define I2O_EXEC_ADAPTER_READ 0xB2 +#define I2O_EXEC_ADAPTER_RELEASE 0xB5 +#define I2O_EXEC_BIOS_INFO_SET 0xA5 +#define I2O_EXEC_BOOT_DEVICE_SET 0xA7 +#define I2O_EXEC_CONFIG_VALIDATE 0xBB +#define I2O_EXEC_CONN_SETUP 0xCA +#define I2O_EXEC_DEVICE_ASSIGN 0xB7 +#define I2O_EXEC_DEVICE_RELEASE 0xB9 +#define I2O_EXEC_HRT_GET 0xA8 +#define I2O_EXEC_IOP_CLEAR 0xBE +#define I2O_EXEC_IOP_CONNECT 0xC9 +#define I2O_EXEC_IOP_RESET 0xBD +#define I2O_EXEC_LCT_NOTIFY 0xA2 +#define I2O_EXEC_OUTBOUND_INIT 0xA1 +#define I2O_EXEC_PATH_ENABLE 0xD3 +#define I2O_EXEC_PATH_QUIESCE 0xC5 +#define I2O_EXEC_PATH_RESET 0xD7 +#define I2O_EXEC_STATIC_MF_CREATE 0xDD +#define I2O_EXEC_STATIC_MF_RELEASE 0xDF +#define I2O_EXEC_STATUS_GET 0xA0 +#define I2O_EXEC_SW_DOWNLOAD 0xA9 +#define I2O_EXEC_SW_UPLOAD 0xAB +#define I2O_EXEC_SW_REMOVE 0xAD +#define I2O_EXEC_SYS_ENABLE 0xD1 +#define I2O_EXEC_SYS_MODIFY 0xC1 +#define I2O_EXEC_SYS_QUIESCE 0xC3 +#define I2O_EXEC_SYS_TAB_SET 
0xA3 + + /* Init Outbound Q status */ +#define I2O_EXEC_OUTBOUND_INIT_IN_PROGRESS 0x01 +#define I2O_EXEC_OUTBOUND_INIT_REJECTED 0x02 +#define I2O_EXEC_OUTBOUND_INIT_FAILED 0x03 +#define I2O_EXEC_OUTBOUND_INIT_COMPLETE 0x04 + +#define I2O_UTIL_NOP 0x00 + +/* I2O Get Status State values */ + +#define I2O_IOP_STATE_INITIALIZING 0x01 +#define I2O_IOP_STATE_RESET 0x02 +#define I2O_IOP_STATE_HOLD 0x04 +#define I2O_IOP_STATE_READY 0x05 +#define I2O_IOP_STATE_OPERATIONAL 0x08 +#define I2O_IOP_STATE_FAILED 0x10 +#define I2O_IOP_STATE_FAULTED 0x11 + +/* Defines for Request Status Codes: Table 3-1 Reply Status Codes. */ + +#define I2O_REPLY_STATUS_SUCCESS 0x00 +#define I2O_REPLY_STATUS_ABORT_DIRTY 0x01 +#define I2O_REPLY_STATUS_ABORT_NO_DATA_TRANSFER 0x02 +#define I2O_REPLY_STATUS_ABORT_PARTIAL_TRANSFER 0x03 +#define I2O_REPLY_STATUS_ERROR_DIRTY 0x04 +#define I2O_REPLY_STATUS_ERROR_NO_DATA_TRANSFER 0x05 +#define I2O_REPLY_STATUS_ERROR_PARTIAL_TRANSFER 0x06 +#define I2O_REPLY_STATUS_PROCESS_ABORT_DIRTY 0x07 +#define I2O_REPLY_STATUS_PROCESS_ABORT_NO_DATA_TRANSFER 0x08 +#define I2O_REPLY_STATUS_PROCESS_ABORT_PARTIAL_TRANSFER 0x09 +#define I2O_REPLY_STATUS_TRANSACTION_ERROR 0x0A +#define I2O_REPLY_STATUS_PROGRESS_REPORT 0x80 + +/* DetailedStatusCode defines for ALL messages: Table 3-2 Detailed Status Codes.*/ + +#define I2O_DETAIL_STATUS_SUCCESS 0x0000 +#define I2O_DETAIL_STATUS_BAD_KEY 0x0001 +#define I2O_DETAIL_STATUS_CHAIN_BUFFER_TOO_LARGE 0x0002 +#define I2O_DETAIL_STATUS_DEVICE_BUSY 0x0003 +#define I2O_DETAIL_STATUS_DEVICE_LOCKED 0x0004 +#define I2O_DETAIL_STATUS_DEVICE_NOT_AVAILABLE 0x0005 +#define I2O_DETAIL_STATUS_DEVICE_RESET 0x0006 +#define I2O_DETAIL_STATUS_INAPPROPRIATE_FUNCTION 0x0007 +#define I2O_DETAIL_STATUS_INSUFFICIENT_RESOURCE_HARD 0x0008 +#define I2O_DETAIL_STATUS_INSUFFICIENT_RESOURCE_SOFT 0x0009 +#define I2O_DETAIL_STATUS_INVALID_INITIATOR_ADDRESS 0x000A +#define I2O_DETAIL_STATUS_INVALID_MESSAGE_FLAGS 0x000B +#define I2O_DETAIL_STATUS_INVALID_OFFSET 0x000C 
+#define I2O_DETAIL_STATUS_INVALID_PARAMETER 0x000D +#define I2O_DETAIL_STATUS_INVALID_REQUEST 0x000E +#define I2O_DETAIL_STATUS_INVALID_TARGET_ADDRESS 0x000F +#define I2O_DETAIL_STATUS_MESSAGE_TOO_LARGE 0x0010 +#define I2O_DETAIL_STATUS_MESSAGE_TOO_SMALL 0x0011 +#define I2O_DETAIL_STATUS_MISSING_PARAMETER 0x0012 +#define I2O_DETAIL_STATUS_NO_SUCH_PAGE 0x0013 +#define I2O_DETAIL_STATUS_REPLY_BUFFER_FULL 0x0014 +#define I2O_DETAIL_STATUS_TCL_ERROR 0x0015 +#define I2O_DETAIL_STATUS_TIMEOUT 0x0016 +#define I2O_DETAIL_STATUS_UNKNOWN_ERROR 0x0017 +#define I2O_DETAIL_STATUS_UNKNOWN_FUNCTION 0x0018 +#define I2O_DETAIL_STATUS_UNSUPPORTED_FUNCTION 0x0019 +#define I2O_DETAIL_STATUS_UNSUPPORTED_VERSION 0x001A + + /* I2O msg header defines for VersionOffset */ +#define I2OMSGVER_1_5 0x0001 +#define SGL_OFFSET_0 I2OMSGVER_1_5 +#define SGL_OFFSET_4 (0x0040 | I2OMSGVER_1_5) +#define TRL_OFFSET_5 (0x0050 | I2OMSGVER_1_5) +#define TRL_OFFSET_6 (0x0060 | I2OMSGVER_1_5) + + /* I2O msg header defines for MsgFlags */ +#define MSG_STATIC 0x0100 +#define MSG_64BIT_CNTXT 0x0200 +#define MSG_MULTI_TRANS 0x1000 +#define MSG_FAIL 0x2000 +#define MSG_LAST 0x4000 +#define MSG_REPLY 0x8000 + + /* normal LAN request message MsgFlags and VersionOffset (0x1041) */ +#define LAN_MSG_REQST (MSG_MULTI_TRANS | SGL_OFFSET_4) + + /* minimum size msg */ +#define THREE_WORD_MSG_SIZE 0x00030000 +#define FOUR_WORD_MSG_SIZE 0x00040000 +#define FIVE_WORD_MSG_SIZE 0x00050000 +#define SIX_WORD_MSG_SIZE 0x00060000 +#define SEVEN_WORD_MSG_SIZE 0x00070000 +#define EIGHT_WORD_MSG_SIZE 0x00080000 +#define NINE_WORD_MSG_SIZE 0x00090000 + +/* Special TID Assignments */ + +#define I2O_IOP_TID 0 +#define I2O_HOST_TID 0xB91 + + /* RedCreek I2O private message codes */ +#define RC_PRIVATE_GET_MAC_ADDR 0x0001/**/ /* OBSOLETE */ +#define RC_PRIVATE_SET_MAC_ADDR 0x0002 +#define RC_PRIVATE_GET_NIC_STATS 0x0003 +#define RC_PRIVATE_GET_LINK_STATUS 0x0004 +#define RC_PRIVATE_SET_LINK_SPEED 0x0005 +#define 
RC_PRIVATE_SET_IP_AND_MASK 0x0006 +/* #define RC_PRIVATE_GET_IP_AND_MASK 0x0007 *//* OBSOLETE */ +#define RC_PRIVATE_GET_LINK_SPEED 0x0008 +#define RC_PRIVATE_GET_FIRMWARE_REV 0x0009 +/* #define RC_PRIVATE_GET_MAC_ADDR 0x000A */ +#define RC_PRIVATE_GET_IP_AND_MASK 0x000B +#define RC_PRIVATE_DEBUG_MSG 0x000C +#define RC_PRIVATE_REPORT_DRIVER_CAPABILITY 0x000D +#define RC_PRIVATE_SET_PROMISCUOUS_MODE 0x000e +#define RC_PRIVATE_GET_PROMISCUOUS_MODE 0x000f +#define RC_PRIVATE_SET_BROADCAST_MODE 0x0010 +#define RC_PRIVATE_GET_BROADCAST_MODE 0x0011 + +#define RC_PRIVATE_REBOOT 0x00FF + +/* I2O message header */ +typedef struct _I2O_MESSAGE_FRAME { + U8 VersionOffset; + U8 MsgFlags; + U16 MessageSize; + BF TargetAddress:I2O_TID_SZ; + BF InitiatorAddress:I2O_TID_SZ; + BF Function:I2O_FUNCTION_SZ; + U32 InitiatorContext; + /* SGL[] */ +} I2O_MESSAGE_FRAME, *PI2O_MESSAGE_FRAME; + + /* assumed a 16K minus 256 byte space for outbound queue message frames */ +#define MSG_FRAME_SIZE 512 +#define NMBR_MSG_FRAMES 30 + + /* + ** in reserved space right after PAB in host memory is area for returning + ** values from card + */ + +/* +** typedef NICSTAT +** +** Data structure for NIC statistics retruned from PCI card. Data copied from +** here to user allocated RCLINKSTATS (see rclanmtl.h) structure. 
+*/ +typedef struct tag_NicStat { + unsigned long TX_good; + unsigned long TX_maxcol; + unsigned long TX_latecol; + unsigned long TX_urun; + unsigned long TX_crs; /* lost carrier sense */ + unsigned long TX_def; /* transmit deferred */ + unsigned long TX_singlecol; /* single collisions */ + unsigned long TX_multcol; + unsigned long TX_totcol; + unsigned long Rcv_good; + unsigned long Rcv_CRCerr; + unsigned long Rcv_alignerr; + unsigned long Rcv_reserr; /* rnr'd pkts */ + unsigned long Rcv_orun; + unsigned long Rcv_cdt; + unsigned long Rcv_runt; + unsigned long dump_status; /* last field directly from the chip */ +} NICSTAT, *P_NICSTAT; + +#define DUMP_DONE 0x0000A005 /* completed statistical dump */ +#define DUMP_CLEAR 0x0000A007 /* completed stat dump and clear counters */ + +static volatile int msgFlag; + +/* local function prototypes */ +static void ProcessOutboundI2OMsg (PPAB pPab, U32 phyMsgAddr); +static int FillI2OMsgSGLFromTCB (PU32 pMsg, PRCTCB pXmitCntrlBlock); +static int GetI2OStatus (PPAB pPab); +static int SendI2OOutboundQInitMsg (PPAB pPab); +static int SendEnableSysMsg (PPAB pPab); + +/* +** ========================================================================= +** RCInitI2OMsgLayer() +** +** Initialize the RedCreek I2O Module and adapter. +** +** Inputs: dev - the devices net_device struct +** TransmitCallbackFunction - address of transmit callback function +** ReceiveCallbackFunction - address of receive callback function +** +** private message block is allocated by user. It must be in locked pages. +** p_msgbuf and p_phymsgbuf point to the same location. Must be contigous +** memory block of a minimum of 16K byte and long word aligned. 
+** ========================================================================= +*/ +RC_RETURN +RCInitI2OMsgLayer (struct net_device *dev, + PFNTXCALLBACK TransmitCallbackFunction, + PFNRXCALLBACK ReceiveCallbackFunction, + PFNCALLBACK RebootCallbackFunction) +{ + int result; + PPAB pPab; + U32 pciBaseAddr = dev->base_addr; + PDPA pDpa = dev->priv; + PU8 p_msgbuf = pDpa->msgbuf; + PU8 p_phymsgbuf = (PU8) pDpa->msgbuf_dma; + + dprintk + ("InitI2O: Adapter:0x%04ux ATU:0x%08ulx msgbuf:%p phymsgbuf:0x%08ulx\n" + "TransmitCallbackFunction:0x%08ulx ReceiveCallbackFunction:0x%08ulx\n", + pDpa->id, pciBaseAddr, p_msgbuf, (u32) p_phymsgbuf, + (u32) TransmitCallbackFunction, (u32) ReceiveCallbackFunction); + + /* Check if this interface already initialized - if so, shut it down */ + if (pDpa->pPab != NULL) { + printk (KERN_WARNING + "(rcpci45 driver:) pDpa->pPab [%d] != NULL\n", + pDpa->id); +/* RCResetLANCard(pDpa->id, 0, (PU32)NULL, (PFNCALLBACK)NULL); */ + pDpa->pPab = NULL; + } + + /* store adapter instance values in adapter block. 
+ * Adapter block is at beginning of message buffer */ + + pPab = kmalloc (sizeof (*pPab), GFP_KERNEL); + if (!pPab) { + printk (KERN_ERR + "(rcpci45 driver:) RCInitI2OMsgLayer: Could not allocate memory for PAB struct!\n"); + result = RC_RTN_MALLOC_ERROR; + goto err_out; + } + + memset (pPab, 0, sizeof (*pPab)); + pDpa->pPab = pPab; + pPab->p_atu = (PATU) pciBaseAddr; + pPab->pPci45LinBaseAddr = (PU8) pciBaseAddr; + + /* Set outbound message frame addr */ + pPab->outMsgBlockPhyAddr = (U32) p_phymsgbuf; + pPab->pLinOutMsgBlock = (PU8) p_msgbuf; + + /* store callback function addresses */ + pPab->pTransCallbackFunc = TransmitCallbackFunction; + pPab->pRecvCallbackFunc = ReceiveCallbackFunction; + pPab->pRebootCallbackFunc = RebootCallbackFunction; + pPab->pCallbackFunc = (PFNCALLBACK) NULL; + + /* + ** Initialize I2O IOP + */ + result = GetI2OStatus (pPab); + + if (result != RC_RTN_NO_ERROR) + goto err_out_dealloc; + + if (pPab->IOPState == I2O_IOP_STATE_OPERATIONAL) { + printk (KERN_INFO + "(rcpci45 driver:) pPab->IOPState == op: resetting adapter\n"); + RCResetLANCard (dev, 0, (PU32) NULL, (PFNCALLBACK) NULL); + } + + result = SendI2OOutboundQInitMsg (pPab); + + if (result != RC_RTN_NO_ERROR) + goto err_out_dealloc; + + result = SendEnableSysMsg (pPab); + + if (result != RC_RTN_NO_ERROR) + goto err_out_dealloc; + + return RC_RTN_NO_ERROR; + + err_out_dealloc: + kfree (pPab); + err_out: + return result; +} + +/* +** ========================================================================= +** Disable and Enable I2O interrupts. I2O interrupts are enabled at Init time +** but can be disabled and re-enabled through these two function calls. +** Packets will still be put into any posted received buffers and packets will +** be sent through RCI2OSendPacket() functions. Disabling I2O interrupts +** will prevent hardware interrupt to host even though the outbound I2O msg +** queue is not emtpy. 
+** ========================================================================= +*/ +#define i960_OUT_POST_Q_INT_BIT 0x0008 /* bit set masks interrupts */ + +RC_RETURN +RCDisableI2OInterrupts (struct net_device * dev) +{ + PPAB pPab = ((PDPA) dev->priv)->pPab; + + if (pPab == NULL) + return RC_RTN_ADPTR_NOT_REGISTERED; + + pPab->p_atu->OutIntMask |= i960_OUT_POST_Q_INT_BIT; + + return RC_RTN_NO_ERROR; +} + +RC_RETURN +RCEnableI2OInterrupts (struct net_device * dev) +{ + PPAB pPab = ((PDPA) dev->priv)->pPab; + + if (pPab == NULL) + return RC_RTN_ADPTR_NOT_REGISTERED; + + pPab->p_atu->OutIntMask &= ~i960_OUT_POST_Q_INT_BIT; + + return RC_RTN_NO_ERROR; + +} + +/* +** ========================================================================= +** RCI2OSendPacket() +** ========================================================================= +*/ +RC_RETURN +RCI2OSendPacket (struct net_device * dev, U32 InitiatorContext, + PRCTCB pTransCtrlBlock) +{ + U32 msgOffset; + PU32 pMsg; + int size; + PPAB pPab = ((PDPA) dev->priv)->pPab; + + dprintk ("RCI2OSendPacket()...\n"); + + if (pPab == NULL) + return RC_RTN_ADPTR_NOT_REGISTERED; + + /* get Inbound free Q entry - reading from In Q gets free Q entry */ + /* offset to Msg Frame in PCI msg block */ + + msgOffset = pPab->p_atu->InQueue; + + if (msgOffset == 0xFFFFFFFF) { + dprintk ("RCI2OSendPacket(): Inbound Free Q empty!\n"); + return RC_RTN_FREE_Q_EMPTY; + } + + /* calc virtual address of msg - virtual already mapped to physical */ + pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset); + + size = FillI2OMsgSGLFromTCB (pMsg + 4, pTransCtrlBlock); + + if (size == -1) { /* error processing TCB - send NOP msg */ + dprintk ("RCI2OSendPacket(): Error Rrocess TCB!\n"); + pMsg[0] = THREE_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = + I2O_UTIL_NOP << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + return RC_RTN_TCB_ERROR; + } else { /* send over msg header */ + + pMsg[0] = (size + 4) << 16 | LAN_MSG_REQST; /* send over message size and flags 
*/ + pMsg[1] = + I2O_LAN_PACKET_SEND << 24 | I2O_HOST_TID << 12 | + RC_LAN_TARGET_ID; + pMsg[2] = InitiatorContext; + pMsg[3] = 0; /* batch reply */ + /* post to Inbound Post Q */ + pPab->p_atu->InQueue = msgOffset; + return RC_RTN_NO_ERROR; + } +} + +/* +** ========================================================================= +** RCI2OPostRecvBuffer() +** +** inputs: pBufrCntrlBlock - pointer to buffer control block +** +** returns TRUE if successful in sending message, else FALSE. +** ========================================================================= +*/ +RC_RETURN +RCPostRecvBuffers (struct net_device * dev, PRCTCB pTransCtrlBlock) +{ + U32 msgOffset; + PU32 pMsg; + int size; + PPAB pPab = ((PDPA) dev->priv)->pPab; + + dprintk ("RCPostRecvBuffers()...\n"); + + /* search for DeviceHandle */ + + if (pPab == NULL) + return RC_RTN_ADPTR_NOT_REGISTERED; + + /* get Inbound free Q entry - reading from In Q gets free Q entry */ + /* offset to Msg Frame in PCI msg block */ + msgOffset = pPab->p_atu->InQueue; + + if (msgOffset == 0xFFFFFFFF) { + dprintk ("RCPostRecvBuffers(): Inbound Free Q empty!\n"); + return RC_RTN_FREE_Q_EMPTY; + } + /* calc virtual address of msg - virtual already mapped to physical */ + pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset); + + size = FillI2OMsgSGLFromTCB (pMsg + 4, pTransCtrlBlock); + + if (size == -1) { /* error prcessing TCB - send 3 DWORD private msg == NOP */ + dprintk + ("RCPostRecvBuffers(): Error Processing TCB! 
size = %d\n", + size); + pMsg[0] = THREE_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = + I2O_UTIL_NOP << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + /* post to Post Q */ + pPab->p_atu->InQueue = msgOffset; + return RC_RTN_TCB_ERROR; + } else { /* send over size msg header */ + + pMsg[0] = (size + 4) << 16 | LAN_MSG_REQST; /* send over message size and flags */ + pMsg[1] = + I2O_LAN_RECEIVE_POST << 24 | I2O_HOST_TID << 12 | + RC_LAN_TARGET_ID; + pMsg[2] = DEFAULT_RECV_INIT_CONTEXT; + pMsg[3] = *(PU32) pTransCtrlBlock; /* number of packet buffers */ + /* post to Post Q */ + pPab->p_atu->InQueue = msgOffset; + return RC_RTN_NO_ERROR; + } +} + +/* +** ========================================================================= +** RCProcI2OMsgQ() +** +** Process I2O outbound message queue until empty. +** ========================================================================= +*/ +irqreturn_t +RCProcI2OMsgQ (struct net_device *dev) +{ + U32 phyAddrMsg; + PU8 p8Msg; + PU32 p32; + U16 count; + PPAB pPab = ((PDPA) dev->priv)->pPab; + unsigned char debug_msg[20]; + + if (pPab == NULL) + return IRQ_NONE; + + phyAddrMsg = pPab->p_atu->OutQueue; + + while (phyAddrMsg != 0xFFFFFFFF) { + p8Msg = + pPab->pLinOutMsgBlock + (phyAddrMsg - + pPab->outMsgBlockPhyAddr); + p32 = (PU32) p8Msg; + + dprintk ("msg: 0x%x 0x%x \n", p8Msg[7], p32[5]); + + /* Send Packet Reply Msg */ + if (I2O_LAN_PACKET_SEND == p8Msg[7]) { /* function code byte */ + count = *(PU16) (p8Msg + 2); + count -= p8Msg[0] >> 4; + /* status, count, context[], adapter */ + (*pPab->pTransCallbackFunc) (p8Msg[19], count, p32 + 5, + dev); + } else if (I2O_LAN_RECEIVE_POST == p8Msg[7]) { /* Receive Packet Reply Msg */ + dprintk + ("I2O_RECV_REPLY pPab:0x%08ulx p8Msg:0x%08ulx p32:0x%08ulx\n", + (u32) pPab, (u32) p8Msg, (u32) p32); + dprintk ("msg: 0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n", + p32[0], p32[1], p32[2], p32[3]); + dprintk (" 0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n", + p32[4], p32[5], p32[6], p32[7]); + dprintk (" 
0x%08ulx:0X%08ulx:0x%08ulx:0x%08ulx\n", + p32[8], p32[9], p32[10], p32[11]); + /* status, count, buckets remaining, packetParmBlock, adapter */ + (*pPab->pRecvCallbackFunc) (p8Msg[19], p8Msg[12], + p32[5], p32 + 6, dev); + } else if (I2O_LAN_RESET == p8Msg[7] + || I2O_LAN_SHUTDOWN == p8Msg[7]) + if (pPab->pCallbackFunc) + (*pPab->pCallbackFunc) (p8Msg[19], 0, 0, dev); + else + pPab->pCallbackFunc = (PFNCALLBACK) 1; + else if (I2O_PRIVATE == p8Msg[7]) { + dprintk ("i2o private 0x%x, 0x%x \n", p8Msg[7], p32[5]); + switch (p32[5]) { + case RC_PRIVATE_DEBUG_MSG: + msgFlag = 1; + dprintk ("Received I2O_PRIVATE msg\n"); + debug_msg[15] = (p32[6] & 0xff000000) >> 24; + debug_msg[14] = (p32[6] & 0x00ff0000) >> 16; + debug_msg[13] = (p32[6] & 0x0000ff00) >> 8; + debug_msg[12] = (p32[6] & 0x000000ff); + + debug_msg[11] = (p32[7] & 0xff000000) >> 24; + debug_msg[10] = (p32[7] & 0x00ff0000) >> 16; + debug_msg[9] = (p32[7] & 0x0000ff00) >> 8; + debug_msg[8] = (p32[7] & 0x000000ff); + + debug_msg[7] = (p32[8] & 0xff000000) >> 24; + debug_msg[6] = (p32[8] & 0x00ff0000) >> 16; + debug_msg[5] = (p32[8] & 0x0000ff00) >> 8; + debug_msg[4] = (p32[8] & 0x000000ff); + + debug_msg[3] = (p32[9] & 0xff000000) >> 24; + debug_msg[2] = (p32[9] & 0x00ff0000) >> 16; + debug_msg[1] = (p32[9] & 0x0000ff00) >> 8; + debug_msg[0] = (p32[9] & 0x000000ff); + + debug_msg[16] = '\0'; + dprintk ("%s", debug_msg); + break; + case RC_PRIVATE_REBOOT: + dprintk ("Adapter reboot initiated...\n"); + if (pPab->pRebootCallbackFunc) + (*pPab->pRebootCallbackFunc) (0, 0, 0, + dev); + break; + default: + printk (KERN_WARNING + "(rcpci45 driver:) Unknown private I2O msg received: 0x%x\n", + p32[5]); + break; + } + } + + /* + ** Process other Msg's + */ + else + ProcessOutboundI2OMsg (pPab, phyAddrMsg); + + /* return MFA to outbound free Q */ + pPab->p_atu->OutQueue = phyAddrMsg; + + /* any more msgs? 
*/ + phyAddrMsg = pPab->p_atu->OutQueue; + } + + return IRQ_HANDLED; +} + +/* +** ========================================================================= +** Returns LAN interface statistical counters to space provided by caller at +** StatsReturnAddr. Returns 0 if success, else RC_RETURN code. +** This function will call the WaitCallback function provided by +** user while waiting for card to respond. +** ========================================================================= +*/ +RC_RETURN +RCGetLinkStatistics (struct net_device *dev, + P_RCLINKSTATS StatsReturnAddr, + PFNWAITCALLBACK WaitCallback) +{ + U32 msgOffset; + volatile U32 timeout; + volatile PU32 pMsg; + volatile PU32 p32, pReturnAddr; + P_NICSTAT pStats; + int i; + PPAB pPab = ((PDPA) dev->priv)->pPab; + +/*dprintk("Get82558Stats() StatsReturnAddr:0x%08ulx\n", StatsReturnAddr); */ + + if (pPab == NULL) + return RC_RTN_ADPTR_NOT_REGISTERED; + + msgOffset = pPab->p_atu->InQueue; + + if (msgOffset == 0xFFFFFFFF) { + dprintk ("Get8255XStats(): Inbound Free Q empty!\n"); + return RC_RTN_FREE_Q_EMPTY; + } + + /* calc virtual address of msg - virtual already mapped to physical */ + pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset); + +/*dprintk("Get82558Stats - pMsg = 0x%08ulx, InQ msgOffset = 0x%08ulx\n", pMsg, msgOffset);*/ +/*dprintk("Get82558Stats - pMsg = 0x%08X, InQ msgOffset = 0x%08X\n", pMsg, msgOffset);*/ + + pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + pMsg[2] = DEFAULT_RECV_INIT_CONTEXT; + pMsg[3] = 0x112; /* transaction context */ + pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_GET_NIC_STATS; + pMsg[5] = pPab->outMsgBlockPhyAddr; + + p32 = (PU32) pPab->outMsgBlockPhyAddr; + pStats = (P_NICSTAT) pPab->pLinOutMsgBlock; + pStats->dump_status = 0xFFFFFFFF; + + /* post to Inbound Post Q */ + pPab->p_atu->InQueue = msgOffset; + + timeout = 100000; + while (1) { + if (WaitCallback) + (*WaitCallback) (); + + udelay (10); + + 
if (pStats->dump_status != 0xFFFFFFFF) + break; + + if (!timeout--) { + dprintk + ("RCGet82558Stats() Timeout waiting for NIC statistics\n"); + return RC_RTN_MSG_REPLY_TIMEOUT; + } + } + + pReturnAddr = (PU32) StatsReturnAddr; + + /* copy Nic stats to user's structure */ + for (i = 0; i < (int) sizeof (RCLINKSTATS) / 4; i++) + pReturnAddr[i] = p32[i]; + + return RC_RTN_NO_ERROR; +} + +/* +** ========================================================================= +** Get82558LinkStatus() +** ========================================================================= +*/ +RC_RETURN +RCGetLinkStatus (struct net_device * dev, PU32 ReturnAddr, + PFNWAITCALLBACK WaitCallback) +{ + U32 msgOffset; + volatile U32 timeout; + volatile PU32 pMsg; + volatile PU32 p32; + PPAB pPab = ((PDPA) dev->priv)->pPab; + + dprintk ("Get82558LinkStatus() ReturnPhysAddr:0x%08ulx\n", + (u32) ReturnAddr); + + if (pPab == NULL) + return RC_RTN_ADPTR_NOT_REGISTERED; + + msgOffset = pPab->p_atu->InQueue; + + if (msgOffset == 0xFFFFFFFF) { + dprintk ("Get82558LinkStatus(): Inbound Free Q empty!\n"); + return RC_RTN_FREE_Q_EMPTY; + } + + /* calc virtual address of msg - virtual already mapped to physical */ + pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset); +/*dprintk("Get82558LinkStatus - pMsg = 0x%08ulx, InQ msgOffset = 0x%08ulx\n", pMsg, msgOffset);*/ +/*dprintk("Get82558LinkStatus - pMsg = 0x%08X, InQ msgOffset = 0x%08X\n", pMsg, msgOffset);*/ + + pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + pMsg[2] = DEFAULT_RECV_INIT_CONTEXT; + pMsg[3] = 0x112; /* transaction context */ + pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_GET_LINK_STATUS; + pMsg[5] = pPab->outMsgBlockPhyAddr; + + p32 = (PU32) pPab->pLinOutMsgBlock; + *p32 = 0xFFFFFFFF; + + /* post to Inbound Post Q */ + pPab->p_atu->InQueue = msgOffset; + + timeout = 100000; + while (1) { + if (WaitCallback) + (*WaitCallback) (); + + udelay (10); + + if (*p32 != 
0xFFFFFFFF) + break; + + if (!timeout--) { + dprintk ("Timeout waiting for link status\n"); + return RC_RTN_MSG_REPLY_TIMEOUT; + } + } + + *ReturnAddr = *p32; /* 1 = up 0 = down */ + + return RC_RTN_NO_ERROR; + +} + +/* +** ========================================================================= +** RCGetMAC() +** +** get the MAC address the adapter is listening for in non-promiscous mode. +** MAC address is in media format. +** ========================================================================= +*/ +RC_RETURN +RCGetMAC (struct net_device * dev, PFNWAITCALLBACK WaitCallback) +{ + unsigned timeout; + U32 off; + PU8 mac = dev->dev_addr; + PU32 p; + U32 temp[2]; + PPAB pPab = ((PDPA) dev->priv)->pPab; + PATU p_atu; + + if (pPab == NULL) + return RC_RTN_ADPTR_NOT_REGISTERED; + + p_atu = pPab->p_atu; + + p_atu->EtherMacLow = 0; /* first zero return data */ + p_atu->EtherMacHi = 0; + + off = p_atu->InQueue; /* get addresss of message */ + + if (0xFFFFFFFF == off) + return RC_RTN_FREE_Q_EMPTY; + + p = (PU32) (pPab->pPci45LinBaseAddr + off); + + dprintk ("RCGetMAC: p_atu 0x%08x, off 0x%08x, p 0x%08x\n", + (uint) p_atu, (uint) off, (uint) p); + /* setup private message */ + p[0] = FIVE_WORD_MSG_SIZE | SGL_OFFSET_0; + p[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + p[2] = 0; /* initiator context */ + p[3] = 0x218; /* transaction context */ + p[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_GET_MAC_ADDR; + + p_atu->InQueue = off; /* send it to the I2O device */ + dprintk ("RCGetMAC: p_atu 0x%08x, off 0x%08x, p 0x%08x\n", + (uint) p_atu, (uint) off, (uint) p); + + /* wait for the rcpci45 board to update the info */ + timeout = 1000000; + while (0 == p_atu->EtherMacLow) { + if (WaitCallback) + (*WaitCallback) (); + + udelay (10); + + if (!timeout--) { + printk ("rc_getmac: Timeout\n"); + return RC_RTN_MSG_REPLY_TIMEOUT; + } + } + + /* read the mac address */ + temp[0] = p_atu->EtherMacLow; + temp[1] = p_atu->EtherMacHi; + memcpy ((char *) mac, (char *) 
temp, 6); + + dprintk ("rc_getmac: 0x%x\n", (u32) mac); + + return RC_RTN_NO_ERROR; +} + +/* +** ========================================================================= +** RCSetMAC() +** +** set MAC address the adapter is listening for in non-promiscous mode. +** MAC address is in media format. +** ========================================================================= +*/ +RC_RETURN +RCSetMAC (struct net_device * dev, PU8 mac) +{ + U32 off; + PU32 pMsg; + PPAB pPab = ((PDPA) dev->priv)->pPab; + + if (pPab == NULL) + return RC_RTN_ADPTR_NOT_REGISTERED; + + off = pPab->p_atu->InQueue; /* get addresss of message */ + + if (0xFFFFFFFF == off) + return RC_RTN_FREE_Q_EMPTY; + + pMsg = (PU32) (pPab->pPci45LinBaseAddr + off); + + /* setup private message */ + pMsg[0] = SEVEN_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + pMsg[2] = 0; /* initiator context */ + pMsg[3] = 0x219; /* transaction context */ + pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_SET_MAC_ADDR; + pMsg[5] = *(unsigned *) mac; /* first four bytes */ + pMsg[6] = *(unsigned *) (mac + 4); /* last two bytes */ + + pPab->p_atu->InQueue = off; /* send it to the I2O device */ + + return RC_RTN_NO_ERROR; +} + +/* +** ========================================================================= +** RCSetLinkSpeed() +** +** set ethernet link speed. 
+** input: speedControl - determines action to take as follows +** 0 = reset and auto-negotiate (NWay) +** 1 = Full Duplex 100BaseT +** 2 = Half duplex 100BaseT +** 3 = Full Duplex 10BaseT +** 4 = Half duplex 10BaseT +** all other values are ignore (do nothing) +** ========================================================================= +*/ +RC_RETURN +RCSetLinkSpeed (struct net_device * dev, U16 LinkSpeedCode) +{ + U32 off; + PU32 pMsg; + PPAB pPab = ((PDPA) dev->priv)->pPab; + + if (pPab == NULL) + return RC_RTN_ADPTR_NOT_REGISTERED; + + off = pPab->p_atu->InQueue; /* get addresss of message */ + + if (0xFFFFFFFF == off) + return RC_RTN_FREE_Q_EMPTY; + + pMsg = (PU32) (pPab->pPci45LinBaseAddr + off); + + /* setup private message */ + pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + pMsg[2] = 0; /* initiator context */ + pMsg[3] = 0x219; /* transaction context */ + pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_SET_LINK_SPEED; + pMsg[5] = LinkSpeedCode; /* link speed code */ + + pPab->p_atu->InQueue = off; /* send it to the I2O device */ + + return RC_RTN_NO_ERROR; +} + +/* +** ========================================================================= +** RCSetPromiscuousMode() +** +** Defined values for Mode: +** 0 - turn off promiscuous mode +** 1 - turn on promiscuous mode +** +** ========================================================================= +*/ +RC_RETURN +RCSetPromiscuousMode (struct net_device * dev, U16 Mode) +{ + U32 off; + PU32 pMsg; + PPAB pPab = ((PDPA) dev->priv)->pPab; + + if (pPab == NULL) + return RC_RTN_ADPTR_NOT_REGISTERED; + + off = pPab->p_atu->InQueue; /* get addresss of message */ + + if (0xFFFFFFFF == off) + return RC_RTN_FREE_Q_EMPTY; + + pMsg = (PU32) (pPab->pPci45LinBaseAddr + off); + + /* setup private message */ + pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + pMsg[2] = 0; /* initiator 
context */ + pMsg[3] = 0x219; /* transaction context */ + pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_SET_PROMISCUOUS_MODE; + pMsg[5] = Mode; /* promiscuous mode setting */ + + pPab->p_atu->InQueue = off; /* send it to the device */ + + return RC_RTN_NO_ERROR; +} + +/* +** ========================================================================= +** RCGetPromiscuousMode() +** +** get promiscuous mode setting +** +** Possible return values placed in pMode: +** 0 = promisuous mode not set +** 1 = promisuous mode is set +** +** ========================================================================= +*/ +RC_RETURN +RCGetPromiscuousMode (struct net_device * dev, PU32 pMode, + PFNWAITCALLBACK WaitCallback) +{ + U32 msgOffset, timeout; + PU32 pMsg; + volatile PU32 p32; + PPAB pPab = ((PDPA) dev->priv)->pPab; + + msgOffset = pPab->p_atu->InQueue; + + if (msgOffset == 0xFFFFFFFF) { + printk (KERN_WARNING + "(rcpci45 driver:) RCGetLinkSpeed(): Inbound Free Q empty!\n"); + return RC_RTN_FREE_Q_EMPTY; + } + + /* calc virtual address of msg - virtual already mapped to physical */ + pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset); + + /* virtual pointer to return buffer - clear first two dwords */ + p32 = (volatile PU32) pPab->pLinOutMsgBlock; + p32[0] = 0xff; + + /* setup private message */ + pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + pMsg[2] = 0; /* initiator context */ + pMsg[3] = 0x219; /* transaction context */ + pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_GET_PROMISCUOUS_MODE; + /* phys address to return status - area right after PAB */ + pMsg[5] = pPab->outMsgBlockPhyAddr; + + /* post to Inbound Post Q */ + + pPab->p_atu->InQueue = msgOffset; + + /* wait for response */ + timeout = 1000000; + while (1) { + if (WaitCallback) + (*WaitCallback) (); + + udelay (10); /* please don't hog the bus!!! 
*/ + + if (p32[0] != 0xff) + break; + + if (!timeout--) { + dprintk + ("Timeout waiting for promiscuous mode from adapter\n"); + dprintk ("0x%8x\n", p32[0]); + return RC_RTN_NO_LINK_SPEED; + } + } + + /* get mode */ + *pMode = (U8) ((volatile PU8) p32)[0] & 0x0f; + + return RC_RTN_NO_ERROR; +} + +/* +** ========================================================================= +** RCSetBroadcastMode() +** +** Defined values for Mode: +** 0 - turn off promiscuous mode +** 1 - turn on promiscuous mode +** +** ========================================================================= +*/ +RC_RETURN +RCSetBroadcastMode (struct net_device * dev, U16 Mode) +{ + U32 off; + PU32 pMsg; + PPAB pPab = ((PDPA) dev->priv)->pPab; + + if (pPab == NULL) + return RC_RTN_ADPTR_NOT_REGISTERED; + + off = pPab->p_atu->InQueue; /* get addresss of message */ + + if (0xFFFFFFFF == off) + return RC_RTN_FREE_Q_EMPTY; + + pMsg = (PU32) (pPab->pPci45LinBaseAddr + off); + + /* setup private message */ + pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + pMsg[2] = 0; /* initiator context */ + pMsg[3] = 0x219; /* transaction context */ + pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_SET_BROADCAST_MODE; + pMsg[5] = Mode; /* promiscuous mode setting */ + + pPab->p_atu->InQueue = off; /* send it to the device */ + + return RC_RTN_NO_ERROR; +} + +/* +** ========================================================================= +** RCGetBroadcastMode() +** +** get promiscuous mode setting +** +** Possible return values placed in pMode: +** 0 = promisuous mode not set +** 1 = promisuous mode is set +** +** ========================================================================= +*/ +RC_RETURN +RCGetBroadcastMode (struct net_device * dev, PU32 pMode, + PFNWAITCALLBACK WaitCallback) +{ + U32 msgOffset, timeout; + PU32 pMsg; + volatile PU32 p32; + PPAB pPab = ((PDPA) dev->priv)->pPab; + + msgOffset = pPab->p_atu->InQueue; + + if (msgOffset 
== 0xFFFFFFFF) { + printk (KERN_WARNING + "(rcpci45 driver:) RCGetLinkSpeed(): Inbound Free Q empty!\n"); + return RC_RTN_FREE_Q_EMPTY; + } + + /* calc virtual address of msg - virtual already mapped to physical */ + pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset); + + /* virtual pointer to return buffer - clear first two dwords */ + p32 = (volatile PU32) pPab->pLinOutMsgBlock; + p32[0] = 0xff; + + /* setup private message */ + pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + pMsg[2] = 0; /* initiator context */ + pMsg[3] = 0x219; /* transaction context */ + pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_GET_BROADCAST_MODE; + /* phys address to return status - area right after PAB */ + pMsg[5] = pPab->outMsgBlockPhyAddr; + + /* post to Inbound Post Q */ + + pPab->p_atu->InQueue = msgOffset; + + /* wait for response */ + timeout = 1000000; + while (1) { + if (WaitCallback) + (*WaitCallback) (); + + udelay (10); /* please don't hog the bus!!! */ + + if (p32[0] != 0xff) + break; + + if (!timeout--) { + printk (KERN_WARNING + "(rcpci45 driver:) Timeout waiting for promiscuous mode from adapter\n"); + printk (KERN_WARNING "(rcpci45 driver:) 0x%8x\n", + p32[0]); + return RC_RTN_NO_LINK_SPEED; + } + } + + /* get mode */ + *pMode = (U8) ((volatile PU8) p32)[0] & 0x0f; + + return RC_RTN_NO_ERROR; +} + +/* +** ========================================================================= +** RCGetLinkSpeed() +** +** get ethernet link speed. 
+** +** 0 = Unknown +** 1 = Full Duplex 100BaseT +** 2 = Half duplex 100BaseT +** 3 = Full Duplex 10BaseT +** 4 = Half duplex 10BaseT +** +** ========================================================================= +*/ +RC_RETURN +RCGetLinkSpeed (struct net_device * dev, PU32 pLinkSpeedCode, + PFNWAITCALLBACK WaitCallback) +{ + U32 msgOffset, timeout; + PU32 pMsg; + volatile PU32 p32; + U8 IOPLinkSpeed; + PPAB pPab = ((PDPA) dev->priv)->pPab; + + msgOffset = pPab->p_atu->InQueue; + + if (msgOffset == 0xFFFFFFFF) { + printk (KERN_WARNING + "(rcpci45 driver:) RCGetLinkSpeed(): Inbound Free Q empty!\n"); + return RC_RTN_FREE_Q_EMPTY; + } + + /* calc virtual address of msg - virtual already mapped to physical */ + pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset); + + /* virtual pointer to return buffer - clear first two dwords */ + p32 = (volatile PU32) pPab->pLinOutMsgBlock; + p32[0] = 0xff; + + /* setup private message */ + pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + pMsg[2] = 0; /* initiator context */ + pMsg[3] = 0x219; /* transaction context */ + pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_GET_LINK_SPEED; + /* phys address to return status - area right after PAB */ + pMsg[5] = pPab->outMsgBlockPhyAddr; + + /* post to Inbound Post Q */ + + pPab->p_atu->InQueue = msgOffset; + + /* wait for response */ + timeout = 1000000; + while (1) { + if (WaitCallback) + (*WaitCallback) (); + + udelay (10); /* please don't hog the bus!!! 
*/ + + if (p32[0] != 0xff) + break; + + if (!timeout--) { + dprintk ("Timeout waiting for link speed from IOP\n"); + dprintk ("0x%8x\n", p32[0]); + return RC_RTN_NO_LINK_SPEED; + } + } + + /* get Link speed */ + IOPLinkSpeed = (U8) ((volatile PU8) p32)[0] & 0x0f; + + *pLinkSpeedCode = IOPLinkSpeed; + + return RC_RTN_NO_ERROR; +} + +/* +** ========================================================================= +** RCReportDriverCapability(struct net_device *dev, U32 capability) +** +** Currently defined bits: +** WARM_REBOOT_CAPABLE 0x01 +** +** ========================================================================= +*/ +RC_RETURN +RCReportDriverCapability (struct net_device * dev, U32 capability) +{ + U32 off; + PU32 pMsg; + PPAB pPab = ((PDPA) dev->priv)->pPab; + + if (pPab == NULL) + return RC_RTN_ADPTR_NOT_REGISTERED; + + off = pPab->p_atu->InQueue; /* get addresss of message */ + + if (0xFFFFFFFF == off) + return RC_RTN_FREE_Q_EMPTY; + + pMsg = (PU32) (pPab->pPci45LinBaseAddr + off); + + /* setup private message */ + pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + pMsg[2] = 0; /* initiator context */ + pMsg[3] = 0x219; /* transaction context */ + pMsg[4] = + RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_REPORT_DRIVER_CAPABILITY; + pMsg[5] = capability; + + pPab->p_atu->InQueue = off; /* send it to the I2O device */ + + return RC_RTN_NO_ERROR; +} + +/* +** ========================================================================= +** RCGetFirmwareVer() +** +** Return firmware version in the form "SoftwareVersion : Bt BootVersion" +** +** ========================================================================= +*/ +RC_RETURN +RCGetFirmwareVer (struct net_device * dev, PU8 pFirmString, + PFNWAITCALLBACK WaitCallback) +{ + U32 msgOffset, timeout; + PU32 pMsg; + volatile PU32 p32; + PPAB pPab = ((PDPA) dev->priv)->pPab; + + msgOffset = pPab->p_atu->InQueue; + if (msgOffset == 0xFFFFFFFF) { + dprintk 
("RCGetFirmwareVer(): Inbound Free Q empty!\n"); + return RC_RTN_FREE_Q_EMPTY; + } + + /* calc virtual address of msg - virtual already mapped to physical */ + pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset); + + /* virtual pointer to return buffer - clear first two dwords */ + p32 = (volatile PU32) pPab->pLinOutMsgBlock; + p32[0] = 0xff; + + /* setup private message */ + pMsg[0] = SIX_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + pMsg[2] = 0; /* initiator context */ + pMsg[3] = 0x219; /* transaction context */ + pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_GET_FIRMWARE_REV; + /* phys address to return status - area right after PAB */ + pMsg[5] = pPab->outMsgBlockPhyAddr; + + /* post to Inbound Post Q */ + + pPab->p_atu->InQueue = msgOffset; + + /* wait for response */ + timeout = 1000000; + while (1) { + if (WaitCallback) + (*WaitCallback) (); + + udelay (10); /* please don't hog the bus!!! */ + + if (p32[0] != 0xff) + break; + + if (!timeout--) { + dprintk ("Timeout waiting for link speed from IOP\n"); + return RC_RTN_NO_FIRM_VER; + } + } + + strcpy (pFirmString, (PU8) p32); + return RC_RTN_NO_ERROR; +} + +/* +** ========================================================================= +** RCResetLANCard() +** +** ResourceFlags indicates whether to return buffer resource explicitly +** to host or keep and reuse. +** CallbackFunction (if not NULL) is the function to be called when +** reset is complete. +** If CallbackFunction is NULL, ReturnAddr will have a 1 placed in it when +** reset is done (if not NULL). 
+** +** ========================================================================= +*/ +RC_RETURN +RCResetLANCard (struct net_device * dev, U16 ResourceFlags, PU32 ReturnAddr, + PFNCALLBACK CallbackFunction) +{ + unsigned long off; + PU32 pMsg; + PPAB pPab = ((PDPA) dev->priv)->pPab; + long timeout = 0; + + if (pPab == NULL) + return RC_RTN_ADPTR_NOT_REGISTERED; + + off = pPab->p_atu->InQueue; /* get addresss of message */ + + if (0xFFFFFFFF == off) + return RC_RTN_FREE_Q_EMPTY; + + pPab->pCallbackFunc = CallbackFunction; + + pMsg = (PU32) (pPab->pPci45LinBaseAddr + off); + + /* setup message */ + pMsg[0] = FOUR_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = I2O_LAN_RESET << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + pMsg[2] = DEFAULT_RECV_INIT_CONTEXT; + pMsg[3] = ResourceFlags << 16; /* resource flags */ + + pPab->p_atu->InQueue = off; /* send it to the I2O device */ + + if (CallbackFunction == (PFNCALLBACK) NULL) { + /* call RCProcI2OMsgQ() until something in pPab->pCallbackFunc + or until timer goes off */ + while (pPab->pCallbackFunc == (PFNCALLBACK) NULL) { + RCProcI2OMsgQ (dev); + udelay (1000); /* please don't hog the bus!!! */ + timeout++; + if (timeout > 10000) { + break; + } + } + if (ReturnAddr != (PU32) NULL) + *ReturnAddr = (U32) pPab->pCallbackFunc; + } + + return RC_RTN_NO_ERROR; +} + +/* +** ========================================================================= +** RCResetIOP() +** +** Send StatusGet Msg, wait for results return directly to buffer. 
+** +** ========================================================================= +*/ +RC_RETURN +RCResetIOP (struct net_device * dev) +{ + U32 msgOffset, timeout; + PU32 pMsg; + PPAB pPab = ((PDPA) dev->priv)->pPab; + volatile PU32 p32; + + msgOffset = pPab->p_atu->InQueue; + + if (msgOffset == 0xFFFFFFFF) { + return RC_RTN_FREE_Q_EMPTY; + } + + /* calc virtual address of msg - virtual already mapped to physical */ + pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset); + + pMsg[0] = NINE_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = I2O_EXEC_IOP_RESET << 24 | I2O_HOST_TID << 12 | I2O_IOP_TID; + pMsg[2] = 0; /* universal context */ + pMsg[3] = 0; /* universal context */ + pMsg[4] = 0; /* universal context */ + pMsg[5] = 0; /* universal context */ + /* phys address to return status - area right after PAB */ + pMsg[6] = pPab->outMsgBlockPhyAddr; + pMsg[7] = 0; + pMsg[8] = 1; /* return 1 byte */ + + /* virtual pointer to return buffer - clear first two dwords */ + p32 = (volatile PU32) pPab->pLinOutMsgBlock; + p32[0] = 0; + p32[1] = 0; + + /* post to Inbound Post Q */ + + pPab->p_atu->InQueue = msgOffset; + + /* wait for response */ + timeout = 1000000; + while (1) { + udelay (10); /* please don't hog the bus!!! */ + + if (p32[0] || p32[1]) + break; + + if (!timeout--) { + dprintk ("RCResetIOP timeout\n"); + return RC_RTN_MSG_REPLY_TIMEOUT; + } + } + return RC_RTN_NO_ERROR; +} + +/* +** ========================================================================= +** RCShutdownLANCard() +** +** ResourceFlags indicates whether to return buffer resource explicitly +** to host or keep and reuse. +** CallbackFunction (if not NULL) is the function to be called when +** shutdown is complete. +** If CallbackFunction is NULL, ReturnAddr will have a 1 placed in it when +** shutdown is done (if not NULL). 
+** +** ========================================================================= +*/ +RC_RETURN +RCShutdownLANCard (struct net_device * dev, U16 ResourceFlags, + PU32 ReturnAddr, PFNCALLBACK CallbackFunction) +{ + volatile PU32 pMsg; + U32 off; + PPAB pPab = ((PDPA) dev->priv)->pPab; + long timeout = 0; + + if (pPab == NULL) + return RC_RTN_ADPTR_NOT_REGISTERED; + + off = pPab->p_atu->InQueue; /* get addresss of message */ + + if (0xFFFFFFFF == off) + return RC_RTN_FREE_Q_EMPTY; + + pPab->pCallbackFunc = CallbackFunction; + + pMsg = (PU32) (pPab->pPci45LinBaseAddr + off); + + /* setup message */ + pMsg[0] = FOUR_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = + I2O_LAN_SHUTDOWN << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + pMsg[2] = DEFAULT_RECV_INIT_CONTEXT; + pMsg[3] = ResourceFlags << 16; /* resource flags */ + + pPab->p_atu->InQueue = off; /* send it to the I2O device */ + + if (CallbackFunction == (PFNCALLBACK) NULL) { + /* call RCProcI2OMsgQ() until something in pPab->pCallbackFunc + or until timer goes off */ + while (pPab->pCallbackFunc == (PFNCALLBACK) NULL) { + RCProcI2OMsgQ (dev); + udelay (1000); /* please don't hog the bus!!! */ + timeout++; + if (timeout > 10000) { + printk (KERN_WARNING + "(rcpci45 driver:) RCShutdownLANCard(): timeout\n"); + break; + } + } + if (ReturnAddr != (PU32) NULL) + *ReturnAddr = (U32) pPab->pCallbackFunc; + } + return RC_RTN_NO_ERROR; +} + +/* +** ========================================================================= +** RCSetRavlinIPandMask() +** +** Set the Ravlin 45/PCI cards IP address and network mask. +** +** IP address and mask must be in network byte order. +** For example, IP address 1.2.3.4 and mask 255.255.255.0 would be +** 0x04030201 and 0x00FFFFFF on a little endian machine. 
+** +** ========================================================================= +*/ +RC_RETURN +RCSetRavlinIPandMask (struct net_device * dev, U32 ipAddr, U32 netMask) +{ + volatile PU32 pMsg; + U32 off; + PPAB pPab = ((PDPA) dev->priv)->pPab; + + if (pPab == NULL) + return RC_RTN_ADPTR_NOT_REGISTERED; + + off = pPab->p_atu->InQueue; /* get addresss of message */ + + if (0xFFFFFFFF == off) + return RC_RTN_FREE_Q_EMPTY; + + pMsg = (PU32) (pPab->pPci45LinBaseAddr + off); + + /* setup private message */ + pMsg[0] = SEVEN_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + pMsg[2] = 0; /* initiator context */ + pMsg[3] = 0x219; /* transaction context */ + pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_SET_IP_AND_MASK; + pMsg[5] = ipAddr; + pMsg[6] = netMask; + + pPab->p_atu->InQueue = off; /* send it to the I2O device */ + return RC_RTN_NO_ERROR; + +} + +/* +** ========================================================================= +** RCGetRavlinIPandMask() +** +** get the IP address and MASK from the card +** +** ========================================================================= +*/ +RC_RETURN +RCGetRavlinIPandMask (struct net_device * dev, PU32 pIpAddr, PU32 pNetMask, + PFNWAITCALLBACK WaitCallback) +{ + unsigned timeout; + U32 off; + PU32 pMsg, p32; + PPAB pPab = ((PDPA) dev->priv)->pPab; + PATU p_atu; + + dprintk + ("RCGetRavlinIPandMask: pIpAddr is 0x%08ulx, *IpAddr is 0x%08ulx\n", + (u32) pIpAddr, *pIpAddr); + + if (pPab == NULL) + return RC_RTN_ADPTR_NOT_REGISTERED; + + p_atu = pPab->p_atu; + off = p_atu->InQueue; /* get addresss of message */ + + if (0xFFFFFFFF == off) + return RC_RTN_FREE_Q_EMPTY; + + p32 = (volatile PU32) pPab->pLinOutMsgBlock; + *p32 = 0xFFFFFFFF; + + pMsg = (PU32) (pPab->pPci45LinBaseAddr + off); + + dprintk + ("RCGetRavlinIPandMask: p_atu 0x%08ulx, off 0x%08ulx, p32 0x%08ulx\n", + (u32) p_atu, off, (u32) p32); + /* setup private message */ + pMsg[0] = FIVE_WORD_MSG_SIZE | 
SGL_OFFSET_0; + pMsg[1] = I2O_PRIVATE << 24 | I2O_HOST_TID << 12 | RC_LAN_TARGET_ID; + pMsg[2] = 0; /* initiator context */ + pMsg[3] = 0x218; /* transaction context */ + pMsg[4] = RC_PCI45_VENDOR_ID << 16 | RC_PRIVATE_GET_IP_AND_MASK; + pMsg[5] = pPab->outMsgBlockPhyAddr; + + p_atu->InQueue = off; /* send it to the I2O device */ + dprintk + ("RCGetRavlinIPandMask: p_atu 0x%08ulx, off 0x%08ulx, p32 0x%08ulx\n", + (u32) p_atu, off, (u32) p32); + + /* wait for the rcpci45 board to update the info */ + timeout = 100000; + while (0xffffffff == *p32) { + if (WaitCallback) + (*WaitCallback) (); + + udelay (10); + + if (!timeout--) { + dprintk ("RCGetRavlinIPandMask: Timeout\n"); + return RC_RTN_MSG_REPLY_TIMEOUT; + } + } + + dprintk + ("RCGetRavlinIPandMask: after time out\np32[0] (IpAddr) 0x%08ulx, p32[1] (IPmask) 0x%08ulx\n", + p32[0], p32[1]); + + /* send IP and mask to user's space */ + *pIpAddr = p32[0]; + *pNetMask = p32[1]; + + dprintk + ("RCGetRavlinIPandMask: pIpAddr is 0x%08ulx, *IpAddr is 0x%08ulx\n", + (u32) pIpAddr, *pIpAddr); + + return RC_RTN_NO_ERROR; +} + +/* +** ///////////////////////////////////////////////////////////////////////// +** ///////////////////////////////////////////////////////////////////////// +** +** local functions +** +** ///////////////////////////////////////////////////////////////////////// +** ///////////////////////////////////////////////////////////////////////// +*/ + +/* +** ========================================================================= +** SendI2OOutboundQInitMsg() +** +** ========================================================================= +*/ +static int +SendI2OOutboundQInitMsg (PPAB pPab) +{ + U32 msgOffset, timeout, phyOutQFrames, i; + volatile PU32 pMsg; + volatile PU32 p32; + + msgOffset = pPab->p_atu->InQueue; + + if (msgOffset == 0xFFFFFFFF) { + dprintk ("SendI2OOutboundQInitMsg(): Inbound Free Q empty!\n"); + return RC_RTN_FREE_Q_EMPTY; + } + + /* calc virtual address of msg - virtual already 
mapped to physical */ + pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset); + + dprintk + ("SendI2OOutboundQInitMsg - pMsg = 0x%08ulx, InQ msgOffset = 0x%08ulx\n", + (u32) pMsg, msgOffset); + + pMsg[0] = EIGHT_WORD_MSG_SIZE | TRL_OFFSET_6; + pMsg[1] = + I2O_EXEC_OUTBOUND_INIT << 24 | I2O_HOST_TID << 12 | I2O_IOP_TID; + pMsg[2] = DEFAULT_RECV_INIT_CONTEXT; + pMsg[3] = 0x106; /* transaction context */ + pMsg[4] = 4096; /* Host page frame size */ + pMsg[5] = MSG_FRAME_SIZE << 16 | 0x80; /* outbound msg frame size and Initcode */ + pMsg[6] = 0xD0000004; /* simple sgl element LE, EOB */ + /* phys address to return status - area right after PAB */ + pMsg[7] = pPab->outMsgBlockPhyAddr; + + /* virtual pointer to return buffer - clear first two dwords */ + p32 = (PU32) pPab->pLinOutMsgBlock; + p32[0] = 0; + + /* post to Inbound Post Q */ + pPab->p_atu->InQueue = msgOffset; + + /* wait for response */ + timeout = 100000; + while (1) { + udelay (10); /* please don't hog the bus!!! */ + + if (p32[0]) + break; + + if (!timeout--) { + dprintk + ("Timeout wait for InitOutQ InPrgress status from IOP\n"); + return RC_RTN_NO_I2O_STATUS; + } + } + + timeout = 100000; + while (1) { + udelay (10); /* please don't hog the bus!!! */ + + if (p32[0] == I2O_EXEC_OUTBOUND_INIT_COMPLETE) + break; + + if (!timeout--) { + dprintk + ("Timeout wait for InitOutQ Complete status from IOP\n"); + return RC_RTN_NO_I2O_STATUS; + } + } + + /* load PCI outbound free Q with MF physical addresses */ + phyOutQFrames = pPab->outMsgBlockPhyAddr; + + for (i = 0; i < NMBR_MSG_FRAMES; i++) { + pPab->p_atu->OutQueue = phyOutQFrames; + phyOutQFrames += MSG_FRAME_SIZE; + } + return RC_RTN_NO_ERROR; +} + +/* +** ========================================================================= +** GetI2OStatus() +** +** Send StatusGet Msg, wait for results return directly to buffer. 
+** +** ========================================================================= +*/ +static int +GetI2OStatus (PPAB pPab) +{ + U32 msgOffset, timeout; + PU32 pMsg; + volatile PU32 p32; + + msgOffset = pPab->p_atu->InQueue; + dprintk ("GetI2OStatus: msg offset = 0x%x\n", msgOffset); + if (msgOffset == 0xFFFFFFFF) { + dprintk ("GetI2OStatus(): Inbound Free Q empty!\n"); + return RC_RTN_FREE_Q_EMPTY; + } + + /* calc virtual address of msg - virtual already mapped to physical */ + pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset); + + pMsg[0] = NINE_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = I2O_EXEC_STATUS_GET << 24 | I2O_HOST_TID << 12 | I2O_IOP_TID; + pMsg[2] = 0; /* universal context */ + pMsg[3] = 0; /* universal context */ + pMsg[4] = 0; /* universal context */ + pMsg[5] = 0; /* universal context */ + /* phys address to return status - area right after PAB */ + pMsg[6] = pPab->outMsgBlockPhyAddr; + pMsg[7] = 0; + pMsg[8] = 88; /* return 88 bytes */ + + /* virtual pointer to return buffer - clear first two dwords */ + p32 = (volatile PU32) pPab->pLinOutMsgBlock; + p32[0] = 0; + p32[1] = 0; + + dprintk + ("GetI2OStatus - pMsg:0x%08ulx, msgOffset:0x%08ulx, [1]:0x%08ulx, [6]:0x%08ulx\n", + (u32) pMsg, msgOffset, pMsg[1], pMsg[6]); + + /* post to Inbound Post Q */ + pPab->p_atu->InQueue = msgOffset; + + dprintk ("Return status to p32 = 0x%08ulx\n", (u32) p32); + + /* wait for response */ + timeout = 1000000; + while (1) { + udelay (10); /* please don't hog the bus!!! 
*/ + + if (p32[0] && p32[1]) + break; + + if (!timeout--) { + dprintk ("Timeout waiting for status from IOP\n"); + dprintk ("0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n", + p32[0], p32[1], p32[2], p32[3]); + dprintk ("0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n", + p32[4], p32[5], p32[6], p32[7]); + dprintk ("0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n", + p32[8], p32[9], p32[10], p32[11]); + return RC_RTN_NO_I2O_STATUS; + } + } + + dprintk ("0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n", p32[0], p32[1], + p32[2], p32[3]); + dprintk ("0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n", p32[4], p32[5], + p32[6], p32[7]); + dprintk ("0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n", p32[8], p32[9], + p32[10], p32[11]); + /* get IOP state */ + pPab->IOPState = ((volatile PU8) p32)[10]; + pPab->InboundMFrameSize = ((volatile PU16) p32)[6]; + + dprintk ("IOP state 0x%02x InFrameSize = 0x%04x\n", + pPab->IOPState, pPab->InboundMFrameSize); + return RC_RTN_NO_ERROR; +} + +/* +** ========================================================================= +** SendEnableSysMsg() +** +** +** ========================================================================= +*/ +static int +SendEnableSysMsg (PPAB pPab) +{ + U32 msgOffset; + volatile PU32 pMsg; + + msgOffset = pPab->p_atu->InQueue; + + if (msgOffset == 0xFFFFFFFF) { + dprintk ("SendEnableSysMsg(): Inbound Free Q empty!\n"); + return RC_RTN_FREE_Q_EMPTY; + } + + /* calc virtual address of msg - virtual already mapped to physical */ + pMsg = (PU32) (pPab->pPci45LinBaseAddr + msgOffset); + + dprintk + ("SendEnableSysMsg - pMsg = 0x%08ulx, InQ msgOffset = 0x%08ulx\n", + (u32) pMsg, msgOffset); + + pMsg[0] = FOUR_WORD_MSG_SIZE | SGL_OFFSET_0; + pMsg[1] = I2O_EXEC_SYS_ENABLE << 24 | I2O_HOST_TID << 12 | I2O_IOP_TID; + pMsg[2] = DEFAULT_RECV_INIT_CONTEXT; + pMsg[3] = 0x110; /* transaction context */ + pMsg[4] = 0x50657465; /* RedCreek Private */ + + /* post to Inbound Post Q */ + pPab->p_atu->InQueue = msgOffset; + + return RC_RTN_NO_ERROR; +} + +/* +** 
========================================================================= +** FillI2OMsgFromTCB() +** +** inputs pMsgU32 - virtual pointer (mapped to physical) of message frame +** pXmitCntrlBlock - pointer to caller buffer control block. +** +** fills in LAN SGL after Transaction Control Word or Bucket Count. +** ========================================================================= +*/ +static int +FillI2OMsgSGLFromTCB (PU32 pMsgFrame, PRCTCB pTransCtrlBlock) +{ + unsigned int nmbrBuffers, nmbrSeg, nmbrDwords, context, flags; + PU32 pTCB, pMsg; + + /* SGL element flags */ +#define EOB 0x40000000 +#define LE 0x80000000 +#define SIMPLE_SGL 0x10000000 +#define BC_PRESENT 0x01000000 + + pTCB = (PU32) pTransCtrlBlock; + pMsg = pMsgFrame; + nmbrDwords = 0; + + dprintk ("FillI2OMsgSGLFromTCBX\n"); + dprintk ("TCB 0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n", + pTCB[0], pTCB[1], pTCB[2], pTCB[3], pTCB[4]); + dprintk ("pTCB 0x%08ulx, pMsg 0x%08ulx\n", (u32) pTCB, (u32) pMsg); + + nmbrBuffers = *pTCB++; + + if (!nmbrBuffers) { + return -1; + } + + do { + context = *pTCB++; /* buffer tag (context) */ + nmbrSeg = *pTCB++; /* number of segments */ + + if (!nmbrSeg) { + return -1; + } + + flags = SIMPLE_SGL | BC_PRESENT; + + if (1 == nmbrSeg) { + flags |= EOB; + + if (1 == nmbrBuffers) + flags |= LE; + } + + /* 1st SGL buffer element has context */ + pMsg[0] = pTCB[0] | flags; /* send over count (segment size) */ + pMsg[1] = context; + pMsg[2] = pTCB[1]; /* send buffer segment physical address */ + nmbrDwords += 3; + pMsg += 3; + pTCB += 2; + + if (--nmbrSeg) { + do { + flags = SIMPLE_SGL; + + if (1 == nmbrSeg) { + flags |= EOB; + + if (1 == nmbrBuffers) + flags |= LE; + } + + pMsg[0] = pTCB[0] | flags; /* send over count */ + pMsg[1] = pTCB[1]; /* send buffer segment physical address */ + nmbrDwords += 2; + pTCB += 2; + pMsg += 2; + + } while (--nmbrSeg); + } + + } while (--nmbrBuffers); + + return nmbrDwords; +} + +/* +** 
========================================================================= +** ProcessOutboundI2OMsg() +** +** process I2O reply message +** * change to msg structure * +** ========================================================================= +*/ +static void +ProcessOutboundI2OMsg (PPAB pPab, U32 phyAddrMsg) +{ + PU8 p8Msg; + PU32 p32; +/* U16 count; */ + + p8Msg = pPab->pLinOutMsgBlock + (phyAddrMsg - pPab->outMsgBlockPhyAddr); + p32 = (PU32) p8Msg; + + dprintk + ("VXD: ProcessOutboundI2OMsg - pPab 0x%08ulx, phyAdr 0x%08ulx, linAdr 0x%08ulx\n", + (u32) pPab, phyAddrMsg, (u32) p8Msg); + dprintk ("msg :0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n", p32[0], p32[1], + p32[2], p32[3]); + dprintk ("msg :0x%08ulx:0x%08ulx:0x%08ulx:0x%08ulx\n", p32[4], p32[5], + p32[6], p32[7]); + + if (p32[4] >> 24 != I2O_REPLY_STATUS_SUCCESS) { + dprintk ("Message reply status not success\n"); + return; + } + + switch (p8Msg[7]) { /* function code byte */ + case I2O_EXEC_SYS_TAB_SET: + msgFlag = 1; + dprintk ("Received I2O_EXEC_SYS_TAB_SET reply\n"); + break; + + case I2O_EXEC_HRT_GET: + msgFlag = 1; + dprintk ("Received I2O_EXEC_HRT_GET reply\n"); + break; + + case I2O_EXEC_LCT_NOTIFY: + msgFlag = 1; + dprintk ("Received I2O_EXEC_LCT_NOTIFY reply\n"); + break; + + case I2O_EXEC_SYS_ENABLE: + msgFlag = 1; + dprintk ("Received I2O_EXEC_SYS_ENABLE reply\n"); + break; + + default: + dprintk ("Received UNKNOWN reply\n"); + break; + } +} diff --git a/drivers/net/rclanmtl.h b/drivers/net/rclanmtl.h new file mode 100644 index 000000000..9488c0fd5 --- /dev/null +++ b/drivers/net/rclanmtl.h @@ -0,0 +1,701 @@ +/* +** ************************************************************************* +** +** +** R C L A N M T L . H $Revision: 6 $ +** +** +** RedCreek I2O LAN Message Transport Layer header file. +** +** --------------------------------------------------------------------- +** --- Copyright (c) 1997-1999, RedCreek Communications Inc. --- +** --- All rights reserved. 
--- +** --------------------------------------------------------------------- +** +** File Description: +** +** Header file for host I2O (Intelligent I/O) LAN message transport layer +** API and data types. +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. + +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. + +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +** +** ************************************************************************* +*/ + +#ifndef RCLANMTL_H +#define RCLANMTL_H + +/* Linux specific includes */ +#include +#ifdef RC_LINUX_MODULE /* linux modules need non-library version of string functions */ +#include +#else +#include +#endif +#include /* for udelay() */ + +#include +#include +#include +#include + +#include + +/* Debug stuff. Define for debug output */ +#undef RCDEBUG + +#ifdef RCDEBUG +#define dprintk(args...) printk(KERN_DEBUG "rc: " args) +#else +#define dprintk(args...) { } +#endif + +/* Typedefs */ + + /* scalar data types */ +typedef __u8 U8; +typedef __u16 U16; +typedef __u32 U32; +typedef __u8 *PU8; +typedef __u16 *PU16; +typedef __u32 *PU32; +typedef unsigned long BF; +typedef int RC_RETURN; + + /* + ** type PFNWAITCALLBACK + ** + ** pointer to void function - type used for WaitCallback in some functions + */ +typedef void (*PFNWAITCALLBACK) (void); /* void argument avoids compiler complaint */ + + /* + ** type PFNTXCALLBACK + ** + ** Pointer to user's transmit callback function. 
 This user function is + ** called from RCProcI2OMsgQ() when packets have been transmitted from buffers + ** given in the RCI2OSendPacket() function. BufferContext is a pointer to + ** an array of 32 bit context values. These are the values the user assigned + ** and passed in the TCB to the RCI2OSendPacket() function. PcktCount + ** indicates the number of buffer context values in the BufferContext[] array. + ** The User's TransmitCallbackFunction should recover (put back in free queue) + ** the packet buffers associated with the buffer context values. + */ +typedef void (*PFNTXCALLBACK) (U32 Status, + U16 PcktCount, + PU32 BufferContext, struct net_device *); + + /* + ** type PFNRXCALLBACK + ** + ** Pointer to user's receive callback function. This user function + ** is called from RCProcI2OMsgQ() when packets have been received into + ** previously posted packet buffers through the RCPostRecvBuffers() function. + ** The receive callback function should process the Packet Descriptor Block + ** pointed to by PacketDescBlock. See Packet Description Block below. + */ +typedef void (*PFNRXCALLBACK) (U32 Status, + U8 PktCount, + U32 BucketsRemain, + PU32 PacketDescBlock, struct net_device *); + + /* + ** type PFNCALLBACK + ** + ** Pointer to user's generic callback function. This user function + ** can be passed to LANReset or LANShutdown and is called when the + ** reset or shutdown is complete. + ** Param1 and Param2 are invalid for LANReset and LANShutdown. 
+ */ +typedef void (*PFNCALLBACK) (U32 Status, + U32 Param1, U32 Param2, struct net_device * dev); + +/* +** Message Unit CSR definitions for RedCreek PCI45 board +*/ +typedef struct tag_rcatu { + volatile unsigned long APICRegSel; /* APIC Register Select */ + volatile unsigned long reserved0; + volatile unsigned long APICWinReg; /* APIC Window Register */ + volatile unsigned long reserved1; + volatile unsigned long InMsgReg0; /* inbound message register 0 */ + volatile unsigned long InMsgReg1; /* inbound message register 1 */ + volatile unsigned long OutMsgReg0; /* outbound message register 0 */ + volatile unsigned long OutMsgReg1; /* outbound message register 1 */ + volatile unsigned long InDoorReg; /* inbound doorbell register */ + volatile unsigned long InIntStat; /* inbound interrupt status register */ + volatile unsigned long InIntMask; /* inbound interrupt mask register */ + volatile unsigned long OutDoorReg; /* outbound doorbell register */ + volatile unsigned long OutIntStat; /* outbound interrupt status register */ + volatile unsigned long OutIntMask; /* outbound interrupt mask register */ + volatile unsigned long reserved2; + volatile unsigned long reserved3; + volatile unsigned long InQueue; /* inbound queue port */ + volatile unsigned long OutQueue; /* outbound queue port */ + volatile unsigned long reserved4; + volatile unsigned long reserver5; + /* RedCreek extension */ + volatile unsigned long EtherMacLow; + volatile unsigned long EtherMacHi; + volatile unsigned long IPaddr; + volatile unsigned long IPmask; +} *PATU; + + /* + ** typedef PAB + ** + ** PCI Adapter Block - holds instance specific information. 
+ */ +typedef struct { + PATU p_atu; /* ptr to ATU register block */ + PU8 pPci45LinBaseAddr; + PU8 pLinOutMsgBlock; + U32 outMsgBlockPhyAddr; + PFNTXCALLBACK pTransCallbackFunc; + PFNRXCALLBACK pRecvCallbackFunc; + PFNCALLBACK pRebootCallbackFunc; + PFNCALLBACK pCallbackFunc; + U16 IOPState; + U16 InboundMFrameSize; +} *PPAB; + +/* + * Driver Private Area, DPA. + */ +typedef struct { + U8 id; /* the AdapterID */ + + /* These two field are basically for the RCioctl function. + * I could not determine if they could be avoided. (RAA)*/ + U32 pci_addr; /* the pci address of the adapter */ + U32 pci_addr_len; + + struct pci_dev *pci_dev; + struct timer_list timer; /* timer */ + struct net_device_stats stats; /* the statistics structure */ + unsigned long numOutRcvBuffers; /* number of outstanding receive buffers */ + unsigned char shutdown; + unsigned char reboot; + unsigned char nexus; + PU8 msgbuf; /* Pointer to Lan Api Private Area */ + dma_addr_t msgbuf_dma; + PPAB pPab; /* Pointer to the PCI Adapter Block */ +} *PDPA; + +/* PCI/45 Configuration space values */ +#define RC_PCI45_VENDOR_ID 0x4916 +#define RC_PCI45_DEVICE_ID 0x1960 + + /* RedCreek API function return values */ +#define RC_RTN_NO_ERROR 0 +#define RC_RTN_I2O_NOT_INIT 1 +#define RC_RTN_FREE_Q_EMPTY 2 +#define RC_RTN_TCB_ERROR 3 +#define RC_RTN_TRANSACTION_ERROR 4 +#define RC_RTN_ADAPTER_ALREADY_INIT 5 +#define RC_RTN_MALLOC_ERROR 6 +#define RC_RTN_ADPTR_NOT_REGISTERED 7 +#define RC_RTN_MSG_REPLY_TIMEOUT 8 +#define RC_RTN_NO_I2O_STATUS 9 +#define RC_RTN_NO_FIRM_VER 10 +#define RC_RTN_NO_LINK_SPEED 11 + +/* Driver capability flags */ +#define WARM_REBOOT_CAPABLE 0x01 + +/* +** Status - Transmit and Receive callback status word +** +** A 32 bit Status is returned to the TX and RX callback functions. 
This value +** contains both the reply status and the detailed status as follows: +** +** 32 24 16 0 +** +------+------+------------+ +** | Reply| | Detailed | +** |Status| 0 | Status | +** +------+------+------------+ +** +** Reply Status and Detailed Status of zero indicates No Errors. +*/ + /* reply message status defines */ +#define I2O_REPLY_STATUS_SUCCESS 0x00 +#define I2O_REPLY_STATUS_ABORT_NO_DATA_TRANSFER 0x02 +#define I2O_REPLY_STATUS_TRANSACTION_ERROR 0x0A + +/* DetailedStatusCode defines */ +#define I2O_LAN_DSC_SUCCESS 0x0000 +#define I2O_LAN_DSC_DEVICE_FAILURE 0x0001 +#define I2O_LAN_DSC_DESTINATION_NOT_FOUND 0x0002 +#define I2O_LAN_DSC_TRANSMIT_ERROR 0x0003 +#define I2O_LAN_DSC_TRANSMIT_ABORTED 0x0004 +#define I2O_LAN_DSC_RECEIVE_ERROR 0x0005 +#define I2O_LAN_DSC_RECEIVE_ABORTED 0x0006 +#define I2O_LAN_DSC_DMA_ERROR 0x0007 +#define I2O_LAN_DSC_BAD_PACKET_DETECTED 0x0008 +#define I2O_LAN_DSC_OUT_OF_MEMORY 0x0009 +#define I2O_LAN_DSC_BUCKET_OVERRUN 0x000A +#define I2O_LAN_DSC_IOP_INTERNAL_ERROR 0x000B +#define I2O_LAN_DSC_CANCELED 0x000C +#define I2O_LAN_DSC_INVALID_TRANSACTION_CONTEXT 0x000D +#define I2O_LAN_DSC_DESTINATION_ADDRESS_DETECTED 0x000E +#define I2O_LAN_DSC_DESTINATION_ADDRESS_OMITTED 0x000F +#define I2O_LAN_DSC_PARTIAL_PACKET_RETURNED 0x0010 + +/* +** Packet Description Block (Received packets) +** +** A pointer to this block structure is returned to the ReceiveCallback +** function. It contains the list of packet buffers which have either been +** filled with a packet or returned to host due to a LANReset function. +** Currently there will only be one packet per receive bucket (buffer) posted. 
+** +** 32 24 0 +** +-----------------------+ -\ +** | Buffer 1 Context | \ +** +-----------------------+ \ +** | 0xC0000000 | / First Bucket Descriptor +** +-----+-----------------+ / +** | 0 | packet 1 length | / +** +-----------------------+ -\ +** | Buffer 2 Context | \ +** +-----------------------+ \ +** | 0xC0000000 | / Second Bucket Descriptor +** +-----+-----------------+ / +** | 0 | packet 2 length | / +** +-----+-----------------+ - +** | ... | ----- more bucket descriptors +** +-----------------------+ -\ +** | Buffer n Context | \ +** +-----------------------+ \ +** | 0xC0000000 | / Last Bucket Descriptor +** +-----+-----------------+ / +** | 0 | packet n length | / +** +-----+-----------------+ - +** +** Buffer Context values are those given to adapter in the TCB on calls to +** RCPostRecvBuffers(). +** +*/ + +/* +** Transaction Control Block (TCB) structure +** +** A structure like this is filled in by the user and passed by reference to +** RCI2OSendPacket() and RCPostRecvBuffers() functions. Minimum size is five +** 32-bit words for one buffer with one segment descriptor. +** MAX_NMBR_POST_BUFFERS_PER_MSG defines the maximum single segment buffers +** that can be described in a given TCB. +** +** 32 0 +** +-----------------------+ +** | Buffer Count | Number of buffers in the TCB +** +-----------------------+ +** | Buffer 1 Context | first buffer reference +** +-----------------------+ +** | Buffer 1 Seg Count | number of segments in buffer +** +-----------------------+ +** | Buffer 1 Seg Desc 1 | first segment descriptor (size, physical address) +** +-----------------------+ +** | ... 
| more segment descriptors (size, physical address) +** +-----------------------+ +** | Buffer 1 Seg Desc n | last segment descriptor (size, physical address) +** +-----------------------+ +** | Buffer 2 Context | second buffer reference +** +-----------------------+ +** | Buffer 2 Seg Count | number of segments in buffer +** +-----------------------+ +** | Buffer 2 Seg Desc 1 | segment descriptor (size, physical address) +** +-----------------------+ +** | ... | more segment descriptors (size, physical address) +** +-----------------------+ +** | Buffer 2 Seg Desc n | +** +-----------------------+ +** | ... | more buffer descriptor blocks ... +** +-----------------------+ +** | Buffer n Context | +** +-----------------------+ +** | Buffer n Seg Count | +** +-----------------------+ +** | Buffer n Seg Desc 1 | +** +-----------------------+ +** | ... | +** +-----------------------+ +** | Buffer n Seg Desc n | +** +-----------------------+ +** +** +** A TCB for one contigous packet buffer would look like the following: +** +** 32 0 +** +-----------------------+ +** | 1 | one buffer in the TCB +** +-----------------------+ +** | | user's buffer reference +** +-----------------------+ +** | 1 | one segment buffer +** +-----------------------+ _ +** | | size \ +** +-----------------------+ \ segment descriptor +** | | physical address of buffer / +** +-----------------------+ _/ +** +*/ + + /* Buffer Segment Descriptor */ +typedef struct { + U32 size; + U32 phyAddress; +} BSD, *PBSD; + +typedef PU32 PRCTCB; +/* +** ------------------------------------------------------------------------- +** Exported functions comprising the API to the LAN I2O message transport layer +** ------------------------------------------------------------------------- +*/ + + /* + ** InitRCI2OMsgLayer() + ** + ** Called once prior to using the I2O LAN message transport layer. 
 User + ** provides both the physical and virtual address of a locked page buffer + ** that is used as a private buffer for the RedCreek I2O message + ** transport layer. This buffer must be a contiguous memory block of a + ** minimum of 16K bytes and long word aligned. The user also must provide + ** the base address of the RedCreek PCI adapter assigned by BIOS or operating + ** system. + ** + ** Inputs: dev - the net_device struct for the device. + ** TransmitCallbackFunction - address of user's TX callback function + ** ReceiveCallbackFunction - address of user's RX callback function + ** RebootCallbackFunction - address of user's reboot callback function + ** + */ +RC_RETURN RCInitI2OMsgLayer (struct net_device *dev, + PFNTXCALLBACK TransmitCallbackFunction, + PFNRXCALLBACK ReceiveCallbackFunction, + PFNCALLBACK RebootCallbackFunction); + + /* + ** RCSetRavlinIPandMask() + ** + ** Set the Ravlin 45/PCI cards IP address and network mask. + ** + ** IP address and mask must be in network byte order. + ** For example, IP address 1.2.3.4 and mask 255.255.255.0 would be + ** 0x04030201 and 0x00FFFFFF on a little endian machine. + ** + */ +RC_RETURN RCSetRavlinIPandMask (struct net_device *dev, U32 ipAddr, + U32 netMask); + +/* +** ========================================================================= +** RCGetRavlinIPandMask() +** +** get the IP address and MASK from the card +** +** ========================================================================= +*/ +RC_RETURN +RCGetRavlinIPandMask (struct net_device *dev, PU32 pIpAddr, PU32 pNetMask, + PFNWAITCALLBACK WaitCallback); + + /* + ** RCProcI2OMsgQ() + ** + ** Called from user's polling loop or Interrupt Service Routine for a PCI + ** interrupt from the RedCreek PCI adapter. User responsible for determining + ** and hooking the PCI interrupt. This function will call the registered + ** callback functions, TransmitCallbackFunction or ReceiveCallbackFunction, + ** if a TX or RX transaction has completed. 
 + */ +irqreturn_t RCProcI2OMsgQ (struct net_device *dev); + + /* + ** Disable and Enable I2O interrupts. I2O interrupts are enabled at Init time + ** but can be disabled and re-enabled through these two function calls. + ** Packets will still be put into any posted received buffers and packets will + ** be sent through RCI2OSendPacket() functions. Disabling I2O interrupts + ** will prevent hardware interrupt to host even though the outbound I2O msg + ** queue is not empty. + */ +RC_RETURN RCEnableI2OInterrupts (struct net_device *dev); +RC_RETURN RCDisableI2OInterrupts (struct net_device *dev); + + /* + ** RCPostRecvBuffers() + ** + ** Post user's page locked buffers for use by the PCI adapter to + ** return ethernet packets received from the LAN. Transaction Control Block, + ** provided by user, contains buffer descriptor(s) which includes a buffer + ** context number along with buffer size and physical address. See TCB above. + ** The buffer context and actual packet length are returned to the + ** ReceiveCallbackFunction when packets have been received. Buffers posted + ** to the RedCreek adapter are considered owned by the adapter until the + ** context is returned to user through the ReceiveCallbackFunction. + */ +RC_RETURN RCPostRecvBuffers (struct net_device *dev, + PRCTCB pTransactionCtrlBlock); +#define MAX_NMBR_POST_BUFFERS_PER_MSG 32 + + /* + ** RCI2OSendPacket() + ** + ** Send user's ethernet packet from a locked page buffer. + ** Packet must have full MAC header, however without a CRC. + ** Initiator context is a user provided value that is returned + ** to the TransmitCallbackFunction when packet buffer is free. + ** Transmit buffers are considered owned by the adapter until context's + ** returned to user through the TransmitCallbackFunction. 
+ */ +RC_RETURN RCI2OSendPacket (struct net_device *dev, + U32 context, PRCTCB pTransactionCtrlBlock); + + /* Ethernet Link Statistics structure */ +typedef struct tag_RC_link_stats { + U32 TX_good; /* good transmit frames */ + U32 TX_maxcol; /* frames not TX due to MAX collisions */ + U32 TX_latecol; /* frames not TX due to late collisions */ + U32 TX_urun; /* frames not TX due to DMA underrun */ + U32 TX_crs; /* frames TX with lost carrier sense */ + U32 TX_def; /* frames deferred due to activity on link */ + U32 TX_singlecol; /* frames TX with one and only on collision */ + U32 TX_multcol; /* frames TX with more than one collision */ + U32 TX_totcol; /* total collisions detected during TX */ + U32 Rcv_good; /* good frames received */ + U32 Rcv_CRCerr; /* frames RX and discarded with CRC errors */ + U32 Rcv_alignerr; /* frames RX with alignment and CRC errors */ + U32 Rcv_reserr; /* good frames discarded due to no RX buffer */ + U32 Rcv_orun; /* RX frames lost due to FIFO overrun */ + U32 Rcv_cdt; /* RX frames with collision during RX */ + U32 Rcv_runt; /* RX frames shorter than 64 bytes */ +} RCLINKSTATS, *P_RCLINKSTATS; + + /* + ** RCGetLinkStatistics() + ** + ** Returns link statistics in user's structure at address StatsReturnAddr + ** If given, not NULL, the function WaitCallback is called during the wait + ** loop while waiting for the adapter to respond. + */ +RC_RETURN RCGetLinkStatistics (struct net_device *dev, + P_RCLINKSTATS StatsReturnAddr, + PFNWAITCALLBACK WaitCallback); + + /* + ** RCGetLinkStatus() + ** + ** Return link status, up or down, to user's location addressed by ReturnAddr. + ** If given, not NULL, the function WaitCallback is called during the wait + ** loop while waiting for the adapter to respond. 
+ */ +RC_RETURN RCGetLinkStatus (struct net_device *dev, + PU32 pReturnStatus, PFNWAITCALLBACK WaitCallback); + + /* Link Status defines - value returned in pReturnStatus */ +#define RC_LAN_LINK_STATUS_DOWN 0 +#define RC_LAN_LINK_STATUS_UP 1 + + /* + ** RCGetMAC() + ** + ** Get the current MAC address assigned to user. RedCreek Ravlin 45/PCI + ** has two MAC addresses. One which is private to the PCI Card, and + ** another MAC which is given to the user as its link layer MAC address. The + ** adapter runs in promiscous mode because of the dual address requirement. + ** The MAC address is returned to the unsigned char array pointer to by mac. + */ +RC_RETURN RCGetMAC (struct net_device *dev, PFNWAITCALLBACK WaitCallback); + + /* + ** RCSetMAC() + ** + ** Set a new user port MAC address. This address will be returned on + ** subsequent RCGetMAC() calls. + */ +RC_RETURN RCSetMAC (struct net_device *dev, PU8 mac); + + /* + ** RCSetLinkSpeed() + ** + ** set adapter's link speed based on given input code. + */ +RC_RETURN RCSetLinkSpeed (struct net_device *dev, U16 LinkSpeedCode); + /* Set link speed codes */ +#define LNK_SPD_AUTO_NEG_NWAY 0 +#define LNK_SPD_100MB_FULL 1 +#define LNK_SPD_100MB_HALF 2 +#define LNK_SPD_10MB_FULL 3 +#define LNK_SPD_10MB_HALF 4 + + /* + ** RCGetLinkSpeed() + ** + ** Return link speed code. 
+ */ + /* Return link speed codes */ +#define LNK_SPD_UNKNOWN 0 +#define LNK_SPD_100MB_FULL 1 +#define LNK_SPD_100MB_HALF 2 +#define LNK_SPD_10MB_FULL 3 +#define LNK_SPD_10MB_HALF 4 + +RC_RETURN +RCGetLinkSpeed (struct net_device *dev, PU32 pLinkSpeedCode, + PFNWAITCALLBACK WaitCallback); +/* +** ========================================================================= +** RCSetPromiscuousMode(struct net_device *dev, U16 Mode) +** +** Defined values for Mode: +** 0 - turn off promiscuous mode +** 1 - turn on promiscuous mode +** +** ========================================================================= +*/ +#define PROMISCUOUS_MODE_OFF 0 +#define PROMISCUOUS_MODE_ON 1 +RC_RETURN RCSetPromiscuousMode (struct net_device *dev, U16 Mode); +/* +** ========================================================================= +** RCGetPromiscuousMode(struct net_device *dev, PU32 pMode, PFNWAITCALLBACK WaitCallback) +** +** get promiscuous mode setting +** +** Possible return values placed in pMode: +** 0 = promisuous mode not set +** 1 = promisuous mode is set +** +** ========================================================================= +*/ +RC_RETURN +RCGetPromiscuousMode (struct net_device *dev, PU32 pMode, + PFNWAITCALLBACK WaitCallback); + +/* +** ========================================================================= +** RCSetBroadcastMode(struct net_device *dev, U16 Mode) +** +** Defined values for Mode: +** 0 - turn off promiscuous mode +** 1 - turn on promiscuous mode +** +** ========================================================================= +*/ +#define BROADCAST_MODE_OFF 0 +#define BROADCAST_MODE_ON 1 +RC_RETURN RCSetBroadcastMode (struct net_device *dev, U16 Mode); +/* +** ========================================================================= +** RCGetBroadcastMode(struct net_device *dev, PU32 pMode, PFNWAITCALLBACK WaitCallback) +** +** get broadcast mode setting +** +** Possible return values placed in pMode: +** 0 = broadcast mode not set +** 
1 = broadcast mode is set +** +** ========================================================================= +*/ +RC_RETURN +RCGetBroadcastMode (struct net_device *dev, PU32 pMode, + PFNWAITCALLBACK WaitCallback); +/* +** ========================================================================= +** RCReportDriverCapability(struct net_device *dev, U32 capability) +** +** Currently defined bits: +** WARM_REBOOT_CAPABLE 0x01 +** +** ========================================================================= +*/ +RC_RETURN RCReportDriverCapability (struct net_device *dev, U32 capability); + +/* +** RCGetFirmwareVer() +** +** Return firmware version in the form "SoftwareVersion : Bt BootVersion" +** +** WARNING: user's space pointed to by pFirmString should be at least 60 bytes. +*/ +RC_RETURN +RCGetFirmwareVer (struct net_device *dev, PU8 pFirmString, + PFNWAITCALLBACK WaitCallback); + +/* +** ---------------------------------------------- +** LAN adapter Reset and Shutdown functions +** ---------------------------------------------- +*/ + /* resource flag bit assignments for RCResetLANCard() & RCShutdownLANCard() */ +#define RC_RESOURCE_RETURN_POSTED_RX_BUCKETS 0x0001 +#define RC_RESOURCE_RETURN_PEND_TX_BUFFERS 0x0002 + + /* + ** RCResetLANCard() + ** + ** Reset LAN card operation. Causes a software reset of the ethernet + ** controller and restarts the command and receive units. Depending on + ** the ResourceFlags given, the buffers are either returned to the + ** host with reply status of I2O_REPLY_STATUS_ABORT_NO_DATA_TRANSFER and + ** detailed status of I2O_LAN_DSC_CANCELED (new receive buffers must be + ** posted after issuing this) OR the buffers are kept and reused by + ** the ethernet controller. If CallbackFunction is not NULL, the function + ** will be called when the reset is complete. If the CallbackFunction is + ** NULL,a 1 will be put into the ReturnAddr after waiting for the reset + ** to complete (please disable I2O interrupts during this method). 
+ ** Any outstanding transmit or receive buffers that are complete will be + ** returned via the normal reply messages before the requested resource + ** buffers are returned. + ** A call to RCPostRecvBuffers() is needed to return the ethernet to full + ** operation if the receive buffers were returned during LANReset. + ** Note: The IOP status is not affected by a LAN reset. + */ +RC_RETURN RCResetLANCard (struct net_device *dev, U16 ResourceFlags, + PU32 ReturnAddr, PFNCALLBACK CallbackFunction); + + /* + ** RCShutdownLANCard() + ** + ** Shutdown LAN card operation and put into an idle (suspended) state. + ** The LAN card is restarted with RCResetLANCard() function. + ** Depending on the ResourceFlags given, the buffers are either returned + ** to the host with reply status of I2O_REPLY_STATUS_ABORT_NO_DATA_TRANSFER + ** and detailed status of I2O_LAN_DSC_CANCELED (new receive buffers must be + ** posted after issuing this) OR the buffers are kept and reused by + ** the ethernet controller. If CallbackFunction is not NULL, the function + ** will be called when the reset is complete. If the CallbackFunction is + ** NULL,a 1 will be put into the ReturnAddr after waiting for the reset + ** to complete (please disable I2O interrupts during this method). + ** Any outstanding transmit or receive buffers that are complete will be + ** returned via the normal reply messages before the requested resource + ** buffers are returned. + ** Note: The IOP status is not affected by a LAN shutdown. + */ +RC_RETURN +RCShutdownLANCard (struct net_device *dev, U16 ResourceFlags, PU32 ReturnAddr, + PFNCALLBACK CallbackFunction); + + /* + ** RCResetIOP(); + ** Initializes IOPState to I2O_IOP_STATE_RESET. + ** Stops access to outbound message Q. + ** Discards any outstanding transmit or posted receive buffers. + ** Clears outbound message Q. 
+ */ +RC_RETURN RCResetIOP (struct net_device *dev); + +#endif /* RCLANMTL_H */ diff --git a/drivers/net/rcpci45.c b/drivers/net/rcpci45.c new file mode 100644 index 000000000..76b63f31b --- /dev/null +++ b/drivers/net/rcpci45.c @@ -0,0 +1,1049 @@ +/* +** +** RCpci45.c +** +** +** +** --------------------------------------------------------------------- +** --- Copyright (c) 1998, 1999, RedCreek Communications Inc. --- +** --- All rights reserved. --- +** --------------------------------------------------------------------- +** +** Written by Pete Popov and Brian Moyle. +** +** Known Problems +** +** None known at this time. +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. + +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. + +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +** +** Francois Romieu, Apr 2003: Converted to pci DMA mapping API. +** +** Pete Popov, Oct 2001: Fixed a few bugs to make the driver functional +** again. Note that this card is not supported or manufactured by +** RedCreek anymore. +** +** Rasmus Andersen, December 2000: Converted to new PCI API and general +** cleanup. +** +** Pete Popov, January 11,99: Fixed a couple of 2.1.x problems +** (virt_to_bus() not called), tested it under 2.2pre5 (as a module), and +** added a #define(s) to enable the use of the same file for both, the 2.0.x +** kernels as well as the 2.1.x. +** +** Ported to 2.1.x by Alan Cox 1998/12/9. 
+** +** Sometime in mid 1998, written by Pete Popov and Brian Moyle. +** +***************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include /* For NR_IRQS only. */ +#include +#include + +static char version[] __initdata = + "RedCreek Communications PCI linux driver version 2.21\n"; + +#define RC_LINUX_MODULE +#include "rclanmtl.h" +#include "rcif.h" + +#define RUN_AT(x) (jiffies + (x)) + +#define NEW_MULTICAST + +#define MAX_ETHER_SIZE 1520 +#define MAX_NMBR_RCV_BUFFERS 96 +#define RC_POSTED_BUFFERS_LOW_MARK MAX_NMBR_RCV_BUFFERS-16 +#define BD_SIZE 3 /* Bucket Descriptor size */ +#define BD_LEN_OFFSET 2 /* Bucket Descriptor offset to length field */ + +/* RedCreek LAN device Target ID */ +#define RC_LAN_TARGET_ID 0x10 +/* RedCreek's OSM default LAN receive Initiator */ +#define DEFAULT_RECV_INIT_CONTEXT 0xA17 + +/* minimum msg buffer size needed by the card + * Note that the size of this buffer is hard code in the + * ipsec card's firmware. Thus, the size MUST be a minimum + * of 16K. Otherwise the card will end up using memory + * that does not belong to it. + */ +#define MSG_BUF_SIZE 16384 + +/* 2003/04/20: I don't know about the hardware ability but the driver won't + * play safe with 64 bit addressing and DAC without NETIF_F_HIGHDMA doesn't + * really make sense anyway. Let's play safe - romieu. 
+ */ +#define RCPCI45_DMA_MASK ((u64) 0xffffffff) + +static U32 DriverControlWord; + +static void rc_timer (unsigned long); + +static int RCopen (struct net_device *); +static int RC_xmit_packet (struct sk_buff *, struct net_device *); +static irqreturn_t RCinterrupt (int, void *, struct pt_regs *); +static int RCclose (struct net_device *dev); +static struct net_device_stats *RCget_stats (struct net_device *); +static int RCioctl (struct net_device *, struct ifreq *, int); +static int RCconfig (struct net_device *, struct ifmap *); +static void RCxmit_callback (U32, U16, PU32, struct net_device *); +static void RCrecv_callback (U32, U8, U32, PU32, struct net_device *); +static void RCreset_callback (U32, U32, U32, struct net_device *); +static void RCreboot_callback (U32, U32, U32, struct net_device *); +static int RC_allocate_and_post_buffers (struct net_device *, int); + +static struct pci_device_id rcpci45_pci_table[] = { + { PCI_VENDOR_ID_REDCREEK, PCI_DEVICE_ID_RC45, PCI_ANY_ID, PCI_ANY_ID,}, + {} +}; +MODULE_DEVICE_TABLE (pci, rcpci45_pci_table); +MODULE_LICENSE("GPL"); + +static void __devexit +rcpci45_remove_one (struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata (pdev); + PDPA pDpa = dev->priv; + + RCResetIOP (dev); + unregister_netdev (dev); + free_irq (dev->irq, dev); + iounmap ((void *) dev->base_addr); + pci_release_regions (pdev); + pci_free_consistent (pdev, MSG_BUF_SIZE, pDpa->msgbuf, + pDpa->msgbuf_dma); + if (pDpa->pPab) + kfree (pDpa->pPab); + free_netdev (dev); + pci_set_drvdata (pdev, NULL); +} + +static int +rcpci45_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) +{ + unsigned long *vaddr; + PDPA pDpa; + int error; + static int card_idx = -1; + struct net_device *dev; + unsigned long pci_start, pci_len; + + card_idx++; + + /* + * Allocate and fill new device structure. + * We need enough for struct net_device plus DPA plus the LAN + * API private area, which requires a minimum of 16KB. 
The top + * of the allocated area will be assigned to struct net_device; + * the next chunk will be assigned to DPA; and finally, the rest + * will be assigned to the LAN API layer. + */ + + dev = alloc_etherdev(sizeof(*pDpa)); + if (!dev) { + printk (KERN_ERR + "(rcpci45 driver:) alloc_etherdev alloc failed\n"); + error = -ENOMEM; + goto err_out; + } + + SET_MODULE_OWNER(dev); + SET_NETDEV_DEV(dev, &pdev->dev); + + error = pci_enable_device (pdev); + if (error) { + printk (KERN_ERR + "(rcpci45 driver:) %d: pci enable device error\n", + card_idx); + goto err_out; + } + pci_start = pci_resource_start (pdev, 0); + pci_len = pci_resource_len (pdev, 0); + printk("pci_start %lx pci_len %lx\n", pci_start, pci_len); + + pci_set_drvdata (pdev, dev); + + pDpa = dev->priv; + pDpa->id = card_idx; + pDpa->pci_dev = pdev; + pDpa->pci_addr = pci_start; + + if (!pci_start || !(pci_resource_flags (pdev, 0) & IORESOURCE_MEM)) { + printk (KERN_ERR + "(rcpci45 driver:) No PCI mem resources! Aborting\n"); + error = -EBUSY; + goto err_out_free_dev; + } + + /* + * pDpa->msgbuf is where the card will dma the I2O + * messages. Thus, we need contiguous physical pages of memory. + * 2003/04/20: pci_alloc_consistent() provides well over the needed + * alignment on a 256 bytes boundary for the LAN API private area. + * Thus it isn't needed anymore to align it by hand. + */ + pDpa->msgbuf = pci_alloc_consistent (pdev, MSG_BUF_SIZE, + &pDpa->msgbuf_dma); + if (!pDpa->msgbuf) { + printk (KERN_ERR "(rcpci45 driver:) \ + Could not allocate %d byte memory for the \ + private msgbuf!\n", MSG_BUF_SIZE); + error = -ENOMEM; + goto err_out_free_dev; + } + + /* The adapter is accessible through memory-access read/write, not + * I/O read/write. Thus, we need to map it to some virtual address + * area in order to access the registers as normal memory. 
+ */ + error = pci_request_regions (pdev, dev->name); + if (error) + goto err_out_free_msgbuf; + + error = pci_set_dma_mask (pdev, RCPCI45_DMA_MASK); + if (error) { + printk (KERN_ERR + "(rcpci45 driver:) pci_set_dma_mask failed!\n"); + goto err_out_free_region; + } + + vaddr = (ulong *) ioremap (pci_start, pci_len); + if (!vaddr) { + printk (KERN_ERR + "(rcpci45 driver:) \ + Unable to remap address range from %lu to %lu\n", + pci_start, pci_start + pci_len); + error = -EIO; + goto err_out_free_region; + } + + dev->base_addr = (unsigned long) vaddr; + dev->irq = pdev->irq; + dev->open = &RCopen; + dev->hard_start_xmit = &RC_xmit_packet; + dev->stop = &RCclose; + dev->get_stats = &RCget_stats; + dev->do_ioctl = &RCioctl; + dev->set_config = &RCconfig; + + if ((error = register_netdev(dev))) + goto err_out_iounmap; + + return 0; /* success */ + +err_out_iounmap: + iounmap((void *) dev->base_addr); +err_out_free_region: + pci_release_regions (pdev); +err_out_free_msgbuf: + pci_free_consistent (pdev, MSG_BUF_SIZE, pDpa->msgbuf, + pDpa->msgbuf_dma); +err_out_free_dev: + free_netdev (dev); +err_out: + card_idx--; + return error; +} + +static struct pci_driver rcpci45_driver = { + .name = "rcpci45", + .id_table = rcpci45_pci_table, + .probe = rcpci45_init_one, + .remove = __devexit_p(rcpci45_remove_one), +}; + +static int __init +rcpci_init_module (void) +{ + int rc = pci_module_init (&rcpci45_driver); + if (!rc) + printk (KERN_ERR "%s", version); + return rc; +} + +static int +RCopen (struct net_device *dev) +{ + int post_buffers = MAX_NMBR_RCV_BUFFERS; + PDPA pDpa = dev->priv; + int count = 0; + int requested = 0; + int error; + + if (pDpa->nexus) { + /* This is not the first time RCopen is called. Thus, + * the interface was previously opened and later closed + * by RCclose(). RCclose() does a Shutdown; to wake up + * the adapter, a reset is mandatory before we can post + * receive buffers. 
However, if the adapter initiated + * a reboot while the interface was closed -- and interrupts + * were turned off -- we need will need to reinitialize + * the adapter, rather than simply waking it up. + */ + printk (KERN_INFO "Waking up adapter...\n"); + RCResetLANCard (dev, 0, 0, 0); + } else { + pDpa->nexus = 1; + /* + * RCInitI2OMsgLayer is done only once, unless the + * adapter was sent a warm reboot + */ + error = RCInitI2OMsgLayer (dev, (PFNTXCALLBACK) RCxmit_callback, + (PFNRXCALLBACK) RCrecv_callback, + (PFNCALLBACK) RCreboot_callback); + if (error) { + printk (KERN_ERR "%s: Unable to init msg layer (%x)\n", + dev->name, error); + goto err_out; + } + if ((error = RCGetMAC (dev, NULL))) { + printk (KERN_ERR "%s: Unable to get adapter MAC\n", + dev->name); + goto err_out; + } + } + + /* Request a shared interrupt line. */ + error = request_irq (dev->irq, RCinterrupt, SA_SHIRQ, dev->name, dev); + if (error) { + printk (KERN_ERR "%s: unable to get IRQ %d\n", + dev->name, dev->irq); + goto err_out; + } + + DriverControlWord |= WARM_REBOOT_CAPABLE; + RCReportDriverCapability (dev, DriverControlWord); + + printk (KERN_INFO "%s: RedCreek Communications IPSEC VPN adapter\n", + dev->name); + + RCEnableI2OInterrupts (dev); + + while (post_buffers) { + if (post_buffers > MAX_NMBR_POST_BUFFERS_PER_MSG) + requested = MAX_NMBR_POST_BUFFERS_PER_MSG; + else + requested = post_buffers; + count = RC_allocate_and_post_buffers (dev, requested); + + if (count < requested) { + /* + * Check to see if we were able to post + * any buffers at all. 
+ */ + if (post_buffers == MAX_NMBR_RCV_BUFFERS) { + printk (KERN_ERR "%s: \ + unable to allocate any buffers\n", + dev->name); + goto err_out_free_irq; + } + printk (KERN_WARNING "%s: \ + unable to allocate all requested buffers\n", dev->name); + break; /* we'll try to post more buffers later */ + } else + post_buffers -= count; + } + pDpa->numOutRcvBuffers = MAX_NMBR_RCV_BUFFERS - post_buffers; + pDpa->shutdown = 0; /* just in case */ + netif_start_queue (dev); + return 0; + +err_out_free_irq: + free_irq (dev->irq, dev); +err_out: + return error; +} + +static int +RC_xmit_packet (struct sk_buff *skb, struct net_device *dev) +{ + + PDPA pDpa = dev->priv; + singleTCB tcb; + psingleTCB ptcb = &tcb; + RC_RETURN status = 0; + + netif_stop_queue (dev); + + if (pDpa->shutdown || pDpa->reboot) { + printk ("RC_xmit_packet: tbusy!\n"); + return 1; + } + + /* + * The user is free to reuse the TCB after RCI2OSendPacket() + * returns, since the function copies the necessary info into its + * own private space. Thus, our TCB can be a local structure. + * The skb, on the other hand, will be freed up in our interrupt + * handler. + */ + + ptcb->bcount = 1; + + /* + * we'll get the context when the adapter interrupts us to tell us that + * the transmission is done. At that time, we can free skb. + */ + ptcb->b.context = (U32) skb; + ptcb->b.scount = 1; + ptcb->b.size = skb->len; + ptcb->b.addr = pci_map_single(pDpa->pci_dev, skb->data, skb->len, + PCI_DMA_TODEVICE); + + if ((status = RCI2OSendPacket (dev, (U32) NULL, (PRCTCB) ptcb)) + != RC_RTN_NO_ERROR) { + printk ("%s: send error 0x%x\n", dev->name, (uint) status); + return 1; + } else { + dev->trans_start = jiffies; + netif_wake_queue (dev); + } + /* + * That's it! + */ + return 0; +} + +/* + * RCxmit_callback() + * + * The transmit callback routine. 
It's called by RCProcI2OMsgQ() + * because the adapter is done with one or more transmit buffers and + * it's returning them to us, or we asked the adapter to return the + * outstanding transmit buffers by calling RCResetLANCard() with + * RC_RESOURCE_RETURN_PEND_TX_BUFFERS flag. + * All we need to do is free the buffers. + */ +static void +RCxmit_callback (U32 Status, + U16 PcktCount, PU32 BufferContext, struct net_device *dev) +{ + struct sk_buff *skb; + PDPA pDpa = dev->priv; + + if (!pDpa) { + printk (KERN_ERR "%s: Fatal Error in xmit callback, !pDpa\n", + dev->name); + return; + } + + if (Status != I2O_REPLY_STATUS_SUCCESS) + printk (KERN_INFO "%s: xmit_callback: Status = 0x%x\n", + dev->name, (uint) Status); + if (pDpa->shutdown || pDpa->reboot) + printk (KERN_INFO "%s: xmit callback: shutdown||reboot\n", + dev->name); + + while (PcktCount--) { + skb = (struct sk_buff *) (BufferContext[0]); + BufferContext++; + pci_unmap_single(pDpa->pci_dev, BufferContext[1], skb->len, + PCI_DMA_TODEVICE); + dev_kfree_skb_irq (skb); + } + netif_wake_queue (dev); +} + +static void +RCreset_callback (U32 Status, U32 p1, U32 p2, struct net_device *dev) +{ + PDPA pDpa = dev->priv; + + printk ("RCreset_callback Status 0x%x\n", (uint) Status); + /* + * Check to see why we were called. + */ + if (pDpa->shutdown) { + printk (KERN_INFO "%s: shutting down interface\n", + dev->name); + pDpa->shutdown = 0; + pDpa->reboot = 0; + } else if (pDpa->reboot) { + printk (KERN_INFO "%s: reboot, shutdown adapter\n", + dev->name); + /* + * We don't set any of the flags in RCShutdownLANCard() + * and we don't pass a callback routine to it. + * The adapter will have already initiated the reboot by + * the time the function returns. + */ + RCDisableI2OInterrupts (dev); + RCShutdownLANCard (dev, 0, 0, 0); + printk (KERN_INFO "%s: scheduling timer...\n", dev->name); + init_timer (&pDpa->timer); + pDpa->timer.expires = RUN_AT ((40 * HZ) / 10); /* 4 sec. 
*/ + pDpa->timer.data = (unsigned long) dev; + pDpa->timer.function = &rc_timer; /* timer handler */ + add_timer (&pDpa->timer); + } +} + +static void +RCreboot_callback (U32 Status, U32 p1, U32 p2, struct net_device *dev) +{ + PDPA pDpa = dev->priv; + + printk (KERN_INFO "%s: reboot: rcv buffers outstanding = %d\n", + dev->name, (uint) pDpa->numOutRcvBuffers); + + if (pDpa->shutdown) { + printk (KERN_INFO "%s: skip reboot, shutdown initiated\n", + dev->name); + return; + } + pDpa->reboot = 1; + /* + * OK, we reset the adapter and ask it to return all + * outstanding transmit buffers as well as the posted + * receive buffers. When the adapter is done returning + * those buffers, it will call our RCreset_callback() + * routine. In that routine, we'll call RCShutdownLANCard() + * to tell the adapter that it's OK to start the reboot and + * schedule a timer callback routine to execute 3 seconds + * later; this routine will reinitialize the adapter at that time. + */ + RCResetLANCard (dev, RC_RESOURCE_RETURN_POSTED_RX_BUCKETS | + RC_RESOURCE_RETURN_PEND_TX_BUFFERS, 0, + (PFNCALLBACK) RCreset_callback); +} + +/* + * RCrecv_callback() + * + * The receive packet callback routine. This is called by + * RCProcI2OMsgQ() after the adapter posts buffers which have been + * filled (one ethernet packet per buffer). + */ +static void +RCrecv_callback (U32 Status, + U8 PktCount, + U32 BucketsRemain, + PU32 PacketDescBlock, struct net_device *dev) +{ + + U32 len, count; + PDPA pDpa = dev->priv; + struct sk_buff *skb; + singleTCB tcb; + psingleTCB ptcb = &tcb; + + ptcb->bcount = 1; + + if ((pDpa->shutdown || pDpa->reboot) && !Status) + printk (KERN_INFO "%s: shutdown||reboot && !Status (%d)\n", + dev->name, PktCount); + + if ((Status != I2O_REPLY_STATUS_SUCCESS) || pDpa->shutdown) { + /* + * Free whatever buffers the adapter returned, but don't + * pass them to the kernel. 
+ */ + + if (!pDpa->shutdown && !pDpa->reboot) + printk (KERN_INFO "%s: recv error status = 0x%x\n", + dev->name, (uint) Status); + else + printk (KERN_DEBUG "%s: Returning %d buffs stat 0x%x\n", + dev->name, PktCount, (uint) Status); + /* + * TO DO: check the nature of the failure and put the + * adapter in failed mode if it's a hard failure. + * Send a reset to the adapter and free all outstanding memory. + */ + if (PacketDescBlock) { + while (PktCount--) { + skb = (struct sk_buff *) PacketDescBlock[0]; + dev_kfree_skb (skb); + pDpa->numOutRcvBuffers--; + /* point to next context field */ + PacketDescBlock += BD_SIZE; + } + } + return; + } else { + while (PktCount--) { + skb = (struct sk_buff *) PacketDescBlock[0]; + len = PacketDescBlock[2]; + skb->dev = dev; + skb_put (skb, len); /* adjust length and tail */ + skb->protocol = eth_type_trans (skb, dev); + netif_rx (skb); /* send the packet to the kernel */ + dev->last_rx = jiffies; + pDpa->numOutRcvBuffers--; + /* point to next context field */ + PacketDescBlock += BD_SIZE; + } + } + + /* + * Replenish the posted receive buffers. + * DO NOT replenish buffers if the driver has already + * initiated a reboot or shutdown! + */ + + if (!pDpa->shutdown && !pDpa->reboot) { + count = RC_allocate_and_post_buffers (dev, + MAX_NMBR_RCV_BUFFERS - + pDpa->numOutRcvBuffers); + pDpa->numOutRcvBuffers += count; + } + +} + +/* + * RCinterrupt() + * + * Interrupt handler. + * This routine sets up a couple of pointers and calls + * RCProcI2OMsgQ(), which in turn process the message and + * calls one of our callback functions. 
+ */ +static irqreturn_t +RCinterrupt (int irq, void *dev_id, struct pt_regs *regs) +{ + + PDPA pDpa; + struct net_device *dev = dev_id; + + pDpa = dev->priv; + + if (pDpa->shutdown) + printk (KERN_DEBUG "%s: shutdown, service irq\n", + dev->name); + + return RCProcI2OMsgQ (dev); +} + +#define REBOOT_REINIT_RETRY_LIMIT 4 +static void +rc_timer (unsigned long data) +{ + struct net_device *dev = (struct net_device *) data; + PDPA pDpa = dev->priv; + int init_status; + static int retry; + int post_buffers = MAX_NMBR_RCV_BUFFERS; + int count = 0; + int requested = 0; + + if (pDpa->reboot) { + init_status = + RCInitI2OMsgLayer (dev, (PFNTXCALLBACK) RCxmit_callback, + (PFNRXCALLBACK) RCrecv_callback, + (PFNCALLBACK) RCreboot_callback); + + switch (init_status) { + case RC_RTN_NO_ERROR: + + pDpa->reboot = 0; + pDpa->shutdown = 0; /* just in case */ + RCReportDriverCapability (dev, DriverControlWord); + RCEnableI2OInterrupts (dev); + + + if (!(dev->flags & IFF_UP)) { + retry = 0; + return; + } + while (post_buffers) { + if (post_buffers > + MAX_NMBR_POST_BUFFERS_PER_MSG) + requested = + MAX_NMBR_POST_BUFFERS_PER_MSG; + else + requested = post_buffers; + count = + RC_allocate_and_post_buffers (dev, + requested); + post_buffers -= count; + if (count < requested) + break; + } + pDpa->numOutRcvBuffers = + MAX_NMBR_RCV_BUFFERS - post_buffers; + printk ("Initialization done.\n"); + netif_wake_queue (dev); + retry = 0; + return; + case RC_RTN_FREE_Q_EMPTY: + retry++; + printk (KERN_WARNING "%s inbound free q empty\n", + dev->name); + break; + default: + retry++; + printk (KERN_WARNING "%s bad stat after reboot: %d\n", + dev->name, init_status); + break; + } + + if (retry > REBOOT_REINIT_RETRY_LIMIT) { + printk (KERN_WARNING "%s unable to reinitialize adapter after reboot\n", dev->name); + printk (KERN_WARNING "%s shutting down interface\n", dev->name); + RCDisableI2OInterrupts (dev); + dev->flags &= ~IFF_UP; + } else { + printk (KERN_INFO "%s: rescheduling timer...\n", + 
dev->name); + init_timer (&pDpa->timer); + pDpa->timer.expires = RUN_AT ((40 * HZ) / 10); + pDpa->timer.data = (unsigned long) dev; + pDpa->timer.function = &rc_timer; + add_timer (&pDpa->timer); + } + } else + printk (KERN_WARNING "%s: unexpected timer irq\n", dev->name); +} + +static int +RCclose (struct net_device *dev) +{ + PDPA pDpa = dev->priv; + + printk("RCclose\n"); + netif_stop_queue (dev); + + if (pDpa->reboot) { + printk (KERN_INFO "%s skipping reset -- adapter already in reboot mode\n", dev->name); + dev->flags &= ~IFF_UP; + pDpa->shutdown = 1; + return 0; + } + + pDpa->shutdown = 1; + + /* + * We can't allow the driver to be unloaded until the adapter returns + * all posted receive buffers. It doesn't hurt to tell the adapter + * to return all posted receive buffers and outstanding xmit buffers, + * even if there are none. + */ + + RCShutdownLANCard (dev, RC_RESOURCE_RETURN_POSTED_RX_BUCKETS | + RC_RESOURCE_RETURN_PEND_TX_BUFFERS, 0, + (PFNCALLBACK) RCreset_callback); + + dev->flags &= ~IFF_UP; + return 0; +} + +static struct net_device_stats * +RCget_stats (struct net_device *dev) +{ + RCLINKSTATS RCstats; + + PDPA pDpa = dev->priv; + + if (!pDpa) { + return 0; + } else if (!(dev->flags & IFF_UP)) { + return 0; + } + + memset (&RCstats, 0, sizeof (RCLINKSTATS)); + if ((RCGetLinkStatistics (dev, &RCstats, (void *) 0)) == + RC_RTN_NO_ERROR) { + + /* total packets received */ + pDpa->stats.rx_packets = RCstats.Rcv_good + /* total packets transmitted */; + pDpa->stats.tx_packets = RCstats.TX_good; + + pDpa->stats.rx_errors = RCstats.Rcv_CRCerr + + RCstats.Rcv_alignerr + RCstats.Rcv_reserr + + RCstats.Rcv_orun + RCstats.Rcv_cdt + RCstats.Rcv_runt; + + pDpa->stats.tx_errors = RCstats.TX_urun + RCstats.TX_crs + + RCstats.TX_def + RCstats.TX_totcol; + + /* + * This needs improvement. 
+ */ + pDpa->stats.rx_dropped = 0; /* no space in linux buffers */ + pDpa->stats.tx_dropped = 0; /* no space available in linux */ + pDpa->stats.multicast = 0; /* multicast packets received */ + pDpa->stats.collisions = RCstats.TX_totcol; + + /* detailed rx_errors: */ + pDpa->stats.rx_length_errors = 0; + pDpa->stats.rx_over_errors = RCstats.Rcv_orun; + pDpa->stats.rx_crc_errors = RCstats.Rcv_CRCerr; + pDpa->stats.rx_frame_errors = 0; + pDpa->stats.rx_fifo_errors = 0; + pDpa->stats.rx_missed_errors = 0; + + /* detailed tx_errors */ + pDpa->stats.tx_aborted_errors = 0; + pDpa->stats.tx_carrier_errors = 0; + pDpa->stats.tx_fifo_errors = 0; + pDpa->stats.tx_heartbeat_errors = 0; + pDpa->stats.tx_window_errors = 0; + + return ((struct net_device_stats *) &(pDpa->stats)); + } + return 0; +} + +static int +RCioctl (struct net_device *dev, struct ifreq *rq, int cmd) +{ + RCuser_struct RCuser; + PDPA pDpa = dev->priv; + + if (!capable (CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + + case RCU_PROTOCOL_REV: + /* + * Assign user protocol revision, to tell user-level + * controller program whether or not it's in sync. 
+ */ + rq->ifr_ifru.ifru_data = (caddr_t) USER_PROTOCOL_REV; + break; + + case RCU_COMMAND: + { + if (copy_from_user + (&RCuser, rq->ifr_data, sizeof (RCuser))) + return -EFAULT; + + dprintk ("RCioctl: RCuser_cmd = 0x%x\n", RCuser.cmd); + + switch (RCuser.cmd) { + case RCUC_GETFWVER: + RCUD_GETFWVER = &RCuser.RCUS_GETFWVER; + RCGetFirmwareVer (dev, + (PU8) & RCUD_GETFWVER-> + FirmString, NULL); + break; + case RCUC_GETINFO: + RCUD_GETINFO = &RCuser.RCUS_GETINFO; + RCUD_GETINFO->mem_start = dev->base_addr; + RCUD_GETINFO->mem_end = + dev->base_addr + pDpa->pci_addr_len; + RCUD_GETINFO->base_addr = pDpa->pci_addr; + RCUD_GETINFO->irq = dev->irq; + break; + case RCUC_GETIPANDMASK: + RCUD_GETIPANDMASK = &RCuser.RCUS_GETIPANDMASK; + RCGetRavlinIPandMask (dev, + (PU32) & + RCUD_GETIPANDMASK->IpAddr, + (PU32) & + RCUD_GETIPANDMASK-> + NetMask, NULL); + break; + case RCUC_GETLINKSTATISTICS: + RCUD_GETLINKSTATISTICS = + &RCuser.RCUS_GETLINKSTATISTICS; + RCGetLinkStatistics (dev, + (P_RCLINKSTATS) & + RCUD_GETLINKSTATISTICS-> + StatsReturn, NULL); + break; + case RCUC_GETLINKSTATUS: + RCUD_GETLINKSTATUS = &RCuser.RCUS_GETLINKSTATUS; + RCGetLinkStatus (dev, + (PU32) & RCUD_GETLINKSTATUS-> + ReturnStatus, NULL); + break; + case RCUC_GETMAC: + RCUD_GETMAC = &RCuser.RCUS_GETMAC; + RCGetMAC (dev, NULL); + memcpy(RCUD_GETMAC, dev->dev_addr, 8); + break; + case RCUC_GETPROM: + RCUD_GETPROM = &RCuser.RCUS_GETPROM; + RCGetPromiscuousMode (dev, + (PU32) & RCUD_GETPROM-> + PromMode, NULL); + break; + case RCUC_GETBROADCAST: + RCUD_GETBROADCAST = &RCuser.RCUS_GETBROADCAST; + RCGetBroadcastMode (dev, + (PU32) & RCUD_GETBROADCAST-> + BroadcastMode, NULL); + break; + case RCUC_GETSPEED: + if (!(dev->flags & IFF_UP)) { + return -ENODATA; + } + RCUD_GETSPEED = &RCuser.RCUS_GETSPEED; + RCGetLinkSpeed (dev, + (PU32) & RCUD_GETSPEED-> + LinkSpeedCode, NULL); + break; + case RCUC_SETIPANDMASK: + RCUD_SETIPANDMASK = &RCuser.RCUS_SETIPANDMASK; + RCSetRavlinIPandMask (dev, + (U32) 
RCUD_SETIPANDMASK-> + IpAddr, + (U32) RCUD_SETIPANDMASK-> + NetMask); + break; + case RCUC_SETMAC: + RCSetMAC (dev, (PU8) & RCUD_SETMAC->mac); + break; + case RCUC_SETSPEED: + RCUD_SETSPEED = &RCuser.RCUS_SETSPEED; + RCSetLinkSpeed (dev, + (U16) RCUD_SETSPEED-> + LinkSpeedCode); + break; + case RCUC_SETPROM: + RCUD_SETPROM = &RCuser.RCUS_SETPROM; + RCSetPromiscuousMode (dev, + (U16) RCUD_SETPROM-> + PromMode); + break; + case RCUC_SETBROADCAST: + RCUD_SETBROADCAST = &RCuser.RCUS_SETBROADCAST; + RCSetBroadcastMode (dev, + (U16) RCUD_SETBROADCAST-> + BroadcastMode); + break; + default: + RCUD_DEFAULT = &RCuser.RCUS_DEFAULT; + RCUD_DEFAULT->rc = 0x11223344; + break; + } + if (copy_to_user (rq->ifr_data, &RCuser, + sizeof (RCuser))) + return -EFAULT; + break; + } /* RCU_COMMAND */ + + default: + rq->ifr_ifru.ifru_data = (caddr_t) 0x12345678; + return -EINVAL; + } + return 0; +} + +static int +RCconfig (struct net_device *dev, struct ifmap *map) +{ + /* + * To be completed ... + */ + return 0; + if (dev->flags & IFF_UP) /* can't act on a running interface */ + return -EBUSY; + + /* Don't allow changing the I/O address */ + if (map->base_addr != dev->base_addr) { + printk (KERN_WARNING "%s Change I/O address not implemented\n", + dev->name); + return -EOPNOTSUPP; + } + return 0; +} + +static void __exit +rcpci_cleanup_module (void) +{ + pci_unregister_driver (&rcpci45_driver); +} + +module_init (rcpci_init_module); +module_exit (rcpci_cleanup_module); + +static int +RC_allocate_and_post_buffers (struct net_device *dev, int numBuffers) +{ + + int i; + PU32 p; + psingleB pB; + struct sk_buff *skb; + PDPA pDpa = dev->priv; + RC_RETURN status; + U32 res = 0; + + if (!numBuffers) + return 0; + else if (numBuffers > MAX_NMBR_POST_BUFFERS_PER_MSG) { + printk (KERN_ERR "%s: Too many buffers requested!\n", + dev->name); + numBuffers = 32; + } + + p = (PU32) kmalloc (sizeof (U32) + numBuffers * sizeof (singleB), + GFP_DMA | GFP_ATOMIC); + + if (!p) { + printk (KERN_WARNING "%s 
unable to allocate TCB\n", + dev->name); + goto out; + } + + p[0] = 0; /* Buffer Count */ + pB = (psingleB) ((U32) p + sizeof (U32));/* point to the first buffer */ + + for (i = 0; i < numBuffers; i++) { + skb = dev_alloc_skb (MAX_ETHER_SIZE + 2); + if (!skb) { + printk (KERN_WARNING + "%s: unable to allocate enough skbs!\n", + dev->name); + goto err_out_unmap; + } + skb_reserve (skb, 2); /* Align IP on 16 byte boundaries */ + pB->context = (U32) skb; + pB->scount = 1; /* segment count */ + pB->size = MAX_ETHER_SIZE; + pB->addr = pci_map_single(pDpa->pci_dev, skb->data, + MAX_ETHER_SIZE, PCI_DMA_FROMDEVICE); + p[0]++; + pB++; + } + + if ((status = RCPostRecvBuffers (dev, (PRCTCB) p)) != RC_RTN_NO_ERROR) { + printk (KERN_WARNING "%s: Post buffer failed, error 0x%x\n", + dev->name, status); + goto err_out_unmap; + } +out_free: + res = p[0]; + kfree (p); +out: + return (res); /* return the number of posted buffers */ + +err_out_unmap: + for (; p[0] > 0; p[0]--) { + --pB; + skb = (struct sk_buff *) pB->context; + pci_unmap_single(pDpa->pci_dev, pB->addr, MAX_ETHER_SIZE, + PCI_DMA_FROMDEVICE); + dev_kfree_skb (skb); + } + goto out_free; +} diff --git a/drivers/net/wan/comx-hw-comx.c b/drivers/net/wan/comx-hw-comx.c new file mode 100644 index 000000000..a62fe5514 --- /dev/null +++ b/drivers/net/wan/comx-hw-comx.c @@ -0,0 +1,1450 @@ +/* + * Hardware-level driver for the COMX and HICOMX cards + * for Linux kernel 2.2.X + * + * Original authors: Arpad Bakay , + * Peter Bajan , + * Rewritten by: Tivadar Szemethy + * Currently maintained by: Gergely Madarasz + * + * Copyright (C) 1995-2000 ITConsult-Pro Co. + * + * Contributors: + * Arnaldo Carvalho de Melo - 0.86 + * Daniele Bellucci - 0.87 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Version 0.80 (99/06/11): + * - port back to kernel, add support builtin driver + * - cleaned up the source code a bit + * + * Version 0.81 (99/06/22): + * - cleaned up the board load functions, no more long reset + * timeouts + * - lower modem lines on close + * - some interrupt handling fixes + * + * Version 0.82 (99/08/24): + * - fix multiple board support + * + * Version 0.83 (99/11/30): + * - interrupt handling and locking fixes during initalization + * - really fix multiple board support + * + * Version 0.84 (99/12/02): + * - some workarounds for problematic hardware/firmware + * + * Version 0.85 (00/01/14): + * - some additional workarounds :/ + * - printk cleanups + * Version 0.86 (00/08/15): + * - resource release on failure at COMX_init + * + * Version 0.87 (03/07/09) + * - audit copy_from_user in comxhw_write_proc + */ + +#define VERSION "0.87" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "comx.h" +#include "comxhw.h" + +MODULE_AUTHOR("Gergely Madarasz , Tivadar Szemethy , Arpad Bakay"); +MODULE_DESCRIPTION("Hardware-level driver for the COMX and HICOMX adapters\n"); +MODULE_LICENSE("GPL"); + +#define COMX_readw(dev, offset) (readw(dev->mem_start + offset + \ + (unsigned int)(((struct comx_privdata *)\ + ((struct comx_channel *)dev->priv)->HW_privdata)->channel) \ + * COMX_CHANNEL_OFFSET)) + +#define COMX_WRITE(dev, offset, value) (writew(value, dev->mem_start + offset \ + + (unsigned int)(((struct comx_privdata *) \ + ((struct comx_channel *)dev->priv)->HW_privdata)->channel) \ + * COMX_CHANNEL_OFFSET)) + +#define COMX_CMD(dev, cmd) (COMX_WRITE(dev, OFF_A_L2_CMD, cmd)) + +struct comx_firmware { + int len; + unsigned char *data; +}; + +struct comx_privdata { + struct comx_firmware *firmware; + u16 clock; + char channel; // channel no. 
+ int memory_size; + short io_extent; + u_long histogram[5]; +}; + +static struct net_device *memory_used[(COMX_MEM_MAX - COMX_MEM_MIN) / 0x10000]; +extern struct comx_hardware hicomx_hw; +extern struct comx_hardware comx_hw; +extern struct comx_hardware cmx_hw; + +static irqreturn_t COMX_interrupt(int irq, void *dev_id, struct pt_regs *regs); + +static void COMX_board_on(struct net_device *dev) +{ + outb_p( (byte) (((dev->mem_start & 0xf0000) >> 16) | + COMX_ENABLE_BOARD_IT | COMX_ENABLE_BOARD_MEM), dev->base_addr); +} + +static void COMX_board_off(struct net_device *dev) +{ + outb_p( (byte) (((dev->mem_start & 0xf0000) >> 16) | + COMX_ENABLE_BOARD_IT), dev->base_addr); +} + +static void HICOMX_board_on(struct net_device *dev) +{ + outb_p( (byte) (((dev->mem_start & 0xf0000) >> 12) | + HICOMX_ENABLE_BOARD_MEM), dev->base_addr); +} + +static void HICOMX_board_off(struct net_device *dev) +{ + outb_p( (byte) (((dev->mem_start & 0xf0000) >> 12) | + HICOMX_DISABLE_BOARD_MEM), dev->base_addr); +} + +static void COMX_set_clock(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct comx_privdata *hw = ch->HW_privdata; + + COMX_WRITE(dev, OFF_A_L1_CLKINI, hw->clock); +} + +static struct net_device *COMX_access_board(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct net_device *ret; + int mempos = (dev->mem_start - COMX_MEM_MIN) >> 16; + unsigned long flags; + + + save_flags(flags); cli(); + + ret = memory_used[mempos]; + + if(ret == dev) { + goto out; + } + + memory_used[mempos] = dev; + + if (!ch->twin || ret != ch->twin) { + if (ret) ((struct comx_channel *)ret->priv)->HW_board_off(ret); + ch->HW_board_on(dev); + } +out: + restore_flags(flags); + return ret; +} + +static void COMX_release_board(struct net_device *dev, struct net_device *savep) +{ + unsigned long flags; + int mempos = (dev->mem_start - COMX_MEM_MIN) >> 16; + struct comx_channel *ch = dev->priv; + + save_flags(flags); cli(); + + if (memory_used[mempos] == savep) 
{ + goto out; + } + + memory_used[mempos] = savep; + if (!ch->twin || ch->twin != savep) { + ch->HW_board_off(dev); + if (savep) ((struct comx_channel*)savep->priv)->HW_board_on(savep); + } +out: + restore_flags(flags); +} + +static int COMX_txe(struct net_device *dev) +{ + struct net_device *savep; + struct comx_channel *ch = dev->priv; + int rc = 0; + + savep = ch->HW_access_board(dev); + if (COMX_readw(dev,OFF_A_L2_LINKUP) == LINKUP_READY) { + rc = COMX_readw(dev,OFF_A_L2_TxEMPTY); + } + ch->HW_release_board(dev,savep); + if(rc==0xffff) { + printk(KERN_ERR "%s, OFF_A_L2_TxEMPTY is %d\n",dev->name, rc); + } + return rc; +} + +static int COMX_send_packet(struct net_device *dev, struct sk_buff *skb) +{ + struct net_device *savep; + struct comx_channel *ch = dev->priv; + struct comx_privdata *hw = ch->HW_privdata; + int ret = FRAME_DROPPED; + word tmp; + + savep = ch->HW_access_board(dev); + + if (ch->debug_flags & DEBUG_HW_TX) { + comx_debug_bytes(dev, skb->data, skb->len,"COMX_send packet"); + } + + if (skb->len > COMX_MAX_TX_SIZE) { + ret=FRAME_DROPPED; + goto out; + } + + tmp=COMX_readw(dev, OFF_A_L2_TxEMPTY); + if ((ch->line_status & LINE_UP) && tmp==1) { + int lensave = skb->len; + int dest = COMX_readw(dev, OFF_A_L2_TxBUFP); + word *data = (word *)skb->data; + + if(dest==0xffff) { + printk(KERN_ERR "%s: OFF_A_L2_TxBUFP is %d\n", dev->name, dest); + ret=FRAME_DROPPED; + goto out; + } + + writew((unsigned short)skb->len, dev->mem_start + dest); + dest += 2; + while (skb->len > 1) { + writew(*data++, dev->mem_start + dest); + dest += 2; skb->len -= 2; + } + if (skb->len == 1) { + writew(*((byte *)data), dev->mem_start + dest); + } + writew(0, dev->mem_start + (int)hw->channel * + COMX_CHANNEL_OFFSET + OFF_A_L2_TxEMPTY); + ch->stats.tx_packets++; + ch->stats.tx_bytes += lensave; + ret = FRAME_ACCEPTED; + } else { + ch->stats.tx_dropped++; + printk(KERN_INFO "%s: frame dropped\n",dev->name); + if(tmp) { + printk(KERN_ERR "%s: OFF_A_L2_TxEMPTY is 
%d\n",dev->name,tmp); + } + } + +out: + ch->HW_release_board(dev, savep); + dev_kfree_skb(skb); + return ret; +} + +static inline int comx_read_buffer(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + word rbuf_offs; + struct sk_buff *skb; + word len; + int i=0; + word *writeptr; + + i = 0; + rbuf_offs = COMX_readw(dev, OFF_A_L2_RxBUFP); + if(rbuf_offs == 0xffff) { + printk(KERN_ERR "%s: OFF_A_L2_RxBUFP is %d\n",dev->name,rbuf_offs); + return 0; + } + len = readw(dev->mem_start + rbuf_offs); + if(len > COMX_MAX_RX_SIZE) { + printk(KERN_ERR "%s: packet length is %d\n",dev->name,len); + return 0; + } + if ((skb = dev_alloc_skb(len + 16)) == NULL) { + ch->stats.rx_dropped++; + COMX_WRITE(dev, OFF_A_L2_DAV, 0); + return 0; + } + rbuf_offs += 2; + skb_reserve(skb, 16); + skb_put(skb, len); + skb->dev = dev; + writeptr = (word *)skb->data; + while (i < len) { + *writeptr++ = readw(dev->mem_start + rbuf_offs); + rbuf_offs += 2; + i += 2; + } + COMX_WRITE(dev, OFF_A_L2_DAV, 0); + ch->stats.rx_packets++; + ch->stats.rx_bytes += len; + if (ch->debug_flags & DEBUG_HW_RX) { + comx_debug_skb(dev, skb, "COMX_interrupt receiving"); + } + ch->LINE_rx(dev, skb); + return 1; +} + +static inline char comx_line_change(struct net_device *dev, char linestat) +{ + struct comx_channel *ch=dev->priv; + char idle=1; + + + if (linestat & LINE_UP) { /* Vonal fol */ + if (ch->lineup_delay) { + if (!test_and_set_bit(0, &ch->lineup_pending)) { + ch->lineup_timer.function = comx_lineup_func; + ch->lineup_timer.data = (unsigned long)dev; + ch->lineup_timer.expires = jiffies + + HZ*ch->lineup_delay; + add_timer(&ch->lineup_timer); + idle=0; + } + } else { + idle=0; + ch->LINE_status(dev, ch->line_status |= LINE_UP); + } + } else { /* Vonal le */ + idle=0; + if (test_and_clear_bit(0, &ch->lineup_pending)) { + del_timer(&ch->lineup_timer); + } else { + ch->line_status &= ~LINE_UP; + if (ch->LINE_status) { + ch->LINE_status(dev, ch->line_status); + } + } + } + return idle; +} + + + 
+static irqreturn_t COMX_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct net_device *dev = dev_id; + struct comx_channel *ch = dev->priv; + struct comx_privdata *hw = ch->HW_privdata; + struct net_device *interrupted; + unsigned long jiffs; + char idle = 0; + int count = 0; + word tmp; + + if (dev == NULL) { + printk(KERN_ERR "COMX_interrupt: irq %d for unknown device\n", irq); + return IRQ_NONE; + } + + jiffs = jiffies; + + interrupted = ch->HW_access_board(dev); + + while (!idle && count < 5000) { + char channel = 0; + idle = 1; + + while (channel < 2) { + char linestat = 0; + char buffers_emptied = 0; + + if (channel == 1) { + if (ch->twin) { + dev = ch->twin; + ch = dev->priv; + hw = ch->HW_privdata; + } else { + break; + } + } else { + COMX_WRITE(dev, OFF_A_L1_REPENA, + COMX_readw(dev, OFF_A_L1_REPENA) & 0xFF00); + } + channel++; + + if ((ch->init_status & (HW_OPEN | LINE_OPEN)) != + (HW_OPEN | LINE_OPEN)) { + continue; + } + + /* Collect stats */ + tmp = COMX_readw(dev, OFF_A_L1_ABOREC); + COMX_WRITE(dev, OFF_A_L1_ABOREC, 0); + if(tmp==0xffff) { + printk(KERN_ERR "%s: OFF_A_L1_ABOREC is %d\n",dev->name,tmp); + break; + } else { + ch->stats.rx_missed_errors += (tmp >> 8) & 0xff; + ch->stats.rx_over_errors += tmp & 0xff; + } + tmp = COMX_readw(dev, OFF_A_L1_CRCREC); + COMX_WRITE(dev, OFF_A_L1_CRCREC, 0); + if(tmp==0xffff) { + printk(KERN_ERR "%s: OFF_A_L1_CRCREC is %d\n",dev->name,tmp); + break; + } else { + ch->stats.rx_crc_errors += (tmp >> 8) & 0xff; + ch->stats.rx_missed_errors += tmp & 0xff; + } + + if ((ch->line_status & LINE_UP) && ch->LINE_rx) { + tmp=COMX_readw(dev, OFF_A_L2_DAV); + while (tmp==1) { + idle=0; + buffers_emptied+=comx_read_buffer(dev); + tmp=COMX_readw(dev, OFF_A_L2_DAV); + } + if(tmp) { + printk(KERN_ERR "%s: OFF_A_L2_DAV is %d\n", dev->name, tmp); + break; + } + } + + tmp=COMX_readw(dev, OFF_A_L2_TxEMPTY); + if (tmp==1 && ch->LINE_tx) { + ch->LINE_tx(dev); + } + if(tmp==0xffff) { + printk(KERN_ERR "%s: 
OFF_A_L2_TxEMPTY is %d\n", dev->name, tmp); + break; + } + + if (COMX_readw(dev, OFF_A_L1_PBUFOVR) >> 8) { + linestat &= ~LINE_UP; + } else { + linestat |= LINE_UP; + } + + if ((linestat & LINE_UP) != (ch->line_status & LINE_UP)) { + ch->stats.tx_carrier_errors++; + idle &= comx_line_change(dev,linestat); + } + + hw->histogram[(int)buffers_emptied]++; + } + count++; + } + + if(count==5000) { + printk(KERN_WARNING "%s: interrupt stuck\n",dev->name); + } + + ch->HW_release_board(dev, interrupted); + return IRQ_HANDLED; +} + +static int COMX_open(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct comx_privdata *hw = ch->HW_privdata; + struct proc_dir_entry *procfile = ch->procdir->subdir; + unsigned long jiffs; + int twin_open=0; + int retval; + struct net_device *savep; + + if (!dev->base_addr || !dev->irq || !dev->mem_start) { + return -ENODEV; + } + + if (ch->twin && (((struct comx_channel *)(ch->twin->priv))->init_status & HW_OPEN)) { + twin_open=1; + } + + if (!twin_open) { + if (!request_region(dev->base_addr, hw->io_extent, dev->name)) { + return -EAGAIN; + } + if (request_irq(dev->irq, COMX_interrupt, 0, dev->name, + (void *)dev)) { + printk(KERN_ERR "comx-hw-comx: unable to obtain irq %d\n", dev->irq); + release_region(dev->base_addr, hw->io_extent); + return -EAGAIN; + } + ch->init_status |= IRQ_ALLOCATED; + if (!ch->HW_load_board || ch->HW_load_board(dev)) { + ch->init_status &= ~IRQ_ALLOCATED; + retval=-ENODEV; + goto error; + } + } + + savep = ch->HW_access_board(dev); + COMX_WRITE(dev, OFF_A_L2_LINKUP, 0); + + if (ch->HW_set_clock) { + ch->HW_set_clock(dev); + } + + COMX_CMD(dev, COMX_CMD_INIT); + jiffs = jiffies; + while (COMX_readw(dev, OFF_A_L2_LINKUP) != 1 && time_before(jiffies, jiffs + HZ)) { + schedule_timeout(1); + } + + if (time_after_eq(jiffies, jiffs + HZ)) { + printk(KERN_ERR "%s: board timeout on INIT command\n", dev->name); + ch->HW_release_board(dev, savep); + retval=-EIO; + goto error; + } + udelay(1000); + + 
COMX_CMD(dev, COMX_CMD_OPEN); + + jiffs = jiffies; + while (COMX_readw(dev, OFF_A_L2_LINKUP) != 3 && time_before(jiffies, jiffs + HZ)) { + schedule_timeout(1); + } + + if (time_after_eq(jiffies, jiffs + HZ)) { + printk(KERN_ERR "%s: board timeout on OPEN command\n", dev->name); + ch->HW_release_board(dev, savep); + retval=-EIO; + goto error; + } + + ch->init_status |= HW_OPEN; + + /* Ez eleg ciki, de ilyen a rendszer */ + if (COMX_readw(dev, OFF_A_L1_PBUFOVR) >> 8) { + ch->line_status &= ~LINE_UP; + } else { + ch->line_status |= LINE_UP; + } + + if (ch->LINE_status) { + ch->LINE_status(dev, ch->line_status); + } + + ch->HW_release_board(dev, savep); + + for ( ; procfile ; procfile = procfile->next) { + if (strcmp(procfile->name, FILENAME_IRQ) == 0 + || strcmp(procfile->name, FILENAME_IO) == 0 + || strcmp(procfile->name, FILENAME_MEMADDR) == 0 + || strcmp(procfile->name, FILENAME_CHANNEL) == 0 + || strcmp(procfile->name, FILENAME_FIRMWARE) == 0 + || strcmp(procfile->name, FILENAME_CLOCK) == 0) { + procfile->mode = S_IFREG | 0444; + + } + } + + return 0; + +error: + if(!twin_open) { + release_region(dev->base_addr, hw->io_extent); + free_irq(dev->irq, (void *)dev); + } + return retval; + +} + +static int COMX_close(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct proc_dir_entry *procfile = ch->procdir->subdir; + struct comx_privdata *hw = ch->HW_privdata; + struct comx_channel *twin_ch; + struct net_device *savep; + + savep = ch->HW_access_board(dev); + + COMX_CMD(dev, COMX_CMD_CLOSE); + udelay(1000); + COMX_CMD(dev, COMX_CMD_EXIT); + + ch->HW_release_board(dev, savep); + + if (ch->init_status & IRQ_ALLOCATED) { + free_irq(dev->irq, (void *)dev); + ch->init_status &= ~IRQ_ALLOCATED; + } + release_region(dev->base_addr, hw->io_extent); + + if (ch->twin && (twin_ch = ch->twin->priv) && + (twin_ch->init_status & HW_OPEN)) { + /* Pass the irq to the twin */ + if (request_irq(dev->irq, COMX_interrupt, 0, ch->twin->name, + (void *)ch->twin) == 0) 
{ + twin_ch->init_status |= IRQ_ALLOCATED; + } + } + + for ( ; procfile ; procfile = procfile->next) { + if (strcmp(procfile->name, FILENAME_IRQ) == 0 + || strcmp(procfile->name, FILENAME_IO) == 0 + || strcmp(procfile->name, FILENAME_MEMADDR) == 0 + || strcmp(procfile->name, FILENAME_CHANNEL) == 0 + || strcmp(procfile->name, FILENAME_FIRMWARE) == 0 + || strcmp(procfile->name, FILENAME_CLOCK) == 0) { + procfile->mode = S_IFREG | 0644; + } + } + + ch->init_status &= ~HW_OPEN; + return 0; +} + +static int COMX_statistics(struct net_device *dev, char *page) +{ + struct comx_channel *ch = dev->priv; + struct comx_privdata *hw = ch->HW_privdata; + struct net_device *savep; + int len = 0; + + savep = ch->HW_access_board(dev); + + len += sprintf(page + len, "Board data: %s %s %s %s\nPBUFOVR: %02x, " + "MODSTAT: %02x, LINKUP: %02x, DAV: %02x\nRxBUFP: %02x, " + "TxEMPTY: %02x, TxBUFP: %02x\n", + (ch->init_status & HW_OPEN) ? "HW_OPEN" : "", + (ch->init_status & LINE_OPEN) ? "LINE_OPEN" : "", + (ch->init_status & FW_LOADED) ? "FW_LOADED" : "", + (ch->init_status & IRQ_ALLOCATED) ? 
"IRQ_ALLOCATED" : "", + COMX_readw(dev, OFF_A_L1_PBUFOVR) & 0xff, + (COMX_readw(dev, OFF_A_L1_PBUFOVR) >> 8) & 0xff, + COMX_readw(dev, OFF_A_L2_LINKUP) & 0xff, + COMX_readw(dev, OFF_A_L2_DAV) & 0xff, + COMX_readw(dev, OFF_A_L2_RxBUFP) & 0xff, + COMX_readw(dev, OFF_A_L2_TxEMPTY) & 0xff, + COMX_readw(dev, OFF_A_L2_TxBUFP) & 0xff); + + len += sprintf(page + len, "hist[0]: %8lu hist[1]: %8lu hist[2]: %8lu\n" + "hist[3]: %8lu hist[4]: %8lu\n",hw->histogram[0],hw->histogram[1], + hw->histogram[2],hw->histogram[3],hw->histogram[4]); + + ch->HW_release_board(dev, savep); + + return len; +} + +static int COMX_load_board(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct comx_privdata *hw = ch->HW_privdata; + struct comx_firmware *fw = hw->firmware; + word board_segment = dev->mem_start >> 16; + int mempos = (dev->mem_start - COMX_MEM_MIN) >> 16; + unsigned long flags; + unsigned char id1, id2; + struct net_device *saved; + int retval; + int loopcount; + int len; + byte *COMX_address; + + if (!fw || !fw->len) { + struct comx_channel *twin_ch = ch->twin ? 
ch->twin->priv : NULL; + struct comx_privdata *twin_hw; + + if (!twin_ch || !(twin_hw = twin_ch->HW_privdata)) { + return -EAGAIN; + } + + if (!(fw = twin_hw->firmware) || !fw->len) { + return -EAGAIN; + } + } + + id1 = fw->data[OFF_FW_L1_ID]; + id2 = fw->data[OFF_FW_L1_ID + 1]; + + if (id1 != FW_L1_ID_1 || id2 != FW_L1_ID_2_COMX) { + printk(KERN_ERR "%s: incorrect firmware, load aborted\n", + dev->name); + return -EAGAIN; + } + + printk(KERN_INFO "%s: Loading COMX Layer 1 firmware %s\n", dev->name, + (char *)(fw->data + OFF_FW_L1_ID + 2)); + + id1 = fw->data[OFF_FW_L2_ID]; + id2 = fw->data[OFF_FW_L2_ID + 1]; + if (id1 == FW_L2_ID_1 && (id2 == 0xc0 || id2 == 0xc1 || id2 == 0xc2)) { + printk(KERN_INFO "with Layer 2 code %s\n", + (char *)(fw->data + OFF_FW_L2_ID + 2)); + } + + outb_p(board_segment | COMX_BOARD_RESET, dev->base_addr); + /* 10 usec should be enough here */ + udelay(100); + + save_flags(flags); cli(); + saved=memory_used[mempos]; + if(saved) { + ((struct comx_channel *)saved->priv)->HW_board_off(saved); + } + memory_used[mempos]=dev; + + outb_p(board_segment | COMX_ENABLE_BOARD_MEM, dev->base_addr); + + writeb(0, dev->mem_start + COMX_JAIL_OFFSET); + + loopcount=0; + while(loopcount++ < 10000 && + readb(dev->mem_start + COMX_JAIL_OFFSET) != COMX_JAIL_VALUE) { + udelay(100); + } + + if (readb(dev->mem_start + COMX_JAIL_OFFSET) != COMX_JAIL_VALUE) { + printk(KERN_ERR "%s: Can't reset board, JAIL value is %02x\n", + dev->name, readb(dev->mem_start + COMX_JAIL_OFFSET)); + retval=-ENODEV; + goto out; + } + + writeb(0x55, dev->mem_start + 0x18ff); + + loopcount=0; + while(loopcount++ < 10000 && readb(dev->mem_start + 0x18ff) != 0) { + udelay(100); + } + + if(readb(dev->mem_start + 0x18ff) != 0) { + printk(KERN_ERR "%s: Can't reset board, reset timeout\n", + dev->name); + retval=-ENODEV; + goto out; + } + + len = 0; + COMX_address = (byte *)dev->mem_start; + while (fw->len > len) { + writeb(fw->data[len++], COMX_address++); + } + + len = 0; + COMX_address = 
(byte *)dev->mem_start; + while (len != fw->len && readb(COMX_address++) == fw->data[len]) { + len++; + } + + if (len != fw->len) { + printk(KERN_ERR "%s: error loading firmware: [%d] is 0x%02x " + "instead of 0x%02x\n", dev->name, len, + readb(COMX_address - 1), fw->data[len]); + retval=-EAGAIN; + goto out; + } + + writeb(0, dev->mem_start + COMX_JAIL_OFFSET); + + loopcount = 0; + while ( loopcount++ < 10000 && COMX_readw(dev, OFF_A_L2_LINKUP) != 1 ) { + udelay(100); + } + + if (COMX_readw(dev, OFF_A_L2_LINKUP) != 1) { + printk(KERN_ERR "%s: error starting firmware, linkup word is %04x\n", + dev->name, COMX_readw(dev, OFF_A_L2_LINKUP)); + retval=-EAGAIN; + goto out; + } + + + ch->init_status |= FW_LOADED; + retval=0; + +out: + outb_p(board_segment | COMX_DISABLE_ALL, dev->base_addr); + if(saved) { + ((struct comx_channel *)saved->priv)->HW_board_on(saved); + } + memory_used[mempos]=saved; + restore_flags(flags); + return retval; +} + +static int CMX_load_board(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct comx_privdata *hw = ch->HW_privdata; + struct comx_firmware *fw = hw->firmware; + word board_segment = dev->mem_start >> 16; + int mempos = (dev->mem_start - COMX_MEM_MIN) >> 16; + #if 0 + unsigned char id1, id2; + #endif + struct net_device *saved; + unsigned long flags; + int retval; + int loopcount; + int len; + byte *COMX_address; + + if (!fw || !fw->len) { + struct comx_channel *twin_ch = ch->twin ? 
ch->twin->priv : NULL; + struct comx_privdata *twin_hw; + + if (!twin_ch || !(twin_hw = twin_ch->HW_privdata)) { + return -EAGAIN; + } + + if (!(fw = twin_hw->firmware) || !fw->len) { + return -EAGAIN; + } + } + + /* Ide kell olyat tenni, hogy ellenorizze az ID-t */ + + if (inb_p(dev->base_addr) != CMX_ID_BYTE) { + printk(KERN_ERR "%s: CMX id byte is invalid(%02x)\n", dev->name, + inb_p(dev->base_addr)); + return -ENODEV; + } + + printk(KERN_INFO "%s: Loading CMX Layer 1 firmware %s\n", dev->name, + (char *)(fw->data + OFF_FW_L1_ID + 2)); + + save_flags(flags); cli(); + saved=memory_used[mempos]; + if(saved) { + ((struct comx_channel *)saved->priv)->HW_board_off(saved); + } + memory_used[mempos]=dev; + + outb_p(board_segment | COMX_ENABLE_BOARD_MEM | COMX_BOARD_RESET, + dev->base_addr); + + len = 0; + COMX_address = (byte *)dev->mem_start; + while (fw->len > len) { + writeb(fw->data[len++], COMX_address++); + } + + len = 0; + COMX_address = (byte *)dev->mem_start; + while (len != fw->len && readb(COMX_address++) == fw->data[len]) { + len++; + } + + outb_p(board_segment | COMX_ENABLE_BOARD_MEM, dev->base_addr); + + if (len != fw->len) { + printk(KERN_ERR "%s: error loading firmware: [%d] is 0x%02x " + "instead of 0x%02x\n", dev->name, len, + readb(COMX_address - 1), fw->data[len]); + retval=-EAGAIN; + goto out; + } + + loopcount=0; + while( loopcount++ < 10000 && COMX_readw(dev, OFF_A_L2_LINKUP) != 1 ) { + udelay(100); + } + + if (COMX_readw(dev, OFF_A_L2_LINKUP) != 1) { + printk(KERN_ERR "%s: error starting firmware, linkup word is %04x\n", + dev->name, COMX_readw(dev, OFF_A_L2_LINKUP)); + retval=-EAGAIN; + goto out; + } + + ch->init_status |= FW_LOADED; + retval=0; + +out: + outb_p(board_segment | COMX_DISABLE_ALL, dev->base_addr); + if(saved) { + ((struct comx_channel *)saved->priv)->HW_board_on(saved); + } + memory_used[mempos]=saved; + restore_flags(flags); + return retval; +} + +static int HICOMX_load_board(struct net_device *dev) +{ + struct comx_channel *ch 
= dev->priv; + struct comx_privdata *hw = ch->HW_privdata; + struct comx_firmware *fw = hw->firmware; + word board_segment = dev->mem_start >> 12; + int mempos = (dev->mem_start - COMX_MEM_MIN) >> 16; + struct net_device *saved; + unsigned char id1, id2; + unsigned long flags; + int retval; + int loopcount; + int len; + word *HICOMX_address; + char id = 1; + + if (!fw || !fw->len) { + struct comx_channel *twin_ch = ch->twin ? ch->twin->priv : NULL; + struct comx_privdata *twin_hw; + + if (!twin_ch || !(twin_hw = twin_ch->HW_privdata)) { + return -EAGAIN; + } + + if (!(fw = twin_hw->firmware) || !fw->len) { + return -EAGAIN; + } + } + + while (id != 4) { + if (inb_p(dev->base_addr + id++) != HICOMX_ID_BYTE) { + break; + } + } + + if (id != 4) { + printk(KERN_ERR "%s: can't find HICOMX at 0x%04x, id[%d] = %02x\n", + dev->name, (unsigned int)dev->base_addr, id - 1, + inb_p(dev->base_addr + id - 1)); + return -1; + } + + id1 = fw->data[OFF_FW_L1_ID]; + id2 = fw->data[OFF_FW_L1_ID + 1]; + if (id1 != FW_L1_ID_1 || id2 != FW_L1_ID_2_HICOMX) { + printk(KERN_ERR "%s: incorrect firmware, load aborted\n", dev->name); + return -EAGAIN; + } + + printk(KERN_INFO "%s: Loading HICOMX Layer 1 firmware %s\n", dev->name, + (char *)(fw->data + OFF_FW_L1_ID + 2)); + + id1 = fw->data[OFF_FW_L2_ID]; + id2 = fw->data[OFF_FW_L2_ID + 1]; + if (id1 == FW_L2_ID_1 && (id2 == 0xc0 || id2 == 0xc1 || id2 == 0xc2)) { + printk(KERN_INFO "with Layer 2 code %s\n", + (char *)(fw->data + OFF_FW_L2_ID + 2)); + } + + outb_p(board_segment | HICOMX_BOARD_RESET, dev->base_addr); + udelay(10); + + save_flags(flags); cli(); + saved=memory_used[mempos]; + if(saved) { + ((struct comx_channel *)saved->priv)->HW_board_off(saved); + } + memory_used[mempos]=dev; + + outb_p(board_segment | HICOMX_ENABLE_BOARD_MEM, dev->base_addr); + outb_p(HICOMX_PRG_MEM, dev->base_addr + 1); + + len = 0; + HICOMX_address = (word *)dev->mem_start; + while (fw->len > len) { + writeb(fw->data[len++], HICOMX_address++); + } + + len = 
0; + HICOMX_address = (word *)dev->mem_start; + while (len != fw->len && (readw(HICOMX_address++) & 0xff) == fw->data[len]) { + len++; + } + + if (len != fw->len) { + printk(KERN_ERR "%s: error loading firmware: [%d] is 0x%02x " + "instead of 0x%02x\n", dev->name, len, + readw(HICOMX_address - 1) & 0xff, fw->data[len]); + retval=-EAGAIN; + goto out; + } + + outb_p(board_segment | HICOMX_BOARD_RESET, dev->base_addr); + outb_p(HICOMX_DATA_MEM, dev->base_addr + 1); + + outb_p(board_segment | HICOMX_ENABLE_BOARD_MEM, dev->base_addr); + + loopcount=0; + while(loopcount++ < 10000 && COMX_readw(dev, OFF_A_L2_LINKUP) != 1) { + udelay(100); + } + + if ( COMX_readw(dev, OFF_A_L2_LINKUP) != 1 ) { + printk(KERN_ERR "%s: error starting firmware, linkup word is %04x\n", + dev->name, COMX_readw(dev, OFF_A_L2_LINKUP)); + retval=-EAGAIN; + goto out; + } + + ch->init_status |= FW_LOADED; + retval=0; + +out: + outb_p(board_segment | HICOMX_DISABLE_ALL, dev->base_addr); + outb_p(HICOMX_DATA_MEM, dev->base_addr + 1); + + if(saved) { + ((struct comx_channel *)saved->priv)->HW_board_on(saved); + } + memory_used[mempos]=saved; + restore_flags(flags); + return retval; +} + +static struct net_device *comx_twin_check(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct proc_dir_entry *procfile = ch->procdir->parent->subdir; + struct comx_privdata *hw = ch->HW_privdata; + + struct net_device *twin; + struct comx_channel *ch_twin; + struct comx_privdata *hw_twin; + + + for ( ; procfile ; procfile = procfile->next) { + + if(!S_ISDIR(procfile->mode)) { + continue; + } + + twin=procfile->data; + ch_twin=twin->priv; + hw_twin=ch_twin->HW_privdata; + + + if (twin != dev && dev->irq && dev->base_addr && dev->mem_start && + dev->irq == twin->irq && dev->base_addr == twin->base_addr && + dev->mem_start == twin->mem_start && + hw->channel == (1 - hw_twin->channel) && + ch->hardware == ch_twin->hardware) { + return twin; + } + } + return NULL; +} + +static int 
comxhw_write_proc(struct file *file, const char *buffer, + u_long count, void *data) +{ + struct proc_dir_entry *entry = (struct proc_dir_entry *)data; + struct net_device *dev = entry->parent->data; + struct comx_channel *ch = dev->priv; + struct comx_privdata *hw = ch->HW_privdata; + char *page; + + + if(ch->init_status & HW_OPEN) { + return -EAGAIN; + } + + if (strcmp(FILENAME_FIRMWARE, entry->name) != 0) { + if (!(page = (char *)__get_free_page(GFP_KERNEL))) { + return -ENOMEM; + } + if(copy_from_user(page, buffer, count = (min_t(int, count, PAGE_SIZE)))) + { + count = -EFAULT; + goto out; + } + if (page[count-1] == '\n') + page[count-1] = '\0'; + else if (count < PAGE_SIZE) + page[count] = '\0'; + else if (page[count]) { + count = -EINVAL; + goto out; + } + page[count]=0; /* Null terminate */ + } else { + byte *tmp; + + if (!hw->firmware) { + if ((hw->firmware = kmalloc(sizeof(struct comx_firmware), + GFP_KERNEL)) == NULL) { + return -ENOMEM; + } + hw->firmware->len = 0; + hw->firmware->data = NULL; + } + + if ((tmp = kmalloc(count + file->f_pos, GFP_KERNEL)) == NULL) { + return -ENOMEM; + } + + /* Ha nem 0 a fpos, akkor meglevo file-t irunk. Gyenge trukk. 
*/ + if (hw->firmware && hw->firmware->len && file->f_pos + && hw->firmware->len < count + file->f_pos) { + memcpy(tmp, hw->firmware->data, hw->firmware->len); + } + if (hw->firmware->data) { + kfree(hw->firmware->data); + } + if (copy_from_user(tmp + file->f_pos, buffer, count)) + return -EFAULT; + hw->firmware->len = entry->size = file->f_pos + count; + hw->firmware->data = tmp; + file->f_pos += count; + return count; + } + + if (strcmp(entry->name, FILENAME_CHANNEL) == 0) { + hw->channel = simple_strtoul(page, NULL, 0); + if (hw->channel >= MAX_CHANNELNO) { + printk(KERN_ERR "Invalid channel number\n"); + hw->channel = 0; + } + if ((ch->twin = comx_twin_check(dev)) != NULL) { + struct comx_channel *twin_ch = ch->twin->priv; + twin_ch->twin = dev; + } + } else if (strcmp(entry->name, FILENAME_IRQ) == 0) { + dev->irq = simple_strtoul(page, NULL, 0); + if (dev->irq == 2) { + dev->irq = 9; + } + if (dev->irq < 3 || dev->irq > 15) { + printk(KERN_ERR "comxhw: Invalid irq number\n"); + dev->irq = 0; + } + if ((ch->twin = comx_twin_check(dev)) != NULL) { + struct comx_channel *twin_ch = ch->twin->priv; + twin_ch->twin = dev; + } + } else if (strcmp(entry->name, FILENAME_IO) == 0) { + dev->base_addr = simple_strtoul(page, NULL, 0); + if ((dev->base_addr & 3) != 0 || dev->base_addr < 0x300 + || dev->base_addr > 0x3fc) { + printk(KERN_ERR "Invalid io value\n"); + dev->base_addr = 0; + } + if ((ch->twin = comx_twin_check(dev)) != NULL) { + struct comx_channel *twin_ch = ch->twin->priv; + + twin_ch->twin = dev; + } + } else if (strcmp(entry->name, FILENAME_MEMADDR) == 0) { + dev->mem_start = simple_strtoul(page, NULL, 0); + if (dev->mem_start <= 0xf000 && dev->mem_start >= 0xa000) { + dev->mem_start *= 16; + } + if ((dev->mem_start & 0xfff) != 0 || dev->mem_start < COMX_MEM_MIN + || dev->mem_start + hw->memory_size > COMX_MEM_MAX) { + printk(KERN_ERR "Invalid memory page\n"); + dev->mem_start = 0; + } + dev->mem_end = dev->mem_start + hw->memory_size; + if ((ch->twin = 
comx_twin_check(dev)) != NULL) { + struct comx_channel *twin_ch = ch->twin->priv; + + twin_ch->twin = dev; + } + } else if (strcmp(entry->name, FILENAME_CLOCK) == 0) { + if (strncmp("ext", page, 3) == 0) { + hw->clock = 0; + } else { + int kbps; + + kbps = simple_strtoul(page, NULL, 0); + hw->clock = kbps ? COMX_CLOCK_CONST/kbps : 0; + } + } +out: + free_page((unsigned long)page); + return count; +} + +static int comxhw_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct proc_dir_entry *file = (struct proc_dir_entry *)data; + struct net_device *dev = file->parent->data; + struct comx_channel *ch = dev->priv; + struct comx_privdata *hw = ch->HW_privdata; + int len = 0; + + + if (strcmp(file->name, FILENAME_IO) == 0) { + len = sprintf(page, "0x%03x\n", (unsigned int)dev->base_addr); + } else if (strcmp(file->name, FILENAME_IRQ) == 0) { + len = sprintf(page, "0x%02x\n", dev->irq == 9 ? 2 : dev->irq); + } else if (strcmp(file->name, FILENAME_CHANNEL) == 0) { + len = sprintf(page, "%01d\n", hw->channel); + } else if (strcmp(file->name, FILENAME_MEMADDR) == 0) { + len = sprintf(page, "0x%05x\n", (unsigned int)dev->mem_start); + } else if (strcmp(file->name, FILENAME_TWIN) == 0) { + len = sprintf(page, "%s\n", ch->twin ? ch->twin->name : "none"); + } else if (strcmp(file->name, FILENAME_CLOCK) == 0) { + if (hw->clock) { + len = sprintf(page, "%-8d\n", COMX_CLOCK_CONST/hw->clock); + } else { + len = sprintf(page, "external\n"); + } + } else if (strcmp(file->name, FILENAME_FIRMWARE) == 0) { + len = min_t(int, FILE_PAGESIZE, + min_t(int, count, + hw->firmware ? + (hw->firmware->len - off) : 0)); + if (len < 0) { + len = 0; + } + *start = hw->firmware ? (hw->firmware->data + off) : NULL; + if (off + len >= (hw->firmware ? 
hw->firmware->len : 0) || len == 0) { + *eof = 1; + } + return len; + } + + if (off >= len) { + *eof = 1; + return 0; + } + + *start = page + off; + if (count >= len - off) { + *eof = 1; + } + return min_t(int, count, len - off); +} + +/* Called on echo comx >boardtype */ +static int COMX_init(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct comx_privdata *hw; + struct proc_dir_entry *new_file; + + if ((ch->HW_privdata = kmalloc(sizeof(struct comx_privdata), + GFP_KERNEL)) == NULL) { + return -ENOMEM; + } + memset(hw = ch->HW_privdata, 0, sizeof(struct comx_privdata)); + + if (ch->hardware == &comx_hw || ch->hardware == &cmx_hw) { + hw->memory_size = COMX_MEMORY_SIZE; + hw->io_extent = COMX_IO_EXTENT; + dev->base_addr = COMX_DEFAULT_IO; + dev->irq = COMX_DEFAULT_IRQ; + dev->mem_start = COMX_DEFAULT_MEMADDR; + dev->mem_end = COMX_DEFAULT_MEMADDR + COMX_MEMORY_SIZE; + } else if (ch->hardware == &hicomx_hw) { + hw->memory_size = HICOMX_MEMORY_SIZE; + hw->io_extent = HICOMX_IO_EXTENT; + dev->base_addr = HICOMX_DEFAULT_IO; + dev->irq = HICOMX_DEFAULT_IRQ; + dev->mem_start = HICOMX_DEFAULT_MEMADDR; + dev->mem_end = HICOMX_DEFAULT_MEMADDR + HICOMX_MEMORY_SIZE; + } else { + printk(KERN_ERR "SERIOUS INTERNAL ERROR in %s, line %d\n", __FILE__, __LINE__); + } + + if ((new_file = create_proc_entry(FILENAME_IO, S_IFREG | 0644, ch->procdir)) + == NULL) { + goto cleanup_HW_privdata; + } + new_file->data = (void *)new_file; + new_file->read_proc = &comxhw_read_proc; + new_file->write_proc = &comxhw_write_proc; + new_file->size = 6; + new_file->nlink = 1; + + if ((new_file = create_proc_entry(FILENAME_IRQ, S_IFREG | 0644, ch->procdir)) + == NULL) { + goto cleanup_filename_io; + } + new_file->data = (void *)new_file; + new_file->read_proc = &comxhw_read_proc; + new_file->write_proc = &comxhw_write_proc; + new_file->size = 5; + new_file->nlink = 1; + + if ((new_file = create_proc_entry(FILENAME_CHANNEL, S_IFREG | 0644, + ch->procdir)) == NULL) { + goto 
cleanup_filename_irq; + } + new_file->data = (void *)new_file; + new_file->read_proc = &comxhw_read_proc; + new_file->write_proc = &comxhw_write_proc; + new_file->size = 2; // Ezt tudjuk + new_file->nlink = 1; + + if (ch->hardware == &hicomx_hw || ch->hardware == &cmx_hw) { + if ((new_file = create_proc_entry(FILENAME_CLOCK, S_IFREG | 0644, + ch->procdir)) == NULL) { + goto cleanup_filename_channel; + } + new_file->data = (void *)new_file; + new_file->read_proc = &comxhw_read_proc; + new_file->write_proc = &comxhw_write_proc; + new_file->size = 9; + new_file->nlink = 1; + } + + if ((new_file = create_proc_entry(FILENAME_MEMADDR, S_IFREG | 0644, + ch->procdir)) == NULL) { + goto cleanup_filename_clock; + } + new_file->data = (void *)new_file; + new_file->read_proc = &comxhw_read_proc; + new_file->write_proc = &comxhw_write_proc; + new_file->size = 8; + new_file->nlink = 1; + + if ((new_file = create_proc_entry(FILENAME_TWIN, S_IFREG | 0444, + ch->procdir)) == NULL) { + goto cleanup_filename_memaddr; + } + new_file->data = (void *)new_file; + new_file->read_proc = &comxhw_read_proc; + new_file->write_proc = NULL; + new_file->nlink = 1; + + if ((new_file = create_proc_entry(FILENAME_FIRMWARE, S_IFREG | 0644, + ch->procdir)) == NULL) { + goto cleanup_filename_twin; + } + new_file->data = (void *)new_file; + new_file->read_proc = &comxhw_read_proc; + new_file->write_proc = &comxhw_write_proc; + new_file->nlink = 1; + + if (ch->hardware == &comx_hw) { + ch->HW_board_on = COMX_board_on; + ch->HW_board_off = COMX_board_off; + ch->HW_load_board = COMX_load_board; + } else if (ch->hardware == &cmx_hw) { + ch->HW_board_on = COMX_board_on; + ch->HW_board_off = COMX_board_off; + ch->HW_load_board = CMX_load_board; + ch->HW_set_clock = COMX_set_clock; + } else if (ch->hardware == &hicomx_hw) { + ch->HW_board_on = HICOMX_board_on; + ch->HW_board_off = HICOMX_board_off; + ch->HW_load_board = HICOMX_load_board; + ch->HW_set_clock = COMX_set_clock; + } else { + printk(KERN_ERR 
"SERIOUS INTERNAL ERROR in %s, line %d\n", __FILE__, __LINE__); + } + + ch->HW_access_board = COMX_access_board; + ch->HW_release_board = COMX_release_board; + ch->HW_txe = COMX_txe; + ch->HW_open = COMX_open; + ch->HW_close = COMX_close; + ch->HW_send_packet = COMX_send_packet; + ch->HW_statistics = COMX_statistics; + + if ((ch->twin = comx_twin_check(dev)) != NULL) { + struct comx_channel *twin_ch = ch->twin->priv; + + twin_ch->twin = dev; + } + + MOD_INC_USE_COUNT; + return 0; + +cleanup_filename_twin: + remove_proc_entry(FILENAME_TWIN, ch->procdir); +cleanup_filename_memaddr: + remove_proc_entry(FILENAME_MEMADDR, ch->procdir); +cleanup_filename_clock: + if (ch->hardware == &hicomx_hw || ch->hardware == &cmx_hw) + remove_proc_entry(FILENAME_CLOCK, ch->procdir); +cleanup_filename_channel: + remove_proc_entry(FILENAME_CHANNEL, ch->procdir); +cleanup_filename_irq: + remove_proc_entry(FILENAME_IRQ, ch->procdir); +cleanup_filename_io: + remove_proc_entry(FILENAME_IO, ch->procdir); +cleanup_HW_privdata: + kfree(ch->HW_privdata); + return -EIO; +} + +/* Called on echo valami >boardtype */ +static int COMX_exit(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct comx_privdata *hw = ch->HW_privdata; + + if (hw->firmware) { + if (hw->firmware->data) kfree(hw->firmware->data); + kfree(hw->firmware); + } if (ch->twin) { + struct comx_channel *twin_ch = ch->twin->priv; + + twin_ch->twin = NULL; + } + + kfree(ch->HW_privdata); + remove_proc_entry(FILENAME_IO, ch->procdir); + remove_proc_entry(FILENAME_IRQ, ch->procdir); + remove_proc_entry(FILENAME_CHANNEL, ch->procdir); + remove_proc_entry(FILENAME_MEMADDR, ch->procdir); + remove_proc_entry(FILENAME_FIRMWARE, ch->procdir); + remove_proc_entry(FILENAME_TWIN, ch->procdir); + if (ch->hardware == &hicomx_hw || ch->hardware == &cmx_hw) { + remove_proc_entry(FILENAME_CLOCK, ch->procdir); + } + + MOD_DEC_USE_COUNT; + return 0; +} + +static int COMX_dump(struct net_device *dev) +{ + printk(KERN_INFO "%s: 
COMX_dump called, why ?\n", dev->name); + return 0; +} + +static struct comx_hardware comx_hw = { + "comx", + VERSION, + COMX_init, + COMX_exit, + COMX_dump, + NULL +}; + +static struct comx_hardware cmx_hw = { + "cmx", + VERSION, + COMX_init, + COMX_exit, + COMX_dump, + NULL +}; + +static struct comx_hardware hicomx_hw = { + "hicomx", + VERSION, + COMX_init, + COMX_exit, + COMX_dump, + NULL +}; + +static int __init comx_hw_comx_init(void) +{ + comx_register_hardware(&comx_hw); + comx_register_hardware(&cmx_hw); + comx_register_hardware(&hicomx_hw); + return 0; +} + +static void __exit comx_hw_comx_exit(void) +{ + comx_unregister_hardware("comx"); + comx_unregister_hardware("cmx"); + comx_unregister_hardware("hicomx"); +} + +module_init(comx_hw_comx_init); +module_exit(comx_hw_comx_exit); diff --git a/drivers/net/wan/comx-hw-locomx.c b/drivers/net/wan/comx-hw-locomx.c new file mode 100644 index 000000000..52460164a --- /dev/null +++ b/drivers/net/wan/comx-hw-locomx.c @@ -0,0 +1,496 @@ +/* + * Hardware driver for the LoCOMX card, using the generic z85230 + * functions + * + * Author: Gergely Madarasz + * + * Based on skeleton code and old LoCOMX driver by Tivadar Szemethy + * and the hostess_sv11 driver + * + * Contributors: + * Arnaldo Carvalho de Melo (0.14) + * + * Copyright (C) 1999 ITConsult-Pro Co. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Version 0.10 (99/06/17): + * - rewritten for the z85230 layer + * + * Version 0.11 (99/06/21): + * - some printk's fixed + * - get rid of a memory leak (it was impossible though :)) + * + * Version 0.12 (99/07/07): + * - check CTS for modem lines, not DCD (which is always high + * in case of this board) + * Version 0.13 (99/07/08): + * - Fix the transmitter status check + * - Handle the net device statistics better + * Version 0.14 (00/08/15): + * - resource release on failure at LOCOMX_init + */ + +#define VERSION "0.14" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "comx.h" +#include "z85230.h" + +MODULE_AUTHOR("Gergely Madarasz "); +MODULE_DESCRIPTION("Hardware driver for the LoCOMX board"); +MODULE_LICENSE("GPL"); + +#define RX_DMA 3 +#define TX_DMA 1 +#define LOCOMX_ID 0x33 +#define LOCOMX_IO_EXTENT 8 +#define LOCOMX_DEFAULT_IO 0x368 +#define LOCOMX_DEFAULT_IRQ 7 + +u8 z8530_locomx[] = { + 11, TCRTxCP, + 14, DTRREQ, + 255 +}; + +struct locomx_data { + int io_extent; + struct z8530_dev board; + struct timer_list status_timer; +}; + +static int LOCOMX_txe(struct net_device *dev) +{ + struct comx_channel *ch = netdev_priv(dev); + struct locomx_data *hw = ch->HW_privdata; + + return (!hw->board.chanA.tx_next_skb); +} + + +static void locomx_rx(struct z8530_channel *c, struct sk_buff *skb) +{ + struct net_device *dev = c->netdevice; + struct comx_channel *ch = netdev_priv(dev); + + if (ch->debug_flags & DEBUG_HW_RX) { + comx_debug_skb(dev, skb, "locomx_rx receiving"); + } + ch->LINE_rx(dev,skb); +} + +static int LOCOMX_send_packet(struct net_device *dev, struct sk_buff *skb) +{ + struct comx_channel *ch = netdev_priv(dev); + struct locomx_data *hw = ch->HW_privdata; + + if (ch->debug_flags & DEBUG_HW_TX) { + comx_debug_bytes(dev, skb->data, skb->len, "LOCOMX_send_packet"); + } + + if (!(ch->line_status & LINE_UP)) { + return FRAME_DROPPED; + } + + 
if(z8530_queue_xmit(&hw->board.chanA,skb)) { + printk(KERN_WARNING "%s: FRAME_DROPPED\n",dev->name); + return FRAME_DROPPED; + } + + if (ch->debug_flags & DEBUG_HW_TX) { + comx_debug(dev, "%s: LOCOMX_send_packet was successful\n\n", dev->name); + } + + if(!hw->board.chanA.tx_next_skb) { + return FRAME_QUEUED; + } else { + return FRAME_ACCEPTED; + } +} + +static void locomx_status_timerfun(unsigned long d) +{ + struct net_device *dev = (struct net_device *)d; + struct comx_channel *ch = netdev_priv(dev); + struct locomx_data *hw = ch->HW_privdata; + + if(!(ch->line_status & LINE_UP) && + (hw->board.chanA.status & CTS)) { + ch->LINE_status(dev, ch->line_status | LINE_UP); + } + if((ch->line_status & LINE_UP) && + !(hw->board.chanA.status & CTS)) { + ch->LINE_status(dev, ch->line_status & ~LINE_UP); + } + mod_timer(&hw->status_timer,jiffies + ch->lineup_delay * HZ); +} + + +static int LOCOMX_open(struct net_device *dev) +{ + struct comx_channel *ch = netdev_priv(dev); + struct locomx_data *hw = ch->HW_privdata; + struct proc_dir_entry *procfile = ch->procdir->subdir; + unsigned long flags; + int ret; + + if (!dev->base_addr || !dev->irq) { + return -ENODEV; + } + + if (!request_region(dev->base_addr, hw->io_extent, dev->name)) { + return -EAGAIN; + } + + hw->board.chanA.ctrlio=dev->base_addr + 5; + hw->board.chanA.dataio=dev->base_addr + 7; + + hw->board.irq=dev->irq; + hw->board.chanA.netdevice=dev; + hw->board.chanA.dev=&hw->board; + hw->board.name=dev->name; + hw->board.chanA.txdma=TX_DMA; + hw->board.chanA.rxdma=RX_DMA; + hw->board.chanA.irqs=&z8530_nop; + hw->board.chanB.irqs=&z8530_nop; + + if(request_irq(dev->irq, z8530_interrupt, SA_INTERRUPT, + dev->name, &hw->board)) { + printk(KERN_ERR "%s: unable to obtain irq %d\n", dev->name, + dev->irq); + ret=-EAGAIN; + goto irq_fail; + } + if(request_dma(TX_DMA,"LoCOMX (TX)")) { + printk(KERN_ERR "%s: unable to obtain TX DMA (DMA channel %d)\n", + dev->name, TX_DMA); + ret=-EAGAIN; + goto dma1_fail; + } + + 
if(request_dma(RX_DMA,"LoCOMX (RX)")) { + printk(KERN_ERR "%s: unable to obtain RX DMA (DMA channel %d)\n", + dev->name, RX_DMA); + ret=-EAGAIN; + goto dma2_fail; + } + + save_flags(flags); + cli(); + + if(z8530_init(&hw->board)!=0) + { + printk(KERN_ERR "%s: Z8530 device not found.\n",dev->name); + ret=-ENODEV; + goto z8530_fail; + } + + hw->board.chanA.dcdcheck=CTS; + + z8530_channel_load(&hw->board.chanA, z8530_hdlc_kilostream_85230); + z8530_channel_load(&hw->board.chanA, z8530_locomx); + z8530_channel_load(&hw->board.chanB, z8530_dead_port); + + z8530_describe(&hw->board, "I/O", dev->base_addr); + + if((ret=z8530_sync_dma_open(dev, &hw->board.chanA))!=0) { + goto z8530_fail; + } + + restore_flags(flags); + + + hw->board.active=1; + hw->board.chanA.rx_function=locomx_rx; + + ch->init_status |= HW_OPEN; + if (hw->board.chanA.status & DCD) { + ch->line_status |= LINE_UP; + } else { + ch->line_status &= ~LINE_UP; + } + + comx_status(dev, ch->line_status); + + init_timer(&hw->status_timer); + hw->status_timer.function=locomx_status_timerfun; + hw->status_timer.data=(unsigned long)dev; + hw->status_timer.expires=jiffies + ch->lineup_delay * HZ; + add_timer(&hw->status_timer); + + for (; procfile ; procfile = procfile->next) { + if (strcmp(procfile->name, FILENAME_IO) == 0 || + strcmp(procfile->name, FILENAME_IRQ) == 0) { + procfile->mode = S_IFREG | 0444; + } + } + return 0; + +z8530_fail: + restore_flags(flags); + free_dma(RX_DMA); +dma2_fail: + free_dma(TX_DMA); +dma1_fail: + free_irq(dev->irq, &hw->board); +irq_fail: + release_region(dev->base_addr, hw->io_extent); + return ret; +} + +static int LOCOMX_close(struct net_device *dev) +{ + struct comx_channel *ch = netdev_priv(dev); + struct locomx_data *hw = ch->HW_privdata; + struct proc_dir_entry *procfile = ch->procdir->subdir; + + hw->board.chanA.rx_function=z8530_null_rx; + netif_stop_queue(dev); + z8530_sync_dma_close(dev, &hw->board.chanA); + + z8530_shutdown(&hw->board); + + del_timer(&hw->status_timer); + 
free_dma(RX_DMA); + free_dma(TX_DMA); + free_irq(dev->irq,&hw->board); + release_region(dev->base_addr,8); + + for (; procfile ; procfile = procfile->next) { + if (strcmp(procfile->name, FILENAME_IO) == 0 || + strcmp(procfile->name, FILENAME_IRQ) == 0) { + procfile->mode = S_IFREG | 0644; + } + } + + ch->init_status &= ~HW_OPEN; + return 0; +} + +static int LOCOMX_statistics(struct net_device *dev,char *page) +{ + int len = 0; + + len += sprintf(page + len, "Hello\n"); + + return len; +} + +static int LOCOMX_dump(struct net_device *dev) { + printk(KERN_INFO "LOCOMX_dump called\n"); + return(-1); +} + +static int locomx_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct proc_dir_entry *file = (struct proc_dir_entry *)data; + struct net_device *dev = file->parent->data; + int len = 0; + + if (strcmp(file->name, FILENAME_IO) == 0) { + len = sprintf(page, "0x%x\n", (unsigned int)dev->base_addr); + } else if (strcmp(file->name, FILENAME_IRQ) == 0) { + len = sprintf(page, "%d\n", (unsigned int)dev->irq); + } else { + printk(KERN_ERR "hw_read_proc: internal error, filename %s\n", + file->name); + return -EBADF; + } + + if (off >= len) { + *eof = 1; + return 0; + } + + *start = page + off; + if (count >= len - off) { + *eof = 1; + } + return min_t(int, count, len - off); +} + +static int locomx_write_proc(struct file *file, const char *buffer, + u_long count, void *data) +{ + struct proc_dir_entry *entry = (struct proc_dir_entry *)data; + struct net_device *dev = (struct net_device *)entry->parent->data; + int val; + char *page; + + if (!(page = (char *)__get_free_page(GFP_KERNEL))) { + return -ENOMEM; + } + + if (copy_from_user(page, buffer, count = min_t(unsigned long, count, PAGE_SIZE))) { + free_page((unsigned long)page); + return -EBADF; + } + if (*(page + count - 1) == '\n') { + *(page + count - 1) = 0; + } + + if (strcmp(entry->name, FILENAME_IO) == 0) { + val = simple_strtoul(page, NULL, 0); + if (val != 0x360 && val != 
0x368 && val != 0x370 && + val != 0x378) { + printk(KERN_ERR "LoCOMX: incorrect io address!\n"); + } else { + dev->base_addr = val; + } + } else if (strcmp(entry->name, FILENAME_IRQ) == 0) { + val = simple_strtoul(page, NULL, 0); + if (val != 3 && val != 4 && val != 5 && val != 6 && val != 7) { + printk(KERN_ERR "LoCOMX: incorrect irq value!\n"); + } else { + dev->irq = val; + } + } else { + printk(KERN_ERR "locomx_write_proc: internal error, filename %s\n", + entry->name); + free_page((unsigned long)page); + return -EBADF; + } + + free_page((unsigned long)page); + return count; +} + + + +static int LOCOMX_init(struct net_device *dev) +{ + struct comx_channel *ch = netdev_priv(dev); + struct locomx_data *hw; + struct proc_dir_entry *new_file; + + /* Alloc data for private structure */ + if ((ch->HW_privdata = kmalloc(sizeof(struct locomx_data), + GFP_KERNEL)) == NULL) { + return -ENOMEM; + } + + memset(hw = ch->HW_privdata, 0, sizeof(struct locomx_data)); + hw->io_extent = LOCOMX_IO_EXTENT; + + /* Register /proc files */ + if ((new_file = create_proc_entry(FILENAME_IO, S_IFREG | 0644, + ch->procdir)) == NULL) { + goto cleanup_HW_privdata; + } + new_file->data = (void *)new_file; + new_file->read_proc = &locomx_read_proc; + new_file->write_proc = &locomx_write_proc; + new_file->nlink = 1; + + if ((new_file = create_proc_entry(FILENAME_IRQ, S_IFREG | 0644, + ch->procdir)) == NULL) { + goto cleanup_filename_io; + } + new_file->data = (void *)new_file; + new_file->read_proc = &locomx_read_proc; + new_file->write_proc = &locomx_write_proc; + new_file->nlink = 1; + +/* No clock yet */ +/* + if ((new_file = create_proc_entry(FILENAME_CLOCK, S_IFREG | 0644, + ch->procdir)) == NULL) { + return -EIO; + } + new_file->data = (void *)new_file; + new_file->read_proc = &locomx_read_proc; + new_file->write_proc = &locomx_write_proc; + new_file->nlink = 1; +*/ + + ch->HW_access_board = NULL; + ch->HW_release_board = NULL; + ch->HW_txe = LOCOMX_txe; + ch->HW_open = LOCOMX_open; + 
ch->HW_close = LOCOMX_close; + ch->HW_send_packet = LOCOMX_send_packet; + ch->HW_statistics = LOCOMX_statistics; + ch->HW_set_clock = NULL; + + ch->current_stats = &hw->board.chanA.stats; + memcpy(ch->current_stats, &ch->stats, sizeof(struct net_device_stats)); + + dev->base_addr = LOCOMX_DEFAULT_IO; + dev->irq = LOCOMX_DEFAULT_IRQ; + + + /* O.K. Count one more user on this module */ + MOD_INC_USE_COUNT; + return 0; +cleanup_filename_io: + remove_proc_entry(FILENAME_IO, ch->procdir); +cleanup_HW_privdata: + kfree(ch->HW_privdata); + return -EIO; +} + + +static int LOCOMX_exit(struct net_device *dev) +{ + struct comx_channel *ch = netdev_priv(dev); + + ch->HW_access_board = NULL; + ch->HW_release_board = NULL; + ch->HW_txe = NULL; + ch->HW_open = NULL; + ch->HW_close = NULL; + ch->HW_send_packet = NULL; + ch->HW_statistics = NULL; + ch->HW_set_clock = NULL; + memcpy(&ch->stats, ch->current_stats, sizeof(struct net_device_stats)); + ch->current_stats = &ch->stats; + + kfree(ch->HW_privdata); + + remove_proc_entry(FILENAME_IO, ch->procdir); + remove_proc_entry(FILENAME_IRQ, ch->procdir); +// remove_proc_entry(FILENAME_CLOCK, ch->procdir); + + MOD_DEC_USE_COUNT; + return 0; +} + +static struct comx_hardware locomx_hw = { + "locomx", + VERSION, + LOCOMX_init, + LOCOMX_exit, + LOCOMX_dump, + NULL +}; + +static int __init comx_hw_locomx_init(void) +{ + comx_register_hardware(&locomx_hw); + return 0; +} + +static void __exit comx_hw_locomx_exit(void) +{ + comx_unregister_hardware("locomx"); +} + +module_init(comx_hw_locomx_init); +module_exit(comx_hw_locomx_exit); diff --git a/drivers/net/wan/comx-hw-mixcom.c b/drivers/net/wan/comx-hw-mixcom.c new file mode 100644 index 000000000..c6fb9ac67 --- /dev/null +++ b/drivers/net/wan/comx-hw-mixcom.c @@ -0,0 +1,960 @@ +/* + * Hardware driver for the MixCom synchronous serial board + * + * Author: Gergely Madarasz + * + * based on skeleton driver code and a preliminary hscx driver by + * Tivadar Szemethy + * + * Copyright (C) 
1998-1999 ITConsult-Pro Co. + * + * Contributors: + * Arnaldo Carvalho de Melo (0.65) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Version 0.60 (99/06/11): + * - ported to the kernel, now works as builtin code + * + * Version 0.61 (99/06/11): + * - recognize the one-channel MixCOM card (id byte = 0x13) + * - printk fixes + * + * Version 0.62 (99/07/15): + * - fixes according to the new hw docs + * - report line status when open + * + * Version 0.63 (99/09/21): + * - line status report fixes + * + * Version 0.64 (99/12/01): + * - some more cosmetical fixes + * + * Version 0.65 (00/08/15) + * - resource release on failure at MIXCOM_init + */ + +#define VERSION "0.65" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "comx.h" +#include "mixcom.h" +#include "hscx.h" + +MODULE_AUTHOR("Gergely Madarasz "); +MODULE_DESCRIPTION("Hardware-level driver for the serial port of the MixCom board"); +MODULE_LICENSE("GPL"); + +#define MIXCOM_DATA(d) ((struct mixcom_privdata *)(COMX_CHANNEL(d)-> \ + HW_privdata)) + +#define MIXCOM_BOARD_BASE(d) (d->base_addr - MIXCOM_SERIAL_OFFSET - \ + (1 - MIXCOM_DATA(d)->channel) * MIXCOM_CHANNEL_OFFSET) + +#define MIXCOM_DEV_BASE(port,channel) (port + MIXCOM_SERIAL_OFFSET + \ + (1 - channel) * MIXCOM_CHANNEL_OFFSET) + +/* Values used to set the IRQ line */ +static unsigned char mixcom_set_irq[]={0xFF, 0xFF, 0xFF, 0x0, 0xFF, 0x2, 0x4, 0x6, 0xFF, 0xFF, 0x8, 0xA, 0xC, 0xFF, 0xE, 0xFF}; + +static unsigned char* hscx_versions[]={"A1", NULL, "A2", NULL, "A3", "2.1"}; + +struct mixcom_privdata { + u16 clock; + char channel; + long txbusy; + struct sk_buff *sending; + unsigned tx_ptr; + struct sk_buff *recving; + unsigned rx_ptr; + unsigned char status; + 
char card_has_status; +}; + +static inline void wr_hscx(struct net_device *dev, int reg, unsigned char val) +{ + outb(val, dev->base_addr + reg); +} + +static inline unsigned char rd_hscx(struct net_device *dev, int reg) +{ + return inb(dev->base_addr + reg); +} + +static inline void hscx_cmd(struct net_device *dev, int cmd) +{ + unsigned long jiffs = jiffies; + unsigned char cec; + unsigned delay = 0; + + while ((cec = (rd_hscx(dev, HSCX_STAR) & HSCX_CEC) != 0) && + time_before(jiffies, jiffs + HZ)) { + udelay(1); + if (++delay > (100000 / HZ)) break; + } + if (cec) { + printk(KERN_WARNING "%s: CEC stuck, probably no clock!\n",dev->name); + } else { + wr_hscx(dev, HSCX_CMDR, cmd); + } +} + +static inline void hscx_fill_fifo(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct mixcom_privdata *hw = ch->HW_privdata; + register word to_send = hw->sending->len - hw->tx_ptr; + + + outsb(dev->base_addr + HSCX_FIFO, + &(hw->sending->data[hw->tx_ptr]), min_t(unsigned int, to_send, 32)); + if (to_send <= 32) { + hscx_cmd(dev, HSCX_XTF | HSCX_XME); + kfree_skb(hw->sending); + hw->sending = NULL; + hw->tx_ptr = 0; + } else { + hscx_cmd(dev, HSCX_XTF); + hw->tx_ptr += 32; + } +} + +static inline void hscx_empty_fifo(struct net_device *dev, int cnt) +{ + struct comx_channel *ch = dev->priv; + struct mixcom_privdata *hw = ch->HW_privdata; + + if (hw->recving == NULL) { + if (!(hw->recving = dev_alloc_skb(HSCX_MTU + 16))) { + ch->stats.rx_dropped++; + hscx_cmd(dev, HSCX_RHR); + } else { + skb_reserve(hw->recving, 16); + skb_put(hw->recving, HSCX_MTU); + } + hw->rx_ptr = 0; + } + if (cnt > 32 || !cnt || hw->recving == NULL) { + printk(KERN_ERR "hscx_empty_fifo: cnt is %d, hw->recving %p\n", + cnt, (void *)hw->recving); + return; + } + + insb(dev->base_addr + HSCX_FIFO, &(hw->recving->data[hw->rx_ptr]),cnt); + hw->rx_ptr += cnt; + hscx_cmd(dev, HSCX_RMC); +} + + +static int MIXCOM_txe(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct 
mixcom_privdata *hw = ch->HW_privdata; + + return !test_bit(0, &hw->txbusy); +} + +static int mixcom_probe(struct net_device *dev) +{ + unsigned long flags; + int id, vstr, ret=0; + + save_flags(flags); cli(); + + id=inb_p(MIXCOM_BOARD_BASE(dev) + MIXCOM_ID_OFFSET) & 0x7f; + + if (id != MIXCOM_ID ) { + ret=-ENODEV; + printk(KERN_WARNING "%s: no MixCOM board found at 0x%04lx\n",dev->name, dev->base_addr); + goto out; + } + + vstr=inb_p(dev->base_addr + HSCX_VSTR) & 0x0f; + if(vstr>=sizeof(hscx_versions)/sizeof(char*) || + hscx_versions[vstr]==NULL) { + printk(KERN_WARNING "%s: board found but no HSCX chip detected at 0x%4lx (vstr = 0x%1x)\n",dev->name,dev->base_addr,vstr); + ret = -ENODEV; + } else { + printk(KERN_INFO "%s: HSCX chip version %s\n",dev->name,hscx_versions[vstr]); + ret = 0; + } + +out: + + restore_flags(flags); + return ret; +} + +#if 0 +static void MIXCOM_set_clock(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct mixcom_privdata *hw = ch->HW_privdata; + + if (hw->clock) { + ; + } else { + ; + } +} +#endif + +static void mixcom_board_on(struct net_device *dev) +{ + outb_p(MIXCOM_OFF , MIXCOM_BOARD_BASE(dev) + MIXCOM_IT_OFFSET); + udelay(1000); + outb_p(mixcom_set_irq[dev->irq] | MIXCOM_ON, + MIXCOM_BOARD_BASE(dev) + MIXCOM_IT_OFFSET); + udelay(1000); +} + +static void mixcom_board_off(struct net_device *dev) +{ + outb_p(MIXCOM_OFF , MIXCOM_BOARD_BASE(dev) + MIXCOM_IT_OFFSET); + udelay(1000); +} + +static void mixcom_off(struct net_device *dev) +{ + wr_hscx(dev, HSCX_CCR1, 0x0); +} + +static void mixcom_on(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + + wr_hscx(dev, HSCX_CCR1, HSCX_PU | HSCX_ODS | HSCX_ITF); // power up, push-pull + wr_hscx(dev, HSCX_CCR2, HSCX_CIE /* | HSCX_RIE */ ); + wr_hscx(dev, HSCX_MODE, HSCX_TRANS | HSCX_ADM8 | HSCX_RAC | HSCX_RTS ); + wr_hscx(dev, HSCX_RLCR, HSCX_RC | 47); // 1504 bytes + wr_hscx(dev, HSCX_MASK, HSCX_RSC | HSCX_TIN ); + hscx_cmd(dev, HSCX_XRES | HSCX_RHR); + + if 
(ch->HW_set_clock) ch->HW_set_clock(dev); + +} + +static int MIXCOM_send_packet(struct net_device *dev, struct sk_buff *skb) +{ + struct comx_channel *ch = dev->priv; + struct mixcom_privdata *hw = ch->HW_privdata; + unsigned long flags; + + if (ch->debug_flags & DEBUG_HW_TX) { + comx_debug_bytes(dev, skb->data, skb->len, "MIXCOM_send_packet"); + } + + if (!(ch->line_status & LINE_UP)) { + return FRAME_DROPPED; + } + + if (skb->len > HSCX_MTU) { + ch->stats.tx_errors++; + return FRAME_ERROR; + } + + save_flags(flags); cli(); + + if (test_and_set_bit(0, &hw->txbusy)) { + printk(KERN_ERR "%s: transmitter called while busy... dropping frame (length %d)\n", dev->name, skb->len); + restore_flags(flags); + return FRAME_DROPPED; + } + + + hw->sending = skb; + hw->tx_ptr = 0; + hw->txbusy = 1; +// atomic_inc(&skb->users); // save it + hscx_fill_fifo(dev); + restore_flags(flags); + + ch->stats.tx_packets++; + ch->stats.tx_bytes += skb->len; + + if (ch->debug_flags & DEBUG_HW_TX) { + comx_debug(dev, "MIXCOM_send_packet was successful\n\n"); + } + + return FRAME_ACCEPTED; +} + +static inline void mixcom_receive_frame(struct net_device *dev) +{ + struct comx_channel *ch=dev->priv; + struct mixcom_privdata *hw=ch->HW_privdata; + register byte rsta; + register word length; + + rsta = rd_hscx(dev, HSCX_RSTA) & (HSCX_VFR | HSCX_RDO | + HSCX_CRC | HSCX_RAB); + length = ((rd_hscx(dev, HSCX_RBCH) & 0x0f) << 8) | + rd_hscx(dev, HSCX_RBCL); + + if ( length > hw->rx_ptr ) { + hscx_empty_fifo(dev, length - hw->rx_ptr); + } + + if (!(rsta & HSCX_VFR)) { + ch->stats.rx_length_errors++; + } + if (rsta & HSCX_RDO) { + ch->stats.rx_over_errors++; + } + if (!(rsta & HSCX_CRC)) { + ch->stats.rx_crc_errors++; + } + if (rsta & HSCX_RAB) { + ch->stats.rx_frame_errors++; + } + ch->stats.rx_packets++; + ch->stats.rx_bytes += length; + + if (rsta == (HSCX_VFR | HSCX_CRC) && hw->recving) { + skb_trim(hw->recving, hw->rx_ptr - 1); + if (ch->debug_flags & DEBUG_HW_RX) { + comx_debug_skb(dev, 
hw->recving, + "MIXCOM_interrupt receiving"); + } + hw->recving->dev = dev; + if (ch->LINE_rx) { + ch->LINE_rx(dev, hw->recving); + } + } + else if(hw->recving) { + kfree_skb(hw->recving); + } + hw->recving = NULL; + hw->rx_ptr = 0; +} + + +static inline void mixcom_extended_interrupt(struct net_device *dev) +{ + struct comx_channel *ch=dev->priv; + struct mixcom_privdata *hw=ch->HW_privdata; + register byte exir; + + exir = rd_hscx(dev, HSCX_EXIR) & (HSCX_XDU | HSCX_RFO | HSCX_CSC ); + + if (exir & HSCX_RFO) { + ch->stats.rx_over_errors++; + if (hw->rx_ptr) { + kfree_skb(hw->recving); + hw->recving = NULL; hw->rx_ptr = 0; + } + printk(KERN_ERR "MIXCOM: rx overrun\n"); + hscx_cmd(dev, HSCX_RHR); + } + + if (exir & HSCX_XDU) { // xmit underrun + ch->stats.tx_errors++; + ch->stats.tx_aborted_errors++; + if (hw->tx_ptr) { + kfree_skb(hw->sending); + hw->sending = NULL; + hw->tx_ptr = 0; + } + hscx_cmd(dev, HSCX_XRES); + clear_bit(0, &hw->txbusy); + if (ch->LINE_tx) { + ch->LINE_tx(dev); + } + printk(KERN_ERR "MIXCOM: tx underrun\n"); + } + + if (exir & HSCX_CSC) { + ch->stats.tx_carrier_errors++; + if ((rd_hscx(dev, HSCX_STAR) & HSCX_CTS) == 0) { // Vonal le + if (test_and_clear_bit(0, &ch->lineup_pending)) { + del_timer(&ch->lineup_timer); + } else if (ch->line_status & LINE_UP) { + ch->line_status &= ~LINE_UP; + if (ch->LINE_status) { + ch->LINE_status(dev,ch->line_status); + } + } + } + if (!(ch->line_status & LINE_UP) && (rd_hscx(dev, HSCX_STAR) & + HSCX_CTS)) { // Vonal fol + if (!test_and_set_bit(0,&ch->lineup_pending)) { + ch->lineup_timer.function = comx_lineup_func; + ch->lineup_timer.data = (unsigned long)dev; + ch->lineup_timer.expires = jiffies + HZ * + ch->lineup_delay; + add_timer(&ch->lineup_timer); + hscx_cmd(dev, HSCX_XRES); + clear_bit(0, &hw->txbusy); + if (hw->sending) { + kfree_skb(hw->sending); + } + hw->sending=NULL; + hw->tx_ptr = 0; + } + } + } +} + + +static irqreturn_t MIXCOM_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + 
unsigned long flags; + struct net_device *dev = (struct net_device *)dev_id; + struct comx_channel *ch, *twin_ch; + struct mixcom_privdata *hw, *twin_hw; + register unsigned char ista; + + if (dev==NULL) { + printk(KERN_ERR "comx_interrupt: irq %d for unknown device\n",irq); + return IRQ_NONE; + } + + ch = dev->priv; + hw = ch->HW_privdata; + + save_flags(flags); cli(); + + while((ista = (rd_hscx(dev, HSCX_ISTA) & (HSCX_RME | HSCX_RPF | + HSCX_XPR | HSCX_EXB | HSCX_EXA | HSCX_ICA)))) { + register byte ista2 = 0; + + if (ista & HSCX_RME) { + mixcom_receive_frame(dev); + } + if (ista & HSCX_RPF) { + hscx_empty_fifo(dev, 32); + } + if (ista & HSCX_XPR) { + if (hw->tx_ptr) { + hscx_fill_fifo(dev); + } else { + clear_bit(0, &hw->txbusy); + ch->LINE_tx(dev); + } + } + + if (ista & HSCX_EXB) { + mixcom_extended_interrupt(dev); + } + + if ((ista & HSCX_EXA) && ch->twin) { + mixcom_extended_interrupt(ch->twin); + } + + if ((ista & HSCX_ICA) && ch->twin && + (ista2 = rd_hscx(ch->twin, HSCX_ISTA) & + (HSCX_RME | HSCX_RPF | HSCX_XPR ))) { + if (ista2 & HSCX_RME) { + mixcom_receive_frame(ch->twin); + } + if (ista2 & HSCX_RPF) { + hscx_empty_fifo(ch->twin, 32); + } + if (ista2 & HSCX_XPR) { + twin_ch=ch->twin->priv; + twin_hw=twin_ch->HW_privdata; + if (twin_hw->tx_ptr) { + hscx_fill_fifo(ch->twin); + } else { + clear_bit(0, &twin_hw->txbusy); + ch->LINE_tx(ch->twin); + } + } + } + } + + restore_flags(flags); + return IRQ_HANDLED; +} + +static int MIXCOM_open(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct mixcom_privdata *hw = ch->HW_privdata; + struct proc_dir_entry *procfile = ch->procdir->subdir; + unsigned long flags; + int ret = -ENODEV; + + if (!dev->base_addr || !dev->irq) + goto err_ret; + + + if(hw->channel==1) { + if(!TWIN(dev) || !(COMX_CHANNEL(TWIN(dev))->init_status & + IRQ_ALLOCATED)) { + printk(KERN_ERR "%s: channel 0 not yet initialized\n",dev->name); + ret = -EAGAIN; + goto err_ret; + } + } + + + /* Is our hw present at all ? 
Not checking for channel 0 if it is already + open */ + if(hw->channel!=0 || !(ch->init_status & IRQ_ALLOCATED)) { + if (!request_region(dev->base_addr, MIXCOM_IO_EXTENT, dev->name)) { + ret = -EAGAIN; + goto err_ret; + } + if (mixcom_probe(dev)) { + ret = -ENODEV; + goto err_release_region; + } + } + + if(hw->channel==0 && !(ch->init_status & IRQ_ALLOCATED)) { + if (request_irq(dev->irq, MIXCOM_interrupt, 0, + dev->name, (void *)dev)) { + printk(KERN_ERR "MIXCOM: unable to obtain irq %d\n", dev->irq); + ret = -EAGAIN; + goto err_release_region; + } + } + + save_flags(flags); cli(); + + if(hw->channel==0 && !(ch->init_status & IRQ_ALLOCATED)) { + ch->init_status|=IRQ_ALLOCATED; + mixcom_board_on(dev); + } + + mixcom_on(dev); + + + hw->status=inb(MIXCOM_BOARD_BASE(dev) + MIXCOM_STATUS_OFFSET); + if(hw->status != 0xff) { + printk(KERN_DEBUG "%s: board has status register, good\n", dev->name); + hw->card_has_status=1; + } + + hw->txbusy = 0; + ch->init_status |= HW_OPEN; + + if (rd_hscx(dev, HSCX_STAR) & HSCX_CTS) { + ch->line_status |= LINE_UP; + } else { + ch->line_status &= ~LINE_UP; + } + + restore_flags(flags); + + ch->LINE_status(dev, ch->line_status); + + for (; procfile ; procfile = procfile->next) { + if (strcmp(procfile->name, FILENAME_IO) == 0 || + strcmp(procfile->name, FILENAME_CHANNEL) == 0 || + strcmp(procfile->name, FILENAME_CLOCK) == 0 || + strcmp(procfile->name, FILENAME_IRQ) == 0) { + procfile->mode = S_IFREG | 0444; + } + } + + return 0; + +err_release_region: + release_region(dev->base_addr, MIXCOM_IO_EXTENT); +err_ret: + return ret; +} + +static int MIXCOM_close(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct mixcom_privdata *hw = ch->HW_privdata; + struct proc_dir_entry *procfile = ch->procdir->subdir; + unsigned long flags; + + + save_flags(flags); cli(); + + mixcom_off(dev); + + /* This is channel 0, twin is not open, we can safely turn off everything */ + if(hw->channel==0 && (!(TWIN(dev)) || + 
!(COMX_CHANNEL(TWIN(dev))->init_status & HW_OPEN))) { + mixcom_board_off(dev); + free_irq(dev->irq, dev); + release_region(dev->base_addr, MIXCOM_IO_EXTENT); + ch->init_status &= ~IRQ_ALLOCATED; + } + + /* This is channel 1, channel 0 has already been shutdown, we can release + this one too */ + if(hw->channel==1 && !(COMX_CHANNEL(TWIN(dev))->init_status & HW_OPEN)) { + if(COMX_CHANNEL(TWIN(dev))->init_status & IRQ_ALLOCATED) { + mixcom_board_off(TWIN(dev)); + free_irq(TWIN(dev)->irq, TWIN(dev)); + release_region(TWIN(dev)->base_addr, MIXCOM_IO_EXTENT); + COMX_CHANNEL(TWIN(dev))->init_status &= ~IRQ_ALLOCATED; + } + } + + /* the ioports for channel 1 can be safely released */ + if(hw->channel==1) { + release_region(dev->base_addr, MIXCOM_IO_EXTENT); + } + + restore_flags(flags); + + /* If we don't hold any hardware open */ + if(!(ch->init_status & IRQ_ALLOCATED)) { + for (; procfile ; procfile = procfile->next) { + if (strcmp(procfile->name, FILENAME_IO) == 0 || + strcmp(procfile->name, FILENAME_CHANNEL) == 0 || + strcmp(procfile->name, FILENAME_CLOCK) == 0 || + strcmp(procfile->name, FILENAME_IRQ) == 0) { + procfile->mode = S_IFREG | 0644; + } + } + } + + /* channel 0 was only waiting for us to close channel 1 + close it completely */ + + if(hw->channel==1 && !(COMX_CHANNEL(TWIN(dev))->init_status & HW_OPEN)) { + for (procfile=COMX_CHANNEL(TWIN(dev))->procdir->subdir; + procfile ; procfile = procfile->next) { + if (strcmp(procfile->name, FILENAME_IO) == 0 || + strcmp(procfile->name, FILENAME_CHANNEL) == 0 || + strcmp(procfile->name, FILENAME_CLOCK) == 0 || + strcmp(procfile->name, FILENAME_IRQ) == 0) { + procfile->mode = S_IFREG | 0644; + } + } + } + + ch->init_status &= ~HW_OPEN; + return 0; +} + +static int MIXCOM_statistics(struct net_device *dev,char *page) +{ + struct comx_channel *ch = dev->priv; + // struct mixcom_privdata *hw = ch->HW_privdata; + int len = 0; + + if(ch->init_status && IRQ_ALLOCATED) { + len += sprintf(page + len, "Mixcom board: hardware 
open\n"); + } + + return len; +} + +static int MIXCOM_dump(struct net_device *dev) { + return 0; +} + +static int mixcom_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct proc_dir_entry *file = (struct proc_dir_entry *)data; + struct net_device *dev = file->parent->data; + struct comx_channel *ch = dev->priv; + struct mixcom_privdata *hw = ch->HW_privdata; + int len = 0; + + if (strcmp(file->name, FILENAME_IO) == 0) { + len = sprintf(page, "0x%x\n", + (unsigned int)MIXCOM_BOARD_BASE(dev)); + } else if (strcmp(file->name, FILENAME_IRQ) == 0) { + len = sprintf(page, "%d\n", (unsigned int)dev->irq); + } else if (strcmp(file->name, FILENAME_CLOCK) == 0) { + if (hw->clock) len = sprintf(page, "%d\n", hw->clock); + else len = sprintf(page, "external\n"); + } else if (strcmp(file->name, FILENAME_CHANNEL) == 0) { + len = sprintf(page, "%01d\n", hw->channel); + } else if (strcmp(file->name, FILENAME_TWIN) == 0) { + if (ch->twin) { + len = sprintf(page, "%s\n",ch->twin->name); + } else { + len = sprintf(page, "none\n"); + } + } else { + printk(KERN_ERR "mixcom_read_proc: internal error, filename %s\n", file->name); + return -EBADF; + } + + if (off >= len) { + *eof = 1; + return 0; + } + *start = page + off; + if (count >= len - off) *eof = 1; + return min_t(int, count, len - off); +} + + +static struct net_device *mixcom_twin_check(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct proc_dir_entry *procfile = ch->procdir->parent->subdir; + struct mixcom_privdata *hw = ch->HW_privdata; + + struct net_device *twin; + struct comx_channel *ch_twin; + struct mixcom_privdata *hw_twin; + + + for ( ; procfile ; procfile = procfile->next) { + if(!S_ISDIR(procfile->mode)) continue; + + twin = procfile->data; + ch_twin = twin->priv; + hw_twin = ch_twin->HW_privdata; + + + if (twin != dev && dev->irq && dev->base_addr && + dev->irq == twin->irq && + ch->hardware == ch_twin->hardware && + dev->base_addr == twin->base_addr 
+ + (1-2*hw->channel)*MIXCOM_CHANNEL_OFFSET && + hw->channel == (1 - hw_twin->channel)) { + if (!TWIN(twin) || TWIN(twin)==dev) { + return twin; + } + } + } + return NULL; +} + + +static void setup_twin(struct net_device* dev) +{ + + if(TWIN(dev) && TWIN(TWIN(dev))) { + TWIN(TWIN(dev))=NULL; + } + if ((TWIN(dev) = mixcom_twin_check(dev)) != NULL) { + if (TWIN(TWIN(dev)) && TWIN(TWIN(dev)) != dev) { + TWIN(dev)=NULL; + } else { + TWIN(TWIN(dev))=dev; + } + } +} + +static int mixcom_write_proc(struct file *file, const char *buffer, + u_long count, void *data) +{ + struct proc_dir_entry *entry = (struct proc_dir_entry *)data; + struct net_device *dev = (struct net_device *)entry->parent->data; + struct comx_channel *ch = dev->priv; + struct mixcom_privdata *hw = ch->HW_privdata; + char *page; + int value; + + if (!(page = (char *)__get_free_page(GFP_KERNEL))) { + return -ENOMEM; + } + + if (copy_from_user(page, buffer, count = min_t(unsigned long, count, PAGE_SIZE))) { + free_page((unsigned long)page); + return -EFAULT; + } + if (*(page + count - 1) == '\n') { + *(page + count - 1) = 0; + } + + if (strcmp(entry->name, FILENAME_IO) == 0) { + value = simple_strtoul(page, NULL, 0); + if (value != 0x180 && value != 0x280 && value != 0x380) { + printk(KERN_ERR "MIXCOM: incorrect io address!\n"); + } else { + dev->base_addr = MIXCOM_DEV_BASE(value,hw->channel); + } + } else if (strcmp(entry->name, FILENAME_IRQ) == 0) { + value = simple_strtoul(page, NULL, 0); + if (value < 0 || value > 15 || mixcom_set_irq[value]==0xFF) { + printk(KERN_ERR "MIXCOM: incorrect irq value!\n"); + } else { + dev->irq = value; + } + } else if (strcmp(entry->name, FILENAME_CLOCK) == 0) { + if (strncmp("ext", page, 3) == 0) { + hw->clock = 0; + } else { + int kbps; + + kbps = simple_strtoul(page, NULL, 0); + if (!kbps) { + hw->clock = 0; + } else { + hw->clock = kbps; + } + if (hw->clock < 32 || hw->clock > 2000) { + hw->clock = 0; + printk(KERN_ERR "MIXCOM: invalid clock rate!\n"); + } + } + if 
(ch->init_status & HW_OPEN && ch->HW_set_clock) { + ch->HW_set_clock(dev); + } + } else if (strcmp(entry->name, FILENAME_CHANNEL) == 0) { + value = simple_strtoul(page, NULL, 0); + if (value > 2) { + printk(KERN_ERR "Invalid channel number\n"); + } else { + dev->base_addr+=(hw->channel - value) * MIXCOM_CHANNEL_OFFSET; + hw->channel = value; + } + } else { + printk(KERN_ERR "hw_read_proc: internal error, filename %s\n", + entry->name); + return -EBADF; + } + + setup_twin(dev); + + free_page((unsigned long)page); + return count; +} + +static int MIXCOM_init(struct net_device *dev) { + struct comx_channel *ch = dev->priv; + struct mixcom_privdata *hw; + struct proc_dir_entry *new_file; + + if ((ch->HW_privdata = kmalloc(sizeof(struct mixcom_privdata), + GFP_KERNEL)) == NULL) { + return -ENOMEM; + } + + memset(hw = ch->HW_privdata, 0, sizeof(struct mixcom_privdata)); + + if ((new_file = create_proc_entry(FILENAME_IO, S_IFREG | 0644, + ch->procdir)) == NULL) { + goto cleanup_HW_privdata; + } + new_file->data = (void *)new_file; + new_file->read_proc = &mixcom_read_proc; + new_file->write_proc = &mixcom_write_proc; + new_file->nlink = 1; + + if ((new_file = create_proc_entry(FILENAME_IRQ, S_IFREG | 0644, + ch->procdir)) == NULL) { + goto cleanup_filename_io; + } + new_file->data = (void *)new_file; + new_file->read_proc = &mixcom_read_proc; + new_file->write_proc = &mixcom_write_proc; + new_file->nlink = 1; + +#if 0 + if ((new_file = create_proc_entry(FILENAME_CLOCK, S_IFREG | 0644, + ch->procdir)) == NULL) { + return -EIO; + } + new_file->data = (void *)new_file; + new_file->read_proc = &mixcom_read_proc; + new_file->write_proc = &mixcom_write_proc; + new_file->nlink = 1; +#endif + + if ((new_file = create_proc_entry(FILENAME_CHANNEL, S_IFREG | 0644, + ch->procdir)) == NULL) { + goto cleanup_filename_irq; + } + new_file->data = (void *)new_file; + new_file->read_proc = &mixcom_read_proc; + new_file->write_proc = &mixcom_write_proc; + new_file->nlink = 1; + + if 
((new_file = create_proc_entry(FILENAME_TWIN, S_IFREG | 0444, + ch->procdir)) == NULL) { + goto cleanup_filename_channel; + } + new_file->data = (void *)new_file; + new_file->read_proc = &mixcom_read_proc; + new_file->write_proc = &mixcom_write_proc; + new_file->nlink = 1; + + setup_twin(dev); + + /* Fill in ch_struct hw specific pointers */ + ch->HW_access_board = NULL; + ch->HW_release_board = NULL; + ch->HW_txe = MIXCOM_txe; + ch->HW_open = MIXCOM_open; + ch->HW_close = MIXCOM_close; + ch->HW_send_packet = MIXCOM_send_packet; + ch->HW_statistics = MIXCOM_statistics; + ch->HW_set_clock = NULL; + + dev->base_addr = MIXCOM_DEV_BASE(MIXCOM_DEFAULT_IO,0); + dev->irq = MIXCOM_DEFAULT_IRQ; + + MOD_INC_USE_COUNT; + return 0; +cleanup_filename_channel: + remove_proc_entry(FILENAME_CHANNEL, ch->procdir); +cleanup_filename_irq: + remove_proc_entry(FILENAME_IRQ, ch->procdir); +cleanup_filename_io: + remove_proc_entry(FILENAME_IO, ch->procdir); +cleanup_HW_privdata: + kfree(ch->HW_privdata); + return -EIO; +} + +static int MIXCOM_exit(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct mixcom_privdata *hw = ch->HW_privdata; + + if(hw->channel==0 && TWIN(dev)) { + return -EBUSY; + } + + if(hw->channel==1 && TWIN(dev)) { + TWIN(TWIN(dev))=NULL; + } + + kfree(ch->HW_privdata); + remove_proc_entry(FILENAME_IO, ch->procdir); + remove_proc_entry(FILENAME_IRQ, ch->procdir); +#if 0 + remove_proc_entry(FILENAME_CLOCK, ch->procdir); +#endif + remove_proc_entry(FILENAME_CHANNEL, ch->procdir); + remove_proc_entry(FILENAME_TWIN, ch->procdir); + + MOD_DEC_USE_COUNT; + return 0; +} + +static struct comx_hardware mixcomhw = { + "mixcom", + VERSION, + MIXCOM_init, + MIXCOM_exit, + MIXCOM_dump, + NULL +}; + +static int __init comx_hw_mixcom_init(void) +{ + return comx_register_hardware(&mixcomhw); +} + +static void __exit comx_hw_mixcom_exit(void) +{ + comx_unregister_hardware("mixcom"); +} + +module_init(comx_hw_mixcom_init); +module_exit(comx_hw_mixcom_exit); diff 
--git a/drivers/net/wan/comx-hw-munich.c b/drivers/net/wan/comx-hw-munich.c new file mode 100644 index 000000000..195bc2d25 --- /dev/null +++ b/drivers/net/wan/comx-hw-munich.c @@ -0,0 +1,2854 @@ +/* + * Hardware-level driver for the SliceCOM board for Linux kernels 2.4.X + * + * Current maintainer / latest changes: Pasztor Szilard + * + * Original author: Bartok Istvan + * Based on skeleton by Tivadar Szemethy + * + * 0.51: + * - port for 2.4.x + * - clean up some code, make it more portable + * - busted direct hardware access through mapped memory + * - fix a possible race + * - prevent procfs buffer overflow + * + * 0.50: + * - support for the pcicom board, lots of rearrangements + * - handle modem status lines + * + * 0.50a: + * - fix for falc version 1.0 + * + * 0.50b: T&t + * - fix for bad localbus + */ + +#define VERSION "0.51" +#define VERSIONSTR "SliceCOM v" VERSION ", 2002/01/07\n" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define COMX_NEW + +#ifndef COMX_NEW +#include "../include/comx.h" +#include "../include/munich32x.h" +#include "../include/falc-lh.h" +#else +#include "comx.h" +#include "munich32x.h" +#include "falc-lh.h" +#endif + +MODULE_AUTHOR("Bartok Istvan , Gergely Madarasz , Szilard Pasztor "); +MODULE_DESCRIPTION("Hardware-level driver for the SliceCOM and PciCOM (WelCOM) adapters"); +MODULE_LICENSE("GPL"); +/* + * TODO: az ilyenek a comxhw.h -ban szoktak lenni, idovel menjenek majd oda: + */ + +#define FILENAME_BOARDNUM "boardnum" /* /proc/comx/comx0.1/boardnum */ +#define FILENAME_TIMESLOTS "timeslots" /* /proc/comx/comx0.1/timeslots */ +#define FILENAME_FRAMING "framing" /* /proc/comx/comx0.1/framing */ +#define FILENAME_LINECODE "linecode" /* /proc/comx/comx0.1/linecode */ +#define FILENAME_CLOCK_SOURCE "clock_source" /* /proc/comx/comx0.1/clock_source */ +#define FILENAME_LOOPBACK "loopback" /* /proc/comx/comx0.1/loopback */ +#define 
FILENAME_REG "reg" /* /proc/comx/comx0.1/reg */ +#define FILENAME_LBIREG "lbireg" /* /proc/comx/comx0.1/lbireg */ + +#define SLICECOM_BOARDNUM_DEFAULT 0 + +#define SLICECOM_FRAMING_CRC4 1 +#define SLICECOM_FRAMING_NO_CRC4 2 +#define SLICECOM_FRAMING_DEFAULT SLICECOM_FRAMING_CRC4 + +#define SLICECOM_LINECODE_HDB3 1 +#define SLICECOM_LINECODE_AMI 2 +#define SLICECOM_LINECODE_DEFAULT SLICECOM_LINECODE_HDB3 + +#define SLICECOM_CLOCK_SOURCE_LINE 1 +#define SLICECOM_CLOCK_SOURCE_INTERNAL 2 +#define SLICECOM_CLOCK_SOURCE_DEFAULT SLICECOM_CLOCK_SOURCE_LINE + +#define SLICECOM_LOOPBACK_NONE 1 +#define SLICECOM_LOOPBACK_LOCAL 2 +#define SLICECOM_LOOPBACK_REMOTE 3 +#define SLICECOM_LOOPBACK_DEFAULT SLICECOM_LOOPBACK_NONE + +#define MUNICH_VIRT(addr) (void *)(&bar1[addr]) + +struct slicecom_stringtable +{ + char *name; + int value; +}; + +/* A convention: keep "default" the last not NULL when reading from /proc, + "error" is an indication that something went wrong, we have an undefined value */ + +struct slicecom_stringtable slicecom_framings[] = +{ + {"crc4", SLICECOM_FRAMING_CRC4}, + {"no-crc4", SLICECOM_FRAMING_NO_CRC4}, + {"default", SLICECOM_FRAMING_DEFAULT}, + {"error", 0} +}; + +struct slicecom_stringtable slicecom_linecodes[] = +{ + {"hdb3", SLICECOM_LINECODE_HDB3}, + {"ami", SLICECOM_LINECODE_AMI}, + {"default", SLICECOM_LINECODE_DEFAULT}, + {"error", 0} +}; + +struct slicecom_stringtable slicecom_clock_sources[] = +{ + {"line", SLICECOM_CLOCK_SOURCE_LINE}, + {"internal", SLICECOM_CLOCK_SOURCE_INTERNAL}, + {"default", SLICECOM_CLOCK_SOURCE_DEFAULT}, + {"error", 0} +}; + +struct slicecom_stringtable slicecom_loopbacks[] = +{ + {"none", SLICECOM_LOOPBACK_NONE}, + {"local", SLICECOM_LOOPBACK_LOCAL}, + {"remote", SLICECOM_LOOPBACK_REMOTE}, + {"default", SLICECOM_LOOPBACK_DEFAULT}, + {"error", 0} +}; + +/* + * Some tunable values... 
+ * + * Note: when tuning values which change the length of text in + * /proc/comx/comx[n]/status, keep in mind that it must be shorter then + * PAGESIZE ! + */ + +#define MAX_BOARDS 4 /* ezzel 4 kartya lehet a gepben: 0..3 */ +#define RX_DESC_MAX 8 /* Rx ring size, must be >= 4 */ +#define TX_DESC_MAX 4 /* Tx ring size, must be >= 2 */ + /* a sokkal hosszabb Tx ring mar ronthatja a nem-FIFO packet */ + /* schedulerek (fair queueing, stb.) hatekonysagat. */ +#define MAX_WORK 10 /* TOD: update the info max. ennyi-1 esemenyt dolgoz fel egy interrupt hivasnal */ + +/* + * These are tunable too, but don't touch them without fully understanding what is happening + */ + +#define UDELAY 20 /* We wait UDELAY usecs with disabled interrupts before and */ + /* after each command to avoid writing into each other's */ + /* ccb->action_spec. A _send_packet nem var, mert azt az */ + /* _interrupt()-bol is meghivhatja a LINE_tx() */ + +/* + * Just to avoid warnings about implicit declarations: + */ + +static int MUNICH_close(struct net_device *dev); +static struct comx_hardware slicecomhw; +static struct comx_hardware pcicomhw; + +static unsigned long flags; +static spinlock_t mister_lock = SPIN_LOCK_UNLOCKED; + +typedef volatile struct /* Time Slot Assignment */ +{ + u32 rxfillmask:8, // ----------------------------+------+ + // | | + rxchannel:5, // ----------------------+---+ | | + rti:1, // ---------------------+| | | | + res2:2, // -------------------++|| | | | + // |||| | | | + txfillmask:8, // ----------+------+ |||| | | | + // | | |||| | | | + txchannel:5, // ----+---+ | | |||| | | | + tti:1, // ---+| | | | |||| | | | + res1:2; // -++|| | | | |||| | | | + // 3 2 1 + // 10987654 32109876 54321098 76543210 +} timeslot_spec_t; + +typedef volatile struct /* Receive Descriptor */ +{ + u32 zero1:16, no:13, hi:1, hold:1, zero2:1; + + u32 next; + u32 data; + + u32 zero3:8, status:8, bno:13, zero4:1, c:1, fe:1; +} rx_desc_t; + +typedef volatile struct /* Transmit Descriptor */ +{ + 
u32 fnum:11, csm:1, no13:1, zero1:2, v110:1, no:13, hi:1, hold:1, fe:1; + + u32 next; + u32 data; + +} tx_desc_t; + +typedef volatile struct /* Channel Specification */ +{ + u32 iftf:1, mode:2, fa:1, trv:2, crc:1, inv:1, cs:1, tflag:7, ra:1, ro:1, + th:1, ta:1, to:1, ti:1, ri:1, nitbs:1, fit:1, fir:1, re:1, te:1, ch:1, + ifc:1, sfe:1, fe2:1; + + u32 frda; + u32 ftda; + + u32 itbs:6, zero1:26; + +} channel_spec_t; + +typedef volatile struct /* Configuration Control Block */ +{ + u32 action_spec; + u32 reserved1; + u32 reserved2; + timeslot_spec_t timeslot_spec[32]; + channel_spec_t channel_spec[32]; + u32 current_rx_desc[32]; + u32 current_tx_desc[32]; + u32 csa; /* Control Start Address. CSA = *CCBA; CCB = *CSA */ + /* MUNICH does it like: CCB = *( *CCBA ) */ +} munich_ccb_t; + +typedef volatile struct /* Entry in the interrupt queue */ +{ + u32 all; +} munich_intq_t; + +#define MUNICH_INTQLEN 63 /* Rx/Tx Interrupt Queue Length + (not the real len, but the TIQL/RIQL value) */ +#define MUNICH_INTQMAX ( 16*(MUNICH_INTQLEN+1) ) /* Rx/Tx/Periph Interrupt Queue size in munich_intq_t's */ +#define MUNICH_INTQSIZE ( 4*MUNICH_INTQMAX ) /* Rx/Tx/Periph Interrupt Queue size in bytes */ + +#define MUNICH_PIQLEN 4 /* Peripheral Interrupt Queue Length. 
Unlike the RIQL/TIQL, */ +#define MUNICH_PIQMAX ( 4*MUNICH_PIQLEN ) /* PIQL register needs it like this */ +#define MUNICH_PIQSIZE ( 4*MUNICH_PIQMAX ) + +typedef volatile u32 vol_u32; /* TOD: ezek megszunnek ha atirom readw()/writew()-re - kész */ +typedef volatile u8 vol_u8; + +typedef volatile struct /* counters of E1-errors and errored seconds, see rfc2495 */ +{ + /* use here only unsigned ints, we depend on it when calculating the sum for the last N intervals */ + + unsigned line_code_violations, /* AMI: bipolar violations, HDB3: hdb3 violations */ + path_code_violations, /* FAS errors and CRC4 errors */ + e_bit_errors, /* E-Bit Errors (the remote side received from us with CRC4-error) */ + slip_secs, /* number of seconds with (receive) Controlled Slip(s) */ + fr_loss_secs, /* number of seconds an Out Of Frame defect was detected */ + line_err_secs, /* number of seconds with one or more Line Code Violations */ + degraded_mins, /* Degraded Minute - the estimated error rate is >1E-6, but <1E-3 */ + errored_secs, /* Errored Second - at least one of these happened: + - Path Code Violation + - Out Of Frame defect + - Slip + - receiving AIS + - not incremented during an Unavailable Second */ + bursty_err_secs, /* Bursty Errored Second: (rfc2495 says it does not apply to E1) + - Path Code Violations >1, but <320 + - not a Severely Errored Second + - no AIS + - not incremented during an Unavailabla Second */ + severely_err_secs, /* Severely Errored Second: + - CRC4: >=832 Path COde Violations || >0 Out Of Frame defects + - noCRC4: >=2048 Line Code Violations + - not incremented during an Unavailable Second */ + unavail_secs; /* number of Unavailable Seconds. 
Unavailable state is said after: + - 10 contiguous Severely Errored Seconds + - or RAI || AIS || LOF || LOS + - (any) loopback has been set */ + + /* + * we do not strictly comply to the rfc: we do not retroactively reduce errored_secs, + * bursty_err_secs, severely_err_secs when 'unavailable state' is reached + */ + +} e1_stats_t; + +typedef volatile struct /* ezek board-adatok, nem lehetnek a slicecom_privdata -ban */ +{ + int use_count; /* num. of interfaces using the board */ + int irq; /* a kartya irq-ja. belemasoljuk a dev->irq -kba is, de csak hogy */ + /* szebb legyen az ifconfig outputja */ + /* ha != 0, az azt jelenti hogy az az irq most nekunk sikeresen */ + /* le van foglalva */ + struct pci_dev *pci; /* a kartya PCI strukturaja. NULL, ha nincs kartya */ + u32 *bar1; /* pci->base_address[0] ioremap()-ed by munich_probe(), */ + /* on x86 can be used both as a bus or virtual address. */ + /* These are the Munich's registers */ + u8 *lbi; /* pci->base_address[1] ioremap()-ed by munich_probe(), */ + /* this is a 256-byte range, the start of the LBI on the board */ + munich_ccb_t *ccb; /* virtual address of CCB */ + munich_intq_t *tiq; /* Tx Interrupt Queue */ + munich_intq_t *riq; /* Rx Interrupt Queue */ + munich_intq_t *piq; /* Peripheral Interrupt Queue (FALC interrupts arrive here) */ + int tiq_ptr, /* A 'current' helyek a tiq/riq/piq -ban. */ + riq_ptr, /* amikor feldolgoztam az interruptokat, a legelso ures */ + piq_ptr; /* interrupt_information szora mutatnak. */ + struct net_device *twins[32]; /* MUNICH channel -> network interface assignment */ + + unsigned long lastcheck; /* When were the Rx rings last checked. 
Time in jiffies */ + + struct timer_list modemline_timer; + char isx21; + char lineup; + char framing; /* a beallitasok tarolasa */ + char linecode; + char clock_source; + char loopback; + + char devname[30]; /* what to show in /proc/interrupts */ + unsigned histogram[MAX_WORK]; /* number of processed events in the interrupt loop */ + unsigned stat_pri_races; /* number of special events, we try to handle them */ + unsigned stat_pti_races; + unsigned stat_pri_races_missed; /* when it can not be handled, because of MAX_WORK */ + unsigned stat_pti_races_missed; + +#define SLICECOM_BOARD_INTERVALS_SIZE 97 + e1_stats_t intervals[SLICECOM_BOARD_INTERVALS_SIZE]; /* E1 line statistics */ + unsigned current_interval; /* pointer to the current interval */ + unsigned elapsed_seconds; /* elapsed seconds from the start of the current interval */ + unsigned ses_seconds; /* counter of contiguous Severely Errored Seconds */ + unsigned is_unavailable; /* set to 1 after 10 contiguous Severely Errored Seconds */ + unsigned no_ses_seconds; /* contiguous Severely Error -free seconds in unavail state */ + + unsigned deg_elapsed_seconds; /* for counting the 'Degraded Mins' */ + unsigned deg_cumulated_errors; + + struct module *owner; /* pointer to our module to avoid module load races */ +} munich_board_t; + +struct slicecom_privdata +{ + int busy; /* transmitter busy - number of packets in the Tx ring */ + int channel; /* Munich logical channel ('channel-group' in Cisco) */ + unsigned boardnum; + u32 timeslots; /* i-th bit means i-th timeslot is our */ + + int tx_ring_hist[TX_DESC_MAX]; /* histogram: number of packets in Tx ring when _send_packet is called */ + + tx_desc_t tx_desc[TX_DESC_MAX]; /* the ring of Tx descriptors */ + u8 tx_data[TX_DESC_MAX][TXBUFFER_SIZE]; /* buffers for data to transmit */ + int tx_desc_ptr; /* hanyadik descriptornal tartunk a beirassal */ + /* ahol ez all, oda irtunk utoljara */ + + rx_desc_t rx_desc[RX_DESC_MAX]; /* the ring of Rx descriptors */ + u8 
rx_data[RX_DESC_MAX][RXBUFFER_SIZE]; /* buffers for received data */ + int rx_desc_ptr; /* hanyadik descriptornal tartunk az olvasassal */ + + int rafutott; +}; + +static u32 reg, reg_ertek; /* why static: don't write stack trash into regs if strtoul() fails */ +static u32 lbireg; +static u8 lbireg_ertek; /* why static: don't write stack trash into regs if strtoul() fails */ + +static munich_board_t slicecom_boards[MAX_BOARDS]; +static munich_board_t pcicom_boards[MAX_BOARDS]; + +/* + * Reprogram Idle Channel Registers in the FALC - send special code in not used channels + * Should be called from the open and close, when the timeslot assignment changes + */ + +void rework_idle_channels(struct net_device *dev) +{ + struct comx_channel *ch = netdev_priv(dev); + struct slicecom_privdata *hw = ch->HW_privdata; + munich_board_t *board = slicecom_boards + hw->boardnum; + munich_ccb_t *ccb = board->ccb; + + u8 *lbi = board->lbi; + int i, j, tmp; + + + spin_lock_irqsave(&mister_lock, flags); + + for (i = 0; i < 4; i++) + { + tmp = 0xFF; + for (j = 0; j < 8; j++) + if (ccb->timeslot_spec[8 * i + j].tti == 0) tmp ^= (0x80 >> j); + writeb(tmp, lbi + 0x30 + i); + } + + spin_unlock_irqrestore(&mister_lock, flags); +} + +/* + * Set PCM framing - /proc/comx/comx0/framing + */ + +void slicecom_set_framing(int boardnum, int value) +{ + u8 *lbi = slicecom_boards[boardnum].lbi; + + spin_lock_irqsave(&mister_lock, flags); + + slicecom_boards[boardnum].framing = value; + switch (value) + { + case SLICECOM_FRAMING_CRC4: + writeb(readb(lbi + FMR1) | 8, lbi + FMR1); + writeb((readb(lbi + FMR2) & 0x3f) | 0x80, lbi + FMR2); + break; + case SLICECOM_FRAMING_NO_CRC4: + writeb(readb(lbi + FMR1) & 0xf7, lbi + FMR1); + writeb(readb(lbi + FMR2) & 0x3f, lbi + FMR2); + break; + default: + printk("slicecom: board %d: unhandled " FILENAME_FRAMING + " value %d\n", boardnum, value); + } + + spin_unlock_irqrestore(&mister_lock, flags); +} + +/* + * Set PCM linecode - /proc/comx/comx0/linecode + */ + 
+void slicecom_set_linecode(int boardnum, int value) +{ + u8 *lbi = slicecom_boards[boardnum].lbi; + + spin_lock_irqsave(&mister_lock, flags); + + slicecom_boards[boardnum].linecode = value; + switch (value) + { + case SLICECOM_LINECODE_HDB3: + writeb(readb(lbi + FMR0) | 0xf0, lbi + FMR0); + break; + case SLICECOM_LINECODE_AMI: + writeb((readb(lbi + FMR0) & 0x0f) | 0xa0, lbi + FMR0); + break; + default: + printk("slicecom: board %d: unhandled " FILENAME_LINECODE + " value %d\n", boardnum, value); + } + spin_unlock_irqrestore(&mister_lock, flags); +} + +/* + * Set PCM clock source - /proc/comx/comx0/clock_source + */ + +void slicecom_set_clock_source(int boardnum, int value) +{ + u8 *lbi = slicecom_boards[boardnum].lbi; + + spin_lock_irqsave(&mister_lock, flags); + + slicecom_boards[boardnum].clock_source = value; + switch (value) + { + case SLICECOM_CLOCK_SOURCE_LINE: + writeb(readb(lbi + LIM0) & ~1, lbi + LIM0); + break; + case SLICECOM_CLOCK_SOURCE_INTERNAL: + writeb(readb(lbi + LIM0) | 1, lbi + LIM0); + break; + default: + printk("slicecom: board %d: unhandled " FILENAME_CLOCK_SOURCE + " value %d\n", boardnum, value); + } + spin_unlock_irqrestore(&mister_lock, flags); +} + +/* + * Set loopbacks - /proc/comx/comx0/loopback + */ + +void slicecom_set_loopback(int boardnum, int value) +{ + u8 *lbi = slicecom_boards[boardnum].lbi; + + spin_lock_irqsave(&mister_lock, flags); + + slicecom_boards[boardnum].loopback = value; + switch (value) + { + case SLICECOM_LOOPBACK_NONE: + writeb(readb(lbi + LIM0) & ~2, lbi + LIM0); /* Local Loop OFF */ + writeb(readb(lbi + LIM1) & ~2, lbi + LIM1); /* Remote Loop OFF */ + break; + case SLICECOM_LOOPBACK_LOCAL: + writeb(readb(lbi + LIM1) & ~2, lbi + LIM1); /* Remote Loop OFF */ + writeb(readb(lbi + LIM0) | 2, lbi + LIM0); /* Local Loop ON */ + break; + case SLICECOM_LOOPBACK_REMOTE: + writeb(readb(lbi + LIM0) & ~2, lbi + LIM0); /* Local Loop OFF */ + writeb(readb(lbi + LIM1) | 2, lbi + LIM1); /* Remote Loop ON */ + break; + default: 
+ printk("slicecom: board %d: unhandled " FILENAME_LOOPBACK + " value %d\n", boardnum, value); + } + spin_unlock_irqrestore(&mister_lock, flags); +} + +/* + * Update E1 line status LEDs on the adapter + */ + +void slicecom_update_leds(munich_board_t * board) +{ + u32 *bar1 = board->bar1; + u8 *lbi = board->lbi; + u8 frs0; + u32 leds; + int i; + + spin_lock_irqsave(&mister_lock, flags); + + leds = 0; + frs0 = readb(lbi + FRS0); /* FRS0 bits described on page 137 */ + + if (!(frs0 & 0xa0)) + { + leds |= 0x2000; /* Green LED: Input signal seems to be OK, no LOS, no LFA */ + if (frs0 & 0x10) + leds |= 0x8000; /* Red LED: Receiving Remote Alarm */ + } + writel(leds, MUNICH_VIRT(GPDATA)); + + if (leds == 0x2000 && !board->lineup) + { /* line up */ + board->lineup = 1; + for (i = 0; i < 32; i++) + { + if (board->twins[i] && (board->twins[i]->flags & IFF_RUNNING)) + { + struct comx_channel *ch = board->twins[i]->priv; + + if (!test_and_set_bit(0, &ch->lineup_pending)) + { + ch->lineup_timer.function = comx_lineup_func; + ch->lineup_timer.data = (unsigned long)board->twins[i]; + ch->lineup_timer.expires = jiffies + HZ * ch->lineup_delay; + add_timer(&ch->lineup_timer); + } + } + } + } + else if (leds != 0x2000 && board->lineup) + { /* line down */ + board->lineup = 0; + for (i = 0; i < 32; i++) + if (board->twins[i] && (board->twins[i]->flags & IFF_RUNNING)) + { + struct comx_channel *ch = board->twins[i]->priv; + + if (test_and_clear_bit(0, &ch->lineup_pending)) + del_timer(&ch->lineup_timer); + else if (ch->line_status & LINE_UP) + { + ch->line_status &= ~LINE_UP; + if (ch->LINE_status) + ch->LINE_status(board->twins[i], ch->line_status); + } + } + } + spin_unlock_irqrestore(&mister_lock, flags); +} + +/* + * This function gets called every second when the FALC issues the interrupt. + * Hardware counters contain error counts for last 1-second time interval. + * We add them to the global counters here. + * Read rfc2495 to understand this. 
+ */ + +void slicecom_update_line_counters(munich_board_t * board) +{ + e1_stats_t *curr_int = &board->intervals[board->current_interval]; + + u8 *lbi = board->lbi; + + unsigned framing_errors, code_violations, path_code_violations, crc4_errors, + e_bit_errors; + unsigned slip_detected, /* this one has logical value, not the number of slips! */ + out_of_frame_defect, /* logical value */ + ais_defect, /* logical value */ + errored_sec, bursty_err_sec, severely_err_sec = 0, failure_sec; + u8 isr2, isr3, isr5, frs0; + + spin_lock_irqsave(&mister_lock, flags); + + isr2 = readb(lbi + ISR2); /* ISR0-5 described on page 156 */ + isr3 = readb(lbi + ISR3); + isr5 = readb(lbi + ISR5); + frs0 = readb(lbi + FRS0); /* FRS0 described on page 137 */ + + /* Error Events: */ + + code_violations = readb(lbi + CVCL) + (readb(lbi + CVCH) << 8); + framing_errors = readb(lbi + FECL) + (readb(lbi + FECH) << 8); + crc4_errors = readb(lbi + CEC1L) + (readb(lbi + CEC1H) << 8); + e_bit_errors = readb(lbi + EBCL) + (readb(lbi + EBCH) << 8); + slip_detected = isr3 & (ISR3_RSN | ISR3_RSP); + + path_code_violations = framing_errors + crc4_errors; + + curr_int->line_code_violations += code_violations; + curr_int->path_code_violations += path_code_violations; + curr_int->e_bit_errors += e_bit_errors; + + /* Performance Defects: */ + + /* there was an LFA in the last second, but maybe disappeared: */ + out_of_frame_defect = (isr2 & ISR2_LFA) || (frs0 & FRS0_LFA); + + /* there was an AIS in the last second, but maybe disappeared: */ + ais_defect = (isr2 & ISR2_AIS) || (frs0 & FRS0_AIS); + + /* Performance Parameters: */ + + if (out_of_frame_defect) + curr_int->fr_loss_secs++; + if (code_violations) + curr_int->line_err_secs++; + + errored_sec = ((board->framing == SLICECOM_FRAMING_NO_CRC4) && + (code_violations)) || path_code_violations || + out_of_frame_defect || slip_detected || ais_defect; + + bursty_err_sec = !out_of_frame_defect && !ais_defect && + (path_code_violations > 1) && 
(path_code_violations < 320); + + switch (board->framing) + { + case SLICECOM_FRAMING_CRC4: + severely_err_sec = out_of_frame_defect || + (path_code_violations >= 832); + break; + case SLICECOM_FRAMING_NO_CRC4: + severely_err_sec = (code_violations >= 2048); + break; + } + + /* + * failure_sec: true if there was a condition leading to a failure + * (and leading to unavailable state) in this second: + */ + + failure_sec = (isr2 & ISR2_RA) || (frs0 & FRS0_RRA) /* Remote/Far End/Distant Alarm Failure */ + || ais_defect || out_of_frame_defect /* AIS or LOF Failure */ + || (isr2 & ISR2_LOS) || (frs0 & FRS0_LOS) /* Loss Of Signal Failure */ + || (board->loopback != SLICECOM_LOOPBACK_NONE); /* Loopback has been set */ + + if (board->is_unavailable) + { + if (severely_err_sec) + board->no_ses_seconds = 0; + else + board->no_ses_seconds++; + + if ((board->no_ses_seconds >= 10) && !failure_sec) + { + board->is_unavailable = 0; + board->ses_seconds = 0; + board->no_ses_seconds = 0; + } + } + else + { + if (severely_err_sec) + board->ses_seconds++; + else + board->ses_seconds = 0; + + if ((board->ses_seconds >= 10) || failure_sec) + { + board->is_unavailable = 1; + board->ses_seconds = 0; + board->no_ses_seconds = 0; + } + } + + if (board->is_unavailable) + curr_int->unavail_secs++; + else + { + if (slip_detected) + curr_int->slip_secs++; + curr_int->errored_secs += errored_sec; + curr_int->bursty_err_secs += bursty_err_sec; + curr_int->severely_err_secs += severely_err_sec; + } + + /* the RFC does not say clearly which errors to count here, we try to count bit errors */ + + if (!board->is_unavailable && !severely_err_sec) + { + board->deg_cumulated_errors += code_violations; + board->deg_elapsed_seconds++; + if (board->deg_elapsed_seconds >= 60) + { + if (board->deg_cumulated_errors >= 123) + curr_int->degraded_mins++; + board->deg_cumulated_errors = 0; + board->deg_elapsed_seconds = 0; + } + + } + + board->elapsed_seconds++; + if (board->elapsed_seconds >= 900) + { + 
board->current_interval = + (board->current_interval + 1) % SLICECOM_BOARD_INTERVALS_SIZE; + memset((void *)&board->intervals[board->current_interval], 0, + sizeof(e1_stats_t)); + board->elapsed_seconds = 0; + } + + spin_unlock_irqrestore(&mister_lock, flags); +} + +static void pcicom_modemline(unsigned long b) +{ + munich_board_t *board = (munich_board_t *) b; + struct net_device *dev = board->twins[0]; + struct comx_channel *ch = netdev_priv(dev); + unsigned long regs; + + regs = readl((void *)(&board->bar1[GPDATA])); + if ((ch->line_status & LINE_UP) && (regs & 0x0800)) + { + ch->line_status &= ~LINE_UP; + board->lineup = 0; + if (ch->LINE_status) + { + ch->LINE_status(dev, ch->line_status); + } + } + + if (!(ch->line_status & LINE_UP) && !(regs & 0x0800)) + { + ch->line_status |= LINE_UP; + board->lineup = 1; + if (ch->LINE_status) + { + ch->LINE_status(dev, ch->line_status); + } + } + + mod_timer((struct timer_list *)&board->modemline_timer, jiffies + HZ); +} + +/* + * Is it possible to transmit ? + * Called (may be called) by the protocol layer + */ + +static int MUNICH_txe(struct net_device *dev) +{ + struct comx_channel *ch = netdev_priv(dev); + struct slicecom_privdata *hw = ch->HW_privdata; + + return (hw->busy < TX_DESC_MAX - 1); +} + +/* + * Hw probe function. Detects all the boards in the system, + * and fills up slicecom_boards[] and pcicom_boards[] + * Returns 0 on success. + * We do not disable interrupts! 
+ */ +static int munich_probe(void) +{ + struct pci_dev *pci; + int boardnum; + int slicecom_boardnum; + int pcicom_boardnum; + u32 *bar1; + u8 *lbi; + munich_board_t *board; + + for (boardnum = 0; boardnum < MAX_BOARDS; boardnum++) + { + pcicom_boards[boardnum].pci = 0; + pcicom_boards[boardnum].bar1 = 0; + pcicom_boards[boardnum].lbi = 0; + slicecom_boards[boardnum].pci = 0; + slicecom_boards[boardnum].bar1 = 0; + slicecom_boards[boardnum].lbi = 0; + } + + pci = NULL; + board = NULL; + slicecom_boardnum = 0; + pcicom_boardnum = 0; + + for (boardnum = 0; + boardnum < MAX_BOARDS && (pci = pci_find_device(PCI_VENDOR_ID_SIEMENS, + PCI_DEVICE_ID_SIEMENS_MUNICH32X, pci)); boardnum++) + { + if (pci_enable_device(pci)) + continue; + + printk("munich_probe: munich chip found, IRQ %d\n", pci->irq); + + bar1 = ioremap_nocache(pci->resource[0].start, 0x100); + lbi = ioremap_nocache(pci->resource[1].start, 0x100); + + if (bar1 && lbi) + { + pci_write_config_dword(pci, MUNICH_PCI_PCIRES, 0xe0000); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + pci_write_config_dword(pci, MUNICH_PCI_PCIRES, 0); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + /* check the type of the card */ + writel(LREG0_MAGIC, MUNICH_VIRT(LREG0)); + writel(LREG1_MAGIC, MUNICH_VIRT(LREG1)); + writel(LREG2_MAGIC, MUNICH_VIRT(LREG2)); + writel(LREG3_MAGIC, MUNICH_VIRT(LREG3)); + writel(LREG4_MAGIC, MUNICH_VIRT(LREG4)); + writel(LREG5_MAGIC, MUNICH_VIRT(LREG5)); + writel(LCONF_MAGIC2,MUNICH_VIRT(LCONF)); /* enable the DMSM */ + + if ((readb(lbi + VSTR) == 0x13) || (readb(lbi + VSTR) == 0x10)) + { + board = slicecom_boards + slicecom_boardnum; + sprintf((char *)board->devname, "slicecom%d", + slicecom_boardnum); + board->isx21 = 0; + slicecom_boardnum++; + } + else if ((readb(lbi + VSTR) == 0x6) || (readb(lbi + GIS) == 0x6)) + { + board = pcicom_boards + pcicom_boardnum; + sprintf((char *)board->devname, "pcicom%d", pcicom_boardnum); + board->isx21 = 1; + pcicom_boardnum++; 
+ } + if (board) + { + printk("munich_probe: %s board found\n", board->devname); + writel(LCONF_MAGIC1, MUNICH_VIRT(LCONF)); /* reset the DMSM */ + board->pci = pci; + board->bar1 = bar1; + board->lbi = lbi; + board->framing = SLICECOM_FRAMING_DEFAULT; + board->linecode = SLICECOM_LINECODE_DEFAULT; + board->clock_source = SLICECOM_CLOCK_SOURCE_DEFAULT; + board->loopback = SLICECOM_LOOPBACK_DEFAULT; + board->owner = THIS_MODULE; + } + else + { + printk("munich_probe: Board error, VSTR: %02X\n", + readb(lbi + VSTR)); + iounmap((void *)bar1); + iounmap((void *)lbi); + } + } + else + { + printk("munich_probe: ioremap() failed, not enabling this board!\n"); + /* .pci = NULL, so the MUNICH_open will not try to open it */ + if (bar1) iounmap((void *)bar1); + if (lbi) iounmap((void *)lbi); + } + } + + if (!pci && !boardnum) + { + printk("munich_probe: no PCI present!\n"); + return -ENODEV; + } + + if (pcicom_boardnum + slicecom_boardnum == 0) + { + printk + ("munich_probe: Couldn't find any munich board: vendor:device %x:%x not found\n", + PCI_VENDOR_ID_SIEMENS, PCI_DEVICE_ID_SIEMENS_MUNICH32X); + return -ENODEV; + } + + /* Found some */ + if (pcicom_boardnum) + printk("%d pcicom board(s) found.\n", pcicom_boardnum); + if (slicecom_boardnum) + printk("%d slicecom board(s) found.\n", slicecom_boardnum); + + return 0; +} + +/* + * Reset the hardware. Get called only from within this module if needed. + */ +#if 0 +static int slicecom_reset(struct net_device *dev) +{ + struct comx_channel *ch = netdev_priv(dev); + + printk("slicecom_reset: resetting the hardware\n"); + + /* Begin to reset the hardware */ + + if (ch->HW_set_clock) + ch->HW_set_clock(dev); + + /* And finish it */ + + return 0; +} +#endif + +/* + * Transmit a packet. 
+ * Called by the protocol layer + * Return values: + * FRAME_ACCEPTED: frame is being transmited, transmitter is busy + * FRAME_QUEUED: frame is being transmitted, there's more room in + * the transmitter for additional packet(s) + * FRAME_ERROR: + * FRAME_DROPPED: there was some error + */ + +static int MUNICH_send_packet(struct net_device *dev, struct sk_buff *skb) +{ + struct comx_channel *ch = netdev_priv(dev); + struct slicecom_privdata *hw = ch->HW_privdata; + + /* Send it to the debug facility too if needed: */ + + if (ch->debug_flags & DEBUG_HW_TX) + comx_debug_bytes(dev, skb->data, skb->len, "MUNICH_send_packet"); + + /* If the line is inactive, don't accept: */ + + /* TODO: atgondolni hogy mi is legyen itt */ + /* if (!(ch->line_status & LINE_UP)) return FRAME_DROPPED; */ + + /* More check, to be sure: */ + + if (skb->len > TXBUFFER_SIZE) + { + ch->stats.tx_errors++; + kfree_skb(skb); + return FRAME_ERROR; + } + + /* Maybe you have to disable irq's while programming the hw: */ + + spin_lock_irqsave(&mister_lock, flags); + + /* And more check: */ + + if (hw->busy >= TX_DESC_MAX - 1) + { + printk(KERN_ERR + "%s: Transmitter called while busy... 
dropping frame, busy = %d\n", + dev->name, hw->busy); + spin_unlock_irqrestore(&mister_lock, flags); + kfree_skb(skb); + return FRAME_DROPPED; + } + + if (hw->busy >= 0) + hw->tx_ring_hist[hw->busy]++; + /* DELL: */ + else + printk("slicecom: %s: FATAL: busy = %d\n", dev->name, hw->busy); + +// /* DEL: */ +// printk("slicecom: %s: _send_packet called, busy = %d\n", dev->name, hw->busy ); + + /* Packet can go, update stats: */ + + ch->stats.tx_packets++; + ch->stats.tx_bytes += skb->len; + + /* Pass the packet to the HW: */ + /* Step forward with the transmit descriptors: */ + + hw->tx_desc_ptr = (hw->tx_desc_ptr + 1) % TX_DESC_MAX; + + memcpy(&(hw->tx_data[hw->tx_desc_ptr][0]), skb->data, skb->len); + hw->tx_desc[hw->tx_desc_ptr].no = skb->len; + + /* We don't issue any command, just step with the HOLD bit */ + + hw->tx_desc[hw->tx_desc_ptr].hold = 1; + hw->tx_desc[(hw->tx_desc_ptr + TX_DESC_MAX - 1) % TX_DESC_MAX].hold = 0; + +#ifdef COMX_NEW + dev_kfree_skb(skb); +#endif + /* csomag kerult a Tx ringbe: */ + + hw->busy++; + + /* Report it: */ + + if (ch->debug_flags & DEBUG_HW_TX) + comx_debug(dev, "%s: MUNICH_send_packet was successful\n\n", dev->name); + + if (hw->busy >= TX_DESC_MAX - 1) + { + spin_unlock_irqrestore(&mister_lock, flags); + return FRAME_ACCEPTED; + } + + spin_unlock_irqrestore(&mister_lock, flags); + + /* All done */ + + return FRAME_QUEUED; +} + +/* + * Interrupt handler routine. + * Called by the Linux kernel. + * BEWARE! The interrupts are enabled on the call! 
+ */ +static irqreturn_t MUNICH_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct sk_buff *skb; + int length; + int rx_status; + int work; /* hany esemenyt kezeltem mar le */ + u32 *bar1; + u8 *lbi; + u32 stat, /* az esemenyek, amiket a ebben a loop korben le kell meg kezelni */ + race_stat = 0, /* race eseten ebben uzenek magamnak hogy mit kell meg lekezelni */ + ack; /* ezt fogom a vegen a STAT-ba irni, kiveszek belole 1-1 bitet ha */ + + /* az adott dolgot nem kell ack-olni mert volt vele munkam, es */ + /* legjobb ha visszaterek ide megegyszer */ + munich_intq_t int_info; + + struct net_device *dev; + struct comx_channel *ch; + struct slicecom_privdata *hw; + munich_board_t *board = (munich_board_t *) dev_id; + int channel; + + // , boardnum = (int)dev_id; + + // board = munich_boards + boardnum; + bar1 = board->bar1; + lbi = board->lbi; + + // Do not uncomment this under heavy load! :-> + // printk("MUNICH_interrupt: masked STAT=0x%08x, tiq=0x%08x, riq=0x%08x, piq=0x%08x\n", stat, board->tiq[0].all, board->riq[0].all, board->piq[0].all ); + + for (work = 0; (stat = (race_stat | (readl(MUNICH_VIRT(STAT)) & ~STAT_NOT_HANDLED_BY_INTERRUPT))) && (work < MAX_WORK - 1); work++) + { + ack = stat & (STAT_PRI | STAT_PTI | STAT_LBII); + + /* Handle the interrupt information in the Rx queue. We don't really trust */ + /* info from this queue, because it can be overflowed, so later check */ + /* every Rx ring for received packets. But there are some errors which can't */ + /* be counted from the Rx rings, so we parse it. 
*/ + + int_info = board->riq[board->riq_ptr]; + if (int_info.all & 0xF0000000) /* ha ez nem 0, akkor itt interrupt_info van */ + { + ack &= ~STAT_PRI; /* don't ack the interrupt, we had some work to do */ + + channel = PCM_INT_CHANNEL(int_info.all); + dev = board->twins[channel]; + + if (dev == NULL) + { + printk + ("MUNICH_interrupt: got an Rx interrupt info for NULL device " + "%s.twins[%d], int_info = 0x%08x\n", board->devname, + channel, int_info.all); + goto go_for_next_interrupt; + } + + ch = netdev_priv(dev); + hw = (struct slicecom_privdata *)ch->HW_privdata; + + // printk("Rx STAT=0x%08x int_info=0x%08x rx_desc_ptr=%d rx_desc.status=0x%01x\n", + // stat, int_info.all, hw->rx_desc_ptr, hw->rx_desc[ hw->rx_desc_ptr ].status ); + + if (int_info.all & PCM_INT_HI) + printk("SliceCOM: %s: Host Initiated interrupt\n", dev->name); + if (int_info.all & PCM_INT_IFC) + printk("SliceCOM: %s: Idle/Flag Change\n", dev->name); + /* TOD: jo ez az Idle/Flag Change valamire? - azonnal latszik belole hogy mikor ad a masik oldal */ + /* TOD: ilyen IT most nem is jon, mert ki van maszkolva az interrupt, biztosan kell ez? */ + + if (int_info.all & PCM_INT_FO) + /* Internal buffer (RB) overrun */ + ch->stats.rx_over_errors++; /* TOD: Ez azt jelenti hogy a belso RB nem volt hozzaferheto, es ezert kihagyott valamit. De nem csak csomag lehetett, hanem esemeny, stb. is. lasd page 247. Ezzel a 'cat status'-hoz igazodok, de a netdevice.h szerint nem egyertelmu hogy ide ez kellene. Nem lehet hogy rx_missed ? */ + /* DE: nem gotozok sehova, elvileg jo igy */ + /* kesobb meg visszaterek az FO-ra, ha packet-FO volt. Keresd a "packet-FO"-t. */ + if (int_info.all & PCM_INT_FI) /* frame received, but we do not trust the int_info queue */ + if (int_info.all & PCM_INT_SF) + { /* Short Frame: rovidebb mint a CRC */ + /* "rovidebb mint CRC+2byte" vizsgalat a "CRC+2"-nel */ + ch->stats.rx_length_errors++; /* TOD: noveljem? ne noveljem? 
*/ + goto go_for_next_interrupt; + } + + go_for_next_interrupt: /* One step in the interrupt queue */ + board->riq[board->riq_ptr].all = 0; /* megjelolom hogy itt meg nem jart a hw */ + board->riq_ptr = (board->riq_ptr + 1) % MUNICH_INTQMAX; + + } + + /* Check every Rx ring for incomed packets: */ + + for (channel = 0; channel < 32; channel++) + { + dev = board->twins[channel]; + + if (dev != NULL) + { + ch = netdev_priv(dev); + hw = (struct slicecom_privdata *)ch->HW_privdata; + + rx_status = hw->rx_desc[hw->rx_desc_ptr].status; + + if (!(rx_status & 0x80)) /* mar jart itt a hardver */ + { + ack &= ~STAT_PRI; /* Don't ack, we had some work */ + + /* Ez most egy kicsit zuros, mert itt mar nem latom az int_infot */ + if (rx_status & RX_STATUS_ROF) + ch->stats.rx_over_errors++; /* TOD: 'cat status'-hoz igazodok */ + + if (rx_status & RX_STATUS_RA) + /* Abort received or issued on channel */ + ch->stats.rx_frame_errors++; /* or HOLD bit in the descriptor */ + /* TOD: 'cat status'-hoz igazodok */ + + if (rx_status & RX_STATUS_LFD) + { /* Long Frame (longer then MFL in the MODE1) */ + ch->stats.rx_length_errors++; + goto go_for_next_frame; + } + + if (rx_status & RX_STATUS_NOB) + { /* Not n*8 bits long frame - frame alignment */ + ch->stats.rx_frame_errors++; /* ez viszont nem igazodik a 'cat status'-hoz */ + goto go_for_next_frame; + } + + if (rx_status & RX_STATUS_CRCO) + { /* CRC error */ + ch->stats.rx_crc_errors++; + goto go_for_next_frame; + } + + if (rx_status & RX_STATUS_SF) + { /* Short Frame: rovidebb mint CRC+2byte */ + ch->stats.rx_errors++; /* The HW does not set PCI_INT_ERR bit for this one, see page 246 */ + ch->stats.rx_length_errors++; + goto go_for_next_frame; + } + + if (rx_status != 0) + { + printk("SliceCOM: %s: unhandled rx_status: 0x%02x\n", + dev->name, rx_status); + goto go_for_next_frame; + } + + /* frame received without errors: */ + + length = hw->rx_desc[hw->rx_desc_ptr].bno; + ch->stats.rx_packets++; /* Count only 'good' packets */ + 
ch->stats.rx_bytes += length; + + /* Allocate a larger skb and reserve the heading for efficiency: */ + + if ((skb = dev_alloc_skb(length + 16)) == NULL) + { + ch->stats.rx_dropped++; + goto go_for_next_frame; + } + + /* Do bookkeeping: */ + + skb_reserve(skb, 16); + skb_put(skb, length); + skb->dev = dev; + + /* Now copy the data into the buffer: */ + + memcpy(skb->data, &(hw->rx_data[hw->rx_desc_ptr][0]), length); + + /* DEL: UGLY HACK!!!! */ + if (*((int *)skb->data) == 0x02000000 && + *(((int *)skb->data) + 1) == 0x3580008f) + { + printk("%s: swapping hack\n", dev->name); + *((int *)skb->data) = 0x3580008f; + *(((int *)skb->data) + 1) = 0x02000000; + } + + if (ch->debug_flags & DEBUG_HW_RX) + comx_debug_skb(dev, skb, "MUNICH_interrupt receiving"); + + /* Pass it to the protocol entity: */ + + ch->LINE_rx(dev, skb); + + go_for_next_frame: + /* DEL: rafutott-e a HOLD bitre -detektalas */ + { + if( ((rx_desc_t*)phys_to_virt(board->ccb->current_rx_desc[channel]))->hold + && ((rx_desc_t*)phys_to_virt(board->ccb->current_rx_desc[channel]))->status != 0xff) + hw->rafutott++; /* rafutott: hanyszor volt olyan hogy a current descriptoron HOLD bit volt, es a hw mar befejezte az irast (azaz a hw rafutott a HOLD bitre) */ + } + + // if( jiffies % 2 ) /* DELL: okozzunk egy kis Rx ring slipet :) */ + // { + /* Step forward with the receive descriptors: */ + /* if you change this, change the copy of it below too! Search for: "RxSlip" */ + hw->rx_desc[(hw->rx_desc_ptr + RX_DESC_MAX - 1) % RX_DESC_MAX].hold = 1; + hw->rx_desc[hw->rx_desc_ptr].status = 0xFF; /* megjelolom hogy itt meg nem jart a hw */ + hw->rx_desc[(hw->rx_desc_ptr + RX_DESC_MAX - 2) % RX_DESC_MAX].hold = 0; + hw->rx_desc_ptr = (hw->rx_desc_ptr + 1) % RX_DESC_MAX; + // } + } + } + } + + stat &= ~STAT_PRI; + +// } + +// if( stat & STAT_PTI ) /* TOD: primko megvalositas: mindig csak egy esemenyt dolgozok fel, */ + /* es nem torlom a STAT-ot, ezert ujra visszajon ide a rendszer. 
Amikor */ + /* jon interrupt, de nincs mit feldolgozni, akkor torlom a STAT-ot. */ + /* 'needs a rewrite', de elso megoldasnak jo lesz */ +// { + int_info = board->tiq[board->tiq_ptr]; + if (int_info.all & 0xF0000000) /* ha ez nem 0, akkor itt interrupt_info van */ + { + ack &= ~STAT_PTI; /* don't ack the interrupt, we had some work to do */ + + channel = PCM_INT_CHANNEL(int_info.all); + dev = board->twins[channel]; + + if (dev == NULL) + { + printk("MUNICH_interrupt: got a Tx interrupt for NULL device " + "%s.twins[%d], int_info = 0x%08x\n", + board->isx21 ? "pcicom" : "slicecom", channel, int_info.all); + goto go_for_next_tx_interrupt; + } + + ch = netdev_priv(dev); + hw = (struct slicecom_privdata *)ch->HW_privdata; + + // printk("Tx STAT=0x%08x int_info=0x%08x tiq_ptr=%d\n", stat, int_info.all, board->tiq_ptr ); + + if (int_info.all & PCM_INT_FE2) + { /* "Tx available" */ + /* do nothing */ + } + else if (int_info.all & PCM_INT_FO) + { /* Internal buffer (RB) overrun */ + ch->stats.rx_over_errors++; + } + else + { + printk("slicecom: %s: unhandled Tx int_info: 0x%08x\n", + dev->name, int_info.all); + } + + go_for_next_tx_interrupt: + board->tiq[board->tiq_ptr].all = 0; + board->tiq_ptr = (board->tiq_ptr + 1) % MUNICH_INTQMAX; + } + + /* Check every Tx ring for incoming packets: */ + + for (channel = 0; channel < 32; channel++) + { + dev = board->twins[channel]; + + if (dev != NULL) + { + int newbusy; + + ch = netdev_priv(dev); + hw = (struct slicecom_privdata *)ch->HW_privdata; + + /* We don't trust the "Tx available" info from the TIQ, but check */ + /* every ring if there is some free room */ + + if (ch->init_status && netif_running(dev)) + { + newbusy = ( TX_DESC_MAX + (& hw->tx_desc[ hw->tx_desc_ptr ]) - + (tx_desc_t*)phys_to_virt(board->ccb->current_tx_desc[ hw->channel ]) ) % TX_DESC_MAX; + + if(newbusy < 0) + { + printk("slicecom: %s: FATAL: fresly computed busy = %d, HW: 0x%p, SW: 0x%p\n", + dev->name, newbusy, + 
phys_to_virt(board->ccb->current_tx_desc[hw->channel]), + & hw->tx_desc[hw->tx_desc_ptr]); + } + + /* Fogyott valami a Tx ringbol? */ + + if (newbusy < hw->busy) + { + // ack &= ~STAT_PTI; /* Don't ack, we had some work */ + hw->busy = newbusy; + if (ch->LINE_tx) + ch->LINE_tx(dev); /* Report it to protocol driver */ + } + else if (newbusy > hw->busy) + printk("slicecom: %s: newbusy > hw->busy, this should not happen!\n", dev->name); + } + } + } + stat &= ~STAT_PTI; + + int_info = board->piq[board->piq_ptr]; + if (int_info.all & 0xF0000000) /* ha ez nem 0, akkor itt interrupt_info van */ + { + ack &= ~STAT_LBII; /* don't ack the interrupt, we had some work to do */ + + /* We do not really use (yet) the interrupt info from this queue, */ + + // printk("slicecom: %s: LBI Interrupt event: %08x\n", board->devname, int_info.all); + + if (!board->isx21) + { + slicecom_update_leds(board); + slicecom_update_line_counters(board); + } + + goto go_for_next_lbi_interrupt; /* To avoid warning about unused label */ + + go_for_next_lbi_interrupt: /* One step in the interrupt queue */ + board->piq[board->piq_ptr].all = 0; /* megjelolom hogy itt meg nem jart a hw */ + board->piq_ptr = (board->piq_ptr + 1) % MUNICH_PIQMAX; + } + stat &= ~STAT_LBII; + + writel(ack, MUNICH_VIRT(STAT)); + + if (stat & STAT_TSPA) + { + // printk("slicecom: %s: PCM TSP Asynchronous\n", board->devname); + writel(STAT_TSPA, MUNICH_VIRT(STAT)); + stat &= ~STAT_TSPA; + } + + if (stat & STAT_RSPA) + { + // printk("slicecom: %s: PCM RSP Asynchronous\n", board->devname); + writel(STAT_RSPA, MUNICH_VIRT(STAT)); + stat &= ~STAT_RSPA; + } + if (stat) + { + printk("MUNICH_interrupt: unhandled interrupt, STAT=0x%08x\n", + stat); + writel(stat, MUNICH_VIRT(STAT)); /* ha valamit megsem kezeltunk le, azert ack-ot kuldunk neki */ + } + + } + board->histogram[work]++; + + /* We can miss these if we reach the MAX_WORK */ + /* Count it to see how often it happens */ + + if (race_stat & STAT_PRI) + 
board->stat_pri_races_missed++; + if (race_stat & STAT_PTI) + board->stat_pti_races_missed++; + return IRQ_HANDLED; +} + +/* + * Hardware open routine. + * Called by comx (upper) layer when the user wants to bring up the interface + * with ifconfig. + * Initializes hardware, allocates resources etc. + * Returns 0 on OK, or standard error value on error. + */ + +static int MUNICH_open(struct net_device *dev) +{ + struct comx_channel *ch = netdev_priv(dev); + struct slicecom_privdata *hw = ch->HW_privdata; + struct proc_dir_entry *procfile = ch->procdir->subdir; + munich_board_t *board; + munich_ccb_t *ccb; + + u32 *bar1; + u8 *lbi; + u32 stat; + unsigned long flags, jiffs; + + int i, channel; + u32 timeslots = hw->timeslots; + + board = hw->boardnum + (ch->hardware == &pcicomhw ? pcicom_boards : slicecom_boards); + + bar1 = board->bar1; + lbi = board->lbi; + + /* TODO: a timeslotok ellenorzese kell majd ide .. hat, biztos? mar a write_proc-ban is + ellenorzom valamennyire. + if (!dev->io || !dev->irq) return -ENODEV; + */ + + if (!board->pci) + { + printk("MUNICH_open: no %s board with boardnum = %d\n", + ch->hardware->name, hw->boardnum); + return -ENODEV; + } + + spin_lock_irqsave(&mister_lock, flags); + /* lock the section to avoid race with multiple opens and make sure + that no interrupts get called while this lock is active */ + + if (board->use_count == 0) /* bring up the board if it was unused */ + /* if fails, frees allocated resources and returns. */ + /* TOD: is it safe? nem kellene resetelni a kartyat? 
*/ + { + printk("MUNICH_open: %s: bringing up board\n", board->devname); + + /* Clean up the board's static struct if messed: */ + + for (i = 0; i < 32; i++) + board->twins[i] = NULL; + for (i = 0; i < MAX_WORK; i++) + board->histogram[i] = 0; + + board->lineup = 0; + + /* Allocate CCB: */ + board->ccb = kmalloc(sizeof(munich_ccb_t), GFP_KERNEL); + if (board->ccb == NULL) + { + spin_unlock_irqrestore(&mister_lock, flags); + return -ENOMEM; + } + memset((void *)board->ccb, 0, sizeof(munich_ccb_t)); + board->ccb->csa = virt_to_phys(board->ccb); + ccb = board->ccb; + for (i = 0; i < 32; i++) + { + ccb->timeslot_spec[i].tti = 1; + ccb->timeslot_spec[i].rti = 1; + } + + /* Interrupt queues: */ + + board->tiq = kmalloc(MUNICH_INTQSIZE, GFP_KERNEL); + if (board->tiq == NULL) + { + spin_unlock_irqrestore(&mister_lock, flags); + return -ENOMEM; + } + memset((void *)board->tiq, 0, MUNICH_INTQSIZE); + + board->riq = kmalloc(MUNICH_INTQSIZE, GFP_KERNEL); + if (board->riq == NULL) + { + spin_unlock_irqrestore(&mister_lock, flags); + return -ENOMEM; + } + memset((void *)board->riq, 0, MUNICH_INTQSIZE); + + board->piq = kmalloc(MUNICH_PIQSIZE, GFP_KERNEL); + if (board->piq == NULL) + { + spin_unlock_irqrestore(&mister_lock, flags); + return -ENOMEM; + } + memset((void *)board->piq, 0, MUNICH_PIQSIZE); + + board->tiq_ptr = 0; + board->riq_ptr = 0; + board->piq_ptr = 0; + + /* Request irq: */ + + board->irq = 0; + + /* (char*) cast to avoid warning about discarding volatile: */ + if (request_irq(board->pci->irq, MUNICH_interrupt, 0, + (char *)board->devname, (void *)board)) + { + printk("MUNICH_open: %s: unable to obtain irq %d\n", board->devname, + board->pci->irq); + /* TOD: free other resources (a sok malloc feljebb) */ + spin_unlock_irqrestore(&mister_lock, flags); + return -EAGAIN; + } + board->irq = board->pci->irq; /* csak akkor legyen != 0, ha tenyleg le van foglalva nekunk */ + + /* Programming device: */ + + /* Reset the board like a power-on: */ + /* TOD: + - It is not a 
real power-on: if a DMA transaction fails with master abort, the board + stays in half-dead state. + - It doesn't reset the FALC line driver */ + + pci_write_config_dword(board->pci, MUNICH_PCI_PCIRES, 0xe0000); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + pci_write_config_dword(board->pci, MUNICH_PCI_PCIRES, 0); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + + writel(virt_to_phys(&ccb->csa), MUNICH_VIRT(CCBA)); + writel(virt_to_phys( board->tiq ), MUNICH_VIRT(TIQBA)); + writel(MUNICH_INTQLEN, MUNICH_VIRT(TIQL)); + writel(virt_to_phys( board->riq ), MUNICH_VIRT(RIQBA)); + writel(MUNICH_INTQLEN, MUNICH_VIRT(RIQL)); + writel(virt_to_phys( board->piq ), MUNICH_VIRT(PIQBA)); + writel(MUNICH_PIQLEN, MUNICH_VIRT(PIQL)); + + /* Put the magic values into the registers: */ + + writel(MODE1_MAGIC, MUNICH_VIRT(MODE1)); + writel(MODE2_MAGIC, MUNICH_VIRT(MODE2)); + + writel(LREG0_MAGIC, MUNICH_VIRT(LREG0)); + writel(LREG1_MAGIC, MUNICH_VIRT(LREG1)); + writel(LREG2_MAGIC, MUNICH_VIRT(LREG2)); + writel(LREG3_MAGIC, MUNICH_VIRT(LREG3)); + writel(LREG4_MAGIC, MUNICH_VIRT(LREG4)); + writel(LREG5_MAGIC, MUNICH_VIRT(LREG5)); + + writel(LCONF_MAGIC1, MUNICH_VIRT(LCONF)); /* reset the DMSM */ + writel(LCONF_MAGIC2, MUNICH_VIRT(LCONF)); /* enable the DMSM */ + + writel(~0, MUNICH_VIRT(TXPOLL)); + writel(board->isx21 ? 0x1400 : 0xa000, MUNICH_VIRT(GPDIR)); + + if (readl(MUNICH_VIRT(STAT))) writel(readl(MUNICH_VIRT(STAT)), MUNICH_VIRT(STAT)); + + ccb->action_spec = CCB_ACTIONSPEC_RES | CCB_ACTIONSPEC_IA; + writel(CMD_ARPCM, MUNICH_VIRT(CMD)); /* Start the PCM core reset */ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + + stat = 0; /* Wait for the action to complete max. 
1 second */ + jiffs = jiffies; + while (!((stat = readl(MUNICH_VIRT(STAT))) & (STAT_PCMA | STAT_PCMF)) && time_before(jiffies, jiffs + HZ)) + { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + } + + if (stat & STAT_PCMF) + { + printk(KERN_ERR + "MUNICH_open: %s: Initial ARPCM failed. STAT=0x%08x\n", + board->devname, stat); + writel(readl(MUNICH_VIRT(STAT)) & STAT_PCMF, MUNICH_VIRT(STAT)); + free_irq(board->irq, (void *)board); /* TOD: free other resources too *//* maybe shut down hw? */ + board->irq = 0; + spin_unlock_irqrestore(&mister_lock, flags); + return -EAGAIN; + } + else if (!(stat & STAT_PCMA)) + { + printk(KERN_ERR + "MUNICH_open: %s: Initial ARPCM timeout. STAT=0x%08x\n", + board->devname, stat); + free_irq(board->irq, (void *)board); /* TOD: free other resources too *//* maybe shut off the hw? */ + board->irq = 0; + spin_unlock_irqrestore(&mister_lock, flags); + return -EIO; + } + + writel(readl(MUNICH_VIRT(STAT)) & STAT_PCMA, MUNICH_VIRT(STAT)); /* Acknowledge */ + + if (board->isx21) writel(0, MUNICH_VIRT(GPDATA)); + + printk("MUNICH_open: %s: succesful HW-open took %ld jiffies\n", + board->devname, jiffies - jiffs); + + /* Set up the FALC hanging on the Local Bus: */ + + if (!board->isx21) + { + writeb(0x0e, lbi + FMR1); + writeb(0, lbi + LIM0); + writeb(0xb0, lbi + LIM1); /* TODO: input threshold */ + writeb(0xf7, lbi + XPM0); + writeb(0x02, lbi + XPM1); + writeb(0x00, lbi + XPM2); + writeb(0xf0, lbi + FMR0); + writeb(0x80, lbi + PCD); + writeb(0x80, lbi + PCR); + writeb(0x00, lbi + LIM2); + writeb(0x07, lbi + XC0); + writeb(0x3d, lbi + XC1); + writeb(0x05, lbi + RC0); + writeb(0x00, lbi + RC1); + writeb(0x83, lbi + FMR2); + writeb(0x9f, lbi + XSW); + writeb(0x0f, lbi + XSP); + writeb(0x00, lbi + TSWM); + writeb(0xe0, lbi + MODE); + writeb(0xff, lbi + IDLE); /* Idle Code to send in unused timeslots */ + writeb(0x83, lbi + IPC); /* interrupt query line mode: Push/pull output, active high */ + writeb(0xbf, lbi + IMR3); /* send an 
interrupt every second */ + + slicecom_set_framing(hw->boardnum, board->framing); + slicecom_set_linecode(hw->boardnum, board->linecode); + slicecom_set_clock_source(hw->boardnum, board->clock_source); + slicecom_set_loopback(hw->boardnum, board->loopback); + + memset((void *)board->intervals, 0, sizeof(board->intervals)); + board->current_interval = 0; + board->elapsed_seconds = 0; + board->ses_seconds = 0; + board->is_unavailable = 0; + board->no_ses_seconds = 0; + board->deg_elapsed_seconds = 0; + board->deg_cumulated_errors = 0; + } + + /* Enable the interrupts last */ + /* These interrupts will be enabled. We do not need the others. */ + + writel(readl(MUNICH_VIRT(IMASK)) & ~(STAT_PTI | STAT_PRI | STAT_LBII | STAT_TSPA | STAT_RSPA), MUNICH_VIRT(IMASK)); + } + + spin_unlock_irqrestore(&mister_lock, flags); + + dev->irq = board->irq; /* hogy szep legyen az ifconfig outputja */ + ccb = board->ccb; /* TODO: ez igy csunya egy kicsit hogy benn is meg kinn is beletoltom :( */ + + spin_lock_irqsave(&mister_lock, flags); + + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + + /* Check if the selected timeslots aren't used already */ + + for (i = 0; i < 32; i++) + if (((1 << i) & timeslots) && !ccb->timeslot_spec[i].tti) + { + printk("MUNICH_open: %s: timeslot %d already used by %s\n", + dev->name, i, board->twins[ccb->timeslot_spec[i].txchannel]->name); + spin_unlock_irqrestore(&mister_lock, flags); + return -EBUSY; /* TODO: lehet hogy valami mas errno kellene? 
*/ + } + + /* find a free channel: */ + /* TODO: ugly, rewrite it */ + + for (channel = 0; channel <= 32; channel++) + { + if (channel == 32) + { /* not found a free one */ + printk + ("MUNICH_open: %s: FATAL: can not find a free channel - this should not happen!\n", + dev->name); + spin_unlock_irqrestore(&mister_lock, flags); + return -ENODEV; + } + if (board->twins[channel] == NULL) + break; /* found the first free one */ + } + + board->lastcheck = jiffies; /* avoid checking uninitialized hardware channel */ + + /* Open the channel. If fails, calls MUNICH_close() to properly free resources and stop the HW */ + + hw->channel = channel; + board->twins[channel] = dev; + + board->use_count++; /* meg nem nyitottuk meg a csatornat, de a twins-ben + mar elfoglaltunk egyet, es ha a _close-t akarjuk hivni, akkor ez kell. */ + for (i = 0; i < 32; i++) + if ((1 << i) & timeslots) + { + ccb->timeslot_spec[i].tti = 0; + ccb->timeslot_spec[i].txchannel = channel; + ccb->timeslot_spec[i].txfillmask = ~0; + + ccb->timeslot_spec[i].rti = 0; + ccb->timeslot_spec[i].rxchannel = channel; + ccb->timeslot_spec[i].rxfillmask = ~0; + } + + if (!board->isx21) rework_idle_channels(dev); + + memset((void *)&(hw->tx_desc), 0, TX_DESC_MAX * sizeof(tx_desc_t)); + memset((void *)&(hw->rx_desc), 0, RX_DESC_MAX * sizeof(rx_desc_t)); + + for (i = 0; i < TX_DESC_MAX; i++) + { + hw->tx_desc[i].fe = 1; + hw->tx_desc[i].fnum = 2; + hw->tx_desc[i].data = virt_to_phys( & (hw->tx_data[i][0]) ); + hw->tx_desc[i].next = virt_to_phys( & (hw->tx_desc[ (i+1) % TX_DESC_MAX ]) ); + + } + hw->tx_desc_ptr = 0; /* we will send an initial packet so it is correct: "oda irtunk utoljara" */ + hw->busy = 0; + hw->tx_desc[hw->tx_desc_ptr].hold = 1; + hw->tx_desc[hw->tx_desc_ptr].no = 1; /* TOD: inkabb csak 0 hosszut kuldjunk ki az initkor? 
*/ + + for (i = 0; i < RX_DESC_MAX; i++) + { + hw->rx_desc[i].no = RXBUFFER_SIZE; + hw->rx_desc[i].data = virt_to_phys(&(hw->rx_data[i][0])); + hw->rx_desc[i].next = virt_to_phys(&(hw->rx_desc[(i+1) % RX_DESC_MAX])); + hw->rx_desc[i].status = 0xFF; + } + hw->rx_desc_ptr = 0; + + hw->rx_desc[(hw->rx_desc_ptr + RX_DESC_MAX - 2) % RX_DESC_MAX].hold = 1; + + memset((void *)&ccb->channel_spec[channel], 0, sizeof(channel_spec_t)); + + ccb->channel_spec[channel].ti = 0; /* Transmit off */ + ccb->channel_spec[channel].to = 1; + ccb->channel_spec[channel].ta = 0; + + ccb->channel_spec[channel].th = 1; /* Transmit hold */ + + ccb->channel_spec[channel].ri = 0; /* Receive off */ + ccb->channel_spec[channel].ro = 1; + ccb->channel_spec[channel].ra = 0; + + ccb->channel_spec[channel].mode = 3; /* HDLC */ + + ccb->action_spec = CCB_ACTIONSPEC_IN | (channel << 8); + writel(CMD_ARPCM, MUNICH_VIRT(CMD)); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + + spin_unlock_irqrestore(&mister_lock, flags); + + stat = 0; + jiffs = jiffies; + while (!((stat = readl(MUNICH_VIRT(STAT))) & (STAT_PCMA | STAT_PCMF)) && time_before(jiffies, jiffs + HZ)) + { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + } + + if (stat & STAT_PCMF) + { + printk(KERN_ERR "MUNICH_open: %s: %s channel %d off failed\n", + dev->name, board->devname, channel); + writel(readl(MUNICH_VIRT(STAT)) & STAT_PCMF, MUNICH_VIRT(STAT)); + MUNICH_close(dev); + return -EAGAIN; + } + else if (!(stat & STAT_PCMA)) + { + printk(KERN_ERR "MUNICH_open: %s: %s channel %d off timeout\n", + dev->name, board->devname, channel); + MUNICH_close(dev); + return -EIO; + } + + writel(readl(MUNICH_VIRT(STAT)) & STAT_PCMA, MUNICH_VIRT(STAT)); + // printk("MUNICH_open: %s: succesful channel off took %ld jiffies\n", board->devname, jiffies-jiffs); + + spin_lock_irqsave(&mister_lock, flags); + + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + + ccb->channel_spec[channel].ifc = 1; /* 1 .. 
'Idle/Flag change' interrupt letiltva */ + ccb->channel_spec[channel].fit = 1; + ccb->channel_spec[channel].nitbs = 1; + ccb->channel_spec[channel].itbs = 2; + + /* TODOO: lehet hogy jo lenne igy, de utana kellene nezni hogy nem okoz-e fragmentaciot */ + // ccb->channel_spec[channel].itbs = 2 * number_of_timeslots; + // printk("open: %s: number_of_timeslots: %d\n", dev->name, number_of_timeslots); + + ccb->channel_spec[channel].mode = 3; /* HDLC */ + ccb->channel_spec[channel].ftda = virt_to_phys(&(hw->tx_desc)); + ccb->channel_spec[channel].frda = virt_to_phys(&(hw->rx_desc[0])); + + ccb->channel_spec[channel].ti = 1; /* Transmit init */ + ccb->channel_spec[channel].to = 0; + ccb->channel_spec[channel].ta = 1; + + ccb->channel_spec[channel].th = 0; + + ccb->channel_spec[channel].ri = 1; /* Receive init */ + ccb->channel_spec[channel].ro = 0; + ccb->channel_spec[channel].ra = 1; + + ccb->action_spec = CCB_ACTIONSPEC_ICO | (channel << 8); + writel(CMD_ARPCM, MUNICH_VIRT(CMD)); /* Start the channel init */ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + + spin_unlock_irqrestore(&mister_lock, flags); + + stat = 0; /* Wait for the action to complete max. 
1 second */ + jiffs = jiffies; + while (!((stat = readl(MUNICH_VIRT(STAT))) & (STAT_PCMA | STAT_PCMF)) && time_before(jiffies, jiffs + HZ)) + { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + } + + if (stat & STAT_PCMF) + { + printk(KERN_ERR "MUNICH_open: %s: channel open ARPCM failed\n", + board->devname); + writel(readl(MUNICH_VIRT(STAT)) & STAT_PCMF, MUNICH_VIRT(STAT)); + MUNICH_close(dev); + return -EAGAIN; + } + else if (!(stat & STAT_PCMA)) + { + printk(KERN_ERR "MUNICH_open: %s: channel open ARPCM timeout\n", + board->devname); + MUNICH_close(dev); + return -EIO; + } + + writel(readl(MUNICH_VIRT(STAT)) & STAT_PCMA, MUNICH_VIRT(STAT)); + // printk("MUNICH_open: %s: succesful channel open took %ld jiffies\n", board->devname, jiffies-jiffs); + + spin_lock_irqsave(&mister_lock, flags); + + ccb->channel_spec[channel].nitbs = 0; /* once ITBS defined, these must be 0 */ + ccb->channel_spec[channel].itbs = 0; + + if (board->isx21) + { + init_timer(&board->modemline_timer); + board->modemline_timer.data = (unsigned long)board; + board->modemline_timer.function = pcicom_modemline; + board->modemline_timer.expires = jiffies + HZ; + add_timer((struct timer_list *)&board->modemline_timer); + } + + /* It is done. Declare that we're open: */ + hw->busy = 0; /* It may be 1 if the frame at Tx init already ended, but it is not */ + /* a real problem: we compute hw->busy on every interrupt */ + hw->rafutott = 0; + ch->init_status |= HW_OPEN; + + /* Initialize line state: */ + if (board->lineup) + ch->line_status |= LINE_UP; + else + ch->line_status &= ~LINE_UP; + + /* Remove w attribute from /proc files associated to hw parameters: + no write when the device is open */ + + for (; procfile; procfile = procfile->next) + if (strcmp(procfile->name, FILENAME_BOARDNUM) == 0 || + strcmp(procfile->name, FILENAME_TIMESLOTS) == 0) + procfile->mode = S_IFREG | 0444; + + spin_unlock_irqrestore(&mister_lock, flags); + + return 0; +} + +/* + * Hardware close routine. 
+ * Called by comx (upper) layer when the user wants to bring down the interface + * with ifconfig. + * We also call it from MUNICH_open, if the open fails. + * Brings down hardware, frees resources, stops receiver + * Returns 0 on OK, or standard error value on error. + */ + +static int MUNICH_close(struct net_device *dev) +{ + struct comx_channel *ch = netdev_priv(dev); + struct slicecom_privdata *hw = ch->HW_privdata; + struct proc_dir_entry *procfile = ch->procdir->subdir; + munich_board_t *board; + munich_ccb_t *ccb; + + u32 *bar1; + u32 timeslots = hw->timeslots; + int stat, i, channel = hw->channel; + unsigned long jiffs; + + board = hw->boardnum + (ch->hardware == &pcicomhw ? pcicom_boards : slicecom_boards); + + ccb = board->ccb; + bar1 = board->bar1; + + if (board->isx21) + del_timer((struct timer_list *)&board->modemline_timer); + + spin_lock_irqsave(&mister_lock, flags); + + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + + /* Disable receiver for the channel: */ + + for (i = 0; i < 32; i++) + if ((1 << i) & timeslots) + { + ccb->timeslot_spec[i].tti = 1; + ccb->timeslot_spec[i].txfillmask = 0; /* just to be double-sure :) */ + + ccb->timeslot_spec[i].rti = 1; + ccb->timeslot_spec[i].rxfillmask = 0; + } + + if (!board->isx21) rework_idle_channels(dev); + + ccb->channel_spec[channel].ti = 0; /* Receive off, Transmit off */ + ccb->channel_spec[channel].to = 1; + ccb->channel_spec[channel].ta = 0; + ccb->channel_spec[channel].th = 1; + + ccb->channel_spec[channel].ri = 0; + ccb->channel_spec[channel].ro = 1; + ccb->channel_spec[channel].ra = 0; + + board->twins[channel] = NULL; + + ccb->action_spec = CCB_ACTIONSPEC_IN | (channel << 8); + writel(CMD_ARPCM, MUNICH_VIRT(CMD)); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + + spin_unlock_irqrestore(&mister_lock, flags); + + stat = 0; + jiffs = jiffies; + while (!((stat = readl(MUNICH_VIRT(STAT))) & (STAT_PCMA | STAT_PCMF)) && time_before(jiffies, jiffs + HZ)) + { + 
set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + } + + if (stat & STAT_PCMF) + { + printk(KERN_ERR + "MUNICH_close: %s: FATAL: channel off ARPCM failed, not closing!\n", + dev->name); + writel(readl(MUNICH_VIRT(STAT)) & STAT_PCMF, MUNICH_VIRT(STAT)); + /* If we return success, the privdata (and the descriptor list) will be freed */ + return -EIO; + } + else if (!(stat & STAT_PCMA)) + printk(KERN_ERR "MUNICH_close: %s: channel off ARPCM timeout\n", + board->devname); + + writel(readl(MUNICH_VIRT(STAT)) & STAT_PCMA, MUNICH_VIRT(STAT)); + // printk("MUNICH_close: %s: channel off took %ld jiffies\n", board->devname, jiffies-jiffs); + + spin_lock_irqsave(&mister_lock, flags); + + if (board->use_count) board->use_count--; + + if (!board->use_count) /* we were the last user of the board */ + { + printk("MUNICH_close: bringing down board %s\n", board->devname); + + /* program down the board: */ + + writel(0x0000FF7F, MUNICH_VIRT(IMASK)); /* do not send any interrupts */ + writel(0, MUNICH_VIRT(CMD)); /* stop the timer if someone started it */ + writel(~0U, MUNICH_VIRT(STAT)); /* if an interrupt came between the cli()-sti(), quiet it */ + if (ch->hardware == &pcicomhw) + writel(0x1400, MUNICH_VIRT(GPDATA)); + + /* Put the board into 'reset' state: */ + pci_write_config_dword(board->pci, MUNICH_PCI_PCIRES, 0xe0000); + + /* Free irq and other resources: */ + if (board->irq) + free_irq(board->irq, (void *)board); /* Ha nem inicializalta magat, akkor meg nincs irq */ + board->irq = 0; + + /* Free CCB and the interrupt queues */ + if (board->ccb) kfree((void *)board->ccb); + if (board->tiq) kfree((void *)board->tiq); + if (board->riq) kfree((void *)board->riq); + if (board->piq) kfree((void *)board->piq); + board->ccb = NULL; + board->tiq = board->riq = board->piq = NULL; + } + + /* Enable setting of hw parameters */ + for (; procfile; procfile = procfile->next) + if (strcmp(procfile->name, FILENAME_BOARDNUM) == 0 || + strcmp(procfile->name, FILENAME_TIMESLOTS) 
== 0) + procfile->mode = S_IFREG | 0644; + + /* We're not open anymore */ + ch->init_status &= ~HW_OPEN; + + spin_unlock_irqrestore(&mister_lock, flags); + + return 0; +} + +/* + * Give (textual) status information. + * The text it returns will be a part of what appears when the user does a + * cat /proc/comx/comx[n]/status + * Don't write more than PAGESIZE. + * Return value: number of bytes written (length of the string, incl. 0) + */ + +static int MUNICH_minden(struct net_device *dev, char *page) +{ + struct comx_channel *ch = netdev_priv(dev); + struct slicecom_privdata *hw = ch->HW_privdata; + munich_board_t *board; + struct net_device *devp; + + u8 *lbi; + e1_stats_t *curr_int, *prev_int; + e1_stats_t last4, last96; /* sum of last 4, resp. last 96 intervals */ + unsigned *sump, /* running pointer for the sum data */ + *p; /* running pointer for the interval data */ + + int len = 0; + u8 frs0, frs1; + u8 fmr2; + int i, j; + u32 timeslots; + + board = hw->boardnum + (ch->hardware == &pcicomhw ? 
pcicom_boards : slicecom_boards); + + lbi = board->lbi; + curr_int = &board->intervals[board->current_interval]; + prev_int = + &board-> + intervals[(board->current_interval + SLICECOM_BOARD_INTERVALS_SIZE - + 1) % SLICECOM_BOARD_INTERVALS_SIZE]; + + if (!board->isx21) + { + frs0 = readb(lbi + FRS0); + fmr2 = readb(lbi + FMR2); + len += scnprintf(page + len, PAGE_SIZE - len, "Controller status:\n"); + if (frs0 == 0) + len += scnprintf(page + len, PAGE_SIZE - len, "\tNo alarms\n"); + else + { + if (frs0 & FRS0_LOS) + len += scnprintf(page + len, PAGE_SIZE - len, "\tLoss Of Signal\n"); + else + { + if (frs0 & FRS0_AIS) + len += scnprintf(page + len, PAGE_SIZE - len, + "\tAlarm Indication Signal\n"); + else + { + if (frs0 & FRS0_AUXP) + len += scnprintf(page + len, PAGE_SIZE - len, + "\tAuxiliary Pattern Indication\n"); + if (frs0 & FRS0_LFA) + len += scnprintf(page + len, PAGE_SIZE - len, + "\tLoss of Frame Alignment\n"); + else + { + if (frs0 & FRS0_RRA) + len += scnprintf(page + len, PAGE_SIZE - len, + "\tReceive Remote Alarm\n"); + + /* You can't set this framing with the /proc interface, but it */ + /* may be good to have here this alarm if you set it by hand: */ + + if ((board->framing == SLICECOM_FRAMING_CRC4) && + (frs0 & FRS0_LMFA)) + len += scnprintf(page + len, PAGE_SIZE - len, + "\tLoss of CRC4 Multiframe Alignment\n"); + + if (((fmr2 & 0xc0) == 0xc0) && (frs0 & FRS0_NMF)) + len += scnprintf(page + len, PAGE_SIZE - len, + "\tNo CRC4 Multiframe alignment Found after 400 msec\n"); + } + } + } + } + + frs1 = readb(lbi + FRS1); + if (FRS1_XLS & frs1) + len += scnprintf(page + len, PAGE_SIZE - len, + "\tTransmit Line Short\n"); + + /* debug Rx ring: DEL: - vagy meghagyni, de akkor legyen kicsit altalanosabb */ + } + + len += scnprintf(page + len, PAGE_SIZE - len, "Rx ring:\n"); + len += scnprintf(page + len, PAGE_SIZE - len, "\trafutott: %d\n", hw->rafutott); + len += scnprintf(page + len, PAGE_SIZE - len, + "\tlastcheck: %ld, jiffies: %ld\n", board->lastcheck, 
jiffies); + len += scnprintf(page + len, PAGE_SIZE - len, "\tbase: %08x\n", + (u32) virt_to_phys(&hw->rx_desc[0])); + len += scnprintf(page + len, PAGE_SIZE - len, "\trx_desc_ptr: %d\n", + hw->rx_desc_ptr); + len += scnprintf(page + len, PAGE_SIZE - len, "\trx_desc_ptr: %08x\n", + (u32) virt_to_phys(&hw->rx_desc[hw->rx_desc_ptr])); + len += scnprintf(page + len, PAGE_SIZE - len, "\thw_curr_ptr: %08x\n", + board->ccb->current_rx_desc[hw->channel]); + + for (i = 0; i < RX_DESC_MAX; i++) + len += scnprintf(page + len, PAGE_SIZE - len, "\t%08x %08x %08x %08x\n", + *((u32 *) & hw->rx_desc[i] + 0), + *((u32 *) & hw->rx_desc[i] + 1), + *((u32 *) & hw->rx_desc[i] + 2), + *((u32 *) & hw->rx_desc[i] + 3)); + + if (!board->isx21) + { + len += scnprintf(page + len, PAGE_SIZE - len, + "Interfaces using this board: (channel-group, interface, timeslots)\n"); + for (i = 0; i < 32; i++) + { + devp = board->twins[i]; + if (devp != NULL) + { + timeslots = + ((struct slicecom_privdata *)((struct comx_channel *)devp-> + priv)->HW_privdata)-> + timeslots; + len += scnprintf(page + len, PAGE_SIZE - len, "\t%2d %s: ", i, + devp->name); + for (j = 0; j < 32; j++) + if ((1 << j) & timeslots) + len += scnprintf(page + len, PAGE_SIZE - len, "%d ", j); + len += scnprintf(page + len, PAGE_SIZE - len, "\n"); + } + } + } + + len += scnprintf(page + len, PAGE_SIZE - len, "Interrupt work histogram:\n"); + for (i = 0; i < MAX_WORK; i++) + len += scnprintf(page + len, PAGE_SIZE - len, "hist[%2d]: %8u%c", i, + board->histogram[i], (i && + ((i + 1) % 4 == 0 || + i == MAX_WORK - 1)) ? '\n' : ' '); + + len += scnprintf(page + len, PAGE_SIZE - len, "Tx ring histogram:\n"); + for (i = 0; i < TX_DESC_MAX; i++) + len += scnprintf(page + len, PAGE_SIZE - len, "hist[%2d]: %8u%c", i, + hw->tx_ring_hist[i], (i && + ((i + 1) % 4 == 0 || + i == + TX_DESC_MAX - 1)) ? 
'\n' : ' '); + + if (!board->isx21) + { + + memset((void *)&last4, 0, sizeof(last4)); + memset((void *)&last96, 0, sizeof(last96)); + + /* Calculate the sum of last 4 intervals: */ + + for (i = 1; i <= 4; i++) + { + p = (unsigned *)&board->intervals[(board->current_interval + + SLICECOM_BOARD_INTERVALS_SIZE - + i) % SLICECOM_BOARD_INTERVALS_SIZE]; + sump = (unsigned *)&last4; + for (j = 0; j < (sizeof(e1_stats_t) / sizeof(unsigned)); j++) + sump[j] += p[j]; + } + + /* Calculate the sum of last 96 intervals: */ + + for (i = 1; i <= 96; i++) + { + p = (unsigned *)&board->intervals[(board->current_interval + + SLICECOM_BOARD_INTERVALS_SIZE - + i) % SLICECOM_BOARD_INTERVALS_SIZE]; + sump = (unsigned *)&last96; + for (j = 0; j < (sizeof(e1_stats_t) / sizeof(unsigned)); j++) + sump[j] += p[j]; + } + + len += scnprintf(page + len, PAGE_SIZE - len, + "Data in current interval (%d seconds elapsed):\n", + board->elapsed_seconds); + len += scnprintf(page + len, PAGE_SIZE - len, + " %d Line Code Violations, %d Path Code Violations, %d E-Bit Errors\n", + curr_int->line_code_violations, + curr_int->path_code_violations, curr_int->e_bit_errors); + len += scnprintf(page + len, PAGE_SIZE - len, + " %d Slip Secs, %d Fr Loss Secs, %d Line Err Secs, %d Degraded Mins\n", + curr_int->slip_secs, curr_int->fr_loss_secs, + curr_int->line_err_secs, curr_int->degraded_mins); + len += scnprintf(page + len, PAGE_SIZE - len, + " %d Errored Secs, %d Bursty Err Secs, %d Severely Err Secs, %d Unavail Secs\n", + curr_int->errored_secs, curr_int->bursty_err_secs, + curr_int->severely_err_secs, curr_int->unavail_secs); + + len += scnprintf(page + len, PAGE_SIZE - len, + "Data in Interval 1 (15 minutes):\n"); + len += scnprintf(page + len, PAGE_SIZE - len, + " %d Line Code Violations, %d Path Code Violations, %d E-Bit Errors\n", + prev_int->line_code_violations, + prev_int->path_code_violations, prev_int->e_bit_errors); + len += scnprintf(page + len, PAGE_SIZE - len, + " %d Slip Secs, %d Fr Loss Secs, 
%d Line Err Secs, %d Degraded Mins\n", + prev_int->slip_secs, prev_int->fr_loss_secs, + prev_int->line_err_secs, prev_int->degraded_mins); + len += scnprintf(page + len, PAGE_SIZE - len, + " %d Errored Secs, %d Bursty Err Secs, %d Severely Err Secs, %d Unavail Secs\n", + prev_int->errored_secs, prev_int->bursty_err_secs, + prev_int->severely_err_secs, prev_int->unavail_secs); + + len += scnprintf(page + len, PAGE_SIZE - len, + "Data in last 4 intervals (1 hour):\n"); + len += scnprintf(page + len, PAGE_SIZE - len, + " %d Line Code Violations, %d Path Code Violations, %d E-Bit Errors\n", + last4.line_code_violations, last4.path_code_violations, + last4.e_bit_errors); + len += scnprintf(page + len, PAGE_SIZE - len, + " %d Slip Secs, %d Fr Loss Secs, %d Line Err Secs, %d Degraded Mins\n", + last4.slip_secs, last4.fr_loss_secs, last4.line_err_secs, + last4.degraded_mins); + len += scnprintf(page + len, PAGE_SIZE - len, + " %d Errored Secs, %d Bursty Err Secs, %d Severely Err Secs, %d Unavail Secs\n", + last4.errored_secs, last4.bursty_err_secs, + last4.severely_err_secs, last4.unavail_secs); + + len += scnprintf(page + len, PAGE_SIZE - len, + "Data in last 96 intervals (24 hours):\n"); + len += scnprintf(page + len, PAGE_SIZE - len, + " %d Line Code Violations, %d Path Code Violations, %d E-Bit Errors\n", + last96.line_code_violations, last96.path_code_violations, + last96.e_bit_errors); + len += scnprintf(page + len, PAGE_SIZE - len, + " %d Slip Secs, %d Fr Loss Secs, %d Line Err Secs, %d Degraded Mins\n", + last96.slip_secs, last96.fr_loss_secs, + last96.line_err_secs, last96.degraded_mins); + len += scnprintf(page + len, PAGE_SIZE - len, + " %d Errored Secs, %d Bursty Err Secs, %d Severely Err Secs, %d Unavail Secs\n", + last96.errored_secs, last96.bursty_err_secs, + last96.severely_err_secs, last96.unavail_secs); + + } + +// len +=scnprintf( page + len, PAGE_SIZE - len, "Special events:\n" ); +// len +=scnprintf( page + len, PAGE_SIZE - len, "\tstat_pri/missed: %u 
/ %u\n", board->stat_pri_races, board->stat_pri_races_missed ); +// len +=scnprintf( page + len, PAGE_SIZE - len, "\tstat_pti/missed: %u / %u\n", board->stat_pti_races, board->stat_pti_races_missed ); + return len; +} + +/* + * Memory dump function. Not used currently. + */ +static int BOARD_dump(struct net_device *dev) +{ + printk + ("BOARD_dump() requested. It is unimplemented, it should not be called\n"); + return (-1); +} + +/* + * /proc file read function for the files registered by this module. + * This function is called by the procfs implementation when a user + * wants to read from a file registered by this module. + * page is the workspace, start should point to the real start of data, + * off is the file offset, data points to the file's proc_dir_entry + * structure. + * Returns the number of bytes copied to the request buffer. + */ + +static int munich_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct proc_dir_entry *file = (struct proc_dir_entry *)data; + struct net_device *dev = file->parent->data; + struct comx_channel *ch = netdev_priv(dev); + struct slicecom_privdata *hw = ch->HW_privdata; + munich_board_t *board; + + int len = 0, i; + u32 timeslots = hw->timeslots; + + board = hw->boardnum + (ch->hardware == &pcicomhw ? 
pcicom_boards : slicecom_boards); + + if (!strcmp(file->name, FILENAME_BOARDNUM)) + len = sprintf(page, "%d\n", hw->boardnum); + else if (!strcmp(file->name, FILENAME_TIMESLOTS)) + { + for (i = 0; i < 32; i++) + if ((1 << i) & timeslots) + len += scnprintf(page + len, PAGE_SIZE - len, "%d ", i); + len += scnprintf(page + len, PAGE_SIZE - len, "\n"); + } + else if (!strcmp(file->name, FILENAME_FRAMING)) + { + i = 0; + while (slicecom_framings[i].value && + slicecom_framings[i].value != board->framing) + i++; + len += scnprintf(page + len, PAGE_SIZE - len, "%s\n", + slicecom_framings[i].name); + } + else if (!strcmp(file->name, FILENAME_LINECODE)) + { + i = 0; + while (slicecom_linecodes[i].value && + slicecom_linecodes[i].value != board->linecode) + i++; + len += scnprintf(page + len, PAGE_SIZE - len, "%s\n", + slicecom_linecodes[i].name); + } + else if (!strcmp(file->name, FILENAME_CLOCK_SOURCE)) + { + i = 0; + while (slicecom_clock_sources[i].value && + slicecom_clock_sources[i].value != board->clock_source) + i++; + len += + scnprintf(page + len, PAGE_SIZE - len, "%s\n", + slicecom_clock_sources[i].name); + } + else if (!strcmp(file->name, FILENAME_LOOPBACK)) + { + i = 0; + while (slicecom_loopbacks[i].value && + slicecom_loopbacks[i].value != board->loopback) + i++; + len += scnprintf(page + len, PAGE_SIZE - len, "%s\n", + slicecom_loopbacks[i].name); + } + /* We set permissions to write-only for REG and LBIREG, but root can read them anyway: */ + else if (!strcmp(file->name, FILENAME_REG)) + { + len += scnprintf(page + len, PAGE_SIZE - len, + "%s: " FILENAME_REG ": write-only file\n", dev->name); + } + else if (!strcmp(file->name, FILENAME_LBIREG)) + { + len += scnprintf(page + len, PAGE_SIZE - len, + "%s: " FILENAME_LBIREG ": write-only file\n", dev->name); + } + else + { + printk("slicecom_read_proc: internal error, filename %s\n", file->name); + return -EBADF; + } + /* file handling administration: count eof status, offset, start address + and count: */ + + 
if (off >= len) + { + *eof = 1; + return 0; + } + + *start = page + off; + if (count >= len - off) + *eof = 1; + return min((off_t) count, (off_t) len - off); +} + +/* + * Write function for /proc files registered by us. + * See the comment on read function above. + * Beware! buffer is in userspace!!! + * Returns the number of bytes written + */ + +static int munich_write_proc(struct file *file, const char *buffer, + u_long count, void *data) +{ + struct proc_dir_entry *entry = (struct proc_dir_entry *)data; + struct net_device *dev = (struct net_device *)entry->parent->data; + struct comx_channel *ch = netdev_priv(dev); + struct slicecom_privdata *hw = ch->HW_privdata; + munich_board_t *board; + + unsigned long ts, tmp_boardnum; + + u32 tmp_timeslots = 0; + char *page, *p; + int i; + + board = hw->boardnum + (ch->hardware == &pcicomhw ? pcicom_boards : slicecom_boards); + + /* Paranoia checking: */ + + if (PDE(file->f_dentry->d_inode) != entry) + { + printk(KERN_ERR "munich_write_proc: file <-> data internal error\n"); + return -EIO; + } + + /* Request tmp buffer */ + if (!(page = (char *)__get_free_page(GFP_KERNEL))) + return -ENOMEM; + + /* Copy user data and cut trailing \n */ + if (copy_from_user(page, buffer, count = min(count, PAGE_SIZE))) { + free_page((unsigned long)page); + return -EFAULT; + } + if (*(page + count - 1) == '\n') + *(page + count - 1) = 0; + *(page + PAGE_SIZE - 1) = 0; + + if (!strcmp(entry->name, FILENAME_BOARDNUM)) + { + tmp_boardnum = simple_strtoul(page, NULL, 0); + if (0 <= tmp_boardnum && tmp_boardnum < MAX_BOARDS) + hw->boardnum = tmp_boardnum; + else + { + printk("%s: " FILENAME_BOARDNUM " range is 0...%d\n", dev->name, + MAX_BOARDS - 1); + free_page((unsigned long)page); + return -EINVAL; + } + } + else if (!strcmp(entry->name, FILENAME_TIMESLOTS)) + { + p = page; + while (*p) + { + if (isspace(*p)) + p++; + else + { + ts = simple_strtoul(p, &p, 10); /* base = 10: Don't read 09 as an octal number */ + /* ts = 0 ha nem tudta 
beolvasni a stringet, erre egy kicsit epitek itt: */ + if (0 <= ts && ts < 32) + { + tmp_timeslots |= (1 << ts); + } + else + { + printk("%s: " FILENAME_TIMESLOTS " range is 1...31\n", + dev->name); + free_page((unsigned long)page); + return -EINVAL; + } + } + } + hw->timeslots = tmp_timeslots; + } + else if (!strcmp(entry->name, FILENAME_FRAMING)) + { + i = 0; + while (slicecom_framings[i].value && + strncmp(slicecom_framings[i].name, page, + strlen(slicecom_framings[i].name))) + i++; + if (!slicecom_framings[i].value) + { + printk("slicecom: %s: Invalid " FILENAME_FRAMING " '%s'\n", + dev->name, page); + free_page((unsigned long)page); + return -EINVAL; + } + else + { /* + * If somebody says: + * echo >boardnum 0 + * echo >framing no-crc4 + * echo >boardnum 1 + * - when the framing was set, hw->boardnum was 0, so it would set the framing for board 0 + * Workaround: allow to set it only if interface is administrative UP + */ + if (netif_running(dev)) + slicecom_set_framing(hw->boardnum, slicecom_framings[i].value); + else + { + printk("%s: " FILENAME_FRAMING + " can not be set while the interface is DOWN\n", + dev->name); + free_page((unsigned long)page); + return -EINVAL; + } + } + } + else if (!strcmp(entry->name, FILENAME_LINECODE)) + { + i = 0; + while (slicecom_linecodes[i].value && + strncmp(slicecom_linecodes[i].name, page, + strlen(slicecom_linecodes[i].name))) + i++; + if (!slicecom_linecodes[i].value) + { + printk("slicecom: %s: Invalid " FILENAME_LINECODE " '%s'\n", + dev->name, page); + free_page((unsigned long)page); + return -EINVAL; + } + else + { /* + * Allow to set it only if interface is administrative UP, + * for the same reason as FILENAME_FRAMING + */ + if (netif_running(dev)) + slicecom_set_linecode(hw->boardnum, + slicecom_linecodes[i].value); + else + { + printk("%s: " FILENAME_LINECODE + " can not be set while the interface is DOWN\n", + dev->name); + free_page((unsigned long)page); + return -EINVAL; + } + } + } + else if 
(!strcmp(entry->name, FILENAME_CLOCK_SOURCE)) + { + i = 0; + while (slicecom_clock_sources[i].value && + strncmp(slicecom_clock_sources[i].name, page, + strlen(slicecom_clock_sources[i].name))) + i++; + if (!slicecom_clock_sources[i].value) + { + printk("%s: Invalid " FILENAME_CLOCK_SOURCE " '%s'\n", dev->name, + page); + free_page((unsigned long)page); + return -EINVAL; + } + else + { /* + * Allow to set it only if interface is administrative UP, + * for the same reason as FILENAME_FRAMING + */ + if (netif_running(dev)) + slicecom_set_clock_source(hw->boardnum, + slicecom_clock_sources[i].value); + else + { + printk("%s: " FILENAME_CLOCK_SOURCE + " can not be set while the interface is DOWN\n", + dev->name); + free_page((unsigned long)page); + return -EINVAL; + } + } + } + else if (!strcmp(entry->name, FILENAME_LOOPBACK)) + { + i = 0; + while (slicecom_loopbacks[i].value && + strncmp(slicecom_loopbacks[i].name, page, + strlen(slicecom_loopbacks[i].name))) + i++; + if (!slicecom_loopbacks[i].value) + { + printk("%s: Invalid " FILENAME_LOOPBACK " '%s'\n", dev->name, page); + free_page((unsigned long)page); + return -EINVAL; + } + else + { /* + * Allow to set it only if interface is administrative UP, + * for the same reason as FILENAME_FRAMING + */ + if (netif_running(dev)) + slicecom_set_loopback(hw->boardnum, + slicecom_loopbacks[i].value); + else + { + printk("%s: " FILENAME_LOOPBACK + " can not be set while the interface is DOWN\n", + dev->name); + free_page((unsigned long)page); + return -EINVAL; + } + } + } + else if (!strcmp(entry->name, FILENAME_REG)) + { /* DEL: 'reg' csak tmp */ + char *p; + u32 *bar1 = board->bar1; + + reg = simple_strtoul(page, &p, 0); + reg_ertek = simple_strtoul(p + 1, NULL, 0); + + if (reg < 0x100) + { + printk("reg(0x%02x) := 0x%08x jiff: %lu\n", reg, reg_ertek, jiffies); + writel(reg_ertek, MUNICH_VIRT(reg >> 2)); + } + else + { + printk("reg(0x%02x) is 0x%08x jiff: %lu\n", reg - 0x100, + readl(MUNICH_VIRT((reg - 0x100) >> 2)), 
jiffies); + } + } + else if (!strcmp(entry->name, FILENAME_LBIREG)) + { /* DEL: 'lbireg' csak tmp */ + char *p; + u8 *lbi = board->lbi; + + lbireg = simple_strtoul(page, &p, 0); + lbireg_ertek = simple_strtoul(p + 1, NULL, 0); + + if (lbireg < 0x100) + { + printk("lbireg(0x%02x) := 0x%02x jiff: %lu\n", lbireg, + lbireg_ertek, jiffies); + writeb(lbireg_ertek, lbi + lbireg); + } + else + printk("lbireg(0x%02x) is 0x%02x jiff: %lu\n", lbireg - 0x100, + readb(lbi + lbireg - 0x100), jiffies); + } + else + { + printk(KERN_ERR "munich_write_proc: internal error, filename %s\n", + entry->name); + free_page((unsigned long)page); + return -EBADF; + } + + /* Don't forget to free the workspace */ + free_page((unsigned long)page); + return count; +} + +/* + * Boardtype init function. + * Called by the comx (upper) layer, when you set boardtype. + * Allocates resources associated to using munich board for this device, + * initializes ch_struct pointers etc. + * Returns 0 on success and standard error codes on error. 
+ */ + +static int init_escape(struct comx_channel *ch) +{ + kfree(ch->HW_privdata); + return -EIO; +} + +static int BOARD_init(struct net_device *dev) +{ + struct comx_channel *ch = netdev_priv(dev); + struct slicecom_privdata *hw; + struct proc_dir_entry *new_file; + + /* Alloc data for private structure */ + if ((ch->HW_privdata = + kmalloc(sizeof(struct slicecom_privdata), GFP_KERNEL)) == NULL) + return -ENOMEM; + + memset(hw = ch->HW_privdata, 0, sizeof(struct slicecom_privdata)); + + /* Register /proc files */ + if ((new_file = create_proc_entry(FILENAME_BOARDNUM, S_IFREG | 0644, + ch->procdir)) == NULL) + return init_escape(ch); + new_file->data = (void *)new_file; + new_file->read_proc = &munich_read_proc; + new_file->write_proc = &munich_write_proc; +// new_file->proc_iops = &comx_normal_inode_ops; + new_file->nlink = 1; + + if (ch->hardware == &slicecomhw) + { + if ((new_file = create_proc_entry(FILENAME_TIMESLOTS, S_IFREG | 0644, + ch->procdir)) == NULL) + return init_escape(ch); + new_file->data = (void *)new_file; + new_file->read_proc = &munich_read_proc; + new_file->write_proc = &munich_write_proc; +// new_file->proc_iops = &comx_normal_inode_ops; + new_file->nlink = 1; + + if ((new_file = create_proc_entry(FILENAME_FRAMING, S_IFREG | 0644, + ch->procdir)) == NULL) + return init_escape(ch); + new_file->data = (void *)new_file; + new_file->read_proc = &munich_read_proc; + new_file->write_proc = &munich_write_proc; +// new_file->proc_iops = &comx_normal_inode_ops; + new_file->nlink = 1; + + if ((new_file = create_proc_entry(FILENAME_LINECODE, S_IFREG | 0644, + ch->procdir)) == NULL) + return init_escape(ch); + new_file->data = (void *)new_file; + new_file->read_proc = &munich_read_proc; + new_file->write_proc = &munich_write_proc; +// new_file->proc_iops = &comx_normal_inode_ops; + new_file->nlink = 1; + + if ((new_file = create_proc_entry(FILENAME_CLOCK_SOURCE, S_IFREG | 0644, + ch->procdir)) == NULL) + return init_escape(ch); + new_file->data = (void 
*)new_file; + new_file->read_proc = &munich_read_proc; + new_file->write_proc = &munich_write_proc; +// new_file->proc_iops = &comx_normal_inode_ops; + new_file->nlink = 1; + + if ((new_file = create_proc_entry(FILENAME_LOOPBACK, S_IFREG | 0644, + ch->procdir)) == NULL) + return init_escape(ch); + new_file->data = (void *)new_file; + new_file->read_proc = &munich_read_proc; + new_file->write_proc = &munich_write_proc; +// new_file->proc_iops = &comx_normal_inode_ops; + new_file->nlink = 1; + } + + /* DEL: ez itt csak fejlesztesi celokra!! */ + if ((new_file = create_proc_entry(FILENAME_REG, S_IFREG | 0200, ch->procdir)) == NULL) + return init_escape(ch); + new_file->data = (void *)new_file; + new_file->read_proc = &munich_read_proc; + new_file->write_proc = &munich_write_proc; +// new_file->proc_iops = &comx_normal_inode_ops; + new_file->nlink = 1; + + /* DEL: ez itt csak fejlesztesi celokra!! */ + if ((new_file = create_proc_entry(FILENAME_LBIREG, S_IFREG | 0200, + ch->procdir)) == NULL) + return init_escape(ch); + new_file->data = (void *)new_file; + new_file->read_proc = &munich_read_proc; + new_file->write_proc = &munich_write_proc; +// new_file->proc_iops = &comx_normal_inode_ops; + new_file->nlink = 1; + + /* Fill in ch_struct hw specific pointers: */ + + ch->HW_txe = MUNICH_txe; + ch->HW_open = MUNICH_open; + ch->HW_close = MUNICH_close; + ch->HW_send_packet = MUNICH_send_packet; +#ifndef COMX_NEW + ch->HW_minden = MUNICH_minden; +#else + ch->HW_statistics = MUNICH_minden; +#endif + + hw->boardnum = SLICECOM_BOARDNUM_DEFAULT; + hw->timeslots = ch->hardware == &pcicomhw ? 0xffffffff : 2; + + /* O.K. Count one more user on this module */ + MOD_INC_USE_COUNT; + return 0; +} + +/* + * Boardtype exit function. + * Called by the comx (upper) layer, when you clear boardtype from munich. + * Frees resources associated to using munich board for this device, + * resets ch_struct pointers etc. 
+ */ +static int BOARD_exit(struct net_device *dev) +{ + struct comx_channel *ch = netdev_priv(dev); + + /* Free private data area */ +// board = hw->boardnum + (ch->hardware == &pcicomhw ? pcicom_boards : slicecom_boards); + + kfree(ch->HW_privdata); + /* Remove /proc files */ + remove_proc_entry(FILENAME_BOARDNUM, ch->procdir); + if (ch->hardware == &slicecomhw) + { + remove_proc_entry(FILENAME_TIMESLOTS, ch->procdir); + remove_proc_entry(FILENAME_FRAMING, ch->procdir); + remove_proc_entry(FILENAME_LINECODE, ch->procdir); + remove_proc_entry(FILENAME_CLOCK_SOURCE, ch->procdir); + remove_proc_entry(FILENAME_LOOPBACK, ch->procdir); + } + remove_proc_entry(FILENAME_REG, ch->procdir); + remove_proc_entry(FILENAME_LBIREG, ch->procdir); + + /* Minus one user for the module accounting */ + MOD_DEC_USE_COUNT; + return 0; +} + +static struct comx_hardware slicecomhw = +{ + "slicecom", +#ifdef COMX_NEW + VERSION, +#endif + BOARD_init, + BOARD_exit, + BOARD_dump, + NULL +}; + +static struct comx_hardware pcicomhw = +{ + "pcicom", +#ifdef COMX_NEW + VERSION, +#endif + BOARD_init, + BOARD_exit, + BOARD_dump, + NULL +}; + +/* Module management */ + +static int __init init_mister(void) +{ + printk(VERSIONSTR); + comx_register_hardware(&slicecomhw); + comx_register_hardware(&pcicomhw); + return munich_probe(); +} + +static void __exit cleanup_mister(void) +{ + int i; + + comx_unregister_hardware("slicecom"); + comx_unregister_hardware("pcicom"); + + for (i = 0; i < MAX_BOARDS; i++) + { + if (slicecom_boards[i].bar1) + iounmap((void *)slicecom_boards[i].bar1); + if (slicecom_boards[i].lbi) + iounmap((void *)slicecom_boards[i].lbi); + if (pcicom_boards[i].bar1) + iounmap((void *)pcicom_boards[i].bar1); + if (pcicom_boards[i].lbi) + iounmap((void *)pcicom_boards[i].lbi); + } +} + +module_init(init_mister); +module_exit(cleanup_mister); diff --git a/drivers/net/wan/comx-proto-fr.c b/drivers/net/wan/comx-proto-fr.c new file mode 100644 index 000000000..c9551366b --- /dev/null +++ 
b/drivers/net/wan/comx-proto-fr.c @@ -0,0 +1,1014 @@ +/* + * Frame-relay protocol module for the COMX driver + * for Linux 2.2.X + * + * Original author: Tivadar Szemethy + * Maintainer: Gergely Madarasz + * + * Copyright (C) 1998-1999 ITConsult-Pro Co. + * + * Contributors: + * Arnaldo Carvalho de Melo (0.73) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Version 0.70 (99/06/14): + * - cleaned up the source code a bit + * - ported back to kernel, now works as builtin code + * + * Version 0.71 (99/06/25): + * - use skb priorities and queues for sending keepalive + * - use device queues for slave->master data transmit + * - set IFF_RUNNING only line protocol up + * - fixes on slave device flags + * + * Version 0.72 (99/07/09): + * - handle slave tbusy with master tbusy (should be fixed) + * - fix the keepalive timer addition/deletion + * + * Version 0.73 (00/08/15) + * - resource release on failure at fr_master_init and + * fr_slave_init + */ + +#define VERSION "0.73" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "comx.h" +#include "comxhw.h" + +MODULE_AUTHOR("Author: Tivadar Szemethy "); +MODULE_DESCRIPTION("Frame Relay protocol implementation for the COMX drivers" + "for Linux kernel 2.4.X"); +MODULE_LICENSE("GPL"); + +#define FRAD_UI 0x03 +#define NLPID_IP 0xcc +#define NLPID_Q933_LMI 0x08 +#define NLPID_CISCO_LMI 0x09 +#define Q933_ENQ 0x75 +#define Q933_LINESTAT 0x51 +#define Q933_COUNTERS 0x53 + +#define MAXALIVECNT 3 /* No. 
of failures */

/* Per-interface private data for the frame-relay protocol layer. */
struct fr_data {
    u16 dlci;                   /* DLCI this interface uses (0..1023) */
    struct net_device *master;  /* FRAD device actually carrying the frames */
    char keepa_pend;
    char keepa_freq;            /* keepalive period in seconds; 0 = keepalive off */
    char keepalivecnt, keeploopcnt; /* missed-reply counter / loop counter */
    struct timer_list keepa_timer;  /* periodic LMI enquiry timer */
    u8 local_cnt, remote_cnt;   /* Q.933 LMI send / receive sequence numbers */
};

static struct comx_protocol fr_master_protocol;
static struct comx_protocol fr_slave_protocol;
static struct comx_hardware fr_dlci;

/*
 * Build and transmit one 13-byte Q.933 Annex A LMI status-enquiry
 * (keepalive) frame on dev, bumping the local sequence counter.
 * Silently does nothing if the atomic skb allocation fails.
 */
static void fr_keepalive_send(struct net_device *dev)
{
    struct comx_channel *ch = dev->priv;
    struct fr_data *fr = ch->LINE_privdata;
    struct sk_buff *skb;
    u8 *fr_packet;

    skb = alloc_skb(dev->hard_header_len + 13, GFP_ATOMIC);

    if (skb == NULL)
        return;

    skb_reserve(skb, dev->hard_header_len);

    fr_packet = (u8 *) skb_put(skb, 13);

    /* Q.922 address field. NOTE(review): the mask (1024 - 15) is 0x3f1,
       not the conventional 0x3f0, but bit 0 is discarded by the ">> 2"
       anyway, so the result is the same — confirm intent. */
    fr_packet[0] = (fr->dlci & (1024 - 15)) >> 2;
    fr_packet[1] = (fr->dlci & 15) << 4 | 1;    // EA bit 1
    fr_packet[2] = FRAD_UI;
    fr_packet[3] = NLPID_Q933_LMI;
    fr_packet[4] = 0;
    fr_packet[5] = Q933_ENQ;
    fr_packet[6] = Q933_LINESTAT;
    fr_packet[7] = 0x01;
    fr_packet[8] = 0x01;
    fr_packet[9] = Q933_COUNTERS;
    fr_packet[10] = 0x02;
    fr_packet[11] = ++fr->local_cnt;    /* our new send-sequence number */
    fr_packet[12] = fr->remote_cnt;     /* last sequence number we received */

    skb->dev = dev;
    skb->priority = TC_PRIO_CONTROL;    /* keepalives jump the data queues */
    dev_queue_xmit(skb);
}

/*
 * Keepalive timer callback (d is the master net_device).
 * After MAXALIVECNT consecutive unanswered enquiries it declares the
 * line protocol down on the master and on every open slave DLCI found
 * by walking the parent /proc directory; then it sends the next enquiry
 * and unconditionally re-arms itself keepa_freq seconds ahead.
 */
static void fr_keepalive_timerfun(unsigned long d)
{
    struct net_device *dev = (struct net_device *)d;
    struct comx_channel *ch = dev->priv;
    struct fr_data *fr = ch->LINE_privdata;
    struct proc_dir_entry *dir = ch->procdir->parent->subdir;
    struct comx_channel *sch;
    struct fr_data *sfr;
    struct net_device *sdev;

    if (ch->init_status & LINE_OPEN) {
        if (fr->keepalivecnt == MAXALIVECNT) {
            /* too many misses: protocol down on master ... */
            comx_status(dev, ch->line_status & ~PROTO_UP);
            dev->flags &= ~IFF_RUNNING;
            /* ... and on every up slave DLCI of this master */
            for (; dir ; dir = dir->next) {
                if(!S_ISDIR(dir->mode)) {
                    continue;
                }

                if ((sdev = dir->data) && (sch = sdev->priv) &&
                    (sdev->type == ARPHRD_DLCI) &&
                    (sfr = sch->LINE_privdata)
                    && (sfr->master == dev) &&
                    (sdev->flags & IFF_UP)) {
                    sdev->flags &= ~IFF_RUNNING;
                    comx_status(sdev,
                                sch->line_status & ~PROTO_UP);
                }
            }
        }
        if (fr->keepalivecnt <= MAXALIVECNT) {
            ++fr->keepalivecnt; /* cleared in fr_rx_lmi() on a good reply */
        }
        fr_keepalive_send(dev);
    }
    mod_timer(&fr->keepa_timer, jiffies + HZ * fr->keepa_freq);
}

/*
 * Handle a received Q.933 LMI frame on the master device.
 * If the peer echoed our last send-sequence number, the keepalive
 * handshake succeeded: reset the miss counter and, if the line was up
 * but the protocol down, bring the protocol up on the master and on
 * all of its up slave DLCIs.
 * Frames for other DLCIs/NLPIDs, or with keepalive disabled, are ignored
 * (caller still owns and frees the skb).
 */
static void fr_rx_lmi(struct net_device *dev, struct sk_buff *skb,
                      u16 dlci, u8 nlpid)
{
    struct comx_channel *ch = dev->priv;
    struct fr_data *fr = ch->LINE_privdata;
    struct proc_dir_entry *dir = ch->procdir->parent->subdir;
    struct comx_channel *sch;
    struct fr_data *sfr;
    struct net_device *sdev;

    if (dlci != fr->dlci || nlpid != NLPID_Q933_LMI || !fr->keepa_freq) {
        return;
    }

    /* byte 7: peer's send sequence; byte 8: peer's echo of ours */
    fr->remote_cnt = skb->data[7];
    if (skb->data[8] == fr->local_cnt) { // keepalive UP!
        fr->keepalivecnt = 0;
        if ((ch->line_status & LINE_UP) &&
            !(ch->line_status & PROTO_UP)) {
            comx_status(dev, ch->line_status |= PROTO_UP);
            dev->flags |= IFF_RUNNING;
            for (; dir ; dir = dir->next) {
                if(!S_ISDIR(dir->mode)) {
                    continue;
                }

                if ((sdev = dir->data) && (sch = sdev->priv) &&
                    (sdev->type == ARPHRD_DLCI) &&
                    (sfr = sch->LINE_privdata)
                    && (sfr->master == dev) &&
                    (sdev->flags & IFF_UP)) {
                    sdev->flags |= IFF_RUNNING;
                    comx_status(sdev,
                                sch->line_status | PROTO_UP);
                }
            }
        }
    }
}

/*
 * Enable (keepa > 0, period in seconds) or disable (keepa == 0) the LMI
 * keepalive protocol on dev. Disabling while the line is up forces the
 * protocol up immediately; enabling forces it down until the first
 * successful handshake.
 */
static void fr_set_keepalive(struct net_device *dev, int keepa)
{
    struct comx_channel *ch = dev->priv;
    struct fr_data *fr = ch->LINE_privdata;

    if (!keepa && fr->keepa_freq) { // switch off
        fr->keepa_freq = 0;
        if (ch->line_status & LINE_UP) {
            comx_status(dev, ch->line_status | PROTO_UP);
            dev->flags |= IFF_RUNNING;
            del_timer(&fr->keepa_timer);
        }
        return;
    }

    if (keepa) { // switch on
        if(fr->keepa_freq && (ch->line_status & LINE_UP)) {
            del_timer(&fr->keepa_timer);
        }
        fr->keepa_freq = keepa;
        fr->local_cnt = fr->remote_cnt = 0;
        init_timer(&fr->keepa_timer);
        fr->keepa_timer.expires = jiffies + HZ;
        fr->keepa_timer.function = fr_keepalive_timerfun;
        fr->keepa_timer.data = (unsigned long)dev;
        ch->line_status &= ~(PROTO_UP | PROTO_LOOP);
        dev->flags
&= ~IFF_RUNNING; + comx_status(dev, ch->line_status); + if(ch->line_status & LINE_UP) { + add_timer(&fr->keepa_timer); + } + } +} + +static void fr_rx(struct net_device *dev, struct sk_buff *skb) +{ + struct comx_channel *ch = dev->priv; + struct proc_dir_entry *dir = ch->procdir->parent->subdir; + struct net_device *sdev = dev; + struct comx_channel *sch; + struct fr_data *sfr; + u16 dlci; + u8 nlpid; + + if(skb->len <= 4 || skb->data[2] != FRAD_UI) { + kfree_skb(skb); + return; + } + + /* Itt majd ki kell talalni, melyik slave kapja a csomagot */ + dlci = ((skb->data[0] & 0xfc) << 2) | ((skb->data[1] & 0xf0) >> 4); + if ((nlpid = skb->data[3]) == 0) { // Optional padding + nlpid = skb->data[4]; + skb_pull(skb, 1); + } + skb_pull(skb, 4); /* DLCI and header throw away */ + + if (ch->debug_flags & DEBUG_COMX_DLCI) { + comx_debug(dev, "Frame received, DLCI: %d, NLPID: 0x%02x\n", + dlci, nlpid); + comx_debug_skb(dev, skb, "Contents"); + } + + /* Megkeressuk, kihez tartozik */ + for (; dir ; dir = dir->next) { + if(!S_ISDIR(dir->mode)) { + continue; + } + if ((sdev = dir->data) && (sch = sdev->priv) && + (sdev->type == ARPHRD_DLCI) && (sfr = sch->LINE_privdata) && + (sfr->master == dev) && (sfr->dlci == dlci)) { + skb->dev = sdev; + if (ch->debug_flags & DEBUG_COMX_DLCI) { + comx_debug(dev, "Passing it to %s\n",sdev->name); + } + if (dev != sdev) { + sch->stats.rx_packets++; + sch->stats.rx_bytes += skb->len; + } + break; + } + } + switch(nlpid) { + case NLPID_IP: + skb->protocol = htons(ETH_P_IP); + skb->mac.raw = skb->data; + comx_rx(sdev, skb); + break; + case NLPID_Q933_LMI: + fr_rx_lmi(dev, skb, dlci, nlpid); + default: + kfree_skb(skb); + break; + } +} + +static int fr_tx(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct proc_dir_entry *dir = ch->procdir->parent->subdir; + struct net_device *sdev; + struct comx_channel *sch; + struct fr_data *sfr; + int cnt = 1; + + /* Ha minden igaz, 2 helyen fog allni a tbusy: a masternel, + es annal a 
slave-nel aki eppen kuldott. + Egy helyen akkor all, ha a master kuldott. + Ez megint jo lesz majd, ha utemezni akarunk */ + + /* This should be fixed, the slave tbusy should be set when + the masters queue is full and reset when not */ + + for (; dir ; dir = dir->next) { + if(!S_ISDIR(dir->mode)) { + continue; + } + if ((sdev = dir->data) && (sch = sdev->priv) && + (sdev->type == ARPHRD_DLCI) && (sfr = sch->LINE_privdata) && + (sfr->master == dev) && (netif_queue_stopped(sdev))) { + netif_wake_queue(sdev); + cnt++; + } + } + + netif_wake_queue(dev); + return 0; +} + +static void fr_status(struct net_device *dev, unsigned short status) +{ + struct comx_channel *ch = dev->priv; + struct fr_data *fr = ch->LINE_privdata; + struct proc_dir_entry *dir = ch->procdir->parent->subdir; + struct net_device *sdev; + struct comx_channel *sch; + struct fr_data *sfr; + + if (status & LINE_UP) { + if (!fr->keepa_freq) { + status |= PROTO_UP; + } + } else { + status &= ~(PROTO_UP | PROTO_LOOP); + } + + if (dev == fr->master && fr->keepa_freq) { + if (status & LINE_UP) { + fr->keepa_timer.expires = jiffies + HZ; + add_timer(&fr->keepa_timer); + fr->keepalivecnt = MAXALIVECNT + 1; + fr->keeploopcnt = 0; + } else { + del_timer(&fr->keepa_timer); + } + } + + /* Itt a status valtozast vegig kell vinni az osszes slave-n */ + for (; dir ; dir = dir->next) { + if(!S_ISDIR(dir->mode)) { + continue; + } + + if ((sdev = dir->data) && (sch = sdev->priv) && + (sdev->type == ARPHRD_FRAD || sdev->type == ARPHRD_DLCI) && + (sfr = sch->LINE_privdata) && (sfr->master == dev)) { + if(status & LINE_UP) { + netif_wake_queue(sdev); + } + comx_status(sdev, status); + if(status & (PROTO_UP | PROTO_LOOP)) { + dev->flags |= IFF_RUNNING; + } else { + dev->flags &= ~IFF_RUNNING; + } + } + } +} + +static int fr_open(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct fr_data *fr = ch->LINE_privdata; + struct proc_dir_entry *comxdir = ch->procdir; + struct comx_channel *mch; + + if 
(!(ch->init_status & HW_OPEN)) { + return -ENODEV; + } + + if ((ch->hardware == &fr_dlci && ch->protocol != &fr_slave_protocol) || + (ch->protocol == &fr_slave_protocol && ch->hardware != &fr_dlci)) { + printk(KERN_ERR "Trying to open an improperly set FR interface, giving up\n"); + return -EINVAL; + } + + if (!fr->master) { + return -ENODEV; + } + mch = fr->master->priv; + if (fr->master != dev && (!(mch->init_status & LINE_OPEN) + || (mch->protocol != &fr_master_protocol))) { + printk(KERN_ERR "Master %s is inactive, or incorrectly set up, " + "unable to open %s\n", fr->master->name, dev->name); + return -ENODEV; + } + + ch->init_status |= LINE_OPEN; + ch->line_status &= ~(PROTO_UP | PROTO_LOOP); + dev->flags &= ~IFF_RUNNING; + + if (fr->master == dev) { + if (fr->keepa_freq) { + fr->keepa_timer.function = fr_keepalive_timerfun; + fr->keepa_timer.data = (unsigned long)dev; + add_timer(&fr->keepa_timer); + } else { + if (ch->line_status & LINE_UP) { + ch->line_status |= PROTO_UP; + dev->flags |= IFF_RUNNING; + } + } + } else { + ch->line_status = mch->line_status; + if(fr->master->flags & IFF_RUNNING) { + dev->flags |= IFF_RUNNING; + } + } + + for (; comxdir ; comxdir = comxdir->next) { + if (strcmp(comxdir->name, FILENAME_DLCI) == 0 || + strcmp(comxdir->name, FILENAME_MASTER) == 0 || + strcmp(comxdir->name, FILENAME_KEEPALIVE) == 0) { + comxdir->mode = S_IFREG | 0444; + } + } +// comx_status(dev, ch->line_status); + return 0; +} + +static int fr_close(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct fr_data *fr = ch->LINE_privdata; + struct proc_dir_entry *comxdir = ch->procdir; + + if (fr->master == dev) { // Ha master + struct proc_dir_entry *dir = ch->procdir->parent->subdir; + struct net_device *sdev = dev; + struct comx_channel *sch; + struct fr_data *sfr; + + if (!(ch->init_status & HW_OPEN)) { + return -ENODEV; + } + + if (fr->keepa_freq) { + del_timer(&fr->keepa_timer); + } + + for (; dir ; dir = dir->next) { + 
if(!S_ISDIR(dir->mode)) { + continue; + } + if ((sdev = dir->data) && (sch = sdev->priv) && + (sdev->type == ARPHRD_DLCI) && + (sfr = sch->LINE_privdata) && + (sfr->master == dev) && + (sch->init_status & LINE_OPEN)) { + dev_close(sdev); + } + } + } + + ch->init_status &= ~LINE_OPEN; + ch->line_status &= ~(PROTO_UP | PROTO_LOOP); + dev->flags &= ~IFF_RUNNING; + + for (; comxdir ; comxdir = comxdir->next) { + if (strcmp(comxdir->name, FILENAME_DLCI) == 0 || + strcmp(comxdir->name, FILENAME_MASTER) == 0 || + strcmp(comxdir->name, FILENAME_KEEPALIVE) == 0) { + comxdir->mode = S_IFREG | 0444; + } + } + + return 0; +} + +static int fr_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct comx_channel *sch, *mch; + struct fr_data *fr = ch->LINE_privdata; + struct fr_data *sfr; + struct net_device *sdev; + struct proc_dir_entry *dir = ch->procdir->parent->subdir; + + if (!fr->master) { + printk(KERN_ERR "BUG: fr_xmit without a master!!! dev: %s\n", dev->name); + return 0; + } + + mch = fr->master->priv; + + /* Ennek majd a slave utemezeskor lesz igazan jelentosege */ + if (ch->debug_flags & DEBUG_COMX_DLCI) { + comx_debug_skb(dev, skb, "Sending frame"); + } + + if (dev != fr->master) { + struct sk_buff *newskb=skb_clone(skb, GFP_ATOMIC); + if (!newskb) + return -ENOMEM; + newskb->dev=fr->master; + dev_queue_xmit(newskb); + ch->stats.tx_bytes += skb->len; + ch->stats.tx_packets++; + dev_kfree_skb(skb); + } else { + netif_stop_queue(dev); + for (; dir ; dir = dir->next) { + if(!S_ISDIR(dir->mode)) { + continue; + } + if ((sdev = dir->data) && (sch = sdev->priv) && + (sdev->type == ARPHRD_DLCI) && (sfr = sch->LINE_privdata) && + (sfr->master == dev) && (netif_queue_stopped(sdev))) { + netif_stop_queue(sdev); + } + } + + switch(mch->HW_send_packet(dev, skb)) { + case FRAME_QUEUED: + netif_wake_queue(dev); + break; + case FRAME_ACCEPTED: + case FRAME_DROPPED: + break; + case FRAME_ERROR: + printk(KERN_ERR "%s: Transmit frame error 
(len %d)\n", + dev->name, skb->len); + break; + } + } + return 0; +} + +static int fr_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, void *daddr, void *saddr, unsigned len) +{ + struct comx_channel *ch = dev->priv; + struct fr_data *fr = ch->LINE_privdata; + + skb_push(skb, dev->hard_header_len); + /* Put in DLCI */ + skb->data[0] = (fr->dlci & (1024 - 15)) >> 2; + skb->data[1] = (fr->dlci & 15) << 4 | 1; // EA bit 1 + skb->data[2] = FRAD_UI; + skb->data[3] = NLPID_IP; + + return dev->hard_header_len; +} + +static int fr_statistics(struct net_device *dev, char *page) +{ + struct comx_channel *ch = dev->priv; + struct fr_data *fr = ch->LINE_privdata; + int len = 0; + + if (fr->master == dev) { + struct proc_dir_entry *dir = ch->procdir->parent->subdir; + struct net_device *sdev; + struct comx_channel *sch; + struct fr_data *sfr; + int slaves = 0; + + len += sprintf(page + len, + "This is a Frame Relay master device\nSlaves: "); + for (; dir ; dir = dir->next) { + if(!S_ISDIR(dir->mode)) { + continue; + } + if ((sdev = dir->data) && (sch = sdev->priv) && + (sdev->type == ARPHRD_DLCI) && + (sfr = sch->LINE_privdata) && + (sfr->master == dev) && (sdev != dev)) { + slaves++; + len += sprintf(page + len, "%s ", sdev->name); + } + } + len += sprintf(page + len, "%s\n", slaves ? "" : "(none)"); + if (fr->keepa_freq) { + len += sprintf(page + len, "Line keepalive (value %d) " + "status %s [%d]\n", fr->keepa_freq, + ch->line_status & PROTO_LOOP ? "LOOP" : + ch->line_status & PROTO_UP ? "UP" : "DOWN", + fr->keepalivecnt); + } else { + len += sprintf(page + len, "Line keepalive protocol " + "is not set\n"); + } + } else { // if slave + len += sprintf(page + len, + "This is a Frame Relay slave device, master: %s\n", + fr->master ? 
fr->master->name : "(not set)"); + } + return len; +} + +static int fr_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct proc_dir_entry *file = (struct proc_dir_entry *)data; + struct net_device *dev = file->parent->data; + struct comx_channel *ch = dev->priv; + struct fr_data *fr = NULL; + int len = 0; + + if (ch) { + fr = ch->LINE_privdata; + } + + if (strcmp(file->name, FILENAME_DLCI) == 0) { + len = sprintf(page, "%04d\n", fr->dlci); + } else if (strcmp(file->name, FILENAME_MASTER) == 0) { + len = sprintf(page, "%-9s\n", fr->master ? fr->master->name : + "(none)"); + } else if (strcmp(file->name, FILENAME_KEEPALIVE) == 0) { + len = fr->keepa_freq ? sprintf(page, "% 3d\n", fr->keepa_freq) + : sprintf(page, "off\n"); + } else { + printk(KERN_ERR "comxfr: internal error, filename %s\n", file->name); + return -EBADF; + } + + if (off >= len) { + *eof = 1; + return 0; + } + + *start = page + off; + if (count >= len - off) *eof = 1; + return min_t(int, count, len - off); +} + +static int fr_write_proc(struct file *file, const char *buffer, + u_long count, void *data) +{ + struct proc_dir_entry *entry = (struct proc_dir_entry *)data; + struct net_device *dev = entry->parent->data; + struct comx_channel *ch = dev->priv; + struct fr_data *fr = NULL; + char *page; + + if (ch) { + fr = ch->LINE_privdata; + } + + if (!(page = (char *)__get_free_page(GFP_KERNEL))) { + return -ENOMEM; + } + + if (copy_from_user(page, buffer, count)) { + free_page((unsigned long)page); + return -EFAULT; + } + if (*(page + count - 1) == '\n') { + *(page + count - 1) = 0; + } + + if (strcmp(entry->name, FILENAME_DLCI) == 0) { + u16 dlci_new = simple_strtoul(page, NULL, 10); + + if (dlci_new > 1023) { + printk(KERN_ERR "Invalid DLCI value\n"); + } + else fr->dlci = dlci_new; + } else if (strcmp(entry->name, FILENAME_MASTER) == 0) { + struct net_device *new_master = dev_get_by_name(page); + + if (new_master && new_master->type == ARPHRD_FRAD) { + struct 
comx_channel *sch = new_master->priv; + struct fr_data *sfr = sch->LINE_privdata; + + if (sfr && sfr->master == new_master) { + if(fr->master) + dev_put(fr->master); + fr->master = new_master; + /* Megorokli a master statuszat */ + ch->line_status = sch->line_status; + } + } + } else if (strcmp(entry->name, FILENAME_KEEPALIVE) == 0) { + int keepa_new = -1; + + if (strcmp(page, KEEPALIVE_OFF) == 0) { + keepa_new = 0; + } else { + keepa_new = simple_strtoul(page, NULL, 10); + } + + if (keepa_new < 0 || keepa_new > 100) { + printk(KERN_ERR "invalid keepalive\n"); + } else { + if (fr->keepa_freq && keepa_new != fr->keepa_freq) { + fr_set_keepalive(dev, 0); + } + if (keepa_new) { + fr_set_keepalive(dev, keepa_new); + } + } + } else { + printk(KERN_ERR "comxfr_write_proc: internal error, filename %s\n", + entry->name); + count = -EBADF; + } + + free_page((unsigned long)page); + return count; +} + +static int fr_exit(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct fr_data *fr = ch->LINE_privdata; + struct net_device *sdev = dev; + struct comx_channel *sch; + struct fr_data *sfr; + struct proc_dir_entry *dir = ch->procdir->parent->subdir; + + /* Ha lezarunk egy master-t, le kell kattintani a slave-eket is */ + if (fr->master && fr->master == dev) { + for (; dir ; dir = dir->next) { + if(!S_ISDIR(dir->mode)) { + continue; + } + if ((sdev = dir->data) && (sch = sdev->priv) && + (sdev->type == ARPHRD_DLCI) && + (sfr = sch->LINE_privdata) && (sfr->master == dev)) { + dev_close(sdev); + sfr->master = NULL; + } + } + } + dev->flags = 0; + dev->type = 0; + dev->mtu = 0; + dev->hard_header_len = 0; + + ch->LINE_rx = NULL; + ch->LINE_tx = NULL; + ch->LINE_status = NULL; + ch->LINE_open = NULL; + ch->LINE_close = NULL; + ch->LINE_xmit = NULL; + ch->LINE_header = NULL; + ch->LINE_rebuild_header = NULL; + ch->LINE_statistics = NULL; + + ch->LINE_status = 0; + + if (fr->master != dev) { // if not master, remove dlci + if(fr->master) + dev_put(fr->master); + 
remove_proc_entry(FILENAME_DLCI, ch->procdir); + remove_proc_entry(FILENAME_MASTER, ch->procdir); + } else { + if (fr->keepa_freq) { + fr_set_keepalive(dev, 0); + } + remove_proc_entry(FILENAME_KEEPALIVE, ch->procdir); + remove_proc_entry(FILENAME_DLCI, ch->procdir); + } + + kfree(fr); + ch->LINE_privdata = NULL; + + MOD_DEC_USE_COUNT; + return 0; +} + +static int fr_master_init(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct fr_data *fr; + struct proc_dir_entry *new_file; + + if ((fr = ch->LINE_privdata = kmalloc(sizeof(struct fr_data), + GFP_KERNEL)) == NULL) { + return -ENOMEM; + } + memset(fr, 0, sizeof(struct fr_data)); + fr->master = dev; // this means master + fr->dlci = 0; // let's say default + + dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; + dev->type = ARPHRD_FRAD; + dev->mtu = 1500; + dev->hard_header_len = 4; + dev->addr_len = 0; + + ch->LINE_rx = fr_rx; + ch->LINE_tx = fr_tx; + ch->LINE_status = fr_status; + ch->LINE_open = fr_open; + ch->LINE_close = fr_close; + ch->LINE_xmit = fr_xmit; + ch->LINE_header = fr_header; + ch->LINE_rebuild_header = NULL; + ch->LINE_statistics = fr_statistics; + + if ((new_file = create_proc_entry(FILENAME_DLCI, S_IFREG | 0644, + ch->procdir)) == NULL) { + goto cleanup_LINE_privdata; + } + new_file->data = (void *)new_file; + new_file->read_proc = &fr_read_proc; + new_file->write_proc = &fr_write_proc; + new_file->size = 5; + new_file->nlink = 1; + + if ((new_file = create_proc_entry(FILENAME_KEEPALIVE, S_IFREG | 0644, + ch->procdir)) == NULL) { + goto cleanup_filename_dlci; + } + new_file->data = (void *)new_file; + new_file->read_proc = &fr_read_proc; + new_file->write_proc = &fr_write_proc; + new_file->size = 4; + new_file->nlink = 1; + + fr_set_keepalive(dev, 0); + + MOD_INC_USE_COUNT; + return 0; +cleanup_filename_dlci: + remove_proc_entry(FILENAME_DLCI, ch->procdir); +cleanup_LINE_privdata: + kfree(fr); + return -EIO; +} + +static int fr_slave_init(struct net_device *dev) +{ + 
struct comx_channel *ch = dev->priv; + struct fr_data *fr; + struct proc_dir_entry *new_file; + + if ((fr = ch->LINE_privdata = kmalloc(sizeof(struct fr_data), + GFP_KERNEL)) == NULL) { + return -ENOMEM; + } + memset(fr, 0, sizeof(struct fr_data)); + + dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; + dev->type = ARPHRD_DLCI; + dev->mtu = 1500; + dev->hard_header_len = 4; + dev->addr_len = 0; + + ch->LINE_rx = fr_rx; + ch->LINE_tx = fr_tx; + ch->LINE_status = fr_status; + ch->LINE_open = fr_open; + ch->LINE_close = fr_close; + ch->LINE_xmit = fr_xmit; + ch->LINE_header = fr_header; + ch->LINE_rebuild_header = NULL; + ch->LINE_statistics = fr_statistics; + + if ((new_file = create_proc_entry(FILENAME_DLCI, S_IFREG | 0644, + ch->procdir)) == NULL) { + goto cleanup_LINE_privdata; + } + + new_file->data = (void *)new_file; + new_file->read_proc = &fr_read_proc; + new_file->write_proc = &fr_write_proc; + new_file->size = 5; + new_file->nlink = 1; + + if ((new_file = create_proc_entry(FILENAME_MASTER, S_IFREG | 0644, + ch->procdir)) == NULL) { + goto cleanup_filename_dlci; + } + new_file->data = (void *)new_file; + new_file->read_proc = &fr_read_proc; + new_file->write_proc = &fr_write_proc; + new_file->size = 10; + new_file->nlink = 1; + MOD_INC_USE_COUNT; + return 0; +cleanup_filename_dlci: + remove_proc_entry(FILENAME_DLCI, ch->procdir); +cleanup_LINE_privdata: + kfree(fr); + return -EIO; +} + +static int dlci_open(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + + ch->init_status |= HW_OPEN; + + MOD_INC_USE_COUNT; + return 0; +} + +static int dlci_close(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + + ch->init_status &= ~HW_OPEN; + + MOD_DEC_USE_COUNT; + return 0; +} + +static int dlci_txe(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct fr_data *fr = ch->LINE_privdata; + + if (!fr->master) { + return 0; + } + + ch = fr->master->priv; + fr = ch->LINE_privdata; + return ch->HW_txe(fr->master); 
+} + +static int dlci_statistics(struct net_device *dev, char *page) +{ + return 0; +} + +static int dlci_init(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + + ch->HW_open = dlci_open; + ch->HW_close = dlci_close; + ch->HW_txe = dlci_txe; + ch->HW_statistics = dlci_statistics; + + /* Nincs egyeb hw info, mert ugyis a fr->master-bol fog minden kiderulni */ + + MOD_INC_USE_COUNT; + return 0; +} + +static int dlci_exit(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + + ch->HW_open = NULL; + ch->HW_close = NULL; + ch->HW_txe = NULL; + ch->HW_statistics = NULL; + + MOD_DEC_USE_COUNT; + return 0; +} + +static int dlci_dump(struct net_device *dev) +{ + printk(KERN_INFO "dlci_dump %s, HOGY MI ???\n", dev->name); + return -1; +} + +static struct comx_protocol fr_master_protocol = { + .name = "frad", + .version = VERSION, + .encap_type = ARPHRD_FRAD, + .line_init = fr_master_init, + .line_exit = fr_exit, +}; + +static struct comx_protocol fr_slave_protocol = { + .name = "ietf-ip", + .version = VERSION, + .encap_type = ARPHRD_DLCI, + .line_init = fr_slave_init, + .line_exit = fr_exit, +}; + +static struct comx_hardware fr_dlci = { + .name = "dlci", + .version = VERSION, + .hw_init = dlci_init, + .hw_exit = dlci_exit, + .hw_dump = dlci_dump, +}; + +static int __init comx_proto_fr_init(void) +{ + int ret; + + if ((ret = comx_register_hardware(&fr_dlci))) { + return ret; + } + if ((ret = comx_register_protocol(&fr_master_protocol))) { + return ret; + } + return comx_register_protocol(&fr_slave_protocol); +} + +static void __exit comx_proto_fr_exit(void) +{ + comx_unregister_hardware(fr_dlci.name); + comx_unregister_protocol(fr_master_protocol.name); + comx_unregister_protocol(fr_slave_protocol.name); +} + +module_init(comx_proto_fr_init); +module_exit(comx_proto_fr_exit); diff --git a/drivers/net/wan/comx-proto-lapb.c b/drivers/net/wan/comx-proto-lapb.c new file mode 100644 index 000000000..b203ff689 --- /dev/null +++ 
b/drivers/net/wan/comx-proto-lapb.c @@ -0,0 +1,551 @@ +/* + * LAPB protocol module for the COMX driver + * for Linux kernel 2.2.X + * + * Original author: Tivadar Szemethy + * Maintainer: Gergely Madarasz + * + * Copyright (C) 1997-1999 (C) ITConsult-Pro Co. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Version 0.80 (99/06/14): + * - cleaned up the source code a bit + * - ported back to kernel, now works as non-module + * + * Changed (00/10/29, Henner Eisen): + * - comx_rx() / comxlapb_data_indication() return status. + * + */ + +#define VERSION "0.80" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "comx.h" +#include "comxhw.h" + +static struct proc_dir_entry *create_comxlapb_proc_entry(char *name, int mode, + int size, struct proc_dir_entry *dir); + +static void comxlapb_rx(struct net_device *dev, struct sk_buff *skb) +{ + if (!dev || !dev->priv) { + dev_kfree_skb(skb); + } else { + lapb_data_received(dev, skb); + } +} + +static int comxlapb_tx(struct net_device *dev) +{ + netif_wake_queue(dev); + return 0; +} + +static int comxlapb_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, void *daddr, void *saddr, unsigned len) +{ + return dev->hard_header_len; +} + +static void comxlapb_status(struct net_device *dev, unsigned short status) +{ + struct comx_channel *ch; + + if (!dev || !(ch = dev->priv)) { + return; + } + if (status & LINE_UP) { + netif_wake_queue(dev); + } + comx_status(dev, status); +} + +static int comxlapb_open(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + int err = 0; + + if (!(ch->init_status & HW_OPEN)) { + return -ENODEV; + } + + err = lapb_connect_request(dev); + + if (ch->debug_flags & DEBUG_COMX_LAPB) { + comx_debug(dev, 
"%s: lapb opened, error code: %d\n", + dev->name, err); + } + + if (!err) { + ch->init_status |= LINE_OPEN; + MOD_INC_USE_COUNT; + } + return err; +} + +static int comxlapb_close(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + + if (!(ch->init_status & HW_OPEN)) { + return -ENODEV; + } + + if (ch->debug_flags & DEBUG_COMX_LAPB) { + comx_debug(dev, "%s: lapb closed\n", dev->name); + } + + lapb_disconnect_request(dev); + + ch->init_status &= ~LINE_OPEN; + ch->line_status &= ~PROTO_UP; + MOD_DEC_USE_COUNT; + return 0; +} + +static int comxlapb_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct sk_buff *skb2; + + if (!dev || !(ch = dev->priv) || !(dev->flags & (IFF_UP | IFF_RUNNING))) { + return -ENODEV; + } + + if (dev->type == ARPHRD_X25) { // first byte tells what to do + switch(skb->data[0]) { + case 0x00: + break; // transmit + case 0x01: + lapb_connect_request(dev); + kfree_skb(skb); + return 0; + case 0x02: + lapb_disconnect_request(dev); + default: + kfree_skb(skb); + return 0; + } + skb_pull(skb,1); + } + + netif_stop_queue(dev); + + if ((skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { + lapb_data_request(dev, skb2); + } + + return FRAME_ACCEPTED; +} + +static int comxlapb_statistics(struct net_device *dev, char *page) +{ + struct lapb_parms_struct parms; + int len = 0; + + len += sprintf(page + len, "Line status: "); + if (lapb_getparms(dev, &parms) != LAPB_OK) { + len += sprintf(page + len, "not initialized\n"); + return len; + } + len += sprintf(page + len, "%s (%s), T1: %d/%d, T2: %d/%d, N2: %d/%d, " + "window: %d\n", parms.mode & LAPB_DCE ? "DCE" : "DTE", + parms.mode & LAPB_EXTENDED ? 
"EXTENDED" : "STANDARD", + parms.t1timer, parms.t1, parms.t2timer, parms.t2, + parms.n2count, parms.n2, parms.window); + + return len; +} + +static int comxlapb_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct proc_dir_entry *file = (struct proc_dir_entry *)data; + struct net_device *dev = file->parent->data; + struct lapb_parms_struct parms; + int len = 0; + + if (lapb_getparms(dev, &parms)) { + return -ENODEV; + } + + if (strcmp(file->name, FILENAME_T1) == 0) { + len += sprintf(page + len, "%02u / %02u\n", + parms.t1timer, parms.t1); + } else if (strcmp(file->name, FILENAME_T2) == 0) { + len += sprintf(page + len, "%02u / %02u\n", + parms.t2timer, parms.t2); + } else if (strcmp(file->name, FILENAME_N2) == 0) { + len += sprintf(page + len, "%02u / %02u\n", + parms.n2count, parms.n2); + } else if (strcmp(file->name, FILENAME_WINDOW) == 0) { + len += sprintf(page + len, "%u\n", parms.window); + } else if (strcmp(file->name, FILENAME_MODE) == 0) { + len += sprintf(page + len, "%s, %s\n", + parms.mode & LAPB_DCE ? "DCE" : "DTE", + parms.mode & LAPB_EXTENDED ? 
"EXTENDED" : "STANDARD"); + } else { + printk(KERN_ERR "comxlapb: internal error, filename %s\n", file->name); + return -EBADF; + } + + if (off >= len) { + *eof = 1; + return 0; + } + + *start = page + off; + if (count >= len - off) { + *eof = 1; + } + return min_t(int, count, len - off); +} + +static int comxlapb_write_proc(struct file *file, const char *buffer, + u_long count, void *data) +{ + struct proc_dir_entry *entry = (struct proc_dir_entry *)data; + struct net_device *dev = entry->parent->data; + struct lapb_parms_struct parms; + unsigned long parm; + char *page; + + if (lapb_getparms(dev, &parms)) { + return -ENODEV; + } + + if (!(page = (char *)__get_free_page(GFP_KERNEL))) { + return -ENOMEM; + } + + if (copy_from_user(page, buffer, count)) { + free_page((unsigned long)page); + return -EFAULT; + } + if (*(page + count - 1) == '\n') { + *(page + count - 1) = 0; + } + + if (strcmp(entry->name, FILENAME_T1) == 0) { + parm=simple_strtoul(page,NULL,10); + if (parm > 0 && parm < 100) { + parms.t1=parm; + lapb_setparms(dev, &parms); + } + } else if (strcmp(entry->name, FILENAME_T2) == 0) { + parm=simple_strtoul(page, NULL, 10); + if (parm > 0 && parm < 100) { + parms.t2=parm; + lapb_setparms(dev, &parms); + } + } else if (strcmp(entry->name, FILENAME_N2) == 0) { + parm=simple_strtoul(page, NULL, 10); + if (parm > 0 && parm < 100) { + parms.n2=parm; + lapb_setparms(dev, &parms); + } + } else if (strcmp(entry->name, FILENAME_WINDOW) == 0) { + parms.window = simple_strtoul(page, NULL, 10); + lapb_setparms(dev, &parms); + } else if (strcmp(entry->name, FILENAME_MODE) == 0) { + if (comx_strcasecmp(page, "dte") == 0) { + parms.mode &= ~(LAPB_DCE | LAPB_DTE); + parms.mode |= LAPB_DTE; + } else if (comx_strcasecmp(page, "dce") == 0) { + parms.mode &= ~(LAPB_DTE | LAPB_DCE); + parms.mode |= LAPB_DCE; + } else if (comx_strcasecmp(page, "std") == 0 || + comx_strcasecmp(page, "standard") == 0) { + parms.mode &= ~LAPB_EXTENDED; + parms.mode |= LAPB_STANDARD; + } else if 
(comx_strcasecmp(page, "ext") == 0 || + comx_strcasecmp(page, "extended") == 0) { + parms.mode &= ~LAPB_STANDARD; + parms.mode |= LAPB_EXTENDED; + } + lapb_setparms(dev, &parms); + } else { + printk(KERN_ERR "comxlapb_write_proc: internal error, filename %s\n", + entry->name); + return -EBADF; + } + + free_page((unsigned long)page); + return count; +} + +static void comxlapb_connected(struct net_device *dev, int reason) +{ + struct comx_channel *ch = dev->priv; + struct proc_dir_entry *comxdir = ch->procdir->subdir; + + if (ch->debug_flags & DEBUG_COMX_LAPB) { + comx_debug(ch->dev, "%s: lapb connected, reason: %d\n", + ch->dev->name, reason); + } + + if (ch->dev->type == ARPHRD_X25) { + unsigned char *p; + struct sk_buff *skb; + + if ((skb = dev_alloc_skb(1)) == NULL) { + printk(KERN_ERR "comxlapb: out of memory!\n"); + return; + } + p = skb_put(skb,1); + *p = 0x01; // link established + skb->dev = ch->dev; + skb->protocol = htons(ETH_P_X25); + skb->mac.raw = skb->data; + skb->pkt_type = PACKET_HOST; + + netif_rx(skb); + ch->dev->last_rx = jiffies; + } + + for (; comxdir; comxdir = comxdir->next) { + if (strcmp(comxdir->name, FILENAME_MODE) == 0) { + comxdir->mode = S_IFREG | 0444; + } + } + + + ch->line_status |= PROTO_UP; + comx_status(ch->dev, ch->line_status); +} + +static void comxlapb_disconnected(struct net_device *dev, int reason) +{ + struct comx_channel *ch = dev->priv; + struct proc_dir_entry *comxdir = ch->procdir->subdir; + + if (ch->debug_flags & DEBUG_COMX_LAPB) { + comx_debug(ch->dev, "%s: lapb disconnected, reason: %d\n", + ch->dev->name, reason); + } + + if (ch->dev->type == ARPHRD_X25) { + unsigned char *p; + struct sk_buff *skb; + + if ((skb = dev_alloc_skb(1)) == NULL) { + printk(KERN_ERR "comxlapb: out of memory!\n"); + return; + } + p = skb_put(skb,1); + *p = 0x02; // link disconnected + skb->dev = ch->dev; + skb->protocol = htons(ETH_P_X25); + skb->mac.raw = skb->data; + skb->pkt_type = PACKET_HOST; + + netif_rx(skb); + ch->dev->last_rx = 
jiffies; + } + + for (; comxdir; comxdir = comxdir->next) { + if (strcmp(comxdir->name, FILENAME_MODE) == 0) { + comxdir->mode = S_IFREG | 0644; + } + } + + ch->line_status &= ~PROTO_UP; + comx_status(ch->dev, ch->line_status); +} + +static int comxlapb_data_indication(struct net_device *dev, struct sk_buff *skb) +{ + struct comx_channel *ch = dev->priv; + + if (ch->dev->type == ARPHRD_X25) { + skb_push(skb, 1); + + if (skb_cow(skb, 1)) + return NET_RX_DROP; + + skb->data[0] = 0; // indicate data for X25 + skb->protocol = htons(ETH_P_X25); + } else { + skb->protocol = htons(ETH_P_IP); + } + + skb->dev = ch->dev; + skb->mac.raw = skb->data; + return comx_rx(ch->dev, skb); +} + +static void comxlapb_data_transmit(struct net_device *dev, struct sk_buff *skb) +{ + struct comx_channel *ch = dev->priv; + + if (ch->HW_send_packet) { + ch->HW_send_packet(ch->dev, skb); + } +} + +static int comxlapb_exit(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + + dev->flags = 0; + dev->type = 0; + dev->mtu = 0; + dev->hard_header_len = 0; + + ch->LINE_rx = NULL; + ch->LINE_tx = NULL; + ch->LINE_status = NULL; + ch->LINE_open = NULL; + ch->LINE_close = NULL; + ch->LINE_xmit = NULL; + ch->LINE_header = NULL; + ch->LINE_statistics = NULL; + + if (ch->debug_flags & DEBUG_COMX_LAPB) { + comx_debug(dev, "%s: unregistering lapb\n", dev->name); + } + lapb_unregister(dev); + + remove_proc_entry(FILENAME_T1, ch->procdir); + remove_proc_entry(FILENAME_T2, ch->procdir); + remove_proc_entry(FILENAME_N2, ch->procdir); + remove_proc_entry(FILENAME_MODE, ch->procdir); + remove_proc_entry(FILENAME_WINDOW, ch->procdir); + + MOD_DEC_USE_COUNT; + return 0; +} + +static int comxlapb_init(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct lapb_register_struct lapbreg; + + dev->mtu = 1500; + dev->hard_header_len = 4; + dev->addr_len = 0; + + ch->LINE_rx = comxlapb_rx; + ch->LINE_tx = comxlapb_tx; + ch->LINE_status = comxlapb_status; + ch->LINE_open = 
comxlapb_open; + ch->LINE_close = comxlapb_close; + ch->LINE_xmit = comxlapb_xmit; + ch->LINE_header = comxlapb_header; + ch->LINE_statistics = comxlapb_statistics; + + lapbreg.connect_confirmation = comxlapb_connected; + lapbreg.connect_indication = comxlapb_connected; + lapbreg.disconnect_confirmation = comxlapb_disconnected; + lapbreg.disconnect_indication = comxlapb_disconnected; + lapbreg.data_indication = comxlapb_data_indication; + lapbreg.data_transmit = comxlapb_data_transmit; + if (lapb_register(dev, &lapbreg)) { + return -ENOMEM; + } + if (ch->debug_flags & DEBUG_COMX_LAPB) { + comx_debug(dev, "%s: lapb registered\n", dev->name); + } + + if (!create_comxlapb_proc_entry(FILENAME_T1, 0644, 8, ch->procdir)) { + return -ENOMEM; + } + if (!create_comxlapb_proc_entry(FILENAME_T2, 0644, 8, ch->procdir)) { + return -ENOMEM; + } + if (!create_comxlapb_proc_entry(FILENAME_N2, 0644, 8, ch->procdir)) { + return -ENOMEM; + } + if (!create_comxlapb_proc_entry(FILENAME_MODE, 0644, 14, ch->procdir)) { + return -ENOMEM; + } + if (!create_comxlapb_proc_entry(FILENAME_WINDOW, 0644, 0, ch->procdir)) { + return -ENOMEM; + } + + MOD_INC_USE_COUNT; + return 0; +} + +static int comxlapb_init_lapb(struct net_device *dev) +{ + dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; + dev->type = ARPHRD_LAPB; + + return(comxlapb_init(dev)); +} + +static int comxlapb_init_x25(struct net_device *dev) +{ + dev->flags = IFF_NOARP; + dev->type = ARPHRD_X25; + + return(comxlapb_init(dev)); +} + +static struct proc_dir_entry *create_comxlapb_proc_entry(char *name, int mode, + int size, struct proc_dir_entry *dir) +{ + struct proc_dir_entry *new_file; + + if ((new_file = create_proc_entry(name, S_IFREG | mode, dir)) != NULL) { + new_file->data = (void *)new_file; + new_file->read_proc = &comxlapb_read_proc; + new_file->write_proc = &comxlapb_write_proc; + new_file->size = size; + new_file->nlink = 1; + } + return(new_file); +} + +static struct comx_protocol comxlapb_protocol = { + 
"lapb", + VERSION, + ARPHRD_LAPB, + comxlapb_init_lapb, + comxlapb_exit, + NULL +}; + +static struct comx_protocol comx25_protocol = { + "x25", + VERSION, + ARPHRD_X25, + comxlapb_init_x25, + comxlapb_exit, + NULL +}; + +static int __init comx_proto_lapb_init(void) +{ + int ret; + + if ((ret = comx_register_protocol(&comxlapb_protocol)) != 0) { + return ret; + } + return comx_register_protocol(&comx25_protocol); +} + +static void __exit comx_proto_lapb_exit(void) +{ + comx_unregister_protocol(comxlapb_protocol.name); + comx_unregister_protocol(comx25_protocol.name); +} + +module_init(comx_proto_lapb_init); +module_exit(comx_proto_lapb_exit); + +MODULE_LICENSE("GPL"); diff --git a/drivers/net/wan/comx-proto-ppp.c b/drivers/net/wan/comx-proto-ppp.c new file mode 100644 index 000000000..3f4501014 --- /dev/null +++ b/drivers/net/wan/comx-proto-ppp.c @@ -0,0 +1,269 @@ +/* + * Synchronous PPP / Cisco-HDLC driver for the COMX boards + * + * Author: Gergely Madarasz + * + * based on skeleton code by Tivadar Szemethy + * + * Copyright (C) 1999 ITConsult-Pro Co. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * + * Version 0.10 (99/06/10): + * - written the first code :) + * + * Version 0.20 (99/06/16): + * - added hdlc protocol + * - protocol up is IFF_RUNNING + * + * Version 0.21 (99/07/15): + * - some small fixes with the line status + * + * Version 0.22 (99/08/05): + * - don't test IFF_RUNNING but the pp_link_state of the sppp + * + * Version 0.23 (99/12/02): + * - tbusy fixes + * + */ + +#define VERSION "0.23" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "comx.h" + +MODULE_AUTHOR("Author: Gergely Madarasz "); +MODULE_DESCRIPTION("Cisco-HDLC / Synchronous PPP driver for the COMX sync serial boards"); +MODULE_LICENSE("GPL"); + +static struct comx_protocol syncppp_protocol; +static struct comx_protocol hdlc_protocol; + +struct syncppp_data { + struct timer_list status_timer; +}; + +static void syncppp_status_timerfun(unsigned long d) { + struct net_device *dev=(struct net_device *)d; + struct comx_channel *ch=dev->priv; + struct syncppp_data *spch=ch->LINE_privdata; + struct sppp *sp = (struct sppp *)sppp_of(dev); + + if(!(ch->line_status & PROTO_UP) && + (sp->pp_link_state==SPPP_LINK_UP)) { + comx_status(dev, ch->line_status | PROTO_UP); + } + if((ch->line_status & PROTO_UP) && + (sp->pp_link_state==SPPP_LINK_DOWN)) { + comx_status(dev, ch->line_status & ~PROTO_UP); + } + mod_timer(&spch->status_timer,jiffies + HZ*3); +} + +static int syncppp_tx(struct net_device *dev) +{ + struct comx_channel *ch=dev->priv; + + if(ch->line_status & LINE_UP) { + netif_wake_queue(dev); + } + return 0; +} + +static void syncppp_status(struct net_device *dev, unsigned short status) +{ + status &= ~(PROTO_UP | PROTO_LOOP); + if(status & LINE_UP) { + netif_wake_queue(dev); + sppp_open(dev); + } else { + /* Line went down */ + netif_stop_queue(dev); + sppp_close(dev); + } + comx_status(dev, status); +} + +static int syncppp_open(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct syncppp_data 
*spch = ch->LINE_privdata; + + if (!(ch->init_status & HW_OPEN)) return -ENODEV; + + ch->init_status |= LINE_OPEN; + ch->line_status &= ~(PROTO_UP | PROTO_LOOP); + + if(ch->line_status & LINE_UP) { + sppp_open(dev); + } + + init_timer(&spch->status_timer); + spch->status_timer.function=syncppp_status_timerfun; + spch->status_timer.data=(unsigned long)dev; + spch->status_timer.expires=jiffies + HZ*3; + add_timer(&spch->status_timer); + + return 0; +} + +static int syncppp_close(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct syncppp_data *spch = ch->LINE_privdata; + + if (!(ch->init_status & HW_OPEN)) return -ENODEV; + del_timer(&spch->status_timer); + + sppp_close(dev); + + ch->init_status &= ~LINE_OPEN; + ch->line_status &= ~(PROTO_UP | PROTO_LOOP); + + return 0; +} + +static int syncppp_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + + netif_stop_queue(dev); + switch(ch->HW_send_packet(dev, skb)) { + case FRAME_QUEUED: + netif_wake_queue(dev); + break; + case FRAME_ACCEPTED: + case FRAME_DROPPED: + break; + case FRAME_ERROR: + printk(KERN_ERR "%s: Transmit frame error (len %d)\n", + dev->name, skb->len); + break; + } + return 0; +} + + +static int syncppp_statistics(struct net_device *dev, char *page) +{ + int len = 0; + + len += sprintf(page + len, " "); + return len; +} + + +static int syncppp_exit(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + + sppp_detach(dev); + + dev->flags = 0; + dev->type = 0; + dev->mtu = 0; + + ch->LINE_rx = NULL; + ch->LINE_tx = NULL; + ch->LINE_status = NULL; + ch->LINE_open = NULL; + ch->LINE_close = NULL; + ch->LINE_xmit = NULL; + ch->LINE_header = NULL; + ch->LINE_rebuild_header = NULL; + ch->LINE_statistics = NULL; + + kfree(ch->LINE_privdata); + ch->LINE_privdata = NULL; + + MOD_DEC_USE_COUNT; + return 0; +} + +static int syncppp_init(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct ppp_device *pppdev = (struct 
ppp_device *)ch->if_ptr; + + ch->LINE_privdata = kmalloc(sizeof(struct syncppp_data), GFP_KERNEL); + if (!ch->LINE_privdata) + return -ENOMEM; + + pppdev->dev = dev; + sppp_attach(pppdev); + + if(ch->protocol == &hdlc_protocol) { + pppdev->sppp.pp_flags |= PP_CISCO; + dev->type = ARPHRD_HDLC; + } else { + pppdev->sppp.pp_flags &= ~PP_CISCO; + dev->type = ARPHRD_PPP; + } + + ch->LINE_rx = sppp_input; + ch->LINE_tx = syncppp_tx; + ch->LINE_status = syncppp_status; + ch->LINE_open = syncppp_open; + ch->LINE_close = syncppp_close; + ch->LINE_xmit = syncppp_xmit; + ch->LINE_header = NULL; + ch->LINE_statistics = syncppp_statistics; + + + MOD_INC_USE_COUNT; + return 0; +} + +static struct comx_protocol syncppp_protocol = { + "ppp", + VERSION, + ARPHRD_PPP, + syncppp_init, + syncppp_exit, + NULL +}; + +static struct comx_protocol hdlc_protocol = { + "hdlc", + VERSION, + ARPHRD_PPP, + syncppp_init, + syncppp_exit, + NULL +}; + +static int __init comx_proto_ppp_init(void) +{ + int ret; + + ret = comx_register_protocol(&hdlc_protocol); + if (!ret) { + ret = comx_register_protocol(&syncppp_protocol); + if (ret) + comx_unregister_protocol(hdlc_protocol.name); + } + return ret; +} + +static void __exit comx_proto_ppp_exit(void) +{ + comx_unregister_protocol(syncppp_protocol.name); + comx_unregister_protocol(hdlc_protocol.name); +} + +module_init(comx_proto_ppp_init); +module_exit(comx_proto_ppp_exit); diff --git a/drivers/net/wan/comx.c b/drivers/net/wan/comx.c new file mode 100644 index 000000000..6c0e3fcd2 --- /dev/null +++ b/drivers/net/wan/comx.c @@ -0,0 +1,1128 @@ +/* + * Device driver framework for the COMX line of synchronous serial boards + * + * for Linux kernel 2.2.X / 2.4.X + * + * Original authors: Arpad Bakay , + * Peter Bajan , + * Previous maintainer: Tivadar Szemethy + * Current maintainer: Gergely Madarasz + * + * Copyright (C) 1995-1999 ITConsult-Pro Co. 
+ * + * Contributors: + * Arnaldo Carvalho de Melo (0.85) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Version 0.80 (99/06/11): + * - clean up source code (playing a bit of indent) + * - port back to kernel, add support for non-module versions + * - add support for board resets when channel protocol is down + * - reset the device structure after protocol exit + * the syncppp driver needs it + * - add support for /proc/comx/protocols and + * /proc/comx/boardtypes + * + * Version 0.81 (99/06/21): + * - comment out the board reset support code, the locomx + * driver seems not buggy now + * - printk() levels fixed + * + * Version 0.82 (99/07/08): + * - Handle stats correctly if the lowlevel driver is + * is not a comx one (locomx - z85230) + * + * Version 0.83 (99/07/15): + * - reset line_status when interface is down + * + * Version 0.84 (99/12/01): + * - comx_status should not check for IFF_UP (to report + * line status from dev->open()) + * + * Version 0.85 (00/08/15): + * - resource release on failure in comx_mkdir + * - fix return value on failure at comx_write_proc + * + * Changed (00/10/29, Henner Eisen): + * - comx_rx() / comxlapb_data_indication() return status. 
+ */ + +#define VERSION "0.85" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_KMOD +#include +#endif + +#ifndef CONFIG_PROC_FS +#error For now, COMX really needs the /proc filesystem +#endif + +#include +#include "comx.h" + +MODULE_AUTHOR("Gergely Madarasz "); +MODULE_DESCRIPTION("Common code for the COMX synchronous serial adapters"); +MODULE_LICENSE("GPL"); + +static struct comx_hardware *comx_channels = NULL; +static struct comx_protocol *comx_lines = NULL; + +static int comx_mkdir(struct inode *, struct dentry *, int); +static int comx_rmdir(struct inode *, struct dentry *); +static struct dentry *comx_lookup(struct inode *, struct dentry *, struct nameidata *); + +static struct inode_operations comx_root_inode_ops = { + .lookup = comx_lookup, + .mkdir = comx_mkdir, + .rmdir = comx_rmdir, +}; + +static int comx_delete_dentry(struct dentry *dentry); +static struct proc_dir_entry *create_comx_proc_entry(char *name, int mode, + int size, struct proc_dir_entry *dir); + +static struct dentry_operations comx_dentry_operations = { + .d_delete = comx_delete_dentry, +}; + + +static struct proc_dir_entry * comx_root_dir; + +struct comx_debugflags_struct comx_debugflags[] = { + { "comx_rx", DEBUG_COMX_RX }, + { "comx_tx", DEBUG_COMX_TX }, + { "hw_tx", DEBUG_HW_TX }, + { "hw_rx", DEBUG_HW_RX }, + { "hdlc_keepalive", DEBUG_HDLC_KEEPALIVE }, + { "comxppp", DEBUG_COMX_PPP }, + { "comxlapb", DEBUG_COMX_LAPB }, + { "dlci", DEBUG_COMX_DLCI }, + { NULL, 0 } +}; + + +int comx_debug(struct net_device *dev, char *fmt, ...) +{ + struct comx_channel *ch = dev->priv; + char *page,*str; + va_list args; + int len; + + if (!ch->debug_area) return 0; + + if (!(page = (char *)__get_free_page(GFP_ATOMIC))) return -ENOMEM; + + va_start(args, fmt); + len = vsprintf(str = page, fmt, args); + va_end(args); + + if (len >= PAGE_SIZE) { + printk(KERN_ERR "comx_debug: PANIC! 
len = %d !!!\n", len); + free_page((unsigned long)page); + return -EINVAL; + } + + while (len) { + int to_copy; + int free = (ch->debug_start - ch->debug_end + ch->debug_size) + % ch->debug_size; + + to_copy = min_t(int, free ? free : ch->debug_size, + min_t(int, ch->debug_size - ch->debug_end, len)); + memcpy(ch->debug_area + ch->debug_end, str, to_copy); + str += to_copy; + len -= to_copy; + ch->debug_end = (ch->debug_end + to_copy) % ch->debug_size; + if (ch->debug_start == ch->debug_end) // Full ? push start away + ch->debug_start = (ch->debug_start + len + 1) % + ch->debug_size; + ch->debug_file->size = (ch->debug_end - ch->debug_start + + ch->debug_size) % ch->debug_size; + } + + free_page((unsigned long)page); + return 0; +} + +int comx_debug_skb(struct net_device *dev, struct sk_buff *skb, char *msg) +{ + struct comx_channel *ch = dev->priv; + + if (!ch->debug_area) return 0; + if (!skb) comx_debug(dev, "%s: %s NULL skb\n\n", dev->name, msg); + if (!skb->len) comx_debug(dev, "%s: %s empty skb\n\n", dev->name, msg); + + return comx_debug_bytes(dev, skb->data, skb->len, msg); +} + +int comx_debug_bytes(struct net_device *dev, unsigned char *bytes, int len, + char *msg) +{ + int pos = 0; + struct comx_channel *ch = dev->priv; + + if (!ch->debug_area) return 0; + + comx_debug(dev, "%s: %s len %d\n", dev->name, msg, len); + + while (pos != len) { + char line[80]; + int i = 0; + + memset(line, 0, 80); + sprintf(line,"%04d ", pos); + do { + sprintf(line + 5 + (pos % 16) * 3, "%02x", bytes[pos]); + sprintf(line + 60 + (pos % 16), "%c", + isprint(bytes[pos]) ? 
bytes[pos] : '.'); + pos++; + } while (pos != len && pos % 16); + + while ( i++ != 78 ) if (line[i] == 0) line[i] = ' '; + line[77] = '\n'; + line[78] = 0; + + comx_debug(dev, "%s", line); + } + comx_debug(dev, "\n"); + return 0; +} + +static void comx_loadavg_timerfun(unsigned long d) +{ + struct net_device *dev = (struct net_device *)d; + struct comx_channel *ch = dev->priv; + + ch->avg_bytes[ch->loadavg_counter] = ch->current_stats->rx_bytes; + ch->avg_bytes[ch->loadavg_counter + ch->loadavg_size] = + ch->current_stats->tx_bytes; + + ch->loadavg_counter = (ch->loadavg_counter + 1) % ch->loadavg_size; + + mod_timer(&ch->loadavg_timer,jiffies + HZ * ch->loadavg[0]); +} + +#if 0 +static void comx_reset_timerfun(unsigned long d) +{ + struct net_device *dev = (struct net_device *)d; + struct comx_channel *ch = dev->priv; + + if(!(ch->line_status & (PROTO_LOOP | PROTO_UP))) { + if(test_and_set_bit(0,&ch->reset_pending) && ch->HW_reset) { + ch->HW_reset(dev); + } + } + + mod_timer(&ch->reset_timer, jiffies + HZ * ch->reset_timeout); +} +#endif + +static int comx_open(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct proc_dir_entry *comxdir = ch->procdir->subdir; + int ret=0; + + if (!ch->protocol || !ch->hardware) return -ENODEV; + + if ((ret = ch->HW_open(dev))) return ret; + if ((ret = ch->LINE_open(dev))) { + ch->HW_close(dev); + return ret; + }; + + for (; comxdir ; comxdir = comxdir->next) { + if (strcmp(comxdir->name, FILENAME_HARDWARE) == 0 || + strcmp(comxdir->name, FILENAME_PROTOCOL) == 0) + comxdir->mode = S_IFREG | 0444; + } + +#if 0 + ch->reset_pending = 1; + ch->reset_timeout = 30; + ch->reset_timer.function = comx_reset_timerfun; + ch->reset_timer.data = (unsigned long)dev; + ch->reset_timer.expires = jiffies + HZ * ch->reset_timeout; + add_timer(&ch->reset_timer); +#endif + + return 0; +} + +static int comx_close(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + struct proc_dir_entry *comxdir = 
ch->procdir->subdir; + int ret = -ENODEV; + + if (test_and_clear_bit(0, &ch->lineup_pending)) { + del_timer(&ch->lineup_timer); + } + +#if 0 + del_timer(&ch->reset_timer); +#endif + + if (ch->init_status & LINE_OPEN && ch->protocol && ch->LINE_close) { + ret = ch->LINE_close(dev); + } + + if (ret) return ret; + + if (ch->init_status & HW_OPEN && ch->hardware && ch->HW_close) { + ret = ch->HW_close(dev); + } + + ch->line_status=0; + + for (; comxdir ; comxdir = comxdir->next) { + if (strcmp(comxdir->name, FILENAME_HARDWARE) == 0 || + strcmp(comxdir->name, FILENAME_PROTOCOL) == 0) + comxdir->mode = S_IFREG | 0644; + } + + return ret; +} + +void comx_status(struct net_device *dev, int status) +{ + struct comx_channel *ch = dev->priv; + +#if 0 + if(status & (PROTO_UP | PROTO_LOOP)) { + clear_bit(0,&ch->reset_pending); + } +#endif + + printk(KERN_NOTICE "Interface %s: modem status %s, line protocol %s\n", + dev->name, status & LINE_UP ? "UP" : "DOWN", + status & PROTO_LOOP ? "LOOP" : status & PROTO_UP ? 
+ "UP" : "DOWN"); + + ch->line_status = status; +} + +static int comx_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + int rc; + + if (skb->len > dev->mtu + dev->hard_header_len) { + printk(KERN_ERR "comx_xmit: %s: skb->len %d > dev->mtu %d\n", dev->name, + (int)skb->len, dev->mtu); + } + + if (ch->debug_flags & DEBUG_COMX_TX) { + comx_debug_skb(dev, skb, "comx_xmit skb"); + } + + rc=ch->LINE_xmit(skb, dev); +// if (!rc) dev_kfree_skb(skb); + + return rc; +} + +static int comx_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, void *daddr, void *saddr, unsigned len) +{ + struct comx_channel *ch = dev->priv; + + if (ch->LINE_header) { + return (ch->LINE_header(skb, dev, type, daddr, saddr, len)); + } else { + return 0; + } +} + +static int comx_rebuild_header(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct comx_channel *ch = dev->priv; + + if (ch->LINE_rebuild_header) { + return(ch->LINE_rebuild_header(skb)); + } else { + return 0; + } +} + +int comx_rx(struct net_device *dev, struct sk_buff *skb) +{ + struct comx_channel *ch = dev->priv; + + if (ch->debug_flags & DEBUG_COMX_RX) { + comx_debug_skb(dev, skb, "comx_rx skb"); + } + if (skb) { + netif_rx(skb); + dev->last_rx = jiffies; + } + return 0; +} + +static struct net_device_stats *comx_stats(struct net_device *dev) +{ + struct comx_channel *ch = dev->priv; + + return ch->current_stats; +} + +void comx_lineup_func(unsigned long d) +{ + struct net_device *dev = (struct net_device *)d; + struct comx_channel *ch = dev->priv; + + del_timer(&ch->lineup_timer); + clear_bit(0, &ch->lineup_pending); + + if (ch->LINE_status) { + ch->LINE_status(dev, ch->line_status |= LINE_UP); + } +} + +#define LOADAVG(avg, off) (int) \ + ((ch->avg_bytes[(ch->loadavg_counter - 1 + ch->loadavg_size * 2) \ + % ch->loadavg_size + off] - ch->avg_bytes[(ch->loadavg_counter - 1 \ + - ch->loadavg[avg] / ch->loadavg[0] + ch->loadavg_size * 2) \ + % 
ch->loadavg_size + off]) / ch->loadavg[avg] * 8) + +static int comx_statistics(struct net_device *dev, char *page) +{ + struct comx_channel *ch = dev->priv; + int len = 0; + int tmp; + int i = 0; + char tmpstr[20]; + int tmpstrlen = 0; + + len += sprintf(page + len, "Interface administrative status is %s, " + "modem status is %s, protocol is %s\n", + dev->flags & IFF_UP ? "UP" : "DOWN", + ch->line_status & LINE_UP ? "UP" : "DOWN", + ch->line_status & PROTO_LOOP ? "LOOP" : + ch->line_status & PROTO_UP ? "UP" : "DOWN"); + len += sprintf(page + len, "Modem status changes: %lu, Transmitter status " + "is %s, tbusy: %d\n", ch->current_stats->tx_carrier_errors, ch->HW_txe ? + ch->HW_txe(dev) ? "IDLE" : "BUSY" : "NOT READY", netif_running(dev)); + len += sprintf(page + len, "Interface load (input): %d / %d / %d bits/s (", + LOADAVG(0,0), LOADAVG(1, 0), LOADAVG(2, 0)); + tmpstr[0] = 0; + for (i=0; i != 3; i++) { + char tf; + + tf = ch->loadavg[i] % 60 == 0 && + ch->loadavg[i] / 60 > 0 ? 'm' : 's'; + tmpstrlen += sprintf(tmpstr + tmpstrlen, "%d%c%s", + ch->loadavg[i] / (tf == 'm' ? 60 : 1), tf, + i == 2 ? ")\n" : "/"); + } + len += sprintf(page + len, + "%s (output): %d / %d / %d bits/s (%s", tmpstr, + LOADAVG(0,ch->loadavg_size), LOADAVG(1, ch->loadavg_size), + LOADAVG(2, ch->loadavg_size), tmpstr); + + len += sprintf(page + len, "Debug flags: "); + tmp = len; i = 0; + while (comx_debugflags[i].name) { + if (ch->debug_flags & comx_debugflags[i].value) + len += sprintf(page + len, "%s ", + comx_debugflags[i].name); + i++; + } + len += sprintf(page + len, "%s\n", tmp == len ? 
"none" : ""); + + len += sprintf(page + len, "RX errors: len: %lu, overrun: %lu, crc: %lu, " + "aborts: %lu\n buffer overrun: %lu, pbuffer overrun: %lu\n" + "TX errors: underrun: %lu\n", + ch->current_stats->rx_length_errors, ch->current_stats->rx_over_errors, + ch->current_stats->rx_crc_errors, ch->current_stats->rx_frame_errors, + ch->current_stats->rx_missed_errors, ch->current_stats->rx_fifo_errors, + ch->current_stats->tx_fifo_errors); + + if (ch->LINE_statistics && (ch->init_status & LINE_OPEN)) { + len += ch->LINE_statistics(dev, page + len); + } else { + len += sprintf(page+len, "Line status: driver not initialized\n"); + } + if (ch->HW_statistics && (ch->init_status & HW_OPEN)) { + len += ch->HW_statistics(dev, page + len); + } else { + len += sprintf(page+len, "Board status: driver not initialized\n"); + } + + return len; +} + +static int comx_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct comx_channel *ch = dev->priv; + + if (ch->LINE_ioctl) { + return(ch->LINE_ioctl(dev, ifr, cmd)); + } + return -EINVAL; +} + +static void comx_reset_dev(struct net_device *dev) +{ + dev->open = comx_open; + dev->stop = comx_close; + dev->hard_start_xmit = comx_xmit; + dev->hard_header = comx_header; + dev->rebuild_header = comx_rebuild_header; + dev->get_stats = comx_stats; + dev->do_ioctl = comx_ioctl; + dev->change_mtu = NULL; + dev->tx_queue_len = 20; + dev->flags = IFF_NOARP; +} + +static int comx_init_dev(struct net_device *dev) +{ + struct comx_channel *ch; + + if ((ch = kmalloc(sizeof(struct comx_channel), GFP_KERNEL)) == NULL) { + return -ENOMEM; + } + memset(ch, 0, sizeof(struct comx_channel)); + + ch->loadavg[0] = 5; + ch->loadavg[1] = 300; + ch->loadavg[2] = 900; + ch->loadavg_size = ch->loadavg[2] / ch->loadavg[0] + 1; + if ((ch->avg_bytes = kmalloc(ch->loadavg_size * + sizeof(unsigned long) * 2, GFP_KERNEL)) == NULL) { + kfree(ch); + return -ENOMEM; + } + + memset(ch->avg_bytes, 0, ch->loadavg_size * sizeof(unsigned long) * 2); + 
ch->loadavg_counter = 0; + ch->loadavg_timer.function = comx_loadavg_timerfun; + ch->loadavg_timer.data = (unsigned long)dev; + ch->loadavg_timer.expires = jiffies + HZ * ch->loadavg[0]; + add_timer(&ch->loadavg_timer); + + dev->priv = (void *)ch; + ch->dev = dev; + ch->line_status &= ~LINE_UP; + + ch->current_stats = &ch->stats; + + comx_reset_dev(dev); + return 0; +} + +static int comx_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct proc_dir_entry *file = (struct proc_dir_entry *)data; + struct net_device *dev = file->parent->data; + struct comx_channel *ch = dev->priv; + int len = 0; + + if (strcmp(file->name, FILENAME_STATUS) == 0) { + len = comx_statistics(dev, page); + } else if (strcmp(file->name, FILENAME_HARDWARE) == 0) { + len = sprintf(page, "%s\n", ch->hardware ? + ch->hardware->name : HWNAME_NONE); + } else if (strcmp(file->name, FILENAME_PROTOCOL) == 0) { + len = sprintf(page, "%s\n", ch->protocol ? + ch->protocol->name : PROTONAME_NONE); + } else if (strcmp(file->name, FILENAME_LINEUPDELAY) == 0) { + len = sprintf(page, "%01d\n", ch->lineup_delay); + } + + if (off >= len) { + *eof = 1; + return 0; + } + + *start = page + off; + if (count >= len - off) { + *eof = 1; + } + return min_t(int, count, len - off); +} + + +static int comx_root_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct proc_dir_entry *file = (struct proc_dir_entry *)data; + struct comx_hardware *hw; + struct comx_protocol *line; + + int len = 0; + + if (strcmp(file->name, FILENAME_HARDWARELIST) == 0) { + for(hw=comx_channels;hw;hw=hw->next) + len+=sprintf(page+len, "%s\n", hw->name); + } else if (strcmp(file->name, FILENAME_PROTOCOLLIST) == 0) { + for(line=comx_lines;line;line=line->next) + len+=sprintf(page+len, "%s\n", line->name); + } + + if (off >= len) { + *eof = 1; + return 0; + } + + *start = page + off; + if (count >= len - off) { + *eof = 1; + } + return min_t(int, count, len - off); +} 
+ + + +static int comx_write_proc(struct file *file, const char *buffer, u_long count, + void *data) +{ + struct proc_dir_entry *entry = (struct proc_dir_entry *)data; + struct net_device *dev = (struct net_device *)entry->parent->data; + struct comx_channel *ch = dev->priv; + char *page; + struct comx_hardware *hw = comx_channels; + struct comx_protocol *line = comx_lines; + int ret=0; + + if (count > PAGE_SIZE) { + printk(KERN_ERR "count is %lu > %d!!!\n", count, (int)PAGE_SIZE); + return -ENOSPC; + } + + if (!(page = (char *)__get_free_page(GFP_KERNEL))) return -ENOMEM; + + if(copy_from_user(page, buffer, count)) + { + count = -EFAULT; + goto out; + } + + if (page[count-1] == '\n') + page[count-1] = '\0'; + else if (count < PAGE_SIZE) + page[count] = '\0'; + else if (page[count]) { + count = -EINVAL; + goto out; + } + + if (strcmp(entry->name, FILENAME_DEBUG) == 0) { + int i; + int ret = 0; + + if ((i = simple_strtoul(page, NULL, 10)) != 0) { + unsigned long flags; + + save_flags(flags); cli(); + if (ch->debug_area) kfree(ch->debug_area); + if ((ch->debug_area = kmalloc(ch->debug_size = i, + GFP_KERNEL)) == NULL) { + ret = -ENOMEM; + } + ch->debug_start = ch->debug_end = 0; + restore_flags(flags); + free_page((unsigned long)page); + return ret ? 
ret : count; + } + + if (*page != '+' && *page != '-') { + free_page((unsigned long)page); + return -EINVAL; + } + while (comx_debugflags[i].value && + strncmp(comx_debugflags[i].name, page + 1, + strlen(comx_debugflags[i].name))) { + i++; + } + + if (comx_debugflags[i].value == 0) { + printk(KERN_ERR "Invalid debug option\n"); + free_page((unsigned long)page); + return -EINVAL; + } + if (*page == '+') { + ch->debug_flags |= comx_debugflags[i].value; + } else { + ch->debug_flags &= ~comx_debugflags[i].value; + } + } else if (strcmp(entry->name, FILENAME_HARDWARE) == 0) { + if(strlen(page)>10) { + free_page((unsigned long)page); + return -EINVAL; + } + while (hw) { + if (strcmp(hw->name, page) == 0) { + break; + } else { + hw = hw->next; + } + } +#ifdef CONFIG_KMOD + if(!hw && comx_strcasecmp(HWNAME_NONE,page) != 0){ + request_module("comx-hw-%s",page); + } + hw=comx_channels; + while (hw) { + if (comx_strcasecmp(hw->name, page) == 0) { + break; + } else { + hw = hw->next; + } + } +#endif + + if (comx_strcasecmp(HWNAME_NONE, page) != 0 && !hw) { + free_page((unsigned long)page); + return -ENODEV; + } + if (ch->init_status & HW_OPEN) { + free_page((unsigned long)page); + return -EBUSY; + } + if (ch->hardware && ch->hardware->hw_exit && + (ret=ch->hardware->hw_exit(dev))) { + free_page((unsigned long)page); + return ret; + } + ch->hardware = hw; + entry->size = strlen(page) + 1; + if (hw && hw->hw_init) hw->hw_init(dev); + } else if (strcmp(entry->name, FILENAME_PROTOCOL) == 0) { + if(strlen(page)>10) { + free_page((unsigned long)page); + return -EINVAL; + } + while (line) { + if (comx_strcasecmp(line->name, page) == 0) { + break; + } else { + line = line->next; + } + } +#ifdef CONFIG_KMOD + if(!line && comx_strcasecmp(PROTONAME_NONE, page) != 0) { + request_module("comx-proto-%s",page); + } + line=comx_lines; + while (line) { + if (comx_strcasecmp(line->name, page) == 0) { + break; + } else { + line = line->next; + } + } +#endif + + if 
(comx_strcasecmp(PROTONAME_NONE, page) != 0 && !line) { + free_page((unsigned long)page); + return -ENODEV; + } + + if (ch->init_status & LINE_OPEN) { + free_page((unsigned long)page); + return -EBUSY; + } + + if (ch->protocol && ch->protocol->line_exit && + (ret=ch->protocol->line_exit(dev))) { + free_page((unsigned long)page); + return ret; + } + ch->protocol = line; + entry->size = strlen(page) + 1; + comx_reset_dev(dev); + if (line && line->line_init) line->line_init(dev); + } else if (strcmp(entry->name, FILENAME_LINEUPDELAY) == 0) { + int i; + + if ((i = simple_strtoul(page, NULL, 10)) != 0) { + if (i >=0 && i < 10) { + ch->lineup_delay = i; + } else { + printk(KERN_ERR "comx: invalid lineup_delay value\n"); + } + } + } +out: + free_page((unsigned long)page); + return count; +} + +static int comx_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct proc_dir_entry *new_dir, *debug_file; + struct net_device *dev; + struct comx_channel *ch; + int ret = -EIO; + + if ((dev = kmalloc(sizeof(struct net_device), GFP_KERNEL)) == NULL) { + return -ENOMEM; + } + memset(dev, 0, sizeof(struct net_device)); + + lock_kernel(); + if ((new_dir = create_proc_entry(dentry->d_name.name, mode | S_IFDIR, + comx_root_dir)) == NULL) { + goto cleanup_dev; + } + + new_dir->nlink = 2; + new_dir->data = NULL; // ide jon majd a struct dev + + /* Ezek kellenek */ + if (!create_comx_proc_entry(FILENAME_HARDWARE, 0644, + strlen(HWNAME_NONE) + 1, new_dir)) { + goto cleanup_new_dir; + } + if (!create_comx_proc_entry(FILENAME_PROTOCOL, 0644, + strlen(PROTONAME_NONE) + 1, new_dir)) { + goto cleanup_filename_hardware; + } + if (!create_comx_proc_entry(FILENAME_STATUS, 0444, 0, new_dir)) { + goto cleanup_filename_protocol; + } + if (!create_comx_proc_entry(FILENAME_LINEUPDELAY, 0644, 2, new_dir)) { + goto cleanup_filename_status; + } + + if ((debug_file = create_proc_entry(FILENAME_DEBUG, + S_IFREG | 0644, new_dir)) == NULL) { + goto cleanup_filename_lineupdelay; + } + 
debug_file->data = (void *)debug_file; + debug_file->read_proc = NULL; // see below + debug_file->write_proc = &comx_write_proc; + debug_file->nlink = 1; + + strcpy(dev->name, (char *)new_dir->name); + dev->init = comx_init_dev; + + if (register_netdevice(dev)) { + goto cleanup_filename_debug; + } + ch = dev->priv; + if((ch->if_ptr = (void *)kmalloc(sizeof(struct ppp_device), + GFP_KERNEL)) == NULL) { + goto cleanup_register; + } + memset(ch->if_ptr, 0, sizeof(struct ppp_device)); + ch->debug_file = debug_file; + ch->procdir = new_dir; + new_dir->data = dev; + + ch->debug_start = ch->debug_end = 0; + if ((ch->debug_area = kmalloc(ch->debug_size = DEFAULT_DEBUG_SIZE, + GFP_KERNEL)) == NULL) { + ret = -ENOMEM; + goto cleanup_if_ptr; + } + + ch->lineup_delay = DEFAULT_LINEUP_DELAY; + + MOD_INC_USE_COUNT; + unlock_kernel(); + return 0; +cleanup_if_ptr: + kfree(ch->if_ptr); +cleanup_register: + unregister_netdevice(dev); +cleanup_filename_debug: + remove_proc_entry(FILENAME_DEBUG, new_dir); +cleanup_filename_lineupdelay: + remove_proc_entry(FILENAME_LINEUPDELAY, new_dir); +cleanup_filename_status: + remove_proc_entry(FILENAME_STATUS, new_dir); +cleanup_filename_protocol: + remove_proc_entry(FILENAME_PROTOCOL, new_dir); +cleanup_filename_hardware: + remove_proc_entry(FILENAME_HARDWARE, new_dir); +cleanup_new_dir: + remove_proc_entry(dentry->d_name.name, comx_root_dir); +cleanup_dev: + kfree(dev); + unlock_kernel(); + return ret; +} + +static int comx_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct proc_dir_entry *entry = PDE(dentry->d_inode); + struct net_device *dev; + struct comx_channel *ch; + int ret; + + lock_kernel(); + dev = entry->data; + ch = dev->priv; + if (dev->flags & IFF_UP) { + printk(KERN_ERR "%s: down interface before removing it\n", dev->name); + unlock_kernel(); + return -EBUSY; + } + + if (ch->protocol && ch->protocol->line_exit && + (ret=ch->protocol->line_exit(dev))) { + unlock_kernel(); + return ret; + } + if (ch->hardware && 
ch->hardware->hw_exit && + (ret=ch->hardware->hw_exit(dev))) { + if(ch->protocol && ch->protocol->line_init) { + ch->protocol->line_init(dev); + } + unlock_kernel(); + return ret; + } + ch->protocol = NULL; + ch->hardware = NULL; + + del_timer(&ch->loadavg_timer); + kfree(ch->avg_bytes); + + unregister_netdev(dev); + if (ch->debug_area) { + kfree(ch->debug_area); + } + if (dev->priv) { + kfree(dev->priv); + } + free_netdev(dev); + + remove_proc_entry(FILENAME_DEBUG, entry); + remove_proc_entry(FILENAME_LINEUPDELAY, entry); + remove_proc_entry(FILENAME_STATUS, entry); + remove_proc_entry(FILENAME_HARDWARE, entry); + remove_proc_entry(FILENAME_PROTOCOL, entry); + remove_proc_entry(dentry->d_name.name, comx_root_dir); + + MOD_DEC_USE_COUNT; + unlock_kernel(); + return 0; +} + +static struct dentry *comx_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct proc_dir_entry *de; + struct inode *inode = NULL; + + lock_kernel(); + if ((de = PDE(dir)) != NULL) { + for (de = de->subdir ; de ; de = de->next) { + if ((de->namelen == dentry->d_name.len) && + (memcmp(dentry->d_name.name, de->name, + de->namelen) == 0)) { + if ((inode = proc_get_inode(dir->i_sb, + de->low_ino, de)) == NULL) { + printk(KERN_ERR "COMX: lookup error\n"); + unlock_kernel(); + return ERR_PTR(-EINVAL); + } + break; + } + } + } + unlock_kernel(); + dentry->d_op = &comx_dentry_operations; + d_add(dentry, inode); + return NULL; +} + +int comx_strcasecmp(const char *cs, const char *ct) +{ + register signed char __res; + + while (1) { + if ((__res = toupper(*cs) - toupper(*ct++)) != 0 || !*cs++) { + break; + } + } + return __res; +} + +static int comx_delete_dentry(struct dentry *dentry) +{ + return 1; +} + +static struct proc_dir_entry *create_comx_proc_entry(char *name, int mode, + int size, struct proc_dir_entry *dir) +{ + struct proc_dir_entry *new_file; + + if ((new_file = create_proc_entry(name, S_IFREG | mode, dir)) != NULL) { + new_file->data = (void *)new_file; + 
new_file->read_proc = &comx_read_proc; + new_file->write_proc = &comx_write_proc; + new_file->size = size; + new_file->nlink = 1; + } + return(new_file); +} + +int comx_register_hardware(struct comx_hardware *comx_hw) +{ + struct comx_hardware *hw = comx_channels; + + if (!hw) { + comx_channels = comx_hw; + } else { + while (hw->next != NULL && strcmp(comx_hw->name, hw->name) != 0) { + hw = hw->next; + } + if (strcmp(comx_hw->name, hw->name) == 0) { + return -1; + } + hw->next = comx_hw; + } + + printk(KERN_INFO "COMX: driver for hardware type %s, version %s\n", comx_hw->name, comx_hw->version); + return 0; +} + +int comx_unregister_hardware(char *name) +{ + struct comx_hardware *hw = comx_channels; + + if (!hw) { + return -1; + } + + if (strcmp(hw->name, name) == 0) { + comx_channels = comx_channels->next; + return 0; + } + + while (hw->next != NULL && strcmp(hw->next->name,name) != 0) { + hw = hw->next; + } + + if (hw->next != NULL && strcmp(hw->next->name, name) == 0) { + hw->next = hw->next->next; + return 0; + } + return -1; +} + +int comx_register_protocol(struct comx_protocol *comx_line) +{ + struct comx_protocol *pr = comx_lines; + + if (!pr) { + comx_lines = comx_line; + } else { + while (pr->next != NULL && strcmp(comx_line->name, pr->name) !=0) { + pr = pr->next; + } + if (strcmp(comx_line->name, pr->name) == 0) { + return -1; + } + pr->next = comx_line; + } + + printk(KERN_INFO "COMX: driver for protocol type %s, version %s\n", comx_line->name, comx_line->version); + return 0; +} + +int comx_unregister_protocol(char *name) +{ + struct comx_protocol *pr = comx_lines; + + if (!pr) { + return -1; + } + + if (strcmp(pr->name, name) == 0) { + comx_lines = comx_lines->next; + return 0; + } + + while (pr->next != NULL && strcmp(pr->next->name,name) != 0) { + pr = pr->next; + } + + if (pr->next != NULL && strcmp(pr->next->name, name) == 0) { + pr->next = pr->next->next; + return 0; + } + return -1; +} + +static int __init comx_init(void) +{ + struct 
proc_dir_entry *new_file; + + comx_root_dir = create_proc_entry("comx", + S_IFDIR | S_IWUSR | S_IRUGO | S_IXUGO, &proc_root); + if (!comx_root_dir) + return -ENOMEM; + comx_root_dir->proc_iops = &comx_root_inode_ops; + + if ((new_file = create_proc_entry(FILENAME_HARDWARELIST, + S_IFREG | 0444, comx_root_dir)) == NULL) { + return -ENOMEM; + } + + new_file->data = new_file; + new_file->read_proc = &comx_root_read_proc; + new_file->write_proc = NULL; + new_file->nlink = 1; + + if ((new_file = create_proc_entry(FILENAME_PROTOCOLLIST, + S_IFREG | 0444, comx_root_dir)) == NULL) { + return -ENOMEM; + } + + new_file->data = new_file; + new_file->read_proc = &comx_root_read_proc; + new_file->write_proc = NULL; + new_file->nlink = 1; + + + printk(KERN_INFO "COMX: driver version %s (C) 1995-1999 ITConsult-Pro Co. \n", + VERSION); + return 0; +} + +static void __exit comx_exit(void) +{ + remove_proc_entry(FILENAME_HARDWARELIST, comx_root_dir); + remove_proc_entry(FILENAME_PROTOCOLLIST, comx_root_dir); + remove_proc_entry(comx_root_dir->name, &proc_root); +} + +module_init(comx_init); +module_exit(comx_exit); + +EXPORT_SYMBOL(comx_register_hardware); +EXPORT_SYMBOL(comx_unregister_hardware); +EXPORT_SYMBOL(comx_register_protocol); +EXPORT_SYMBOL(comx_unregister_protocol); +EXPORT_SYMBOL(comx_debug_skb); +EXPORT_SYMBOL(comx_debug_bytes); +EXPORT_SYMBOL(comx_debug); +EXPORT_SYMBOL(comx_lineup_func); +EXPORT_SYMBOL(comx_status); +EXPORT_SYMBOL(comx_rx); +EXPORT_SYMBOL(comx_strcasecmp); +EXPORT_SYMBOL(comx_root_dir); diff --git a/drivers/net/wan/comx.h b/drivers/net/wan/comx.h new file mode 100644 index 000000000..0f7404f21 --- /dev/null +++ b/drivers/net/wan/comx.h @@ -0,0 +1,232 @@ +/* + * General definitions for the COMX driver + * + * Original authors: Arpad Bakay , + * Peter Bajan , + * Previous maintainer: Tivadar Szemethy + * Currently maintained by: Gergely Madarasz + * + * Copyright (C) 1995-1999 ITConsult-Pro Co. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * + * net_device_stats: + * rx_length_errors rec_len < 4 || rec_len > 2000 + * rx_over_errors receive overrun (OVR) + * rx_crc_errors rx crc error + * rx_frame_errors aborts rec'd (ABO) + * rx_fifo_errors status fifo overrun (PBUFOVR) + * rx_missed_errors receive buffer overrun (BUFOVR) + * tx_aborted_errors ? + * tx_carrier_errors modem line status changes + * tx_fifo_errors tx underrun (locomx) + */ +#include + +struct comx_protocol { + char *name; + char *version; + unsigned short encap_type; + int (*line_init)(struct net_device *dev); + int (*line_exit)(struct net_device *dev); + struct comx_protocol *next; + }; + +struct comx_hardware { + char *name; + char *version; + int (*hw_init)(struct net_device *dev); + int (*hw_exit)(struct net_device *dev); + int (*hw_dump)(struct net_device *dev); + struct comx_hardware *next; + }; + +struct comx_channel { + void *if_ptr; // General purpose pointer + struct net_device *dev; // Where we belong to + struct net_device *twin; // On dual-port cards + struct proc_dir_entry *procdir; // the directory + + unsigned char init_status; + unsigned char line_status; + + struct timer_list lineup_timer; // against line jitter + long int lineup_pending; + unsigned char lineup_delay; + +#if 0 + struct timer_list reset_timer; // for board resetting + long reset_pending; + int reset_timeout; +#endif + + struct net_device_stats stats; + struct net_device_stats *current_stats; +#if 0 + unsigned long board_resets; +#endif + unsigned long *avg_bytes; + int loadavg_counter, loadavg_size; + int loadavg[3]; + struct timer_list loadavg_timer; + int debug_flags; + char *debug_area; + int debug_start, debug_end, debug_size; + struct proc_dir_entry *debug_file; +#ifdef 
CONFIG_COMX_DEBUG_RAW + char *raw; + int raw_len; +#endif + // LINE specific + struct comx_protocol *protocol; + void (*LINE_rx)(struct net_device *dev, struct sk_buff *skb); + int (*LINE_tx)(struct net_device *dev); + void (*LINE_status)(struct net_device *dev, u_short status); + int (*LINE_open)(struct net_device *dev); + int (*LINE_close)(struct net_device *dev); + int (*LINE_xmit)(struct sk_buff *skb, struct net_device *dev); + int (*LINE_header)(struct sk_buff *skb, struct net_device *dev, + u_short type,void *daddr, void *saddr, + unsigned len); + int (*LINE_rebuild_header)(struct sk_buff *skb); + int (*LINE_statistics)(struct net_device *dev, char *page); + int (*LINE_parameter_check)(struct net_device *dev); + int (*LINE_ioctl)(struct net_device *dev, struct ifreq *ifr, + int cmd); + void (*LINE_mod_use)(int); + void * LINE_privdata; + + // HW specific + + struct comx_hardware *hardware; + void (*HW_board_on)(struct net_device *dev); + void (*HW_board_off)(struct net_device *dev); + struct net_device *(*HW_access_board)(struct net_device *dev); + void (*HW_release_board)(struct net_device *dev, struct net_device *savep); + int (*HW_txe)(struct net_device *dev); + int (*HW_open)(struct net_device *dev); + int (*HW_close)(struct net_device *dev); + int (*HW_send_packet)(struct net_device *dev,struct sk_buff *skb); + int (*HW_statistics)(struct net_device *dev, char *page); +#if 0 + int (*HW_reset)(struct net_device *dev, char *page); +#endif + int (*HW_load_board)(struct net_device *dev); + void (*HW_set_clock)(struct net_device *dev); + void *HW_privdata; + }; + +struct comx_debugflags_struct { + char *name; + int value; + }; + +#define COMX_ROOT_DIR_NAME "comx" + +#define FILENAME_HARDWARE "boardtype" +#define FILENAME_HARDWARELIST "boardtypes" +#define FILENAME_PROTOCOL "protocol" +#define FILENAME_PROTOCOLLIST "protocols" +#define FILENAME_DEBUG "debug" +#define FILENAME_CLOCK "clock" +#define FILENAME_STATUS "status" +#define FILENAME_IO "io" +#define 
FILENAME_IRQ "irq" +#define FILENAME_KEEPALIVE "keepalive" +#define FILENAME_LINEUPDELAY "lineup_delay" +#define FILENAME_CHANNEL "channel" +#define FILENAME_FIRMWARE "firmware" +#define FILENAME_MEMADDR "memaddr" +#define FILENAME_TWIN "twin" +#define FILENAME_T1 "t1" +#define FILENAME_T2 "t2" +#define FILENAME_N2 "n2" +#define FILENAME_WINDOW "window" +#define FILENAME_MODE "mode" +#define FILENAME_DLCI "dlci" +#define FILENAME_MASTER "master" +#ifdef CONFIG_COMX_DEBUG_RAW +#define FILENAME_RAW "raw" +#endif + +#define PROTONAME_NONE "none" +#define HWNAME_NONE "none" +#define KEEPALIVE_OFF "off" + +#define FRAME_ACCEPTED 0 /* sending and xmitter busy */ +#define FRAME_DROPPED 1 +#define FRAME_ERROR 2 /* xmitter error */ +#define FRAME_QUEUED 3 /* sending but more can come */ + +#define LINE_UP 1 /* Modem UP */ +#define PROTO_UP 2 +#define PROTO_LOOP 4 + +#define HW_OPEN 1 +#define LINE_OPEN 2 +#define FW_LOADED 4 +#define IRQ_ALLOCATED 8 + +#define DEBUG_COMX_RX 2 +#define DEBUG_COMX_TX 4 +#define DEBUG_HW_TX 16 +#define DEBUG_HW_RX 32 +#define DEBUG_HDLC_KEEPALIVE 64 +#define DEBUG_COMX_PPP 128 +#define DEBUG_COMX_LAPB 256 +#define DEBUG_COMX_DLCI 512 + +#define DEBUG_PAGESIZE 3072 +#define DEFAULT_DEBUG_SIZE 4096 +#define DEFAULT_LINEUP_DELAY 1 +#define FILE_PAGESIZE 3072 + +#ifndef COMX_PPP_MAJOR +#define COMX_PPP_MAJOR 88 +#endif + + +#define COMX_CHANNEL(dev) ((struct comx_channel*)dev->priv) + +#define TWIN(dev) (COMX_CHANNEL(dev)->twin) + + +#ifndef byte +typedef u8 byte; +#endif +#ifndef word +typedef u16 word; +#endif + +#ifndef SEEK_SET +#define SEEK_SET 0 +#endif +#ifndef SEEK_CUR +#define SEEK_CUR 1 +#endif +#ifndef SEEK_END +#define SEEK_END 2 +#endif + +extern struct proc_dir_entry * comx_root_dir; + +extern int comx_register_hardware(struct comx_hardware *comx_hw); +extern int comx_unregister_hardware(char *name); +extern int comx_register_protocol(struct comx_protocol *comx_line); +extern int comx_unregister_protocol(char *name); + +extern int 
comx_rx(struct net_device *dev, struct sk_buff *skb); +extern void comx_status(struct net_device *dev, int status); +extern void comx_lineup_func(unsigned long d); + +extern int comx_debug(struct net_device *dev, char *fmt, ...); +extern int comx_debug_skb(struct net_device *dev, struct sk_buff *skb, char *msg); +extern int comx_debug_bytes(struct net_device *dev, unsigned char *bytes, int len, + char *msg); +extern int comx_strcasecmp(const char *cs, const char *ct); + +extern struct inode_operations comx_normal_inode_ops; diff --git a/drivers/net/wan/comxhw.h b/drivers/net/wan/comxhw.h new file mode 100644 index 000000000..15230dc1f --- /dev/null +++ b/drivers/net/wan/comxhw.h @@ -0,0 +1,113 @@ +/* + * Defines for comxhw.c + * + * Original authors: Arpad Bakay , + * Peter Bajan , + * Previous maintainer: Tivadar Szemethy + * Current maintainer: Gergely Madarasz + * + * Copyright (C) 1995-1999 ITConsult-Pro Co. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#define LOCOMX_IO_EXTENT 8 +#define COMX_IO_EXTENT 4 +#define HICOMX_IO_EXTENT 16 + +#define COMX_MAX_TX_SIZE 1600 +#define COMX_MAX_RX_SIZE 2048 + +#define COMX_JAIL_OFFSET 0xffff +#define COMX_JAIL_VALUE 0xfe +#define COMX_MEMORY_SIZE 65536 +#define HICOMX_MEMORY_SIZE 16384 +#define COMX_MEM_MIN 0xa0000 +#define COMX_MEM_MAX 0xf0000 + +#define COMX_DEFAULT_IO 0x360 +#define COMX_DEFAULT_IRQ 10 +#define COMX_DEFAULT_MEMADDR 0xd0000 +#define HICOMX_DEFAULT_IO 0x320 +#define HICOMX_DEFAULT_IRQ 10 +#define HICOMX_DEFAULT_MEMADDR 0xd0000 +#define LOCOMX_DEFAULT_IO 0x368 +#define LOCOMX_DEFAULT_IRQ 7 + +#define MAX_CHANNELNO 2 + +#define COMX_CHANNEL_OFFSET 0x2000 + +#define COMX_ENABLE_BOARD_IT 0x40 +#define COMX_BOARD_RESET 0x20 +#define COMX_ENABLE_BOARD_MEM 0x10 +#define COMX_DISABLE_BOARD_MEM 0 +#define COMX_DISABLE_ALL 0x00 + +#define HICOMX_DISABLE_ALL 0x00 +#define HICOMX_ENABLE_BOARD_MEM 0x02 +#define HICOMX_DISABLE_BOARD_MEM 0x0 +#define HICOMX_BOARD_RESET 0x01 +#define HICOMX_PRG_MEM 4 +#define HICOMX_DATA_MEM 0 +#define HICOMX_ID_BYTE 0x55 + +#define CMX_ID_BYTE 0x31 +#define COMX_CLOCK_CONST 8000 + +#define LINKUP_READY 3 + +#define OFF_FW_L1_ID 0x01e /* ID bytes */ +#define OFF_FW_L2_ID 0x1006 +#define FW_L1_ID_1 0xab +#define FW_L1_ID_2_COMX 0xc0 +#define FW_L1_ID_2_HICOMX 0xc1 +#define FW_L2_ID_1 0xab + +#define OFF_A_L2_CMD 0x130 /* command register for L2 */ +#define OFF_A_L2_CMDPAR 0x131 /* command parameter byte */ +#define OFF_A_L1_STATB 0x122 /* stat. block for L1 */ +#define OFF_A_L1_ABOREC 0x122 /* receive ABORT counter */ +#define OFF_A_L1_OVERRUN 0x123 /* receive overrun counter */ +#define OFF_A_L1_CRCREC 0x124 /* CRC error counter */ +#define OFF_A_L1_BUFFOVR 0x125 /* buffer overrun counter */ +#define OFF_A_L1_PBUFOVR 0x126 /* priority buffer overrun counter */ +#define OFF_A_L1_MODSTAT 0x127 /* current state of modem ctrl lines */ +#define OFF_A_L1_STATE 0x127 /* end of stat. 
block for L1 */ +#define OFF_A_L1_TXPC 0x128 /* Tx counter for the PC */ +#define OFF_A_L1_TXZ80 0x129 /* Tx counter for the Z80 */ +#define OFF_A_L1_RXPC 0x12a /* Rx counter for the PC */ +#define OFF_A_L1_RXZ80 0x12b /* Rx counter for the Z80 */ +#define OFF_A_L1_REPENA 0x12c /* IT rep disable */ +#define OFF_A_L1_CHNR 0x12d /* L1 channel logical number */ +#define OFF_A_L1_CLKINI 0x12e /* Timer Const */ +#define OFF_A_L2_LINKUP 0x132 /* Linkup byte */ +#define OFF_A_L2_DAV 0x134 /* Rx DAV */ +#define OFF_A_L2_RxBUFP 0x136 /* Rx buff relative to membase */ +#define OFF_A_L2_TxEMPTY 0x138 /* Tx Empty */ +#define OFF_A_L2_TxBUFP 0x13a /* Tx Buf */ +#define OFF_A_L2_NBUFFS 0x144 /* Number of buffers to fetch */ + +#define OFF_A_L2_SABMREC 0x164 /* LAPB no. of SABMs received */ +#define OFF_A_L2_SABMSENT 0x165 /* LAPB no. of SABMs sent */ +#define OFF_A_L2_REJREC 0x166 /* LAPB no. of REJs received */ +#define OFF_A_L2_REJSENT 0x167 /* LAPB no. of REJs sent */ +#define OFF_A_L2_FRMRREC 0x168 /* LAPB no. of FRMRs received */ +#define OFF_A_L2_FRMRSENT 0x169 /* LAPB no. of FRMRs sent */ +#define OFF_A_L2_PROTERR 0x16A /* LAPB no. of protocol errors rec'd */ +#define OFF_A_L2_LONGREC 0x16B /* LAPB no. of long frames */ +#define OFF_A_L2_INVNR 0x16C /* LAPB no. of invalid N(R)s rec'd */ +#define OFF_A_L2_UNDEFFR 0x16D /* LAPB no. 
of invalid frames */ + +#define OFF_A_L2_T1 0x174 /* T1 timer */ +#define OFF_A_L2_ADDR 0x176 /* DCE = 1, DTE = 3 */ + +#define COMX_CMD_INIT 1 +#define COMX_CMD_EXIT 2 +#define COMX_CMD_OPEN 16 +#define COMX_CMD_CLOSE 17 + diff --git a/drivers/net/wan/falc-lh.h b/drivers/net/wan/falc-lh.h new file mode 100644 index 000000000..e30726c82 --- /dev/null +++ b/drivers/net/wan/falc-lh.h @@ -0,0 +1,102 @@ +/* + * Defines for comx-hw-slicecom.c - FALC-LH specific + * + * Author: Bartok Istvan + * Last modified: Mon Feb 7 20:00:38 CET 2000 + * + * :set tabstop=6 + */ + +/* + * Control register offsets on the LBI (page 90) + * use it like: + * lbi[ MODE ] = 0x34; + */ + +#define MODE 0x03 +#define IPC 0x08 +#define IMR0 0x14 /* Interrupt Mask Register 0 */ +#define IMR1 0x15 +#define IMR2 0x16 +#define IMR3 0x17 +#define IMR4 0x18 +#define IMR5 0x19 +#define FMR0 0x1a /* Framer Mode Register 0 */ +#define FMR1 0x1b +#define FMR2 0x1c +#define XSW 0x1e +#define XSP 0x1f +#define XC0 0x20 +#define XC1 0x21 +#define RC0 0x22 +#define RC1 0x23 +#define XPM0 0x24 +#define XPM1 0x25 +#define XPM2 0x26 +#define TSWM 0x27 +#define IDLE 0x29 /* Idle Code */ +#define LIM0 0x34 +#define LIM1 0x35 +#define PCD 0x36 +#define PCR 0x37 +#define LIM2 0x38 + +/* + * Status registers on the LBI (page 134) + * these are read-only, use it like: + * if( lbi[ FRS0 ] ) ... 
+ */ + +#define FRS0 0x4c /* Framer Receive Status register 0 */ +#define FRS1 0x4d /* Framer Receive Status register 1 */ +#define FECL 0x50 /* Framing Error Counter low byte */ /* Counts FAS word receive errors */ +#define FECH 0x51 /* high byte */ +#define CVCL 0x52 /* Code Violation Counter low byte */ /* Counts bipolar and HDB3 code violations */ +#define CVCH 0x53 /* high byte */ +#define CEC1L 0x54 /* CRC4 Error Counter 1 low byte */ /* Counts CRC4 errors in the incoming stream */ +#define CEC1H 0x55 /* high byte */ +#define EBCL 0x56 /* E Bit error Counter low byte */ /* E-bits: the remote end sends them, when */ +#define EBCH 0x57 /* high byte */ /* it detected a CRC4-error */ +#define ISR0 0x68 /* Interrupt Status Register 0 */ +#define ISR1 0x69 /* Interrupt Status Register 1 */ +#define ISR2 0x6a /* Interrupt Status Register 2 */ +#define ISR3 0x6b /* Interrupt Status Register 3 */ +#define ISR5 0x6c /* Interrupt Status Register 5 */ +#define GIS 0x6e /* Global Interrupt Status Register */ +#define VSTR 0x6f /* version information */ + +/* + * Bit fields + */ + +#define FRS0_LOS (1 << 7) +#define FRS0_AIS (1 << 6) +#define FRS0_LFA (1 << 5) +#define FRS0_RRA (1 << 4) +#define FRS0_AUXP (1 << 3) +#define FRS0_NMF (1 << 2) +#define FRS0_LMFA (1 << 1) + +#define FRS1_XLS (1 << 1) +#define FRS1_XLO (1) + +#define ISR2_FAR (1 << 7) +#define ISR2_LFA (1 << 6) +#define ISR2_MFAR (1 << 5) +#define ISR2_T400MS (1 << 4) +#define ISR2_AIS (1 << 3) +#define ISR2_LOS (1 << 2) +#define ISR2_RAR (1 << 1) +#define ISR2_RA (1) + +#define ISR3_ES (1 << 7) +#define ISR3_SEC (1 << 6) +#define ISR3_LMFA16 (1 << 5) +#define ISR3_AIS16 (1 << 4) +#define ISR3_RA16 (1 << 3) +#define ISR3_API (1 << 2) +#define ISR3_RSN (1 << 1) +#define ISR3_RSP (1) + +#define ISR5_XSP (1 << 7) +#define ISR5_XSN (1 << 6) diff --git a/drivers/net/wan/hscx.h b/drivers/net/wan/hscx.h new file mode 100644 index 000000000..675b7b1f1 --- /dev/null +++ b/drivers/net/wan/hscx.h @@ -0,0 +1,103 @@ 
+#define HSCX_MTU 1600 + +#define HSCX_ISTA 0x00 +#define HSCX_MASK 0x00 +#define HSCX_STAR 0x01 +#define HSCX_CMDR 0x01 +#define HSCX_MODE 0x02 +#define HSCX_TIMR 0x03 +#define HSCX_EXIR 0x04 +#define HSCX_XAD1 0x04 +#define HSCX_RBCL 0x05 +#define HSCX_SAD2 0x05 +#define HSCX_RAH1 0x06 +#define HSCX_RSTA 0x07 +#define HSCX_RAH2 0x07 +#define HSCX_RAL1 0x08 +#define HSCX_RCHR 0x09 +#define HSCX_RAL2 0x09 +#define HSCX_XBCL 0x0a +#define HSCX_BGR 0x0b +#define HSCX_CCR2 0x0c +#define HSCX_RBCH 0x0d +#define HSCX_XBCH 0x0d +#define HSCX_VSTR 0x0e +#define HSCX_RLCR 0x0e +#define HSCX_CCR1 0x0f +#define HSCX_FIFO 0x1e + +#define HSCX_HSCX_CHOFFS 0x400 +#define HSCX_SEROFFS 0x1000 + +#define HSCX_RME 0x80 +#define HSCX_RPF 0x40 +#define HSCX_RSC 0x20 +#define HSCX_XPR 0x10 +#define HSCX_TIN 0x08 +#define HSCX_ICA 0x04 +#define HSCX_EXA 0x02 +#define HSCX_EXB 0x01 + +#define HSCX_XMR 0x80 +#define HSCX_XDU 0x40 +#define HSCX_EXE 0x40 +#define HSCX_PCE 0x20 +#define HSCX_RFO 0x10 +#define HSCX_CSC 0x08 +#define HSCX_RFS 0x04 + +#define HSCX_XDOV 0x80 +#define HSCX_XFW 0x40 +#define HSCX_XRNR 0x20 +#define HSCX_RRNR 0x10 +#define HSCX_RLI 0x08 +#define HSCX_CEC 0x04 +#define HSCX_CTS 0x02 +#define HSCX_WFA 0x01 + +#define HSCX_RMC 0x80 +#define HSCX_RHR 0x40 +#define HSCX_RNR 0x20 +#define HSCX_XREP 0x20 +#define HSCX_STI 0x10 +#define HSCX_XTF 0x08 +#define HSCX_XIF 0x04 +#define HSCX_XME 0x02 +#define HSCX_XRES 0x01 + +#define HSCX_AUTO 0x00 +#define HSCX_NONAUTO 0x40 +#define HSCX_TRANS 0x80 +#define HSCX_XTRANS 0xc0 +#define HSCX_ADM16 0x20 +#define HSCX_ADM8 0x00 +#define HSCX_TMD_EXT 0x00 +#define HSCX_TMD_INT 0x10 +#define HSCX_RAC 0x08 +#define HSCX_RTS 0x04 +#define HSCX_TLP 0x01 + +#define HSCX_VFR 0x80 +#define HSCX_RDO 0x40 +#define HSCX_CRC 0x20 +#define HSCX_RAB 0x10 + +#define HSCX_CIE 0x04 +#define HSCX_RIE 0x02 + +#define HSCX_DMA 0x80 +#define HSCX_NRM 0x40 +#define HSCX_CAS 0x20 +#define HSCX_XC 0x10 + +#define HSCX_OV 0x10 + +#define HSCX_CD 0x80 + 
+#define HSCX_RC 0x80 + +#define HSCX_PU 0x80 +#define HSCX_NRZ 0x00 +#define HSCX_NRZI 0x40 +#define HSCX_ODS 0x10 +#define HSCX_ITF 0x08 diff --git a/drivers/net/wan/mixcom.h b/drivers/net/wan/mixcom.h new file mode 100644 index 000000000..1815eef75 --- /dev/null +++ b/drivers/net/wan/mixcom.h @@ -0,0 +1,35 @@ +/* + * Defines for the mixcom board + * + * Author: Gergely Madarasz + * + * Copyright (C) 1999 ITConsult-Pro Co. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#define MIXCOM_IO_EXTENT 0x20 + +#define MIXCOM_DEFAULT_IO 0x180 +#define MIXCOM_DEFAULT_IRQ 5 + +#define MIXCOM_ID 0x11 +#define MIXCOM_SERIAL_OFFSET 0x1000 +#define MIXCOM_CHANNEL_OFFSET 0x400 +#define MIXCOM_IT_OFFSET 0xc14 +#define MIXCOM_STATUS_OFFSET 0xc14 +#define MIXCOM_ID_OFFSET 0xc10 +#define MIXCOM_ON 0x1 +#define MIXCOM_OFF 0x0 + +/* Status register bits */ + +#define MIXCOM_CTSB 0x1 +#define MIXCOM_CTSA 0x2 +#define MIXCOM_CHANNELNO 0x20 +#define MIXCOM_POWERFAIL 0x40 +#define MIXCOM_BOOT 0x80 diff --git a/drivers/net/wan/munich32x.h b/drivers/net/wan/munich32x.h new file mode 100644 index 000000000..8f151f2ed --- /dev/null +++ b/drivers/net/wan/munich32x.h @@ -0,0 +1,191 @@ +/* + * Defines for comx-hw-slicecom.c - MUNICH32X specific + * + * Author: Bartok Istvan + * Last modified: Tue Jan 11 14:27:36 CET 2000 + * + * :set tabstop=6 + */ + +#define TXBUFFER_SIZE 1536 /* Max mennyit tud a kartya hardver atvenni */ +#define RXBUFFER_SIZE (TXBUFFER_SIZE+4) /* For Rx reasons it must be a multiple of 4, and =>4 (page 265) */ + /* +4 .. see page 265, bit FE */ + /* TOD: a MODE1-be nem is ezt teszem, hanem a TXBUFFER-t, lehet hogy nem is kell? 
*/ + +//#define PCI_VENDOR_ID_SIEMENS 0x110a +#define PCI_DEVICE_ID_SIEMENS_MUNICH32X 0x2101 + +/* + * PCI config space registers (page 120) + */ + +#define MUNICH_PCI_PCIRES 0x4c /* 0xe0000 resets the chip */ + + +/* + * MUNICH slave register offsets relative to base_address[0] (PCI BAR1) (page 181): + * offsets are in bytes, registers are u32's, so we need a >>2 for indexing + * the int[] by byte offsets. Use it like: + * + * bar1[ STAT ] = ~0L; or + * x = bar1[ STAT ]; + */ + +#define CONF (0x00 >> 2) +#define CMD (0x04 >> 2) +#define STAT (0x08 >> 2) +#define STACK (0x08 >> 2) +#define IMASK (0x0c >> 2) +#define PIQBA (0x14 >> 2) +#define PIQL (0x18 >> 2) +#define MODE1 (0x20 >> 2) +#define MODE2 (0x24 >> 2) +#define CCBA (0x28 >> 2) +#define TXPOLL (0x2c >> 2) +#define TIQBA (0x30 >> 2) +#define TIQL (0x34 >> 2) +#define RIQBA (0x38 >> 2) +#define RIQL (0x3c >> 2) +#define LCONF (0x40 >> 2) /* LBI Configuration Register */ +#define LCCBA (0x44 >> 2) /* LBI Configuration Control Block */ /* DE: lehet hogy nem is kell? */ +#define LTIQBA (0x50 >> 2) /* DE: lehet hogy nem is kell? page 210: LBI DMA Controller intq - nem hasznalunk DMA-t.. */ +#define LTIQL (0x54 >> 2) /* DE: lehet hogy nem is kell? */ +#define LRIQBA (0x58 >> 2) /* DE: lehet hogy nem is kell? */ +#define LRIQL (0x5c >> 2) /* DE: lehet hogy nem is kell? 
*/ +#define LREG0 (0x60 >> 2) /* LBI Indirect External Configuration register 0 */ +#define LREG1 (0x64 >> 2) +#define LREG2 (0x68 >> 2) +#define LREG3 (0x6c >> 2) +#define LREG4 (0x70 >> 2) +#define LREG5 (0x74 >> 2) +#define LREG6 (0x78 >> 2) /* LBI Indirect External Configuration register 6 */ +#define LSTAT (0x7c >> 2) /* LBI Status Register */ +#define GPDIR (0x80 >> 2) /* General Purpose Bus DIRection - 0..input, 1..output */ +#define GPDATA (0x84 >> 2) /* General Purpose Bus DATA */ + + +/* + * MUNICH commands: (they go into register CMD) + */ + +#define CMD_ARPCM 0x01 /* Action Request Serial PCM Core */ +#define CMD_ARLBI 0x02 /* Action Request LBI */ + + +/* + * MUNICH event bits in the STAT, STACK, IMASK registers (page 188,189) + */ + +#define STAT_PTI (1 << 15) +#define STAT_PRI (1 << 14) +#define STAT_LTI (1 << 13) +#define STAT_LRI (1 << 12) +#define STAT_IOMI (1 << 11) +#define STAT_SSCI (1 << 10) +#define STAT_LBII (1 << 9) +#define STAT_MBI (1 << 8) + +#define STAT_TI (1 << 6) +#define STAT_TSPA (1 << 5) +#define STAT_RSPA (1 << 4) +#define STAT_LBIF (1 << 3) +#define STAT_LBIA (1 << 2) +#define STAT_PCMF (1 << 1) +#define STAT_PCMA (1) + +/* + * We do not handle these (and do not touch their STAT bits) in the interrupt loop + */ + +#define STAT_NOT_HANDLED_BY_INTERRUPT (STAT_PCMF | STAT_PCMA) + + +/* + * MUNICH MODE1/MODE2 slave register fields (page 193,196) + * these are not all masks, MODE1_XX_YY are my magic values! + */ + +#define MODE1_PCM_E1 (1 << 31) /* E1, 2.048 Mbit/sec */ +#define MODE1_TBS_4 (1 << 24) /* TBS = 4 .. no Tx bit shift */ +#define MODE1_RBS_4 (1 << 18) /* RBS = 4 .. 
no Rx bit shift */ +#define MODE1_REN (1 << 15) /* Rx Enable */ +#define MODE1_MFL_MY TXBUFFER_SIZE /* Maximum Frame Length */ +#define MODE1_MAGIC (MODE1_PCM_E1 | MODE1_TBS_4 | MODE1_RBS_4 | MODE1_REN | MODE1_MFL_MY) + +#define MODE2_HPOLL (1 << 8) /* Hold Poll */ +#define MODE2_SPOLL (1 << 7) /* Slow Poll */ +#define MODE2_TSF (1) /* real magic - discovered by probing :) */ +// #define MODE2_MAGIC (MODE2_TSF) +#define MODE2_MAGIC (MODE2_SPOLL | MODE2_TSF) + + +/* + * LCONF bits (page 205) + * these are not all masks, LCONF_XX_YY are my magic values! + */ + +#define LCONF_IPA (1 << 31) /* Interrupt Pass. Use 1 for FALC54 */ +#define LCONF_DCA (1 << 30) /* Disregard the int's for Channel A - DMSM does not try to handle them */ +#define LCONF_DCB (1 << 29) /* Disregard the int's for Channel B */ +#define LCONF_EBCRES (1 << 22) /* Reset LBI External Bus Controller, 0..reset, 1..normal operation */ +#define LCONF_LBIRES (1 << 21) /* Reset LBI DMSM, 0..reset, 1..normal operation */ +#define LCONF_BTYP_16DEMUX (1 << 7) /* 16-bit demultiplexed bus */ +#define LCONF_ABM (1 << 4) /* Arbitration Master */ + +/* writing LCONF_MAGIC1 followed by a LCONF_MAGIC2 into LCONF resets the EBC and DMSM: */ + +#define LCONF_MAGIC1 (LCONF_BTYP_16DEMUX | LCONF_ABM | LCONF_IPA | LCONF_DCA | LCONF_DCB) +#define LCONF_MAGIC2 (LCONF_MAGIC1 | LCONF_EBCRES | LCONF_LBIRES) + + +/* + * LREGx magic values if a FALC54 is on the LBI (page 217) + */ + +#define LREG0_MAGIC 0x00000264 +#define LREG1_MAGIC 0x6e6a6b66 +#define LREG2_MAGIC 0x00000264 +#define LREG3_MAGIC 0x6e686966 +#define LREG4_MAGIC 0x00000000 +#define LREG5_MAGIC ( (7<<27) | (3<<24) | (1<<21) | (7<<3) | (2<<9) ) + + +/* + * PCM Action Specification fields (munich_ccb_t.action_spec) + */ + +#define CCB_ACTIONSPEC_IN (1 << 15) /* init */ +#define CCB_ACTIONSPEC_ICO (1 << 14) /* init only this channel */ +#define CCB_ACTIONSPEC_RES (1 << 6) /* reset all channels */ +#define CCB_ACTIONSPEC_LOC (1 << 5) +#define CCB_ACTIONSPEC_LOOP (1 << 
4) +#define CCB_ACTIONSPEC_LOOPI (1 << 3) +#define CCB_ACTIONSPEC_IA (1 << 2) + + +/* + * Interrupt Information bits in the TIQ, RIQ + */ + +#define PCM_INT_HI (1 << 12) +#define PCM_INT_FI (1 << 11) +#define PCM_INT_IFC (1 << 10) +#define PCM_INT_SF (1 << 9) +#define PCM_INT_ERR (1 << 8) +#define PCM_INT_FO (1 << 7) +#define PCM_INT_FE2 (1 << 6) + +#define PCM_INT_CHANNEL( info ) (info & 0x1F) + + +/* + * Rx status info in the rx_desc_t.status + */ + +#define RX_STATUS_SF (1 << 6) +#define RX_STATUS_LOSS (1 << 5) +#define RX_STATUS_CRCO (1 << 4) +#define RX_STATUS_NOB (1 << 3) +#define RX_STATUS_LFD (1 << 2) +#define RX_STATUS_RA (1 << 1) +#define RX_STATUS_ROF 1 diff --git a/drivers/pcmcia/sa1100.h b/drivers/pcmcia/sa1100.h new file mode 100644 index 000000000..d2defe598 --- /dev/null +++ b/drivers/pcmcia/sa1100.h @@ -0,0 +1,164 @@ +/*====================================================================== + + Device driver for the PCMCIA control functionality of StrongARM + SA-1100 microprocessors. + + The contents of this file are subject to the Mozilla Public + License Version 1.1 (the "License"); you may not use this file + except in compliance with the License. You may obtain a copy of + the License at http://www.mozilla.org/MPL/ + + Software distributed under the License is distributed on an "AS + IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + implied. See the License for the specific language governing + rights and limitations under the License. + + The initial developer of the original code is John G. Dorsey + . Portions created by John G. Dorsey are + Copyright (C) 1999 John G. Dorsey. All Rights Reserved. + + Alternatively, the contents of this file may be used under the + terms of the GNU Public License version 2 (the "GPL"), in which + case the provisions of the GPL are applicable instead of the + above. 
If you wish to allow the use of your version of this file + only under the terms of the GPL and not to allow others to use + your version of this file under the MPL, indicate your decision + by deleting the provisions above and replace them with the notice + and other provisions required by the GPL. If you do not delete + the provisions above, a recipient may use your version of this + file under either the MPL or the GPL. + +======================================================================*/ + +#if !defined(_PCMCIA_SA1100_H) +# define _PCMCIA_SA1100_H + +#include +#include +#include +#include +#include "cs_internal.h" +#include "sa1100_generic.h" + +/* MECR: Expansion Memory Configuration Register + * (SA-1100 Developers Manual, p.10-13; SA-1110 Developers Manual, p.10-24) + * + * MECR layout is: + * + * FAST1 BSM1<4:0> BSA1<4:0> BSIO1<4:0> FAST0 BSM0<4:0> BSA0<4:0> BSIO0<4:0> + * + * (This layout is actually true only for the SA-1110; the FASTn bits are + * reserved on the SA-1100.) 
+ */ + +#define MECR_SOCKET_0_SHIFT (0) +#define MECR_SOCKET_1_SHIFT (16) + +#define MECR_BS_MASK (0x1f) +#define MECR_FAST_MODE_MASK (0x01) + +#define MECR_BSIO_SHIFT (0) +#define MECR_BSA_SHIFT (5) +#define MECR_BSM_SHIFT (10) +#define MECR_FAST_SHIFT (15) + +#define MECR_SET(mecr, sock, shift, mask, bs) \ +((mecr)=((mecr)&~(((mask)<<(shift))<<\ + ((sock)==0?MECR_SOCKET_0_SHIFT:MECR_SOCKET_1_SHIFT)))|\ + (((bs)<<(shift))<<((sock)==0?MECR_SOCKET_0_SHIFT:MECR_SOCKET_1_SHIFT))) + +#define MECR_GET(mecr, sock, shift, mask) \ +((((mecr)>>(((sock)==0)?MECR_SOCKET_0_SHIFT:MECR_SOCKET_1_SHIFT))>>\ + (shift))&(mask)) + +#define MECR_BSIO_SET(mecr, sock, bs) \ +MECR_SET((mecr), (sock), MECR_BSIO_SHIFT, MECR_BS_MASK, (bs)) + +#define MECR_BSIO_GET(mecr, sock) \ +MECR_GET((mecr), (sock), MECR_BSIO_SHIFT, MECR_BS_MASK) + +#define MECR_BSA_SET(mecr, sock, bs) \ +MECR_SET((mecr), (sock), MECR_BSA_SHIFT, MECR_BS_MASK, (bs)) + +#define MECR_BSA_GET(mecr, sock) \ +MECR_GET((mecr), (sock), MECR_BSA_SHIFT, MECR_BS_MASK) + +#define MECR_BSM_SET(mecr, sock, bs) \ +MECR_SET((mecr), (sock), MECR_BSM_SHIFT, MECR_BS_MASK, (bs)) + +#define MECR_BSM_GET(mecr, sock) \ +MECR_GET((mecr), (sock), MECR_BSM_SHIFT, MECR_BS_MASK) + +#define MECR_FAST_SET(mecr, sock, fast) \ +MECR_SET((mecr), (sock), MECR_FAST_SHIFT, MECR_FAST_MODE_MASK, (fast)) + +#define MECR_FAST_GET(mecr, sock) \ +MECR_GET((mecr), (sock), MECR_FAST_SHIFT, MECR_FAST_MODE_MASK) + + +/* This function implements the BS value calculation for setting the MECR + * using integer arithmetic: + */ +static inline unsigned int sa1100_pcmcia_mecr_bs(unsigned int pcmcia_cycle_ns, + unsigned int cpu_clock_khz){ + unsigned int t = ((pcmcia_cycle_ns * cpu_clock_khz) / 6) - 1000000; + return (t / 1000000) + (((t % 1000000) == 0) ? 
0 : 1); +} + +/* This function returns the (approxmiate) command assertion period, in + * nanoseconds, for a given CPU clock frequency and MECR BS value: + */ +static inline unsigned int sa1100_pcmcia_cmd_time(unsigned int cpu_clock_khz, + unsigned int pcmcia_mecr_bs){ + return (((10000000 * 2) / cpu_clock_khz) * (3 * (pcmcia_mecr_bs + 1))) / 10; +} + + +/* SA-1100 PCMCIA Memory and I/O timing + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * The SA-1110 Developer's Manual, section 10.2.5, says the following: + * + * "To calculate the recommended BS_xx value for each address space: + * divide the command width time (the greater of twIOWR and twIORD, + * or the greater of twWE and twOE) by processor cycle time; divide + * by 2; divide again by 3 (number of BCLK's per command assertion); + * round up to the next whole number; and subtract 1." + * + * The PC Card Standard, Release 7, section 4.13.4, says that twIORD + * has a minimum value of 165ns. Section 4.13.5 says that twIOWR has + * a minimum value of 165ns, as well. Section 4.7.2 (describing + * common and attribute memory write timing) says that twWE has a + * minimum value of 150ns for a 250ns cycle time (for 5V operation; + * see section 4.7.4), or 300ns for a 600ns cycle time (for 3.3V + * operation, also section 4.7.4). Section 4.7.3 says that taOE + * has a maximum value of 150ns for a 300ns cycle time (for 5V + * operation), or 300ns for a 600ns cycle time (for 3.3V operation). + * + * When configuring memory maps, Card Services appears to adopt the policy + * that a memory access time of "0" means "use the default." The default + * PCMCIA I/O command width time is 165ns. The default PCMCIA 5V attribute + * and memory command width time is 150ns; the PCMCIA 3.3V attribute and + * memory command width time is 300ns. 
+ */ +#define SA1100_PCMCIA_IO_ACCESS (165) +#define SA1100_PCMCIA_5V_MEM_ACCESS (150) +#define SA1100_PCMCIA_3V_MEM_ACCESS (300) + + +/* The socket driver actually works nicely in interrupt-driven form, + * so the (relatively infrequent) polling is "just to be sure." + */ +#define SA1100_PCMCIA_POLL_PERIOD (2*HZ) + +struct pcmcia_low_level; + +/* I/O pins replacing memory pins + * (PCMCIA System Architecture, 2nd ed., by Don Anderson, p.75) + * + * These signals change meaning when going from memory-only to + * memory-or-I/O interface: + */ +#define iostschg bvd1 +#define iospkr bvd2 + +#endif /* !defined(_PCMCIA_SA1100_H) */ diff --git a/drivers/pcmcia/sa11xx_core.c b/drivers/pcmcia/sa11xx_core.c new file mode 100644 index 000000000..d7249c033 --- /dev/null +++ b/drivers/pcmcia/sa11xx_core.c @@ -0,0 +1,971 @@ +/*====================================================================== + + Device driver for the PCMCIA control functionality of StrongARM + SA-1100 microprocessors. + + The contents of this file are subject to the Mozilla Public + License Version 1.1 (the "License"); you may not use this file + except in compliance with the License. You may obtain a copy of + the License at http://www.mozilla.org/MPL/ + + Software distributed under the License is distributed on an "AS + IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + implied. See the License for the specific language governing + rights and limitations under the License. + + The initial developer of the original code is John G. Dorsey + . Portions created by John G. Dorsey are + Copyright (C) 1999 John G. Dorsey. All Rights Reserved. + + Alternatively, the contents of this file may be used under the + terms of the GNU Public License version 2 (the "GPL"), in which + case the provisions of the GPL are applicable instead of the + above. 
If you wish to allow the use of your version of this file + only under the terms of the GPL and not to allow others to use + your version of this file under the MPL, indicate your decision + by deleting the provisions above and replace them with the notice + and other provisions required by the GPL. If you do not delete + the provisions above, a recipient may use your version of this + file under either the MPL or the GPL. + +======================================================================*/ +/* + * Please see linux/Documentation/arm/SA1100/PCMCIA for more information + * on the low-level kernel interface. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "sa11xx_core.h" +#include "sa1100.h" + +#ifdef DEBUG +static int pc_debug; + +module_param(pc_debug, int, 0644); + +#define debug(skt, lvl, fmt, arg...) do { \ + if (pc_debug > (lvl)) \ + printk(KERN_DEBUG "skt%u: %s: " fmt, \ + (skt)->nr, __func__ , ## arg); \ +} while (0) + +#else +#define debug(skt, lvl, fmt, arg...) do { } while (0) +#endif + +#define to_sa1100_socket(x) container_of(x, struct sa1100_pcmcia_socket, socket) + +/* + * sa1100_pcmcia_default_mecr_timing + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * + * Calculate MECR clock wait states for given CPU clock + * speed and command wait state. This function can be over- + * written by a board specific version. 
+ * + * The default is to simply calculate the BS values as specified in + * the INTEL SA1100 development manual + * "Expansion Memory (PCMCIA) Configuration Register (MECR)" + * that's section 10.2.5 in _my_ version of the manual ;) + */ +static unsigned int +sa1100_pcmcia_default_mecr_timing(struct sa1100_pcmcia_socket *skt, + unsigned int cpu_speed, + unsigned int cmd_time) +{ + return sa1100_pcmcia_mecr_bs(cmd_time, cpu_speed); +} + +static unsigned short +calc_speed(unsigned short *spds, int num, unsigned short dflt) +{ + unsigned short speed = 0; + int i; + + for (i = 0; i < num; i++) + if (speed < spds[i]) + speed = spds[i]; + if (speed == 0) + speed = dflt; + + return speed; +} + +/* sa1100_pcmcia_set_mecr() + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * + * set MECR value for socket based on this sockets + * io, mem and attribute space access speed. + * Call board specific BS value calculation to allow boards + * to tweak the BS values. + */ +static int +sa1100_pcmcia_set_mecr(struct sa1100_pcmcia_socket *skt, unsigned int cpu_clock) +{ + u32 mecr, old_mecr; + unsigned long flags; + unsigned short speed; + unsigned int bs_io, bs_mem, bs_attr; + + speed = calc_speed(skt->spd_io, MAX_IO_WIN, SA1100_PCMCIA_IO_ACCESS); + bs_io = skt->ops->socket_get_timing(skt, cpu_clock, speed); + + speed = calc_speed(skt->spd_mem, MAX_WIN, SA1100_PCMCIA_3V_MEM_ACCESS); + bs_mem = skt->ops->socket_get_timing(skt, cpu_clock, speed); + + speed = calc_speed(skt->spd_attr, MAX_WIN, SA1100_PCMCIA_3V_MEM_ACCESS); + bs_attr = skt->ops->socket_get_timing(skt, cpu_clock, speed); + + local_irq_save(flags); + + old_mecr = mecr = MECR; + MECR_FAST_SET(mecr, skt->nr, 0); + MECR_BSIO_SET(mecr, skt->nr, bs_io); + MECR_BSA_SET(mecr, skt->nr, bs_attr); + MECR_BSM_SET(mecr, skt->nr, bs_mem); + if (old_mecr != mecr) + MECR = mecr; + + local_irq_restore(flags); + + debug(skt, 2, "FAST %X BSM %X BSA %X BSIO %X\n", + MECR_FAST_GET(mecr, skt->nr), + MECR_BSM_GET(mecr, skt->nr), MECR_BSA_GET(mecr, skt->nr), + 
MECR_BSIO_GET(mecr, skt->nr)); + + return 0; +} + +static unsigned int sa1100_pcmcia_skt_state(struct sa1100_pcmcia_socket *skt) +{ + struct pcmcia_state state; + unsigned int stat; + + memset(&state, 0, sizeof(struct pcmcia_state)); + + skt->ops->socket_state(skt, &state); + + stat = state.detect ? SS_DETECT : 0; + stat |= state.ready ? SS_READY : 0; + stat |= state.wrprot ? SS_WRPROT : 0; + stat |= state.vs_3v ? SS_3VCARD : 0; + stat |= state.vs_Xv ? SS_XVCARD : 0; + + /* The power status of individual sockets is not available + * explicitly from the hardware, so we just remember the state + * and regurgitate it upon request: + */ + stat |= skt->cs_state.Vcc ? SS_POWERON : 0; + + if (skt->cs_state.flags & SS_IOCARD) + stat |= state.bvd1 ? SS_STSCHG : 0; + else { + if (state.bvd1 == 0) + stat |= SS_BATDEAD; + else if (state.bvd2 == 0) + stat |= SS_BATWARN; + } + return stat; +} + +/* + * sa1100_pcmcia_config_skt + * ^^^^^^^^^^^^^^^^^^^^^^^^ + * + * Convert PCMCIA socket state to our socket configure structure. + */ +static int +sa1100_pcmcia_config_skt(struct sa1100_pcmcia_socket *skt, socket_state_t *state) +{ + int ret; + + ret = skt->ops->configure_socket(skt, state); + if (ret == 0) { + /* + * This really needs a better solution. The IRQ + * may or may not be claimed by the driver. + */ + if (skt->irq_state != 1 && state->io_irq) { + skt->irq_state = 1; + set_irq_type(skt->irq, IRQT_FALLING); + } else if (skt->irq_state == 1 && state->io_irq == 0) { + skt->irq_state = 0; + set_irq_type(skt->irq, IRQT_NOEDGE); + } + + skt->cs_state = *state; + } + + if (ret < 0) + printk(KERN_ERR "sa1100_pcmcia: unable to configure " + "socket %d\n", skt->nr); + + return ret; +} + +/* sa1100_pcmcia_sock_init() + * ^^^^^^^^^^^^^^^^^^^^^^^^^ + * + * (Re-)Initialise the socket, turning on status interrupts + * and PCMCIA bus. This must wait for power to stabilise + * so that the card status signals report correctly. 
+ * + * Returns: 0 + */ +static int sa1100_pcmcia_sock_init(struct pcmcia_socket *sock) +{ + struct sa1100_pcmcia_socket *skt = to_sa1100_socket(sock); + + debug(skt, 2, "initializing socket\n"); + + skt->ops->socket_init(skt); + return 0; +} + + +/* + * sa1100_pcmcia_suspend() + * ^^^^^^^^^^^^^^^^^^^^^^^ + * + * Remove power on the socket, disable IRQs from the card. + * Turn off status interrupts, and disable the PCMCIA bus. + * + * Returns: 0 + */ +static int sa1100_pcmcia_suspend(struct pcmcia_socket *sock) +{ + struct sa1100_pcmcia_socket *skt = to_sa1100_socket(sock); + int ret; + + debug(skt, 2, "suspending socket\n"); + + ret = sa1100_pcmcia_config_skt(skt, &dead_socket); + if (ret == 0) + skt->ops->socket_suspend(skt); + + return ret; +} + +static spinlock_t status_lock = SPIN_LOCK_UNLOCKED; + +/* sa1100_check_status() + * ^^^^^^^^^^^^^^^^^^^^^ + */ +static void sa1100_check_status(struct sa1100_pcmcia_socket *skt) +{ + unsigned int events; + + debug(skt, 4, "entering PCMCIA monitoring thread\n"); + + do { + unsigned int status; + unsigned long flags; + + status = sa1100_pcmcia_skt_state(skt); + + spin_lock_irqsave(&status_lock, flags); + events = (status ^ skt->status) & skt->cs_state.csc_mask; + skt->status = status; + spin_unlock_irqrestore(&status_lock, flags); + + debug(skt, 4, "events: %s%s%s%s%s%s\n", + events == 0 ? "" : "", + events & SS_DETECT ? "DETECT " : "", + events & SS_READY ? "READY " : "", + events & SS_BATDEAD ? "BATDEAD " : "", + events & SS_BATWARN ? "BATWARN " : "", + events & SS_STSCHG ? "STSCHG " : ""); + + if (events) + pcmcia_parse_events(&skt->socket, events); + } while (events); +} + +/* sa1100_pcmcia_poll_event() + * ^^^^^^^^^^^^^^^^^^^^^^^^^^ + * Let's poll for events in addition to IRQs since IRQ only is unreliable... 
+ */ +static void sa1100_pcmcia_poll_event(unsigned long dummy) +{ + struct sa1100_pcmcia_socket *skt = (struct sa1100_pcmcia_socket *)dummy; + debug(skt, 4, "polling for events\n"); + + mod_timer(&skt->poll_timer, jiffies + SA1100_PCMCIA_POLL_PERIOD); + + sa1100_check_status(skt); +} + + +/* sa1100_pcmcia_interrupt() + * ^^^^^^^^^^^^^^^^^^^^^^^^^ + * Service routine for socket driver interrupts (requested by the + * low-level PCMCIA init() operation via sa1100_pcmcia_thread()). + * The actual interrupt-servicing work is performed by + * sa1100_pcmcia_thread(), largely because the Card Services event- + * handling code performs scheduling operations which cannot be + * executed from within an interrupt context. + */ +static irqreturn_t sa1100_pcmcia_interrupt(int irq, void *dev, struct pt_regs *regs) +{ + struct sa1100_pcmcia_socket *skt = dev; + + debug(skt, 3, "servicing IRQ %d\n", irq); + + sa1100_check_status(skt); + + return IRQ_HANDLED; +} + + +/* sa1100_pcmcia_get_status() + * ^^^^^^^^^^^^^^^^^^^^^^^^^^ + * Implements the get_status() operation for the in-kernel PCMCIA + * service (formerly SS_GetStatus in Card Services). Essentially just + * fills in bits in `status' according to internal driver state or + * the value of the voltage detect chipselect register. + * + * As a debugging note, during card startup, the PCMCIA core issues + * three set_socket() commands in a row the first with RESET deasserted, + * the second with RESET asserted, and the last with RESET deasserted + * again. Following the third set_socket(), a get_status() command will + * be issued. The kernel is looking for the SS_READY flag (see + * setup_socket(), reset_socket(), and unreset_socket() in cs.c). 
+ * + * Returns: 0 + */ +static int +sa1100_pcmcia_get_status(struct pcmcia_socket *sock, unsigned int *status) +{ + struct sa1100_pcmcia_socket *skt = to_sa1100_socket(sock); + + skt->status = sa1100_pcmcia_skt_state(skt); + *status = skt->status; + + return 0; +} + + +/* sa1100_pcmcia_get_socket() + * ^^^^^^^^^^^^^^^^^^^^^^^^^^ + * Implements the get_socket() operation for the in-kernel PCMCIA + * service (formerly SS_GetSocket in Card Services). Not a very + * exciting routine. + * + * Returns: 0 + */ +static int +sa1100_pcmcia_get_socket(struct pcmcia_socket *sock, socket_state_t *state) +{ + struct sa1100_pcmcia_socket *skt = to_sa1100_socket(sock); + + debug(skt, 2, "\n"); + + *state = skt->cs_state; + + return 0; +} + +/* sa1100_pcmcia_set_socket() + * ^^^^^^^^^^^^^^^^^^^^^^^^^^ + * Implements the set_socket() operation for the in-kernel PCMCIA + * service (formerly SS_SetSocket in Card Services). We more or + * less punt all of this work and let the kernel handle the details + * of power configuration, reset, &c. We also record the value of + * `state' in order to regurgitate it to the PCMCIA core later. + * + * Returns: 0 + */ +static int +sa1100_pcmcia_set_socket(struct pcmcia_socket *sock, socket_state_t *state) +{ + struct sa1100_pcmcia_socket *skt = to_sa1100_socket(sock); + + debug(skt, 2, "mask: %s%s%s%s%s%sflags: %s%s%s%s%s%sVcc %d Vpp %d irq %d\n", + (state->csc_mask==0)?" ":"", + (state->csc_mask&SS_DETECT)?"DETECT ":"", + (state->csc_mask&SS_READY)?"READY ":"", + (state->csc_mask&SS_BATDEAD)?"BATDEAD ":"", + (state->csc_mask&SS_BATWARN)?"BATWARN ":"", + (state->csc_mask&SS_STSCHG)?"STSCHG ":"", + (state->flags==0)?" 
":"", + (state->flags&SS_PWR_AUTO)?"PWR_AUTO ":"", + (state->flags&SS_IOCARD)?"IOCARD ":"", + (state->flags&SS_RESET)?"RESET ":"", + (state->flags&SS_SPKR_ENA)?"SPKR_ENA ":"", + (state->flags&SS_OUTPUT_ENA)?"OUTPUT_ENA ":"", + state->Vcc, state->Vpp, state->io_irq); + + return sa1100_pcmcia_config_skt(skt, state); +} /* sa1100_pcmcia_set_socket() */ + + +/* sa1100_pcmcia_set_io_map() + * ^^^^^^^^^^^^^^^^^^^^^^^^^^ + * Implements the set_io_map() operation for the in-kernel PCMCIA + * service (formerly SS_SetIOMap in Card Services). We configure + * the map speed as requested, but override the address ranges + * supplied by Card Services. + * + * Returns: 0 on success, -1 on error + */ +static int +sa1100_pcmcia_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *map) +{ + struct sa1100_pcmcia_socket *skt = to_sa1100_socket(sock); + unsigned short speed = map->speed; + + debug(skt, 2, "map %u speed %u start 0x%08x stop 0x%08x\n", + map->map, map->speed, map->start, map->stop); + debug(skt, 2, "flags: %s%s%s%s%s%s%s%s\n", + (map->flags==0)?"":"", + (map->flags&MAP_ACTIVE)?"ACTIVE ":"", + (map->flags&MAP_16BIT)?"16BIT ":"", + (map->flags&MAP_AUTOSZ)?"AUTOSZ ":"", + (map->flags&MAP_0WS)?"0WS ":"", + (map->flags&MAP_WRPROT)?"WRPROT ":"", + (map->flags&MAP_USE_WAIT)?"USE_WAIT ":"", + (map->flags&MAP_PREFETCH)?"PREFETCH ":""); + + if (map->map >= MAX_IO_WIN) { + printk(KERN_ERR "%s(): map (%d) out of range\n", __FUNCTION__, + map->map); + return -1; + } + + if (map->flags & MAP_ACTIVE) { + if (speed == 0) + speed = SA1100_PCMCIA_IO_ACCESS; + } else { + speed = 0; + } + + skt->spd_io[map->map] = speed; + sa1100_pcmcia_set_mecr(skt, cpufreq_get(0)); + + if (map->stop == 1) + map->stop = PAGE_SIZE-1; + + map->stop -= map->start; + map->stop += (unsigned long)skt->virt_io; + map->start = (unsigned long)skt->virt_io; + + return 0; +} /* sa1100_pcmcia_set_io_map() */ + + +/* sa1100_pcmcia_set_mem_map() + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * Implements the set_mem_map() 
operation for the in-kernel PCMCIA + * service (formerly SS_SetMemMap in Card Services). We configure + * the map speed as requested, but override the address ranges + * supplied by Card Services. + * + * Returns: 0 on success, -1 on error + */ +static int +sa1100_pcmcia_set_mem_map(struct pcmcia_socket *sock, struct pccard_mem_map *map) +{ + struct sa1100_pcmcia_socket *skt = to_sa1100_socket(sock); + struct resource *res; + unsigned short speed = map->speed; + + debug(skt, 2, "map %u speed %u card_start %08x\n", + map->map, map->speed, map->card_start); + debug(skt, 2, "flags: %s%s%s%s%s%s%s%s\n", + (map->flags==0)?"":"", + (map->flags&MAP_ACTIVE)?"ACTIVE ":"", + (map->flags&MAP_16BIT)?"16BIT ":"", + (map->flags&MAP_AUTOSZ)?"AUTOSZ ":"", + (map->flags&MAP_0WS)?"0WS ":"", + (map->flags&MAP_WRPROT)?"WRPROT ":"", + (map->flags&MAP_ATTRIB)?"ATTRIB ":"", + (map->flags&MAP_USE_WAIT)?"USE_WAIT ":""); + + if (map->map >= MAX_WIN) + return -EINVAL; + + if (map->flags & MAP_ACTIVE) { + if (speed == 0) + speed = 300; + } else { + speed = 0; + } + + if (map->flags & MAP_ATTRIB) { + res = &skt->res_attr; + skt->spd_attr[map->map] = speed; + skt->spd_mem[map->map] = 0; + } else { + res = &skt->res_mem; + skt->spd_attr[map->map] = 0; + skt->spd_mem[map->map] = speed; + } + + sa1100_pcmcia_set_mecr(skt, cpufreq_get(0)); + + map->sys_stop -= map->sys_start; + map->sys_stop += res->start + map->card_start; + map->sys_start = res->start + map->card_start; + + return 0; +} + +struct bittbl { + unsigned int mask; + const char *name; +}; + +static struct bittbl status_bits[] = { + { SS_WRPROT, "SS_WRPROT" }, + { SS_BATDEAD, "SS_BATDEAD" }, + { SS_BATWARN, "SS_BATWARN" }, + { SS_READY, "SS_READY" }, + { SS_DETECT, "SS_DETECT" }, + { SS_POWERON, "SS_POWERON" }, + { SS_STSCHG, "SS_STSCHG" }, + { SS_3VCARD, "SS_3VCARD" }, + { SS_XVCARD, "SS_XVCARD" }, +}; + +static struct bittbl conf_bits[] = { + { SS_PWR_AUTO, "SS_PWR_AUTO" }, + { SS_IOCARD, "SS_IOCARD" }, + { SS_RESET, "SS_RESET" }, + { 
SS_DMA_MODE, "SS_DMA_MODE" }, + { SS_SPKR_ENA, "SS_SPKR_ENA" }, + { SS_OUTPUT_ENA, "SS_OUTPUT_ENA" }, +}; + +static void +dump_bits(char **p, const char *prefix, unsigned int val, struct bittbl *bits, int sz) +{ + char *b = *p; + int i; + + b += sprintf(b, "%-9s:", prefix); + for (i = 0; i < sz; i++) + if (val & bits[i].mask) + b += sprintf(b, " %s", bits[i].name); + *b++ = '\n'; + *p = b; +} + +/* show_status() + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * Implements the /sys/class/pcmcia_socket/??/status file. + * + * Returns: the number of characters added to the buffer + */ +static ssize_t show_status(struct class_device *class_dev, char *buf) +{ + struct sa1100_pcmcia_socket *skt = container_of(class_dev, + struct sa1100_pcmcia_socket, socket.dev); + unsigned int clock = cpufreq_get(0); + unsigned long mecr = MECR; + char *p = buf; + + p+=sprintf(p, "slot : %d\n", skt->nr); + + dump_bits(&p, "status", skt->status, + status_bits, ARRAY_SIZE(status_bits)); + dump_bits(&p, "csc_mask", skt->cs_state.csc_mask, + status_bits, ARRAY_SIZE(status_bits)); + dump_bits(&p, "cs_flags", skt->cs_state.flags, + conf_bits, ARRAY_SIZE(conf_bits)); + + p+=sprintf(p, "Vcc : %d\n", skt->cs_state.Vcc); + p+=sprintf(p, "Vpp : %d\n", skt->cs_state.Vpp); + p+=sprintf(p, "IRQ : %d (%d)\n", skt->cs_state.io_irq, skt->irq); + + p+=sprintf(p, "I/O : %u (%u)\n", + calc_speed(skt->spd_io, MAX_IO_WIN, SA1100_PCMCIA_IO_ACCESS), + sa1100_pcmcia_cmd_time(clock, MECR_BSIO_GET(mecr, skt->nr))); + + p+=sprintf(p, "attribute: %u (%u)\n", + calc_speed(skt->spd_attr, MAX_WIN, SA1100_PCMCIA_3V_MEM_ACCESS), + sa1100_pcmcia_cmd_time(clock, MECR_BSA_GET(mecr, skt->nr))); + + p+=sprintf(p, "common : %u (%u)\n", + calc_speed(skt->spd_mem, MAX_WIN, SA1100_PCMCIA_3V_MEM_ACCESS), + sa1100_pcmcia_cmd_time(clock, MECR_BSM_GET(mecr, skt->nr))); + + return p-buf; +} +static CLASS_DEVICE_ATTR(status, S_IRUGO, show_status, NULL); + + +static struct pccard_operations sa11xx_pcmcia_operations = { + .init = 
sa1100_pcmcia_sock_init, + .suspend = sa1100_pcmcia_suspend, + .get_status = sa1100_pcmcia_get_status, + .get_socket = sa1100_pcmcia_get_socket, + .set_socket = sa1100_pcmcia_set_socket, + .set_io_map = sa1100_pcmcia_set_io_map, + .set_mem_map = sa1100_pcmcia_set_mem_map, +}; + +int sa11xx_request_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr) +{ + int i, res = 0; + + for (i = 0; i < nr; i++) { + if (irqs[i].sock != skt->nr) + continue; + res = request_irq(irqs[i].irq, sa1100_pcmcia_interrupt, + SA_INTERRUPT, irqs[i].str, skt); + if (res) + break; + set_irq_type(irqs[i].irq, IRQT_NOEDGE); + } + + if (res) { + printk(KERN_ERR "PCMCIA: request for IRQ%d failed (%d)\n", + irqs[i].irq, res); + + while (i--) + if (irqs[i].sock == skt->nr) + free_irq(irqs[i].irq, skt); + } + return res; +} +EXPORT_SYMBOL(sa11xx_request_irqs); + +void sa11xx_free_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr) +{ + int i; + + for (i = 0; i < nr; i++) + if (irqs[i].sock == skt->nr) + free_irq(irqs[i].irq, skt); +} +EXPORT_SYMBOL(sa11xx_free_irqs); + +void sa11xx_disable_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr) +{ + int i; + + for (i = 0; i < nr; i++) + if (irqs[i].sock == skt->nr) + set_irq_type(irqs[i].irq, IRQT_NOEDGE); +} +EXPORT_SYMBOL(sa11xx_disable_irqs); + +void sa11xx_enable_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr) +{ + int i; + + for (i = 0; i < nr; i++) + if (irqs[i].sock == skt->nr) { + set_irq_type(irqs[i].irq, IRQT_RISING); + set_irq_type(irqs[i].irq, IRQT_BOTHEDGE); + } +} +EXPORT_SYMBOL(sa11xx_enable_irqs); + +static LIST_HEAD(sa1100_sockets); +static DECLARE_MUTEX(sa1100_sockets_lock); + +static const char *skt_names[] = { + "PCMCIA socket 0", + "PCMCIA socket 1", +}; + +struct skt_dev_info { + int nskt; + struct sa1100_pcmcia_socket skt[0]; +}; + +#define SKT_DEV_INFO_SIZE(n) \ + (sizeof(struct skt_dev_info) + (n)*sizeof(struct sa1100_pcmcia_socket)) + +int 
sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, int first, int nr) +{ + struct skt_dev_info *sinfo; + unsigned int cpu_clock; + int ret, i; + + /* + * set default MECR calculation if the board specific + * code did not specify one... + */ + if (!ops->socket_get_timing) + ops->socket_get_timing = sa1100_pcmcia_default_mecr_timing; + + down(&sa1100_sockets_lock); + + sinfo = kmalloc(SKT_DEV_INFO_SIZE(nr), GFP_KERNEL); + if (!sinfo) { + ret = -ENOMEM; + goto out; + } + + memset(sinfo, 0, SKT_DEV_INFO_SIZE(nr)); + sinfo->nskt = nr; + + cpu_clock = cpufreq_get(0); + + /* + * Initialise the per-socket structure. + */ + for (i = 0; i < nr; i++) { + struct sa1100_pcmcia_socket *skt = &sinfo->skt[i]; + + skt->socket.ops = &sa11xx_pcmcia_operations; + skt->socket.owner = ops->owner; + skt->socket.dev.dev = dev; + + init_timer(&skt->poll_timer); + skt->poll_timer.function = sa1100_pcmcia_poll_event; + skt->poll_timer.data = (unsigned long)skt; + skt->poll_timer.expires = jiffies + SA1100_PCMCIA_POLL_PERIOD; + + skt->nr = first + i; + skt->irq = NO_IRQ; + skt->dev = dev; + skt->ops = ops; + + skt->res_skt.start = _PCMCIA(skt->nr); + skt->res_skt.end = _PCMCIA(skt->nr) + PCMCIASp - 1; + skt->res_skt.name = skt_names[skt->nr]; + skt->res_skt.flags = IORESOURCE_MEM; + + ret = request_resource(&iomem_resource, &skt->res_skt); + if (ret) + goto out_err_1; + + skt->res_io.start = _PCMCIAIO(skt->nr); + skt->res_io.end = _PCMCIAIO(skt->nr) + PCMCIAIOSp - 1; + skt->res_io.name = "io"; + skt->res_io.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + ret = request_resource(&skt->res_skt, &skt->res_io); + if (ret) + goto out_err_2; + + skt->res_mem.start = _PCMCIAMem(skt->nr); + skt->res_mem.end = _PCMCIAMem(skt->nr) + PCMCIAMemSp - 1; + skt->res_mem.name = "memory"; + skt->res_mem.flags = IORESOURCE_MEM; + + ret = request_resource(&skt->res_skt, &skt->res_mem); + if (ret) + goto out_err_3; + + skt->res_attr.start = _PCMCIAAttr(skt->nr); + skt->res_attr.end = 
_PCMCIAAttr(skt->nr) + PCMCIAAttrSp - 1; + skt->res_attr.name = "attribute"; + skt->res_attr.flags = IORESOURCE_MEM; + + ret = request_resource(&skt->res_skt, &skt->res_attr); + if (ret) + goto out_err_4; + + skt->virt_io = ioremap(skt->res_io.start, 0x10000); + if (skt->virt_io == NULL) { + ret = -ENOMEM; + goto out_err_5; + } + + list_add(&skt->node, &sa1100_sockets); + + /* + * We initialize the MECR to default values here, because + * we are not guaranteed to see a SetIOMap operation at + * runtime. + */ + sa1100_pcmcia_set_mecr(skt, cpu_clock); + + ret = ops->hw_init(skt); + if (ret) + goto out_err_6; + + skt->socket.features = SS_CAP_STATIC_MAP|SS_CAP_PCCARD; + skt->socket.irq_mask = 0; + skt->socket.map_size = PAGE_SIZE; + skt->socket.pci_irq = skt->irq; + skt->socket.io_offset = (unsigned long)skt->virt_io; + + skt->status = sa1100_pcmcia_skt_state(skt); + + ret = pcmcia_register_socket(&skt->socket); + if (ret) + goto out_err_7; + + WARN_ON(skt->socket.sock != i); + + add_timer(&skt->poll_timer); + + class_device_create_file(&skt->socket.dev, &class_device_attr_status); + } + + dev_set_drvdata(dev, sinfo); + ret = 0; + goto out; + + do { + struct sa1100_pcmcia_socket *skt = &sinfo->skt[i]; + + del_timer_sync(&skt->poll_timer); + pcmcia_unregister_socket(&skt->socket); + + out_err_7: + flush_scheduled_work(); + + ops->hw_shutdown(skt); + out_err_6: + list_del(&skt->node); + iounmap(skt->virt_io); + out_err_5: + release_resource(&skt->res_attr); + out_err_4: + release_resource(&skt->res_mem); + out_err_3: + release_resource(&skt->res_io); + out_err_2: + release_resource(&skt->res_skt); + out_err_1: + i--; + } while (i > 0); + + kfree(sinfo); + + out: + up(&sa1100_sockets_lock); + return ret; +} +EXPORT_SYMBOL(sa11xx_drv_pcmcia_probe); + +int sa11xx_drv_pcmcia_remove(struct device *dev) +{ + struct skt_dev_info *sinfo = dev_get_drvdata(dev); + int i; + + dev_set_drvdata(dev, NULL); + + down(&sa1100_sockets_lock); + for (i = 0; i < sinfo->nskt; i++) { + struct 
sa1100_pcmcia_socket *skt = &sinfo->skt[i]; + + del_timer_sync(&skt->poll_timer); + + pcmcia_unregister_socket(&skt->socket); + + flush_scheduled_work(); + + skt->ops->hw_shutdown(skt); + + sa1100_pcmcia_config_skt(skt, &dead_socket); + + list_del(&skt->node); + iounmap(skt->virt_io); + skt->virt_io = NULL; + release_resource(&skt->res_attr); + release_resource(&skt->res_mem); + release_resource(&skt->res_io); + release_resource(&skt->res_skt); + } + up(&sa1100_sockets_lock); + + kfree(sinfo); + + return 0; +} +EXPORT_SYMBOL(sa11xx_drv_pcmcia_remove); + +#ifdef CONFIG_CPU_FREQ + +/* sa1100_pcmcia_update_mecr() + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * When sa1100_pcmcia_notifier() decides that a MECR adjustment (due + * to a core clock frequency change) is needed, this routine establishes + * new BS_xx values consistent with the clock speed `clock'. + */ +static void sa1100_pcmcia_update_mecr(unsigned int clock) +{ + struct sa1100_pcmcia_socket *skt; + + down(&sa1100_sockets_lock); + list_for_each_entry(skt, &sa1100_sockets, node) + sa1100_pcmcia_set_mecr(skt, clock); + up(&sa1100_sockets_lock); +} + +/* sa1100_pcmcia_notifier() + * ^^^^^^^^^^^^^^^^^^^^^^^^ + * When changing the processor core clock frequency, it is necessary + * to adjust the MECR timings accordingly. We've recorded the timings + * requested by Card Services, so this is just a matter of finding + * out what our current speed is, and then recomputing the new MECR + * values. 
+ * + * Returns: 0 on success, -1 on error + */ +static int +sa1100_pcmcia_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freqs = data; + + switch (val) { + case CPUFREQ_PRECHANGE: + if (freqs->new > freqs->old) + sa1100_pcmcia_update_mecr(freqs->new); + break; + + case CPUFREQ_POSTCHANGE: + if (freqs->new < freqs->old) + sa1100_pcmcia_update_mecr(freqs->new); + break; + } + + return 0; +} + +static struct notifier_block sa1100_pcmcia_notifier_block = { + .notifier_call = sa1100_pcmcia_notifier +}; + +static int __init sa11xx_pcmcia_init(void) +{ + int ret; + + printk(KERN_INFO "SA11xx PCMCIA\n"); + + ret = cpufreq_register_notifier(&sa1100_pcmcia_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + if (ret < 0) + printk(KERN_ERR "Unable to register CPU frequency change " + "notifier (%d)\n", ret); + + return ret; +} +module_init(sa11xx_pcmcia_init); + +static void __exit sa11xx_pcmcia_exit(void) +{ + cpufreq_unregister_notifier(&sa1100_pcmcia_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); +} + +module_exit(sa11xx_pcmcia_exit); +#endif + +MODULE_AUTHOR("John Dorsey "); +MODULE_DESCRIPTION("Linux PCMCIA Card Services: SA-11xx core socket driver"); +MODULE_LICENSE("Dual MPL/GPL"); diff --git a/drivers/pcmcia/sa11xx_core.h b/drivers/pcmcia/sa11xx_core.h new file mode 100644 index 000000000..aadf7c0b6 --- /dev/null +++ b/drivers/pcmcia/sa11xx_core.h @@ -0,0 +1,121 @@ +/* + * linux/include/asm/arch/pcmcia.h + * + * Copyright (C) 2000 John G Dorsey + * + * This file contains definitions for the low-level SA-1100 kernel PCMCIA + * interface. Please see linux/Documentation/arm/SA1100/PCMCIA for details. + */ +#ifndef _ASM_ARCH_PCMCIA +#define _ASM_ARCH_PCMCIA + +/* include the world */ +#include +#include +#include +#include +#include +#include +#include "cs_internal.h" + +struct device; + +/* Ideally, we'd support up to MAX_SOCK sockets, but the SA-1100 only + * has support for two. 
This shows up in lots of hardwired ways, such + * as the fact that MECR only has enough bits to configure two sockets. + * Since it's so entrenched in the hardware, limiting the software + * in this way doesn't seem too terrible. + */ +#define SA1100_PCMCIA_MAX_SOCK (2) + +struct pcmcia_state { + unsigned detect: 1, + ready: 1, + bvd1: 1, + bvd2: 1, + wrprot: 1, + vs_3v: 1, + vs_Xv: 1; +}; + +/* + * This structure encapsulates per-socket state which we might need to + * use when responding to a Card Services query of some kind. + */ +struct sa1100_pcmcia_socket { + struct pcmcia_socket socket; + + /* + * Info from low level handler + */ + struct device *dev; + unsigned int nr; + unsigned int irq; + + /* + * Core PCMCIA state + */ + struct pcmcia_low_level *ops; + + unsigned int status; + socket_state_t cs_state; + + unsigned short spd_io[MAX_IO_WIN]; + unsigned short spd_mem[MAX_WIN]; + unsigned short spd_attr[MAX_WIN]; + + struct resource res_skt; + struct resource res_io; + struct resource res_mem; + struct resource res_attr; + void *virt_io; + + unsigned int irq_state; + + struct timer_list poll_timer; + struct list_head node; +}; + +struct pcmcia_low_level { + struct module *owner; + + int (*hw_init)(struct sa1100_pcmcia_socket *); + void (*hw_shutdown)(struct sa1100_pcmcia_socket *); + + void (*socket_state)(struct sa1100_pcmcia_socket *, struct pcmcia_state *); + int (*configure_socket)(struct sa1100_pcmcia_socket *, const socket_state_t *); + + /* + * Enable card status IRQs on (re-)initialisation. This can + * be called at initialisation, power management event, or + * pcmcia event. + */ + void (*socket_init)(struct sa1100_pcmcia_socket *); + + /* + * Disable card status IRQs and PCMCIA bus on suspend. 
+ */ + void (*socket_suspend)(struct sa1100_pcmcia_socket *); + + /* + * Calculate MECR timing clock wait states + */ + unsigned int (*socket_get_timing)(struct sa1100_pcmcia_socket *, + unsigned int cpu_speed, unsigned int cmd_time); +}; + +struct pcmcia_irqs { + int sock; + int irq; + const char *str; +}; + +int sa11xx_request_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr); +void sa11xx_free_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr); +void sa11xx_disable_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr); +void sa11xx_enable_irqs(struct sa1100_pcmcia_socket *skt, struct pcmcia_irqs *irqs, int nr); + +extern int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, int first, int nr); +extern int sa11xx_drv_pcmcia_remove(struct device *dev); + +#endif diff --git a/drivers/scsi/pcmcia/qlogic_core.c b/drivers/scsi/pcmcia/qlogic_core.c new file mode 100644 index 000000000..78abe22b1 --- /dev/null +++ b/drivers/scsi/pcmcia/qlogic_core.c @@ -0,0 +1,2 @@ +#define PCMCIA 1 +#include "qlogicfas.c" diff --git a/drivers/scsi/qlogicfas.h b/drivers/scsi/qlogicfas.h new file mode 100644 index 000000000..6750e8da6 --- /dev/null +++ b/drivers/scsi/qlogicfas.h @@ -0,0 +1,124 @@ +/* to be used by qlogicfas and qlogic_cs */ +#ifndef __QLOGICFAS_H +#define __QLOGICFAS_H + +/*----------------------------------------------------------------*/ +/* Configuration */ + +/* Set the following to 2 to use normal interrupt (active high/totempole- + tristate), otherwise use 0 (REQUIRED FOR PCMCIA) for active low, open + drain */ + +#define QL_INT_ACTIVE_HIGH 2 + +/* Set the following to max out the speed of the PIO PseudoDMA transfers, + again, 0 tends to be slower, but more stable. */ + +#define QL_TURBO_PDMA 1 + +/* This should be 1 to enable parity detection */ + +#define QL_ENABLE_PARITY 1 + +/* This will reset all devices when the driver is initialized (during bootup). 
+ The other linux drivers don't do this, but the DOS drivers do, and after + using DOS or some kind of crash or lockup this will bring things back + without requiring a cold boot. It does take some time to recover from a + reset, so it is slower, and I have seen timeouts so that devices weren't + recognized when this was set. */ + +#define QL_RESET_AT_START 0 + +/* crystal frequency in megahertz (for offset 5 and 9) + Please set this for your card. Most Qlogic cards are 40 Mhz. The + Control Concepts ISA (not VLB) is 24 Mhz */ + +#define XTALFREQ 40 + +/**********/ +/* DANGER! modify these at your own risk */ +/* SLOWCABLE can usually be reset to zero if you have a clean setup and + proper termination. The rest are for synchronous transfers and other + advanced features if your device can transfer faster than 5Mb/sec. + If you are really curious, email me for a quick howto until I have + something official */ +/**********/ + +/*****/ +/* config register 1 (offset 8) options */ +/* This needs to be set to 1 if your cabling is long or noisy */ +#define SLOWCABLE 1 + +/*****/ +/* offset 0xc */ +/* This will set fast (10Mhz) synchronous timing when set to 1 + For this to have an effect, FASTCLK must also be 1 */ +#define FASTSCSI 0 + +/* This when set to 1 will set a faster sync transfer rate */ +#define FASTCLK 0 /*(XTALFREQ>25?1:0)*/ + +/*****/ +/* offset 6 */ +/* This is the sync transfer divisor, XTALFREQ/X will be the maximum + achievable data rate (assuming the rest of the system is capable + and set properly) */ +#define SYNCXFRPD 5 /*(XTALFREQ/5)*/ + +/*****/ +/* offset 7 */ +/* This is the count of how many synchronous transfers can take place + i.e. how many reqs can occur before an ack is given. 
+ The maximum value for this is 15, the upper bits can modify + REQ/ACK assertion and deassertion during synchronous transfers + If this is 0, the bus will only transfer asynchronously */ +#define SYNCOFFST 0 +/* for the curious, bits 7&6 control the deassertion delay in 1/2 cycles + of the 40Mhz clock. If FASTCLK is 1, specifying 01 (1/2) will + cause the deassertion to be early by 1/2 clock. Bits 5&4 control + the assertion delay, also in 1/2 clocks (FASTCLK is ignored here). */ + +/*----------------------------------------------------------------*/ +#ifdef PCMCIA +#undef QL_INT_ACTIVE_HIGH +#define QL_INT_ACTIVE_HIGH 0 +#endif + +struct qlogicfas_priv; +typedef struct qlogicfas_priv *qlogicfas_priv_t; +struct qlogicfas_priv { + int qbase; /* Port */ + int qinitid; /* initiator ID */ + int qabort; /* Flag to cause an abort */ + int qlirq; /* IRQ being used */ + char qinfo[80]; /* description */ + Scsi_Cmnd *qlcmd; /* current command being processed */ + struct Scsi_Host *shost; /* pointer back to host */ + qlogicfas_priv_t next; /* next private struct */ +}; + +extern int qlcfg5; +extern int qlcfg6; +extern int qlcfg7; +extern int qlcfg8; +extern int qlcfg9; +extern int qlcfgc; + +/* The qlogic card uses two register maps - These macros select which one */ +#define REG0 ( outb( inb( qbase + 0xd ) & 0x7f , qbase + 0xd ), outb( 4 , qbase + 0xd )) +#define REG1 ( outb( inb( qbase + 0xd ) | 0x80 , qbase + 0xd ), outb( 0xb4 | QL_INT_ACTIVE_HIGH , qbase + 0xd )) + +/* following is watchdog timeout in microseconds */ +#define WATCHDOG 5000000 + +/*----------------------------------------------------------------*/ +/* the following will set the monitor border color (useful to find + where something crashed or gets stuck at and as a simple profiler) */ + +#if 0 +#define rtrc(i) {inb(0x3da);outb(0x31,0x3c0);outb((i),0x3c0);} +#else +#define rtrc(i) {} +#endif +#endif /* __QLOGICFAS_H */ + diff --git a/drivers/usb/core/driverfs.c b/drivers/usb/core/driverfs.c new file mode 
100644 index 000000000..51ff9bbd6 --- /dev/null +++ b/drivers/usb/core/driverfs.c @@ -0,0 +1,229 @@ +/* + * drivers/usb/core/driverfs.c + * + * (C) Copyright 2002 David Brownell + * (C) Copyright 2002 Greg Kroah-Hartman + * (C) Copyright 2002 IBM Corp. + * + * All of the driverfs file attributes for usb devices and interfaces. + * + */ + + +#include +#include + +#ifdef CONFIG_USB_DEBUG + #define DEBUG +#else + #undef DEBUG +#endif +#include + +#include "usb.h" + +/* Active configuration fields */ +#define usb_actconfig_show(field, multiplier, format_string) \ +static ssize_t show_##field (struct device *dev, char *buf) \ +{ \ + struct usb_device *udev; \ + \ + udev = to_usb_device (dev); \ + if (udev->actconfig) \ + return sprintf (buf, format_string, \ + udev->actconfig->desc.field * multiplier); \ + else \ + return 0; \ +} \ + +#define usb_actconfig_attr(field, multiplier, format_string) \ +usb_actconfig_show(field, multiplier, format_string) \ +static DEVICE_ATTR(field, S_IRUGO, show_##field, NULL); + +usb_actconfig_attr (bNumInterfaces, 1, "%2d\n") +usb_actconfig_attr (bmAttributes, 1, "%2x\n") +usb_actconfig_attr (bMaxPower, 2, "%3dmA\n") + +/* configuration value is always present, and r/w */ +usb_actconfig_show(bConfigurationValue, 1, "%u\n"); + +static ssize_t +set_bConfigurationValue (struct device *dev, const char *buf, size_t count) +{ + struct usb_device *udev = udev = to_usb_device (dev); + int config, value; + + if (sscanf (buf, "%u", &config) != 1 || config > 255) + return -EINVAL; + down(&udev->serialize); + value = usb_set_configuration (udev, config); + up(&udev->serialize); + return (value < 0) ? 
value : count; +} + +static DEVICE_ATTR(bConfigurationValue, S_IRUGO | S_IWUSR, + show_bConfigurationValue, set_bConfigurationValue); + +/* String fields */ +#define usb_string_attr(name, field) \ +static ssize_t show_##name(struct device *dev, char *buf) \ +{ \ + struct usb_device *udev; \ + int len; \ + \ + udev = to_usb_device (dev); \ + len = usb_string(udev, udev->descriptor.field, buf, PAGE_SIZE); \ + if (len < 0) \ + return 0; \ + buf[len] = '\n'; \ + buf[len+1] = 0; \ + return len+1; \ +} \ +static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL); + +usb_string_attr(product, iProduct); +usb_string_attr(manufacturer, iManufacturer); +usb_string_attr(serial, iSerialNumber); + +static ssize_t +show_speed (struct device *dev, char *buf) +{ + struct usb_device *udev; + char *speed; + + udev = to_usb_device (dev); + + switch (udev->speed) { + case USB_SPEED_LOW: + speed = "1.5"; + break; + case USB_SPEED_UNKNOWN: + case USB_SPEED_FULL: + speed = "12"; + break; + case USB_SPEED_HIGH: + speed = "480"; + break; + default: + speed = "unknown"; + } + return sprintf (buf, "%s\n", speed); +} +static DEVICE_ATTR(speed, S_IRUGO, show_speed, NULL); + +static ssize_t +show_devnum (struct device *dev, char *buf) +{ + struct usb_device *udev; + + udev = to_usb_device (dev); + return sprintf (buf, "%d\n", udev->devnum); +} +static DEVICE_ATTR(devnum, S_IRUGO, show_devnum, NULL); + +static ssize_t +show_version (struct device *dev, char *buf) +{ + struct usb_device *udev; + + udev = to_usb_device (dev); + return sprintf (buf, "%2x.%02x\n", udev->descriptor.bcdUSB >> 8, + udev->descriptor.bcdUSB & 0xff); +} +static DEVICE_ATTR(version, S_IRUGO, show_version, NULL); + +static ssize_t +show_maxchild (struct device *dev, char *buf) +{ + struct usb_device *udev; + + udev = to_usb_device (dev); + return sprintf (buf, "%d\n", udev->maxchild); +} +static DEVICE_ATTR(maxchild, S_IRUGO, show_maxchild, NULL); + +/* Descriptor fields */ +#define usb_descriptor_attr(field, format_string) \ 
+static ssize_t \ +show_##field (struct device *dev, char *buf) \ +{ \ + struct usb_device *udev; \ + \ + udev = to_usb_device (dev); \ + return sprintf (buf, format_string, udev->descriptor.field); \ +} \ +static DEVICE_ATTR(field, S_IRUGO, show_##field, NULL); + +usb_descriptor_attr (idVendor, "%04x\n") +usb_descriptor_attr (idProduct, "%04x\n") +usb_descriptor_attr (bcdDevice, "%04x\n") +usb_descriptor_attr (bDeviceClass, "%02x\n") +usb_descriptor_attr (bDeviceSubClass, "%02x\n") +usb_descriptor_attr (bDeviceProtocol, "%02x\n") +usb_descriptor_attr (bNumConfigurations, "%d\n") + + +void usb_create_driverfs_dev_files (struct usb_device *udev) +{ + struct device *dev = &udev->dev; + + /* current configuration's attributes */ + device_create_file (dev, &dev_attr_bNumInterfaces); + device_create_file (dev, &dev_attr_bConfigurationValue); + device_create_file (dev, &dev_attr_bmAttributes); + device_create_file (dev, &dev_attr_bMaxPower); + + /* device attributes */ + device_create_file (dev, &dev_attr_idVendor); + device_create_file (dev, &dev_attr_idProduct); + device_create_file (dev, &dev_attr_bcdDevice); + device_create_file (dev, &dev_attr_bDeviceClass); + device_create_file (dev, &dev_attr_bDeviceSubClass); + device_create_file (dev, &dev_attr_bDeviceProtocol); + device_create_file (dev, &dev_attr_bNumConfigurations); + + /* speed varies depending on how you connect the device */ + device_create_file (dev, &dev_attr_speed); + // FIXME iff there are other speed configs, show how many + + if (udev->descriptor.iManufacturer) + device_create_file (dev, &dev_attr_manufacturer); + if (udev->descriptor.iProduct) + device_create_file (dev, &dev_attr_product); + if (udev->descriptor.iSerialNumber) + device_create_file (dev, &dev_attr_serial); + + device_create_file (dev, &dev_attr_devnum); + device_create_file (dev, &dev_attr_version); + device_create_file (dev, &dev_attr_maxchild); +} + +/* Interface fields */ +#define usb_intf_attr(field, format_string) \ +static 
ssize_t \ +show_##field (struct device *dev, char *buf) \ +{ \ + struct usb_interface *intf = to_usb_interface (dev); \ + \ + return sprintf (buf, format_string, intf->cur_altsetting->desc.field); \ +} \ +static DEVICE_ATTR(field, S_IRUGO, show_##field, NULL); + +usb_intf_attr (bInterfaceNumber, "%02x\n") +usb_intf_attr (bAlternateSetting, "%2d\n") +usb_intf_attr (bNumEndpoints, "%02x\n") +usb_intf_attr (bInterfaceClass, "%02x\n") +usb_intf_attr (bInterfaceSubClass, "%02x\n") +usb_intf_attr (bInterfaceProtocol, "%02x\n") +usb_intf_attr (iInterface, "%02x\n") + +void usb_create_driverfs_intf_files (struct usb_interface *intf) +{ + device_create_file (&intf->dev, &dev_attr_bInterfaceNumber); + device_create_file (&intf->dev, &dev_attr_bAlternateSetting); + device_create_file (&intf->dev, &dev_attr_bNumEndpoints); + device_create_file (&intf->dev, &dev_attr_bInterfaceClass); + device_create_file (&intf->dev, &dev_attr_bInterfaceSubClass); + device_create_file (&intf->dev, &dev_attr_bInterfaceProtocol); + device_create_file (&intf->dev, &dev_attr_iInterface); +} diff --git a/fs/intermezzo/Makefile b/fs/intermezzo/Makefile new file mode 100644 index 000000000..260c7af24 --- /dev/null +++ b/fs/intermezzo/Makefile @@ -0,0 +1,11 @@ +# +# Makefile 1.00 Peter Braam +# + +obj-$(CONFIG_INTERMEZZO_FS) += intermezzo.o + +intermezzo-objs := cache.o dcache.o dir.o ext_attr.o file.o fileset.o \ + inode.o journal.o journal_ext2.o journal_ext3.o \ + journal_obdfs.o journal_reiserfs.o journal_tmpfs.o journal_xfs.o \ + kml_reint.o kml_unpack.o methods.o presto.o psdev.o replicator.o \ + super.o sysctl.o upcall.o vfs.o diff --git a/fs/intermezzo/cache.c b/fs/intermezzo/cache.c new file mode 100644 index 000000000..f97bc164d --- /dev/null +++ b/fs/intermezzo/cache.c @@ -0,0 +1,207 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2000 Stelias Computing, Inc. + * Copyright (C) 2000 Red Hat, Inc. 
+ * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + +/* + This file contains the routines associated with managing a + cache of files for InterMezzo. These caches have two reqs: + - need to be found fast so they are hashed by the device, + with an attempt to have collision chains of length 1. + The methods for the cache are set up in methods. 
+*/ + +extern kmem_cache_t * presto_dentry_slab; + +/* the intent of this hash is to have collision chains of length 1 */ +#define CACHES_BITS 8 +#define CACHES_SIZE (1 << CACHES_BITS) +#define CACHES_MASK CACHES_SIZE - 1 +static struct list_head presto_caches[CACHES_SIZE]; + +static inline int presto_cache_hash(struct super_block *s) +{ + return (CACHES_MASK) & ((unsigned long)s >> L1_CACHE_SHIFT); +} + +inline void presto_cache_add(struct presto_cache *cache) +{ + list_add(&cache->cache_chain, + &presto_caches[presto_cache_hash(cache->cache_sb)]); +} + +inline void presto_cache_init_hash(void) +{ + int i; + for ( i = 0; i < CACHES_SIZE; i++ ) { + INIT_LIST_HEAD(&presto_caches[i]); + } +} + +int izo_ioctl_packlen(struct izo_ioctl_data *data) +{ + int len = sizeof(struct izo_ioctl_data); + len += size_round(data->ioc_inllen1); + len += size_round(data->ioc_inllen2); + return len; +} + +/* map a device to a cache */ +struct presto_cache *presto_cache_find(struct super_block *s) +{ + struct presto_cache *cache; + struct list_head *lh, *tmp; + + lh = tmp = &(presto_caches[presto_cache_hash(s)]); + while ( (tmp = lh->next) != lh ) { + cache = list_entry(tmp, struct presto_cache, cache_chain); + if (cache->cache_sb == s) + return cache; + } + return NULL; +} + + +/* map an inode to a cache */ +struct presto_cache *presto_get_cache(struct inode *inode) +{ + struct presto_cache *cache; + ENTRY; + /* find the correct presto_cache here, based on the device */ + cache = presto_cache_find(inode->i_sb); + if ( !cache ) { + CERROR("WARNING: no presto cache for %s, ino %ld\n", + inode->i_sb->s_id, inode->i_ino); + EXIT; + return NULL; + } + EXIT; + return cache; +} + +/* another debugging routine: check fs is InterMezzo fs */ +int presto_ispresto(struct inode *inode) +{ + struct presto_cache *cache; + + if ( !inode ) + return 0; + cache = presto_get_cache(inode); + if ( !cache ) + return 0; + return inode->i_sb == cache->cache_sb; +} + +/* setup a cache structure when we need 
one */ +struct presto_cache *presto_cache_init(void) +{ + struct presto_cache *cache; + + PRESTO_ALLOC(cache, sizeof(struct presto_cache)); + if ( cache ) { + memset(cache, 0, sizeof(struct presto_cache)); + INIT_LIST_HEAD(&cache->cache_chain); + INIT_LIST_HEAD(&cache->cache_fset_list); + cache->cache_lock = SPIN_LOCK_UNLOCKED; + cache->cache_reserved = 0; + } + return cache; +} + +/* free a cache structure and all of the memory it is pointing to */ +inline void presto_free_cache(struct presto_cache *cache) +{ + if (!cache) + return; + + list_del(&cache->cache_chain); + if (cache->cache_sb && cache->cache_sb->s_root && + presto_d2d(cache->cache_sb->s_root)) { + kmem_cache_free(presto_dentry_slab, + presto_d2d(cache->cache_sb->s_root)); + cache->cache_sb->s_root->d_fsdata = NULL; + } + + PRESTO_FREE(cache, sizeof(struct presto_cache)); +} + +int presto_reserve_space(struct presto_cache *cache, loff_t req) +{ + struct filter_fs *filter; + loff_t avail; + struct super_block *sb = cache->cache_sb; + filter = cache->cache_filter; + if (!filter ) { + EXIT; + return 0; + } + if (!filter->o_trops ) { + EXIT; + return 0; + } + if (!filter->o_trops->tr_avail ) { + EXIT; + return 0; + } + + spin_lock(&cache->cache_lock); + avail = filter->o_trops->tr_avail(cache, sb); + CDEBUG(D_SUPER, "ESC::%ld +++> %ld \n", (long) cache->cache_reserved, + (long) (cache->cache_reserved + req)); + CDEBUG(D_SUPER, "ESC::Avail::%ld \n", (long) avail); + if (req + cache->cache_reserved > avail) { + spin_unlock(&cache->cache_lock); + EXIT; + return -ENOSPC; + } + cache->cache_reserved += req; + spin_unlock(&cache->cache_lock); + + EXIT; + return 0; +} + +void presto_release_space(struct presto_cache *cache, loff_t req) +{ + CDEBUG(D_SUPER, "ESC::%ld ---> %ld \n", (long) cache->cache_reserved, + (long) (cache->cache_reserved - req)); + spin_lock(&cache->cache_lock); + cache->cache_reserved -= req; + spin_unlock(&cache->cache_lock); +} diff --git a/fs/intermezzo/dcache.c b/fs/intermezzo/dcache.c 
new file mode 100644 index 000000000..8f8e2c516 --- /dev/null +++ b/fs/intermezzo/dcache.c @@ -0,0 +1,342 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Original version: Copyright (C) 1996 P. Braam and M. Callahan + * Rewritten for Linux 2.1. Copyright (C) 1997 Carnegie Mellon University + * d_fsdata and NFS compatiblity fixes Copyright (C) 2001 Tacit Networks, Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Directory operations for InterMezzo filesystem + */ + +/* inode dentry alias list walking code adapted from linux/fs/dcache.c + * + * fs/dcache.c + * + * (C) 1997 Thomas Schoebel-Theuer, + * with heavy changes by Linus Torvalds + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "intermezzo_fs.h" + +kmem_cache_t * presto_dentry_slab; + +/* called when a cache lookup succeeds */ +static int presto_d_revalidate(struct dentry *de, struct nameidata *nd) +{ + struct inode *inode = de->d_inode; + struct presto_file_set * root_fset; + + ENTRY; + if (!inode) { + EXIT; + return 0; + } + + if (is_bad_inode(inode)) { + EXIT; + return 0; + } + + if (!presto_d2d(de)) { + presto_set_dd(de); + } + + if (!presto_d2d(de)) { + EXIT; + return 0; + } + + root_fset = presto_d2d(de->d_inode->i_sb->s_root)->dd_fset; + if (root_fset->fset_flags & FSET_FLAT_BRANCH && + (presto_d2d(de)->dd_fset != root_fset )) { + presto_d2d(de)->dd_fset = root_fset; + } + + EXIT; + return 1; + +#if 0 + /* The following is needed for metadata on demand. 
*/ + if ( S_ISDIR(inode->i_mode) ) { + EXIT; + return (presto_chk(de, PRESTO_DATA) && + (presto_chk(de, PRESTO_ATTR))); + } else { + EXIT; + return presto_chk(de, PRESTO_ATTR); + } +#endif +} + +static void presto_d_release(struct dentry *dentry) +{ + if (!presto_d2d(dentry)) { + /* This can happen for dentries from NFSd */ + return; + } + presto_d2d(dentry)->dd_count--; + + if (!presto_d2d(dentry)->dd_count) { + kmem_cache_free(presto_dentry_slab, presto_d2d(dentry)); + dentry->d_fsdata = NULL; + } +} + +struct dentry_operations presto_dentry_ops = +{ + .d_revalidate = presto_d_revalidate, + .d_release = presto_d_release +}; + +static inline int presto_is_dentry_ROOT (struct dentry *dentry) +{ + return(dentry_name_cmp(dentry,"ROOT") && + !dentry_name_cmp(dentry->d_parent,".intermezzo")); +} + +static struct presto_file_set* presto_try_find_fset(struct dentry* dentry, + int *is_under_d_intermezzo) +{ + struct dentry* temp_dentry; + struct presto_dentry_data *d_data; + int found_root=0; + + ENTRY; + CDEBUG(D_FSDATA, "finding fileset for %p:%s\n", dentry, + dentry->d_name.name); + + *is_under_d_intermezzo = 0; + + /* walk up through the branch to get the fileset */ + /* The dentry we are passed presumably does not have the correct + * fset information. However, we still want to start walking up + * the branch from this dentry to get our found_root and + * is_under_d_intermezzo decisions correct + */ + for (temp_dentry = dentry ; ; temp_dentry = temp_dentry->d_parent) { + CDEBUG(D_FSDATA, "--->dentry %p:%*s\n", temp_dentry, + temp_dentry->d_name.len,temp_dentry->d_name.name); + if (presto_is_dentry_ROOT(temp_dentry)) + found_root = 1; + if (!found_root && + dentry_name_cmp(temp_dentry, ".intermezzo")) { + *is_under_d_intermezzo = 1; + } + d_data = presto_d2d(temp_dentry); + if (d_data) { + /* If we found a "ROOT" dentry while walking up the + * branch, we will journal regardless of whether + * we are under .intermezzo or not. 
+ * If we are already under d_intermezzo don't reverse + * the decision here...even if we found a "ROOT" + * dentry above .intermezzo (if we were ever to + * modify the directory structure). + */ + if (!*is_under_d_intermezzo) + *is_under_d_intermezzo = !found_root && + (d_data->dd_flags & PRESTO_DONT_JOURNAL); + EXIT; + return d_data->dd_fset; + } + if (temp_dentry->d_parent == temp_dentry) { + break; + } + } + EXIT; + return NULL; +} + +/* Only call this function on positive dentries */ +static struct presto_dentry_data* presto_try_find_alias_with_dd ( + struct dentry* dentry) +{ + struct inode *inode=dentry->d_inode; + struct list_head *head, *next, *tmp; + struct dentry *tmp_dentry; + + /* Search through the alias list for dentries with d_fsdata */ + spin_lock(&dcache_lock); + head = &inode->i_dentry; + next = inode->i_dentry.next; + while (next != head) { + tmp = next; + next = tmp->next; + tmp_dentry = list_entry(tmp, struct dentry, d_alias); + if (!presto_d2d(tmp_dentry)) { + spin_unlock(&dcache_lock); + return presto_d2d(tmp_dentry); + } + } + spin_unlock(&dcache_lock); + return NULL; +} + +/* Only call this function on positive dentries */ +static void presto_set_alias_dd (struct dentry *dentry, + struct presto_dentry_data* dd) +{ + struct inode *inode=dentry->d_inode; + struct list_head *head, *next, *tmp; + struct dentry *tmp_dentry; + + /* Set d_fsdata for this dentry */ + dd->dd_count++; + dentry->d_fsdata = dd; + + /* Now set d_fsdata for all dentries in the alias list. 
*/ + spin_lock(&dcache_lock); + head = &inode->i_dentry; + next = inode->i_dentry.next; + while (next != head) { + tmp = next; + next = tmp->next; + tmp_dentry = list_entry(tmp, struct dentry, d_alias); + if (!presto_d2d(tmp_dentry)) { + dd->dd_count++; + tmp_dentry->d_fsdata = dd; + } + } + spin_unlock(&dcache_lock); + return; +} + +inline struct presto_dentry_data *izo_alloc_ddata(void) +{ + struct presto_dentry_data *dd; + + dd = kmem_cache_alloc(presto_dentry_slab, SLAB_KERNEL); + if (dd == NULL) { + CERROR("IZO: out of memory trying to allocate presto_dentry_data\n"); + return NULL; + } + memset(dd, 0, sizeof(*dd)); + dd->dd_count = 1; + + return dd; +} + +/* This uses the BKL! */ +int presto_set_dd(struct dentry * dentry) +{ + struct presto_file_set *fset; + struct presto_dentry_data *dd; + int is_under_d_izo; + int error=0; + + ENTRY; + + if (!dentry) + BUG(); + + lock_kernel(); + + /* Did we lose a race? */ + if (dentry->d_fsdata) { + CERROR("dentry %p already has d_fsdata set\n", dentry); + if (dentry->d_inode) + CERROR(" inode: %ld\n", dentry->d_inode->i_ino); + EXIT; + goto out_unlock; + } + + if (dentry->d_inode != NULL) { + /* NFSd runs find_fh_dentry which instantiates disconnected + * dentries which are then connected without a lookup(). + * So it is possible to have connected dentries that do not + * have d_fsdata set. So we walk the list trying to find + * an alias which has its d_fsdata set and then use that + * for all the other dentries as well. + * - SHP,Vinny. + */ + + /* If there is an alias with d_fsdata use it. */ + if ((dd = presto_try_find_alias_with_dd (dentry))) { + presto_set_alias_dd (dentry, dd); + EXIT; + goto out_unlock; + } + } else { + /* Negative dentry */ + CDEBUG(D_FSDATA,"negative dentry %p: %*s\n", dentry, + dentry->d_name.len, dentry->d_name.name); + } + + /* No pre-existing d_fsdata, we need to construct one. 
+ * First, we must walk up the tree to find the fileset + * If a fileset can't be found, we leave a null fsdata + * and return EROFS to indicate that we can't journal + * updates. + */ + fset = presto_try_find_fset (dentry, &is_under_d_izo); + if (!fset) { +#ifdef PRESTO_NO_NFS + CERROR("No fileset for dentry %p: %*s\n", dentry, + dentry->d_name.len, dentry->d_name.name); +#endif + error = -EROFS; + EXIT; + goto out_unlock; + } + + dentry->d_fsdata = izo_alloc_ddata(); + if (!presto_d2d(dentry)) { + CERROR ("InterMezzo: out of memory allocating d_fsdata\n"); + error = -ENOMEM; + goto out_unlock; + } + presto_d2d(dentry)->dd_fset = fset; + if (is_under_d_izo) + presto_d2d(dentry)->dd_flags |= PRESTO_DONT_JOURNAL; + EXIT; + +out_unlock: + CDEBUG(D_FSDATA,"presto_set_dd dentry %p: %*s, d_fsdata %p\n", + dentry, dentry->d_name.len, dentry->d_name.name, + dentry->d_fsdata); + unlock_kernel(); + return error; +} + +int presto_init_ddata_cache(void) +{ + ENTRY; + presto_dentry_slab = + kmem_cache_create("presto_cache", + sizeof(struct presto_dentry_data), 0, + SLAB_HWCACHE_ALIGN, NULL, + NULL); + EXIT; + return (presto_dentry_slab != NULL); +} + +void presto_cleanup_ddata_cache(void) +{ + kmem_cache_destroy(presto_dentry_slab); +} diff --git a/fs/intermezzo/dir.c b/fs/intermezzo/dir.c new file mode 100644 index 000000000..3ec2e696a --- /dev/null +++ b/fs/intermezzo/dir.c @@ -0,0 +1,1333 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2000 Stelias Computing, Inc. + * Copyright (C) 2000 Red Hat, Inc. + * Copyright (C) 2000 Tacitus Systems + * Copyright (C) 2000 Peter J. Braam + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + +static inline void presto_relock_sem(struct inode *dir) +{ + /* the lock from sys_mkdir / lookup_create */ + down(&dir->i_sem); + /* the rest is done by the do_{create,mkdir, ...} */ +} + +static inline void presto_relock_other(struct inode *dir) +{ + /* vfs_mkdir locks */ + // down(&dir->i_zombie); + //lock_kernel(); +} + +static inline void presto_fulllock(struct inode *dir) +{ + /* the lock from sys_mkdir / lookup_create */ + down(&dir->i_sem); + /* vfs_mkdir locks */ + // down(&dir->i_zombie); + //lock_kernel(); +} + +static inline void presto_unlock(struct inode *dir) +{ + /* vfs_mkdir locks */ + //unlock_kernel(); + // up(&dir->i_zombie); + /* the lock from sys_mkdir / lookup_create */ + up(&dir->i_sem); +} + + +/* + * these are initialized in super.c + */ +extern int presto_permission(struct inode *inode, int mask, struct nameidata *nd); +static int izo_authorized_uid; + +int izo_dentry_is_ilookup(struct dentry *dentry, ino_t *id, + unsigned int *generation) +{ + char tmpname[64]; + char *next; + + ENTRY; + /* prefix is 7 characters: '...ino:' */ + if ( dentry->d_name.len < 7 || dentry->d_name.len > 64 || + memcmp(dentry->d_name.name, PRESTO_ILOOKUP_MAGIC, 7) != 0 ) { + EXIT; + return 0; + } + + memcpy(tmpname, dentry->d_name.name + 7, dentry->d_name.len - 7); + *(tmpname + 
dentry->d_name.len - 7) = '\0'; + + /* name is of the form ...ino:: */ + *id = simple_strtoul(tmpname, &next, 16); + if ( *next == PRESTO_ILOOKUP_SEP ) { + *generation = simple_strtoul(next + 1, 0, 16); + CDEBUG(D_INODE, "ino string: %s, Id = %lx (%lu), " + "generation %x (%d)\n", + tmpname, *id, *id, *generation, *generation); + EXIT; + return 1; + } else { + EXIT; + return 0; + } +} + +struct dentry *presto_tmpfs_ilookup(struct inode *dir, + struct dentry *dentry, + ino_t ino, + unsigned int generation) +{ + return dentry; +} + + +inline int presto_can_ilookup(void) +{ + return (current->euid == izo_authorized_uid || + capable(CAP_DAC_READ_SEARCH)); +} + +struct dentry *presto_iget_ilookup(struct inode *dir, + struct dentry *dentry, + ino_t ino, + unsigned int generation) +{ + struct inode *inode; + int error; + + ENTRY; + + if ( !presto_can_ilookup() ) { + CERROR("ilookup denied: euid %u, authorized_uid %u\n", + current->euid, izo_authorized_uid); + return ERR_PTR(-EPERM); + } + error = -ENOENT; + inode = iget(dir->i_sb, ino); + if (!inode) { + CERROR("fatal: NULL inode ino %lu\n", ino); + goto cleanup_iput; + } + if (is_bad_inode(inode) || inode->i_nlink == 0) { + CERROR("fatal: bad inode ino %lu, links %d\n", ino, inode->i_nlink); + goto cleanup_iput; + } + if (inode->i_generation != generation) { + CERROR("fatal: bad generation %u (want %u)\n", + inode->i_generation, generation); + goto cleanup_iput; + } + + d_instantiate(dentry, inode); + dentry->d_flags |= DCACHE_DISCONNECTED; /* NFS hack */ + + EXIT; + return NULL; + +cleanup_iput: + if (inode) + iput(inode); + return ERR_PTR(error); +} + +struct dentry *presto_add_ilookup_dentry(struct dentry *parent, + struct dentry *real) +{ + struct inode *inode = real->d_inode; + struct dentry *de; + char buf[32]; + char *ptr = buf; + struct dentry *inodir; + struct presto_dentry_data *dd; + + inodir = lookup_one_len("..iopen..", parent, strlen("..iopen..")); + if (!inodir || IS_ERR(inodir) || !inodir->d_inode ) { + 
CERROR("%s: bad ..iopen.. lookup\n", __FUNCTION__); + return NULL; + } + inodir->d_inode->i_op = &presto_dir_iops; + + snprintf(ptr, 32, "...ino:%lx:%x", inode->i_ino, inode->i_generation); + + de = lookup_one_len(ptr, inodir, strlen(ptr)); + if (!de || IS_ERR(de)) { + CERROR("%s: bad ...ino lookup %ld\n", + __FUNCTION__, PTR_ERR(de)); + dput(inodir); + return NULL; + } + + dd = presto_d2d(real); + if (!dd) + BUG(); + + /* already exists */ + if (de->d_inode) + BUG(); +#if 0 + if (de->d_inode != inode ) { + CERROR("XX de->d_inode %ld, inode %ld\n", + de->d_inode->i_ino, inode->i_ino); + BUG(); + } + if (dd->dd_inodentry) { + CERROR("inodentry exists %ld \n", inode->i_ino); + BUG(); + } + dput(inodir); + return de; + } +#endif + + if (presto_d2d(de)) + BUG(); + + atomic_inc(&inode->i_count); + de->d_op = &presto_dentry_ops; + d_add(de, inode); + if (!de->d_op) + CERROR("DD: no ops dentry %p, dd %p\n", de, dd); + dd->dd_inodentry = de; + dd->dd_count++; + de->d_fsdata = dd; + + dput(inodir); + return de; +} + +struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +{ + int rc = 0; + struct dentry *de; + struct presto_cache *cache; + int minor; + ino_t ino; + unsigned int generation; + struct inode_operations *iops; + int is_ilookup = 0; + + ENTRY; + cache = presto_get_cache(dir); + if (cache == NULL) { + CERROR("InterMezzo BUG: no cache in presto_lookup " + "(dir ino: %ld)!\n", dir->i_ino); + EXIT; + return NULL; + } + minor = presto_c2m(cache); + + iops = filter_c2cdiops(cache->cache_filter); + if (!iops || !iops->lookup) { + CERROR("InterMezzo BUG: filesystem has no lookup\n"); + EXIT; + return NULL; + } + + + CDEBUG(D_CACHE, "dentry %p, dir ino: %ld, name: %*s, islento: %d\n", + dentry, dir->i_ino, dentry->d_name.len, dentry->d_name.name, + ISLENTO(minor)); + + if (dentry->d_fsdata) + CERROR("DD -- BAD dentry %p has data\n", dentry); + + dentry->d_fsdata = NULL; +#if 0 + if (ext2_check_for_iopen(dir, dentry)) + de = NULL; + 
else { +#endif + if ( izo_dentry_is_ilookup(dentry, &ino, &generation) ) { + de = cache->cache_filter->o_trops->tr_ilookup + (dir, dentry, ino, generation); + is_ilookup = 1; + } else + de = iops->lookup(dir, dentry, nd); +#if 0 + } +#endif + + if ( IS_ERR(de) ) { + CERROR("dentry lookup error %ld\n", PTR_ERR(de)); + return de; + } + + /* some file systems have no read_inode: set methods here */ + if (dentry->d_inode) + presto_set_ops(dentry->d_inode, cache->cache_filter); + + filter_setup_dentry_ops(cache->cache_filter, + dentry->d_op, &presto_dentry_ops); + dentry->d_op = filter_c2udops(cache->cache_filter); + + /* In lookup we will tolerate EROFS return codes from presto_set_dd + * to placate NFS. EROFS indicates that a fileset was not found but + * we should still be able to continue through a lookup. + * Anything else is a hard error and must be returned to VFS. */ + if (!is_ilookup) + rc = presto_set_dd(dentry); + if (rc && rc != -EROFS) { + CERROR("presto_set_dd failed (dir %ld, name %*s): %d\n", + dir->i_ino, dentry->d_name.len, dentry->d_name.name, rc); + return ERR_PTR(rc); + } + + EXIT; + return NULL; +} + +static inline int presto_check_set_fsdata (struct dentry *de) +{ + if (presto_d2d(de) == NULL) { +#ifdef PRESTO_NO_NFS + CERROR("dentry without fsdata: %p: %*s\n", de, + de->d_name.len, de->d_name.name); + BUG(); +#endif + return presto_set_dd (de); + } + + return 0; +} + +int presto_setattr(struct dentry *de, struct iattr *iattr) +{ + int error; + struct presto_cache *cache; + struct presto_file_set *fset; + struct lento_vfs_context info = { 0, {0}, 0 }; + + ENTRY; + + error = presto_prep(de, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + if (!iattr->ia_valid) + CDEBUG(D_INODE, "presto_setattr: iattr is not valid\n"); + + CDEBUG(D_INODE, "valid %#x, mode %#o, uid %u, gid %u, size %Lu, " + "atime %lu mtime %lu ctime %lu flags %d\n", + iattr->ia_valid, iattr->ia_mode, iattr->ia_uid, iattr->ia_gid, + iattr->ia_size, 
iattr->ia_atime.tv_sec, iattr->ia_mtime.tv_sec, + iattr->ia_ctime.tv_sec, iattr->ia_attr_flags); + + if ( presto_get_permit(de->d_inode) < 0 ) { + EXIT; + return -EROFS; + } + + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + error = presto_do_setattr(fset, de, iattr, &info); + presto_put_permit(de->d_inode); + return error; +} + +/* + * Now the meat: the fs operations that require journaling + * + * + * XXX: some of these need modifications for hierarchical filesets + */ + +int presto_prep(struct dentry *dentry, struct presto_cache **cache, + struct presto_file_set **fset) +{ + int rc; + + /* NFS might pass us dentries which have not gone through lookup. + * Test and set d_fsdata for such dentries + */ + rc = presto_check_set_fsdata (dentry); + if (rc) return rc; + + *fset = presto_fset(dentry); + if ( *fset == NULL ) { + CERROR("No file set for dentry at %p: %*s\n", dentry, + dentry->d_name.len, dentry->d_name.name); + return -EROFS; + } + + *cache = (*fset)->fset_cache; + if ( *cache == NULL ) { + CERROR("PRESTO: BAD, BAD: cannot find cache\n"); + return -EBADF; + } + + CDEBUG(D_PIOCTL, "---> cache flags %x, fset flags %x\n", + (*cache)->cache_flags, (*fset)->fset_flags); + if( presto_is_read_only(*fset) ) { + CERROR("PRESTO: cannot modify read-only fileset, minor %d.\n", + presto_c2m(*cache)); + return -EROFS; + } + return 0; +} + +static int presto_create(struct inode * dir, struct dentry * dentry, int mode, + struct nameidata *nd) +{ + int error; + struct presto_cache *cache; + struct dentry *parent = dentry->d_parent; + struct lento_vfs_context info; + struct presto_file_set *fset; + + ENTRY; + error = presto_check_set_fsdata(dentry); + if ( error ) { + EXIT; + return error; + } + + error = presto_prep(dentry->d_parent, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + presto_unlock(dir); + + /* Does blocking and non-blocking behavious need to be + checked for. 
Without blocking (return 1), the permit + was acquired without reintegration + */ + if ( presto_get_permit(dir) < 0 ) { + EXIT; + presto_fulllock(dir); + return -EROFS; + } + + presto_relock_sem(dir); + parent = dentry->d_parent; + memset(&info, 0, sizeof(info)); + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + error = presto_do_create(fset, parent, dentry, mode, &info); + + presto_relock_other(dir); + presto_put_permit(dir); + EXIT; + return error; +} + +static int presto_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry) +{ + int error; + struct presto_cache *cache, *new_cache; + struct presto_file_set *fset, *new_fset; + struct dentry *parent = new_dentry->d_parent; + struct lento_vfs_context info; + + ENTRY; + error = presto_prep(old_dentry, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + error = presto_check_set_fsdata(new_dentry); + if ( error ) { + EXIT; + return error; + } + + error = presto_prep(new_dentry->d_parent, &new_cache, &new_fset); + if ( error ) { + EXIT; + return error; + } + + if (fset != new_fset) { + EXIT; + return -EXDEV; + } + + presto_unlock(dir); + if ( presto_get_permit(old_dentry->d_inode) < 0 ) { + EXIT; + presto_fulllock(dir); + return -EROFS; + } + + if ( presto_get_permit(dir) < 0 ) { + EXIT; + presto_fulllock(dir); + return -EROFS; + } + + presto_relock_sem(dir); + parent = new_dentry->d_parent; + + memset(&info, 0, sizeof(info)); + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + error = presto_do_link(fset, old_dentry, parent, + new_dentry, &info); + +#if 0 + /* XXX for links this is not right */ + if (cache->cache_filter->o_trops->tr_add_ilookup ) { + struct dentry *d; + d = cache->cache_filter->o_trops->tr_add_ilookup + (dir->i_sb->s_root, new_dentry, 1); + } +#endif + + presto_relock_other(dir); + presto_put_permit(dir); + presto_put_permit(old_dentry->d_inode); + return 
error; +} + +static int presto_mkdir(struct inode * dir, struct dentry * dentry, int mode) +{ + int error; + struct presto_file_set *fset; + struct presto_cache *cache; + struct dentry *parent = dentry->d_parent; + struct lento_vfs_context info; + + ENTRY; + + error = presto_check_set_fsdata(dentry); + if ( error ) { + EXIT; + return error; + } + + error = presto_prep(dentry->d_parent, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + presto_unlock(dir); + + if ( presto_get_permit(dir) < 0 ) { + EXIT; + presto_fulllock(dir); + return -EROFS; + } + + memset(&info, 0, sizeof(info)); + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + + presto_relock_sem(dir); + parent = dentry->d_parent; + error = presto_do_mkdir(fset, parent, dentry, mode, &info); + presto_relock_other(dir); + presto_put_permit(dir); + return error; +} + + + +static int presto_symlink(struct inode *dir, struct dentry *dentry, + const char *name) +{ + int error; + struct presto_cache *cache; + struct presto_file_set *fset; + struct dentry *parent = dentry->d_parent; + struct lento_vfs_context info; + + ENTRY; + error = presto_check_set_fsdata(dentry); + if ( error ) { + EXIT; + return error; + } + + error = presto_prep(dentry->d_parent, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + presto_unlock(dir); + if ( presto_get_permit(dir) < 0 ) { + EXIT; + presto_fulllock(dir); + return -EROFS; + } + + presto_relock_sem(dir); + parent = dentry->d_parent; + memset(&info, 0, sizeof(info)); + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + error = presto_do_symlink(fset, parent, dentry, name, &info); + presto_relock_other(dir); + presto_put_permit(dir); + return error; +} + +int presto_unlink(struct inode *dir, struct dentry *dentry) +{ + int error; + struct presto_cache *cache; + struct presto_file_set *fset; + struct dentry *parent = dentry->d_parent; + struct 
lento_vfs_context info; + + ENTRY; + error = presto_check_set_fsdata(dentry); + if ( error ) { + EXIT; + return error; + } + + error = presto_prep(dentry->d_parent, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + presto_unlock(dir); + if ( presto_get_permit(dir) < 0 ) { + EXIT; + presto_fulllock(dir); + return -EROFS; + } + + presto_relock_sem(dir); + parent = dentry->d_parent; + memset(&info, 0, sizeof(info)); + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + + error = presto_do_unlink(fset, parent, dentry, &info); + + presto_relock_other(dir); + presto_put_permit(dir); + return error; +} + +static int presto_rmdir(struct inode *dir, struct dentry *dentry) +{ + int error; + struct presto_cache *cache; + struct presto_file_set *fset; + struct dentry *parent = dentry->d_parent; + struct lento_vfs_context info; + + ENTRY; + CDEBUG(D_FILE, "prepping presto\n"); + error = presto_check_set_fsdata(dentry); + + if ( error ) { + EXIT; + return error; + } + + error = presto_prep(dentry->d_parent, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + CDEBUG(D_FILE, "unlocking\n"); + /* We need to dget() before the dput in double_unlock, to ensure we + * still have dentry references. double_lock doesn't do dget for us. 
+ */ + if (d_unhashed(dentry)) + d_rehash(dentry); + // double_up(&dir->i_zombie, &dentry->d_inode->i_zombie); + up(&dentry->d_inode->i_sem); + up(&dir->i_sem); + + CDEBUG(D_FILE, "getting permit\n"); + if ( presto_get_permit(parent->d_inode) < 0 ) { + EXIT; + down(&dir->i_sem); + down(&dentry->d_inode->i_sem); + // double_down(&dir->i_sem, &dentry->d_inode->i_sem); + // double_down(&dir->i_zombie, &dentry->d_inode->i_zombie); + + lock_kernel(); + return -EROFS; + } + CDEBUG(D_FILE, "locking\n"); + + down(&dir->i_sem); + down(&dentry->d_inode->i_sem); + parent = dentry->d_parent; + memset(&info, 0, sizeof(info)); + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + error = presto_do_rmdir(fset, parent, dentry, &info); + presto_put_permit(parent->d_inode); + lock_kernel(); + EXIT; + return error; +} + +static int presto_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_t rdev) +{ + int error; + struct presto_cache *cache; + struct presto_file_set *fset; + struct dentry *parent = dentry->d_parent; + struct lento_vfs_context info; + + if (!old_valid_dev(rdev)) + return -EINVAL; + + ENTRY; + error = presto_check_set_fsdata(dentry); + if ( error ) { + EXIT; + return error; + } + + error = presto_prep(dentry->d_parent, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + presto_unlock(dir); + if ( presto_get_permit(dir) < 0 ) { + EXIT; + presto_fulllock(dir); + return -EROFS; + } + + presto_relock_sem(dir); + parent = dentry->d_parent; + memset(&info, 0, sizeof(info)); + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + error = presto_do_mknod(fset, parent, dentry, mode, rdev, &info); + presto_relock_other(dir); + presto_put_permit(dir); + EXIT; + return error; +} + + + +// XXX this can be optimized: renamtes across filesets only require +// multiple KML records, but can locally be executed normally. 
+int presto_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + int error; + struct presto_cache *cache, *new_cache; + struct presto_file_set *fset, *new_fset; + struct lento_vfs_context info; + struct dentry *old_parent = old_dentry->d_parent; + struct dentry *new_parent = new_dentry->d_parent; + int triple; + + ENTRY; + error = presto_prep(old_dentry, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + error = presto_prep(new_parent, &new_cache, &new_fset); + if ( error ) { + EXIT; + return error; + } + + if ( fset != new_fset ) { + EXIT; + return -EXDEV; + } + + /* We need to do dget before the dput in double_unlock, to ensure we + * still have dentry references. double_lock doesn't do dget for us. + */ + + triple = (S_ISDIR(old_dentry->d_inode->i_mode) && new_dentry->d_inode)? + 1:0; + + unlock_rename(new_dentry->d_parent, old_dentry->d_parent); + + if ( presto_get_permit(old_dir) < 0 ) { + EXIT; + return -EROFS; + } + if ( presto_get_permit(new_dir) < 0 ) { + EXIT; + return -EROFS; + } + + lock_rename(new_dentry->d_parent, old_dentry->d_parent); + memset(&info, 0, sizeof(info)); + if (!ISLENTO(presto_c2m(cache))) + info.flags = LENTO_FL_KML; + info.flags |= LENTO_FL_IGNORE_TIME; + error = do_rename(fset, old_parent, old_dentry, new_parent, + new_dentry, &info); + + presto_put_permit(new_dir); + presto_put_permit(old_dir); + return error; +} + +/* basically this allows the ilookup processes access to all files for + * reading, while not making ilookup totally insecure. This could all + * go away if we could set the CAP_DAC_READ_SEARCH capability for the client. + */ +/* If posix acls are available, the underlying cache fs will export the + * appropriate permission function. Thus we do not worry here about ACLs + * or EAs. 
-SHP
 */
/*
 * presto_permission: inode_operations.permission for InterMezzo.
 * Grants read access outright to ilookup-capable processes, otherwise
 * delegates to the cache filesystem's own permission op for regular
 * files and directories, falling back to the VFS default.
 */
int presto_permission(struct inode *inode, int mask, struct nameidata *nd)
{
	unsigned short mode = inode->i_mode;
	struct presto_cache *cache;
	int rc;

	ENTRY;
	/* ilookup processes may read anything (see comment above) */
	if ( presto_can_ilookup() && !(mask & S_IWOTH)) {
		CDEBUG(D_CACHE, "ilookup on %ld OK\n", inode->i_ino);
		EXIT;
		return 0;
	}

	cache = presto_get_cache(inode);

	if ( cache ) {
		/* we only override the file/dir permission operations */
		struct inode_operations *fiops = filter_c2cfiops(cache->cache_filter);
		struct inode_operations *diops = filter_c2cdiops(cache->cache_filter);

		if ( S_ISREG(mode) && fiops && fiops->permission ) {
			EXIT;
			return fiops->permission(inode, mask, nd);
		}
		if ( S_ISDIR(mode) && diops && diops->permission ) {
			EXIT;
			return diops->permission(inode, mask, nd);
		}
	}

	/* The cache filesystem doesn't have its own permission function,
	 * so we call the default one.
	 */
	rc = vfs_permission(inode, mask);

	EXIT;
	return rc;
}


/*
 * presto_ioctl: control-plane entry point for the izo tools (the body of
 * the command switch continues in the following hunks).
 */
int presto_ioctl(struct inode *inode, struct file *file,
                 unsigned int cmd, unsigned long arg)
{
	char buf[1024];		/* on-stack staging area for izo_ioctl_data */
	struct izo_ioctl_data *data = NULL;
	struct presto_dentry_data *dd;
	int rc;

	ENTRY;

	/* Try the filesystem's ioctl first, and return if it succeeded.
*/ + dd = presto_d2d(file->f_dentry); + if (dd && dd->dd_fset) { + int (*cache_ioctl)(struct inode *, struct file *, unsigned int, unsigned long ) = filter_c2cdfops(dd->dd_fset->fset_cache->cache_filter)->ioctl; + rc = -ENOTTY; + if (cache_ioctl) + rc = cache_ioctl(inode, file, cmd, arg); + if (rc != -ENOTTY) { + EXIT; + return rc; + } + } + + if (current->euid != 0 && current->euid != izo_authorized_uid) { + EXIT; + return -EPERM; + } + + memset(buf, 0, sizeof(buf)); + + if (izo_ioctl_getdata(buf, buf + 1024, (void *)arg)) { + CERROR("intermezzo ioctl: data error\n"); + return -EINVAL; + } + data = (struct izo_ioctl_data *)buf; + + switch(cmd) { + case IZO_IOC_REINTKML: { + int rc; + int cperr; + rc = kml_reint_rec(file, data); + + EXIT; + cperr = copy_to_user((char *)arg, data, sizeof(*data)); + if (cperr) { + CERROR("WARNING: cperr %d\n", cperr); + rc = -EFAULT; + } + return rc; + } + + case IZO_IOC_GET_RCVD: { + struct izo_rcvd_rec rec; + struct presto_file_set *fset; + int rc; + + fset = presto_fset(file->f_dentry); + if (fset == NULL) { + EXIT; + return -ENODEV; + } + rc = izo_rcvd_get(&rec, fset, data->ioc_uuid); + if (rc < 0) { + EXIT; + return rc; + } + + EXIT; + return copy_to_user((char *)arg, &rec, sizeof(rec))? -EFAULT : 0; + } + + case IZO_IOC_REPSTATUS: { + __u64 client_kmlsize; + struct izo_rcvd_rec *lr_client; + struct izo_rcvd_rec rec; + struct presto_file_set *fset; + int minor; + int rc; + + fset = presto_fset(file->f_dentry); + if (fset == NULL) { + EXIT; + return -ENODEV; + } + minor = presto_f2m(fset); + + client_kmlsize = data->ioc_kmlsize; + lr_client = (struct izo_rcvd_rec *) data->ioc_pbuf1; + + rc = izo_repstatus(fset, client_kmlsize, + lr_client, &rec); + if (rc < 0) { + EXIT; + return rc; + } + + EXIT; + return copy_to_user((char *)arg, &rec, sizeof(rec))? 
-EFAULT : 0; + } + + case IZO_IOC_GET_CHANNEL: { + struct presto_file_set *fset; + + fset = presto_fset(file->f_dentry); + if (fset == NULL) { + EXIT; + return -ENODEV; + } + + data->ioc_dev = fset->fset_cache->cache_psdev->uc_minor; + CDEBUG(D_PSDEV, "CHANNEL %d\n", data->ioc_dev); + EXIT; + return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0; + } + + case IZO_IOC_SET_IOCTL_UID: + izo_authorized_uid = data->ioc_uid; + EXIT; + return 0; + + case IZO_IOC_SET_PID: + rc = izo_psdev_setpid(data->ioc_dev); + EXIT; + return rc; + + case IZO_IOC_SET_CHANNEL: + rc = izo_psdev_setchannel(file, data->ioc_dev); + EXIT; + return rc; + + case IZO_IOC_GET_KML_SIZE: { + struct presto_file_set *fset; + __u64 kmlsize; + + fset = presto_fset(file->f_dentry); + if (fset == NULL) { + EXIT; + return -ENODEV; + } + + kmlsize = presto_kml_offset(fset) + fset->fset_kml_logical_off; + + EXIT; + return copy_to_user((char *)arg, &kmlsize, sizeof(kmlsize))?-EFAULT : 0; + } + + case IZO_IOC_PURGE_FILE_DATA: { + struct presto_file_set *fset; + + fset = presto_fset(file->f_dentry); + if (fset == NULL) { + EXIT; + return -ENODEV; + } + + rc = izo_purge_file(fset, data->ioc_inlbuf1); + EXIT; + return rc; + } + + case IZO_IOC_GET_FILEID: { + rc = izo_get_fileid(file, data); + EXIT; + if (rc) + return rc; + return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0; + } + + case IZO_IOC_SET_FILEID: { + rc = izo_set_fileid(file, data); + EXIT; + if (rc) + return rc; + return copy_to_user((char *)arg, data, sizeof(*data))? 
-EFAULT : 0; + } + + case IZO_IOC_ADJUST_LML: { + struct lento_vfs_context *info; + info = (struct lento_vfs_context *)data->ioc_inlbuf1; + rc = presto_adjust_lml(file, info); + EXIT; + return rc; + } + + case IZO_IOC_CONNECT: { + struct presto_file_set *fset; + int minor; + + fset = presto_fset(file->f_dentry); + if (fset == NULL) { + EXIT; + return -ENODEV; + } + minor = presto_f2m(fset); + + rc = izo_upc_connect(minor, data->ioc_ino, + data->ioc_generation, data->ioc_uuid, + data->ioc_flags); + EXIT; + return rc; + } + + case IZO_IOC_GO_FETCH_KML: { + struct presto_file_set *fset; + int minor; + + fset = presto_fset(file->f_dentry); + if (fset == NULL) { + EXIT; + return -ENODEV; + } + minor = presto_f2m(fset); + + rc = izo_upc_go_fetch_kml(minor, fset->fset_name, + data->ioc_uuid, data->ioc_kmlsize); + EXIT; + return rc; + } + + case IZO_IOC_REVOKE_PERMIT: + if (data->ioc_flags) + rc = izo_revoke_permit(file->f_dentry, data->ioc_uuid); + else + rc = izo_revoke_permit(file->f_dentry, NULL); + EXIT; + return rc; + + case IZO_IOC_CLEAR_FSET: + rc = izo_clear_fsetroot(file->f_dentry); + EXIT; + return rc; + + case IZO_IOC_CLEAR_ALL_FSETS: { + struct presto_file_set *fset; + + fset = presto_fset(file->f_dentry); + if (fset == NULL) { + EXIT; + return -ENODEV; + } + + rc = izo_clear_all_fsetroots(fset->fset_cache); + EXIT; + return rc; + } + + case IZO_IOC_SET_FSET: + /* + * Mark this dentry as being a fileset root. 
+ */ + rc = presto_set_fsetroot_from_ioc(file->f_dentry, + data->ioc_inlbuf1, + data->ioc_flags); + EXIT; + return rc; + + + case IZO_IOC_MARK: { + int res = 0; /* resulting flags - returned to user */ + int error; + + CDEBUG(D_DOWNCALL, "mark inode: %ld, and: %x, or: %x, what %d\n", + file->f_dentry->d_inode->i_ino, data->ioc_and_flag, + data->ioc_or_flag, data->ioc_mark_what); + + switch (data->ioc_mark_what) { + case MARK_DENTRY: + error = izo_mark_dentry(file->f_dentry, + data->ioc_and_flag, + data->ioc_or_flag, &res); + break; + case MARK_FSET: + error = izo_mark_fset(file->f_dentry, + data->ioc_and_flag, + data->ioc_or_flag, &res); + break; + case MARK_CACHE: + error = izo_mark_cache(file->f_dentry, + data->ioc_and_flag, + data->ioc_or_flag, &res); + break; + case MARK_GETFL: { + int fflags, cflags; + data->ioc_and_flag = 0xffffffff; + data->ioc_or_flag = 0; + error = izo_mark_dentry(file->f_dentry, + data->ioc_and_flag, + data->ioc_or_flag, &res); + if (error) + break; + error = izo_mark_fset(file->f_dentry, + data->ioc_and_flag, + data->ioc_or_flag, &fflags); + if (error) + break; + error = izo_mark_cache(file->f_dentry, + data->ioc_and_flag, + data->ioc_or_flag, + &cflags); + + if (error) + break; + data->ioc_and_flag = fflags; + data->ioc_or_flag = cflags; + break; + } + default: + error = -EINVAL; + } + + if (error) { + EXIT; + return error; + } + data->ioc_mark_what = res; + CDEBUG(D_DOWNCALL, "mark inode: %ld, and: %x, or: %x, what %x\n", + file->f_dentry->d_inode->i_ino, data->ioc_and_flag, + data->ioc_or_flag, data->ioc_mark_what); + + EXIT; + return copy_to_user((char *)arg, data, sizeof(*data))? 
-EFAULT : 0; + } +#if 0 + case IZO_IOC_CLIENT_MAKE_BRANCH: { + struct presto_file_set *fset; + int minor; + + fset = presto_fset(file->f_dentry); + if (fset == NULL) { + EXIT; + return -ENODEV; + } + minor = presto_f2m(fset); + + rc = izo_upc_client_make_branch(minor, fset->fset_name, + data->ioc_inlbuf1, + data->ioc_inlbuf2); + EXIT; + return rc; + } +#endif + case IZO_IOC_SERVER_MAKE_BRANCH: { + struct presto_file_set *fset; + int minor; + + fset = presto_fset(file->f_dentry); + if (fset == NULL) { + EXIT; + return -ENODEV; + } + minor = presto_f2m(fset); + + izo_upc_server_make_branch(minor, data->ioc_inlbuf1); + EXIT; + return 0; + } + case IZO_IOC_SET_KMLSIZE: { + struct presto_file_set *fset; + int minor; + struct izo_rcvd_rec rec; + + fset = presto_fset(file->f_dentry); + if (fset == NULL) { + EXIT; + return -ENODEV; + } + minor = presto_f2m(fset); + + rc = izo_upc_set_kmlsize(minor, fset->fset_name, data->ioc_uuid, + data->ioc_kmlsize); + + if (rc != 0) { + EXIT; + return rc; + } + + rc = izo_rcvd_get(&rec, fset, data->ioc_uuid); + if (rc == -EINVAL) { + /* We don't know anything about this uuid yet; no + * worries. 
*/ + memset(&rec, 0, sizeof(rec)); + } else if (rc <= 0) { + CERROR("InterMezzo: error reading last_rcvd: %d\n", rc); + EXIT; + return rc; + } + rec.lr_remote_offset = data->ioc_kmlsize; + rc = izo_rcvd_write(fset, &rec); + if (rc <= 0) { + CERROR("InterMezzo: error writing last_rcvd: %d\n", rc); + EXIT; + return rc; + } + EXIT; + return rc; + } + case IZO_IOC_BRANCH_UNDO: { + struct presto_file_set *fset; + int minor; + + fset = presto_fset(file->f_dentry); + if (fset == NULL) { + EXIT; + return -ENODEV; + } + minor = presto_f2m(fset); + + rc = izo_upc_branch_undo(minor, fset->fset_name, + data->ioc_inlbuf1); + EXIT; + return rc; + } + case IZO_IOC_BRANCH_REDO: { + struct presto_file_set *fset; + int minor; + + fset = presto_fset(file->f_dentry); + if (fset == NULL) { + EXIT; + return -ENODEV; + } + minor = presto_f2m(fset); + + rc = izo_upc_branch_redo(minor, fset->fset_name, + data->ioc_inlbuf1); + EXIT; + return rc; + } + + default: + EXIT; + return -ENOTTY; + + } + EXIT; + return 0; +} + +struct file_operations presto_dir_fops = { + .ioctl = presto_ioctl +}; + +struct inode_operations presto_dir_iops = { + .create = presto_create, + .lookup = presto_lookup, + .link = presto_link, + .unlink = presto_unlink, + .symlink = presto_symlink, + .mkdir = presto_mkdir, + .rmdir = presto_rmdir, + .mknod = presto_mknod, + .rename = presto_rename, + .permission = presto_permission, + .setattr = presto_setattr, +#ifdef CONFIG_FS_EXT_ATTR + .set_ext_attr = presto_set_ext_attr, +#endif +}; + + diff --git a/fs/intermezzo/ext_attr.c b/fs/intermezzo/ext_attr.c new file mode 100644 index 000000000..be91417c1 --- /dev/null +++ b/fs/intermezzo/ext_attr.c @@ -0,0 +1,197 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Tacit Networks, Inc. + * Author: Shirish H. Phatak + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. 
+ * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Extended attribute handling for presto. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + +#ifdef CONFIG_FS_EXT_ATTR +#include + +extern inline void presto_debug_fail_blkdev(struct presto_file_set *fset, + unsigned long value); + + +/* VFS interface */ +/* XXX! Fixme test for user defined attributes */ +int presto_set_ext_attr(struct inode *inode, + const char *name, void *buffer, + size_t buffer_len, int flags) +{ + int error; + struct presto_cache *cache; + struct presto_file_set *fset; + struct lento_vfs_context info; + struct dentry *dentry; + int minor = presto_i2m(inode); + char *buf = NULL; + + ENTRY; + if (minor < 0) { + EXIT; + return -1; + } + + if ( ISLENTO(minor) ) { + EXIT; + return -EINVAL; + } + + /* BAD...vfs should really pass down the dentry to use, especially + * since every other operation in iops does. 
But for now + * we do a reverse mapping from inode to the first dentry + */ + if (list_empty(&inode->i_dentry)) { + CERROR("No alias for inode %d\n", (int) inode->i_ino); + EXIT; + return -EINVAL; + } + + dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias); + + error = presto_prep(dentry, &cache, &fset); + if ( error ) { + EXIT; + return error; + } + + if ((buffer != NULL) && (buffer_len != 0)) { + /* If buffer is a user space pointer copy it to kernel space + * and reset the flag. We do this since the journal functions need + * access to the contents of the buffer, and the file system + * does not care. When we actually invoke the function, we remove + * the EXT_ATTR_FLAG_USER flag. + * + * XXX:Check if the "fs does not care" assertion is always true -SHP + * (works for ext3) + */ + if (flags & EXT_ATTR_FLAG_USER) { + PRESTO_ALLOC(buf, buffer_len); + if (!buf) { + CERROR("InterMezzo: out of memory!!!\n"); + return -ENOMEM; + } + error = copy_from_user(buf, buffer, buffer_len); + if (error) + return -EFAULT; + } else + buf = buffer; + } else + buf = buffer; + + if ( presto_get_permit(inode) < 0 ) { + EXIT; + if (buffer_len && (flags & EXT_ATTR_FLAG_USER)) + PRESTO_FREE(buf, buffer_len); + return -EROFS; + } + + /* Simulate presto_setup_info */ + memset(&info, 0, sizeof(info)); + /* For now redundant..but we keep it around just in case */ + info.flags = LENTO_FL_IGNORE_TIME; + if (!ISLENTO(cache->cache_psdev->uc_minor)) + info.flags |= LENTO_FL_KML; + + /* We pass in the kernel space pointer and reset the + * EXT_ATTR_FLAG_USER flag. + * See comments above. + */ + /* Note that mode is already set by VFS so we send in a NULL */ + error = presto_do_set_ext_attr(fset, dentry, name, buf, + buffer_len, flags & ~EXT_ATTR_FLAG_USER, + NULL, &info); + presto_put_permit(inode); + + if (buffer_len && (flags & EXT_ATTR_FLAG_USER)) + PRESTO_FREE(buf, buffer_len); + EXIT; + return error; +} + +/* Lento Interface */ +/* XXX: ignore flags? 
We should be forcing these operations through? -SHP*/ +int lento_set_ext_attr(const char *path, const char *name, + void *buffer, size_t buffer_len, int flags, mode_t mode, + struct lento_vfs_context *info) +{ + int error; + char * pathname; + struct nameidata nd; + struct dentry *dentry; + struct presto_file_set *fset; + + ENTRY; + lock_kernel(); + + pathname=getname(path); + error = PTR_ERR(pathname); + if (IS_ERR(pathname)) { + EXIT; + goto exit; + } + + /* Note that ext_attrs apply to both files and directories..*/ + error=presto_walk(pathname,&nd); + if (error) + goto exit; + dentry = nd.dentry; + + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + CERROR("No fileset!\n"); + EXIT; + goto exit_dentry; + } + + if (buffer==NULL) buffer_len=0; + + error = presto_do_set_ext_attr(fset, dentry, name, buffer, + buffer_len, flags, &mode, info); +exit_dentry: + path_release(&nd); +exit_path: + putname(pathname); +exit: + unlock_kernel(); + return error; +} + +#endif /*CONFIG_FS_EXT_ATTR*/ diff --git a/fs/intermezzo/file.c b/fs/intermezzo/file.c new file mode 100644 index 000000000..f6256427b --- /dev/null +++ b/fs/intermezzo/file.c @@ -0,0 +1,534 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2000 Stelias Computing, Inc. + * Copyright (C) 2000 Red Hat, Inc. + * Copyright (C) 2000 TurboLinux, Inc. + * Copyright (C) 2000 Los Alamos National Laboratory. + * Copyright (C) 2000, 2001 Tacit Networks, Inc. + * Copyright (C) 2000 Peter J. Braam + * Copyright (C) 2001 Mountain View Data, Inc. + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * This file manages file I/O + * + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" +/* + * these are initialized in super.c + */ +extern int presto_permission(struct inode *inode, int mask, struct nameidata *nd); + + +static int presto_open_upcall(int minor, struct dentry *de) +{ + int rc = 0; + char *path, *buffer; + struct presto_file_set *fset; + int pathlen; + struct lento_vfs_context info; + struct presto_dentry_data *dd = presto_d2d(de); + + PRESTO_ALLOC(buffer, PAGE_SIZE); + if ( !buffer ) { + CERROR("PRESTO: out of memory!\n"); + return -ENOMEM; + } + fset = presto_fset(de); + path = presto_path(de, fset->fset_dentry, buffer, PAGE_SIZE); + pathlen = MYPATHLEN(buffer, path); + + CDEBUG(D_FILE, "de %p, dd %p\n", de, dd); + if (dd->remote_ino == 0) { + rc = presto_get_fileid(minor, fset, de); + } + memset (&info, 0, sizeof(info)); + if (dd->remote_ino > 0) { + info.remote_ino = dd->remote_ino; + info.remote_generation = dd->remote_generation; + } else + CERROR("get_fileid failed %d, ino: %Lx, fetching by name\n", rc, + (unsigned long long) dd->remote_ino); + + rc = izo_upc_open(minor, pathlen, path, fset->fset_name, &info); + PRESTO_FREE(buffer, PAGE_SIZE); + return rc; +} + +static inline int open_check_dod(struct file *file, + struct presto_file_set *fset) +{ + int gen, is_iopen = 0, minor; + struct presto_cache *cache = fset->fset_cache; + 
ino_t inum; + + minor = presto_c2m(cache); + + if ( ISLENTO(minor) ) { + CDEBUG(D_CACHE, "is lento, not doing DOD.\n"); + return 0; + } + + /* Files are only ever opened by inode during backfetches, when by + * definition we have the authoritative copy of the data. No DOD. */ + is_iopen = izo_dentry_is_ilookup(file->f_dentry, &inum, &gen); + + if (is_iopen) { + CDEBUG(D_CACHE, "doing iopen, not doing DOD.\n"); + return 0; + } + + if (!(fset->fset_flags & FSET_DATA_ON_DEMAND)) { + CDEBUG(D_CACHE, "fileset not on demand.\n"); + return 0; + } + + if (file->f_flags & O_TRUNC) { + CDEBUG(D_CACHE, "fileset dod: O_TRUNC.\n"); + return 0; + } + + if (presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL)) { + CDEBUG(D_CACHE, "file under .intermezzo, not doing DOD\n"); + return 0; + } + + if (presto_chk(file->f_dentry, PRESTO_DATA)) { + CDEBUG(D_CACHE, "PRESTO_DATA is set, not doing DOD.\n"); + return 0; + } + + if (cache->cache_filter->o_trops->tr_all_data(file->f_dentry->d_inode)) { + CDEBUG(D_CACHE, "file not sparse, not doing DOD.\n"); + return 0; + } + + return 1; +} + +static int presto_file_open(struct inode *inode, struct file *file) +{ + int rc = 0; + struct file_operations *fops; + struct presto_cache *cache; + struct presto_file_set *fset; + struct presto_file_data *fdata; + int writable = (file->f_flags & (O_RDWR | O_WRONLY)); + int minor, i; + + ENTRY; + + if (presto_prep(file->f_dentry, &cache, &fset) < 0) { + EXIT; + return -EBADF; + } + + minor = presto_c2m(cache); + + CDEBUG(D_CACHE, "DATA_OK: %d, ino: %ld, islento: %d\n", + presto_chk(file->f_dentry, PRESTO_DATA), inode->i_ino, + ISLENTO(minor)); + + if ( !ISLENTO(minor) && (file->f_flags & O_RDWR || + file->f_flags & O_WRONLY)) { + CDEBUG(D_CACHE, "calling presto_get_permit\n"); + if ( presto_get_permit(inode) < 0 ) { + EXIT; + return -EROFS; + } + presto_put_permit(inode); + } + + if (open_check_dod(file, fset)) { + CDEBUG(D_CACHE, "presto_open_upcall\n"); + CDEBUG(D_CACHE, "dentry: %p setting DATA, ATTR\n", 
file->f_dentry); + presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA); + rc = presto_open_upcall(minor, file->f_dentry); + if (rc) { + EXIT; + CERROR("%s: returning error %d\n", __FUNCTION__, rc); + return rc; + } + + } + + /* file was truncated upon open: do not refetch */ + if (file->f_flags & O_TRUNC) { + CDEBUG(D_CACHE, "setting DATA, ATTR\n"); + presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA); + } + + fops = filter_c2cffops(cache->cache_filter); + if ( fops->open ) { + CDEBUG(D_CACHE, "calling fs open\n"); + rc = fops->open(inode, file); + + if (rc) { + EXIT; + return rc; + } + } + + if (writable) { + PRESTO_ALLOC(fdata, sizeof(*fdata)); + if (!fdata) { + EXIT; + return -ENOMEM; + } + /* LOCK: XXX check that the kernel lock protects this alloc */ + fdata->fd_do_lml = 0; + fdata->fd_bytes_written = 0; + fdata->fd_fsuid = current->fsuid; + fdata->fd_fsgid = current->fsgid; + fdata->fd_mode = file->f_dentry->d_inode->i_mode; + fdata->fd_uid = file->f_dentry->d_inode->i_uid; + fdata->fd_gid = file->f_dentry->d_inode->i_gid; + fdata->fd_ngroups = current->group_info->ngroups; + for (i=0 ; i < current->group_info->ngroups ; i++) + fdata->fd_groups[i] = GROUP_AT(current->group_info,i); + if (!ISLENTO(minor)) + fdata->fd_info.flags = LENTO_FL_KML; + else { + /* this is for the case of DOD, + reint_close will adjust flags if needed */ + fdata->fd_info.flags = 0; + } + + presto_getversion(&fdata->fd_version, inode); + file->private_data = fdata; + } else { + file->private_data = NULL; + } + + EXIT; + return 0; +} + +int presto_adjust_lml(struct file *file, struct lento_vfs_context *info) +{ + struct presto_file_data *fdata = + (struct presto_file_data *) file->private_data; + + if (!fdata) { + EXIT; + return -EINVAL; + } + + memcpy(&fdata->fd_info, info, sizeof(*info)); + EXIT; + return 0; +} + + +static int presto_file_release(struct inode *inode, struct file *file) +{ + int rc; + struct file_operations *fops; + struct presto_cache *cache; + struct 
presto_file_set *fset; + struct presto_file_data *fdata = + (struct presto_file_data *)file->private_data; + ENTRY; + + rc = presto_prep(file->f_dentry, &cache, &fset); + if ( rc ) { + EXIT; + return rc; + } + + fops = filter_c2cffops(cache->cache_filter); + if (fops && fops->release) + rc = fops->release(inode, file); + + CDEBUG(D_CACHE, "islento = %d (minor %d), rc %d, data %p\n", + ISLENTO(cache->cache_psdev->uc_minor), + cache->cache_psdev->uc_minor, rc, fdata); + + /* this file was modified: ignore close errors, write KML */ + if (fdata && fdata->fd_do_lml) { + /* XXX: remove when lento gets file granularity cd */ + if ( presto_get_permit(inode) < 0 ) { + EXIT; + return -EROFS; + } + + fdata->fd_info.updated_time = file->f_dentry->d_inode->i_mtime; + rc = presto_do_close(fset, file); + presto_put_permit(inode); + } + + if (!rc && fdata) { + PRESTO_FREE(fdata, sizeof(*fdata)); + file->private_data = NULL; + } + + EXIT; + return rc; +} + +static void presto_apply_write_policy(struct file *file, + struct presto_file_set *fset, loff_t res) +{ + struct presto_file_data *fdata = + (struct presto_file_data *)file->private_data; + struct presto_cache *cache = fset->fset_cache; + struct presto_version new_file_ver; + int error; + struct rec_info rec; + + /* Here we do a journal close after a fixed or a specified + amount of KBytes, currently a global parameter set with + sysctl. If files are open for a long time, this gives added + protection. (XXX todo: per cache, add ioctl, handle + journaling in a thread, add more options etc.) + */ + + if ((fset->fset_flags & FSET_JCLOSE_ON_WRITE) && + (!ISLENTO(cache->cache_psdev->uc_minor))) { + fdata->fd_bytes_written += res; + + if (fdata->fd_bytes_written >= fset->fset_file_maxio) { + presto_getversion(&new_file_ver, + file->f_dentry->d_inode); + /* This is really heavy weight and should be fixed + ASAP. 
	   At most we should be recording the number
	   of bytes written and not locking the kernel,
	   wait for permits, etc, on the write path. SHP
	*/
	lock_kernel();
	if ( presto_get_permit(file->f_dentry->d_inode) < 0 ) {
		EXIT;
		/* we must be disconnected, not to worry */
		unlock_kernel();
		return;
	}
	error = presto_journal_close(&rec, fset, fdata,
				     file->f_dentry,
				     &fdata->fd_version,
				     &new_file_ver);
	presto_put_permit(file->f_dentry->d_inode);
	unlock_kernel();
	if ( error ) {
		CERROR("presto_close: cannot journal close\n");
		/* XXX these errors are really bad */
		/* panic(); */
		return;
	}
	fdata->fd_bytes_written = 0;	/* journalled: restart the byte count */
	}
	}
}

/*
 * presto_file_write: file_operations.write for InterMezzo.
 *
 * Reserves journal space, writes an LML (local modification log) record
 * in its own transaction the first time a file is dirtied, then forwards
 * the write to the cache filesystem and applies the journal-close write
 * policy.  Returns bytes written or a negative errno.
 */
static ssize_t presto_file_write(struct file *file, const char *buf,
                                 size_t size, loff_t *off)
{
	struct rec_info rec;
	int error;
	struct presto_cache *cache;
	struct presto_file_set *fset;
	struct file_operations *fops;
	ssize_t res;
	int do_lml_here;
	void *handle = NULL;
	unsigned long blocks;
	struct presto_file_data *fdata;
	loff_t res_size;

	error = presto_prep(file->f_dentry, &cache, &fset);
	if ( error ) {
		EXIT;
		return error;
	}

	/* worst-case journal space: data blocks plus indirection overhead */
	blocks = (size >> file->f_dentry->d_inode->i_sb->s_blocksize_bits) + 1;
	/* XXX 3 is for ext2 indirect blocks ... */
	res_size = 2 * PRESTO_REQHIGH + ((blocks+3)
		<< file->f_dentry->d_inode->i_sb->s_blocksize_bits);

	error = presto_reserve_space(fset->fset_cache, res_size);
	CDEBUG(D_INODE, "Reserved %Ld for %Zd\n", res_size, size);
	if ( error ) {
		EXIT;
		return -ENOSPC;
	}

	CDEBUG(D_INODE, "islento %d, minor: %d\n",
	       ISLENTO(cache->cache_psdev->uc_minor),
	       cache->cache_psdev->uc_minor);

	/*
	 * XXX this lock should become a per inode lock when
	 * Vinny's changes are in; we could just use i_sem.
	 */
	read_lock(&fset->fset_lml.fd_lock);
	/* NOTE(review): fdata is dereferenced here without a NULL check,
	 * although later code guards on `fdata`.  Presumably private_data is
	 * always set for writable opens (see presto_file_open) — confirm. */
	fdata = (struct presto_file_data *)file->private_data;
	do_lml_here = size && (fdata->fd_do_lml == 0) &&
		!presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL);

	if (do_lml_here)
		fdata->fd_do_lml = 1;	/* claim: we write this file's LML record */
	read_unlock(&fset->fset_lml.fd_lock);

	/* XXX
	   There might be a bug here. We need to make
	   absolutely sure that the ext3_file_write commits
	   after our transaction that writes the LML record.
	   Nesting the file write helps if new blocks are allocated.
	*/
	res = 0;
	if (do_lml_here) {
		struct presto_version file_version;
		/* handle different space reqs from file system below! */
		handle = presto_trans_start(fset, file->f_dentry->d_inode,
					    KML_OPCODE_WRITE);
		if ( IS_ERR(handle) ) {
			presto_release_space(fset->fset_cache, res_size);
			CERROR("presto_write: no space for transaction\n");
			return -ENOSPC;
		}

		presto_getversion(&file_version, file->f_dentry->d_inode);
		res = presto_write_lml_close(&rec, fset, file,
					     fdata->fd_info.remote_ino,
					     fdata->fd_info.remote_generation,
					     &fdata->fd_info.remote_version,
					     &file_version);
		fdata->fd_lml_offset = rec.offset;
		if ( res ) {
			CERROR("intermezzo: PANIC failed to write LML\n");
			/* deliberate NULL-write to oops: an unrecorded write
			 * would corrupt replication state */
			*(int *)0 = 1;
			EXIT;
			goto exit_write;
		}
		presto_trans_commit(fset, handle);
	}

	fops = filter_c2cffops(cache->cache_filter);
	res = fops->write(file, buf, size, off);
	if ( res != size ) {
		CDEBUG(D_FILE, "file write returns short write: size %Zd, res %Zd\n", size, res);
	}

	if ( (res > 0) && fdata )
		presto_apply_write_policy(file, fset, res);

 exit_write:
	presto_release_space(fset->fset_cache, res_size);
	return res;
}

struct file_operations presto_file_fops = {
	.write = presto_file_write,
	.open = presto_file_open,
	.release = presto_file_release,
	.ioctl = presto_ioctl
};

struct inode_operations presto_file_iops = {
	.permission = presto_permission,
	.setattr = presto_setattr,
#ifdef CONFIG_FS_EXT_ATTR
	.set_ext_attr =
presto_set_ext_attr, +#endif +}; + +/* FIXME: I bet we want to add a lock here and in presto_file_open. */ +int izo_purge_file(struct presto_file_set *fset, char *file) +{ +#if 0 + void *handle = NULL; + char *path = NULL; + struct nameidata nd; + struct dentry *dentry; + int rc = 0, len; + loff_t oldsize; + + /* FIXME: not mtpt it's gone */ + len = strlen(fset->fset_cache->cache_mtpt) + strlen(file) + 1; + PRESTO_ALLOC(path, len + 1); + if (path == NULL) + return -1; + + sprintf(path, "%s/%s", fset->fset_cache->cache_mtpt, file); + rc = izo_lookup_file(fset, path, &nd); + if (rc) + goto error; + dentry = nd.dentry; + + /* FIXME: take a lock here */ + + if (dentry->d_inode->i_atime.tv_sec > get_seconds() - 5) { + /* We lost the race; this file was accessed while we were doing + * ioctls and lookups and whatnot. */ + rc = -EBUSY; + goto error_unlock; + } + + /* FIXME: Check if this file is open. */ + + handle = presto_trans_start(fset, dentry->d_inode, KML_OPCODE_TRUNC); + if (IS_ERR(handle)) { + rc = -ENOMEM; + goto error_unlock; + } + + /* FIXME: Write LML record */ + + oldsize = dentry->d_inode->i_size; + rc = izo_do_truncate(fset, dentry, 0, oldsize); + if (rc != 0) + goto error_clear; + rc = izo_do_truncate(fset, dentry, oldsize, 0); + if (rc != 0) + goto error_clear; + + error_clear: + /* FIXME: clear LML record */ + + error_unlock: + /* FIXME: release the lock here */ + + error: + if (handle != NULL && !IS_ERR(handle)) + presto_trans_commit(fset, handle); + if (path != NULL) + PRESTO_FREE(path, len + 1); + return rc; +#else + return 0; +#endif +} diff --git a/fs/intermezzo/fileset.c b/fs/intermezzo/fileset.c new file mode 100644 index 000000000..9db8cab51 --- /dev/null +++ b/fs/intermezzo/fileset.c @@ -0,0 +1,674 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. 
 *
 * InterMezzo is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * InterMezzo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with InterMezzo; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Managing filesets
 *
 */

/* NOTE(review): the targets of these #include directives were lost when
 * this patch hunk was extracted — restore them from the original
 * fs/intermezzo/fileset.c before building. */
#include
#include
#include

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"

/* dd_fset of the dentry's presto data, or NULL when no data is attached */
static inline struct presto_file_set *presto_dentry2fset(struct dentry *dentry)
{
	if (presto_d2d(dentry) == NULL) {
		EXIT;
		return NULL;
	}
	return presto_d2d(dentry)->dd_fset;
}

/* find the fileset dentry for this dentry */
/* Walks d_parent links up to the root; returns the first ancestor's
 * fileset, or NULL when none of them carries one. */
struct presto_file_set *presto_fset(struct dentry *de)
{
	struct dentry *fsde;
	ENTRY;
	if ( !de->d_inode ) {
		/* FIXME: is this ok to be NULL? */
		CDEBUG(D_INODE,"presto_fset: warning %*s has NULL inode.\n",
		       de->d_name.len, de->d_name.name);
	}
	for (fsde = de;; fsde = fsde->d_parent) {
		if ( presto_dentry2fset(fsde) ) {
			EXIT;
			return presto_dentry2fset(fsde);
		}
		if (fsde->d_parent == fsde)	/* reached the root */
			break;
	}
	EXIT;
	return NULL;
}

/*
 * presto_get_lastrecno: look up `path` and return the current KML record
 * number of its fileset in *recno.  0 on success, negative errno on error.
 */
int presto_get_lastrecno(char *path, off_t *recno)
{
	struct nameidata nd;
	struct presto_file_set *fset;
	struct dentry *dentry;
	int error;
	ENTRY;

	error = presto_walk(path, &nd);
	if (error) {
		EXIT;
		return error;
	}

	dentry = nd.dentry;

	error = -ENXIO;
	if ( !presto_ispresto(dentry->d_inode) ) {
		EXIT;
		goto kml_out;
	}

	error = -EINVAL;
	if ( !presto_dentry2fset(dentry)) {
		EXIT;
		goto kml_out;
	}

	fset = presto_dentry2fset(dentry);
	if (!fset) {
		EXIT;
		goto kml_out;
	}
	error = 0;
	*recno = fset->fset_kml.fd_recno;

 kml_out:
	path_release(&nd);
	return error;
}

/* Build "/.intermezzo/<fsetname>/<name>"; caller frees.  NULL on OOM. */
static char * _izo_make_path(char *fsetname, char *name)
{
	char *path = NULL;
	int len;

	len = strlen("/.intermezzo/") + strlen(fsetname) +
		1 + strlen(name) + 1;

	PRESTO_ALLOC(path, len);
	if (path == NULL)
		return NULL;

	sprintf(path, "/.intermezzo/%s/%s", fsetname, name);

	return path;
}

char * izo_make_path(struct presto_file_set *fset, char *name)
{
	return _izo_make_path(fset->fset_name, name);
}

/* Open a file under the fileset's .intermezzo directory. */
static struct file *_izo_fset_open(char *fsetname, char *name, int flags, int mode)
{
	char *path;
	struct file *f;
	int error;
	ENTRY;

	path = _izo_make_path(fsetname, name);
	if (path == NULL) {
		EXIT;
		return ERR_PTR(-ENOMEM);
	}

	CDEBUG(D_INODE, "opening file %s\n", path);
	f = filp_open(path, flags, mode);
	error = PTR_ERR(f);
	if (IS_ERR(f)) {
		CDEBUG(D_INODE, "Error %d\n", error);
	}

	/* NOTE(review): allocated length was `len` (strlen(path) + 1); the
	 * size passed to PRESTO_FREE here is one byte short — harmless if
	 * PRESTO_FREE ignores the length, but confirm its accounting. */
	PRESTO_FREE(path, strlen(path));

	EXIT;
	return f;

}

struct file *izo_fset_open(struct presto_file_set *fset, char *name, int flags, int mode)
{
	return _izo_fset_open(fset->fset_name, name, flags, mode);
}



/*
 * note: this routine "pins" a dentry for a fileset root
 */
int presto_set_fsetroot(struct dentry *ioctl_dentry, char *fsetname,
                        unsigned int flags)
{
	struct presto_file_set *fset = NULL;
	struct presto_cache *cache;
	int error;
	struct file *fset_root;
	struct dentry *dentry;

	ENTRY;

	fset_root = _izo_fset_open(fsetname, "ROOT", O_RDONLY, 000);
	if (IS_ERR(fset_root)) {
		CERROR("Can't open %s/ROOT\n", fsetname);
		EXIT;
		error = PTR_ERR(fset_root);
		goto out;
	}
	dentry = dget(fset_root->f_dentry);
	filp_close(fset_root, NULL);

	dentry->d_inode->i_op = ioctl_dentry->d_inode->i_op;
	dentry->d_inode->i_fop =
ioctl_dentry->d_inode->i_fop; + dentry->d_op = ioctl_dentry->d_op; + fset = presto_dentry2fset(dentry); + if (fset && (fset->fset_dentry == dentry) ) { + CERROR("Fsetroot already set (inode %ld)\n", + dentry->d_inode->i_ino); + /* XXX: ignore because clear_fsetroot is broken */ +#if 0 + dput(dentry); + EXIT; + error = -EEXIST; + goto out; +#endif + } + + cache = presto_get_cache(dentry->d_inode); + if (!cache) { + CERROR("No cache found for inode %ld\n", + dentry->d_inode->i_ino); + EXIT; + error = -ENODEV; + goto out_free; + } + + PRESTO_ALLOC(fset, sizeof(*fset)); + if ( !fset ) { + CERROR("No memory allocating fset for %s\n", fsetname); + EXIT; + error = -ENOMEM; + goto out_free; + } + CDEBUG(D_INODE, "fset at %p\n", fset); + + CDEBUG(D_INODE, "InterMezzo: fsetroot: inode %ld, fileset name %s\n", + dentry->d_inode->i_ino, fsetname); + + fset->fset_mnt = mntget(current->fs->pwdmnt); + fset->fset_cache = cache; + fset->fset_dentry = dentry; + fset->fset_name = strdup(fsetname); + fset->fset_chunkbits = CHUNK_BITS; + fset->fset_flags = flags; + fset->fset_file_maxio = FSET_DEFAULT_MAX_FILEIO; + fset->fset_permit_lock = SPIN_LOCK_UNLOCKED; + PRESTO_ALLOC(fset->fset_reint_buf, 64 * 1024); + if (fset->fset_reint_buf == NULL) { + EXIT; + error = -ENOMEM; + goto out_free; + } + init_waitqueue_head(&fset->fset_permit_queue); + + if (presto_d2d(dentry) == NULL) { + dentry->d_fsdata = izo_alloc_ddata(); + } + if (presto_d2d(dentry) == NULL) { + CERROR("InterMezzo: %s: no memory\n", __FUNCTION__); + EXIT; + error = -ENOMEM; + goto out_free; + } + presto_d2d(dentry)->dd_fset = fset; + list_add(&fset->fset_list, &cache->cache_fset_list); + + error = izo_init_kml_file(fset, &fset->fset_kml); + if ( error ) { + EXIT; + CDEBUG(D_JOURNAL, "Error init_kml %d\n", error); + goto out_list_del; + } + + error = izo_init_lml_file(fset, &fset->fset_lml); + if ( error ) { + int rc; + EXIT; + rc = izo_log_close(&fset->fset_kml); + CDEBUG(D_JOURNAL, "Error init_lml %d, cleanup %d\n", error, 
rc); + goto out_list_del; + } + + /* init_last_rcvd_file could trigger a presto_file_write(), which + * requires that the lml structure be initialized. -phil */ + error = izo_init_last_rcvd_file(fset, &fset->fset_rcvd); + if ( error ) { + int rc; + EXIT; + rc = izo_log_close(&fset->fset_kml); + rc = izo_log_close(&fset->fset_lml); + CDEBUG(D_JOURNAL, "Error init_lastrcvd %d, cleanup %d\n", error, rc); + goto out_list_del; + } + + CDEBUG(D_PIOCTL, "-------> fset at %p, dentry at %p, mtpt %p," + "fset %s, cache %p, presto_d2d(dentry)->dd_fset %p\n", + fset, dentry, fset->fset_dentry, fset->fset_name, cache, + presto_d2d(dentry)->dd_fset); + + EXIT; + return 0; + + out_list_del: + list_del(&fset->fset_list); + presto_d2d(dentry)->dd_fset = NULL; + out_free: + if (fset) { + mntput(fset->fset_mnt); + if (fset->fset_reint_buf != NULL) + PRESTO_FREE(fset->fset_reint_buf, 64 * 1024); + PRESTO_FREE(fset, sizeof(*fset)); + } + dput(dentry); + out: + return error; +} + +static int izo_cleanup_fset(struct presto_file_set *fset) +{ + int error; + struct presto_cache *cache; + + ENTRY; + + CERROR("Cleaning up fset %s\n", fset->fset_name); + + error = izo_log_close(&fset->fset_kml); + if (error) + CERROR("InterMezzo: Closing kml for fset %s: %d\n", + fset->fset_name, error); + error = izo_log_close(&fset->fset_lml); + if (error) + CERROR("InterMezzo: Closing lml for fset %s: %d\n", + fset->fset_name, error); + error = izo_log_close(&fset->fset_rcvd); + if (error) + CERROR("InterMezzo: Closing last_rcvd for fset %s: %d\n", + fset->fset_name, error); + + cache = fset->fset_cache; + + list_del(&fset->fset_list); + + presto_d2d(fset->fset_dentry)->dd_fset = NULL; + dput(fset->fset_dentry); + mntput(fset->fset_mnt); + + PRESTO_FREE(fset->fset_name, strlen(fset->fset_name) + 1); + PRESTO_FREE(fset->fset_reint_buf, 64 * 1024); + PRESTO_FREE(fset, sizeof(*fset)); + EXIT; + return error; +} + +int izo_clear_fsetroot(struct dentry *dentry) +{ + struct presto_file_set *fset; + + ENTRY; + + 
fset = presto_dentry2fset(dentry); + if (!fset) { + EXIT; + return -EINVAL; + } + + izo_cleanup_fset(fset); + EXIT; + return 0; +} + +int izo_clear_all_fsetroots(struct presto_cache *cache) +{ + struct presto_file_set *fset; + struct list_head *tmp,*tmpnext; + int error; + + error = 0; + tmp = &cache->cache_fset_list; + tmpnext = tmp->next; + while ( tmpnext != &cache->cache_fset_list) { + tmp = tmpnext; + tmpnext = tmp->next; + fset = list_entry(tmp, struct presto_file_set, fset_list); + + error = izo_cleanup_fset(fset); + if (error) + break; + } + return error; +} + +static struct vfsmount *izo_alloc_vfsmnt(void) +{ + struct vfsmount *mnt; + PRESTO_ALLOC(mnt, sizeof(*mnt)); + if (mnt) { + memset(mnt, 0, sizeof(struct vfsmount)); + atomic_set(&mnt->mnt_count,1); + INIT_LIST_HEAD(&mnt->mnt_hash); + INIT_LIST_HEAD(&mnt->mnt_child); + INIT_LIST_HEAD(&mnt->mnt_mounts); + INIT_LIST_HEAD(&mnt->mnt_list); + } + return mnt; +} + + +static void izo_setup_ctxt(struct dentry *root, struct vfsmount *mnt, + struct run_ctxt *save) +{ + struct run_ctxt new; + + mnt->mnt_root = root; + mnt->mnt_sb = root->d_inode->i_sb; + unlock_super(mnt->mnt_sb); + + new.rootmnt = mnt; + new.root = root; + new.pwdmnt = mnt; + new.pwd = root; + new.fsuid = 0; + new.fsgid = 0; + new.fs = get_fs(); + /* XXX where can we get the groups from? 
*/ + new.group_info = groups_alloc(0); + + push_ctxt(save, &new); +} + +static void izo_cleanup_ctxt(struct vfsmount *mnt, struct run_ctxt *save) +{ + lock_super(mnt->mnt_sb); + pop_ctxt(save); +} + +static int izo_simple_mkdir(struct dentry *dir, char *name, int mode) +{ + struct dentry *dchild; + int err; + ENTRY; + + dchild = lookup_one_len(name, dir, strlen(name)); + if (IS_ERR(dchild)) { + EXIT; + return PTR_ERR(dchild); + } + + if (dchild->d_inode) { + dput(dchild); + EXIT; + return -EEXIST; + } + + err = vfs_mkdir(dir->d_inode, dchild, mode); + dput(dchild); + + EXIT; + return err; +} + +static int izo_simple_symlink(struct dentry *dir, char *name, char *tgt) +{ + struct dentry *dchild; + int err; + ENTRY; + + dchild = lookup_one_len(name, dir, strlen(name)); + if (IS_ERR(dchild)) { + EXIT; + return PTR_ERR(dchild); + } + + if (dchild->d_inode) { + dput(dchild); + EXIT; + return -EEXIST; + } + + err = vfs_symlink(dir->d_inode, dchild, tgt); + dput(dchild); + + EXIT; + return err; +} + +/* + * run set_fsetroot in chroot environment + */ +int presto_set_fsetroot_from_ioc(struct dentry *root, char *fsetname, + unsigned int flags) +{ + int rc; + struct presto_cache *cache; + struct vfsmount *mnt; + struct run_ctxt save; + + if (root != root->d_inode->i_sb->s_root) { + CERROR ("IOC_SET_FSET must be called on mount point\n"); + return -ENODEV; + } + + cache = presto_get_cache(root->d_inode); + mnt = cache->cache_vfsmount; + if (!mnt) { + EXIT; + return -ENOMEM; + } + + izo_setup_ctxt(root, mnt, &save); + rc = presto_set_fsetroot(root, fsetname, flags); + izo_cleanup_ctxt(mnt, &save); + return rc; +} + +/* XXX: this function should detect if fsetname is already in use for + the cache under root +*/ +int izo_prepare_fileset(struct dentry *root, char *fsetname) +{ + int err; + struct dentry *dotizo = NULL, *fsetdir = NULL, *dotiopen = NULL; + struct presto_cache *cache; + struct vfsmount *mnt; + struct run_ctxt save; + + cache = presto_get_cache(root->d_inode); + mnt 
= cache->cache_vfsmount = izo_alloc_vfsmnt(); + if (!mnt) { + EXIT; + return -ENOMEM; + } + + if (!fsetname) + fsetname = "rootfset"; + + izo_setup_ctxt(root, mnt, &save); + + err = izo_simple_mkdir(root, ".intermezzo", 0755); + CDEBUG(D_CACHE, "mkdir on .intermezzo err %d\n", err); + + err = izo_simple_mkdir(root, "..iopen..", 0755); + CDEBUG(D_CACHE, "mkdir on ..iopen.. err %d\n", err); + + dotiopen = lookup_one_len("..iopen..", root, strlen("..iopen..")); + if (IS_ERR(dotiopen)) { + EXIT; + goto out; + } + dotiopen->d_inode->i_op = &presto_dir_iops; + dput(dotiopen); + + + dotizo = lookup_one_len(".intermezzo", root, strlen(".intermezzo")); + if (IS_ERR(dotizo)) { + EXIT; + goto out; + } + + + err = izo_simple_mkdir(dotizo, fsetname, 0755); + CDEBUG(D_CACHE, "mkdir err %d\n", err); + + /* XXX find the dentry of the root of the fileset (root for now) */ + fsetdir = lookup_one_len(fsetname, dotizo, strlen(fsetname)); + if (IS_ERR(fsetdir)) { + EXIT; + goto out; + } + + err = izo_simple_symlink(fsetdir, "ROOT", "../.."); + + /* XXX read flags from flags file */ + err = presto_set_fsetroot(root, fsetname, 0); + CDEBUG(D_CACHE, "set_fsetroot err %d\n", err); + + out: + if (dotizo && !IS_ERR(dotizo)) + dput(dotizo); + if (fsetdir && !IS_ERR(fsetdir)) + dput(fsetdir); + izo_cleanup_ctxt(mnt, &save); + return err; +} + +int izo_set_fileid(struct file *dir, struct izo_ioctl_data *data) +{ + int rc = 0; + struct presto_cache *cache; + struct vfsmount *mnt; + struct run_ctxt save; + struct nameidata nd; + struct dentry *dentry; + struct presto_dentry_data *dd; + struct dentry *root; + char *buf = NULL; + + ENTRY; + + + root = dir->f_dentry; + + /* actually, needs to be called on ROOT of fset, not mount point + if (root != root->d_inode->i_sb->s_root) { + CERROR ("IOC_SET_FSET must be called on mount point\n"); + return -ENODEV; + } + */ + + cache = presto_get_cache(root->d_inode); + mnt = cache->cache_vfsmount; + if (!mnt) { + EXIT; + return -ENOMEM; + } + + 
izo_setup_ctxt(root, mnt, &save); + + PRESTO_ALLOC(buf, data->ioc_plen1); + if (!buf) { + rc = -ENOMEM; + EXIT; + goto out; + } + if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) { + rc = -EFAULT; + EXIT; + goto out; + } + + rc = presto_walk(buf, &nd); + if (rc) { + CERROR("Unable to open: %s\n", buf); + EXIT; + goto out; + } + dentry = nd.dentry; + if (!dentry) { + CERROR("no dentry!\n"); + rc = -EINVAL; + EXIT; + goto out_close; + } + dd = presto_d2d(dentry); + if (!dd) { + CERROR("no dentry_data!\n"); + rc = -EINVAL; + EXIT; + goto out_close; + } + + CDEBUG(D_FILE,"de:%p dd:%p\n", dentry, dd); + + if (dd->remote_ino != 0) { + CERROR("remote_ino already set? %Lx:%Lx\n", + (unsigned long long) dd->remote_ino, + (unsigned long long) dd->remote_generation); + rc = 0; + EXIT; + goto out_close; + } + + + CDEBUG(D_FILE,"setting %p %p, %s to %Lx:%Lx\n", dentry, dd, + buf, + (unsigned long long) data->ioc_ino, + (unsigned long long) data->ioc_generation); + dd->remote_ino = data->ioc_ino; + dd->remote_generation = data->ioc_generation; + + EXIT; + out_close: + path_release(&nd); + out: + if (buf) + PRESTO_FREE(buf, data->ioc_plen1); + izo_cleanup_ctxt(mnt, &save); + return rc; +} diff --git a/fs/intermezzo/inode.c b/fs/intermezzo/inode.c new file mode 100644 index 000000000..fda188bab --- /dev/null +++ b/fs/intermezzo/inode.c @@ -0,0 +1,179 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 1996 Peter J. Braam and + * Michael Callahan + * Copyright (C) 1999 Carnegie Mellon University + * Rewritten for Linux 2.1. Peter Braam + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Super block/filesystem wide operations + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + +extern void presto_free_cache(struct presto_cache *); + +void presto_set_ops(struct inode *inode, struct filter_fs *filter) +{ + ENTRY; + + if (!inode || is_bad_inode(inode)) + return; + + if (S_ISREG(inode->i_mode)) { + if ( !filter_c2cfiops(filter) ) { + filter_setup_file_ops(filter, + inode, &presto_file_iops, + &presto_file_fops); + } + inode->i_op = filter_c2ufiops(filter); + inode->i_fop = filter_c2uffops(filter); + CDEBUG(D_INODE, "set file methods for %ld to %p\n", + inode->i_ino, inode->i_op); + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = filter_c2udiops(filter); + inode->i_fop = filter_c2udfops(filter); + CDEBUG(D_INODE, "set dir methods for %ld to %p ioctl %p\n", + inode->i_ino, inode->i_op, inode->i_fop->ioctl); + } else if (S_ISLNK(inode->i_mode)) { + if ( !filter_c2csiops(filter)) { + filter_setup_symlink_ops(filter, + inode, + &presto_sym_iops, + &presto_sym_fops); + } + inode->i_op = filter_c2usiops(filter); + inode->i_fop = filter_c2usfops(filter); + CDEBUG(D_INODE, "set link methods for %ld to %p\n", + inode->i_ino, inode->i_op); + } + EXIT; +} + +void presto_read_inode(struct inode *inode) +{ + struct presto_cache *cache; + + cache = presto_get_cache(inode); + if ( !cache ) { + CERROR("PRESTO: BAD, BAD: cannot find cache\n"); + make_bad_inode(inode); 
+ return ; + } + + filter_c2csops(cache->cache_filter)->read_inode(inode); + + CDEBUG(D_INODE, "presto_read_inode: ino %ld, gid %d\n", + inode->i_ino, inode->i_gid); + + presto_set_ops(inode, cache->cache_filter); + /* XXX handle special inodes here or not - probably not? */ +} + +static void presto_put_super(struct super_block *sb) +{ + struct presto_cache *cache; + struct upc_channel *channel; + struct super_operations *sops; + struct list_head *lh; + int err; + + ENTRY; + cache = presto_cache_find(sb); + if (!cache) { + EXIT; + goto exit; + } + channel = &izo_channels[presto_c2m(cache)]; + sops = filter_c2csops(cache->cache_filter); + err = izo_clear_all_fsetroots(cache); + if (err) { + CERROR("%s: err %d\n", __FUNCTION__, err); + } + PRESTO_FREE(cache->cache_vfsmount, sizeof(struct vfsmount)); + + /* look at kill_super - fsync_super is not exported GRRR but + probably not needed */ + unlock_super(sb); + shrink_dcache_parent(cache->cache_root); + dput(cache->cache_root); + //fsync_super(sb); + lock_super(sb); + + if (sops->write_super) + sops->write_super(sb); + + if (sops->put_super) + sops->put_super(sb); + + /* free any remaining async upcalls when the filesystem is unmounted */ + spin_lock(&channel->uc_lock); + lh = channel->uc_pending.next; + while ( lh != &channel->uc_pending) { + struct upc_req *req; + req = list_entry(lh, struct upc_req, rq_chain); + + /* assignment must be here: we are about to free &lh */ + lh = lh->next; + if ( ! 
(req->rq_flags & REQ_ASYNC) ) + continue; + list_del(&(req->rq_chain)); + PRESTO_FREE(req->rq_data, req->rq_bufsize); + PRESTO_FREE(req, sizeof(struct upc_req)); + } + list_del(&cache->cache_channel_list); + spin_unlock(&channel->uc_lock); + + presto_free_cache(cache); + +exit: + CDEBUG(D_MALLOC, "after umount: kmem %ld, vmem %ld\n", + presto_kmemory, presto_vmemory); + return ; +} + +struct super_operations presto_super_ops = { + .read_inode = presto_read_inode, + .put_super = presto_put_super, +}; + + +/* symlinks can be chowned */ +struct inode_operations presto_sym_iops = { + .setattr = presto_setattr +}; + +/* NULL for now */ +struct file_operations presto_sym_fops; diff --git a/fs/intermezzo/intermezzo_fs.h b/fs/intermezzo/intermezzo_fs.h new file mode 100644 index 000000000..350036517 --- /dev/null +++ b/fs/intermezzo/intermezzo_fs.h @@ -0,0 +1,923 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001, 2002 Cluster File Systems, Inc. + * Copyright (C) 2001 Tacitus Systems, Inc. + * Copyright (C) 2000 Stelias Computing, Inc. + * Copyright (C) 2000 Red Hat, Inc. + * Copyright (C) 2000 TurboLinux, Inc. + * Copyright (C) 2000 Los Alamos National Laboratory. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef __INTERMEZZO_FS_H_ +#define __INTERMEZZO_FS_H_ 1 + +#include "intermezzo_lib.h" +#include "intermezzo_idl.h" + + +#ifdef __KERNEL__ +typedef __u8 uuid_t[16]; +#else +# include +#endif + +struct lento_vfs_context { + __u64 kml_offset; + struct timespec updated_time; + __u64 remote_ino; + __u64 remote_generation; + __u32 slot_offset; + __u32 recno; + __u32 flags; + uuid_t uuid; + struct presto_version remote_version; +}; + +#ifdef __KERNEL__ +# include +# include +# include +# include +# include +# include + +/* fixups for fs.h */ +# ifndef fs_down +# define fs_down(sem) down(sem) +# endif + +# ifndef fs_up +# define fs_up(sem) up(sem) +# endif + +# define KML_IDLE 0 +# define KML_DECODE 1 +# define KML_OPTIMIZE 2 +# define KML_REINT 3 + +# define KML_OPEN_REINT 0x0100 +# define KML_REINT_BEGIN 0x0200 +# define KML_BACKFETCH 0x0400 +# define KML_REINT_END 0x0800 +# define KML_CLOSE_REINT 0x1000 +# define KML_REINT_MAXBUF (64 * 1024) + +# define CACHE_CLIENT_RO 0x4 +# define CACHE_LENTO_RO 0x8 + +/* global variables */ +extern int presto_debug; +extern int presto_print_entry; +extern long presto_kmemory; +extern long presto_vmemory; + +# define PRESTO_DEBUG +# ifdef PRESTO_DEBUG +/* debugging masks */ +# define D_SUPER 1 +# define D_INODE 2 +# define D_FILE 4 +# define D_CACHE 8 /* cache debugging */ +# define D_MALLOC 16 /* print malloc, de-alloc information */ +# define D_JOURNAL 32 +# define D_UPCALL 64 /* up and downcall debugging */ +# define D_PSDEV 128 +# define D_PIOCTL 256 +# define D_SPECIAL 512 +# define D_TIMING 1024 +# define D_DOWNCALL 2048 +# define D_KML 4096 +# define D_FSDATA 8192 + +# define CDEBUG(mask, format, a...) \ + do { \ + if (presto_debug & mask) { \ + printk("(%s:%s,l. %d %d): " format, __FILE__, \ + __FUNCTION__, __LINE__, current->pid \ + , ## a); \ + } \ + } while (0) + +#define CERROR(format, a...) \ +do { \ + printk("(%s:%s,l. 
%d %d): " format, __FILE__, __FUNCTION__, \ + __LINE__, current->pid , ## a); \ +} while (0) + +# define ENTRY \ + if (presto_print_entry) \ + printk("Process %d entered %s\n", current->pid, __FUNCTION__) + +# define EXIT \ + if (presto_print_entry) \ + printk("Process %d leaving %s at %d\n", current->pid, \ + __FUNCTION__, __LINE__) + +# define presto_kmem_inc(ptr, size) presto_kmemory += (size) +# define presto_kmem_dec(ptr, size) presto_kmemory -= (size) +# define presto_vmem_inc(ptr, size) presto_vmemory += (size) +# define presto_vmem_dec(ptr, size) presto_vmemory -= (size) +# else /* !PRESTO_DEBUG */ +# define CDEBUG(mask, format, a...) do {} while (0) +# define ENTRY do {} while (0) +# define EXIT do {} while (0) +# define presto_kmem_inc(ptr, size) do {} while (0) +# define presto_kmem_dec(ptr, size) do {} while (0) +# define presto_vmem_inc(ptr, size) do {} while (0) +# define presto_vmem_dec(ptr, size) do {} while (0) +# endif /* PRESTO_DEBUG */ + + +struct run_ctxt { + struct vfsmount *pwdmnt; + struct dentry *pwd; + struct vfsmount *rootmnt; + struct dentry *root; + uid_t fsuid; + gid_t fsgid; + mm_segment_t fs; + struct group_info * group_info; +/* int ngroups; + gid_t groups[NGROUPS];*/ + +}; + +static inline void push_ctxt(struct run_ctxt *save, struct run_ctxt *new) +{ + save->fs = get_fs(); + save->pwd = dget(current->fs->pwd); + save->pwdmnt = mntget(current->fs->pwdmnt); + save->fsgid = current->fsgid; + save->fsuid = current->fsuid; + save->root = current->fs->root; + save->rootmnt = current->fs->rootmnt; + save->group_info = current->group_info; +/* save->ngroups = current->ngroups; + for (i = 0; i< current->ngroups; i++) + save->groups[i] = current->groups[i];*/ + + set_fs(new->fs); + lock_kernel(); + set_fs_pwd(current->fs, new->pwdmnt, new->pwd); + if (new->root) + set_fs_root(current->fs, new->rootmnt, new->root); + unlock_kernel(); + current->fsuid = new->fsuid; + current->fsgid = new->fsgid; + /*if (new->ngroups > 0) { + current->ngroups 
= new->ngroups; + for (i = 0; i< new->ngroups; i++) + current->groups[i] = new->groups[i]; + }*/ + current->group_info = new->group_info; + +} + +static inline void pop_ctxt(struct run_ctxt *saved) +{ + set_fs(saved->fs); + lock_kernel(); + set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd); + if (saved->root) + set_fs_root(current->fs, saved->rootmnt, saved->root); + unlock_kernel(); + current->fsuid = saved->fsuid; + current->fsgid = saved->fsgid; + current->group_info = saved->group_info; +/* + current->ngroups = saved->ngroups; + for (i = 0; i< saved->ngroups; i++) + current->groups[i] = saved->groups[i]; +*/ + mntput(saved->pwdmnt); + dput(saved->pwd); +} + +static inline struct presto_dentry_data *presto_d2d(struct dentry *dentry) +{ + return (struct presto_dentry_data *)(dentry->d_fsdata); +} + +struct presto_cache { + spinlock_t cache_lock; + loff_t cache_reserved; + struct vfsmount *cache_vfsmount; + struct super_block *cache_sb; + struct dentry *cache_root; + struct list_head cache_chain; /* for the dev/cache hash */ + + int cache_flags; + + char *cache_type; /* filesystem type of cache */ + struct filter_fs *cache_filter; + + struct upc_channel *cache_psdev; /* points to channel used */ + struct list_head cache_channel_list; + struct list_head cache_fset_list; /* filesets mounted in cache */ +}; + +struct presto_log_fd { + rwlock_t fd_lock; + loff_t fd_offset; /* offset where next record should go */ + struct file *fd_file; + int fd_truncating; + unsigned int fd_recno; /* last recno written */ + struct list_head fd_reservations; +}; + +/* file sets */ +# define CHUNK_BITS 16 + +struct presto_file_set { + struct list_head fset_list; + struct presto_log_fd fset_kml; + struct presto_log_fd fset_lml; + struct presto_log_fd fset_rcvd; + struct list_head *fset_clients; /* cache of clients */ + struct dentry *fset_dentry; + struct vfsmount *fset_mnt; + struct presto_cache *fset_cache; + + unsigned int fset_lento_recno; /* last recno mentioned to lento */ + 
loff_t fset_lento_off; /* last offset mentioned to lento */ + loff_t fset_kml_logical_off; /* logical offset of kml file byte 0 */ + char * fset_name; + + int fset_flags; + int fset_chunkbits; + char *fset_reint_buf; /* temporary buffer holds kml during reint */ + + spinlock_t fset_permit_lock; + int fset_permit_count; + int fset_permit_upcall_count; + /* This queue is used both for processes waiting for the kernel to give + * up the permit as well as processes waiting for the kernel to be given + * the permit, depending on the state of FSET_HASPERMIT. */ + wait_queue_head_t fset_permit_queue; + + loff_t fset_file_maxio; /* writing more than this causes a close */ + unsigned long int kml_truncate_size; +}; + +/* This is the default number of bytes written before a close is recorded*/ +#define FSET_DEFAULT_MAX_FILEIO (1024<<10) + +struct dentry *presto_tmpfs_ilookup(struct inode *dir, struct dentry *dentry, + ino_t ino, unsigned int generation); +struct dentry *presto_iget_ilookup(struct inode *dir, struct dentry *dentry, + ino_t ino, unsigned int generation); +struct dentry *presto_add_ilookup_dentry(struct dentry *parent, + struct dentry *real); + +struct journal_ops { + int (*tr_all_data)(struct inode *); + loff_t (*tr_avail)(struct presto_cache *fset, struct super_block *); + void *(*tr_start)(struct presto_file_set *, struct inode *, int op); + void (*tr_commit)(struct presto_file_set *, void *handle); + void (*tr_journal_data)(struct inode *); + struct dentry *(*tr_ilookup)(struct inode *dir, struct dentry *dentry, ino_t ino, unsigned int generation); + struct dentry *(*tr_add_ilookup)(struct dentry *parent, struct dentry *real); +}; + +extern struct journal_ops presto_ext2_journal_ops; +extern struct journal_ops presto_ext3_journal_ops; +extern struct journal_ops presto_tmpfs_journal_ops; +extern struct journal_ops presto_xfs_journal_ops; +extern struct journal_ops presto_reiserfs_journal_ops; +extern struct journal_ops presto_obdfs_journal_ops; + +# define 
LENTO_FL_KML 0x0001 +# define LENTO_FL_EXPECT 0x0002 +# define LENTO_FL_VFSCHECK 0x0004 +# define LENTO_FL_JUSTLOG 0x0008 +# define LENTO_FL_WRITE_KML 0x0010 +# define LENTO_FL_CANCEL_LML 0x0020 +# define LENTO_FL_WRITE_EXPECT 0x0040 +# define LENTO_FL_IGNORE_TIME 0x0080 +# define LENTO_FL_TOUCH_PARENT 0x0100 +# define LENTO_FL_TOUCH_NEWOBJ 0x0200 +# define LENTO_FL_SET_DDFILEID 0x0400 + +struct presto_cache *presto_get_cache(struct inode *inode); +int presto_sprint_mounts(char *buf, int buflen, int minor); +struct presto_file_set *presto_fset(struct dentry *de); +int presto_journal(struct dentry *dentry, char *buf, size_t size); +int presto_fwrite(struct file *file, const char *str, int len, loff_t *off); +int presto_ispresto(struct inode *); + +/* super.c */ +extern struct file_system_type presto_fs_type; +extern int init_intermezzo_fs(void); + +/* fileset.c */ +extern int izo_prepare_fileset(struct dentry *root, char *fsetname); +char * izo_make_path(struct presto_file_set *fset, char *name); +struct file *izo_fset_open(struct presto_file_set *fset, char *name, int flags, int mode); + +/* psdev.c */ +int izo_psdev_get_free_channel(void); +int presto_psdev_init(void); +int izo_psdev_setpid(int minor); +extern void presto_psdev_cleanup(void); +int presto_lento_up(int minor); +int izo_psdev_setchannel(struct file *file, int fd); + +/* inode.c */ +extern struct super_operations presto_super_ops; +void presto_set_ops(struct inode *inode, struct filter_fs *filter); + +/* dcache.c */ +void presto_frob_dop(struct dentry *de); +char *presto_path(struct dentry *dentry, struct dentry *root, + char *buffer, int buflen); +struct presto_dentry_data *izo_alloc_ddata(void); +int presto_set_dd(struct dentry *); +int presto_init_ddata_cache(void); +void presto_cleanup_ddata_cache(void); +extern struct dentry_operations presto_dentry_ops; + +/* dir.c */ +extern struct inode_operations presto_dir_iops; +extern struct inode_operations presto_file_iops; +extern struct 
inode_operations presto_sym_iops; +extern struct file_operations presto_dir_fops; +extern struct file_operations presto_file_fops; +extern struct file_operations presto_sym_fops; +int presto_setattr(struct dentry *de, struct iattr *iattr); +int presto_settime(struct presto_file_set *fset, struct dentry *newobj, + struct dentry *parent, struct dentry *target, + struct lento_vfs_context *ctx, int valid); +int presto_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg); + +extern int presto_ilookup_uid; +# define PRESTO_ILOOKUP_MAGIC "...ino:" +# define PRESTO_ILOOKUP_SEP ':' +int izo_dentry_is_ilookup(struct dentry *, ino_t *id, unsigned int *generation); +struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd); + +struct presto_dentry_data { + int dd_count; /* how mnay dentries are using this dentry */ + struct presto_file_set *dd_fset; + struct dentry *dd_inodentry; + loff_t dd_kml_offset; + int dd_flags; + __u64 remote_ino; + __u64 remote_generation; +}; + +struct presto_file_data { + int fd_do_lml; + loff_t fd_lml_offset; + size_t fd_bytes_written; + /* authorization related data of file at open time */ + uid_t fd_uid; + gid_t fd_gid; + mode_t fd_mode; + /* identification data of calling process */ + uid_t fd_fsuid; + gid_t fd_fsgid; + int fd_ngroups; + gid_t fd_groups[NGROUPS_SMALL]; + /* information how to complete the close operation */ + struct lento_vfs_context fd_info; + struct presto_version fd_version; +}; + +/* presto.c and Lento::Downcall */ + +int presto_walk(const char *name, struct nameidata *nd); +int izo_clear_fsetroot(struct dentry *dentry); +int izo_clear_all_fsetroots(struct presto_cache *cache); +int presto_get_kmlsize(char *path, __u64 *size); +int presto_get_lastrecno(char *path, off_t *size); +int presto_set_fsetroot(struct dentry *dentry, char *fsetname, + unsigned int flags); +int presto_set_fsetroot_from_ioc(struct dentry *dentry, char *fsetname, + unsigned int flags); 
+int presto_is_read_only(struct presto_file_set *); +int presto_truncate_lml(struct presto_file_set *fset); +int lento_write_lml(char *path, + __u64 remote_ino, + __u32 remote_generation, + __u32 remote_version, + struct presto_version *remote_file_version); +int lento_complete_closes(char *path); +int presto_f2m(struct presto_file_set *fset); +int presto_prep(struct dentry *, struct presto_cache **, + struct presto_file_set **); +/* cache.c */ +extern struct presto_cache *presto_cache_init(void); +extern void presto_cache_add(struct presto_cache *cache); +extern void presto_cache_init_hash(void); + +struct presto_cache *presto_cache_find(struct super_block *sb); + +#define PRESTO_REQLOW (3 * 4096) +#define PRESTO_REQHIGH (6 * 4096) +void presto_release_space(struct presto_cache *cache, loff_t req); +int presto_reserve_space(struct presto_cache *cache, loff_t req); + +#define PRESTO_DATA 0x00000002 /* cached data is valid */ +#define PRESTO_ATTR 0x00000004 /* attributes cached */ +#define PRESTO_DONT_JOURNAL 0x00000008 /* things like .intermezzo/ */ + +struct presto_file_set *presto_path2fileset(const char *name); +int izo_revoke_permit(struct dentry *, uuid_t uuid); +int presto_chk(struct dentry *dentry, int flag); +void presto_set(struct dentry *dentry, int flag); +int presto_get_permit(struct inode *inode); +int presto_put_permit(struct inode *inode); +int presto_set_max_kml_size(const char *path, unsigned long max_size); +int izo_mark_dentry(struct dentry *dentry, int and, int or, int *res); +int izo_mark_cache(struct dentry *dentry, int and_bits, int or_bits, int *); +int izo_mark_fset(struct dentry *dentry, int and_bits, int or_bits, int *); +void presto_getversion(struct presto_version *pv, struct inode *inode); +int presto_i2m(struct inode *inode); +int presto_c2m(struct presto_cache *cache); + + +/* file.c */ +int izo_purge_file(struct presto_file_set *fset, char *file); +int presto_adjust_lml(struct file *file, struct lento_vfs_context *info); + +/* 
journal.c */ +struct rec_info { + loff_t offset; + int size; + int recno; + int is_kml; +}; + +void presto_trans_commit(struct presto_file_set *fset, void *handle); +void *presto_trans_start(struct presto_file_set *fset, struct inode *inode, + int op); +int presto_fread(struct file *file, char *str, int len, loff_t *off); +int presto_clear_lml_close(struct presto_file_set *fset, + loff_t lml_offset); +int presto_complete_lml(struct presto_file_set *fset); +int presto_read_kml_logical_offset(struct rec_info *recinfo, + struct presto_file_set *fset); +int presto_write_kml_logical_offset(struct presto_file_set *fset); +struct file *presto_copy_kml_tail(struct presto_file_set *fset, + unsigned long int start); +int presto_finish_kml_truncate(struct presto_file_set *fset, + unsigned long int offset); +int izo_lookup_file(struct presto_file_set *fset, char *path, + struct nameidata *nd); +int izo_do_truncate(struct presto_file_set *fset, struct dentry *dentry, + loff_t length, loff_t size_check); +int izo_log_close(struct presto_log_fd *logfd); +struct file *izo_log_open(struct presto_file_set *fset, char *name, int flags); +int izo_init_kml_file(struct presto_file_set *, struct presto_log_fd *); +int izo_init_lml_file(struct presto_file_set *, struct presto_log_fd *); +int izo_init_last_rcvd_file(struct presto_file_set *, struct presto_log_fd *); + +/* vfs.c */ + +/* Extra data needed in the KML for rollback operations; this structure is + * passed around during the KML-writing process. 
*/ +struct izo_rollback_data { + __u32 rb_mode; + __u32 rb_rdev; + __u64 rb_uid; + __u64 rb_gid; +}; + +int presto_write_last_rcvd(struct rec_info *recinfo, + struct presto_file_set *fset, + struct lento_vfs_context *info); +void izo_get_rollback_data(struct inode *inode, struct izo_rollback_data *rb); +int presto_do_close(struct presto_file_set *fset, struct file *file); +int presto_do_setattr(struct presto_file_set *fset, struct dentry *dentry, + struct iattr *iattr, struct lento_vfs_context *info); +int presto_do_create(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, int mode, + struct lento_vfs_context *info); +int presto_do_link(struct presto_file_set *fset, struct dentry *dir, + struct dentry *old_dentry, struct dentry *new_dentry, + struct lento_vfs_context *info); +int presto_do_unlink(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, struct lento_vfs_context *info); +int presto_do_symlink(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, const char *name, + struct lento_vfs_context *info); +int presto_do_mkdir(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, int mode, + struct lento_vfs_context *info); +int presto_do_rmdir(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, struct lento_vfs_context *info); +int presto_do_mknod(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, int mode, dev_t dev, + struct lento_vfs_context *info); +int do_rename(struct presto_file_set *fset, struct dentry *old_dir, + struct dentry *old_dentry, struct dentry *new_dir, + struct dentry *new_dentry, struct lento_vfs_context *info); +int presto_do_statfs (struct presto_file_set *fset, + struct kstatfs * buf); + +int lento_setattr(const char *name, struct iattr *iattr, + struct lento_vfs_context *info); +int lento_create(const char *name, int mode, struct lento_vfs_context *info); +int lento_link(const char *oldname, const char 
*newname, + struct lento_vfs_context *info); +int lento_unlink(const char *name, struct lento_vfs_context *info); +int lento_symlink(const char *oldname,const char *newname, + struct lento_vfs_context *info); +int lento_mkdir(const char *name, int mode, struct lento_vfs_context *info); +int lento_rmdir(const char *name, struct lento_vfs_context *info); +int lento_mknod(const char *name, int mode, dev_t dev, + struct lento_vfs_context *info); +int lento_rename(const char *oldname, const char *newname, + struct lento_vfs_context *info); +int lento_iopen(const char *name, ino_t ino, unsigned int generation,int flags); + +/* journal.c */ + +#define JOURNAL_PAGE_SZ PAGE_SIZE + +int presto_no_journal(struct presto_file_set *fset); +int journal_fetch(int minor); +int presto_log(struct presto_file_set *fset, struct rec_info *rec, + const char *buf, size_t size, + const char *string1, int len1, + const char *string2, int len2, + const char *string3, int len3); +int presto_get_fileid(int minor, struct presto_file_set *fset, + struct dentry *dentry); +int presto_journal_setattr(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, struct presto_version *old_ver, + struct izo_rollback_data *, struct iattr *iattr); +int presto_journal_create(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *tgt_dir_ver, + struct presto_version *new_file_ver, int mode); +int presto_journal_link(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *src, struct dentry *tgt, + struct presto_version *tgt_dir_ver, + struct presto_version *new_link_ver); +int presto_journal_unlink(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dir, + struct presto_version *tgt_dir_ver, + struct presto_version *old_file_ver, + struct izo_rollback_data *, struct dentry *dentry, + char *old_target, int old_targetlen); +int presto_journal_symlink(struct rec_info *rec, struct presto_file_set *fset, + struct 
dentry *dentry, const char *target, + struct presto_version *tgt_dir_ver, + struct presto_version *new_link_ver); +int presto_journal_mkdir(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *tgt_dir_ver, + struct presto_version *new_dir_ver, int mode); +int presto_journal_rmdir(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *tgt_dir_ver, + struct presto_version *old_dir_ver, + struct izo_rollback_data *, int len, const char *name); +int presto_journal_mknod(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *tgt_dir_ver, + struct presto_version *new_node_ver, int mode, + int dmajor, int dminor); +int presto_journal_rename(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *src, struct dentry *tgt, + struct presto_version *src_dir_ver, + struct presto_version *tgt_dir_ver); +int presto_journal_open(struct rec_info *, struct presto_file_set *, + struct dentry *, struct presto_version *old_ver); +int presto_journal_close(struct rec_info *rec, struct presto_file_set *, + struct presto_file_data *, struct dentry *, + struct presto_version *old_file_ver, + struct presto_version *new_file_ver); +int presto_write_lml_close(struct rec_info *rec, + struct presto_file_set *fset, + struct file *file, + __u64 remote_ino, + __u64 remote_generation, + struct presto_version *remote_version, + struct presto_version *new_file_ver); +void presto_log_op(void *data, int len); +loff_t presto_kml_offset(struct presto_file_set *fset); + +/* upcall.c */ +#define SYNCHRONOUS 0 +#define ASYNCHRONOUS 1 +/* asynchronous calls */ +int izo_upc_kml(int minor, __u64 offset, __u32 first_recno, __u64 length, + __u32 last_recno, char *fsetname); +int izo_upc_kml_truncate(int minor, __u64 length, __u32 last_recno, + char *fsetname); +int izo_upc_go_fetch_kml(int minor, char *fsetname, uuid_t uuid, __u64 kmlsize); +int 
izo_upc_backfetch(int minor, char *path, char *fileset, + struct lento_vfs_context *); + +/* synchronous calls */ +int izo_upc_get_fileid(int minor, __u32 reclen, char *rec, + __u32 pathlen, char *path, char *fsetname); +int izo_upc_permit(int minor, struct dentry *, __u32 pathlen, char *path, + char *fset); +int izo_upc_open(int minor, __u32 pathlen, char *path, char *fsetname, + struct lento_vfs_context *info); +int izo_upc_connect(int minor, __u64 ip_address, __u64 port, __u8 uuid[16], + int client_flag); +int izo_upc_revoke_permit(int minor, char *fsetname, uuid_t uuid); +int izo_upc_set_kmlsize(int minor, char *fsetname, uuid_t uuid, __u64 kmlsize); +int izo_upc_client_make_branch(int minor, char *fsetname); +int izo_upc_server_make_branch(int minor, char *fsetname); +int izo_upc_branch_undo(int minor, char *fsetname, char *branchname); +int izo_upc_branch_redo(int minor, char *fsetname, char *branchname); +int izo_upc_repstatus(int minor, char * fsetname, struct izo_rcvd_rec *lr_server); + +/* general mechanism */ +int izo_upc_upcall(int minor, int *size, struct izo_upcall_hdr *, int async); + +/* replicator.c */ +int izo_repstatus(struct presto_file_set *fset, __u64 client_kmlsize, + struct izo_rcvd_rec *lr_client, struct izo_rcvd_rec *lr_server); +int izo_rep_cache_init(struct presto_file_set *); +loff_t izo_rcvd_get(struct izo_rcvd_rec *, struct presto_file_set *, char *uuid); +loff_t izo_rcvd_write(struct presto_file_set *, struct izo_rcvd_rec *); +loff_t izo_rcvd_upd_remote(struct presto_file_set *fset, char * uuid, __u64 remote_recno, + __u64 remote_offset); + +int izo_ioctl_packlen(struct izo_ioctl_data *data); + +/* sysctl.c */ +int init_intermezzo_sysctl(void); +void cleanup_intermezzo_sysctl(void); + +/* ext_attr.c */ +/* We will be more tolerant than the default ea patch with attr name sizes and + * the size of value. If these come via VFS from the default ea patches, the + * corresponding character strings will be truncated anyway. 
During journalling- * we journal length for both name and value. See journal_set_ext_attr. + */ +#define PRESTO_EXT_ATTR_NAME_MAX 128 +#define PRESTO_EXT_ATTR_VALUE_MAX 8192 + +#define PRESTO_ALLOC(ptr, size) \ +do { \ + long s = (size); \ + (ptr) = kmalloc(s, GFP_KERNEL); \ + if ((ptr) == NULL) \ + CERROR("IZO: out of memory at %s:%d (trying to " \ + "allocate %ld)\n", __FILE__, __LINE__, s); \ + else { \ + presto_kmem_inc((ptr), s); \ + memset((ptr), 0, s); \ + } \ + CDEBUG(D_MALLOC, "kmalloced: %ld at %p (tot %ld).\n", \ + s, (ptr), presto_kmemory); \ +} while (0) + +#define PRESTO_FREE(ptr, size) \ +do { \ + long s = (size); \ + if ((ptr) == NULL) { \ + CERROR("IZO: free NULL pointer (%ld bytes) at " \ + "%s:%d\n", s, __FILE__, __LINE__); \ + break; \ + } \ + kfree(ptr); \ + CDEBUG(D_MALLOC, "kfreed: %ld at %p (tot %ld).\n", \ + s, (ptr), presto_kmemory); \ + presto_kmem_dec((ptr), s); \ +} while (0) + +static inline int dentry_name_cmp(struct dentry *dentry, char *name) +{ + return (strlen(name) == dentry->d_name.len && + memcmp(name, dentry->d_name.name, dentry->d_name.len) == 0); +} + +static inline char *strdup(char *str) +{ + char *tmp; + tmp = kmalloc(strlen(str) + 1, GFP_KERNEL); + if (tmp) + memcpy(tmp, str, strlen(str) + 1); + + return tmp; +} + +static inline int izo_ioctl_is_invalid(struct izo_ioctl_data *data) +{ + if (data->ioc_len > (1<<30)) { + CERROR("IZO ioctl: ioc_len larger than 1<<30\n"); + return 1; + } + if (data->ioc_inllen1 > (1<<30)) { + CERROR("IZO ioctl: ioc_inllen1 larger than 1<<30\n"); + return 1; + } + if (data->ioc_inllen2 > (1<<30)) { + CERROR("IZO ioctl: ioc_inllen2 larger than 1<<30\n"); + return 1; + } + if (data->ioc_inlbuf1 && !data->ioc_inllen1) { + CERROR("IZO ioctl: inlbuf1 pointer but 0 length\n"); + return 1; + } + if (data->ioc_inlbuf2 && !data->ioc_inllen2) { + CERROR("IZO ioctl: inlbuf2 pointer but 0 length\n"); + return 1; + } + if (data->ioc_pbuf1 && !data->ioc_plen1) { + CERROR("IZO ioctl: pbuf1 pointer but 0 
length\n"); + return 1; + } + if (data->ioc_pbuf2 && !data->ioc_plen2) { + CERROR("IZO ioctl: pbuf2 pointer but 0 length\n"); + return 1; + } + if (izo_ioctl_packlen(data) != data->ioc_len ) { + CERROR("IZO ioctl: packlen exceeds ioc_len\n"); + return 1; + } + if (data->ioc_inllen1 && + data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') { + CERROR("IZO ioctl: inlbuf1 not 0 terminated\n"); + return 1; + } + if (data->ioc_inllen2 && + data->ioc_bulk[size_round(data->ioc_inllen1) + data->ioc_inllen2 + - 1] != '\0') { + CERROR("IZO ioctl: inlbuf2 not 0 terminated\n"); + return 1; + } + return 0; +} + +/* buffer MUST be at least the size of izo_ioctl_hdr */ +static inline int izo_ioctl_getdata(char *buf, char *end, void *arg) +{ + struct izo_ioctl_hdr *hdr; + struct izo_ioctl_data *data; + int err; + ENTRY; + + hdr = (struct izo_ioctl_hdr *)buf; + data = (struct izo_ioctl_data *)buf; + + err = copy_from_user(buf, (void *)arg, sizeof(*hdr)); + if ( err ) { + EXIT; + return err; + } + + if (hdr->ioc_version != IZO_IOCTL_VERSION) { + CERROR("IZO: version mismatch kernel vs application\n"); + return -EINVAL; + } + + if (hdr->ioc_len + buf >= end) { + CERROR("IZO: user buffer exceeds kernel buffer\n"); + return -EINVAL; + } + + if (hdr->ioc_len < sizeof(struct izo_ioctl_data)) { + CERROR("IZO: user buffer too small for ioctl\n"); + return -EINVAL; + } + + err = copy_from_user(buf, (void *)arg, hdr->ioc_len); + if ( err ) { + EXIT; + return err; + } + + if (izo_ioctl_is_invalid(data)) { + CERROR("IZO: ioctl not correctly formatted\n"); + return -EINVAL; + } + + if (data->ioc_inllen1) { + data->ioc_inlbuf1 = &data->ioc_bulk[0]; + } + + if (data->ioc_inllen2) { + data->ioc_inlbuf2 = &data->ioc_bulk[0] + + size_round(data->ioc_inllen1); + } + + EXIT; + return 0; +} + +# define MYPATHLEN(buffer, path) ((buffer) + PAGE_SIZE - (path)) + +# define free kfree +# define malloc(a) kmalloc(a, GFP_KERNEL) +# define printf printk +int kml_reint_rec(struct file *dir, struct izo_ioctl_data 
*data); +int izo_get_fileid(struct file *dir, struct izo_ioctl_data *data); +int izo_set_fileid(struct file *dir, struct izo_ioctl_data *data); + +#else /* __KERNEL__ */ +# include +# include +# include +# include +# include + +# define printk printf +# ifndef CERROR +# define CERROR printf +# endif +# define kmalloc(a,b) malloc(a) + +void init_fsreintdata (void); +int kml_fsreint(struct kml_rec *rec, char *basedir); +int kml_iocreint(__u32 size, char *ptr, __u32 offset, int dird, + uuid_t uuid, __u32 generate_kml); + +static inline void izo_ioctl_init(struct izo_ioctl_data *data) +{ + memset(data, 0, sizeof(*data)); + data->ioc_len = sizeof(*data); + data->ioc_version = IZO_IOCTL_VERSION; +} + +static inline int +izo_ioctl_pack(struct izo_ioctl_data *data, char **pbuf, int max) +{ + char *ptr; + struct izo_ioctl_data *overlay; + data->ioc_len = izo_ioctl_packlen(data); + data->ioc_version = IZO_IOCTL_VERSION; + + if (*pbuf && izo_ioctl_packlen(data) > max) + return 1; + if (*pbuf == NULL) + *pbuf = malloc(data->ioc_len); + if (*pbuf == NULL) + return 1; + overlay = (struct izo_ioctl_data *)*pbuf; + memcpy(*pbuf, data, sizeof(*data)); + + ptr = overlay->ioc_bulk; + if (data->ioc_inlbuf1) + LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr); + if (data->ioc_inlbuf2) + LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr); + if (izo_ioctl_is_invalid(overlay)) + return 1; + + return 0; +} + +#endif /* __KERNEL__*/ + +#define IZO_ERROR_NAME 1 +#define IZO_ERROR_UPDATE 2 +#define IZO_ERROR_DELETE 3 +#define IZO_ERROR_RENAME 4 + +static inline char *izo_error(int err) +{ +#ifndef __KERNEL__ + if (err <= 0) + return strerror(-err); +#endif + switch (err) { + case IZO_ERROR_NAME: + return "InterMezzo name/name conflict"; + case IZO_ERROR_UPDATE: + return "InterMezzo update/update conflict"; + case IZO_ERROR_DELETE: + return "InterMezzo update/delete conflict"; + case IZO_ERROR_RENAME: + return "InterMezzo rename/rename conflict"; + } + return "Unknown InterMezzo error"; +} + +/* 
kml_unpack.c */ +char *kml_print_rec(struct kml_rec *rec, int brief); +int kml_unpack(struct kml_rec *rec, char **buf, char *end); + +/* fs 2.5 compat */ + +/* is_read_only() is replaced by bdev_read_only which takes struct + block_device *. Since this is only needed for debugging, it can be + safely ignored now. +*/ +#define is_read_only(dev) 0 + +#endif diff --git a/fs/intermezzo/intermezzo_idl.h b/fs/intermezzo/intermezzo_idl.h new file mode 100644 index 000000000..4371b161d --- /dev/null +++ b/fs/intermezzo/intermezzo_idl.h @@ -0,0 +1,304 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001, 2002 Cluster File Systems, Inc. + * Copyright (C) 2001 Tacit Networks, Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef __INTERMEZZO_IDL_H__ +#define __INTERMEZZO_IDL_H__ + +#include +#include + +/* this file contains all data structures used in InterMezzo's interfaces: + * - upcalls + * - ioctl's + * - KML records + * - RCVD records + * - rpc's + */ + +/* UPCALL */ +#define INTERMEZZO_MINOR 248 + + +#define IZO_UPC_VERSION 0x00010002 +#define IZO_UPC_PERMIT 1 +#define IZO_UPC_CONNECT 2 +#define IZO_UPC_GO_FETCH_KML 3 +#define IZO_UPC_OPEN 4 +#define IZO_UPC_REVOKE_PERMIT 5 +#define IZO_UPC_KML 6 +#define IZO_UPC_BACKFETCH 7 +#define IZO_UPC_KML_TRUNC 8 +#define IZO_UPC_SET_KMLSIZE 9 +#define IZO_UPC_BRANCH_UNDO 10 +#define IZO_UPC_BRANCH_REDO 11 +#define IZO_UPC_GET_FILEID 12 +#define IZO_UPC_CLIENT_MAKE_BRANCH 13 +#define IZO_UPC_SERVER_MAKE_BRANCH 14 +#define IZO_UPC_REPSTATUS 15 + +#define IZO_UPC_LARGEST_OPCODE 15 + +struct izo_upcall_hdr { + __u32 u_len; + __u32 u_version; + __u32 u_opc; + __u32 u_uniq; + __u32 u_pid; + __u32 u_uid; + __u32 u_pathlen; + __u32 u_fsetlen; + __u64 u_offset; + __u64 u_length; + __u32 u_first_recno; + __u32 u_last_recno; + __u32 u_async; + __u32 u_reclen; + __u8 u_uuid[16]; +}; + +/* This structure _must_ sit at the beginning of the buffer */ +struct izo_upcall_resp { + __u32 opcode; + __u32 unique; + __u32 result; +}; + + +/* IOCTL */ + +#define IZO_IOCTL_VERSION 0x00010003 + +/* maximum size supported for ioc_pbuf1 */ +#define KML_MAX_BUF (64*1024) + +struct izo_ioctl_hdr { + __u32 ioc_len; + __u32 ioc_version; +}; + +struct izo_ioctl_data { + __u32 ioc_len; + __u32 ioc_version; + __u32 ioc_izodev; + __u32 ioc_kmlrecno; + __u64 ioc_kmlsize; + __u32 ioc_flags; + __s32 ioc_inofd; + __u64 ioc_ino; + __u64 ioc_generation; + __u32 ioc_mark_what; + __u32 ioc_and_flag; + __u32 ioc_or_flag; + __u32 ioc_dev; + __u32 ioc_offset; + __u32 ioc_slot; + __u64 ioc_uid; + __u8 ioc_uuid[16]; + + __u32 ioc_inllen1; /* path */ + char *ioc_inlbuf1; + __u32 ioc_inllen2; /* fileset */ + char *ioc_inlbuf2; + + __u32 ioc_plen1; /* buffers in user space 
(KML) */ + char *ioc_pbuf1; + __u32 ioc_plen2; /* buffers in user space (KML) */ + char *ioc_pbuf2; + + char ioc_bulk[0]; +}; + +#define IZO_IOC_DEVICE _IOW ('p',0x50, void *) +#define IZO_IOC_REINTKML _IOW ('p',0x51, void *) +#define IZO_IOC_GET_RCVD _IOW ('p',0x52, void *) +#define IZO_IOC_SET_IOCTL_UID _IOW ('p',0x53, void *) +#define IZO_IOC_GET_KML_SIZE _IOW ('p',0x54, void *) +#define IZO_IOC_PURGE_FILE_DATA _IOW ('p',0x55, void *) +#define IZO_IOC_CONNECT _IOW ('p',0x56, void *) +#define IZO_IOC_GO_FETCH_KML _IOW ('p',0x57, void *) +#define IZO_IOC_MARK _IOW ('p',0x58, void *) +#define IZO_IOC_CLEAR_FSET _IOW ('p',0x59, void *) +#define IZO_IOC_CLEAR_ALL_FSETS _IOW ('p',0x60, void *) +#define IZO_IOC_SET_FSET _IOW ('p',0x61, void *) +#define IZO_IOC_REVOKE_PERMIT _IOW ('p',0x62, void *) +#define IZO_IOC_SET_KMLSIZE _IOW ('p',0x63, void *) +#define IZO_IOC_CLIENT_MAKE_BRANCH _IOW ('p',0x64, void *) +#define IZO_IOC_SERVER_MAKE_BRANCH _IOW ('p',0x65, void *) +#define IZO_IOC_BRANCH_UNDO _IOW ('p',0x66, void *) +#define IZO_IOC_BRANCH_REDO _IOW ('p',0x67, void *) +#define IZO_IOC_SET_PID _IOW ('p',0x68, void *) +#define IZO_IOC_SET_CHANNEL _IOW ('p',0x69, void *) +#define IZO_IOC_GET_CHANNEL _IOW ('p',0x70, void *) +#define IZO_IOC_GET_FILEID _IOW ('p',0x71, void *) +#define IZO_IOC_ADJUST_LML _IOW ('p',0x72, void *) +#define IZO_IOC_SET_FILEID _IOW ('p',0x73, void *) +#define IZO_IOC_REPSTATUS _IOW ('p',0x74, void *) + +/* marking flags for fsets */ +#define FSET_CLIENT_RO 0x00000001 +#define FSET_LENTO_RO 0x00000002 +#define FSET_HASPERMIT 0x00000004 /* we have a permit to WB */ +#define FSET_INSYNC 0x00000008 /* this fileset is in sync */ +#define FSET_PERMIT_WAITING 0x00000010 /* Lento is waiting for permit */ +#define FSET_STEAL_PERMIT 0x00000020 /* take permit if Lento is dead */ +#define FSET_JCLOSE_ON_WRITE 0x00000040 /* Journal closes on writes */ +#define FSET_DATA_ON_DEMAND 0x00000080 /* update data on file_open() */ +#define FSET_PERMIT_EXCLUSIVE 
0x00000100 /* only one permitholder allowed */ +#define FSET_HAS_BRANCHES 0x00000200 /* this fileset contains branches */ +#define FSET_IS_BRANCH 0x00000400 /* this fileset is a branch */ +#define FSET_FLAT_BRANCH 0x00000800 /* this fileset is ROOT with branches */ + +/* what to mark indicator (ioctl parameter) */ +#define MARK_DENTRY 101 +#define MARK_FSET 102 +#define MARK_CACHE 103 +#define MARK_GETFL 104 + +/* KML */ + +#define KML_MAJOR_VERSION 0x00010000 +#define KML_MINOR_VERSION 0x00000002 +#define KML_OPCODE_NOOP 0 +#define KML_OPCODE_CREATE 1 +#define KML_OPCODE_MKDIR 2 +#define KML_OPCODE_UNLINK 3 +#define KML_OPCODE_RMDIR 4 +#define KML_OPCODE_CLOSE 5 +#define KML_OPCODE_SYMLINK 6 +#define KML_OPCODE_RENAME 7 +#define KML_OPCODE_SETATTR 8 +#define KML_OPCODE_LINK 9 +#define KML_OPCODE_OPEN 10 +#define KML_OPCODE_MKNOD 11 +#define KML_OPCODE_WRITE 12 +#define KML_OPCODE_RELEASE 13 +#define KML_OPCODE_TRUNC 14 +#define KML_OPCODE_SETEXTATTR 15 +#define KML_OPCODE_DELEXTATTR 16 +#define KML_OPCODE_KML_TRUNC 17 +#define KML_OPCODE_GET_FILEID 18 +#define KML_OPCODE_NUM 19 +/* new stuff */ +struct presto_version { + __u32 pv_mtime_sec; + __u32 pv_mtime_nsec; + __u32 pv_ctime_sec; + __u32 pv_ctime_nsec; + __u64 pv_size; +}; + +struct kml_prefix_hdr { + __u32 len; + __u32 version; + __u32 pid; + __u32 auid; + __u32 fsuid; + __u32 fsgid; + __u32 opcode; + __u32 ngroups; +}; + +struct kml_prefix { + struct kml_prefix_hdr *hdr; + __u32 *groups; +}; + +struct kml_suffix { + __u32 prevrec; + __u32 recno; + __u32 time; + __u32 len; +}; + +struct kml_rec { + char *buf; + struct kml_prefix prefix; + __u64 offset; + char *path; + int pathlen; + char *name; + int namelen; + char *target; + int targetlen; + struct presto_version *old_objectv; + struct presto_version *new_objectv; + struct presto_version *old_parentv; + struct presto_version *new_parentv; + struct presto_version *old_targetv; + struct presto_version *new_targetv; + __u32 valid; + __u32 mode; + __u32 uid; + 
__u32 gid; + __u64 size; + __u32 mtime_sec; + __u32 mtime_nsec; + __u32 ctime_sec; + __u32 ctime_nsec; + __u32 flags; + __u32 ino; + __u32 rdev; + __u32 major; + __u32 minor; + __u32 generation; + __u32 old_mode; + __u32 old_rdev; + __u64 old_uid; + __u64 old_gid; + char *old_target; + int old_targetlen; + struct kml_suffix *suffix; +}; + + +/* RCVD */ + +/* izo_rcvd_rec fills the .intermezzo/fset/last_rcvd file and provides data about + * our view of reintegration offsets for a given peer. + * + * The only exception is the last_rcvd record which has a UUID consisting of all + * zeroes; this record's lr_local_offset field is the logical byte offset of our + * KML, which is updated when KML truncation takes place. All other fields are + * reserved. */ + +/* XXX - document how clean shutdowns are recorded */ + +struct izo_rcvd_rec { + __u8 lr_uuid[16]; /* which peer? */ + __u64 lr_remote_recno; /* last confirmed remote recno */ + __u64 lr_remote_offset; /* last confirmed remote offset */ + __u64 lr_local_recno; /* last locally reinted recno */ + __u64 lr_local_offset; /* last locally reinted offset */ + __u64 lr_last_ctime; /* the largest ctime that has reintegrated */ +}; + +/* Cache purge database + * + * Each DB entry is this structure followed by the path name, no trailing NUL. 
*/ +struct izo_purge_entry { + __u64 p_atime; + __u32 p_pathlen; +}; + +/* RPC */ + +#endif diff --git a/fs/intermezzo/intermezzo_journal.h b/fs/intermezzo/intermezzo_journal.h new file mode 100644 index 000000000..99d588d48 --- /dev/null +++ b/fs/intermezzo/intermezzo_journal.h @@ -0,0 +1,24 @@ +#ifndef __PRESTO_JOURNAL_H +#define __PRESTO_JOURNAL_H + + +struct journal_prefix { + int len; + u32 version; + int pid; + int uid; + int fsuid; + int fsgid; + int opcode; + u32 ngroups; + u32 groups[0]; +}; + +struct journal_suffix { + unsigned long prevrec; /* offset of previous record for dentry */ + int recno; + int time; + int len; +}; + +#endif diff --git a/fs/intermezzo/intermezzo_kml.h b/fs/intermezzo/intermezzo_kml.h new file mode 100644 index 000000000..ca612e615 --- /dev/null +++ b/fs/intermezzo/intermezzo_kml.h @@ -0,0 +1,260 @@ +#ifndef __INTERMEZZO_KML_H +#define __INTERMEZZO_KML_H + +#include "intermezzo_psdev.h" +#include +#include "intermezzo_journal.h" + +#define PRESTO_KML_MAJOR_VERSION 0x00010000 +#define PRESTO_KML_MINOR_VERSION 0x00002001 +#define PRESTO_OP_NOOP 0 +#define PRESTO_OP_CREATE 1 +#define PRESTO_OP_MKDIR 2 +#define PRESTO_OP_UNLINK 3 +#define PRESTO_OP_RMDIR 4 +#define PRESTO_OP_CLOSE 5 +#define PRESTO_OP_SYMLINK 6 +#define PRESTO_OP_RENAME 7 +#define PRESTO_OP_SETATTR 8 +#define PRESTO_OP_LINK 9 +#define PRESTO_OP_OPEN 10 +#define PRESTO_OP_MKNOD 11 +#define PRESTO_OP_WRITE 12 +#define PRESTO_OP_RELEASE 13 +#define PRESTO_OP_TRUNC 14 +#define PRESTO_OP_SETEXTATTR 15 +#define PRESTO_OP_DELEXTATTR 16 + +#define PRESTO_LML_DONE 1 /* flag to get first write to do LML */ +#define KML_KOP_MARK 0xffff + +struct presto_lml_data { + loff_t rec_offset; +}; + +struct big_journal_prefix { + u32 len; + u32 version; + u32 pid; + u32 uid; + u32 fsuid; + u32 fsgid; + u32 opcode; + u32 ngroups; + u32 groups[NGROUPS_SMALL]; +}; + +enum kml_opcode { + KML_CREATE = 1, + KML_MKDIR, + KML_UNLINK, + KML_RMDIR, + KML_CLOSE, + KML_SYMLINK, + KML_RENAME, + 
KML_SETATTR, + KML_LINK, + KML_OPEN, + KML_MKNOD, + KML_ENDMARK = 0xff +}; + +struct kml_create { + char *path; + struct presto_version new_objectv, + old_parentv, + new_parentv; + int mode; + int uid; + int gid; +}; + +struct kml_open { +}; + +struct kml_mkdir { + char *path; + struct presto_version new_objectv, + old_parentv, + new_parentv; + int mode; + int uid; + int gid; +}; + +struct kml_unlink { + char *path, + *name; + struct presto_version old_tgtv, + old_parentv, + new_parentv; +}; + +struct kml_rmdir { + char *path, + *name; + struct presto_version old_tgtv, + old_parentv, + new_parentv; +}; + +struct kml_close { + int open_mode, + open_uid, + open_gid; + char *path; + struct presto_version new_objectv; + __u64 ino; + int generation; +}; + +struct kml_symlink { + char *sourcepath, + *targetpath; + struct presto_version new_objectv, + old_parentv, + new_parentv; + int uid; + int gid; +}; + +struct kml_rename { + char *sourcepath, + *targetpath; + struct presto_version old_objectv, + new_objectv, + old_tgtv, + new_tgtv; +}; + +struct kml_setattr { + char *path; + struct presto_version old_objectv; + struct iattr iattr; +}; + +struct kml_link { + char *sourcepath, + *targetpath; + struct presto_version new_objectv, + old_parentv, + new_parentv; +}; + +struct kml_mknod { + char *path; + struct presto_version new_objectv, + old_parentv, + new_parentv; + int mode; + int uid; + int gid; + int major; + int minor; +}; + +/* kml record items for optimizing */ +struct kml_kop_node +{ + u32 kml_recno; + u32 kml_flag; + u32 kml_op; + nlink_t i_nlink; + u32 i_ino; +}; + +struct kml_kop_lnode +{ + struct list_head chains; + struct kml_kop_node node; +}; + +struct kml_endmark { + u32 total; + struct kml_kop_node *kop; +}; + +/* kml_flag */ +#define KML_REC_DELETE 1 +#define KML_REC_EXIST 0 + +struct kml_optimize { + struct list_head kml_chains; + u32 kml_flag; + u32 kml_op; + nlink_t i_nlink; + u32 i_ino; +}; + +struct kml_rec { + /* attribute of this record */ + int 
rec_size; + int rec_kml_offset; + + struct big_journal_prefix rec_head; + union { + struct kml_create create; + struct kml_open open; + struct kml_mkdir mkdir; + struct kml_unlink unlink; + struct kml_rmdir rmdir; + struct kml_close close; + struct kml_symlink symlink; + struct kml_rename rename; + struct kml_setattr setattr; + struct kml_mknod mknod; + struct kml_link link; + struct kml_endmark endmark; + } rec_kml; + struct journal_suffix rec_tail; + + /* for kml optimize only */ + struct kml_optimize kml_optimize; +}; + +/* kml record items for optimizing */ +extern void kml_kop_init (struct presto_file_set *fset); +extern void kml_kop_addrec (struct presto_file_set *fset, + struct inode *ino, u32 op, u32 flag); +extern int kml_kop_flush (struct presto_file_set *fset); + +/* defined in kml_setup.c */ +extern int kml_init (struct presto_file_set *fset); +extern int kml_cleanup (struct presto_file_set *fset); + +/* defined in kml.c */ +extern int begin_kml_reint (struct file *file, unsigned long arg); +extern int do_kml_reint (struct file *file, unsigned long arg); +extern int end_kml_reint (struct file *file, unsigned long arg); + +/* kml_utils.c */ +extern char *dlogit (void *tbuf, const void *sbuf, int size); +extern char * bdup_printf (char *format, ...); + +/* defined in kml_decode.c */ +/* printop */ +#define PRINT_KML_PREFIX 0x1 +#define PRINT_KML_SUFFIX 0x2 +#define PRINT_KML_REC 0x4 +#define PRINT_KML_OPTIMIZE 0x8 +#define PRINT_KML_EXIST 0x10 +#define PRINT_KML_DELETE 0x20 +extern void kml_printrec (struct kml_rec *rec, int printop); +extern int print_allkmlrec (struct list_head *head, int printop); +extern int delete_kmlrec (struct list_head *head); +extern int kml_decoderec (char *buf, int pos, int buflen, int *size, + struct kml_rec **newrec); +extern int decode_kmlrec (struct list_head *head, char *kml_buf, int buflen); +extern void kml_freerec (struct kml_rec *rec); + +/* defined in kml_reint.c */ +#define KML_CLOSE_BACKFETCH 1 +extern int 
kml_reintbuf (struct kml_fsdata *kml_fsdata, + char *mtpt, struct kml_rec **rec); + +/* defined in kml_setup.c */ +extern int kml_init (struct presto_file_set *fset); +extern int kml_cleanup (struct presto_file_set *fset); + +#endif + diff --git a/fs/intermezzo/intermezzo_lib.h b/fs/intermezzo/intermezzo_lib.h new file mode 100644 index 000000000..21cc0b94a --- /dev/null +++ b/fs/intermezzo/intermezzo_lib.h @@ -0,0 +1,162 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Data structures unpacking/packing macros & inlines + * + */ + +#ifndef _INTERMEZZO_LIB_H +#define _INTERMEZZO_LIB_H + +#ifdef __KERNEL__ +# include +#else +# include +# include +#endif + +static inline int size_round (int val) +{ + return (val + 3) & (~0x3); +} + +static inline int size_round0(int val) +{ + if (!val) + return 0; + return (val + 1 + 3) & (~0x3); +} + +static inline size_t round_strlen(char *fset) +{ + return size_round(strlen(fset) + 1); +} + +#ifdef __KERNEL__ +# define NTOH__u32(var) le32_to_cpu(var) +# define NTOH__u64(var) le64_to_cpu(var) +# define HTON__u32(var) cpu_to_le32(var) +# define HTON__u64(var) cpu_to_le64(var) +#else +# include +# define NTOH__u32(var) GUINT32_FROM_LE(var) +# define NTOH__u64(var) GUINT64_FROM_LE(var) +# define HTON__u32(var) GUINT32_TO_LE(var) +# define HTON__u64(var) GUINT64_TO_LE(var) +#endif + +/* + * copy sizeof(type) bytes from pointer to var and move ptr forward. + * return EFAULT if pointer goes beyond end + */ +#define UNLOGV(var,type,ptr,end) \ +do { \ + var = *(type *)ptr; \ + ptr += sizeof(type); \ + if (ptr > end ) \ + return -EFAULT; \ +} while (0) + +/* the following two macros convert to little endian */ +/* type MUST be __u32 or __u64 */ +#define LUNLOGV(var,type,ptr,end) \ +do { \ + var = NTOH##type(*(type *)ptr); \ + ptr += sizeof(type); \ + if (ptr > end ) \ + return -EFAULT; \ +} while (0) + +/* now log values */ +#define LOGV(var,type,ptr) \ +do { \ + *((type *)ptr) = var; \ + ptr += sizeof(type); \ +} while (0) + +/* and in network order */ +#define LLOGV(var,type,ptr) \ +do { \ + *((type *)ptr) = HTON##type(var); \ + ptr += sizeof(type); \ +} while (0) + + +/* + * set var to point at (type *)ptr, move ptr forward with sizeof(type) + * return from function with EFAULT if ptr goes beyond end + */ +#define UNLOGP(var,type,ptr,end) \ +do { \ + var = (type *)ptr; \ + ptr += sizeof(type); \ + if (ptr > end ) \ + return -EFAULT; \ +} while (0) + +#define LOGP(var,type,ptr) \ +do { \ + 
memcpy(ptr, var, sizeof(type)); \ + ptr += sizeof(type); \ +} while (0) + +/* + * set var to point at (char *)ptr, move ptr forward by size_round(len); + * return from function with EFAULT if ptr goes beyond end + */ +#define UNLOGL(var,type,len,ptr,end) \ +do { \ + if (len == 0) \ + var = (type *)0; \ + else { \ + var = (type *)ptr; \ + ptr += size_round(len * sizeof(type)); \ + } \ + if (ptr > end ) \ + return -EFAULT; \ +} while (0) + +#define UNLOGL0(var,type,len,ptr,end) \ +do { \ + UNLOGL(var,type,len+1,ptr,end); \ + if ( *((char *)ptr - size_round(len+1) + len) != '\0') \ + return -EFAULT; \ +} while (0) + +#define LOGL(var,len,ptr) \ +do { \ + size_t __fill = size_round(len); \ + /* Prevent data leakage. */ \ + if (__fill > 0) \ + memset((char *)ptr, 0, __fill); \ + memcpy((char *)ptr, (const char *)var, len); \ + ptr += __fill; \ +} while (0) + +#define LOGL0(var,len,ptr) \ +do { \ + if (!len) break; \ + memcpy((char *)ptr, (const char *)var, len); \ + *((char *)(ptr) + len) = 0; \ + ptr += size_round(len + 1); \ +} while (0) + +#endif /* _INTERMEZZO_LIB_H */ + diff --git a/fs/intermezzo/intermezzo_psdev.h b/fs/intermezzo/intermezzo_psdev.h new file mode 100644 index 000000000..fff728ad8 --- /dev/null +++ b/fs/intermezzo/intermezzo_psdev.h @@ -0,0 +1,55 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ + +#ifndef __PRESTO_PSDEV_H +#define __PRESTO_PSDEV_H + +#define MAX_CHANNEL 16 +#define PROCNAME_SIZE 32 +#include + +/* represents state of an instance reached with /dev/intermezzo */ +/* communication pending & processing queues */ +struct upc_channel { + unsigned int uc_seq; + wait_queue_head_t uc_waitq; /* Lento wait queue */ + struct list_head uc_pending; + struct list_head uc_processing; + spinlock_t uc_lock; + int uc_pid; /* Lento's pid */ + int uc_hard; /* allows signals during upcalls */ + int uc_no_filter; + int uc_no_journal; + int uc_no_upcall; + int uc_timeout; /* . 
sec: signals will dequeue upc */ + long uc_errorval; /* for testing I/O failures */ + struct list_head uc_cache_list; + int uc_minor; +}; + +#define ISLENTO(minor) (current->pid == izo_channels[minor].uc_pid \ + || current->real_parent->pid == izo_channels[minor].uc_pid \ + || current->real_parent->real_parent->pid == izo_channels[minor].uc_pid) + +extern struct upc_channel izo_channels[MAX_CHANNEL]; + +/* message types between presto filesystem in kernel */ +#define REQ_READ 1 +#define REQ_WRITE 2 +#define REQ_ASYNC 4 +#define REQ_DEAD 8 + +struct upc_req { + struct list_head rq_chain; + caddr_t rq_data; + int rq_flags; + int rq_bufsize; + int rq_rep_size; + int rq_opcode; /* copied from data to save lookup */ + int rq_unique; + wait_queue_head_t rq_sleep; /* process' wait queue */ + unsigned long rq_posttime; +}; + +#endif diff --git a/fs/intermezzo/intermezzo_upcall.h b/fs/intermezzo/intermezzo_upcall.h new file mode 100644 index 000000000..0b3e6ff74 --- /dev/null +++ b/fs/intermezzo/intermezzo_upcall.h @@ -0,0 +1,146 @@ +/* + * Based on cfs.h from Coda, but revamped for increased simplicity. + * Linux modifications by Peter Braam, Aug 1996 + * Rewritten for InterMezzo + */ + +#ifndef _PRESTO_HEADER_ +#define _PRESTO_HEADER_ + + +/* upcall.c */ +#define SYNCHRONOUS 0 +#define ASYNCHRONOUS 1 + +int lento_permit(int minor, int pathlen, int fsetnamelen, char *path, char *fset); +int lento_opendir(int minor, int pathlen, char *path, int async); +int lento_kml(int minor, unsigned int offset, unsigned int first_recno, + unsigned int length, unsigned int last_recno, int namelen, + char *fsetname); +int lento_open(int minor, int pathlen, char *path); +int lento_journal(int minor, char *page, int async); +int lento_release_permit(int minor, int cookie); + +/* + * Kernel <--> Lento communications. 
+ */ +/* upcalls */ +#define LENTO_PERMIT 1 +#define LENTO_JOURNAL 2 +#define LENTO_OPENDIR 3 +#define LENTO_OPEN 4 +#define LENTO_SIGNAL 5 +#define LENTO_KML 6 +#define LENTO_COOKIE 7 + +/* Lento <-> Presto RPC arguments */ +struct lento_up_hdr { + unsigned int opcode; + unsigned int unique; /* Keep multiple outstanding msgs distinct */ + u_short pid; /* Common to all */ + u_short uid; +}; + +/* This structure _must_ sit at the beginning of the buffer */ +struct lento_down_hdr { + unsigned int opcode; + unsigned int unique; + unsigned int result; +}; + +/* lento_permit: */ +struct lento_permit_in { + struct lento_up_hdr uh; + int pathlen; + int fsetnamelen; + char path[0]; +}; +struct lento_permit_out { + struct lento_down_hdr dh; +}; + + +/* lento_opendir: */ +struct lento_opendir_in { + struct lento_up_hdr uh; + int async; + int pathlen; + char path[0]; +}; +struct lento_opendir_out { + struct lento_down_hdr dh; +}; + + +/* lento_kml: */ +struct lento_kml_in { + struct lento_up_hdr uh; + unsigned int offset; + unsigned int first_recno; + unsigned int length; + unsigned int last_recno; + int namelen; + char fsetname[0]; +}; + +struct lento_kml_out { + struct lento_down_hdr dh; +}; + + +/* lento_open: */ +struct lento_open_in { + struct lento_up_hdr uh; + int pathlen; + char path[0]; +}; +struct lento_open_out { + struct lento_down_hdr dh; +}; + +/* lento_response_cookie */ +struct lento_response_cookie_in { + struct lento_up_hdr uh; + int cookie; +}; + +struct lento_response_cookie_out { + struct lento_down_hdr dh; +}; + + +struct lento_mknod { + struct lento_down_hdr dh; + int major; + int minor; + int mode; + char path[0]; +}; + + +/* NB: every struct below begins with an up_hdr */ +union up_args { + struct lento_up_hdr uh; + struct lento_permit_in lento_permit; + struct lento_open_in lento_open; + struct lento_opendir_in lento_opendir; + struct lento_kml_in lento_kml; + struct lento_response_cookie_in lento_response_cookie; +}; + +union down_args { + struct 
lento_down_hdr dh; + struct lento_permit_out lento_permit; + struct lento_open_out lento_open; + struct lento_opendir_out lento_opendir; + struct lento_kml_out lento_kml; + struct lento_response_cookie_out lento_response_cookie; +}; + +#include "intermezzo_psdev.h" + +int lento_upcall(int minor, int read_size, int *rep_size, + union up_args *buffer, int async, + struct upc_req *rq ); +#endif + diff --git a/fs/intermezzo/journal.c b/fs/intermezzo/journal.c new file mode 100644 index 000000000..2beda3863 --- /dev/null +++ b/fs/intermezzo/journal.c @@ -0,0 +1,2452 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 1998 Peter J. Braam + * Copyright (C) 2001 Cluster File Systems, Inc. + * Copyright (C) 2001 Tacit Networks, Inc. + * + * Support for journalling extended attributes + * Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + +struct presto_reservation_data { + unsigned int ri_recno; + loff_t ri_offset; + loff_t ri_size; + struct list_head ri_list; +}; + +/* + * Locking Semantics + * + * write lock in struct presto_log_fd: + * - name: fd_lock + * - required for: accessing any field in a presto_log_fd + * - may not be held across I/O + * - + * + */ + +/* + * reserve record space and/or atomically request state of the log + * rec will hold the location reserved record upon return + * this reservation will be placed in the queue + */ +static void presto_reserve_record(struct presto_file_set *fset, + struct presto_log_fd *fd, + struct rec_info *rec, + struct presto_reservation_data *rd) +{ + int chunked_record = 0; + ENTRY; + + write_lock(&fd->fd_lock); + if ( rec->is_kml ) { + int chunk = 1 << fset->fset_chunkbits; + int chunk_mask = ~(chunk -1); + loff_t boundary; + + boundary = (fd->fd_offset + chunk - 1) & chunk_mask; + if ( fd->fd_offset + rec->size >= boundary ) { + chunked_record = 1; + fd->fd_offset = boundary; + } + } + + fd->fd_recno++; + + /* this moves the fd_offset back after truncation */ + if ( list_empty(&fd->fd_reservations) && + !chunked_record) { + fd->fd_offset = fd->fd_file->f_dentry->d_inode->i_size; + } + + rec->offset = fd->fd_offset; + if (rec->is_kml) + rec->offset += fset->fset_kml_logical_off; + + rec->recno = fd->fd_recno; + + /* add the reservation data to the end of the list */ + rd->ri_offset = fd->fd_offset; + rd->ri_size = rec->size; + rd->ri_recno = rec->recno; + list_add(&rd->ri_list, fd->fd_reservations.prev); + + fd->fd_offset += rec->size; + + write_unlock(&fd->fd_lock); + + EXIT; +} + +static inline void presto_release_record(struct presto_log_fd *fd, + struct presto_reservation_data *rd) +{ + write_lock(&fd->fd_lock); + list_del(&rd->ri_list); + 
write_unlock(&fd->fd_lock); +} + +/* XXX should we ask for do_truncate to be exported? */ +int izo_do_truncate(struct presto_file_set *fset, struct dentry *dentry, + loff_t length, loff_t size_check) +{ + struct inode *inode = dentry->d_inode; + int error; + struct iattr newattrs; + + ENTRY; + + if (length < 0) { + EXIT; + return -EINVAL; + } + + down(&inode->i_sem); + lock_kernel(); + + if (size_check != inode->i_size) { + unlock_kernel(); + up(&inode->i_sem); + EXIT; + return -EALREADY; + } + + newattrs.ia_size = length; + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; + + if (inode->i_op && inode->i_op->setattr) + error = inode->i_op->setattr(dentry, &newattrs); + else { + inode_setattr(dentry->d_inode, &newattrs); + error = 0; + } + + unlock_kernel(); + up(&inode->i_sem); + EXIT; + return error; +} + +static void presto_kml_truncate(struct presto_file_set *fset) +{ + int rc; + ENTRY; + + write_lock(&fset->fset_kml.fd_lock); + if (fset->fset_kml.fd_truncating == 1 ) { + write_unlock(&fset->fset_kml.fd_lock); + EXIT; + return; + } + + fset->fset_kml.fd_truncating = 1; + write_unlock(&fset->fset_kml.fd_lock); + + CERROR("islento: %d, count: %d\n", + ISLENTO(presto_i2m(fset->fset_dentry->d_inode)), + fset->fset_permit_count); + + rc = izo_upc_kml_truncate(fset->fset_cache->cache_psdev->uc_minor, + fset->fset_lento_off, fset->fset_lento_recno, + fset->fset_name); + + /* Userspace is the only permitholder now, and will retain an exclusive + * hold on the permit until KML truncation completes. */ + /* FIXME: double check this code path now that the precise semantics of + * fset->fset_permit_count have changed. 
*/ + + if (rc != 0) { + write_lock(&fset->fset_kml.fd_lock); + fset->fset_kml.fd_truncating = 0; + write_unlock(&fset->fset_kml.fd_lock); + } + + EXIT; +} + +void *presto_trans_start(struct presto_file_set *fset, struct inode *inode, + int op) +{ + ENTRY; + if ( !fset->fset_cache->cache_filter->o_trops ) { + EXIT; + return NULL; + } + EXIT; + return fset->fset_cache->cache_filter->o_trops->tr_start + (fset, inode, op); +} + +void presto_trans_commit(struct presto_file_set *fset, void *handle) +{ + ENTRY; + if (!fset->fset_cache->cache_filter->o_trops ) { + EXIT; + return; + } + + fset->fset_cache->cache_filter->o_trops->tr_commit(fset, handle); + + /* Check to see if the KML needs truncated. */ + if (fset->kml_truncate_size > 0 && + !fset->fset_kml.fd_truncating && + fset->fset_kml.fd_offset > fset->kml_truncate_size) { + CDEBUG(D_JOURNAL, "kml size: %lu; truncating\n", + (unsigned long)fset->fset_kml.fd_offset); + presto_kml_truncate(fset); + } + EXIT; +} + +inline int presto_no_journal(struct presto_file_set *fset) +{ + int minor = fset->fset_cache->cache_psdev->uc_minor; + return izo_channels[minor].uc_no_journal; +} + +#define size_round(x) (((x)+3) & ~0x3) + +#define BUFF_FREE(buf) PRESTO_FREE(buf, PAGE_SIZE) +#define BUFF_ALLOC(newbuf, oldbuf) \ + PRESTO_ALLOC(newbuf, PAGE_SIZE); \ + if ( !newbuf ) { \ + if (oldbuf) \ + BUFF_FREE(oldbuf); \ + return -ENOMEM; \ + } + +/* + * "buflen" should be PAGE_SIZE or more. 
+ * Give relative path wrt to a fsetroot + */ +char * presto_path(struct dentry *dentry, struct dentry *root, + char *buffer, int buflen) +{ + char * end = buffer+buflen; + char * retval; + + *--end = '\0'; + buflen--; + if (dentry->d_parent != dentry && d_unhashed(dentry)) { + buflen -= 10; + end -= 10; + memcpy(end, " (deleted)", 10); + } + + /* Get '/' right */ + retval = end-1; + *retval = '/'; + + for (;;) { + struct dentry * parent; + int namelen; + + if (dentry == root) + break; + parent = dentry->d_parent; + if (dentry == parent) + break; + namelen = dentry->d_name.len; + buflen -= namelen + 1; + if (buflen < 0) + break; + end -= namelen; + memcpy(end, dentry->d_name.name, namelen); + *--end = '/'; + retval = end; + dentry = parent; + } + return retval; +} + +static inline char *logit(char *buf, const void *value, int size) +{ + char *ptr = (char *)value; + + memcpy(buf, ptr, size); + buf += size; + return buf; +} + + +static inline char * +journal_log_prefix_with_groups_and_ids(char *buf, int opcode, + struct rec_info *rec, + __u32 ngroups, gid_t *groups, + __u32 fsuid, __u32 fsgid) +{ + struct kml_prefix_hdr p; + u32 loggroups[NGROUPS_SMALL]; + + int i; + + p.len = cpu_to_le32(rec->size); + p.version = KML_MAJOR_VERSION | KML_MINOR_VERSION; + p.pid = cpu_to_le32(current->pid); + p.auid = cpu_to_le32(current->uid); + p.fsuid = cpu_to_le32(fsuid); + p.fsgid = cpu_to_le32(fsgid); + p.ngroups = cpu_to_le32(ngroups); + p.opcode = cpu_to_le32(opcode); + for (i=0 ; i < ngroups ; i++) + loggroups[i] = cpu_to_le32((__u32) groups[i]); + + buf = logit(buf, &p, sizeof(struct kml_prefix_hdr)); + buf = logit(buf, &loggroups, sizeof(__u32) * ngroups); + return buf; +} + +static inline char * +journal_log_prefix(char *buf, int opcode, struct rec_info *rec) +{ + __u32 groups[NGROUPS_SMALL]; + int i; + + /* convert 16 bit gid's to 32 bit gid's */ + for (i=0; igroup_info->ngroups; i++) + groups[i] = GROUP_AT(current->group_info,i); + + return 
journal_log_prefix_with_groups_and_ids(buf, opcode, rec, + (__u32)current->group_info->ngroups, + groups, + (__u32)current->fsuid, + (__u32)current->fsgid); +} + +static inline char * +journal_log_prefix_with_groups(char *buf, int opcode, struct rec_info *rec, + __u32 ngroups, gid_t *groups) +{ + return journal_log_prefix_with_groups_and_ids(buf, opcode, rec, + ngroups, groups, + (__u32)current->fsuid, + (__u32)current->fsgid); +} + +static inline char *log_dentry_version(char *buf, struct dentry *dentry) +{ + struct presto_version version; + + presto_getversion(&version, dentry->d_inode); + + version.pv_mtime_sec = HTON__u32(version.pv_mtime_sec); + version.pv_ctime_sec = HTON__u32(version.pv_ctime_sec); + version.pv_mtime_nsec = HTON__u32(version.pv_mtime_nsec); + version.pv_ctime_nsec = HTON__u32(version.pv_ctime_nsec); + version.pv_size = HTON__u64(version.pv_size); + + return logit(buf, &version, sizeof(version)); +} + +static inline char *log_version(char *buf, struct presto_version *pv) +{ + struct presto_version version; + + memcpy(&version, pv, sizeof(version)); + + version.pv_mtime_sec = HTON__u32(version.pv_mtime_sec); + version.pv_mtime_nsec = HTON__u32(version.pv_mtime_nsec); + version.pv_ctime_sec = HTON__u32(version.pv_ctime_sec); + version.pv_ctime_nsec = HTON__u32(version.pv_ctime_nsec); + version.pv_size = HTON__u64(version.pv_size); + + return logit(buf, &version, sizeof(version)); +} + +static inline char *log_rollback(char *buf, struct izo_rollback_data *rb) +{ + struct izo_rollback_data rollback; + + rollback.rb_mode = HTON__u32(rb->rb_mode); + rollback.rb_rdev = HTON__u32(rb->rb_rdev); + rollback.rb_uid = HTON__u64(rb->rb_uid); + rollback.rb_gid = HTON__u64(rb->rb_gid); + + return logit(buf, &rollback, sizeof(rollback)); +} + +static inline char *journal_log_suffix(char *buf, char *log, + struct presto_file_set *fset, + struct dentry *dentry, + struct rec_info *rec) +{ + struct kml_suffix s; + struct kml_prefix_hdr *p = (struct kml_prefix_hdr 
*)log; + +#if 0 + /* XXX needs to be done after reservation, + disable ths until version 1.2 */ + if ( dentry ) { + s.prevrec = cpu_to_le32(rec->offset - + presto_d2d(dentry)->dd_kml_offset); + presto_d2d(dentry)->dd_kml_offset = rec->offset; + } else { + s.prevrec = -1; + } +#endif + s.prevrec = 0; + + /* record number needs to be filled in after reservation + s.recno = cpu_to_le32(rec->recno); */ + s.time = cpu_to_le32(get_seconds()); + s.len = p->len; + return logit(buf, &s, sizeof(s)); +} + +int izo_log_close(struct presto_log_fd *logfd) +{ + int rc = 0; + + if (logfd->fd_file) { + rc = filp_close(logfd->fd_file, 0); + logfd->fd_file = NULL; + } else + CERROR("InterMezzo: %s: no filp\n", __FUNCTION__); + if (rc != 0) + CERROR("InterMezzo: close files: filp won't close: %d\n", rc); + + return rc; +} + +int presto_fwrite(struct file *file, const char *str, int len, loff_t *off) +{ + int rc; + mm_segment_t old_fs; + ENTRY; + + rc = -EINVAL; + if ( !off ) { + EXIT; + return rc; + } + + if ( ! file ) { + EXIT; + return rc; + } + + if ( ! file->f_op ) { + EXIT; + return rc; + } + + if ( ! file->f_op->write ) { + EXIT; + return rc; + } + + old_fs = get_fs(); + set_fs(get_ds()); + rc = file->f_op->write(file, str, len, off); + if (rc != len) { + CERROR("presto_fwrite: wrote %d bytes instead of " + "%d at %ld\n", rc, len, (long)*off); + rc = -EIO; + } + set_fs(old_fs); + EXIT; + return rc; +} + +int presto_fread(struct file *file, char *str, int len, loff_t *off) +{ + int rc; + mm_segment_t old_fs; + ENTRY; + + if (len > 512) + CERROR("presto_fread: read at %Ld for %d bytes, ino %ld\n", + *off, len, file->f_dentry->d_inode->i_ino); + + rc = -EINVAL; + if ( !off ) { + EXIT; + return rc; + } + + if ( ! file ) { + EXIT; + return rc; + } + + if ( ! file->f_op ) { + EXIT; + return rc; + } + + if ( ! 
file->f_op->read ) { + EXIT; + return rc; + } + + old_fs = get_fs(); + set_fs(get_ds()); + rc = file->f_op->read(file, str, len, off); + if (rc != len) { + CDEBUG(D_FILE, "presto_fread: read %d bytes instead of " + "%d at %Ld\n", rc, len, *off); + rc = -EIO; + } + set_fs(old_fs); + EXIT; + return rc; +} + +loff_t presto_kml_offset(struct presto_file_set *fset) +{ + unsigned int kml_recno; + struct presto_log_fd *fd = &fset->fset_kml; + loff_t offset; + ENTRY; + + write_lock(&fd->fd_lock); + + /* Determine the largest valid offset, i.e. up until the first + * reservation held on the file. */ + if ( !list_empty(&fd->fd_reservations) ) { + struct presto_reservation_data *rd; + rd = list_entry(fd->fd_reservations.next, + struct presto_reservation_data, + ri_list); + offset = rd->ri_offset; + kml_recno = rd->ri_recno; + } else { + offset = fd->fd_file->f_dentry->d_inode->i_size; + kml_recno = fset->fset_kml.fd_recno; + } + write_unlock(&fd->fd_lock); + return offset; +} + +static int presto_kml_dispatch(struct presto_file_set *fset) +{ + int rc = 0; + unsigned int kml_recno; + struct presto_log_fd *fd = &fset->fset_kml; + loff_t offset; + ENTRY; + + write_lock(&fd->fd_lock); + + /* Determine the largest valid offset, i.e. up until the first + * reservation held on the file. 
*/ + if ( !list_empty(&fd->fd_reservations) ) { + struct presto_reservation_data *rd; + rd = list_entry(fd->fd_reservations.next, + struct presto_reservation_data, + ri_list); + offset = rd->ri_offset; + kml_recno = rd->ri_recno; + } else { + offset = fd->fd_file->f_dentry->d_inode->i_size; + kml_recno = fset->fset_kml.fd_recno; + } + + if ( kml_recno < fset->fset_lento_recno ) { + CERROR("presto_kml_dispatch: smoke is coming\n"); + write_unlock(&fd->fd_lock); + EXIT; + return 0; + } else if ( kml_recno == fset->fset_lento_recno ) { + write_unlock(&fd->fd_lock); + EXIT; + return 0; + /* XXX add a further "if" here to delay the KML upcall */ +#if 0 + } else if ( kml_recno < fset->fset_lento_recno + 100) { + write_unlock(&fd->fd_lock); + EXIT; + return 0; +#endif + } + CDEBUG(D_PIOCTL, "fset: %s\n", fset->fset_name); + + rc = izo_upc_kml(fset->fset_cache->cache_psdev->uc_minor, + fset->fset_lento_off, fset->fset_lento_recno, + offset + fset->fset_kml_logical_off, kml_recno, + fset->fset_name); + + if ( rc ) { + write_unlock(&fd->fd_lock); + EXIT; + return rc; + } + + fset->fset_lento_off = offset; + fset->fset_lento_recno = kml_recno; + write_unlock(&fd->fd_lock); + EXIT; + return 0; +} + +int izo_lookup_file(struct presto_file_set *fset, char *path, + struct nameidata *nd) +{ + int error = 0; + + CDEBUG(D_CACHE, "looking up: %s\n", path); + + error = path_lookup(path, LOOKUP_PARENT, nd); + if (error) { + EXIT; + return error; + } + + return 0; +} + +/* FIXME: this function is a mess of locking and error handling. There's got to + * be a better way. 
*/ +static int do_truncate_rename(struct presto_file_set *fset, char *oldname, + char *newname) +{ + struct dentry *old_dentry, *new_dentry; + struct nameidata oldnd, newnd; + char *oldpath, *newpath; + int error; + + ENTRY; + + oldpath = izo_make_path(fset, oldname); + if (oldpath == NULL) { + EXIT; + return -ENOENT; + } + + newpath = izo_make_path(fset, newname); + if (newpath == NULL) { + error = -ENOENT; + EXIT; + goto exit; + } + + if ((error = izo_lookup_file(fset, oldpath, &oldnd)) != 0) { + EXIT; + goto exit1; + } + + if ((error = izo_lookup_file(fset, newpath, &newnd)) != 0) { + EXIT; + goto exit2; + } + + lock_rename(newnd.dentry, oldnd.dentry); + old_dentry = lookup_hash(&oldnd.last, oldnd.dentry); + error = PTR_ERR(old_dentry); + if (IS_ERR(old_dentry)) { + EXIT; + goto exit3; + } + error = -ENOENT; + if (!old_dentry->d_inode) { + EXIT; + goto exit4; + } + new_dentry = lookup_hash(&newnd.last, newnd.dentry); + error = PTR_ERR(new_dentry); + if (IS_ERR(new_dentry)) { + EXIT; + goto exit4; + } + + { + extern int presto_rename(struct inode *old_dir,struct dentry *old_dentry, + struct inode *new_dir,struct dentry *new_dentry); + error = presto_rename(old_dentry->d_parent->d_inode, old_dentry, + new_dentry->d_parent->d_inode, new_dentry); + } + + dput(new_dentry); + EXIT; + exit4: + dput(old_dentry); + exit3: + unlock_rename(newnd.dentry, oldnd.dentry); + path_release(&newnd); + exit2: + path_release(&oldnd); + exit1: + PRESTO_FREE(newpath, strlen(newpath) + 1); + exit: + PRESTO_FREE(oldpath, strlen(oldpath) + 1); + return error; +} + +/* This function is called with the fset->fset_kml.fd_lock held */ +int presto_finish_kml_truncate(struct presto_file_set *fset, + unsigned long int offset) +{ + struct lento_vfs_context info; + void *handle; + struct file *f; + struct dentry *dentry; + int error = 0, len; + struct nameidata nd; + char *kmlpath = NULL, *smlpath = NULL; + ENTRY; + + if (offset == 0) { + /* Lento couldn't do what it needed to; abort the 
truncation. */ + fset->fset_kml.fd_truncating = 0; + EXIT; + return 0; + } + + /* someone is about to write to the end of the KML; try again later. */ + if ( !list_empty(&fset->fset_kml.fd_reservations) ) { + EXIT; + return -EAGAIN; + } + + f = presto_copy_kml_tail(fset, offset); + if (IS_ERR(f)) { + EXIT; + return PTR_ERR(f); + } + + /* In a single transaction: + * + * - unlink 'kml' + * - rename 'kml_tmp' to 'kml' + * - unlink 'sml' + * - rename 'sml_tmp' to 'sml' + * - rewrite the first record of last_rcvd with the new kml + * offset. + */ + handle = presto_trans_start(fset, fset->fset_dentry->d_inode, + KML_OPCODE_KML_TRUNC); + if (IS_ERR(handle)) { + presto_release_space(fset->fset_cache, PRESTO_REQLOW); + CERROR("ERROR: presto_finish_kml_truncate: no space for transaction\n"); + EXIT; + return -ENOMEM; + } + + memset(&info, 0, sizeof(info)); + info.flags = LENTO_FL_IGNORE_TIME; + + kmlpath = izo_make_path(fset, "kml"); + if (kmlpath == NULL) { + error = -ENOMEM; + CERROR("make_path failed: ENOMEM\n"); + EXIT; + goto exit_commit; + } + + if ((error = izo_lookup_file(fset, kmlpath, &nd)) != 0) { + CERROR("izo_lookup_file(kml) failed: %d.\n", error); + EXIT; + goto exit_commit; + } + down(&nd.dentry->d_inode->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) { + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); + CERROR("lookup_hash failed\n"); + EXIT; + goto exit_commit; + } + error = presto_do_unlink(fset, dentry->d_parent, dentry, &info); + dput(dentry); + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); + + if (error != 0) { + CERROR("presto_do_unlink(kml) failed: %d.\n", error); + EXIT; + goto exit_commit; + } + + smlpath = izo_make_path(fset, "sml"); + if (smlpath == NULL) { + error = -ENOMEM; + CERROR("make_path() failed: ENOMEM\n"); + EXIT; + goto exit_commit; + } + + if ((error = izo_lookup_file(fset, smlpath, &nd)) != 0) { + CERROR("izo_lookup_file(sml) failed: %d.\n", error); + EXIT; + goto 
exit_commit; + } + down(&nd.dentry->d_inode->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) { + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); + CERROR("lookup_hash failed\n"); + EXIT; + goto exit_commit; + } + error = presto_do_unlink(fset, dentry->d_parent, dentry, &info); + dput(dentry); + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); + + if (error != 0) { + CERROR("presto_do_unlink(sml) failed: %d.\n", error); + EXIT; + goto exit_commit; + } + + error = do_truncate_rename(fset, "kml_tmp", "kml"); + if (error != 0) + CERROR("do_truncate_rename(kml_tmp, kml) failed: %d\n", error); + error = do_truncate_rename(fset, "sml_tmp", "sml"); + if (error != 0) + CERROR("do_truncate_rename(sml_tmp, sml) failed: %d\n", error); + + /* Write a new 'last_rcvd' record with the new KML offset */ + fset->fset_kml_logical_off += offset; + CDEBUG(D_CACHE, "new kml_logical_offset: %Lu\n", + fset->fset_kml_logical_off); + if (presto_write_kml_logical_offset(fset) != 0) { + CERROR("presto_write_kml_logical_offset failed\n"); + } + + presto_trans_commit(fset, handle); + + /* Everything was successful, so swap the KML file descriptors */ + filp_close(fset->fset_kml.fd_file, NULL); + fset->fset_kml.fd_file = f; + fset->fset_kml.fd_offset -= offset; + fset->fset_kml.fd_truncating = 0; + + EXIT; + return 0; + + exit_commit: + presto_trans_commit(fset, handle); + len = strlen("/.intermezzo/") + strlen(fset->fset_name) +strlen("sml"); + if (kmlpath != NULL) + PRESTO_FREE(kmlpath, len); + if (smlpath != NULL) + PRESTO_FREE(smlpath, len); + return error; +} + +/* structure of an extended log record: + + buf-prefix buf-body [string1 [string2 [string3]]] buf-suffix + + note: moves offset forward +*/ +static inline int presto_write_record(struct file *f, loff_t *off, + const char *buf, size_t size, + const char *string1, int len1, + const char *string2, int len2, + const char *string3, int len3) +{ + size_t prefix_size; + int rc; 
+ + prefix_size = size - sizeof(struct kml_suffix); + rc = presto_fwrite(f, buf, prefix_size, off); + if ( rc != prefix_size ) { + CERROR("Write error!\n"); + EXIT; + return -EIO; + } + + if ( string1 && len1 ) { + rc = presto_fwrite(f, string1, len1, off); + if ( rc != len1 ) { + CERROR("Write error!\n"); + EXIT; + return -EIO; + } + } + + if ( string2 && len2 ) { + rc = presto_fwrite(f, string2, len2, off); + if ( rc != len2 ) { + CERROR("Write error!\n"); + EXIT; + return -EIO; + } + } + + if ( string3 && len3 ) { + rc = presto_fwrite(f, string3, len3, off); + if ( rc != len3 ) { + CERROR("Write error!\n"); + EXIT; + return -EIO; + } + } + + rc = presto_fwrite(f, buf + prefix_size, + sizeof(struct kml_suffix), off); + if ( rc != sizeof(struct kml_suffix) ) { + CERROR("Write error!\n"); + EXIT; + return -EIO; + } + return 0; +} + + +/* + * rec->size must be valid prior to calling this function. + * + * had to export this for branch_reinter in kml_reint.c + */ +int presto_log(struct presto_file_set *fset, struct rec_info *rec, + const char *buf, size_t size, + const char *string1, int len1, + const char *string2, int len2, + const char *string3, int len3) +{ + int rc; + struct presto_reservation_data rd; + loff_t offset; + struct presto_log_fd *fd; + struct kml_suffix *s; + int prefix_size; + + ENTRY; + + /* buf is NULL when no_journal is in effect */ + if (!buf) { + EXIT; + return -EINVAL; + } + + if (rec->is_kml) { + fd = &fset->fset_kml; + } else { + fd = &fset->fset_lml; + } + + presto_reserve_record(fset, fd, rec, &rd); + + if (rec->is_kml) { + if (rec->offset < fset->fset_kml_logical_off) { + CERROR("record with pre-trunc offset. 
tell phil.\n"); + BUG(); + } + offset = rec->offset - fset->fset_kml_logical_off; + } else { + offset = rec->offset; + } + + /* now we know the record number */ + prefix_size = size - sizeof(struct kml_suffix); + s = (struct kml_suffix *) (buf + prefix_size); + s->recno = cpu_to_le32(rec->recno); + + rc = presto_write_record(fd->fd_file, &offset, buf, size, + string1, len1, string2, len2, string3, len3); + if (rc) { + CERROR("presto: error writing record to %s\n", + rec->is_kml ? "KML" : "LML"); + return rc; + } + presto_release_record(fd, &rd); + + rc = presto_kml_dispatch(fset); + + EXIT; + return rc; +} + +/* read from the record at tail */ +static int presto_last_record(struct presto_log_fd *fd, loff_t *size, + loff_t *tail_offset, __u32 *recno, loff_t tail) +{ + struct kml_suffix suffix; + int rc; + loff_t zeroes; + + *recno = 0; + *tail_offset = 0; + *size = 0; + + if (tail < sizeof(struct kml_prefix_hdr) + sizeof(suffix)) { + EXIT; + return 0; + } + + zeroes = tail - sizeof(int); + while ( zeroes >= 0 ) { + int data; + rc = presto_fread(fd->fd_file, (char *)&data, sizeof(data), + &zeroes); + if ( rc != sizeof(data) ) { + rc = -EIO; + return rc; + } + if (data) + break; + zeroes -= 2 * sizeof(data); + } + + /* zeroes at the begining of file. 
this is needed to prevent + presto_fread errors -SHP + */ + if (zeroes <= 0) return 0; + + zeroes -= sizeof(suffix) + sizeof(int); + rc = presto_fread(fd->fd_file, (char *)&suffix, sizeof(suffix), &zeroes); + if ( rc != sizeof(suffix) ) { + EXIT; + return rc; + } + if ( suffix.len > 500 ) { + CERROR("InterMezzo: Warning long record tail at %ld, rec tail_offset at %ld (size %d)\n", + (long) zeroes, (long)*tail_offset, suffix.len); + } + + *recno = suffix.recno; + *size = suffix.len; + *tail_offset = zeroes; + return 0; +} + +static int izo_kml_last_recno(struct presto_log_fd *logfd) +{ + int rc; + loff_t size; + loff_t tail_offset; + int recno; + loff_t tail = logfd->fd_file->f_dentry->d_inode->i_size; + + rc = presto_last_record(logfd, &size, &tail_offset, &recno, tail); + if (rc != 0) { + EXIT; + return rc; + } + + logfd->fd_offset = tail_offset; + logfd->fd_recno = recno; + CDEBUG(D_JOURNAL, "setting fset_kml->fd_recno to %d, offset %Ld\n", + recno, tail_offset); + EXIT; + return 0; +} + +struct file *izo_log_open(struct presto_file_set *fset, char *name, int flags) +{ + struct presto_cache *cache = fset->fset_cache; + struct file *f; + int error; + ENTRY; + + f = izo_fset_open(fset, name, flags, 0644); + error = PTR_ERR(f); + if (IS_ERR(f)) { + EXIT; + return f; + } + + error = -EINVAL; + if ( cache != presto_get_cache(f->f_dentry->d_inode) ) { + CERROR("InterMezzo: %s cache does not match fset cache!\n",name); + fset->fset_kml.fd_file = NULL; + filp_close(f, NULL); + f = NULL; + EXIT; + return f; + } + + if (cache->cache_filter && cache->cache_filter->o_trops && + cache->cache_filter->o_trops->tr_journal_data) { + cache->cache_filter->o_trops->tr_journal_data + (f->f_dentry->d_inode); + } else { + CERROR("InterMezzo WARNING: no file data logging!\n"); + } + + EXIT; + + return f; +} + +int izo_init_kml_file(struct presto_file_set *fset, struct presto_log_fd *logfd) +{ + int error = 0; + struct file *f; + + ENTRY; + if (logfd->fd_file) { + CDEBUG(D_INODE, "fset 
already has KML open\n"); + EXIT; + return 0; + } + + logfd->fd_lock = RW_LOCK_UNLOCKED; + INIT_LIST_HEAD(&logfd->fd_reservations); + f = izo_log_open(fset, "kml", O_RDWR | O_CREAT); + if (IS_ERR(f)) { + error = PTR_ERR(f); + return error; + } + + logfd->fd_file = f; + error = izo_kml_last_recno(logfd); + + if (error) { + logfd->fd_file = NULL; + filp_close(f, NULL); + CERROR("InterMezzo: IO error in KML of fset %s\n", + fset->fset_name); + EXIT; + return error; + } + fset->fset_lento_off = logfd->fd_offset; + fset->fset_lento_recno = logfd->fd_recno; + + EXIT; + return error; +} + +int izo_init_last_rcvd_file(struct presto_file_set *fset, struct presto_log_fd *logfd) +{ + int error = 0; + struct file *f; + struct rec_info recinfo; + + ENTRY; + if (logfd->fd_file != NULL) { + CDEBUG(D_INODE, "fset already has last_rcvd open\n"); + EXIT; + return 0; + } + + logfd->fd_lock = RW_LOCK_UNLOCKED; + INIT_LIST_HEAD(&logfd->fd_reservations); + f = izo_log_open(fset, "last_rcvd", O_RDWR | O_CREAT); + if (IS_ERR(f)) { + error = PTR_ERR(f); + return error; + } + + logfd->fd_file = f; + logfd->fd_offset = f->f_dentry->d_inode->i_size; + + error = izo_rep_cache_init(fset); + + if (presto_read_kml_logical_offset(&recinfo, fset) == 0) { + fset->fset_kml_logical_off = recinfo.offset; + } else { + /* The 'last_rcvd' file doesn't contain a kml offset record, + * probably because we just created 'last_rcvd'. Write one. 
*/ + fset->fset_kml_logical_off = 0; + presto_write_kml_logical_offset(fset); + } + + EXIT; + return error; +} + +int izo_init_lml_file(struct presto_file_set *fset, struct presto_log_fd *logfd) +{ + int error = 0; + struct file *f; + + ENTRY; + if (logfd->fd_file) { + CDEBUG(D_INODE, "fset already has lml open\n"); + EXIT; + return 0; + } + + logfd->fd_lock = RW_LOCK_UNLOCKED; + INIT_LIST_HEAD(&logfd->fd_reservations); + f = izo_log_open(fset, "lml", O_RDWR | O_CREAT); + if (IS_ERR(f)) { + error = PTR_ERR(f); + return error; + } + + logfd->fd_file = f; + logfd->fd_offset = f->f_dentry->d_inode->i_size; + + EXIT; + return error; +} + +/* Get the KML-offset record from the last_rcvd file */ +int presto_read_kml_logical_offset(struct rec_info *recinfo, + struct presto_file_set *fset) +{ + loff_t off; + struct izo_rcvd_rec rec; + char uuid[16] = {0}; + + off = izo_rcvd_get(&rec, fset, uuid); + if (off < 0) + return -1; + + recinfo->offset = rec.lr_local_offset; + return 0; +} + +int presto_write_kml_logical_offset(struct presto_file_set *fset) +{ + loff_t rc; + struct izo_rcvd_rec rec; + char uuid[16] = {0}; + + rc = izo_rcvd_get(&rec, fset, uuid); + if (rc < 0) + memset(&rec, 0, sizeof(rec)); + + rec.lr_local_offset = + cpu_to_le64(fset->fset_kml_logical_off); + + return izo_rcvd_write(fset, &rec); +} + +struct file * presto_copy_kml_tail(struct presto_file_set *fset, + unsigned long int start) +{ + struct file *f; + int len; + loff_t read_off, write_off, bytes; + + ENTRY; + + /* Copy the tail of 'kml' to 'kml_tmp' */ + f = izo_log_open(fset, "kml_tmp", O_RDWR); + if (IS_ERR(f)) { + EXIT; + return f; + } + + write_off = 0; + read_off = start; + bytes = fset->fset_kml.fd_offset - start; + while (bytes > 0) { + char buf[4096]; + int toread; + + if (bytes > sizeof(buf)) + toread = sizeof(buf); + else + toread = bytes; + + len = presto_fread(fset->fset_kml.fd_file, buf, toread, + &read_off); + if (len <= 0) + break; + + if (presto_fwrite(f, buf, len, &write_off) != len) 
{ + filp_close(f, NULL); + EXIT; + return ERR_PTR(-EIO); + } + + bytes -= len; + } + + EXIT; + return f; +} + + +/* LML records here */ +/* this writes an LML record to the LML file (rec->is_kml =0) */ +int presto_write_lml_close(struct rec_info *rec, + struct presto_file_set *fset, + struct file *file, + __u64 remote_ino, + __u64 remote_generation, + struct presto_version *remote_version, + struct presto_version *new_file_ver) +{ + int opcode = KML_OPCODE_CLOSE; + char *buffer; + struct dentry *dentry = file->f_dentry; + __u64 ino; + __u32 pathlen; + char *path; + __u32 generation; + int size; + char *logrecord; + char record[292]; + struct dentry *root; + int error; + + ENTRY; + + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + root = fset->fset_dentry; + + BUFF_ALLOC(buffer, NULL); + path = presto_path(dentry, root, buffer, PAGE_SIZE); + CDEBUG(D_INODE, "Path: %s\n", path); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + ino = cpu_to_le64(dentry->d_inode->i_ino); + generation = cpu_to_le32(dentry->d_inode->i_generation); + size = sizeof(__u32) * current->group_info->ngroups + + sizeof(struct kml_prefix_hdr) + sizeof(*new_file_ver) + + sizeof(ino) + sizeof(generation) + sizeof(pathlen) + + sizeof(remote_ino) + sizeof(remote_generation) + + sizeof(remote_version) + sizeof(rec->offset) + + sizeof(struct kml_suffix); + + if ( size > sizeof(record) ) + CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + + rec->is_kml = 0; + rec->size = size + size_round(le32_to_cpu(pathlen)); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = log_version(logrecord, new_file_ver); + logrecord = logit(logrecord, &ino, sizeof(ino)); + logrecord = logit(logrecord, &generation, sizeof(generation)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = logit(logrecord, &remote_ino, sizeof(remote_ino)); + logrecord = logit(logrecord, &remote_generation, + sizeof(remote_generation)); + logrecord = log_version(logrecord, 
remote_version); + logrecord = logit(logrecord, &rec->offset, sizeof(rec->offset)); + logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + NULL, 0, NULL, 0); + + BUFF_FREE(buffer); + + EXIT; + return error; +} + +/* + * Check if the given record is at the end of the file. If it is, truncate + * the lml to the record's offset, removing it. Repeat on prior record, + * until we reach an active record or a reserved record (as defined by the + * reservations list). + */ +static int presto_truncate_lml_tail(struct presto_file_set *fset) +{ + loff_t lml_tail; + loff_t lml_last_rec; + loff_t lml_last_recsize; + loff_t local_offset; + int recno; + struct kml_prefix_hdr prefix; + struct inode *inode = fset->fset_lml.fd_file->f_dentry->d_inode; + void *handle; + int rc; + + ENTRY; + /* If someone else is already truncating the LML, return. */ + write_lock(&fset->fset_lml.fd_lock); + if (fset->fset_lml.fd_truncating == 1 ) { + write_unlock(&fset->fset_lml.fd_lock); + EXIT; + return 0; + } + /* someone is about to write to the end of the LML */ + if ( !list_empty(&fset->fset_lml.fd_reservations) ) { + write_unlock(&fset->fset_lml.fd_lock); + EXIT; + return 0; + } + lml_tail = fset->fset_lml.fd_file->f_dentry->d_inode->i_size; + /* Nothing to truncate?*/ + if (lml_tail == 0) { + write_unlock(&fset->fset_lml.fd_lock); + EXIT; + return 0; + } + fset->fset_lml.fd_truncating = 1; + write_unlock(&fset->fset_lml.fd_lock); + + presto_last_record(&fset->fset_lml, &lml_last_recsize, + &lml_last_rec, &recno, lml_tail); + /* Do we have a record to check? If not we have zeroes at the + beginning of the file. 
-SHP + */ + if (lml_last_recsize != 0) { + local_offset = lml_last_rec - lml_last_recsize; + rc = presto_fread(fset->fset_lml.fd_file, (char *)&prefix, + sizeof(prefix), &local_offset); + if (rc != sizeof(prefix)) { + EXIT; + goto tr_out; + } + + if ( prefix.opcode != KML_OPCODE_NOOP ) { + EXIT; + rc = 0; + /* We may have zeroes at the end of the file, should + we clear them out? -SHP + */ + goto tr_out; + } + } else + lml_last_rec=0; + + handle = presto_trans_start(fset, inode, KML_OPCODE_TRUNC); + if ( IS_ERR(handle) ) { + EXIT; + rc = -ENOMEM; + goto tr_out; + } + + rc = izo_do_truncate(fset, fset->fset_lml.fd_file->f_dentry, + lml_last_rec - lml_last_recsize, lml_tail); + presto_trans_commit(fset, handle); + if ( rc == 0 ) { + rc = 1; + } + EXIT; + + tr_out: + CDEBUG(D_JOURNAL, "rc = %d\n", rc); + write_lock(&fset->fset_lml.fd_lock); + fset->fset_lml.fd_truncating = 0; + write_unlock(&fset->fset_lml.fd_lock); + return rc; +} + +int presto_truncate_lml(struct presto_file_set *fset) +{ + int rc; + ENTRY; + + while ( (rc = presto_truncate_lml_tail(fset)) > 0); + if ( rc < 0 && rc != -EALREADY) { + CERROR("truncate_lml error %d\n", rc); + } + EXIT; + return rc; +} + +int presto_clear_lml_close(struct presto_file_set *fset, loff_t lml_offset) +{ + int rc; + struct kml_prefix_hdr record; + loff_t offset = lml_offset; + + ENTRY; + + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + CDEBUG(D_JOURNAL, "reading prefix: off %ld, size %Zd\n", + (long)lml_offset, sizeof(record)); + rc = presto_fread(fset->fset_lml.fd_file, (char *)&record, + sizeof(record), &offset); + + if ( rc != sizeof(record) ) { + CERROR("presto: clear_lml io error %d\n", rc); + EXIT; + return -EIO; + } + + /* overwrite the prefix */ + CDEBUG(D_JOURNAL, "overwriting prefix: off %ld\n", (long)lml_offset); + record.opcode = KML_OPCODE_NOOP; + offset = lml_offset; + /* note: this does just a single transaction in the cache */ + rc = presto_fwrite(fset->fset_lml.fd_file, (char *)(&record), + 
sizeof(record), &offset); + if ( rc != sizeof(record) ) { + EXIT; + return -EIO; + } + + EXIT; + return 0; +} + + + +/* now a journal function for every operation */ + +int presto_journal_setattr(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, struct presto_version *old_ver, + struct izo_rollback_data *rb, struct iattr *iattr) +{ + int opcode = KML_OPCODE_SETATTR; + char *buffer, *path, *logrecord, record[316]; + struct dentry *root; + __u32 uid, gid, mode, valid, flags, pathlen; + __u64 fsize, mtime, ctime; + int error, size; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0) + || ((dentry->d_parent != dentry) && d_unhashed(dentry))) { + EXIT; + return 0; + } + + root = fset->fset_dentry; + + BUFF_ALLOC(buffer, NULL); + path = presto_path(dentry, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->group_info->ngroups + + sizeof(struct kml_prefix_hdr) + sizeof(*old_ver) + + sizeof(valid) + sizeof(mode) + sizeof(uid) + sizeof(gid) + + sizeof(fsize) + sizeof(mtime) + sizeof(ctime) + sizeof(flags) + + sizeof(pathlen) + sizeof(*rb) + sizeof(struct kml_suffix); + + if ( size > sizeof(record) ) + CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + + /* Only journal one kind of mtime, and not atime at all. Also don't + * journal bogus data in iattr, to make the journal more compressible. + */ + if (iattr->ia_valid & ATTR_MTIME_SET) + iattr->ia_valid = iattr->ia_valid | ATTR_MTIME; + valid = cpu_to_le32(iattr->ia_valid & ~(ATTR_ATIME | ATTR_MTIME_SET | + ATTR_ATIME_SET)); + mode = iattr->ia_valid & ATTR_MODE ? cpu_to_le32(iattr->ia_mode): 0; + uid = iattr->ia_valid & ATTR_UID ? cpu_to_le32(iattr->ia_uid): 0; + gid = iattr->ia_valid & ATTR_GID ? cpu_to_le32(iattr->ia_gid): 0; + fsize = iattr->ia_valid & ATTR_SIZE ? cpu_to_le64(iattr->ia_size): 0; + mtime = iattr->ia_valid & ATTR_MTIME ? 
cpu_to_le64(iattr->ia_mtime.tv_sec): 0; + ctime = iattr->ia_valid & ATTR_CTIME ? cpu_to_le64(iattr->ia_ctime.tv_sec): 0; + flags = iattr->ia_valid & ATTR_ATTR_FLAG ? + cpu_to_le32(iattr->ia_attr_flags): 0; + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = log_version(logrecord, old_ver); + logrecord = logit(logrecord, &valid, sizeof(valid)); + logrecord = logit(logrecord, &mode, sizeof(mode)); + logrecord = logit(logrecord, &uid, sizeof(uid)); + logrecord = logit(logrecord, &gid, sizeof(gid)); + logrecord = logit(logrecord, &fsize, sizeof(fsize)); + logrecord = logit(logrecord, &mtime, sizeof(mtime)); + logrecord = logit(logrecord, &ctime, sizeof(ctime)); + logrecord = logit(logrecord, &flags, sizeof(flags)); + logrecord = log_rollback(logrecord, rb); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + NULL, 0, NULL, 0); + + BUFF_FREE(buffer); + EXIT; + return error; +} + +int presto_get_fileid(int minor, struct presto_file_set *fset, + struct dentry *dentry) +{ + int opcode = KML_OPCODE_GET_FILEID; + struct rec_info rec; + char *buffer, *path, *logrecord, record[4096]; /*include path*/ + struct dentry *root; + __u32 uid, gid, pathlen; + int error, size; + struct kml_suffix *suffix; + + ENTRY; + + root = fset->fset_dentry; + + uid = cpu_to_le32(dentry->d_inode->i_uid); + gid = cpu_to_le32(dentry->d_inode->i_gid); + BUFF_ALLOC(buffer, NULL); + path = presto_path(dentry, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->group_info->ngroups + + sizeof(struct kml_prefix_hdr) + sizeof(pathlen) + + size_round(le32_to_cpu(pathlen)) + + sizeof(struct kml_suffix); + + CDEBUG(D_FILE, "kml size: %d\n", size); + if ( size > sizeof(record) ) + 
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + + memset(&rec, 0, sizeof(rec)); + rec.is_kml = 1; + rec.size = size; + + logrecord = journal_log_prefix(record, opcode, &rec); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = logit(logrecord, path, size_round(le32_to_cpu(pathlen))); + suffix = (struct kml_suffix *)logrecord; + logrecord = journal_log_suffix(logrecord, record, fset, dentry, &rec); + /* journal_log_suffix expects journal_log to set this */ + suffix->recno = 0; + + CDEBUG(D_FILE, "actual kml size: %Zd\n", logrecord - record); + CDEBUG(D_FILE, "get fileid: uid %d, gid %d, path: %s\n", uid, gid,path); + + error = izo_upc_get_fileid(minor, size, record, + size_round(le32_to_cpu(pathlen)), path, + fset->fset_name); + + BUFF_FREE(buffer); + EXIT; + return error; +} + +int presto_journal_create(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *tgt_dir_ver, + struct presto_version *new_file_ver, int mode) +{ + int opcode = KML_OPCODE_CREATE; + char *buffer, *path, *logrecord, record[292]; + struct dentry *root; + __u32 uid, gid, lmode, pathlen; + int error, size; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_dentry; + + uid = cpu_to_le32(dentry->d_inode->i_uid); + gid = cpu_to_le32(dentry->d_inode->i_gid); + lmode = cpu_to_le32(mode); + + BUFF_ALLOC(buffer, NULL); + path = presto_path(dentry, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->group_info->ngroups + + sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) + + sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) + + sizeof(struct kml_suffix); + + if ( size > sizeof(record) ) + CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = 
log_version(logrecord, tgt_dir_ver); + logrecord = log_dentry_version(logrecord, dentry->d_parent); + logrecord = log_version(logrecord, new_file_ver); + logrecord = logit(logrecord, &lmode, sizeof(lmode)); + logrecord = logit(logrecord, &uid, sizeof(uid)); + logrecord = logit(logrecord, &gid, sizeof(gid)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + NULL, 0, NULL, 0); + + BUFF_FREE(buffer); + EXIT; + return error; +} + +int presto_journal_symlink(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, const char *target, + struct presto_version *tgt_dir_ver, + struct presto_version *new_link_ver) +{ + int opcode = KML_OPCODE_SYMLINK; + char *buffer, *path, *logrecord, record[292]; + struct dentry *root; + __u32 uid, gid, pathlen; + __u32 targetlen = cpu_to_le32(strlen(target)); + int error, size; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_dentry; + + uid = cpu_to_le32(dentry->d_inode->i_uid); + gid = cpu_to_le32(dentry->d_inode->i_gid); + + BUFF_ALLOC(buffer, NULL); + path = presto_path(dentry, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->group_info->ngroups + + sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) + + sizeof(uid) + sizeof(gid) + sizeof(pathlen) + + sizeof(targetlen) + sizeof(struct kml_suffix); + + if ( size > sizeof(record) ) + CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)) + + size_round(le32_to_cpu(targetlen)); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = log_version(logrecord, tgt_dir_ver); + logrecord = log_dentry_version(logrecord, dentry->d_parent); + logrecord = log_version(logrecord, new_link_ver); + logrecord = 
logit(logrecord, &uid, sizeof(uid)); + logrecord = logit(logrecord, &gid, sizeof(gid)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = logit(logrecord, &targetlen, sizeof(targetlen)); + logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + target, size_round(le32_to_cpu(targetlen)), + NULL, 0); + + BUFF_FREE(buffer); + EXIT; + return error; +} + +int presto_journal_mkdir(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *tgt_dir_ver, + struct presto_version *new_dir_ver, int mode) +{ + int opcode = KML_OPCODE_MKDIR; + char *buffer, *path, *logrecord, record[292]; + struct dentry *root; + __u32 uid, gid, lmode, pathlen; + int error, size; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_dentry; + + uid = cpu_to_le32(dentry->d_inode->i_uid); + gid = cpu_to_le32(dentry->d_inode->i_gid); + lmode = cpu_to_le32(mode); + + BUFF_ALLOC(buffer, NULL); + path = presto_path(dentry, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->group_info->ngroups + + sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) + + sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) + + sizeof(struct kml_suffix); + + if ( size > sizeof(record) ) + CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)); + logrecord = journal_log_prefix(record, opcode, rec); + + logrecord = log_version(logrecord, tgt_dir_ver); + logrecord = log_dentry_version(logrecord, dentry->d_parent); + logrecord = log_version(logrecord, new_dir_ver); + logrecord = logit(logrecord, &lmode, sizeof(lmode)); + logrecord = logit(logrecord, &uid, sizeof(uid)); + logrecord = logit(logrecord, &gid, sizeof(gid)); + logrecord = logit(logrecord, &pathlen, 
sizeof(pathlen)); + logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + NULL, 0, NULL, 0); + + BUFF_FREE(buffer); + EXIT; + return error; +} + + +int +presto_journal_rmdir(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dir, struct presto_version *tgt_dir_ver, + struct presto_version *old_dir_ver, + struct izo_rollback_data *rb, int len, const char *name) +{ + int opcode = KML_OPCODE_RMDIR; + char *buffer, *path, *logrecord, record[316]; + __u32 pathlen, llen; + struct dentry *root; + int error, size; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_dentry; + + llen = cpu_to_le32(len); + BUFF_ALLOC(buffer, NULL); + path = presto_path(dir, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->group_info->ngroups + + sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) + + sizeof(pathlen) + sizeof(llen) + sizeof(*rb) + + sizeof(struct kml_suffix); + + if ( size > sizeof(record) ) + CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + + CDEBUG(D_JOURNAL, "path: %s (%d), name: %s (%d), size %d\n", + path, pathlen, name, len, size); + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)) + + size_round(len); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = log_version(logrecord, tgt_dir_ver); + logrecord = log_dentry_version(logrecord, dir); + logrecord = log_version(logrecord, old_dir_ver); + logrecord = logit(logrecord, rb, sizeof(*rb)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = logit(logrecord, &llen, sizeof(llen)); + logrecord = journal_log_suffix(logrecord, record, fset, dir, rec); + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + name, size_round(len), + NULL, 0); + + BUFF_FREE(buffer); + EXIT; + return error; +} 
+ + +int +presto_journal_mknod(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dentry, struct presto_version *tgt_dir_ver, + struct presto_version *new_node_ver, int mode, + int dmajor, int dminor ) +{ + int opcode = KML_OPCODE_MKNOD; + char *buffer, *path, *logrecord, record[292]; + struct dentry *root; + __u32 uid, gid, lmode, lmajor, lminor, pathlen; + int error, size; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_dentry; + + uid = cpu_to_le32(dentry->d_inode->i_uid); + gid = cpu_to_le32(dentry->d_inode->i_gid); + lmode = cpu_to_le32(mode); + lmajor = cpu_to_le32(dmajor); + lminor = cpu_to_le32(dminor); + + BUFF_ALLOC(buffer, NULL); + path = presto_path(dentry, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->group_info->ngroups + + sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) + + sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(lmajor) + + sizeof(lminor) + sizeof(pathlen) + + sizeof(struct kml_suffix); + + if ( size > sizeof(record) ) + CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = log_version(logrecord, tgt_dir_ver); + logrecord = log_dentry_version(logrecord, dentry->d_parent); + logrecord = log_version(logrecord, new_node_ver); + logrecord = logit(logrecord, &lmode, sizeof(lmode)); + logrecord = logit(logrecord, &uid, sizeof(uid)); + logrecord = logit(logrecord, &gid, sizeof(gid)); + logrecord = logit(logrecord, &lmajor, sizeof(lmajor)); + logrecord = logit(logrecord, &lminor, sizeof(lminor)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + NULL, 0, NULL, 0); + + BUFF_FREE(buffer); + 
EXIT; + return error; +} + +int +presto_journal_link(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *src, struct dentry *tgt, + struct presto_version *tgt_dir_ver, + struct presto_version *new_link_ver) +{ + int opcode = KML_OPCODE_LINK; + char *buffer, *srcbuffer, *path, *srcpath, *logrecord, record[292]; + __u32 pathlen, srcpathlen; + struct dentry *root; + int error, size; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_dentry; + + BUFF_ALLOC(srcbuffer, NULL); + srcpath = presto_path(src, root, srcbuffer, PAGE_SIZE); + srcpathlen = cpu_to_le32(MYPATHLEN(srcbuffer, srcpath)); + + BUFF_ALLOC(buffer, srcbuffer); + path = presto_path(tgt, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->group_info->ngroups + + sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) + + sizeof(srcpathlen) + sizeof(pathlen) + + sizeof(struct kml_suffix); + + if ( size > sizeof(record) ) + CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)) + + size_round(le32_to_cpu(srcpathlen)); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = log_version(logrecord, tgt_dir_ver); + logrecord = log_dentry_version(logrecord, tgt->d_parent); + logrecord = log_version(logrecord, new_link_ver); + logrecord = logit(logrecord, &srcpathlen, sizeof(srcpathlen)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = journal_log_suffix(logrecord, record, fset, tgt, rec); + + error = presto_log(fset, rec, record, size, + srcpath, size_round(le32_to_cpu(srcpathlen)), + path, size_round(le32_to_cpu(pathlen)), + NULL, 0); + + BUFF_FREE(srcbuffer); + BUFF_FREE(buffer); + EXIT; + return error; +} + + +int presto_journal_rename(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *src, struct dentry *tgt, + struct presto_version *src_dir_ver, + struct 
presto_version *tgt_dir_ver) +{ + int opcode = KML_OPCODE_RENAME; + char *buffer, *srcbuffer, *path, *srcpath, *logrecord, record[292]; + __u32 pathlen, srcpathlen; + struct dentry *root; + int error, size; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_dentry; + + BUFF_ALLOC(srcbuffer, NULL); + srcpath = presto_path(src, root, srcbuffer, PAGE_SIZE); + srcpathlen = cpu_to_le32(MYPATHLEN(srcbuffer, srcpath)); + + BUFF_ALLOC(buffer, srcbuffer); + path = presto_path(tgt, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->group_info->ngroups + + sizeof(struct kml_prefix_hdr) + 4 * sizeof(*src_dir_ver) + + sizeof(srcpathlen) + sizeof(pathlen) + + sizeof(struct kml_suffix); + + if ( size > sizeof(record) ) + CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)) + + size_round(le32_to_cpu(srcpathlen)); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = log_version(logrecord, src_dir_ver); + logrecord = log_dentry_version(logrecord, src->d_parent); + logrecord = log_version(logrecord, tgt_dir_ver); + logrecord = log_dentry_version(logrecord, tgt->d_parent); + logrecord = logit(logrecord, &srcpathlen, sizeof(srcpathlen)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = journal_log_suffix(logrecord, record, fset, tgt, rec); + + error = presto_log(fset, rec, record, size, + srcpath, size_round(le32_to_cpu(srcpathlen)), + path, size_round(le32_to_cpu(pathlen)), + NULL, 0); + + BUFF_FREE(buffer); + BUFF_FREE(srcbuffer); + EXIT; + return error; +} + +int presto_journal_unlink(struct rec_info *rec, struct presto_file_set *fset, + struct dentry *dir, struct presto_version *tgt_dir_ver, + struct presto_version *old_file_ver, + struct izo_rollback_data *rb, struct dentry *dentry, + char *old_target, int old_targetlen) +{ + int opcode = KML_OPCODE_UNLINK; 
+ char *buffer, *path, *logrecord, record[316]; + const char *name; + __u32 pathlen, llen; + struct dentry *root; + int error, size, len; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_dentry; + + name = dentry->d_name.name; + len = dentry->d_name.len; + + llen = cpu_to_le32(len); + BUFF_ALLOC(buffer, NULL); + path = presto_path(dir, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + size = sizeof(__u32) * current->group_info->ngroups + + sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) + + sizeof(pathlen) + sizeof(llen) + sizeof(*rb) + + sizeof(old_targetlen) + sizeof(struct kml_suffix); + + if ( size > sizeof(record) ) + CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)) + size_round(len) + + size_round(old_targetlen); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = log_version(logrecord, tgt_dir_ver); + logrecord = log_dentry_version(logrecord, dir); + logrecord = log_version(logrecord, old_file_ver); + logrecord = log_rollback(logrecord, rb); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = logit(logrecord, &llen, sizeof(llen)); + logrecord = logit(logrecord, &old_targetlen, sizeof(old_targetlen)); + logrecord = journal_log_suffix(logrecord, record, fset, dir, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + name, size_round(len), + old_target, size_round(old_targetlen)); + + BUFF_FREE(buffer); + EXIT; + return error; +} + +int +presto_journal_close(struct rec_info *rec, struct presto_file_set *fset, + struct presto_file_data *fd, struct dentry *dentry, + struct presto_version *old_file_ver, + struct presto_version *new_file_ver) +{ + int opcode = KML_OPCODE_CLOSE; + char *buffer, *path, *logrecord, record[316]; + struct dentry *root; + int error, size, i; + __u32 pathlen, generation; + __u64 ino; + 
__u32 open_fsuid; + __u32 open_fsgid; + __u32 open_ngroups; + __u32 open_groups[NGROUPS_SMALL]; + __u32 open_mode; + __u32 open_uid; + __u32 open_gid; + + ENTRY; + + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0) + || ((dentry->d_parent != dentry) && d_unhashed(dentry))) { + EXIT; + return 0; + } + + root = fset->fset_dentry; + + if (fd) { + open_ngroups = fd->fd_ngroups; + for (i = 0; i < fd->fd_ngroups; i++) + open_groups[i] = (__u32) fd->fd_groups[i]; + open_mode = fd->fd_mode; + open_uid = fd->fd_uid; + open_gid = fd->fd_gid; + open_fsuid = fd->fd_fsuid; + open_fsgid = fd->fd_fsgid; + } else { + open_ngroups = current->group_info->ngroups; + for (i=0; igroup_info->ngroups; i++) + open_groups[i] = (__u32) GROUP_AT(current->group_info,i); + open_mode = dentry->d_inode->i_mode; + open_uid = dentry->d_inode->i_uid; + open_gid = dentry->d_inode->i_gid; + open_fsuid = current->fsuid; + open_fsgid = current->fsgid; + } + BUFF_ALLOC(buffer, NULL); + path = presto_path(dentry, root, buffer, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); + ino = cpu_to_le64(dentry->d_inode->i_ino); + generation = cpu_to_le32(dentry->d_inode->i_generation); + size = sizeof(__u32) * open_ngroups + + sizeof(open_mode) + sizeof(open_uid) + sizeof(open_gid) + + sizeof(struct kml_prefix_hdr) + sizeof(*old_file_ver) + + sizeof(*new_file_ver) + sizeof(ino) + sizeof(generation) + + sizeof(pathlen) + sizeof(struct kml_suffix); + + if ( size > sizeof(record) ) + CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)); + + logrecord = journal_log_prefix_with_groups_and_ids( + record, opcode, rec, open_ngroups, open_groups, + open_fsuid, open_fsgid); + logrecord = logit(logrecord, &open_mode, sizeof(open_mode)); + logrecord = logit(logrecord, &open_uid, sizeof(open_uid)); + logrecord = logit(logrecord, &open_gid, sizeof(open_gid)); + 
logrecord = log_version(logrecord, old_file_ver); + logrecord = log_version(logrecord, new_file_ver); + logrecord = logit(logrecord, &ino, sizeof(ino)); + logrecord = logit(logrecord, &generation, sizeof(generation)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + NULL, 0, NULL, 0); + BUFF_FREE(buffer); + + EXIT; + return error; +} + +int presto_rewrite_close(struct rec_info *rec, struct presto_file_set *fset, + char *path, __u32 pathlen, + int ngroups, __u32 *groups, + __u64 ino, __u32 generation, + struct presto_version *new_file_ver) +{ + int opcode = KML_OPCODE_CLOSE; + char *logrecord, record[292]; + struct dentry *root; + int error, size; + + ENTRY; + + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + root = fset->fset_dentry; + + size = sizeof(__u32) * ngroups + + sizeof(struct kml_prefix_hdr) + sizeof(*new_file_ver) + + sizeof(ino) + sizeof(generation) + + sizeof(le32_to_cpu(pathlen)) + + sizeof(struct kml_suffix); + + if ( size > sizeof(record) ) + CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + + rec->is_kml = 1; + rec->size = size + size_round(le32_to_cpu(pathlen)); + + logrecord = journal_log_prefix_with_groups(record, opcode, rec, + ngroups, groups); + logrecord = log_version(logrecord, new_file_ver); + logrecord = logit(logrecord, &ino, sizeof(ino)); + logrecord = logit(logrecord, &generation, sizeof(generation)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = journal_log_suffix(logrecord, record, fset, NULL, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + NULL, 0, NULL, 0); + + EXIT; + return error; +} + + +/* write closes for the local close records in the LML */ +int presto_complete_lml(struct presto_file_set *fset) +{ + __u32 groups[NGROUPS_SMALL]; + loff_t lml_offset; + 
loff_t read_offset; + char *buffer; + void *handle; + struct rec_info rec; + struct close_rec { + struct presto_version new_file_ver; + __u64 ino; + __u32 generation; + __u32 pathlen; + __u64 remote_ino; + __u32 remote_generation; + __u32 remote_version; + __u64 lml_offset; + } close_rec; + struct file *file = fset->fset_lml.fd_file; + struct kml_prefix_hdr prefix; + int rc = 0; + ENTRY; + + lml_offset = 0; + again: + if (lml_offset >= file->f_dentry->d_inode->i_size) { + EXIT; + return rc; + } + + read_offset = lml_offset; + rc = presto_fread(file, (char *)&prefix, + sizeof(prefix), &read_offset); + if ( rc != sizeof(prefix) ) { + EXIT; + CERROR("presto_complete_lml: ioerror - 1, tell Peter\n"); + return -EIO; + } + + if ( prefix.opcode == KML_OPCODE_NOOP ) { + lml_offset += prefix.len; + goto again; + } + + rc = presto_fread(file, (char *)groups, + prefix.ngroups * sizeof(__u32), &read_offset); + if ( rc != prefix.ngroups * sizeof(__u32) ) { + EXIT; + CERROR("presto_complete_lml: ioerror - 2, tell Peter\n"); + return -EIO; + } + + rc = presto_fread(file, (char *)&close_rec, + sizeof(close_rec), &read_offset); + if ( rc != sizeof(close_rec) ) { + EXIT; + CERROR("presto_complete_lml: ioerror - 3, tell Peter\n"); + return -EIO; + } + + /* is this a backfetch or a close record? 
*/ + if ( le64_to_cpu(close_rec.remote_ino) != 0 ) { + lml_offset += prefix.len; + goto again; + } + + BUFF_ALLOC(buffer, NULL); + rc = presto_fread(file, (char *)buffer, + le32_to_cpu(close_rec.pathlen), &read_offset); + if ( rc != le32_to_cpu(close_rec.pathlen) ) { + EXIT; + CERROR("presto_complete_lml: ioerror - 4, tell Peter\n"); + return -EIO; + } + + handle = presto_trans_start(fset, file->f_dentry->d_inode, + KML_OPCODE_RELEASE); + if ( IS_ERR(handle) ) { + EXIT; + return -ENOMEM; + } + + rc = presto_clear_lml_close(fset, lml_offset); + if ( rc ) { + CERROR("error during clearing: %d\n", rc); + presto_trans_commit(fset, handle); + EXIT; + return rc; + } + + rc = presto_rewrite_close(&rec, fset, buffer, close_rec.pathlen, + prefix.ngroups, groups, + close_rec.ino, close_rec.generation, + &close_rec.new_file_ver); + if ( rc ) { + CERROR("error during rewrite close: %d\n", rc); + presto_trans_commit(fset, handle); + EXIT; + return rc; + } + + presto_trans_commit(fset, handle); + if ( rc ) { + CERROR("error during truncation: %d\n", rc); + EXIT; + return rc; + } + + lml_offset += prefix.len; + CDEBUG(D_JOURNAL, "next LML record at: %ld\n", (long)lml_offset); + goto again; + + EXIT; + return -EINVAL; +} + + +#ifdef CONFIG_FS_EXT_ATTR +/* Journal an ea operation. A NULL buffer implies the attribute is + * getting deleted. In this case we simply change the opcode, but nothing + * else is affected. + */ +int presto_journal_set_ext_attr (struct rec_info *rec, + struct presto_file_set *fset, + struct dentry *dentry, + struct presto_version *ver, const char *name, + const char *buffer, int buffer_len, + int flags) +{ + int opcode = (buffer == NULL) ? + KML_OPCODE_DELEXTATTR : + KML_OPCODE_SETEXTATTR ; + char *temp, *path, *logrecord, record[292]; + struct dentry *root; + int error, size; + __u32 namelen=cpu_to_le32(strnlen(name,PRESTO_EXT_ATTR_NAME_MAX)); + __u32 buflen=(buffer != NULL)? 
cpu_to_le32(buffer_len): cpu_to_le32(0); + __u32 mode, pathlen; + + ENTRY; + if ( presto_no_journal(fset) ) { + EXIT; + return 0; + } + + if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0) + || ((dentry->d_parent != dentry) && d_unhashed(dentry))) { + EXIT; + return 0; + } + + root = fset->fset_dentry; + + BUFF_ALLOC(temp, NULL); + path = presto_path(dentry, root, temp, PAGE_SIZE); + pathlen = cpu_to_le32(MYPATHLEN(temp, path)); + + flags=cpu_to_le32(flags); + /* Ugly, but needed. posix ACLs change the mode without using + * setattr, we need to record these changes. The EA code per se + * is not really affected. + */ + mode=cpu_to_le32(dentry->d_inode->i_mode); + + size = sizeof(__u32) * current->group_info->ngroups + + sizeof(struct kml_prefix_hdr) + + 2 * sizeof(struct presto_version) + + sizeof(flags) + sizeof(mode) + sizeof(namelen) + + sizeof(buflen) + sizeof(pathlen) + + sizeof(struct kml_suffix); + + if ( size > sizeof(record) ) + CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); + + rec->is_kml = 1; + /* Make space for a path, a attr name and value*/ + /* We use the buflen instead of buffer_len to make sure that we + * journal the right length. This may be a little paranoid, but + * with 64 bits round the corner, I would rather be safe than sorry! + * Also this handles deletes with non-zero buffer_lengths correctly. 
+ * SHP + */ + rec->size = size + size_round(le32_to_cpu(pathlen)) + + size_round(le32_to_cpu(namelen)) + + size_round(le32_to_cpu(buflen)); + + logrecord = journal_log_prefix(record, opcode, rec); + logrecord = log_version(logrecord, ver); + logrecord = log_dentry_version(logrecord, dentry); + logrecord = logit(logrecord, &flags, sizeof(flags)); + logrecord = logit(logrecord, &mode, sizeof(flags)); + logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); + logrecord = logit(logrecord, &namelen, sizeof(namelen)); + logrecord = logit(logrecord, &buflen, sizeof(buflen)); + logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec); + + error = presto_log(fset, rec, record, size, + path, size_round(le32_to_cpu(pathlen)), + name, size_round(le32_to_cpu(namelen)), + buffer, size_round(le32_to_cpu(buflen))); + + BUFF_FREE(temp); + EXIT; + return error; +} +#endif diff --git a/fs/intermezzo/journal_ext2.c b/fs/intermezzo/journal_ext2.c new file mode 100644 index 000000000..d1cb293c2 --- /dev/null +++ b/fs/intermezzo/journal_ext2.c @@ -0,0 +1,90 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 1998 Peter J. Braam + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + +#if defined(CONFIG_EXT2_FS) + +/* EXT2 has no journalling, so these functions do nothing */ +static loff_t presto_e2_freespace(struct presto_cache *cache, + struct super_block *sb) +{ + unsigned long freebl = le32_to_cpu(EXT2_SB(sb)->s_es->s_free_blocks_count); + unsigned long avail = freebl - le32_to_cpu(EXT2_SB(sb)->s_es->s_r_blocks_count); + return (avail << EXT2_BLOCK_SIZE_BITS(sb)); +} + +/* start the filesystem journal operations */ +static void *presto_e2_trans_start(struct presto_file_set *fset, struct inode *inode, int op) +{ + __u32 avail_kmlblocks; + + if ( presto_no_journal(fset) || + strcmp(fset->fset_cache->cache_type, "ext2")) + return NULL; + + avail_kmlblocks = EXT2_SB(inode->i_sb)->s_es->s_free_blocks_count; + + if ( avail_kmlblocks < 3 ) { + return ERR_PTR(-ENOSPC); + } + + if ( (op != KML_OPCODE_UNLINK && op != KML_OPCODE_RMDIR) + && avail_kmlblocks < 6 ) { + return ERR_PTR(-ENOSPC); + } + return (void *) 1; +} + +static void presto_e2_trans_commit(struct presto_file_set *fset, void *handle) +{ + do {} while (0); +} + +static int presto_e2_has_all_data(struct inode *inode) +{ + BUG(); + return 0; +} + +struct journal_ops presto_ext2_journal_ops = { + .tr_all_data = presto_e2_has_all_data, + .tr_avail = presto_e2_freespace, + .tr_start = presto_e2_trans_start, + .tr_commit = presto_e2_trans_commit, + .tr_journal_data = NULL +}; + +#endif /* CONFIG_EXT2_FS */ diff --git a/fs/intermezzo/journal_ext3.c b/fs/intermezzo/journal_ext3.c new file mode 100644 index 000000000..b847b6198 --- /dev/null +++ b/fs/intermezzo/journal_ext3.c @@ -0,0 +1,283 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 1998 Peter J. Braam + * Copyright (C) 2000 Red Hat, Inc. 
+ * Copyright (C) 2000 Los Alamos National Laboratory + * Copyright (C) 2000 TurboLinux, Inc. + * Copyright (C) 2001 Mountain View Data, Inc. + * Copyright (C) 2001 Tacit Networks, Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE) +#include +#include +#include +#endif + +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + +#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE) + +#define MAX_PATH_BLOCKS(inode) (PATH_MAX >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb)) +#define MAX_NAME_BLOCKS(inode) (NAME_MAX >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb)) + +/* space requirements: + presto_do_truncate: + used to truncate the KML forward to next fset->chunksize boundary + - zero partial block + - update inode + presto_write_record: + write header (< one block) + write one path (< MAX_PATHLEN) + possibly write another path (< MAX_PATHLEN) + write suffix (< one block) + presto_update_last_rcvd + write one block +*/ + +static loff_t presto_e3_freespace(struct presto_cache *cache, + struct super_block *sb) +{ + loff_t freebl = le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count); + loff_t avail = freebl - + 
le32_to_cpu(EXT3_SB(sb)->s_es->s_r_blocks_count); + return (avail << EXT3_BLOCK_SIZE_BITS(sb)); +} + +/* start the filesystem journal operations */ +static void *presto_e3_trans_start(struct presto_file_set *fset, + struct inode *inode, + int op) +{ + int jblocks; + int trunc_blks, one_path_blks, extra_path_blks, + extra_name_blks, lml_blks; + __u32 avail_kmlblocks; + handle_t *handle; + + if ( presto_no_journal(fset) || + strcmp(fset->fset_cache->cache_type, "ext3")) + { + CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n", + fset->fset_cache->cache_type); + return NULL; + } + + avail_kmlblocks = EXT3_SB(inode->i_sb)->s_es->s_free_blocks_count; + + if ( avail_kmlblocks < 3 ) { + return ERR_PTR(-ENOSPC); + } + + if ( (op != KML_OPCODE_UNLINK && op != KML_OPCODE_RMDIR) + && avail_kmlblocks < 6 ) { + return ERR_PTR(-ENOSPC); + } + + /* Need journal space for: + at least three writes to KML (two one block writes, one a path) + possibly a second name (unlink, rmdir) + possibly a second path (symlink, rename) + a one block write to the last rcvd file + */ + + trunc_blks = EXT3_DATA_TRANS_BLOCKS + 1; + one_path_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 3; + lml_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 2; + extra_path_blks = EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode); + extra_name_blks = EXT3_DATA_TRANS_BLOCKS + MAX_NAME_BLOCKS(inode); + + /* additional blocks appear for "two pathname" operations + and operations involving the LML records + */ + switch (op) { + case KML_OPCODE_TRUNC: + jblocks = one_path_blks + extra_name_blks + trunc_blks + + EXT3_DELETE_TRANS_BLOCKS; + break; + case KML_OPCODE_KML_TRUNC: + /* Hopefully this is a little better, but I'm still mostly + * guessing here. 
*/ + /* unlink 1 */ + jblocks = extra_name_blks + trunc_blks + + EXT3_DELETE_TRANS_BLOCKS + 2; + + /* unlink 2 */ + jblocks += extra_name_blks + trunc_blks + + EXT3_DELETE_TRANS_BLOCKS + 2; + + /* rename 1 */ + jblocks += 2 * extra_path_blks + trunc_blks + + 2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3; + + /* rename 2 */ + jblocks += 2 * extra_path_blks + trunc_blks + + 2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3; + break; + case KML_OPCODE_RELEASE: + /* + jblocks = one_path_blks + lml_blks + 2*trunc_blks; + */ + jblocks = one_path_blks; + break; + case KML_OPCODE_SETATTR: + jblocks = one_path_blks + trunc_blks + 1 ; + break; + case KML_OPCODE_CREATE: + jblocks = one_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 3 + 2; + break; + case KML_OPCODE_LINK: + jblocks = one_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 2; + break; + case KML_OPCODE_UNLINK: + jblocks = one_path_blks + extra_name_blks + trunc_blks + + EXT3_DELETE_TRANS_BLOCKS + 2; + break; + case KML_OPCODE_SYMLINK: + jblocks = one_path_blks + extra_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 5; + break; + case KML_OPCODE_MKDIR: + jblocks = one_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 4 + 2; + break; + case KML_OPCODE_RMDIR: + jblocks = one_path_blks + extra_name_blks + trunc_blks + + EXT3_DELETE_TRANS_BLOCKS + 1; + break; + case KML_OPCODE_MKNOD: + jblocks = one_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 3 + 2; + break; + case KML_OPCODE_RENAME: + jblocks = one_path_blks + extra_path_blks + trunc_blks + + 2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3; + break; + case KML_OPCODE_WRITE: + jblocks = one_path_blks; + /* add this when we can wrap our transaction with + that of ext3_file_write (ordered writes) + + EXT3_DATA_TRANS_BLOCKS; + */ + break; + default: + CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op); + return NULL; + } + + CDEBUG(D_JOURNAL, "creating journal handle (%d blocks) for op %d\n", + jblocks, op); + /* journal_start/stop does not do its own locking while updating + * 
the handle/transaction information. Hence we create our own + * critical section to protect these calls. -SHP + */ + lock_kernel(); + handle = journal_start(EXT3_JOURNAL(inode), jblocks); + unlock_kernel(); + return handle; +} + +static void presto_e3_trans_commit(struct presto_file_set *fset, void *handle) +{ + if ( presto_no_journal(fset) || !handle) + return; + + /* See comments before journal_start above. -SHP */ + lock_kernel(); + journal_stop(handle); + unlock_kernel(); +} + +static void presto_e3_journal_file_data(struct inode *inode) +{ +#ifdef EXT3_JOURNAL_DATA_FL + EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL; +#else +#warning You must have a facility to enable journaled writes for recovery! +#endif +} + +/* The logic here is a slightly modified version of ext3/inode.c:block_to_path + */ +static int presto_e3_has_all_data(struct inode *inode) +{ + int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb); + int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb); + const long direct_blocks = EXT3_NDIR_BLOCKS, + indirect_blocks = ptrs, + double_blocks = (1 << (ptrs_bits * 2)); + long block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> + inode->i_sb->s_blocksize_bits; + + ENTRY; + + if (inode->i_size == 0) { + EXIT; + return 1; + } + + if (block < direct_blocks) { + /* No indirect blocks, no problem. 
*/ + } else if (block < indirect_blocks + direct_blocks) { + block++; + } else if (block < double_blocks + indirect_blocks + direct_blocks) { + block += 2; + } else if (((block - double_blocks - indirect_blocks - direct_blocks) + >> (ptrs_bits * 2)) < ptrs) { + block += 3; + } + + block *= (inode->i_sb->s_blocksize / 512); + + CDEBUG(D_CACHE, "Need %ld blocks, have %ld.\n", block, inode->i_blocks); + + if (block > inode->i_blocks) { + EXIT; + return 0; + } + + EXIT; + return 1; +} + +struct journal_ops presto_ext3_journal_ops = { + .tr_all_data = presto_e3_has_all_data, + .tr_avail = presto_e3_freespace, + .tr_start = presto_e3_trans_start, + .tr_commit = presto_e3_trans_commit, + .tr_journal_data = presto_e3_journal_file_data, + .tr_ilookup = presto_iget_ilookup +}; + +#endif /* CONFIG_EXT3_FS */ diff --git a/fs/intermezzo/journal_obdfs.c b/fs/intermezzo/journal_obdfs.c new file mode 100644 index 000000000..702ee8b64 --- /dev/null +++ b/fs/intermezzo/journal_obdfs.c @@ -0,0 +1,193 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 1998 Peter J. Braam + * Copyright (C) 2000 Red Hat, Inc. + * Copyright (C) 2000 Los Alamos National Laboratory + * Copyright (C) 2000 TurboLinux, Inc. + * Copyright (C) 2001 Mountain View Data, Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_OBDFS_FS +#include /usr/src/obd/include/linux/obdfs.h +#endif + +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + +#ifdef CONFIG_OBDFS_FS + + +static unsigned long presto_obdfs_freespace(struct presto_file_set *fset, + struct super_block *sb) +{ + return 0x0fffff; +} + +/* start the filesystem journal operations */ +static void *presto_obdfs_trans_start(struct presto_file_set *fset, + struct inode *inode, + int op) +{ + + return (void *) 1; +} + +#if 0 + int jblocks; + int trunc_blks, one_path_blks, extra_path_blks, + extra_name_blks, lml_blks; + __u32 avail_kmlblocks; + + if ( presto_no_journal(fset) || + strcmp(fset->fset_cache->cache_type, "ext3")) + { + CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n", + fset->fset_cache->cache_type); + return NULL; + } + + avail_kmlblocks = inode->i_sb->u.ext3_sb.s_es->s_free_blocks_count; + + if ( avail_kmlblocks < 3 ) { + return ERR_PTR(-ENOSPC); + } + + if ( (op != PRESTO_OP_UNLINK && op != PRESTO_OP_RMDIR) + && avail_kmlblocks < 6 ) { + return ERR_PTR(-ENOSPC); + } + + /* Need journal space for: + at least three writes to KML (two one block writes, one a path) + possibly a second name (unlink, rmdir) + possibly a second path (symlink, rename) + a one block write to the last rcvd file + */ + + trunc_blks = EXT3_DATA_TRANS_BLOCKS + 1; + one_path_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 3; + lml_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 2; + extra_path_blks = EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode); + extra_name_blks = EXT3_DATA_TRANS_BLOCKS + MAX_NAME_BLOCKS(inode); + + /* additional blocks appear for "two pathname" operations + and operations 
involving the LML records + */ + switch (op) { + case PRESTO_OP_TRUNC: + jblocks = one_path_blks + extra_name_blks + trunc_blks + + EXT3_DELETE_TRANS_BLOCKS; + break; + case PRESTO_OP_RELEASE: + /* + jblocks = one_path_blks + lml_blks + 2*trunc_blks; + */ + jblocks = one_path_blks; + break; + case PRESTO_OP_SETATTR: + jblocks = one_path_blks + trunc_blks + 1 ; + break; + case PRESTO_OP_CREATE: + jblocks = one_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 3; + break; + case PRESTO_OP_LINK: + jblocks = one_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS; + break; + case PRESTO_OP_UNLINK: + jblocks = one_path_blks + extra_name_blks + trunc_blks + + EXT3_DELETE_TRANS_BLOCKS; + break; + case PRESTO_OP_SYMLINK: + jblocks = one_path_blks + extra_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 5; + break; + case PRESTO_OP_MKDIR: + jblocks = one_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 4; + break; + case PRESTO_OP_RMDIR: + jblocks = one_path_blks + extra_name_blks + trunc_blks + + EXT3_DELETE_TRANS_BLOCKS; + break; + case PRESTO_OP_MKNOD: + jblocks = one_path_blks + trunc_blks + + EXT3_DATA_TRANS_BLOCKS + 3; + break; + case PRESTO_OP_RENAME: + jblocks = one_path_blks + extra_path_blks + trunc_blks + + 2 * EXT3_DATA_TRANS_BLOCKS + 2; + break; + case PRESTO_OP_WRITE: + jblocks = one_path_blks; + /* add this when we can wrap our transaction with + that of ext3_file_write (ordered writes) + + EXT3_DATA_TRANS_BLOCKS; + */ + break; + default: + CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op); + return NULL; + } + + CDEBUG(D_JOURNAL, "creating journal handle (%d blocks)\n", jblocks); + return journal_start(EXT3_JOURNAL(inode), jblocks); +} +#endif + +void presto_obdfs_trans_commit(struct presto_file_set *fset, void *handle) +{ +#if 0 + if ( presto_no_journal(fset) || !handle) + return; + + journal_stop(handle); +#endif +} + +void presto_obdfs_journal_file_data(struct inode *inode) +{ +#ifdef EXT3_JOURNAL_DATA_FL + inode->u.ext3_i.i_flags |= 
EXT3_JOURNAL_DATA_FL; +#else +#warning You must have a facility to enable journaled writes for recovery! +#endif +} + +struct journal_ops presto_obdfs_journal_ops = { + .tr_avail = presto_obdfs_freespace, + .tr_start = presto_obdfs_trans_start, + .tr_commit = presto_obdfs_trans_commit, + .tr_journal_data = presto_obdfs_journal_file_data +}; + +#endif diff --git a/fs/intermezzo/journal_reiserfs.c b/fs/intermezzo/journal_reiserfs.c new file mode 100644 index 000000000..93fc14845 --- /dev/null +++ b/fs/intermezzo/journal_reiserfs.c @@ -0,0 +1,140 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 1998 Peter J. Braam + * Copyright (C) 2000 Red Hat, Inc. + * Copyright (C) 2000 Los Alamos National Laboratory + * Copyright (C) 2000 TurboLinux, Inc. + * Copyright (C) 2001 Mountain View Data, Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if 0 +#if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE) +#include +#include +#include +#endif + +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + +#if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE) + + +static loff_t presto_reiserfs_freespace(struct presto_cache *cache, + struct super_block *sb) +{ + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (sb); + loff_t avail; + + avail = le32_to_cpu(rs->s_free_blocks) * + le16_to_cpu(rs->s_blocksize); + return avail; +} + +/* start the filesystem journal operations */ +static void *presto_reiserfs_trans_start(struct presto_file_set *fset, + struct inode *inode, + int op) +{ + int jblocks; + __u32 avail_kmlblocks; + struct reiserfs_transaction_handle *th ; + + PRESTO_ALLOC(th, sizeof(*th)); + if (!th) { + CERROR("presto: No memory for trans handle\n"); + return NULL; + } + + avail_kmlblocks = presto_reiserfs_freespace(fset->fset_cache, + inode->i_sb); + if ( presto_no_journal(fset) || + strcmp(fset->fset_cache->cache_type, "reiserfs")) + { + CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n", + fset->fset_cache->cache_type); + return NULL; + } + + if ( avail_kmlblocks < 3 ) { + return ERR_PTR(-ENOSPC); + } + + if ( (op != PRESTO_OP_UNLINK && op != PRESTO_OP_RMDIR) + && avail_kmlblocks < 6 ) { + return ERR_PTR(-ENOSPC); + } + + jblocks = 3 + JOURNAL_PER_BALANCE_CNT * 4; + CDEBUG(D_JOURNAL, "creating journal handle (%d blocks)\n", jblocks); + + lock_kernel(); + journal_begin(th, inode->i_sb, jblocks); + unlock_kernel(); + return th; +} + +static void presto_reiserfs_trans_commit(struct presto_file_set *fset, + void *handle) +{ + int jblocks; + jblocks = 3 + JOURNAL_PER_BALANCE_CNT * 4; + + lock_kernel(); + journal_end(handle, fset->fset_cache->cache_sb, jblocks); + unlock_kernel(); + PRESTO_FREE(handle, sizeof(struct reiserfs_transaction_handle)); +} + 
+static void presto_reiserfs_journal_file_data(struct inode *inode) +{ +#ifdef EXT3_JOURNAL_DATA_FL + inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL; +#else +#warning You must have a facility to enable journaled writes for recovery! +#endif +} + +static int presto_reiserfs_has_all_data(struct inode *inode) +{ + BUG(); + return 0; +} + +struct journal_ops presto_reiserfs_journal_ops = { + .tr_all_data = presto_reiserfs_has_all_data, + .tr_avail = presto_reiserfs_freespace, + .tr_start = presto_reiserfs_trans_start, + .tr_commit = presto_reiserfs_trans_commit, + .tr_journal_data = presto_reiserfs_journal_file_data +}; + +#endif +#endif diff --git a/fs/intermezzo/journal_tmpfs.c b/fs/intermezzo/journal_tmpfs.c new file mode 100644 index 000000000..4f3c463f0 --- /dev/null +++ b/fs/intermezzo/journal_tmpfs.c @@ -0,0 +1,107 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 1998 Peter J. Braam + * Copyright (C) 2000 Red Hat, Inc. + * Copyright (C) 2000 Los Alamos National Laboratory + * Copyright (C) 2000 TurboLinux, Inc. + * Copyright (C) 2001 Mountain View Data, Inc. + * Copyright (C) 2001 Tacit Networks, Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(CONFIG_TMPFS) +#include +#if defined(CONFIG_EXT3) +#include +#include +#endif +#endif + +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + +#if defined(CONFIG_TMPFS) + +/* space requirements: + presto_do_truncate: + used to truncate the KML forward to next fset->chunksize boundary + - zero partial block + - update inode + presto_write_record: + write header (< one block) + write one path (< MAX_PATHLEN) + possibly write another path (< MAX_PATHLEN) + write suffix (< one block) + presto_update_last_rcvd + write one block +*/ + +static loff_t presto_tmpfs_freespace(struct presto_cache *cache, + struct super_block *sb) +{ + return (1<<30); +} + +/* start the filesystem journal operations */ +static void *presto_tmpfs_trans_start(struct presto_file_set *fset, + struct inode *inode, + int op) +{ + return (void *)1; +} + +static void presto_tmpfs_trans_commit(struct presto_file_set *fset, void *handle) +{ + return; +} + +static void presto_tmpfs_journal_file_data(struct inode *inode) +{ + return; +} + +/* The logic here is a slightly modified version of ext3/inode.c:block_to_path + */ +static int presto_tmpfs_has_all_data(struct inode *inode) +{ + return 0; +} + +struct journal_ops presto_tmpfs_journal_ops = { + .tr_all_data = presto_tmpfs_has_all_data, + .tr_avail = presto_tmpfs_freespace, + .tr_start = presto_tmpfs_trans_start, + .tr_commit = presto_tmpfs_trans_commit, + .tr_journal_data = presto_tmpfs_journal_file_data, + .tr_ilookup = presto_tmpfs_ilookup, + .tr_add_ilookup = presto_add_ilookup_dentry +}; + +#endif /* CONFIG_EXT3_FS */ diff --git a/fs/intermezzo/journal_xfs.c b/fs/intermezzo/journal_xfs.c new file mode 100644 index 000000000..59b22a500 --- /dev/null +++ b/fs/intermezzo/journal_xfs.c @@ -0,0 +1,161 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: 
+ * + * Copyright (C) 1998 Peter J. Braam + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if 0 +/* XFS Support not there yet */ +#ifdef CONFIG_FS_XFS +#include +#endif +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" +#include "intermezzo_journal.h" + +#if 0 + +/* XFS has journalling, but these functions do nothing yet... */ + +static unsigned long presto_xfs_freespace(struct presto_file_set *fset, + struct super_block *sb) +{ + +#if 0 + vfs_t *vfsp = LINVFS_GET_VFS(sb); + struct statvfs_t stat; + bhv_desc_t *bdp; + unsigned long avail; + int rc; + + VFS_STATVFS(vfsp, &stat, NULL, rc); + avail = statp.f_bfree; + + return sbp->sb_fdblocks; +#endif + return 0x0fffffff; +} + + +/* start the filesystem journal operations */ +static void * +presto_xfs_trans_start(struct presto_file_set *fset, + struct inode *inode, int op) +{ + int xfs_op; + /* do a free blocks check as in journal_ext3? does anything protect + * the space in that case or can it disappear out from under us + * anyway? 
*/ + +/* copied from xfs_trans.h, skipping header maze for now */ +#define XFS_TRANS_SETATTR_NOT_SIZE 1 +#define XFS_TRANS_SETATTR_SIZE 2 +#define XFS_TRANS_INACTIVE 3 +#define XFS_TRANS_CREATE 4 +#define XFS_TRANS_CREATE_TRUNC 5 +#define XFS_TRANS_TRUNCATE_FILE 6 +#define XFS_TRANS_REMOVE 7 +#define XFS_TRANS_LINK 8 +#define XFS_TRANS_RENAME 9 +#define XFS_TRANS_MKDIR 10 +#define XFS_TRANS_RMDIR 11 +#define XFS_TRANS_SYMLINK 12 + + /* map the op onto the values for XFS so it can do reservation. if + * we don't have enough info to differentiate between e.g. setattr + * with or without size, what do we do? will it adjust? */ + switch (op) { + case PRESTO_OP_SETATTR: + /* or XFS_TRANS_SETATTR_NOT_SIZE? */ + xfs_op = XFS_TRANS_SETATTR_SIZE; + break; + case PRESTO_OP_CREATE: + /* or CREATE_TRUNC? */ + xfs_op = XFS_TRANS_CREATE; + break; + case PRESTO_OP_LINK: + xfs_op = XFS_TRANS_LINK; + break; + case PRESTO_OP_UNLINK: + xfs_op = XFS_TRANS_REMOVE; + break; + case PRESTO_OP_SYMLINK: + xfs_op = XFS_TRANS_SYMLINK; + break; + case PRESTO_OP_MKDIR: + xfs_op = XFS_TRANS_MKDIR; + break; + case PRESTO_OP_RMDIR: + xfs_op = XFS_TRANS_RMDIR; + break; + case PRESTO_OP_MKNOD: + /* XXX can't find an analog for mknod? 
*/ + xfs_op = XFS_TRANS_CREATE; + break; + case PRESTO_OP_RENAME: + xfs_op = XFS_TRANS_RENAME; + break; + default: + CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op); + return NULL; + } + + return xfs_trans_start(inode, xfs_op); +} + +static void presto_xfs_trans_commit(struct presto_file_set *fset, void *handle) +{ + /* assert (handle == current->j_handle) */ + xfs_trans_stop(handle); +} + +static void presto_xfs_journal_file_data(struct inode *inode) +{ + return; +} + +static int presto_xfs_has_all_data(struct inode *inode) +{ + BUG(); + return 0; +} + +struct journal_ops presto_xfs_journal_ops = { + .tr_all_data = presto_xfs_has_all_data, + .tr_avail = presto_xfs_freespace, + .tr_start = presto_xfs_trans_start, + .tr_commit = presto_xfs_trans_commit, + .tr_journal_data = presto_xfs_journal_file_data +}; + +#endif + + +#endif /* CONFIG_XFS_FS */ + diff --git a/fs/intermezzo/kml.c b/fs/intermezzo/kml.c new file mode 100644 index 000000000..e992c18f8 --- /dev/null +++ b/fs/intermezzo/kml.c @@ -0,0 +1,194 @@ +#include +#include +#include +#include +#include + +#include "intermezzo_fs.h" +#include "intermezzo_upcall.h" +#include "intermezzo_psdev.h" +#include "intermezzo_kml.h" + +static struct presto_file_set * kml_getfset (char *path) +{ + return presto_path2fileset(path); +} + +/* Send the KML buffer and related volume info into kernel */ +int begin_kml_reint (struct file *file, unsigned long arg) +{ + struct { + char *volname; + int namelen; + char *recbuf; + int reclen; /* int newpos; */ + } input; + struct kml_fsdata *kml_fsdata = NULL; + struct presto_file_set *fset = NULL; + char *path; + int error; + + ENTRY; + /* allocate buffer & copy it to kernel space */ + if (copy_from_user(&input, (char *)arg, sizeof(input))) { + EXIT; + return -EFAULT; + } + + if (input.reclen > kml_fsdata->kml_maxsize) + return -ENOMEM; /* we'll find solution to this in the future */ + + PRESTO_ALLOC(path, char *, input.namelen + 1); + if ( !path ) { + EXIT; + return 
-ENOMEM; + } + if (copy_from_user(path, input.volname, input.namelen)) { + PRESTO_FREE(path, input.namelen + 1); + EXIT; + return -EFAULT; + } + path[input.namelen] = '\0'; + fset = kml_getfset (path); + PRESTO_FREE(path, input.namelen + 1); + + kml_fsdata = FSET_GET_KMLDATA(fset); + /* read the buf from user memory here */ + if (copy_from_user(kml_fsdata->kml_buf, input.recbuf, input.reclen)) { + EXIT; + return -EFAULT; + } + kml_fsdata->kml_len = input.reclen; + + decode_kmlrec (&kml_fsdata->kml_reint_cache, + kml_fsdata->kml_buf, kml_fsdata->kml_len); + + kml_fsdata->kml_reint_current = kml_fsdata->kml_reint_cache.next; + kml_fsdata->kml_reintpos = 0; + kml_fsdata->kml_count = 0; + return 0; +} + +/* DO_KML_REINT */ +int do_kml_reint (struct file *file, unsigned long arg) +{ + struct { + char *volname; + int namelen; + char *path; + int pathlen; + int recno; + int offset; + int len; + int generation; + __u64 ino; + } input; + int error; + char *path; + struct kml_rec *close_rec; + struct kml_fsdata *kml_fsdata; + struct presto_file_set *fset; + + ENTRY; + if (copy_from_user(&input, (char *)arg, sizeof(input))) { + EXIT; + return -EFAULT; + } + PRESTO_ALLOC(path, char *, input.namelen + 1); + if ( !path ) { + EXIT; + return -ENOMEM; + } + if (copy_from_user(path, input.volname, input.namelen)) { + PRESTO_FREE(path, input.namelen + 1); + EXIT; + return -EFAULT; + } + path[input.namelen] = '\0'; + fset = kml_getfset (path); + PRESTO_FREE(path, input.namelen + 1); + + kml_fsdata = FSET_GET_KMLDATA(fset); + + error = kml_reintbuf(kml_fsdata, + fset->fset_mtpt->d_name.name, + &close_rec); + + if (error == KML_CLOSE_BACKFETCH && close_rec != NULL) { + struct kml_close *close = &close_rec->rec_kml.close; + input.ino = close->ino; + input.generation = close->generation; + if (strlen (close->path) + 1 < input.pathlen) { + strcpy (input.path, close->path); + input.pathlen = strlen (close->path) + 1; + input.recno = close_rec->rec_tail.recno; + input.offset = 
close_rec->rec_kml_offset; + input.len = close_rec->rec_size; + input.generation = close->generation; + input.ino = close->ino; + } + else { + CDEBUG(D_KML, "KML_DO_REINT::no space to save:%d < %d", + strlen (close->path) + 1, input.pathlen); + error = -ENOMEM; + } + if (copy_to_user((char *)arg, &input, sizeof (input))) + return -EFAULT; + } + return error; +} + +/* END_KML_REINT */ +int end_kml_reint (struct file *file, unsigned long arg) +{ + /* Free KML buffer and related volume info */ + struct { + char *volname; + int namelen; +#if 0 + int count; + int newpos; +#endif + } input; + struct presto_file_set *fset = NULL; + struct kml_fsdata *kml_fsdata = NULL; + int error; + char *path; + + ENTRY; + if (copy_from_user(&input, (char *)arg, sizeof(input))) { + EXIT; + return -EFAULT; + } + + PRESTO_ALLOC(path, char *, input.namelen + 1); + if ( !path ) { + EXIT; + return -ENOMEM; + } + if (copy_from_user(path, input.volname, input.namelen)) { + if ( error ) { + PRESTO_FREE(path, input.namelen + 1); + EXIT; + return -EFAULT; + } + path[input.namelen] = '\0'; + fset = kml_getfset (path); + PRESTO_FREE(path, input.namelen + 1); + + kml_fsdata = FSET_GET_KMLDATA(fset); + delete_kmlrec (&kml_fsdata->kml_reint_cache); + + /* kml reint support */ + kml_fsdata->kml_reint_current = NULL; + kml_fsdata->kml_len = 0; + kml_fsdata->kml_reintpos = 0; + kml_fsdata->kml_count = 0; +#if 0 + input.newpos = kml_upc->newpos; + input.count = kml_upc->count; + if (copy_to_user((char *)arg, &input, sizeof (input))) + return -EFAULT; +#endif + return error; +} diff --git a/fs/intermezzo/kml_decode.c b/fs/intermezzo/kml_decode.c new file mode 100644 index 000000000..f04e7d5fd --- /dev/null +++ b/fs/intermezzo/kml_decode.c @@ -0,0 +1,1016 @@ +/* + * KML Decoding + * + * Copryright (C) 1996 Arthur Ma + * + * Copyright (C) 2001 Mountainview Data, Inc. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include "intermezzo_fs.h" +#include "intermezzo_kml.h" + +static int size_round (int val); +static int unpack_create (struct kml_create *rec, char *buf, + int pos, int *rec_offs); +static int unpack_open (struct kml_open *rec, char *buf, + int pos, int *rec_offs); +static int unpack_symlink (struct kml_symlink *rec, char *buf, + int pos, int *rec_offs); +static int unpack_mknod (struct kml_mknod *rec, char *buf, + int pos, int *rec_offs); +static int unpack_link (struct kml_link *rec, char *buf, + int pos, int *rec_offs); +static int unpack_rename (struct kml_rename *rec, char *buf, + int pos, int *rec_offs); +static int unpack_unlink (struct kml_unlink *rec, char *buf, + int pos, int *rec_offs); +static int unpack_rmdir (struct kml_rmdir *rec, char *buf, + int pos, int *rec_offs); +static int unpack_setattr (struct kml_setattr *rec, char *buf, + int pos, int *rec_offs); +static int unpack_close (struct kml_close *rec, char *buf, + int pos, int *rec_offs); +static int unpack_mkdir (struct kml_mkdir *rec, char *buf, + int pos, int *rec_offs); + +#if 0 +static int unpack_endmark (struct kml_endmark *rec, char *buf, + int pos, int *rec_offs); +static void print_kml_endmark (struct kml_endmark *rec); +#endif + +static int kml_unpack (char *kml_buf, int rec_size, int kml_offset, + struct kml_rec **newrec); +static char *kml_version (struct presto_version *ver); +static void print_kml_prefix (struct big_journal_prefix *head); +static void print_kml_create (struct kml_create *rec); +static void print_kml_mkdir (struct kml_mkdir *rec); +static void print_kml_unlink (struct kml_unlink *rec); +static void print_kml_rmdir (struct kml_rmdir *rec); +static void print_kml_close (struct kml_close *rec); +static void print_kml_symlink (struct kml_symlink *rec); +static void print_kml_rename (struct kml_rename *rec); +static void print_kml_setattr (struct kml_setattr *rec); +static void print_kml_link 
(struct kml_link *rec); +static void print_kml_mknod (struct kml_mknod *rec); +static void print_kml_open (struct kml_open *rec); +static void print_kml_suffix (struct journal_suffix *tail); +static char *readrec (char *recbuf, int reclen, int pos, int *size); + +#define KML_PREFIX_WORDS 8 +static int kml_unpack (char *kml_buf, int rec_size, int kml_offset, + struct kml_rec **newrec) +{ + struct kml_rec *rec; + char *p; + int pos, rec_offs; + int error; + + ENTRY; + if (rec_size < sizeof (struct journal_prefix) + + sizeof (struct journal_suffix)) + return -EBADF; + + PRESTO_ALLOC(rec, struct kml_rec *, sizeof (struct kml_rec)); + if (rec == NULL) { + EXIT; + return -ENOMEM; + } + rec->rec_kml_offset = kml_offset; + rec->rec_size = rec_size; + p = kml_buf; + p = dlogit (&rec->rec_head, p, KML_PREFIX_WORDS * sizeof (int)); + p = dlogit (&rec->rec_head.groups, p, + sizeof (int) * rec->rec_head.ngroups); + + pos = sizeof (struct journal_prefix) + + sizeof (int) * rec->rec_head.ngroups; + switch (rec->rec_head.opcode) + { + case KML_CREATE: + error = unpack_create (&rec->rec_kml.create, + kml_buf, pos, &rec_offs); + break; + case KML_MKDIR: + error = unpack_mkdir (&rec->rec_kml.mkdir, + kml_buf, pos, &rec_offs); + break; + case KML_UNLINK: + error = unpack_unlink (&rec->rec_kml.unlink, + kml_buf, pos, &rec_offs); + break; + case KML_RMDIR: + error = unpack_rmdir (&rec->rec_kml.rmdir, + kml_buf, pos, &rec_offs); + break; + case KML_CLOSE: + error = unpack_close (&rec->rec_kml.close, + kml_buf, pos, &rec_offs); + break; + case KML_SYMLINK: + error = unpack_symlink (&rec->rec_kml.symlink, + kml_buf, pos, &rec_offs); + break; + case KML_RENAME: + error = unpack_rename (&rec->rec_kml.rename, + kml_buf, pos, &rec_offs); + break; + case KML_SETATTR: + error = unpack_setattr (&rec->rec_kml.setattr, + kml_buf, pos, &rec_offs); + break; + case KML_LINK: + error = unpack_link (&rec->rec_kml.link, + kml_buf, pos, &rec_offs); + break; + case KML_OPEN: + error = unpack_open 
(&rec->rec_kml.open, + kml_buf, pos, &rec_offs); + break; + case KML_MKNOD: + error = unpack_mknod (&rec->rec_kml.mknod, + kml_buf, pos, &rec_offs); + break; +#if 0 + case KML_ENDMARK: + error = unpack_endmark (&rec->rec_kml.endmark, + kml_buf, pos, &rec_offs); + break; +#endif + default: + CDEBUG (D_KML, "wrong opcode::%u\n", + rec->rec_head.opcode); + EXIT; + return -EINVAL; + } + if (error) { + PRESTO_FREE (rec, sizeof (struct kml_rec)); + return -EINVAL; + } + p = kml_buf + rec_offs; + p = dlogit (&rec->rec_tail, p, sizeof (struct journal_suffix)); + memset (&rec->kml_optimize, 0, sizeof (struct kml_optimize)); + *newrec = rec; + EXIT; + return 0; +} + +static int size_round (int val) +{ + return (val + 3) & (~0x3); +} + +static int unpack_create (struct kml_create *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 88; + int pathlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version)); + p = dlogit (&rec->mode, p, sizeof (int)); + p = dlogit (&rec->uid, p, sizeof (int)); + p = dlogit (&rec->gid, p, sizeof (int)); + p = dlogit (&pathlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->path = q; + + *rec_offs = pos + unpack_size + size_round(pathlen); + EXIT; + return 0; +} + +static int unpack_open (struct kml_open *rec, char *buf, + int pos, int *rec_offs) +{ + *rec_offs = pos; + return 0; +} + +static int unpack_symlink (struct kml_symlink *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 88; + int pathlen, targetlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version)); + p = dlogit 
(&rec->new_objectv, p, sizeof (struct presto_version)); + p = dlogit (&rec->uid, p, sizeof (int)); + p = dlogit (&rec->gid, p, sizeof (int)); + p = dlogit (&pathlen, p, sizeof (int)); + p = dlogit (&targetlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->sourcepath = q; + + PRESTO_ALLOC(q, char *, targetlen + 1); + if (q == NULL) { + PRESTO_FREE (rec->sourcepath, pathlen + 1); + EXIT; + return -ENOMEM; + } + + memcpy (q, p, targetlen); + q[targetlen] = '\0'; + rec->targetpath = q; + + *rec_offs = pos + unpack_size + size_round(pathlen) + + size_round(targetlen); + EXIT; + return 0; +} + +static int unpack_mknod (struct kml_mknod *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 96; + int pathlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version)); + p = dlogit (&rec->mode, p, sizeof (int)); + p = dlogit (&rec->uid, p, sizeof (int)); + p = dlogit (&rec->gid, p, sizeof (int)); + p = dlogit (&rec->major, p, sizeof (int)); + p = dlogit (&rec->minor, p, sizeof (int)); + p = dlogit (&pathlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->path = q; + + *rec_offs = pos + unpack_size + size_round(pathlen); + EXIT; + return 0; +} + +static int unpack_link (struct kml_link *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 80; + int pathlen, targetlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version)); + p = dlogit 
(&pathlen, p, sizeof (int)); + p = dlogit (&targetlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->sourcepath = q; + p += size_round (pathlen); + + PRESTO_ALLOC(q, char *, targetlen + 1); + if (q == NULL) { + PRESTO_FREE (rec->sourcepath, pathlen + 1); + EXIT; + return -ENOMEM; + } + memcpy (q, p, targetlen); + q[targetlen] = '\0'; + rec->targetpath = q; + + *rec_offs = pos + unpack_size + size_round(pathlen) + + size_round(targetlen); + EXIT; + return 0; +} + +static int unpack_rename (struct kml_rename *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 104; + int pathlen, targetlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_objectv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_tgtv, p, sizeof (struct presto_version)); + p = dlogit (&rec->old_tgtv, p, sizeof (struct presto_version)); + p = dlogit (&pathlen, p, sizeof (int)); + p = dlogit (&targetlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->sourcepath = q; + p += size_round (pathlen); + + PRESTO_ALLOC(q, char *, targetlen + 1); + if (q == NULL) { + PRESTO_FREE (rec->sourcepath, pathlen + 1); + EXIT; + return -ENOMEM; + } + + memcpy (q, p, targetlen); + q[targetlen] = '\0'; + rec->targetpath = q; + + *rec_offs = pos + unpack_size + size_round(pathlen) + + size_round(targetlen); + EXIT; + return 0; +} + +static int unpack_unlink (struct kml_unlink *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 80; + int pathlen, targetlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->old_tgtv, p, 
sizeof (struct presto_version)); + p = dlogit (&pathlen, p, sizeof (int)); + p = dlogit (&targetlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->path = q; + p += size_round (pathlen); + + PRESTO_ALLOC(q, char *, targetlen + 1); + if (q == NULL) { + PRESTO_FREE (rec->path, pathlen + 1); + EXIT; + return -ENOMEM; + } + + memcpy (q, p, targetlen); + q[targetlen] = '\0'; + rec->name = q; + + /* fix the presto_journal_unlink problem */ + *rec_offs = pos + unpack_size + size_round(pathlen) + + size_round(targetlen); + EXIT; + return 0; +} + +static int unpack_rmdir (struct kml_rmdir *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 80; + int pathlen, targetlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->old_tgtv, p, sizeof (struct presto_version)); + p = dlogit (&pathlen, p, sizeof (int)); + p = dlogit (&targetlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->path = q; + p += size_round (pathlen); + + PRESTO_ALLOC(q, char *, targetlen + 1); + if (q == NULL) { + PRESTO_FREE (rec->path, pathlen + 1); + EXIT; + return -ENOMEM; + } + memcpy (q, p, targetlen); + q[targetlen] = '\0'; + rec->name = q; + + *rec_offs = pos + unpack_size + size_round(pathlen) + + size_round(targetlen); + EXIT; + return 0; +} + +static int unpack_setattr (struct kml_setattr *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 72; + struct kml_attr { + __u64 size, mtime, ctime; + } objattr; + int valid, mode, uid, gid, flags; + int pathlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_objectv, p, sizeof (struct presto_version)); + p = dlogit (&valid, p, 
sizeof (int)); + p = dlogit (&mode, p, sizeof (int)); + p = dlogit (&uid, p, sizeof (int)); + p = dlogit (&gid, p, sizeof (int)); + p = dlogit (&objattr, p, sizeof (struct kml_attr)); + p = dlogit (&flags, p, sizeof (int)); + p = dlogit (&pathlen, p, sizeof (int)); + + rec->iattr.ia_valid = valid; + rec->iattr.ia_mode = mode; + rec->iattr.ia_uid = uid; + rec->iattr.ia_gid = gid; + rec->iattr.ia_size = objattr.size; + rec->iattr.ia_mtime = objattr.mtime; + rec->iattr.ia_ctime = objattr.ctime; + rec->iattr.ia_atime = 0; + rec->iattr.ia_attr_flags = flags; + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->path = q; + p += pathlen; + + *rec_offs = pos + unpack_size + size_round(pathlen); + EXIT; + return 0; +} + +static int unpack_close (struct kml_close *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 52; + int pathlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->open_mode, p, sizeof (int)); + p = dlogit (&rec->open_uid, p, sizeof (int)); + p = dlogit (&rec->open_gid, p, sizeof (int)); + p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version)); + p = dlogit (&rec->ino, p, sizeof (__u64)); + p = dlogit (&rec->generation, p, sizeof (int)); + p = dlogit (&pathlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->path = q; + p += pathlen; + + *rec_offs = pos + unpack_size + size_round(pathlen); + EXIT; + return 0; +} + +static int unpack_mkdir (struct kml_mkdir *rec, char *buf, + int pos, int *rec_offs) +{ + char *p, *q; + int unpack_size = 88; + int pathlen; + + ENTRY; + p = buf + pos; + p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version)); + p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version)); + p = dlogit (&rec->mode, p, 
sizeof (int)); + p = dlogit (&rec->uid, p, sizeof (int)); + p = dlogit (&rec->gid, p, sizeof (int)); + p = dlogit (&pathlen, p, sizeof (int)); + + PRESTO_ALLOC(q, char *, pathlen + 1); + if (q == NULL) { + EXIT; + return -ENOMEM; + } + + memcpy (q, p, pathlen); + q[pathlen] = '\0'; + rec->path = q; + p += pathlen; + + *rec_offs = pos + unpack_size + size_round(pathlen); + EXIT; + return 0; +} + +#if 0 +static int unpack_endmark (struct kml_endmark *rec, char *buf, + int pos, int *rec_offs) +{ + char *p; + p = buf + pos; + p = dlogit (&rec->total, p, sizeof (int)); + + PRESTO_ALLOC (rec->kop, struct kml_kop_node *, + sizeof (struct kml_kop_node) * rec->total); + if (rec->kop == NULL) { + EXIT; + return -ENOMEM; + } + + p = dlogit (rec->kop, p, sizeof (struct kml_kop_node) * rec->total); + + *rec_offs = pos + sizeof (int) + sizeof (struct kml_kop_node) * rec->total; + return 0; +} +#endif + +static char *kml_version (struct presto_version *ver) +{ + static char buf[256]; + sprintf (buf, "mt::%lld, ct::%lld, size::%lld", + ver->pv_mtime, ver->pv_ctime, ver->pv_size); + return buf; +} + +static void print_kml_prefix (struct big_journal_prefix *head) +{ + int i; + + CDEBUG (D_KML, " === KML PREFIX\n"); + CDEBUG (D_KML, " len = %u\n", head->len); + CDEBUG (D_KML, " version = %u\n", head->version); + CDEBUG (D_KML, " pid = %u\n", head->pid); + CDEBUG (D_KML, " uid = %u\n", head->uid); + CDEBUG (D_KML, " fsuid = %u\n", head->fsuid); + CDEBUG (D_KML, " fsgid = %u\n", head->fsgid); + CDEBUG (D_KML, " opcode = %u\n", head->opcode); + CDEBUG (D_KML, " ngroup = %u", head->ngroups); + for (i = 0; i < head->ngroups; i++) + CDEBUG (D_KML, "%u ", head->groups[i]); + CDEBUG (D_KML, "\n"); +} + +static void print_kml_create (struct kml_create *rec) +{ + CDEBUG (D_KML, " === CREATE\n"); + CDEBUG (D_KML, " path::%s\n", rec->path); + CDEBUG (D_KML, " new_objv::%s\n", kml_version (&rec->new_objectv)); + CDEBUG (D_KML, " old_parv::%s\n", kml_version (&rec->old_parentv)); + CDEBUG (D_KML, 
" new_parv::%s\n", kml_version (&rec->new_parentv)); + CDEBUG (D_KML, " mode::%o\n", rec->mode); + CDEBUG (D_KML, " uid::%d\n", rec->uid); + CDEBUG (D_KML, " gid::%d\n", rec->gid); +} + +static void print_kml_mkdir (struct kml_mkdir *rec) +{ + CDEBUG (D_KML, " === MKDIR\n"); + CDEBUG (D_KML, " path::%s\n", rec->path); + CDEBUG (D_KML, " new_objv::%s\n", kml_version (&rec->new_objectv)); + CDEBUG (D_KML, " old_parv::%s\n", kml_version (&rec->old_parentv)); + CDEBUG (D_KML, " new_parv::%s\n", kml_version (&rec->new_parentv)); + CDEBUG (D_KML, " mode::%o\n", rec->mode); + CDEBUG (D_KML, " uid::%d\n", rec->uid); + CDEBUG (D_KML, " gid::%d\n", rec->gid); +} + +static void print_kml_unlink (struct kml_unlink *rec) +{ + CDEBUG (D_KML, " === UNLINK\n"); + CDEBUG (D_KML, " path::%s/%s\n", rec->path, rec->name); + CDEBUG (D_KML, " old_tgtv::%s\n", kml_version (&rec->old_tgtv)); + CDEBUG (D_KML, " old_parv::%s\n", kml_version (&rec->old_parentv)); + CDEBUG (D_KML, " new_parv::%s\n", kml_version (&rec->new_parentv)); +} + +static void print_kml_rmdir (struct kml_rmdir *rec) +{ + CDEBUG (D_KML, " === RMDIR\n"); + CDEBUG (D_KML, " path::%s/%s\n", rec->path, rec->name); + CDEBUG (D_KML, " old_tgtv::%s\n", kml_version (&rec->old_tgtv)); + CDEBUG (D_KML, " old_parv::%s\n", kml_version (&rec->old_parentv)); + CDEBUG (D_KML, " new_parv::%s\n", kml_version (&rec->new_parentv)); +} + +static void print_kml_close (struct kml_close *rec) +{ + CDEBUG (D_KML, " === CLOSE\n"); + CDEBUG (D_KML, " mode::%o\n", rec->open_mode); + CDEBUG (D_KML, " uid::%d\n", rec->open_uid); + CDEBUG (D_KML, " gid::%d\n", rec->open_gid); + CDEBUG (D_KML, " path::%s\n", rec->path); + CDEBUG (D_KML, " new_objv::%s\n", kml_version (&rec->new_objectv)); + CDEBUG (D_KML, " ino::%lld\n", rec->ino); + CDEBUG (D_KML, " gen::%u\n", rec->generation); +} + +static void print_kml_symlink (struct kml_symlink *rec) +{ + CDEBUG (D_KML, " === SYMLINK\n"); + CDEBUG (D_KML, " s-path::%s\n", rec->sourcepath); + CDEBUG (D_KML, " 
t-path::%s\n", rec->targetpath); + CDEBUG (D_KML, " old_parv::%s\n", kml_version (&rec->old_parentv)); + CDEBUG (D_KML, " new_parv::%s\n", kml_version (&rec->new_parentv)); + CDEBUG (D_KML, " new_objv::%s\n", kml_version (&rec->new_objectv)); + CDEBUG (D_KML, " uid::%d\n", rec->uid); + CDEBUG (D_KML, " gid::%d\n", rec->gid); +} + +static void print_kml_rename (struct kml_rename *rec) +{ + CDEBUG (D_KML, " === RENAME\n"); + CDEBUG (D_KML, " s-path::%s\n", rec->sourcepath); + CDEBUG (D_KML, " t-path::%s\n", rec->targetpath); + CDEBUG (D_KML, " old_tgtv::%s\n", kml_version (&rec->old_tgtv)); + CDEBUG (D_KML, " new_tgtv::%s\n", kml_version (&rec->new_tgtv)); + CDEBUG (D_KML, " new_objv::%s\n", kml_version (&rec->new_objectv)); + CDEBUG (D_KML, " old_objv::%s\n", kml_version (&rec->old_objectv)); +} + +static void print_kml_setattr (struct kml_setattr *rec) +{ + CDEBUG (D_KML, " === SETATTR\n"); + CDEBUG (D_KML, " path::%s\n", rec->path); + CDEBUG (D_KML, " old_objv::%s\n", kml_version (&rec->old_objectv)); + CDEBUG (D_KML, " valid::0x%x\n", rec->iattr.ia_valid); + CDEBUG (D_KML, " mode::%o\n", rec->iattr.ia_mode); + CDEBUG (D_KML, " uid::%d\n", rec->iattr.ia_uid); + CDEBUG (D_KML, " gid::%d\n", rec->iattr.ia_gid); + CDEBUG (D_KML, " size::%u\n", (u32) rec->iattr.ia_size); + CDEBUG (D_KML, " mtime::%u\n", (u32) rec->iattr.ia_mtime); + CDEBUG (D_KML, " ctime::%u\n", (u32) rec->iattr.ia_ctime); + CDEBUG (D_KML, " flags::%u\n", (u32) rec->iattr.ia_attr_flags); +} + +static void print_kml_link (struct kml_link *rec) +{ + CDEBUG (D_KML, " === LINK\n"); + CDEBUG (D_KML, " path::%s ==> %s\n", rec->sourcepath, rec->targetpath); + CDEBUG (D_KML, " old_parv::%s\n", kml_version (&rec->old_parentv)); + CDEBUG (D_KML, " new_obj::%s\n", kml_version (&rec->new_objectv)); + CDEBUG (D_KML, " new_parv::%s\n", kml_version (&rec->new_parentv)); +} + +static void print_kml_mknod (struct kml_mknod *rec) +{ + CDEBUG (D_KML, " === MKNOD\n"); + CDEBUG (D_KML, " path::%s\n", rec->path); + CDEBUG 
(D_KML, " new_obj::%s\n", kml_version (&rec->new_objectv)); + CDEBUG (D_KML, " old_parv::%s\n", kml_version (&rec->old_parentv)); + CDEBUG (D_KML, " new_parv::%s\n", kml_version (&rec->new_parentv)); + CDEBUG (D_KML, " mode::%o\n", rec->mode); + CDEBUG (D_KML, " uid::%d\n", rec->uid); + CDEBUG (D_KML, " gid::%d\n", rec->gid); + CDEBUG (D_KML, " major::%d\n", rec->major); + CDEBUG (D_KML, " minor::%d\n", rec->minor); +} + +static void print_kml_open (struct kml_open *rec) +{ + CDEBUG (D_KML, " === OPEN\n"); +} + +#if 0 +static void print_kml_endmark (struct kml_endmark *rec) +{ + int i; + CDEBUG (D_KML, " === ENDMARK\n"); + CDEBUG (D_KML, " total::%u\n", rec->total); + for (i = 0; i < rec->total; i++) + { + CDEBUG (D_KML, " recno=%ld::flag=%ld,op=%ld, i_ino=%ld, \ + i_nlink=%ld\n", (long) rec->kop[i].kml_recno, + (long) rec->kop[i].kml_flag, (long) rec->kop[i].kml_op, + (long) rec->kop[i].i_ino, (long) rec->kop[i].i_nlink); + } +} +#endif + +static void print_kml_optimize (struct kml_optimize *rec) +{ + CDEBUG (D_KML, " === OPTIMIZE\n"); + if (rec->kml_flag == KML_REC_DELETE) + CDEBUG (D_KML, " kml_flag::deleted\n"); + else + CDEBUG (D_KML, " kml_flag::exist\n"); + CDEBUG (D_KML, " kml_op::%u\n", rec->kml_op); + CDEBUG (D_KML, " i_nlink::%d\n", rec->i_nlink); + CDEBUG (D_KML, " i_ino::%u\n", rec->i_ino); +} + +static void print_kml_suffix (struct journal_suffix *tail) +{ + CDEBUG (D_KML, " === KML SUFFIX\n"); + CDEBUG (D_KML, " prevrec::%ld\n", tail->prevrec); + CDEBUG (D_KML, " recno::%ld\n", (long) tail->recno); + CDEBUG (D_KML, " time::%d\n", tail->time); + CDEBUG (D_KML, " len::%d\n", tail->len); +} + +void kml_printrec (struct kml_rec *rec, int kml_printop) +{ + if (kml_printop & PRINT_KML_PREFIX) + print_kml_prefix (&rec->rec_head); + if (kml_printop & PRINT_KML_REC) + { + switch (rec->rec_head.opcode) + { + case KML_CREATE: + print_kml_create (&rec->rec_kml.create); + break; + case KML_MKDIR: + print_kml_mkdir (&rec->rec_kml.mkdir); + break; + case 
KML_UNLINK: + print_kml_unlink (&rec->rec_kml.unlink); + break; + case KML_RMDIR: + print_kml_rmdir (&rec->rec_kml.rmdir); + break; + case KML_CLOSE: + print_kml_close (&rec->rec_kml.close); + break; + case KML_SYMLINK: + print_kml_symlink (&rec->rec_kml.symlink); + break; + case KML_RENAME: + print_kml_rename (&rec->rec_kml.rename); + break; + case KML_SETATTR: + print_kml_setattr (&rec->rec_kml.setattr); + break; + case KML_LINK: + print_kml_link (&rec->rec_kml.link); + break; + case KML_OPEN: + print_kml_open (&rec->rec_kml.open); + break; + case KML_MKNOD: + print_kml_mknod (&rec->rec_kml.mknod); + break; +#if 0 + case KML_ENDMARK: + print_kml_endmark (&rec->rec_kml.endmark); +#endif + break; + default: + CDEBUG (D_KML, " === BAD RECORD, opcode=%u\n", + rec->rec_head.opcode); + break; + } + } + if (kml_printop & PRINT_KML_SUFFIX) + print_kml_suffix (&rec->rec_tail); + if (kml_printop & PRINT_KML_OPTIMIZE) + print_kml_optimize (&rec->kml_optimize); +} + +void kml_freerec (struct kml_rec *rec) +{ + char *sourcepath = NULL, + *targetpath = NULL; + switch (rec->rec_head.opcode) + { + case KML_CREATE: + sourcepath = rec->rec_kml.create.path; + break; + case KML_MKDIR: + sourcepath = rec->rec_kml.create.path; + break; + case KML_UNLINK: + sourcepath = rec->rec_kml.unlink.path; + targetpath = rec->rec_kml.unlink.name; + break; + case KML_RMDIR: + sourcepath = rec->rec_kml.rmdir.path; + targetpath = rec->rec_kml.rmdir.name; + break; + case KML_CLOSE: + sourcepath = rec->rec_kml.close.path; + break; + case KML_SYMLINK: + sourcepath = rec->rec_kml.symlink.sourcepath; + targetpath = rec->rec_kml.symlink.targetpath; + break; + case KML_RENAME: + sourcepath = rec->rec_kml.rename.sourcepath; + targetpath = rec->rec_kml.rename.targetpath; + break; + case KML_SETATTR: + sourcepath = rec->rec_kml.setattr.path; + break; + case KML_LINK: + sourcepath = rec->rec_kml.link.sourcepath; + targetpath = rec->rec_kml.link.targetpath; + break; + case KML_OPEN: + break; + case KML_MKNOD: + 
sourcepath = rec->rec_kml.mknod.path; + break; +#if 0 + case KML_ENDMARK: + PRESTO_FREE (rec->rec_kml.endmark.kop, sizeof (int) + + sizeof (struct kml_kop_node) * + rec->rec_kml.endmark.total); +#endif + break; + default: + break; + } + if (sourcepath != NULL) + PRESTO_FREE (sourcepath, strlen (sourcepath) + 1); + if (targetpath != NULL) + PRESTO_FREE (targetpath, strlen (targetpath) + 1); +} + +char *readrec (char *recbuf, int reclen, int pos, int *size) +{ + char *p = recbuf + pos; + *size = *((int *) p); + if (*size > (reclen - pos)) + return NULL; + return p; +} + +int kml_decoderec (char *buf, int pos, int buflen, int *size, + struct kml_rec **newrec) +{ + char *tmp; + int error; + tmp = readrec (buf, buflen, pos, size); + if (tmp == NULL) + return -EBADF; + error = kml_unpack (tmp, *size, pos, newrec); + return error; +} + +#if 0 +static void fill_kmlrec_optimize (struct list_head *head, + struct kml_rec *optrec) +{ + struct kml_rec *kmlrec; + struct list_head *tmp; + struct kml_endmark *km; + struct kml_optimize *ko; + int n; + + if (optrec->rec_kml.endmark.total == 0) + return; + n = optrec->rec_kml.endmark.total - 1; + tmp = head->prev; + km = &optrec->rec_kml.endmark; + while ( n >= 0 && tmp != head ) + { + kmlrec = list_entry(tmp, struct kml_rec, + kml_optimize.kml_chains); + tmp = tmp->prev; + if (kmlrec->rec_tail.recno == km->kop[n].kml_recno) + { + ko = &kmlrec->kml_optimize; + ko->kml_flag = km->kop[n].kml_flag; + ko->kml_op = km->kop[n].kml_op; + ko->i_nlink = km->kop[n].i_nlink; + ko->i_ino = km->kop[n].i_ino; + n --; + } + } + if (n != -1) + CDEBUG (D_KML, "Yeah!!!, KML optimize error, recno=%d, n=%d\n", + optrec->rec_tail.recno, n); +} +#endif + +int decode_kmlrec (struct list_head *head, char *kml_buf, int buflen) +{ + struct kml_rec *rec; + int pos = 0, size; + int err; + while (pos < buflen) { + err = kml_decoderec (kml_buf, pos, buflen, &size, &rec); + if (err != 0) + break; +#if 0 + if (rec->rec_head.opcode == KML_ENDMARK) { + 
fill_kmlrec_optimize (head, rec); + mark_rec_deleted (rec); + } +#endif + list_add_tail (&rec->kml_optimize.kml_chains, head); + pos += size; + } + return err; +} + +int delete_kmlrec (struct list_head *head) +{ + struct kml_rec *rec; + struct list_head *tmp; + + if (list_empty(head)) + return 0; + tmp = head->next; + while ( tmp != head ) { + rec = list_entry(tmp, struct kml_rec, + kml_optimize.kml_chains); + tmp = tmp->next; + kml_freerec (rec); + } + INIT_LIST_HEAD(head); + return 0; +} + +int print_allkmlrec (struct list_head *head, int printop) +{ + struct kml_rec *rec; + struct list_head *tmp; + + if (list_empty(head)) + return 0; + tmp = head->next; + while ( tmp != head ) { + rec = list_entry(tmp, struct kml_rec, + kml_optimize.kml_chains); + tmp = tmp->next; +#if 0 + if (printop & PRINT_KML_EXIST) { + if (is_deleted_node (rec)) + continue; + } + else if (printop & PRINT_KML_DELETE) { + if (! is_deleted_node (rec)) + continue; + } +#endif + kml_printrec (rec, printop); + } + INIT_LIST_HEAD(head); + return 0; +} + diff --git a/fs/intermezzo/kml_reint.c b/fs/intermezzo/kml_reint.c new file mode 100644 index 000000000..e447b766e --- /dev/null +++ b/fs/intermezzo/kml_reint.c @@ -0,0 +1,647 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Reintegration of KML records + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + +static void kmlreint_pre_secure(struct kml_rec *rec, struct file *dir, + struct run_ctxt *saved) +{ + struct run_ctxt ctxt; + struct presto_dentry_data *dd = presto_d2d(dir->f_dentry); + int i; + + ctxt.fsuid = rec->prefix.hdr->fsuid; + ctxt.fsgid = rec->prefix.hdr->fsgid; + ctxt.fs = KERNEL_DS; + ctxt.pwd = dd->dd_fset->fset_dentry; + ctxt.pwdmnt = dd->dd_fset->fset_mnt; + + ctxt.root = ctxt.pwd; + ctxt.rootmnt = ctxt.pwdmnt; + if (rec->prefix.hdr->ngroups > 0) { + ctxt.group_info = groups_alloc(rec->prefix.hdr->ngroups); + for (i = 0; i< ctxt.group_info->ngroups; i++) + GROUP_AT(ctxt.group_info,i)= rec->prefix.groups[i]; + } else + ctxt.group_info = groups_alloc(0); + + push_ctxt(saved, &ctxt); +} + + +/* Append two strings in a less-retarded fashion. 
*/ +static char * path_join(char *p1, int p1len, char *p2, int p2len) +{ + int size = p1len + p2len + 2; /* possibly one extra /, one NULL */ + char *path; + + path = kmalloc(size, GFP_KERNEL); + if (path == NULL) + return NULL; + + memcpy(path, p1, p1len); + if (path[p1len - 1] != '/') { + path[p1len] = '/'; + p1len++; + } + memcpy(path + p1len, p2, p2len); + path[p1len + p2len] = '\0'; + + return path; +} + +static inline int kml_recno_equal(struct kml_rec *rec, + struct presto_file_set *fset) +{ + return (rec->suffix->recno == fset->fset_lento_recno + 1); +} + +static inline int version_equal(struct presto_version *a, struct inode *inode) +{ + if (a == NULL) + return 1; + + if (inode == NULL) { + CERROR("InterMezzo: NULL inode in version_equal()\n"); + return 0; + } + + if (inode->i_mtime.tv_sec == a->pv_mtime_sec && + inode->i_mtime.tv_nsec == a->pv_mtime_nsec && + (S_ISDIR(inode->i_mode) || inode->i_size == a->pv_size)) + return 1; + + return 0; +} + +static int reint_close(struct kml_rec *rec, struct file *file, + struct lento_vfs_context *given_info) +{ + struct run_ctxt saved_ctxt; + int error; + struct presto_file_set *fset; + struct lento_vfs_context info; + ENTRY; + + memcpy(&info, given_info, sizeof(*given_info)); + + + CDEBUG (D_KML, "=====REINT_CLOSE::%s\n", rec->path); + + fset = presto_fset(file->f_dentry); + if (fset->fset_flags & FSET_DATA_ON_DEMAND) { + struct iattr iattr; + + iattr.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_SIZE; + iattr.ia_mtime.tv_sec = (time_t)rec->new_objectv->pv_mtime_sec; + iattr.ia_mtime.tv_nsec = (time_t)rec->new_objectv->pv_mtime_nsec; + iattr.ia_ctime.tv_sec = (time_t)rec->new_objectv->pv_ctime_sec; + iattr.ia_ctime.tv_nsec = (time_t)rec->new_objectv->pv_ctime_nsec; + iattr.ia_size = (time_t)rec->new_objectv->pv_size; + + /* no kml record, but update last rcvd */ + /* save fileid in dentry for later backfetch */ + info.flags |= LENTO_FL_EXPECT | LENTO_FL_SET_DDFILEID; + info.remote_ino = rec->ino; + 
info.remote_generation = rec->generation; + info.flags &= ~LENTO_FL_KML; + kmlreint_pre_secure(rec, file, &saved_ctxt); + error = lento_setattr(rec->path, &iattr, &info); + pop_ctxt(&saved_ctxt); + + presto_d2d(file->f_dentry)->dd_flags &= ~PRESTO_DATA; + } else { + int minor = presto_f2m(fset); + + info.updated_time.tv_sec = rec->new_objectv->pv_mtime_sec; + info.updated_time.tv_nsec = rec->new_objectv->pv_mtime_nsec; + memcpy(&info.remote_version, rec->old_objectv, + sizeof(*rec->old_objectv)); + info.remote_ino = rec->ino; + info.remote_generation = rec->generation; + error = izo_upc_backfetch(minor, rec->path, fset->fset_name, + &info); + if (error) { + CERROR("backfetch error %d\n", error); + /* if file doesn't exist anymore, then ignore the CLOSE + * and just update the last_rcvd. + */ + if (error == ENOENT) { + CDEBUG(D_KML, "manually updating remote offset uuid %s" + "recno %d offset %Lu\n", info.uuid, info.recno, + (unsigned long long) info.kml_offset); + error = izo_rcvd_upd_remote(fset, info.uuid, info.recno, info.kml_offset); + if(error) + CERROR("izo_rcvd_upd_remote error %d\n", error); + + } + } + + /* propagate error to avoid further reint */ + } + + EXIT; + return error; +} + +static int reint_create(struct kml_rec *rec, struct file *dir, + struct lento_vfs_context *info) +{ + struct run_ctxt saved_ctxt; + int error; ENTRY; + + CDEBUG (D_KML, "=====REINT_CREATE::%s\n", rec->path); + info->updated_time.tv_sec = rec->new_objectv->pv_ctime_sec; + info->updated_time.tv_nsec = rec->new_objectv->pv_ctime_nsec; + kmlreint_pre_secure(rec, dir, &saved_ctxt); + error = lento_create(rec->path, rec->mode, info); + pop_ctxt(&saved_ctxt); + + EXIT; + return error; +} + +static int reint_link(struct kml_rec *rec, struct file *dir, + struct lento_vfs_context *info) +{ + struct run_ctxt saved_ctxt; + int error; + + ENTRY; + + CDEBUG (D_KML, "=====REINT_LINK::%s -> %s\n", rec->path, rec->target); + info->updated_time.tv_sec = rec->new_objectv->pv_mtime_sec; + 
info->updated_time.tv_nsec = rec->new_objectv->pv_mtime_nsec; + kmlreint_pre_secure(rec, dir, &saved_ctxt); + error = lento_link(rec->path, rec->target, info); + pop_ctxt(&saved_ctxt); + + EXIT; + return error; +} + +static int reint_mkdir(struct kml_rec *rec, struct file *dir, + struct lento_vfs_context *info) +{ + struct run_ctxt saved_ctxt; + int error; + + ENTRY; + + CDEBUG (D_KML, "=====REINT_MKDIR::%s\n", rec->path); + info->updated_time.tv_sec = rec->new_objectv->pv_ctime_sec; + info->updated_time.tv_nsec = rec->new_objectv->pv_ctime_nsec; + kmlreint_pre_secure(rec, dir, &saved_ctxt); + error = lento_mkdir(rec->path, rec->mode, info); + pop_ctxt(&saved_ctxt); + + EXIT; + return error; +} + +static int reint_mknod(struct kml_rec *rec, struct file *dir, + struct lento_vfs_context *info) +{ + struct run_ctxt saved_ctxt; + int error; + dev_t dev; + + ENTRY; + + CDEBUG (D_KML, "=====REINT_MKNOD::%s\n", rec->path); + info->updated_time.tv_sec = rec->new_objectv->pv_ctime_sec; + info->updated_time.tv_nsec = rec->new_objectv->pv_ctime_nsec; + kmlreint_pre_secure(rec, dir, &saved_ctxt); + + dev = rec->rdev ? 
old_decode_dev(rec->rdev) : MKDEV(rec->major, rec->minor); + + error = lento_mknod(rec->path, rec->mode, dev, info); + pop_ctxt(&saved_ctxt); + + EXIT; + return error; +} + + +static int reint_noop(struct kml_rec *rec, struct file *dir, + struct lento_vfs_context *info) +{ + return 0; +} + +static int reint_rename(struct kml_rec *rec, struct file *dir, + struct lento_vfs_context *info) +{ + struct run_ctxt saved_ctxt; + int error; + + ENTRY; + + CDEBUG (D_KML, "=====REINT_RENAME::%s -> %s\n", rec->path, rec->target); + info->updated_time.tv_sec = rec->new_objectv->pv_mtime_sec; + info->updated_time.tv_nsec = rec->new_objectv->pv_mtime_nsec; + kmlreint_pre_secure(rec, dir, &saved_ctxt); + error = lento_rename(rec->path, rec->target, info); + pop_ctxt(&saved_ctxt); + + EXIT; + return error; +} + +static int reint_rmdir(struct kml_rec *rec, struct file *dir, + struct lento_vfs_context *info) +{ + struct run_ctxt saved_ctxt; + int error; + char *path; + + ENTRY; + + path = path_join(rec->path, rec->pathlen - 1, rec->target, rec->targetlen); + if (path == NULL) { + EXIT; + return -ENOMEM; + } + + CDEBUG (D_KML, "=====REINT_RMDIR::%s\n", path); + info->updated_time.tv_sec = rec->new_parentv->pv_mtime_sec; + info->updated_time.tv_nsec = rec->new_parentv->pv_mtime_nsec; + kmlreint_pre_secure(rec, dir, &saved_ctxt); + error = lento_rmdir(path, info); + pop_ctxt(&saved_ctxt); + + kfree(path); + EXIT; + return error; +} + +static int reint_setattr(struct kml_rec *rec, struct file *dir, + struct lento_vfs_context *info) +{ + struct run_ctxt saved_ctxt; + struct iattr iattr; + int error; + + ENTRY; + + iattr.ia_valid = rec->valid; + iattr.ia_mode = (umode_t)rec->mode; + iattr.ia_uid = (uid_t)rec->uid; + iattr.ia_gid = (gid_t)rec->gid; + iattr.ia_size = (off_t)rec->size; + iattr.ia_ctime.tv_sec = rec->ctime_sec; + iattr.ia_ctime.tv_nsec = rec->ctime_nsec; + iattr.ia_mtime.tv_sec = rec->mtime_sec; + iattr.ia_mtime.tv_nsec = rec->mtime_nsec; + iattr.ia_atime = iattr.ia_mtime; /* 
We don't track atimes. */ + iattr.ia_attr_flags = rec->flags; + + CDEBUG (D_KML, "=====REINT_SETATTR::%s (%d)\n", rec->path, rec->valid); + kmlreint_pre_secure(rec, dir, &saved_ctxt); + error = lento_setattr(rec->path, &iattr, info); + pop_ctxt(&saved_ctxt); + + EXIT; + return error; +} + +static int reint_symlink(struct kml_rec *rec, struct file *dir, + struct lento_vfs_context *info) +{ + struct run_ctxt saved_ctxt; + int error; + + ENTRY; + + CDEBUG (D_KML, "=====REINT_SYMLINK::%s -> %s\n", rec->path, rec->target); + info->updated_time.tv_sec = rec->new_objectv->pv_ctime_sec; + info->updated_time.tv_nsec = rec->new_objectv->pv_ctime_nsec; + kmlreint_pre_secure(rec, dir, &saved_ctxt); + error = lento_symlink(rec->target, rec->path, info); + pop_ctxt(&saved_ctxt); + + EXIT; + return error; +} + +static int reint_unlink(struct kml_rec *rec, struct file *dir, + struct lento_vfs_context *info) +{ + struct run_ctxt saved_ctxt; + int error; + char *path; + + ENTRY; + + path = path_join(rec->path, rec->pathlen - 1, rec->target, rec->targetlen); + if (path == NULL) { + EXIT; + return -ENOMEM; + } + + CDEBUG (D_KML, "=====REINT_UNLINK::%s\n", path); + info->updated_time.tv_sec = rec->new_parentv->pv_mtime_sec; + info->updated_time.tv_nsec = rec->new_parentv->pv_mtime_nsec; + kmlreint_pre_secure(rec, dir, &saved_ctxt); + error = lento_unlink(path, info); + pop_ctxt(&saved_ctxt); + + kfree(path); + EXIT; + return error; +} + +static int branch_reint_rename(struct presto_file_set *fset, struct kml_rec *rec, + struct file *dir, struct lento_vfs_context *info, + char * kml_data, __u64 kml_size) +{ + int error; + + ENTRY; + + error = reint_rename(rec, dir, info); + if (error == -ENOENT) { + /* normal reint failed because path was not found */ + struct rec_info rec; + + CDEBUG(D_KML, "saving branch rename kml\n"); + rec.is_kml = 1; + rec.size = kml_size; + error = presto_log(fset, &rec, kml_data, kml_size, + NULL, 0, NULL, 0, NULL, 0); + if (error == 0) + error = 
presto_write_last_rcvd(&rec, fset, info); + } + + EXIT; + return error; +} + +int branch_reinter(struct presto_file_set *fset, struct kml_rec *rec, + struct file *dir, struct lento_vfs_context *info, + char * kml_data, __u64 kml_size) +{ + int error = 0; + int op = rec->prefix.hdr->opcode; + + if (op == KML_OPCODE_CLOSE) { + /* regular close and backfetch */ + error = reint_close(rec, dir, info); + } else if (op == KML_OPCODE_RENAME) { + /* rename only if name already exists */ + error = branch_reint_rename(fset, rec, dir, info, + kml_data, kml_size); + } else { + /* just rewrite kml into branch/kml and update last_rcvd */ + struct rec_info rec; + + CDEBUG(D_KML, "Saving branch kml\n"); + rec.is_kml = 1; + rec.size = kml_size; + error = presto_log(fset, &rec, kml_data, kml_size, + NULL, 0, NULL, 0, NULL, 0); + if (error == 0) + error = presto_write_last_rcvd(&rec, fset, info); + } + + return error; +} + +typedef int (*reinter_t)(struct kml_rec *rec, struct file *basedir, + struct lento_vfs_context *info); + +static reinter_t presto_reinters[KML_OPCODE_NUM] = +{ + [KML_OPCODE_CLOSE] = reint_close, + [KML_OPCODE_CREATE] = reint_create, + [KML_OPCODE_LINK] = reint_link, + [KML_OPCODE_MKDIR] = reint_mkdir, + [KML_OPCODE_MKNOD] = reint_mknod, + [KML_OPCODE_NOOP] = reint_noop, + [KML_OPCODE_RENAME] = reint_rename, + [KML_OPCODE_RMDIR] = reint_rmdir, + [KML_OPCODE_SETATTR] = reint_setattr, + [KML_OPCODE_SYMLINK] = reint_symlink, + [KML_OPCODE_UNLINK] = reint_unlink, +}; + +static inline reinter_t get_reinter(int op) +{ + if (op < 0 || op >= sizeof(presto_reinters) / sizeof(reinter_t)) + return NULL; + else + return presto_reinters[op]; +} + +int kml_reint_rec(struct file *dir, struct izo_ioctl_data *data) +{ + char *ptr; + char *end; + struct kml_rec rec; + int error = 0; + struct lento_vfs_context info; + struct presto_cache *cache; + struct presto_file_set *fset; + struct presto_dentry_data *dd = presto_d2d(dir->f_dentry); + int op; + reinter_t reinter; + + struct 
izo_rcvd_rec lr_rec; + int off; + + ENTRY; + + error = presto_prep(dir->f_dentry, &cache, &fset); + if ( error ) { + CERROR("intermezzo: Reintegration on invalid file\n"); + return error; + } + + if (!dd || !dd->dd_fset || dd->dd_fset->fset_dentry != dir->f_dentry) { + CERROR("intermezzo: reintegration on non-fset root (ino %ld)\n", + dir->f_dentry->d_inode->i_ino); + + return -EINVAL; + } + + if (data->ioc_plen1 > 64 * 1024) { + EXIT; + return -ENOSPC; + } + + ptr = fset->fset_reint_buf; + end = ptr + data->ioc_plen1; + + if (copy_from_user(ptr, data->ioc_pbuf1, data->ioc_plen1)) { + EXIT; + error = -EFAULT; + goto out; + } + + error = kml_unpack(&rec, &ptr, end); + if (error) { + EXIT; + error = -EFAULT; + goto out; + } + + off = izo_rcvd_get(&lr_rec, fset, data->ioc_uuid); + if (off < 0) { + CERROR("No last_rcvd record, setting to 0\n"); + memset(&lr_rec, 0, sizeof(lr_rec)); + } + + data->ioc_kmlsize = ptr - fset->fset_reint_buf; + + if (rec.suffix->recno != lr_rec.lr_remote_recno + 1) { + CERROR("KML record number %Lu expected, not %d\n", + (unsigned long long) (lr_rec.lr_remote_recno + 1), + rec.suffix->recno); + +#if 0 + if (!version_check(&rec, dd->dd_fset, &info)) { + /* FIXME: do an upcall to resolve conflicts */ + CERROR("intermezzo: would be a conflict!\n"); + error = -EINVAL; + EXIT; + goto out; + } +#endif + } + + op = rec.prefix.hdr->opcode; + + reinter = get_reinter(op); + if (!reinter) { + CERROR("%s: Unrecognized KML opcode %d\n", __FUNCTION__, op); + error = -EINVAL; + EXIT; + goto out; + } + + info.kml_offset = data->ioc_offset + data->ioc_kmlsize; + info.recno = rec.suffix->recno; + info.flags = LENTO_FL_EXPECT; + if (data->ioc_flags) + info.flags |= LENTO_FL_KML; + + memcpy(info.uuid, data->ioc_uuid, sizeof(info.uuid)); + + if (fset->fset_flags & FSET_IS_BRANCH && data->ioc_flags) + error = branch_reinter(fset, &rec, dir, &info, fset->fset_reint_buf, + data->ioc_kmlsize); + else + error = reinter(&rec, dir, &info); + out: + EXIT; + return 
error; +} + +int izo_get_fileid(struct file *dir, struct izo_ioctl_data *data) +{ + char *buf = NULL; + char *ptr; + char *end; + struct kml_rec rec; + struct file *file; + struct presto_cache *cache; + struct presto_file_set *fset; + struct presto_dentry_data *dd = presto_d2d(dir->f_dentry); + struct run_ctxt saved_ctxt; + int error; + + ENTRY; + + error = presto_prep(dir->f_dentry, &cache, &fset); + if ( error ) { + CERROR("intermezzo: Reintegration on invalid file\n"); + return error; + } + + if (!dd || !dd->dd_fset || dd->dd_fset->fset_dentry != dir->f_dentry) { + CERROR("intermezzo: reintegration on non-fset root (ino %ld)\n", + dir->f_dentry->d_inode->i_ino); + + return -EINVAL; + } + + + PRESTO_ALLOC(buf, data->ioc_plen1); + if (!buf) { + EXIT; + return -ENOMEM; + } + ptr = buf; + end = buf + data->ioc_plen1; + + if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) { + EXIT; + PRESTO_FREE(buf, data->ioc_plen1); + return -EFAULT; + } + + error = kml_unpack(&rec, &ptr, end); + if (error) { + EXIT; + PRESTO_FREE(buf, data->ioc_plen1); + return -EFAULT; + } + + kmlreint_pre_secure(&rec, dir, &saved_ctxt); + + file = filp_open(rec.path, O_RDONLY, 0); + if (!file || IS_ERR(file)) { + error = PTR_ERR(file); + goto out; + } + data->ioc_ino = file->f_dentry->d_inode->i_ino; + data->ioc_generation = file->f_dentry->d_inode->i_generation; + filp_close(file, 0); + + CDEBUG(D_FILE, "%s ino %Lx, gen %Lx\n", rec.path, + (unsigned long long) data->ioc_ino, + (unsigned long long) data->ioc_generation); + + out: + if (buf) + PRESTO_FREE(buf, data->ioc_plen1); + pop_ctxt(&saved_ctxt); + EXIT; + return error; +} + + diff --git a/fs/intermezzo/kml_setup.c b/fs/intermezzo/kml_setup.c new file mode 100644 index 000000000..8a017180f --- /dev/null +++ b/fs/intermezzo/kml_setup.c @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include + +#include "intermezzo_fs.h" +#include "intermezzo_upcall.h" +#include "intermezzo_psdev.h" +#include "intermezzo_kml.h" + +int 
kml_init (struct presto_file_set *fset) +{ + struct kml_fsdata *data; + + ENTRY; + PRESTO_ALLOC (data, struct kml_fsdata *, sizeof (struct kml_fsdata)); + if (data == NULL) { + EXIT; + return -ENOMEM; + } + INIT_LIST_HEAD (&data->kml_reint_cache); + INIT_LIST_HEAD (&data->kml_kop_cache); + + PRESTO_ALLOC (data->kml_buf, char *, KML_REINT_MAXBUF); + if (data->kml_buf == NULL) { + PRESTO_FREE (data, sizeof (struct kml_fsdata)); + EXIT; + return -ENOMEM; + } + + data->kml_maxsize = KML_REINT_MAXBUF; + data->kml_len = 0; + data->kml_reintpos = 0; + data->kml_count = 0; + fset->fset_kmldata = data; + EXIT; + return 0; +} + +int kml_cleanup (struct presto_file_set *fset) +{ + struct kml_fsdata *data = fset->fset_kmldata; + + if (data == NULL) + return 0; + + fset->fset_kmldata = NULL; +#if 0 + kml_sop_cleanup (&data->kml_reint_cache); + kml_kop_cleanup (&data->kml_kop_cache); +#endif + PRESTO_FREE (data->kml_buf, KML_REINT_MAXBUF); + PRESTO_FREE (data, sizeof (struct kml_fsdata)); + return 0; +} + + diff --git a/fs/intermezzo/kml_unpack.c b/fs/intermezzo/kml_unpack.c new file mode 100644 index 000000000..d12a346b3 --- /dev/null +++ b/fs/intermezzo/kml_unpack.c @@ -0,0 +1,712 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Unpacking of KML records + * + */ + +#ifdef __KERNEL__ +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +#else +# include +# include +# include +# include +# include +# include +# include +#endif + +#include "intermezzo_lib.h" +#include "intermezzo_idl.h" +#include "intermezzo_fs.h" + +int kml_unpack_version(struct presto_version **ver, char **buf, char *end) +{ + char *ptr = *buf; + struct presto_version *pv; + + UNLOGP(*ver, struct presto_version, ptr, end); + pv = *ver; + pv->pv_mtime_sec = NTOH__u32(pv->pv_mtime_sec); + pv->pv_mtime_nsec = NTOH__u32(pv->pv_mtime_nsec); + pv->pv_ctime_sec = NTOH__u32(pv->pv_ctime_sec); + pv->pv_ctime_nsec = NTOH__u32(pv->pv_ctime_nsec); + pv->pv_size = NTOH__u64(pv->pv_size); + + *buf = ptr; + + return 0; +} + + +static int kml_unpack_noop(struct kml_rec *rec, char **buf, char *end) +{ + return 0; +} + + +static int kml_unpack_get_fileid(struct kml_rec *rec, char **buf, char *end) +{ + char *ptr = *buf; + + LUNLOGV(rec->pathlen, __u32, ptr, end); + UNLOGL(rec->path, char, rec->pathlen, ptr, end); + + *buf = ptr; + return 0; +} + +static int kml_unpack_create(struct kml_rec *rec, char **buf, char *end) +{ + char *ptr = *buf; + + kml_unpack_version(&rec->old_parentv, &ptr, end); + kml_unpack_version(&rec->new_parentv, &ptr, end); + kml_unpack_version(&rec->new_objectv, &ptr, end); + LUNLOGV(rec->mode, __u32, ptr, end); + LUNLOGV(rec->uid, __u32, ptr, end); + LUNLOGV(rec->gid, __u32, ptr, end); + LUNLOGV(rec->pathlen, __u32, ptr, end); + UNLOGL(rec->path, char, rec->pathlen, ptr, end); + + *buf = ptr; + + return 0; +} + + +static 
int kml_unpack_mkdir(struct kml_rec *rec, char **buf, char *end) +{ + char *ptr = *buf; + + kml_unpack_version(&rec->old_parentv, &ptr, end); + kml_unpack_version(&rec->new_parentv, &ptr, end); + kml_unpack_version(&rec->new_objectv, &ptr, end); + LUNLOGV(rec->mode, __u32, ptr, end); + LUNLOGV(rec->uid, __u32, ptr, end); + LUNLOGV(rec->gid, __u32, ptr, end); + LUNLOGV(rec->pathlen, __u32, ptr, end); + UNLOGL(rec->path, char, rec->pathlen, ptr, end); + + *buf = ptr; + + return 0; +} + + +static int kml_unpack_unlink(struct kml_rec *rec, char **buf, char *end) +{ + char *ptr = *buf; + + kml_unpack_version(&rec->old_parentv, &ptr, end); + kml_unpack_version(&rec->new_parentv, &ptr, end); + kml_unpack_version(&rec->old_objectv, &ptr, end); + LUNLOGV(rec->old_mode, __u32, ptr, end); + LUNLOGV(rec->old_rdev, __u32, ptr, end); + LUNLOGV(rec->old_uid, __u64, ptr, end); + LUNLOGV(rec->old_gid, __u64, ptr, end); + LUNLOGV(rec->pathlen, __u32, ptr, end); + LUNLOGV(rec->targetlen, __u32, ptr, end); + LUNLOGV(rec->old_targetlen, __u32, ptr, end); + UNLOGL(rec->path, char, rec->pathlen, ptr, end); + UNLOGL(rec->target, char, rec->targetlen, ptr, end); + UNLOGL(rec->old_target, char, rec->old_targetlen, ptr, end); + + *buf = ptr; + + return 0; +} + + +static int kml_unpack_rmdir(struct kml_rec *rec, char **buf, char *end) +{ + char *ptr = *buf; + + kml_unpack_version(&rec->old_parentv, &ptr, end); + kml_unpack_version(&rec->new_parentv, &ptr, end); + kml_unpack_version(&rec->old_objectv, &ptr, end); + LUNLOGV(rec->old_mode, __u32, ptr, end); + LUNLOGV(rec->old_rdev, __u32, ptr, end); + LUNLOGV(rec->old_uid, __u64, ptr, end); + LUNLOGV(rec->old_gid, __u64, ptr, end); + LUNLOGV(rec->pathlen, __u32, ptr, end); + LUNLOGV(rec->targetlen, __u32, ptr, end); + UNLOGL(rec->path, char, rec->pathlen, ptr, end); + UNLOGL(rec->target, char, rec->targetlen, ptr, end); + + *buf = ptr; + + return 0; +} + + +static int kml_unpack_close(struct kml_rec *rec, char **buf, char *end) +{ + char *ptr = 
*buf; + + LUNLOGV(rec->mode, __u32, ptr, end); // used for open_mode + LUNLOGV(rec->uid, __u32, ptr, end); // used for open_uid + LUNLOGV(rec->gid, __u32, ptr, end); // used for open_gid + kml_unpack_version(&rec->old_objectv, &ptr, end); + kml_unpack_version(&rec->new_objectv, &ptr, end); + LUNLOGV(rec->ino, __u64, ptr, end); + LUNLOGV(rec->generation, __u32, ptr, end); + LUNLOGV(rec->pathlen, __u32, ptr, end); + UNLOGL(rec->path, char, rec->pathlen, ptr, end); + + *buf = ptr; + + return 0; +} + + +static int kml_unpack_symlink(struct kml_rec *rec, char **buf, char *end) +{ + char *ptr = *buf; + + kml_unpack_version(&rec->old_parentv, &ptr, end); + kml_unpack_version(&rec->new_parentv, &ptr, end); + kml_unpack_version(&rec->new_objectv, &ptr, end); + LUNLOGV(rec->uid, __u32, ptr, end); + LUNLOGV(rec->gid, __u32, ptr, end); + LUNLOGV(rec->pathlen, __u32, ptr, end); + LUNLOGV(rec->targetlen, __u32, ptr, end); + UNLOGL(rec->path, char, rec->pathlen, ptr, end); + UNLOGL(rec->target, char, rec->targetlen, ptr, end); + + *buf = ptr; + + return 0; +} + + +static int kml_unpack_rename(struct kml_rec *rec, char **buf, char *end) +{ + char *ptr = *buf; + + kml_unpack_version(&rec->old_objectv, &ptr, end); + kml_unpack_version(&rec->new_objectv, &ptr, end); + kml_unpack_version(&rec->old_parentv, &ptr, end); + kml_unpack_version(&rec->new_parentv, &ptr, end); + LUNLOGV(rec->pathlen, __u32, ptr, end); + LUNLOGV(rec->targetlen, __u32, ptr, end); + UNLOGL(rec->path, char, rec->pathlen, ptr, end); + UNLOGL(rec->target, char, rec->targetlen, ptr, end); + + *buf = ptr; + + return 0; +} + + +static int kml_unpack_setattr(struct kml_rec *rec, char **buf, char *end) +{ + char *ptr = *buf; + + kml_unpack_version(&rec->old_objectv, &ptr, end); + LUNLOGV(rec->valid, __u32, ptr, end); + LUNLOGV(rec->mode, __u32, ptr, end); + LUNLOGV(rec->uid, __u32, ptr, end); + LUNLOGV(rec->gid, __u32, ptr, end); + LUNLOGV(rec->size, __u64, ptr, end); + LUNLOGV(rec->mtime_sec, __u32, ptr, end); + 
LUNLOGV(rec->mtime_nsec, __u32, ptr, end); + LUNLOGV(rec->ctime_sec, __u32, ptr, end); + LUNLOGV(rec->ctime_nsec, __u32, ptr, end); + LUNLOGV(rec->flags, __u32, ptr, end); + LUNLOGV(rec->old_mode, __u32, ptr, end); + LUNLOGV(rec->old_rdev, __u32, ptr, end); + LUNLOGV(rec->old_uid, __u64, ptr, end); + LUNLOGV(rec->old_gid, __u64, ptr, end); + LUNLOGV(rec->pathlen, __u32, ptr, end); + UNLOGL(rec->path, char, rec->pathlen, ptr, end); + + *buf = ptr; + + return 0; +} + + +static int kml_unpack_link(struct kml_rec *rec, char **buf, char *end) +{ + char *ptr = *buf; + + kml_unpack_version(&rec->old_parentv, &ptr, end); + kml_unpack_version(&rec->new_parentv, &ptr, end); + kml_unpack_version(&rec->new_objectv, &ptr, end); + LUNLOGV(rec->pathlen, __u32, ptr, end); + LUNLOGV(rec->targetlen, __u32, ptr, end); + UNLOGL(rec->path, char, rec->pathlen, ptr, end); + UNLOGL(rec->target, char, rec->targetlen, ptr, end); + + *buf = ptr; + + return 0; +} + +static int kml_unpack_mknod(struct kml_rec *rec, char **buf, char *end) +{ + char *ptr = *buf; + + kml_unpack_version(&rec->old_parentv, &ptr, end); + kml_unpack_version(&rec->new_parentv, &ptr, end); + kml_unpack_version(&rec->new_objectv, &ptr, end); + LUNLOGV(rec->mode, __u32, ptr, end); + LUNLOGV(rec->uid, __u32, ptr, end); + LUNLOGV(rec->gid, __u32, ptr, end); + LUNLOGV(rec->major, __u32, ptr, end); + LUNLOGV(rec->minor, __u32, ptr, end); + LUNLOGV(rec->pathlen, __u32, ptr, end); + UNLOGL(rec->path, char, rec->pathlen, ptr, end); + + *buf = ptr; + + return 0; +} + + +static int kml_unpack_write(struct kml_rec *rec, char **buf, char *end) +{ + printf("NOT IMPLEMENTED"); + return 0; +} + + +static int kml_unpack_release(struct kml_rec *rec, char **buf, char *end) +{ + printf("NOT IMPLEMENTED"); + return 0; +} + + +static int kml_unpack_trunc(struct kml_rec *rec, char **buf, char *end) +{ + printf("NOT IMPLEMENTED"); + return 0; +} + + +static int kml_unpack_setextattr(struct kml_rec *rec, char **buf, char *end) +{ + char *ptr = 
*buf; + + kml_unpack_version(&rec->old_objectv, &ptr, end); + kml_unpack_version(&rec->new_objectv, &ptr, end); + LUNLOGV(rec->flags, __u32, ptr, end); + LUNLOGV(rec->mode, __u32, ptr, end); + LUNLOGV(rec->pathlen, __u32, ptr, end); + LUNLOGV(rec->namelen, __u32, ptr, end); + LUNLOGV(rec->targetlen, __u32, ptr, end); + UNLOGL(rec->path, char, rec->pathlen, ptr, end); + UNLOGL(rec->name, char, rec->namelen, ptr, end); + UNLOGL(rec->target, char, rec->targetlen, ptr, end); + + *buf = ptr; + + return 0; +} + + +static int kml_unpack_delextattr(struct kml_rec *rec, char **buf, char *end) +{ + char *ptr = *buf; + + kml_unpack_version(&rec->old_objectv, &ptr, end); + kml_unpack_version(&rec->new_objectv, &ptr, end); + LUNLOGV(rec->flags, __u32, ptr, end); + LUNLOGV(rec->mode, __u32, ptr, end); + LUNLOGV(rec->pathlen, __u32, ptr, end); + LUNLOGV(rec->namelen, __u32, ptr, end); + LUNLOGV(rec->targetlen, __u32, ptr, end); + UNLOGL(rec->path, char, rec->pathlen, ptr, end); + UNLOGL(rec->name, char, rec->namelen, ptr, end); + + *buf = ptr; + + return 0; +} + +static int kml_unpack_open(struct kml_rec *rec, char **buf, char *end) +{ + printf("NOT IMPLEMENTED"); + return 0; +} + +static int kml_unpack_kml_trunc(struct kml_rec *rec, char **buf, char *end) +{ + + printf("NOT IMPLEMENTED"); + return 0; +} + + +typedef int (*unpacker)(struct kml_rec *rec, char **buf, char *end); + +static unpacker unpackers[KML_OPCODE_NUM] = +{ + [KML_OPCODE_NOOP] = kml_unpack_noop, + [KML_OPCODE_CREATE] = kml_unpack_create, + [KML_OPCODE_MKDIR] = kml_unpack_mkdir, + [KML_OPCODE_UNLINK] = kml_unpack_unlink, + [KML_OPCODE_RMDIR] = kml_unpack_rmdir, + [KML_OPCODE_CLOSE] = kml_unpack_close, + [KML_OPCODE_SYMLINK] = kml_unpack_symlink, + [KML_OPCODE_RENAME] = kml_unpack_rename, + [KML_OPCODE_SETATTR] = kml_unpack_setattr, + [KML_OPCODE_LINK] = kml_unpack_link, + [KML_OPCODE_OPEN] = kml_unpack_open, + [KML_OPCODE_MKNOD] = kml_unpack_mknod, + [KML_OPCODE_WRITE] = kml_unpack_write, + [KML_OPCODE_RELEASE] 
= kml_unpack_release, + [KML_OPCODE_TRUNC] = kml_unpack_trunc, + [KML_OPCODE_SETEXTATTR] = kml_unpack_setextattr, + [KML_OPCODE_DELEXTATTR] = kml_unpack_delextattr, + [KML_OPCODE_KML_TRUNC] = kml_unpack_kml_trunc, + [KML_OPCODE_GET_FILEID] = kml_unpack_get_fileid +}; + +int kml_unpack_prefix(struct kml_rec *rec, char **buf, char *end) +{ + char *ptr = *buf; + int n; + + UNLOGP(rec->prefix.hdr, struct kml_prefix_hdr, ptr, end); + rec->prefix.hdr->len = NTOH__u32(rec->prefix.hdr->len); + rec->prefix.hdr->version = NTOH__u32(rec->prefix.hdr->version); + rec->prefix.hdr->pid = NTOH__u32(rec->prefix.hdr->pid); + rec->prefix.hdr->auid = NTOH__u32(rec->prefix.hdr->auid); + rec->prefix.hdr->fsuid = NTOH__u32(rec->prefix.hdr->fsuid); + rec->prefix.hdr->fsgid = NTOH__u32(rec->prefix.hdr->fsgid); + rec->prefix.hdr->opcode = NTOH__u32(rec->prefix.hdr->opcode); + rec->prefix.hdr->ngroups = NTOH__u32(rec->prefix.hdr->ngroups); + + UNLOGL(rec->prefix.groups, __u32, rec->prefix.hdr->ngroups, ptr, end); + for (n = 0; n < rec->prefix.hdr->ngroups; n++) { + rec->prefix.groups[n] = NTOH__u32(rec->prefix.groups[n]); + } + + *buf = ptr; + + return 0; +} + +int kml_unpack_suffix(struct kml_rec *rec, char **buf, char *end) +{ + char *ptr = *buf; + + UNLOGP(rec->suffix, struct kml_suffix, ptr, end); + rec->suffix->prevrec = NTOH__u32(rec->suffix->prevrec); + rec->suffix->recno = NTOH__u32(rec->suffix->recno); + rec->suffix->time = NTOH__u32(rec->suffix->time); + rec->suffix->len = NTOH__u32(rec->suffix->len); + + *buf = ptr; + + return 0; +} + +int kml_unpack(struct kml_rec *rec, char **buf, char *end) +{ + char *ptr = *buf; + int err; + + if (((unsigned long)ptr % 4) != 0) { + printf("InterMezzo: %s: record misaligned.\n", __FUNCTION__); + return -EINVAL; + } + + while (ptr < end) { + __u32 *i = (__u32 *)ptr; + if (*i) + break; + ptr += sizeof(*i); + } + *buf = ptr; + + memset(rec, 0, sizeof(*rec)); + + err = kml_unpack_prefix(rec, &ptr, end); + if (err) { + printf("InterMezzo: %s: 
unpack_prefix failed: %d\n", + __FUNCTION__, err); + return err; + } + + if (rec->prefix.hdr->opcode < 0 || + rec->prefix.hdr->opcode >= KML_OPCODE_NUM) { + printf("InterMezzo: %s: invalid opcode (%d)\n", + __FUNCTION__, rec->prefix.hdr->opcode); + return -EINVAL; + } + err = unpackers[rec->prefix.hdr->opcode](rec, &ptr, end); + if (err) { + printf("InterMezzo: %s: unpacker failed: %d\n", + __FUNCTION__, err); + return err; + } + + err = kml_unpack_suffix(rec, &ptr, end); + if (err) { + printf("InterMezzo: %s: unpack_suffix failed: %d\n", + __FUNCTION__, err); + return err; + } + + + if (rec->prefix.hdr->len != rec->suffix->len) { + printf("InterMezzo: %s: lengths don't match\n", + __FUNCTION__); + return -EINVAL; + } + if ((rec->prefix.hdr->len % 4) != 0) { + printf("InterMezzo: %s: record length not a " + "multiple of 4.\n", __FUNCTION__); + return -EINVAL; + } + if (ptr - *buf != rec->prefix.hdr->len) { + printf("InterMezzo: %s: unpacking error\n", + __FUNCTION__); + return -EINVAL; + } + while (ptr < end) { + __u32 *i = (__u32 *)ptr; + if (*i) + break; + ptr += sizeof(*i); + } + *buf = ptr; + return 0; +} + + +#ifndef __KERNEL__ +#define STR(ptr) ((ptr))? 
(ptr) : "" + +#define OPNAME(n) [KML_OPCODE_##n] = #n +static char *opnames[KML_OPCODE_NUM] = { + OPNAME(NOOP), + OPNAME(CREATE), + OPNAME(MKDIR), + OPNAME(UNLINK), + OPNAME(RMDIR), + OPNAME(CLOSE), + OPNAME(SYMLINK), + OPNAME(RENAME), + OPNAME(SETATTR), + OPNAME(LINK), + OPNAME(OPEN), + OPNAME(MKNOD), + OPNAME(WRITE), + OPNAME(RELEASE), + OPNAME(TRUNC), + OPNAME(SETEXTATTR), + OPNAME(DELEXTATTR), + OPNAME(KML_TRUNC), + OPNAME(GET_FILEID) +}; +#undef OPNAME + +static char *print_opname(int op) +{ + if (op < 0 || op >= sizeof (opnames) / sizeof (*opnames)) + return NULL; + return opnames[op]; +} + + +static char *print_time(__u64 i) +{ + char buf[128]; + + memset(buf, 0, 128); + +#ifndef __KERNEL__ + strftime(buf, 128, "%Y/%m/%d %H:%M:%S", gmtime((time_t *)&i)); +#else + sprintf(buf, "%Ld\n", i); +#endif + + return strdup(buf); +} + +static char *print_version(struct presto_version *ver) +{ + char ver_buf[128]; + char *mtime; + char *ctime; + + if (!ver || ver->pv_ctime == 0) { + return strdup(""); + } + mtime = print_time(ver->pv_mtime); + ctime = print_time(ver->pv_ctime); + sprintf(ver_buf, "mtime %s, ctime %s, len %lld", + mtime, ctime, ver->pv_size); + free(mtime); + free(ctime); + return strdup(ver_buf); +} + + +char *kml_print_rec(struct kml_rec *rec, int brief) +{ + char *str; + char *nov, *oov, *ntv, *otv, *npv, *opv; + char *rectime, *mtime, *ctime; + + if (brief) { + str = g_strdup_printf(" %08d %7s %*s %*s", + rec->suffix->recno, + print_opname (rec->prefix.hdr->opcode), + rec->pathlen, STR(rec->path), + rec->targetlen, STR(rec->target)); + + return str; + } + + rectime = print_time(rec->suffix->time); + mtime = print_time(rec->mtime); + ctime = print_time(rec->ctime); + + nov = print_version(rec->new_objectv); + oov = print_version(rec->old_objectv); + ntv = print_version(rec->new_targetv); + otv = print_version(rec->old_targetv); + npv = print_version(rec->new_parentv); + opv = print_version(rec->old_parentv); + + str = g_strdup_printf("\n -- 
Record:\n" + " Recno %d\n" + " KML off %lld\n" + " Version %d\n" + " Len %d\n" + " Suf len %d\n" + " Time %s\n" + " Opcode %d\n" + " Op %s\n" + " Pid %d\n" + " AUid %d\n" + " Fsuid %d\n" + " Fsgid %d\n" + " Prevrec %d\n" + " Ngroups %d\n" + //" Groups @{$self->{groups}}\n" + " -- Path:\n" + " Inode %d\n" + " Gen num %u\n" + " Old mode %o\n" + " Old rdev %x\n" + " Old uid %llu\n" + " Old gid %llu\n" + " Path %*s\n" + //" Open_mode %o\n", + " Pathlen %d\n" + " Tgt %*s\n" + " Tgtlen %d\n" + " Old Tgt %*s\n" + " Old Tgtln %d\n" + " -- Attr:\n" + " Valid %x\n" + " mode %o, uid %d, gid %d, size %lld, mtime %s, ctime %s rdev %x (%d:%d)\n" + " -- Versions:\n" + " New object %s\n" + " Old object %s\n" + " New target %s\n" + " Old target %s\n" + " New parent %s\n" + " Old parent %s\n", + + rec->suffix->recno, + rec->offset, + rec->prefix.hdr->version, + rec->prefix.hdr->len, + rec->suffix->len, + rectime, + rec->prefix.hdr->opcode, + print_opname (rec->prefix.hdr->opcode), + rec->prefix.hdr->pid, + rec->prefix.hdr->auid, + rec->prefix.hdr->fsuid, + rec->prefix.hdr->fsgid, + rec->suffix->prevrec, + rec->prefix.hdr->ngroups, + rec->ino, + rec->generation, + rec->old_mode, + rec->old_rdev, + rec->old_uid, + rec->old_gid, + rec->pathlen, + STR(rec->path), + rec->pathlen, + rec->targetlen, + STR(rec->target), + rec->targetlen, + rec->old_targetlen, + STR(rec->old_target), + rec->old_targetlen, + + rec->valid, + rec->mode, + rec->uid, + rec->gid, + rec->size, + mtime, + ctime, + rec->rdev, rec->major, rec->minor, + nov, oov, ntv, otv, npv, opv); + + free(nov); + free(oov); + free(ntv); + free(otv); + free(npv); + free(opv); + + free(rectime); + free(ctime); + free(mtime); + + return str; +} +#endif diff --git a/fs/intermezzo/kml_utils.c b/fs/intermezzo/kml_utils.c new file mode 100644 index 000000000..5062e2d71 --- /dev/null +++ b/fs/intermezzo/kml_utils.c @@ -0,0 +1,43 @@ +#include +#include +#include +#include + +#include "intermezzo_fs.h" +#include "intermezzo_kml.h" + + +// 
dlogit -- oppsite to logit () +// return the sbuf + size; +char *dlogit (void *tbuf, const void *sbuf, int size) +{ + char *ptr = (char *)sbuf; + memcpy(tbuf, ptr, size); + ptr += size; + return ptr; +} + +static spinlock_t kml_lock = SPIN_LOCK_UNLOCKED; +static char buf[1024]; +char * bdup_printf (char *format, ...) +{ + va_list args; + int i; + char *path; + unsigned long flags; + + spin_lock_irqsave(&kml_lock, flags); + va_start(args, format); + i = vsprintf(buf, format, args); /* hopefully i < sizeof(buf) */ + va_end(args); + + PRESTO_ALLOC (path, char *, i + 1); + if (path == NULL) + return NULL; + strcpy (path, buf); + + spin_unlock_irqrestore(&kml_lock, flags); + return path; +} + + diff --git a/fs/intermezzo/methods.c b/fs/intermezzo/methods.c new file mode 100644 index 000000000..8950efc8c --- /dev/null +++ b/fs/intermezzo/methods.c @@ -0,0 +1,493 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2000 Stelias Computing, Inc. + * Copyright (C) 2000 Red Hat, Inc. + * Copyright (C) 2000 Mountain View Data, Inc. + * + * Extended Attribute Support + * Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "intermezzo_fs.h" + + +int filter_print_entry = 0; +int filter_debug = 0xfffffff; +/* + * The function in this file are responsible for setting up the + * correct methods layered file systems like InterMezzo and snapfs + */ + + +static struct filter_fs filter_oppar[FILTER_FS_TYPES]; + +/* get to the upper methods (intermezzo, snapfs) */ +inline struct super_operations *filter_c2usops(struct filter_fs *cache) +{ + return &cache->o_fops.filter_sops; +} + +inline struct inode_operations *filter_c2udiops(struct filter_fs *cache) +{ + return &cache->o_fops.filter_dir_iops; +} + + +inline struct inode_operations *filter_c2ufiops(struct filter_fs *cache) +{ + return &cache->o_fops.filter_file_iops; +} + +inline struct inode_operations *filter_c2usiops(struct filter_fs *cache) +{ + return &cache->o_fops.filter_sym_iops; +} + + +inline struct file_operations *filter_c2udfops(struct filter_fs *cache) +{ + return &cache->o_fops.filter_dir_fops; +} + +inline struct file_operations *filter_c2uffops(struct filter_fs *cache) +{ + return &cache->o_fops.filter_file_fops; +} + +inline struct file_operations *filter_c2usfops(struct filter_fs *cache) +{ + return &cache->o_fops.filter_sym_fops; +} + +inline struct dentry_operations *filter_c2udops(struct filter_fs *cache) +{ + return &cache->o_fops.filter_dentry_ops; +} + +/* get to the cache (lower) methods */ +inline struct super_operations *filter_c2csops(struct filter_fs *cache) +{ + return cache->o_caops.cache_sops; +} + +inline struct inode_operations *filter_c2cdiops(struct filter_fs *cache) +{ + return cache->o_caops.cache_dir_iops; +} + +inline struct inode_operations *filter_c2cfiops(struct filter_fs *cache) +{ + return cache->o_caops.cache_file_iops; +} + +inline struct inode_operations *filter_c2csiops(struct filter_fs *cache) +{ + return 
cache->o_caops.cache_sym_iops; +} + +inline struct file_operations *filter_c2cdfops(struct filter_fs *cache) +{ + return cache->o_caops.cache_dir_fops; +} + +inline struct file_operations *filter_c2cffops(struct filter_fs *cache) +{ + return cache->o_caops.cache_file_fops; +} + +inline struct file_operations *filter_c2csfops(struct filter_fs *cache) +{ + return cache->o_caops.cache_sym_fops; +} + +inline struct dentry_operations *filter_c2cdops(struct filter_fs *cache) +{ + return cache->o_caops.cache_dentry_ops; +} + + +void filter_setup_journal_ops(struct filter_fs *ops, char *cache_type) +{ + if ( strlen(cache_type) == strlen("ext2") && + memcmp(cache_type, "ext2", strlen("ext2")) == 0 ) { +#ifdef CONFIG_EXT2_FS + ops->o_trops = &presto_ext2_journal_ops; +#else + ops->o_trops = NULL; +#endif + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + + if ( strlen(cache_type) == strlen("ext3") && + memcmp(cache_type, "ext3", strlen("ext3")) == 0 ) { +#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE) + ops->o_trops = &presto_ext3_journal_ops; +#else + ops->o_trops = NULL; +#endif + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + + if ( strlen(cache_type) == strlen("tmpfs") && + memcmp(cache_type, "tmpfs", strlen("tmpfs")) == 0 ) { +#if defined(CONFIG_TMPFS) + ops->o_trops = &presto_tmpfs_journal_ops; +#else + ops->o_trops = NULL; +#endif + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + + if ( strlen(cache_type) == strlen("reiserfs") && + memcmp(cache_type, "reiserfs", strlen("reiserfs")) == 0 ) { +#if 0 + /* #if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE) */ + ops->o_trops = &presto_reiserfs_journal_ops; +#else + ops->o_trops = NULL; +#endif + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + + if ( strlen(cache_type) == strlen("xfs") && + memcmp(cache_type, "xfs", strlen("xfs")) == 0 ) { +#if 0 +/*#if defined(CONFIG_XFS_FS) || defined (CONFIG_XFS_FS_MODULE) */ + ops->o_trops = &presto_xfs_journal_ops; +#else + ops->o_trops = NULL; +#endif + FDEBUG(D_SUPER, 
"ops at %p\n", ops); + } + + if ( strlen(cache_type) == strlen("obdfs") && + memcmp(cache_type, "obdfs", strlen("obdfs")) == 0 ) { +#if defined(CONFIG_OBDFS_FS) || defined (CONFIG_OBDFS_FS_MODULE) + ops->o_trops = presto_obdfs_journal_ops; +#else + ops->o_trops = NULL; +#endif + FDEBUG(D_SUPER, "ops at %p\n", ops); + } +} + + +/* find the cache for this FS */ +struct filter_fs *filter_get_filter_fs(const char *cache_type) +{ + struct filter_fs *ops = NULL; + FENTRY; + + if ( strlen(cache_type) == strlen("ext2") && + memcmp(cache_type, "ext2", strlen("ext2")) == 0 ) { + ops = &filter_oppar[FILTER_FS_EXT2]; + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + + if ( strlen(cache_type) == strlen("xfs") && + memcmp(cache_type, "xfs", strlen("xfs")) == 0 ) { + ops = &filter_oppar[FILTER_FS_XFS]; + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + + if ( strlen(cache_type) == strlen("ext3") && + memcmp(cache_type, "ext3", strlen("ext3")) == 0 ) { + ops = &filter_oppar[FILTER_FS_EXT3]; + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + + if ( strlen(cache_type) == strlen("tmpfs") && + memcmp(cache_type, "tmpfs", strlen("tmpfs")) == 0 ) { + ops = &filter_oppar[FILTER_FS_TMPFS]; + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + + if ( strlen(cache_type) == strlen("reiserfs") && + memcmp(cache_type, "reiserfs", strlen("reiserfs")) == 0 ) { + ops = &filter_oppar[FILTER_FS_REISERFS]; + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + if ( strlen(cache_type) == strlen("obdfs") && + memcmp(cache_type, "obdfs", strlen("obdfs")) == 0 ) { + ops = &filter_oppar[FILTER_FS_OBDFS]; + FDEBUG(D_SUPER, "ops at %p\n", ops); + } + + if (ops == NULL) { + CERROR("prepare to die: unrecognized cache type for Filter\n"); + } + FEXIT; + return ops; +} + + +/* + * Frobnicate the InterMezzo operations + * this establishes the link between the InterMezzo file system + * and the underlying file system used for the cache. 
+ */ + +void filter_setup_super_ops(struct filter_fs *cache, struct super_operations *cache_sops, struct super_operations *filter_sops) +{ + /* Get ptr to the shared struct snapfs_ops structure. */ + struct filter_ops *props = &cache->o_fops; + /* Get ptr to the shared struct cache_ops structure. */ + struct cache_ops *caops = &cache->o_caops; + + FENTRY; + + if ( cache->o_flags & FILTER_DID_SUPER_OPS ) { + FEXIT; + return; + } + cache->o_flags |= FILTER_DID_SUPER_OPS; + + /* Set the cache superblock operations to point to the + superblock operations of the underlying file system. */ + caops->cache_sops = cache_sops; + + /* + * Copy the cache (real fs) superblock ops to the "filter" + * superblock ops as defaults. Some will be changed below + */ + memcpy(&props->filter_sops, cache_sops, sizeof(*cache_sops)); + + /* 'put_super' unconditionally is that of filter */ + if (filter_sops->put_super) { + props->filter_sops.put_super = filter_sops->put_super; + } + + if (cache_sops->read_inode) { + props->filter_sops.read_inode = filter_sops->read_inode; + FDEBUG(D_INODE, "setting filter_read_inode, cache_ops %p, cache %p, ri at %p\n", + cache, cache, props->filter_sops.read_inode); + } + + if (cache_sops->remount_fs) + props->filter_sops.remount_fs = filter_sops->remount_fs; + FEXIT; +} + + +void filter_setup_dir_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops) +{ + struct inode_operations *cache_filter_iops; + struct inode_operations *cache_iops = inode->i_op; + struct file_operations *cache_fops = inode->i_fop; + FENTRY; + + if ( cache->o_flags & FILTER_DID_DIR_OPS ) { + FEXIT; + return; + } + cache->o_flags |= FILTER_DID_DIR_OPS; + + /* former ops become cache_ops */ + cache->o_caops.cache_dir_iops = cache_iops; + cache->o_caops.cache_dir_fops = cache_fops; + FDEBUG(D_SUPER, "filter at %p, cache iops %p, iops %p\n", + cache, cache_iops, filter_c2udiops(cache)); + + /* setup our dir iops: copy 
and modify */ + memcpy(filter_c2udiops(cache), cache_iops, sizeof(*cache_iops)); + + /* abbreviate */ + cache_filter_iops = filter_c2udiops(cache); + + /* methods that filter if cache filesystem has these ops */ + if (cache_iops->lookup && filter_iops->lookup) + cache_filter_iops->lookup = filter_iops->lookup; + if (cache_iops->create && filter_iops->create) + cache_filter_iops->create = filter_iops->create; + if (cache_iops->link && filter_iops->link) + cache_filter_iops->link = filter_iops->link; + if (cache_iops->unlink && filter_iops->unlink) + cache_filter_iops->unlink = filter_iops->unlink; + if (cache_iops->mkdir && filter_iops->mkdir) + cache_filter_iops->mkdir = filter_iops->mkdir; + if (cache_iops->rmdir && filter_iops->rmdir) + cache_filter_iops->rmdir = filter_iops->rmdir; + if (cache_iops->symlink && filter_iops->symlink) + cache_filter_iops->symlink = filter_iops->symlink; + if (cache_iops->rename && filter_iops->rename) + cache_filter_iops->rename = filter_iops->rename; + if (cache_iops->mknod && filter_iops->mknod) + cache_filter_iops->mknod = filter_iops->mknod; + if (cache_iops->permission && filter_iops->permission) + cache_filter_iops->permission = filter_iops->permission; + if (cache_iops->getattr) + cache_filter_iops->getattr = filter_iops->getattr; + /* Some filesystems do not use a setattr method of their own + instead relying on inode_setattr/write_inode. We still need to + journal these so we make setattr an unconditional operation. + XXX: we should probably check for write_inode. SHP + */ + /*if (cache_iops->setattr)*/ + cache_filter_iops->setattr = filter_iops->setattr; +#ifdef CONFIG_FS_EXT_ATTR + /* For now we assume that posix acls are handled through extended + * attributes. If this is not the case, we must explicitly trap + * posix_set_acl. 
SHP + */ + if (cache_iops->set_ext_attr && filter_iops->set_ext_attr) + cache_filter_iops->set_ext_attr = filter_iops->set_ext_attr; +#endif + + + /* copy dir fops */ + memcpy(filter_c2udfops(cache), cache_fops, sizeof(*cache_fops)); + + /* unconditional filtering operations */ + filter_c2udfops(cache)->ioctl = filter_fops->ioctl; + + FEXIT; +} + + +void filter_setup_file_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops) +{ + struct inode_operations *pr_iops; + struct inode_operations *cache_iops = inode->i_op; + struct file_operations *cache_fops = inode->i_fop; + FENTRY; + + if ( cache->o_flags & FILTER_DID_FILE_OPS ) { + FEXIT; + return; + } + cache->o_flags |= FILTER_DID_FILE_OPS; + + /* steal the old ops */ + /* former ops become cache_ops */ + cache->o_caops.cache_file_iops = cache_iops; + cache->o_caops.cache_file_fops = cache_fops; + + /* abbreviate */ + pr_iops = filter_c2ufiops(cache); + + /* setup our dir iops: copy and modify */ + memcpy(pr_iops, cache_iops, sizeof(*cache_iops)); + + /* copy dir fops */ + CERROR("*** cache file ops at %p\n", cache_fops); + memcpy(filter_c2uffops(cache), cache_fops, sizeof(*cache_fops)); + + /* assign */ + /* See comments above in filter_setup_dir_ops. SHP */ + /*if (cache_iops->setattr)*/ + pr_iops->setattr = filter_iops->setattr; + if (cache_iops->getattr) + pr_iops->getattr = filter_iops->getattr; + /* XXX Should this be conditional rmr ? */ + pr_iops->permission = filter_iops->permission; +#ifdef CONFIG_FS_EXT_ATTR + /* For now we assume that posix acls are handled through extended + * attributes. 
If this is not the case, we must explicitly trap and + * posix_set_acl + */ + if (cache_iops->set_ext_attr && filter_iops->set_ext_attr) + pr_iops->set_ext_attr = filter_iops->set_ext_attr; +#endif + + + /* unconditional filtering operations */ + filter_c2uffops(cache)->open = filter_fops->open; + filter_c2uffops(cache)->release = filter_fops->release; + filter_c2uffops(cache)->write = filter_fops->write; + filter_c2uffops(cache)->ioctl = filter_fops->ioctl; + + FEXIT; +} + +/* XXX in 2.3 there are "fast" and "slow" symlink ops for ext2 XXX */ +void filter_setup_symlink_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops) +{ + struct inode_operations *pr_iops; + struct inode_operations *cache_iops = inode->i_op; + struct file_operations *cache_fops = inode->i_fop; + FENTRY; + + if ( cache->o_flags & FILTER_DID_SYMLINK_OPS ) { + FEXIT; + return; + } + cache->o_flags |= FILTER_DID_SYMLINK_OPS; + + /* steal the old ops */ + cache->o_caops.cache_sym_iops = cache_iops; + cache->o_caops.cache_sym_fops = cache_fops; + + /* abbreviate */ + pr_iops = filter_c2usiops(cache); + + /* setup our dir iops: copy and modify */ + memcpy(pr_iops, cache_iops, sizeof(*cache_iops)); + + /* See comments above in filter_setup_dir_ops. 
SHP */ + /* if (cache_iops->setattr) */ + pr_iops->setattr = filter_iops->setattr; + if (cache_iops->getattr) + pr_iops->getattr = filter_iops->getattr; + + /* assign */ + /* copy fops - careful for symlinks they might be NULL */ + if ( cache_fops ) { + memcpy(filter_c2usfops(cache), cache_fops, sizeof(*cache_fops)); + } + + FEXIT; +} + +void filter_setup_dentry_ops(struct filter_fs *cache, + struct dentry_operations *cache_dop, + struct dentry_operations *filter_dop) +{ + if ( cache->o_flags & FILTER_DID_DENTRY_OPS ) { + FEXIT; + return; + } + cache->o_flags |= FILTER_DID_DENTRY_OPS; + + cache->o_caops.cache_dentry_ops = cache_dop; + memcpy(&cache->o_fops.filter_dentry_ops, + filter_dop, sizeof(*filter_dop)); + + if (cache_dop && cache_dop != filter_dop && cache_dop->d_revalidate){ + CERROR("WARNING: filter overriding revalidation!\n"); + } + return; +} diff --git a/fs/intermezzo/presto.c b/fs/intermezzo/presto.c new file mode 100644 index 000000000..bf1603186 --- /dev/null +++ b/fs/intermezzo/presto.c @@ -0,0 +1,736 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Author: Peter J. Braam + * Copyright (C) 1998 Stelias Computing Inc + * Copyright (C) 1999 Red Hat Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * This file implements basic routines supporting the semantics + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + +int presto_walk(const char *name, struct nameidata *nd) +{ + int err; + /* we do not follow symlinks to support symlink operations + correctly. The vfs should always hand us resolved dentries + so we should not be required to use LOOKUP_FOLLOW. At the + reintegrating end, lento again should be working with the + resolved pathname and not the symlink. SHP + XXX: This code implies that direct symlinks do not work. SHP + */ + unsigned int flags = 0; //LOOKUP_POSITIVE; + + ENTRY; + err = path_lookup(name, flags, nd); + return err; +} + + +/* find the presto minor device for this inode */ +int presto_i2m(struct inode *inode) +{ + struct presto_cache *cache; + ENTRY; + cache = presto_get_cache(inode); + CDEBUG(D_PSDEV, "\n"); + if ( !cache ) { + CERROR("PRESTO: BAD: cannot find cache for dev %s, ino %ld\n", + inode->i_sb->s_id, inode->i_ino); + EXIT; + return -1; + } + EXIT; + return cache->cache_psdev->uc_minor; +} + +inline int presto_f2m(struct presto_file_set *fset) +{ + return fset->fset_cache->cache_psdev->uc_minor; + +} + +inline int presto_c2m(struct presto_cache *cache) +{ + return cache->cache_psdev->uc_minor; + +} + +/* XXX check this out */ +struct presto_file_set *presto_path2fileset(const char *name) +{ + struct nameidata nd; + struct presto_file_set *fileset; + int error; + ENTRY; + + error = presto_walk(name, &nd); + if (!error) { +#if 0 + error = do_revalidate(nd.dentry); +#endif + if (!error) + fileset = presto_fset(nd.dentry); + path_release(&nd); + EXIT; + } else + fileset = ERR_PTR(error); + + EXIT; + return fileset; +} + +/* check a flag on this dentry or fset root. 
Semantics: + - most flags: test if it is set + - PRESTO_ATTR, PRESTO_DATA return 1 if PRESTO_FSETINSYNC is set +*/ +int presto_chk(struct dentry *dentry, int flag) +{ + int minor; + struct presto_file_set *fset = presto_fset(dentry); + + ENTRY; + minor = presto_i2m(dentry->d_inode); + if ( izo_channels[minor].uc_no_filter ) { + EXIT; + return ~0; + } + + /* if the fileset is in sync DATA and ATTR are OK */ + if ( fset && + (flag == PRESTO_ATTR || flag == PRESTO_DATA) && + (fset->fset_flags & FSET_INSYNC) ) { + CDEBUG(D_INODE, "fset in sync (ino %ld)!\n", + fset->fset_dentry->d_inode->i_ino); + EXIT; + return 1; + } + + EXIT; + return (presto_d2d(dentry)->dd_flags & flag); +} + +/* set a bit in the dentry flags */ +void presto_set(struct dentry *dentry, int flag) +{ + ENTRY; + if ( dentry->d_inode ) { + CDEBUG(D_INODE, "SET ino %ld, flag %x\n", + dentry->d_inode->i_ino, flag); + } + if ( presto_d2d(dentry) == NULL) { + CERROR("dentry without d_fsdata in presto_set: %p: %*s", dentry, + dentry->d_name.len, dentry->d_name.name); + BUG(); + } + presto_d2d(dentry)->dd_flags |= flag; + EXIT; +} + +/* given a path: complete the closes on the fset */ +int lento_complete_closes(char *path) +{ + struct nameidata nd; + struct dentry *dentry; + int error; + struct presto_file_set *fset; + ENTRY; + + error = presto_walk(path, &nd); + if (error) { + EXIT; + return error; + } + + dentry = nd.dentry; + + error = -ENXIO; + if ( !presto_ispresto(dentry->d_inode) ) { + EXIT; + goto out_complete; + } + + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + CERROR("No fileset!\n"); + EXIT; + goto out_complete; + } + + /* transactions and locking are internal to this function */ + error = presto_complete_lml(fset); + + EXIT; + out_complete: + path_release(&nd); + return error; +} + +#if 0 +/* given a path: write a close record and cancel an LML record, finally + call truncate LML. Lento is doing this so it goes in with uid/gid's + root. 
+*/ +int lento_cancel_lml(char *path, + __u64 lml_offset, + __u64 remote_ino, + __u32 remote_generation, + __u32 remote_version, + struct lento_vfs_context *info) +{ + struct nameidata nd; + struct rec_info rec; + struct dentry *dentry; + int error; + struct presto_file_set *fset; + void *handle; + struct presto_version new_ver; + ENTRY; + + + error = presto_walk(path, &nd); + if (error) { + EXIT; + return error; + } + dentry = nd.dentry; + + error = -ENXIO; + if ( !presto_ispresto(dentry->d_inode) ) { + EXIT; + goto out_cancel_lml; + } + + fset = presto_fset(dentry); + + error=-EINVAL; + if (fset==NULL) { + CERROR("No fileset!\n"); + EXIT; + goto out_cancel_lml; + } + + /* this only requires a transaction below which is automatic */ + handle = presto_trans_start(fset, dentry->d_inode, PRESTO_OP_RELEASE); + if ( IS_ERR(handle) ) { + error = -ENOMEM; + EXIT; + goto out_cancel_lml; + } + + if (info->flags & LENTO_FL_CANCEL_LML) { + error = presto_clear_lml_close(fset, lml_offset); + if ( error ) { + presto_trans_commit(fset, handle); + EXIT; + goto out_cancel_lml; + } + } + + + if (info->flags & LENTO_FL_WRITE_KML) { + presto_getversion(&new_ver, dentry->d_inode); + error = presto_journal_close(&rec, fset, NULL, dentry, + &new_ver); + if ( error ) { + EXIT; + presto_trans_commit(fset, handle); + goto out_cancel_lml; + } + } + + if (info->flags & LENTO_FL_WRITE_EXPECT) { + error = presto_write_last_rcvd(&rec, fset, info); + if ( error < 0 ) { + EXIT; + presto_trans_commit(fset, handle); + goto out_cancel_lml; + } + } + + presto_trans_commit(fset, handle); + + if (info->flags & LENTO_FL_CANCEL_LML) { + presto_truncate_lml(fset); + } + + + out_cancel_lml: + EXIT; + path_release(&nd); + return error; +} +#endif + +/* given a dentry, operate on the flags in its dentry. 
Used by downcalls */ +int izo_mark_dentry(struct dentry *dentry, int and_flag, int or_flag, + int *res) +{ + int error = 0; + + if (presto_d2d(dentry) == NULL) { + CERROR("InterMezzo: no ddata for inode %ld in %s\n", + dentry->d_inode->i_ino, __FUNCTION__); + return -EINVAL; + } + + CDEBUG(D_INODE, "inode: %ld, and flag %x, or flag %x, dd_flags %x\n", + dentry->d_inode->i_ino, and_flag, or_flag, + presto_d2d(dentry)->dd_flags); + + presto_d2d(dentry)->dd_flags &= and_flag; + presto_d2d(dentry)->dd_flags |= or_flag; + if (res) + *res = presto_d2d(dentry)->dd_flags; + + return error; +} + +/* given a path, operate on the flags in its cache. Used by mark_ioctl */ +int izo_mark_cache(struct dentry *dentry, int and_flag, int or_flag, + int *res) +{ + struct presto_cache *cache; + + if (presto_d2d(dentry) == NULL) { + CERROR("InterMezzo: no ddata for inode %ld in %s\n", + dentry->d_inode->i_ino, __FUNCTION__); + return -EINVAL; + } + + CDEBUG(D_INODE, "inode: %ld, and flag %x, or flag %x, dd_flags %x\n", + dentry->d_inode->i_ino, and_flag, or_flag, + presto_d2d(dentry)->dd_flags); + + cache = presto_get_cache(dentry->d_inode); + if ( !cache ) { + CERROR("PRESTO: BAD: cannot find cache in izo_mark_cache\n"); + return -EBADF; + } + + cache->cache_flags &= and_flag; + cache->cache_flags |= or_flag; + if (res) + *res = (int)cache->cache_flags; + + return 0; +} + +int presto_set_max_kml_size(const char *path, unsigned long max_size) +{ + struct presto_file_set *fset; + + ENTRY; + + fset = presto_path2fileset(path); + if (IS_ERR(fset)) { + EXIT; + return PTR_ERR(fset); + } + + fset->kml_truncate_size = max_size; + CDEBUG(D_CACHE, "KML truncate size set to %lu bytes for fset %s.\n", + max_size, path); + + EXIT; + return 0; +} + +int izo_mark_fset(struct dentry *dentry, int and_flag, int or_flag, + int * res) +{ + struct presto_file_set *fset; + + fset = presto_fset(dentry); + if ( !fset ) { + CERROR("PRESTO: BAD: cannot find cache in izo_mark_cache\n"); + 
make_bad_inode(dentry->d_inode); + return -EBADF; + } + fset->fset_flags &= and_flag; + fset->fset_flags |= or_flag; + if (res) + *res = (int)fset->fset_flags; + + return 0; +} + +/* talk to Lento about the permit */ +static int presto_permit_upcall(struct dentry *dentry) +{ + int rc; + char *path, *buffer; + int pathlen; + int minor; + int fsetnamelen; + struct presto_file_set *fset = NULL; + + ENTRY; + + if ( (minor = presto_i2m(dentry->d_inode)) < 0) { + EXIT; + return -EINVAL; + } + + fset = presto_fset(dentry); + if (!fset) { + EXIT; + return -ENOTCONN; + } + + if ( !presto_lento_up(minor) ) { + if ( fset->fset_flags & FSET_STEAL_PERMIT ) { + EXIT; + return 0; + } else { + EXIT; + return -ENOTCONN; + } + } + + PRESTO_ALLOC(buffer, PAGE_SIZE); + if ( !buffer ) { + CERROR("PRESTO: out of memory!\n"); + EXIT; + return -ENOMEM; + } + path = presto_path(dentry, fset->fset_dentry, buffer, PAGE_SIZE); + pathlen = MYPATHLEN(buffer, path); + fsetnamelen = strlen(fset->fset_name); + rc = izo_upc_permit(minor, dentry, pathlen, path, fset->fset_name); + PRESTO_FREE(buffer, PAGE_SIZE); + EXIT; + return rc; +} + +/* get a write permit for the fileset of this inode + * - if this returns a negative value there was an error + * - if 0 is returned the permit was already in the kernel -- or -- + * Lento gave us the permit without reintegration + * - lento returns the number of records it reintegrated + * + * Note that if this fileset has branches, a permit will -never- to a normal + * process for writing in the data area (ie, outside of .intermezzo) + */ +int presto_get_permit(struct inode * inode) +{ + struct dentry *de; + struct presto_file_set *fset; + int minor = presto_i2m(inode); + int rc = 0; + + ENTRY; + if (minor < 0) { + EXIT; + return -1; + } + + if ( ISLENTO(minor) ) { + EXIT; + return 0; + } + + if (list_empty(&inode->i_dentry)) { + CERROR("No alias for inode %d\n", (int) inode->i_ino); + EXIT; + return -EINVAL; + } + + de = list_entry(inode->i_dentry.next, struct 
dentry, d_alias); + + if (presto_chk(de, PRESTO_DONT_JOURNAL)) { + EXIT; + return 0; + } + + fset = presto_fset(de); + if ( !fset ) { + CERROR("Presto: no fileset in presto_get_permit!\n"); + EXIT; + return -EINVAL; + } + + if (fset->fset_flags & FSET_HAS_BRANCHES) { + EXIT; + return -EROFS; + } + + spin_lock(&fset->fset_permit_lock); + if (fset->fset_flags & FSET_HASPERMIT) { + fset->fset_permit_count++; + CDEBUG(D_INODE, "permit count now %d, inode %lx\n", + fset->fset_permit_count, inode->i_ino); + spin_unlock(&fset->fset_permit_lock); + EXIT; + return 0; + } + + /* Allow reintegration to proceed without locks -SHP */ + fset->fset_permit_upcall_count++; + if (fset->fset_permit_upcall_count == 1) { + spin_unlock(&fset->fset_permit_lock); + rc = presto_permit_upcall(fset->fset_dentry); + spin_lock(&fset->fset_permit_lock); + fset->fset_permit_upcall_count--; + if (rc == 0) { + izo_mark_fset(fset->fset_dentry, ~0, FSET_HASPERMIT, + NULL); + fset->fset_permit_count++; + } else if (rc == ENOTCONN) { + CERROR("InterMezzo: disconnected operation. stealing permit.\n"); + izo_mark_fset(fset->fset_dentry, ~0, FSET_HASPERMIT, + NULL); + fset->fset_permit_count++; + /* set a disconnected flag here to stop upcalls */ + rc = 0; + } else { + CERROR("InterMezzo: presto_permit_upcall failed: %d\n", rc); + rc = -EROFS; + /* go to sleep here and try again? */ + } + wake_up_interruptible(&fset->fset_permit_queue); + } else { + /* Someone is already doing an upcall; go to sleep. 
*/ + DECLARE_WAITQUEUE(wait, current); + + spin_unlock(&fset->fset_permit_lock); + add_wait_queue(&fset->fset_permit_queue, &wait); + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + + spin_lock(&fset->fset_permit_lock); + if (fset->fset_permit_upcall_count == 0) + break; + spin_unlock(&fset->fset_permit_lock); + + if (signal_pending(current)) { + remove_wait_queue(&fset->fset_permit_queue, + &wait); + return -ERESTARTSYS; + } + schedule(); + } + remove_wait_queue(&fset->fset_permit_queue, &wait); + /* We've been woken up: do we have the permit? */ + if (fset->fset_flags & FSET_HASPERMIT) + /* FIXME: Is this the right thing? */ + rc = -EAGAIN; + } + + CDEBUG(D_INODE, "permit count now %d, ino %ld (likely 1), " + "rc %d\n", fset->fset_permit_count, inode->i_ino, rc); + spin_unlock(&fset->fset_permit_lock); + EXIT; + return rc; +} + +int presto_put_permit(struct inode * inode) +{ + struct dentry *de; + struct presto_file_set *fset; + int minor = presto_i2m(inode); + + ENTRY; + if (minor < 0) { + EXIT; + return -1; + } + + if ( ISLENTO(minor) ) { + EXIT; + return 0; + } + + if (list_empty(&inode->i_dentry)) { + CERROR("No alias for inode %d\n", (int) inode->i_ino); + EXIT; + return -1; + } + + de = list_entry(inode->i_dentry.next, struct dentry, d_alias); + + fset = presto_fset(de); + if ( !fset ) { + CERROR("InterMezzo: no fileset in %s!\n", __FUNCTION__); + EXIT; + return -1; + } + + if (presto_chk(de, PRESTO_DONT_JOURNAL)) { + EXIT; + return 0; + } + + spin_lock(&fset->fset_permit_lock); + if (fset->fset_flags & FSET_HASPERMIT) { + if (fset->fset_permit_count > 0) + fset->fset_permit_count--; + else + CERROR("Put permit while permit count is 0, " + "inode %ld!\n", inode->i_ino); + } else { + fset->fset_permit_count = 0; + CERROR("InterMezzo: put permit while no permit, inode %ld, " + "flags %x!\n", inode->i_ino, fset->fset_flags); + } + + CDEBUG(D_INODE, "permit count now %d, inode %ld\n", + fset->fset_permit_count, inode->i_ino); + + if (fset->fset_flags & 
FSET_PERMIT_WAITING && + fset->fset_permit_count == 0) { + CDEBUG(D_INODE, "permit count now 0, ino %ld, wake sleepers\n", + inode->i_ino); + wake_up_interruptible(&fset->fset_permit_queue); + } + spin_unlock(&fset->fset_permit_lock); + + EXIT; + return 0; +} + +void presto_getversion(struct presto_version * presto_version, + struct inode * inode) +{ + presto_version->pv_mtime_sec = inode->i_mtime.tv_sec; + presto_version->pv_mtime_nsec = inode->i_mtime.tv_nsec; + presto_version->pv_ctime_sec = inode->i_ctime.tv_sec; + presto_version->pv_ctime_nsec = inode->i_ctime.tv_nsec; + presto_version->pv_size = (__u64)inode->i_size; +} + + +/* If uuid is non-null, it is the uuid of the peer that's making the revocation + * request. If it is null, this request was made locally, without external + * pressure to give up the permit. This most often occurs when a client + * starts up. + * + * FIXME: this function needs to be refactored slightly once we start handling + * multiple clients. + */ +int izo_revoke_permit(struct dentry *dentry, __u8 uuid[16]) +{ + struct presto_file_set *fset; + DECLARE_WAITQUEUE(wait, current); + int minor, rc; + + ENTRY; + + minor = presto_i2m(dentry->d_inode); + if (minor < 0) { + EXIT; + return -ENODEV; + } + + fset = presto_fset(dentry); + if (fset == NULL) { + EXIT; + return -ENODEV; + } + + spin_lock(&fset->fset_permit_lock); + if (fset->fset_flags & FSET_PERMIT_WAITING) { + CERROR("InterMezzo: Two processes are waiting on the same permit--this not yet supported! Aborting this particular permit request...\n"); + EXIT; + spin_unlock(&fset->fset_permit_lock); + return -EINVAL; + } + + if (fset->fset_permit_count == 0) + goto got_permit; + + /* Something is still using this permit. Mark that we're waiting for it + * and go to sleep. 
*/ + rc = izo_mark_fset(dentry, ~0, FSET_PERMIT_WAITING, NULL); + spin_unlock(&fset->fset_permit_lock); + if (rc < 0) { + EXIT; + return rc; + } + + add_wait_queue(&fset->fset_permit_queue, &wait); + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + + spin_lock(&fset->fset_permit_lock); + if (fset->fset_permit_count == 0) + break; + spin_unlock(&fset->fset_permit_lock); + + if (signal_pending(current)) { + /* FIXME: there must be a better thing to return... */ + remove_wait_queue(&fset->fset_permit_queue, &wait); + EXIT; + return -ERESTARTSYS; + } + + /* FIXME: maybe there should be a timeout here. */ + + schedule(); + } + + remove_wait_queue(&fset->fset_permit_queue, &wait); + got_permit: + /* By this point fset->fset_permit_count is zero and we're holding the + * lock. */ + CDEBUG(D_CACHE, "InterMezzo: releasing permit inode %ld\n", + dentry->d_inode->i_ino); + + if (uuid != NULL) { + rc = izo_upc_revoke_permit(minor, fset->fset_name, uuid); + if (rc < 0) { + spin_unlock(&fset->fset_permit_lock); + EXIT; + return rc; + } + } + + izo_mark_fset(fset->fset_dentry, ~FSET_PERMIT_WAITING, 0, NULL); + izo_mark_fset(fset->fset_dentry, ~FSET_HASPERMIT, 0, NULL); + spin_unlock(&fset->fset_permit_lock); + EXIT; + return 0; +} + +inline int presto_is_read_only(struct presto_file_set * fset) +{ + int minor, mask; + struct presto_cache *cache = fset->fset_cache; + + minor= cache->cache_psdev->uc_minor; + mask= (ISLENTO(minor)? FSET_LENTO_RO : FSET_CLIENT_RO); + if ( fset->fset_flags & mask ) + return 1; + mask= (ISLENTO(minor)? CACHE_LENTO_RO : CACHE_CLIENT_RO); + return ((cache->cache_flags & mask)? 
1 : 0); +} diff --git a/fs/intermezzo/psdev.c b/fs/intermezzo/psdev.c new file mode 100644 index 000000000..40a85cc7e --- /dev/null +++ b/fs/intermezzo/psdev.c @@ -0,0 +1,647 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * An implementation of a loadable kernel mode driver providing + * multiple kernel/user space bidirectional communications links. + * + * Author: Alan Cox + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * Adapted to become the Linux 2.0 Coda pseudo device + * Peter Braam + * Michael Callahan + * + * Changes for Linux 2.1 + * Copyright (c) 1997 Carnegie-Mellon University + * + * Redone again for InterMezzo + * Copyright (c) 1998 Peter J. Braam + * Copyright (c) 2000 Mountain View Data, Inc. + * Copyright (c) 2000 Tacitus Systems, Inc. + * Copyright (c) 2001 Cluster File Systems, Inc. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + + +#ifdef PRESTO_DEVEL +int presto_print_entry = 1; +int presto_debug = 4095; +#else +int presto_print_entry = 0; +int presto_debug = 0; +#endif + +/* Like inode.c (presto_sym_iops), the initializer is just to prevent + izo_channels from appearing as a COMMON symbol (and therefore + interfering with other modules that use the same variable name). 
*/ +struct upc_channel izo_channels[MAX_CHANNEL] = {{0}}; + +int izo_psdev_get_free_channel(void) +{ + int i, result = -1; + + for (i = 0 ; i < MAX_CHANNEL ; i++ ) { + if (list_empty(&(izo_channels[i].uc_cache_list))) { + result = i; + break; + } + } + return result; +} + + +int izo_psdev_setpid(int minor) +{ + struct upc_channel *channel; + if (minor < 0 || minor >= MAX_CHANNEL) { + return -EINVAL; + } + + channel = &(izo_channels[minor]); + /* + * This ioctl is performed by each Lento that starts up + * and wants to do further communication with presto. + */ + CDEBUG(D_PSDEV, "Setting current pid to %d channel %d\n", + current->pid, minor); + channel->uc_pid = current->pid; + spin_lock(&channel->uc_lock); + if ( !list_empty(&channel->uc_processing) ) { + struct list_head *lh; + struct upc_req *req; + CERROR("WARNING: setpid & processing not empty!\n"); + list_for_each(lh, &channel->uc_processing) { + req = list_entry(lh, struct upc_req, rq_chain); + /* freeing of req and data is done by the sleeper */ + wake_up(&req->rq_sleep); + } + } + if ( !list_empty(&channel->uc_processing) ) { + CERROR("BAD: FAILDED TO CLEAN PROCESSING LIST!\n"); + } + spin_unlock(&channel->uc_lock); + EXIT; + return 0; +} + +int izo_psdev_setchannel(struct file *file, int fd) +{ + + struct file *psdev_file = fget(fd); + struct presto_cache *cache = presto_get_cache(file->f_dentry->d_inode); + + if (!psdev_file) { + CERROR("%s: no psdev_file!\n", __FUNCTION__); + return -EINVAL; + } + + if (!cache) { + CERROR("%s: no cache!\n", __FUNCTION__); + fput(psdev_file); + return -EINVAL; + } + + if (psdev_file->private_data) { + CERROR("%s: channel already set!\n", __FUNCTION__); + fput(psdev_file); + return -EINVAL; + } + + psdev_file->private_data = cache->cache_psdev; + fput(psdev_file); + EXIT; + return 0; +} + +inline int presto_lento_up(int minor) +{ + return izo_channels[minor].uc_pid; +} + +static unsigned int presto_psdev_poll(struct file *file, poll_table * wait) + { + struct upc_channel 
*channel = (struct upc_channel *)file->private_data; + unsigned int mask = POLLOUT | POLLWRNORM; + + /* ENTRY; this will flood you */ + if ( ! channel ) { + CERROR("%s: bad psdev file\n", __FUNCTION__); + return -EBADF; + } + + poll_wait(file, &(channel->uc_waitq), wait); + + spin_lock(&channel->uc_lock); + if (!list_empty(&channel->uc_pending)) { + CDEBUG(D_PSDEV, "Non-empty pending list.\n"); + mask |= POLLIN | POLLRDNORM; + } + spin_unlock(&channel->uc_lock); + + /* EXIT; will flood you */ + return mask; +} + +/* + * Receive a message written by Lento to the psdev + */ +static ssize_t presto_psdev_write(struct file *file, const char *buf, + size_t count, loff_t *off) +{ + struct upc_channel *channel = (struct upc_channel *)file->private_data; + struct upc_req *req = NULL; + struct upc_req *tmp; + struct list_head *lh; + struct izo_upcall_resp hdr; + int error; + + if ( ! channel ) { + CERROR("%s: bad psdev file\n", __FUNCTION__); + return -EBADF; + } + + /* Peek at the opcode, uniquefier */ + if ( count < sizeof(hdr) ) { + CERROR("presto_psdev_write: Lento didn't write full hdr.\n"); + return -EINVAL; + } + + error = copy_from_user(&hdr, buf, sizeof(hdr)); + if ( error ) + return -EFAULT; + + CDEBUG(D_PSDEV, "(process,opc,uniq)=(%d,%d,%d)\n", + current->pid, hdr.opcode, hdr.unique); + + spin_lock(&channel->uc_lock); + /* Look for the message on the processing queue. */ + list_for_each(lh, &channel->uc_processing) { + tmp = list_entry(lh, struct upc_req , rq_chain); + if (tmp->rq_unique == hdr.unique) { + req = tmp; + /* unlink here: keeps search length minimal */ + list_del_init(&req->rq_chain); + CDEBUG(D_PSDEV,"Eureka opc %d uniq %d!\n", + hdr.opcode, hdr.unique); + break; + } + } + spin_unlock(&channel->uc_lock); + if (!req) { + CERROR("psdev_write: msg (%d, %d) not found\n", + hdr.opcode, hdr.unique); + return(-ESRCH); + } + + /* move data into response buffer. 
*/ + if (req->rq_bufsize < count) { + CERROR("psdev_write: too much cnt: %d, cnt: %Zd, " + "opc: %d, uniq: %d.\n", + req->rq_bufsize, count, hdr.opcode, hdr.unique); + count = req->rq_bufsize; /* don't have more space! */ + } + error = copy_from_user(req->rq_data, buf, count); + if ( error ) + return -EFAULT; + + /* adjust outsize: good upcalls can be aware of this */ + req->rq_rep_size = count; + req->rq_flags |= REQ_WRITE; + + wake_up(&req->rq_sleep); + return(count); +} + +/* + * Read a message from the kernel to Lento + */ +static ssize_t presto_psdev_read(struct file * file, char * buf, + size_t count, loff_t *off) +{ + struct upc_channel *channel = (struct upc_channel *)file->private_data; + struct upc_req *req; + int result = count; + + if ( ! channel ) { + CERROR("%s: bad psdev file\n", __FUNCTION__); + return -EBADF; + } + + spin_lock(&channel->uc_lock); + if (list_empty(&(channel->uc_pending))) { + CDEBUG(D_UPCALL, "Empty pending list in read, not good\n"); + spin_unlock(&channel->uc_lock); + return -EINVAL; + } + req = list_entry((channel->uc_pending.next), struct upc_req, rq_chain); + list_del(&(req->rq_chain)); + if (! 
(req->rq_flags & REQ_ASYNC) ) { + list_add(&(req->rq_chain), channel->uc_processing.prev); + } + spin_unlock(&channel->uc_lock); + + req->rq_flags |= REQ_READ; + + /* Move the input args into userspace */ + CDEBUG(D_PSDEV, "\n"); + if (req->rq_bufsize <= count) { + result = req->rq_bufsize; + } + + if (count < req->rq_bufsize) { + CERROR ("psdev_read: buffer too small, read %Zd of %d bytes\n", + count, req->rq_bufsize); + } + + if ( copy_to_user(buf, req->rq_data, result) ) { + BUG(); + return -EFAULT; + } + + /* If request was asynchronous don't enqueue, but free */ + if (req->rq_flags & REQ_ASYNC) { + CDEBUG(D_PSDEV, "psdev_read: async msg (%d, %d), result %d\n", + req->rq_opcode, req->rq_unique, result); + PRESTO_FREE(req->rq_data, req->rq_bufsize); + PRESTO_FREE(req, sizeof(*req)); + return result; + } + + return result; +} + + +static int presto_psdev_open(struct inode * inode, struct file * file) +{ + ENTRY; + + file->private_data = NULL; + + CDEBUG(D_PSDEV, "Psdev_open: caller: %d, flags: %d\n", current->pid, file->f_flags); + + EXIT; + return 0; +} + + + +static int presto_psdev_release(struct inode * inode, struct file * file) +{ + struct upc_channel *channel = (struct upc_channel *)file->private_data; + struct upc_req *req; + struct list_head *lh; + ENTRY; + + if ( ! channel ) { + CERROR("%s: bad psdev file\n", __FUNCTION__); + return -EBADF; + } + + CDEBUG(D_PSDEV, "Lento: pid %d\n", current->pid); + channel->uc_pid = 0; + + /* Wake up clients so they can return. 
*/ + CDEBUG(D_PSDEV, "Wake up clients sleeping for pending.\n"); + spin_lock(&channel->uc_lock); + list_for_each(lh, &channel->uc_pending) { + req = list_entry(lh, struct upc_req, rq_chain); + + /* Async requests stay around for a new lento */ + if (req->rq_flags & REQ_ASYNC) { + continue; + } + /* the sleeper will free the req and data */ + req->rq_flags |= REQ_DEAD; + wake_up(&req->rq_sleep); + } + + CDEBUG(D_PSDEV, "Wake up clients sleeping for processing\n"); + list_for_each(lh, &channel->uc_processing) { + req = list_entry(lh, struct upc_req, rq_chain); + /* freeing of req and data is done by the sleeper */ + req->rq_flags |= REQ_DEAD; + wake_up(&req->rq_sleep); + } + spin_unlock(&channel->uc_lock); + CDEBUG(D_PSDEV, "Done.\n"); + + EXIT; + return 0; +} + +static struct file_operations presto_psdev_fops = { + .owner = THIS_MODULE, + .read = presto_psdev_read, + .write = presto_psdev_write, + .poll = presto_psdev_poll, + .open = presto_psdev_open, + .release = presto_psdev_release +}; + +/* modules setup */ +static struct miscdevice intermezzo_psdev = { + INTERMEZZO_MINOR, + "intermezzo", + &presto_psdev_fops +}; + +int presto_psdev_init(void) +{ + int i; + int err; + + if ( (err = misc_register(&intermezzo_psdev)) ) { + CERROR("%s: cannot register %d err %d\n", + __FUNCTION__, INTERMEZZO_MINOR, err); + return -EIO; + } + + memset(&izo_channels, 0, sizeof(izo_channels)); + for ( i = 0 ; i < MAX_CHANNEL ; i++ ) { + struct upc_channel *channel = &(izo_channels[i]); + INIT_LIST_HEAD(&channel->uc_pending); + INIT_LIST_HEAD(&channel->uc_processing); + INIT_LIST_HEAD(&channel->uc_cache_list); + init_waitqueue_head(&channel->uc_waitq); + channel->uc_lock = SPIN_LOCK_UNLOCKED; + channel->uc_hard = 0; + channel->uc_no_filter = 0; + channel->uc_no_journal = 0; + channel->uc_no_upcall = 0; + channel->uc_timeout = 30; + channel->uc_errorval = 0; + channel->uc_minor = i; + } + return 0; +} + +void presto_psdev_cleanup(void) +{ + int i; + + 
misc_deregister(&intermezzo_psdev); + + for ( i = 0 ; i < MAX_CHANNEL ; i++ ) { + struct upc_channel *channel = &(izo_channels[i]); + struct list_head *lh, *next; + + spin_lock(&channel->uc_lock); + if ( ! list_empty(&channel->uc_pending)) { + CERROR("Weird, tell Peter: module cleanup and pending list not empty dev %d\n", i); + } + if ( ! list_empty(&channel->uc_processing)) { + CERROR("Weird, tell Peter: module cleanup and processing list not empty dev %d\n", i); + } + if ( ! list_empty(&channel->uc_cache_list)) { + CERROR("Weird, tell Peter: module cleanup and cache listnot empty dev %d\n", i); + } + list_for_each_safe(lh, next, &channel->uc_pending) { + struct upc_req *req; + + req = list_entry(lh, struct upc_req, rq_chain); + if ( req->rq_flags & REQ_ASYNC ) { + list_del(&(req->rq_chain)); + CDEBUG(D_UPCALL, "free pending upcall type %d\n", + req->rq_opcode); + PRESTO_FREE(req->rq_data, req->rq_bufsize); + PRESTO_FREE(req, sizeof(struct upc_req)); + } else { + req->rq_flags |= REQ_DEAD; + wake_up(&req->rq_sleep); + } + } + list_for_each(lh, &channel->uc_processing) { + struct upc_req *req; + req = list_entry(lh, struct upc_req, rq_chain); + list_del(&(req->rq_chain)); + req->rq_flags |= REQ_DEAD; + wake_up(&req->rq_sleep); + } + spin_unlock(&channel->uc_lock); + } +} + +/* + * lento_upcall and lento_downcall routines + */ +static inline unsigned long lento_waitfor_upcall + (struct upc_channel *channel, struct upc_req *req, int minor) +{ + DECLARE_WAITQUEUE(wait, current); + unsigned long posttime; + + req->rq_posttime = posttime = jiffies; + + add_wait_queue(&req->rq_sleep, &wait); + for (;;) { + if ( izo_channels[minor].uc_hard == 0 ) + set_current_state(TASK_INTERRUPTIBLE); + else + set_current_state(TASK_UNINTERRUPTIBLE); + + /* got a reply */ + if ( req->rq_flags & (REQ_WRITE | REQ_DEAD) ) + break; + + /* these cases only apply when TASK_INTERRUPTIBLE */ + if ( !izo_channels[minor].uc_hard && signal_pending(current) ) { + /* if this process really wants to 
die, let it go */ + if (sigismember(&(current->pending.signal), SIGKILL)|| + sigismember(&(current->pending.signal), SIGINT) ) + break; + /* signal is present: after timeout always return + really smart idea, probably useless ... */ + if ( time_after(jiffies, req->rq_posttime + + izo_channels[minor].uc_timeout * HZ) ) + break; + } + schedule(); + } + + spin_lock(&channel->uc_lock); + list_del_init(&req->rq_chain); + spin_unlock(&channel->uc_lock); + remove_wait_queue(&req->rq_sleep, &wait); + set_current_state(TASK_RUNNING); + + CDEBUG(D_SPECIAL, "posttime: %ld, returned: %ld\n", + posttime, jiffies-posttime); + return (jiffies - posttime); +} + +/* + * lento_upcall will return an error in the case of + * failed communication with Lento _or_ will peek at Lento + * reply and return Lento's error. + * + * As lento has 2 types of errors, normal errors (positive) and internal + * errors (negative), normal errors are negated, while internal errors + * are all mapped to -EINTR, while showing a nice warning message. (jh) + * + * lento_upcall will always free buffer, either directly, when an upcall + * is read (in presto_psdev_read), when the filesystem is unmounted, or + * when the module is unloaded. + */ +int izo_upc_upcall(int minor, int *size, struct izo_upcall_hdr *buffer, + int async) +{ + unsigned long runtime; + struct upc_channel *channel; + struct izo_upcall_resp *out; + struct upc_req *req; + int error = 0; + + ENTRY; + channel = &(izo_channels[minor]); + + if (channel->uc_no_upcall) { + EXIT; + goto exit_buf; + } + if (!channel->uc_pid && !async) { + EXIT; + error = -ENXIO; + goto exit_buf; + } + + /* Format the request message. 
*/ + PRESTO_ALLOC(req, sizeof(struct upc_req)); + if ( !req ) { + EXIT; + error = -ENOMEM; + goto exit_buf; + } + req->rq_data = (void *)buffer; + req->rq_flags = 0; + req->rq_bufsize = *size; + req->rq_rep_size = 0; + req->rq_opcode = buffer->u_opc; + req->rq_unique = ++channel->uc_seq; + init_waitqueue_head(&req->rq_sleep); + + /* Fill in the common input args. */ + buffer->u_uniq = req->rq_unique; + buffer->u_async = async; + + /* Remove potential datarace possibility*/ + if ( async ) + req->rq_flags = REQ_ASYNC; + + spin_lock(&channel->uc_lock); + /* Append msg to pending queue and poke Lento. */ + list_add(&req->rq_chain, channel->uc_pending.prev); + spin_unlock(&channel->uc_lock); + CDEBUG(D_UPCALL, + "Proc %d waking Lento %d for(opc,uniq) =(%d,%d) msg at %p.\n", + current->pid, channel->uc_pid, req->rq_opcode, + req->rq_unique, req); + wake_up_interruptible(&channel->uc_waitq); + + if ( async ) { + /* req, rq_data are freed in presto_psdev_read for async */ + /* req->rq_flags = REQ_ASYNC;*/ + EXIT; + return 0; + } + + /* We can be interrupted while we wait for Lento to process + * our request. If the interrupt occurs before Lento has read + * the request, we dequeue and return. If it occurs after the + * read but before the reply, we dequeue, send a signal + * message, and return. If it occurs after the reply we ignore + * it. In no case do we want to restart the syscall. If it + * was interrupted by a lento shutdown (psdev_close), return + * ENODEV. */ + + /* Go to sleep. Wake up on signals only after the timeout. */ + runtime = lento_waitfor_upcall(channel, req, minor); + + CDEBUG(D_TIMING, "opc: %d time: %ld uniq: %d size: %d\n", + req->rq_opcode, jiffies - req->rq_posttime, + req->rq_unique, req->rq_rep_size); + CDEBUG(D_UPCALL, + "..process %d woken up by Lento for req at 0x%p, data at %p\n", + current->pid, req, req->rq_data); + + if (channel->uc_pid) { /* i.e. 
Lento is still alive */ + /* Op went through, interrupt or not we go on */ + if (req->rq_flags & REQ_WRITE) { + out = (struct izo_upcall_resp *)req->rq_data; + /* here we map positive Lento errors to kernel errors */ + if ( out->result < 0 ) { + CERROR("Tell Peter: Lento returns negative error %d, for oc %d!\n", + out->result, out->opcode); + out->result = EINVAL; + } + error = -out->result; + CDEBUG(D_UPCALL, "upcall: (u,o,r) (%d, %d, %d) out at %p\n", + out->unique, out->opcode, out->result, out); + *size = req->rq_rep_size; + EXIT; + goto exit_req; + } + /* Interrupted before lento read it. */ + if ( !(req->rq_flags & REQ_READ) && signal_pending(current)) { + CDEBUG(D_UPCALL, + "Interrupt before read: (op,un)=(%d,%d), flags %x\n", + req->rq_opcode, req->rq_unique, req->rq_flags); + /* perhaps the best way to convince the app to give up? */ + error = -EINTR; + EXIT; + goto exit_req; + } + + /* interrupted after Lento did its read, send signal */ + if ( (req->rq_flags & REQ_READ) && signal_pending(current) ) { + CDEBUG(D_UPCALL,"Interrupt after read: op = %d.%d, flags = %x\n", + req->rq_opcode, req->rq_unique, req->rq_flags); + + error = -EINTR; + } else { + CERROR("Lento: Strange interruption - tell Peter.\n"); + error = -EINTR; + } + } else { /* If lento died i.e. !UC_OPEN(channel) */ + CERROR("lento_upcall: Lento dead on (op,un) (%d.%d) flags %d\n", + req->rq_opcode, req->rq_unique, req->rq_flags); + error = -ENODEV; + } + +exit_req: + PRESTO_FREE(req, sizeof(struct upc_req)); +exit_buf: + PRESTO_FREE(buffer,*size); + return error; +} diff --git a/fs/intermezzo/replicator.c b/fs/intermezzo/replicator.c new file mode 100644 index 000000000..e7a0c5c17 --- /dev/null +++ b/fs/intermezzo/replicator.c @@ -0,0 +1,290 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * Copyright (C) 2001 Tacit Networks, Inc. 
+ * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Manage RCVD records for clients in the kernel + * + */ + +#include +#include + +#include +#include +#include + +#include "intermezzo_fs.h" + +/* + * this file contains a hash table of replicators/clients for a + * fileset. It allows fast lookup and update of reintegration status + */ + +struct izo_offset_rec { + struct list_head or_list; + char or_uuid[16]; + loff_t or_offset; +}; + +#define RCACHE_BITS 8 +#define RCACHE_SIZE (1 << RCACHE_BITS) +#define RCACHE_MASK (RCACHE_SIZE - 1) + +static struct list_head * +izo_rep_cache(void) +{ + int i; + struct list_head *cache; + PRESTO_ALLOC(cache, sizeof(struct list_head) * RCACHE_SIZE); + if (cache == NULL) { + CERROR("intermezzo-fatal: no memory for replicator cache\n"); + return NULL; + } + memset(cache, 0, sizeof(struct list_head) * RCACHE_SIZE); + for (i = 0; i < RCACHE_SIZE; i++) + INIT_LIST_HEAD(&cache[i]); + + return cache; +} + +static struct list_head * +izo_rep_hash(struct list_head *cache, char *uuid) +{ + return &cache[(RCACHE_MASK & uuid[1])]; +} + +static void +izo_rep_cache_clean(struct presto_file_set *fset) +{ + int i; + struct list_head *bucket; + struct list_head *tmp; + + if (fset->fset_clients == NULL) + return; + for (i = 0; i < RCACHE_SIZE; i++) { + tmp = bucket = &fset->fset_clients[i]; + 
+ tmp = tmp->next; + while (tmp != bucket) { + struct izo_offset_rec *offrec; + tmp = tmp->next; + list_del(tmp); + offrec = list_entry(tmp, struct izo_offset_rec, + or_list); + PRESTO_FREE(offrec, sizeof(struct izo_offset_rec)); + } + } +} + +struct izo_offset_rec * +izo_rep_cache_find(struct presto_file_set *fset, char *uuid) +{ + struct list_head *tmp, *buck = izo_rep_hash(fset->fset_clients, uuid); + struct izo_offset_rec *rec = NULL; + + list_for_each(tmp, buck) { + rec = list_entry(tmp, struct izo_offset_rec, or_list); + if ( memcmp(rec->or_uuid, uuid, sizeof(rec->or_uuid)) == 0 ) + return rec; + } + + return NULL; +} + +static int +izo_rep_cache_add(struct presto_file_set *fset, struct izo_rcvd_rec *rec, + loff_t offset) +{ + struct izo_offset_rec *offrec; + + if (izo_rep_cache_find(fset, rec->lr_uuid)) { + CERROR("izo: duplicate client entry %s off %Ld\n", + fset->fset_name, offset); + return -EINVAL; + } + + PRESTO_ALLOC(offrec, sizeof(*offrec)); + if (offrec == NULL) { + CERROR("izo: cannot allocate offrec\n"); + return -ENOMEM; + } + + memcpy(offrec->or_uuid, rec->lr_uuid, sizeof(rec->lr_uuid)); + offrec->or_offset = offset; + + list_add(&offrec->or_list, + izo_rep_hash(fset->fset_clients, rec->lr_uuid)); + return 0; +} + +int +izo_rep_cache_init(struct presto_file_set *fset) +{ + struct izo_rcvd_rec rec; + loff_t offset = 0, last_offset = 0; + + fset->fset_clients = izo_rep_cache(); + if (fset->fset_clients == NULL) { + CERROR("Error initializing client cache\n"); + return -ENOMEM; + } + + while ( presto_fread(fset->fset_rcvd.fd_file, (char *)&rec, + sizeof(rec), &offset) == sizeof(rec) ) { + int rc; + + if ((rc = izo_rep_cache_add(fset, &rec, last_offset)) < 0) { + izo_rep_cache_clean(fset); + return rc; + } + + last_offset = offset; + } + + return 0; +} + +/* + * Return local last_rcvd record for the client. Update or create + * if necessary. + * + * XXX: After this call, any -EINVAL from izo_rcvd_get is a real error. 
+ */ +int +izo_repstatus(struct presto_file_set *fset, __u64 client_kmlsize, + struct izo_rcvd_rec *lr_client, struct izo_rcvd_rec *lr_server) +{ + int rc; + rc = izo_rcvd_get(lr_server, fset, lr_client->lr_uuid); + if (rc < 0 && rc != -EINVAL) { + return rc; + } + + /* client is new or has been reset. */ + if (rc < 0 || (client_kmlsize == 0 && lr_client->lr_remote_offset == 0)) { + memset(lr_server, 0, sizeof(*lr_server)); + memcpy(lr_server->lr_uuid, lr_client->lr_uuid, sizeof(lr_server->lr_uuid)); + rc = izo_rcvd_write(fset, lr_server); + if (rc < 0) + return rc; + } + + /* update intersync */ + rc = izo_upc_repstatus(presto_f2m(fset), fset->fset_name, lr_server); + return rc; +} + +loff_t +izo_rcvd_get(struct izo_rcvd_rec *rec, struct presto_file_set *fset, char *uuid) +{ + struct izo_offset_rec *offrec; + struct izo_rcvd_rec tmprec; + loff_t offset; + + offrec = izo_rep_cache_find(fset, uuid); + if (offrec == NULL) { + CDEBUG(D_SPECIAL, "izo_get_rcvd: uuid not in hash.\n"); + return -EINVAL; + } + offset = offrec->or_offset; + + if (rec == NULL) + return offset; + + if (presto_fread(fset->fset_rcvd.fd_file, (char *)&tmprec, + sizeof(tmprec), &offset) != sizeof(tmprec)) { + CERROR("izo_get_rcvd: Unable to read from last_rcvd file offset " + "%Lu\n", offset); + return -EIO; + } + + memcpy(rec->lr_uuid, tmprec.lr_uuid, sizeof(tmprec.lr_uuid)); + rec->lr_remote_recno = le64_to_cpu(tmprec.lr_remote_recno); + rec->lr_remote_offset = le64_to_cpu(tmprec.lr_remote_offset); + rec->lr_local_recno = le64_to_cpu(tmprec.lr_local_recno); + rec->lr_local_offset = le64_to_cpu(tmprec.lr_local_offset); + rec->lr_last_ctime = le64_to_cpu(tmprec.lr_last_ctime); + + return offrec->or_offset; +} + +/* Try to lookup the UUID in the hash. Insert it if it isn't found. Write the + * data to the file. + * + * Returns the offset of the beginning of the record in the last_rcvd file. 
*/ +loff_t +izo_rcvd_write(struct presto_file_set *fset, struct izo_rcvd_rec *rec) +{ + struct izo_offset_rec *offrec; + loff_t offset, rc; + + ENTRY; + + offrec = izo_rep_cache_find(fset, rec->lr_uuid); + if (offrec == NULL) { + /* I don't think it should be possible for an entry to be not in + * the hash table without also having an invalid offset, but we + * handle it gracefully regardless. */ + write_lock(&fset->fset_rcvd.fd_lock); + offset = fset->fset_rcvd.fd_offset; + fset->fset_rcvd.fd_offset += sizeof(*rec); + write_unlock(&fset->fset_rcvd.fd_lock); + + rc = izo_rep_cache_add(fset, rec, offset); + if (rc < 0) { + EXIT; + return rc; + } + } else + offset = offrec->or_offset; + + + rc = presto_fwrite(fset->fset_rcvd.fd_file, (char *)rec, sizeof(*rec), + &offset); + if (rc == sizeof(*rec)) + /* presto_fwrite() advances 'offset' */ + rc = offset - sizeof(*rec); + + EXIT; + return rc; +} + +loff_t +izo_rcvd_upd_remote(struct presto_file_set *fset, char * uuid, __u64 remote_recno, + __u64 remote_offset) +{ + struct izo_rcvd_rec rec; + + loff_t rc; + + ENTRY; + rc = izo_rcvd_get(&rec, fset, uuid); + if (rc < 0) + return rc; + rec.lr_remote_recno = remote_recno; + rec.lr_remote_offset = remote_offset; + + rc = izo_rcvd_write(fset, &rec); + EXIT; + if (rc < 0) + return rc; + return 0; +} diff --git a/fs/intermezzo/super.c b/fs/intermezzo/super.c new file mode 100644 index 000000000..9993ef2bf --- /dev/null +++ b/fs/intermezzo/super.c @@ -0,0 +1,407 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 1998 Peter J. Braam + * Copyright (C) 2000 Stelias Computing, Inc. + * Copyright (C) 2000 Red Hat, Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * presto's super.c + */ + +static char rcsid[] __attribute ((unused)) = "$Id: super.c,v 1.4 2002/10/12 02:16:19 rread Exp $"; +#define INTERMEZZO_VERSION "$Revision: 1.4 $" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + +#ifdef PRESTO_DEBUG +long presto_vmemory = 0; +long presto_kmemory = 0; +#endif + +/* returns an allocated string, copied out from data if opt is found */ +static char *opt_read(const char *opt, char *data) +{ + char *value; + char *retval; + + CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data); + if ( strncmp(opt, data, strlen(opt)) ) + return NULL; + + if ( (value = strchr(data, '=')) == NULL ) + return NULL; + + value++; + PRESTO_ALLOC(retval, strlen(value) + 1); + if ( !retval ) { + CERROR("InterMezzo: Out of memory!\n"); + return NULL; + } + + strcpy(retval, value); + CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval); + return retval; +} + +static void opt_store(char **dst, char *opt) +{ + if (!dst) + CERROR("intermezzo: store_opt, error dst == NULL\n"); + + if (*dst) + PRESTO_FREE(*dst, strlen(*dst) + 1); + *dst = opt; +} + +static void opt_set_default(char **dst, char *defval) +{ + if (!dst) + CERROR("intermezzo: store_opt, error dst == NULL\n"); + + if (*dst) + PRESTO_FREE(*dst, strlen(*dst) + 1); + if (defval) { + char *def_alloced; + PRESTO_ALLOC(def_alloced, strlen(defval)+1); + if (!def_alloced) { + 
CERROR("InterMezzo: Out of memory!\n"); + return ; + } + strcpy(def_alloced, defval); + *dst = def_alloced; + } +} + + +/* Find the options for InterMezzo in "options", saving them into the + * passed pointers. If the pointer is null, the option is discarded. + * Copy out all non-InterMezzo options into cache_data (to be passed + * to the read_super operation of the cache). The return value will + * be a pointer to the end of the cache_data. + */ +static char *presto_options(struct file_system_type *fstype, + char *options, char *cache_data, + char **cache_type, char **fileset, + char **channel) +{ + char *this_char; + char *opt_ptr = options; + char *cache_data_end = cache_data; + + /* set the defaults */ + if (strcmp(fstype->name, "intermezzo") == 0) + opt_set_default(cache_type, "ext3"); + else + opt_set_default(cache_type, "tmpfs"); + + if (!options || !cache_data) + return cache_data_end; + + + CDEBUG(D_SUPER, "parsing options\n"); + while ((this_char = strsep (&opt_ptr, ",")) != NULL) { + char *opt; + if (!*this_char) + continue; + CDEBUG(D_SUPER, "this_char %s\n", this_char); + + if ( (opt = opt_read("fileset", this_char)) ) { + opt_store(fileset, opt); + continue; + } + if ( (opt = opt_read("cache_type", this_char)) ) { + opt_store(cache_type, opt); + continue; + } + if ( (opt = opt_read("channel", this_char)) ) { + opt_store(channel, opt); + continue; + } + + cache_data_end += + sprintf(cache_data_end, "%s%s", + cache_data_end != cache_data ? 
",":"", + this_char); + } + + return cache_data_end; +} + +static int presto_set_channel(struct presto_cache *cache, char *channel) +{ + int minor; + + ENTRY; + if (!channel) { + minor = izo_psdev_get_free_channel(); + } else { + minor = simple_strtoul(channel, NULL, 0); + } + if (minor < 0 || minor >= MAX_CHANNEL) { + CERROR("all channels in use or channel too large %d\n", + minor); + return -EINVAL; + } + + cache->cache_psdev = &(izo_channels[minor]); + list_add(&cache->cache_channel_list, + &cache->cache_psdev->uc_cache_list); + + EXIT; + return minor; +} + +/* We always need to remove the presto options before passing + mount options to cache FS */ +struct super_block * +presto_get_sb(struct file_system_type *izo_type, int flags, + const char *devname, void *data) +{ + struct file_system_type *fstype; + struct presto_cache *cache = NULL; + char *cache_data = NULL; + char *cache_data_end; + char *cache_type = NULL; + char *fileset = NULL; + char *channel = NULL; + struct super_block *sb; + int err; + unsigned int minor; + + ENTRY; + + /* reserve space for the cache's data */ + PRESTO_ALLOC(cache_data, PAGE_SIZE); + if ( !cache_data ) { + CERROR("presto_read_super: Cannot allocate data page.\n"); + EXIT; + goto out_err; + } + + /* read and validate options */ + cache_data_end = presto_options(izo_type, data, cache_data, &cache_type, + &fileset, &channel); + + /* was there anything for the cache filesystem in the data? 
*/ + if (cache_data_end == cache_data) { + PRESTO_FREE(cache_data, PAGE_SIZE); + cache_data_end = cache_data = NULL; + } else { + CDEBUG(D_SUPER, "cache_data at %p is: %s\n", cache_data, + cache_data); + } + + /* set up the cache */ + cache = presto_cache_init(); + if ( !cache ) { + CERROR("presto_read_super: failure allocating cache.\n"); + EXIT; + goto out_err; + } + cache->cache_type = cache_type; + + /* link cache to channel */ + minor = presto_set_channel(cache, channel); + if (minor < 0) { + EXIT; + goto out_err; + } + + CDEBUG(D_SUPER, "Presto: type=%s, fset=%s, dev= %d, flags %x\n", + cache_type, fileset?fileset:"NULL", minor, cache->cache_flags); + + /* get the filter for the cache */ + fstype = get_fs_type(cache_type); + cache->cache_filter = filter_get_filter_fs((const char *)cache_type); + if ( !fstype || !cache->cache_filter) { + CERROR("Presto: unrecognized fs type or cache type\n"); + EXIT; + goto out_err; + } + + sb = fstype->get_sb(fstype, flags, devname, cache_data); + + if ( !sb || IS_ERR(sb)) { + CERROR("InterMezzo: cache mount failure.\n"); + EXIT; + goto out_err; + } + + /* can we in fact mount the cache */ + if (sb->s_bdev && (strcmp(fstype->name, "vintermezzo") == 0)) { + CERROR("vintermezzo must not be used with a block device\n"); + EXIT; + goto out_err; + } + + /* this might have been freed above */ + if (cache_data) { + PRESTO_FREE(cache_data, PAGE_SIZE); + cache_data = NULL; + } + + cache->cache_sb = sb; + cache->cache_root = dget(sb->s_root); + + /* we now know the dev of the cache: hash the cache */ + presto_cache_add(cache); + err = izo_prepare_fileset(sb->s_root, fileset); + + filter_setup_journal_ops(cache->cache_filter, cache->cache_type); + + /* make sure we have our own super operations: sb + still contains the cache operations */ + filter_setup_super_ops(cache->cache_filter, sb->s_op, + &presto_super_ops); + sb->s_op = filter_c2usops(cache->cache_filter); + + /* get izo directory operations: sb->s_root->d_inode exists now */ + 
filter_setup_dir_ops(cache->cache_filter, sb->s_root->d_inode, + &presto_dir_iops, &presto_dir_fops); + filter_setup_dentry_ops(cache->cache_filter, sb->s_root->d_op, + &presto_dentry_ops); + sb->s_root->d_inode->i_op = filter_c2udiops(cache->cache_filter); + sb->s_root->d_inode->i_fop = filter_c2udfops(cache->cache_filter); + sb->s_root->d_op = filter_c2udops(cache->cache_filter); + + EXIT; + return sb; + + out_err: + CDEBUG(D_SUPER, "out_err called\n"); + if (cache) + PRESTO_FREE(cache, sizeof(struct presto_cache)); + if (cache_data) + PRESTO_FREE(cache_data, PAGE_SIZE); + if (fileset) + PRESTO_FREE(fileset, strlen(fileset) + 1); + if (channel) + PRESTO_FREE(channel, strlen(channel) + 1); + if (cache_type) + PRESTO_FREE(cache_type, strlen(cache_type) + 1); + + CDEBUG(D_MALLOC, "mount error exit: kmem %ld, vmem %ld\n", + presto_kmemory, presto_vmemory); + return ERR_PTR(-EINVAL); +} + + + + +#ifdef PRESTO_DEVEL +static DECLARE_FSTYPE(presto_fs_type, "izo", presto_read_super, FS_REQUIRES_DEV); +static DECLARE_FSTYPE(vpresto_fs_type, "vintermezzo", presto_read_super, FS_LITTER); +#else +static struct file_system_type vpresto_fs_type = { + .owner = THIS_MODULE, + .name = "vintermezzo", + .get_sb = presto_get_sb, + .kill_sb = kill_litter_super, +}; +static struct file_system_type presto_fs_type = { + .owner = THIS_MODULE, + .name = "intermezzo", + .get_sb = presto_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; +#endif + + + +int __init init_intermezzo_fs(void) +{ + int status; + + printk(KERN_INFO "InterMezzo Kernel/Intersync communications " INTERMEZZO_VERSION + " info@clusterfs.com\n"); + + status = presto_psdev_init(); + if ( status ) { + CERROR("Problem (%d) in init_intermezzo_psdev\n", status); + return status; + } + + status = init_intermezzo_sysctl(); + if (status) { + CERROR("presto: failed in init_intermezzo_sysctl!\n"); + } + + presto_cache_init_hash(); + + if (!presto_init_ddata_cache()) { + CERROR("presto out of memory!\n"); + 
return -ENOMEM; + } + + status = register_filesystem(&presto_fs_type); + if (status) { + CERROR("presto: failed in register_filesystem!\n"); + } + status = register_filesystem(&vpresto_fs_type); + if (status) { + CERROR("vpresto: failed in register_filesystem!\n"); + } + return status; +} + +void __exit exit_intermezzo_fs(void) +{ + int err; + + ENTRY; + + if ( (err = unregister_filesystem(&presto_fs_type)) != 0 ) { + CERROR("presto: failed to unregister filesystem\n"); + } + if ( (err = unregister_filesystem(&vpresto_fs_type)) != 0 ) { + CERROR("vpresto: failed to unregister filesystem\n"); + } + + presto_psdev_cleanup(); + cleanup_intermezzo_sysctl(); + presto_cleanup_ddata_cache(); + CERROR("after cleanup: kmem %ld, vmem %ld\n", + presto_kmemory, presto_vmemory); +} + + +MODULE_AUTHOR("Cluster Filesystems Inc. "); +MODULE_DESCRIPTION("InterMezzo Kernel/Intersync communications " INTERMEZZO_VERSION); +MODULE_LICENSE("GPL"); + +module_init(init_intermezzo_fs) +module_exit(exit_intermezzo_fs) diff --git a/fs/intermezzo/sysctl.c b/fs/intermezzo/sysctl.c new file mode 100644 index 000000000..9436adf7a --- /dev/null +++ b/fs/intermezzo/sysctl.c @@ -0,0 +1,368 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 1999 Peter J. Braam + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Sysctrl entries for Intermezzo! + */ + +#include /* for CONFIG_PROC_FS */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + +/* /proc entries */ + +#ifdef CONFIG_PROC_FS +struct proc_dir_entry *proc_fs_intermezzo; +int intermezzo_mount_get_info( char * buffer, char ** start, off_t offset, + int length) +{ + int len=0; + + /* this works as long as we are below 1024 characters! */ + *start = buffer + offset; + len -= offset; + + if ( len < 0 ) + return -EINVAL; + + return len; +} + +#endif + + +/* SYSCTL below */ + +static struct ctl_table_header *intermezzo_table_header = NULL; +/* 0x100 to avoid any chance of collisions at any point in the tree with + * non-directories + */ +#define PSDEV_INTERMEZZO (0x100) + +#define PSDEV_DEBUG 1 /* control debugging */ +#define PSDEV_TRACE 2 /* control enter/leave pattern */ +#define PSDEV_TIMEOUT 3 /* timeout on upcalls to become intrble */ +#define PSDEV_HARD 4 /* mount type "hard" or "soft" */ +#define PSDEV_NO_FILTER 5 /* controls presto_chk */ +#define PSDEV_NO_JOURNAL 6 /* controls presto_chk */ +#define PSDEV_NO_UPCALL 7 /* controls lento_upcall */ +#define PSDEV_ERRORVAL 8 /* controls presto_debug_fail_blkdev */ +#define PSDEV_EXCL_GID 9 /* which GID is ignored by presto */ +#define PSDEV_BYTES_TO_CLOSE 11 /* bytes to write before close */ + +/* These are global presto control options */ +#define PRESTO_PRIMARY_CTLCNT 2 +static struct ctl_table presto_table[ PRESTO_PRIMARY_CTLCNT + MAX_CHANNEL + 1] = +{ + {PSDEV_DEBUG, "debug", &presto_debug, sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_TRACE, "trace", &presto_print_entry, sizeof(int), 0644, NULL, 
&proc_dointvec}, +}; + +/* + * Intalling the sysctl entries: strategy + * - have templates for each /proc/sys/intermezzo/ entry + * such an entry exists for each /dev/presto + * (proto_channel_entry) + * - have a template for the contents of such directories + * (proto_psdev_table) + * - have the master table (presto_table) + * + * When installing, malloc, memcpy and fix up the pointers to point to + * the appropriate constants in izo_channels[your_minor] + */ + +static ctl_table proto_psdev_table[] = { + {PSDEV_HARD, "hard", 0, sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_NO_FILTER, "no_filter", 0, sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_NO_JOURNAL, "no_journal", NULL, sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_NO_UPCALL, "no_upcall", NULL, sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_TIMEOUT, "timeout", NULL, sizeof(int), 0644, NULL, &proc_dointvec}, +#ifdef PRESTO_DEBUG + {PSDEV_ERRORVAL, "errorval", NULL, sizeof(int), 0644, NULL, &proc_dointvec}, +#endif + { 0 } +}; + +static ctl_table proto_channel_entry = { + PSDEV_INTERMEZZO, 0, NULL, 0, 0555, 0, +}; + +static ctl_table intermezzo_table[2] = { + {PSDEV_INTERMEZZO, "intermezzo", NULL, 0, 0555, presto_table}, + {0} +}; + +/* support for external setting and getting of opts. */ +/* particularly via ioctl. 
The Right way to do this is via sysctl, + * but that will have to wait until intermezzo gets its own nice set of + * sysctl IDs + */ +/* we made these separate as setting may in future be more restricted + * than getting + */ +#ifdef RON_MINNICH +int dosetopt(int minor, struct psdev_opt *opt) +{ + int retval = 0; + int newval = opt->optval; + + ENTRY; + + switch(opt->optname) { + + case PSDEV_TIMEOUT: + izo_channels[minor].uc_timeout = newval; + break; + + case PSDEV_HARD: + izo_channels[minor].uc_hard = newval; + break; + + case PSDEV_NO_FILTER: + izo_channels[minor].uc_no_filter = newval; + break; + + case PSDEV_NO_JOURNAL: + izo_channels[minor].uc_no_journal = newval; + break; + + case PSDEV_NO_UPCALL: + izo_channels[minor].uc_no_upcall = newval; + break; + +#ifdef PRESTO_DEBUG + case PSDEV_ERRORVAL: { + /* If we have a positive arg, set a breakpoint for that + * value. If we have a negative arg, make that device + * read-only. FIXME It would be much better to only + * allow setting the underlying device read-only for the + * current presto cache. 
+ */ + int errorval = izo_channels[minor].uc_errorval; + if (errorval < 0) { + if (newval == 0) + set_device_ro(-errorval, 0); + else + CERROR("device %s already read only\n", + kdevname(-errorval)); + } else { + if (newval < 0) + set_device_ro(-newval, 1); + izo_channels[minor].uc_errorval = newval; + CDEBUG(D_PSDEV, "setting errorval to %d\n", newval); + } + + break; + } +#endif + + case PSDEV_TRACE: + case PSDEV_DEBUG: + case PSDEV_BYTES_TO_CLOSE: + default: + CDEBUG(D_PSDEV, + "ioctl: dosetopt: minor %d, bad optname 0x%x, \n", + minor, opt->optname); + + retval = -EINVAL; + } + + EXIT; + return retval; +} + +int dogetopt(int minor, struct psdev_opt *opt) +{ + int retval = 0; + + ENTRY; + + switch(opt->optname) { + + case PSDEV_TIMEOUT: + opt->optval = izo_channels[minor].uc_timeout; + break; + + case PSDEV_HARD: + opt->optval = izo_channels[minor].uc_hard; + break; + + case PSDEV_NO_FILTER: + opt->optval = izo_channels[minor].uc_no_filter; + break; + + case PSDEV_NO_JOURNAL: + opt->optval = izo_channels[minor].uc_no_journal; + break; + + case PSDEV_NO_UPCALL: + opt->optval = izo_channels[minor].uc_no_upcall; + break; + +#ifdef PSDEV_DEBUG + case PSDEV_ERRORVAL: { + int errorval = izo_channels[minor].uc_errorval; + if (errorval < 0 && is_read_only(-errorval)) + CERROR("device %s has been set read-only\n", + kdevname(-errorval)); + opt->optval = izo_channels[minor].uc_errorval; + break; + } +#endif + + case PSDEV_TRACE: + case PSDEV_DEBUG: + case PSDEV_BYTES_TO_CLOSE: + default: + CDEBUG(D_PSDEV, + "ioctl: dogetopt: minor %d, bad optval 0x%x, \n", + minor, opt->optname); + + retval = -EINVAL; + } + + EXIT; + return retval; +} +#endif + + +/* allocate the tables for the presto devices. 
We need + * sizeof(proto_channel_table)/sizeof(proto_channel_table[0]) + * entries for each dev + */ +int /* __init */ init_intermezzo_sysctl(void) +{ + int i; + int total_dev = MAX_CHANNEL; + int entries_per_dev = sizeof(proto_psdev_table) / + sizeof(proto_psdev_table[0]); + int total_entries = entries_per_dev * total_dev; + ctl_table *dev_ctl_table; + + PRESTO_ALLOC(dev_ctl_table, sizeof(ctl_table) * total_entries); + + if (! dev_ctl_table) { + CERROR("WARNING: presto couldn't allocate dev_ctl_table\n"); + EXIT; + return -ENOMEM; + } + + /* now fill in the entries ... we put the individual presto + * entries at the end of the table, and the per-presto stuff + * starting at the front. We assume that the compiler makes + * this code more efficient, but really, who cares ... it + * happens once per reboot. + */ + for(i = 0; i < total_dev; i++) { + void *p; + + /* entry for this /proc/sys/intermezzo/intermezzo"i" */ + ctl_table *psdev = &presto_table[i + PRESTO_PRIMARY_CTLCNT]; + /* entries for the individual "files" in this "directory" */ + ctl_table *psdev_entries = &dev_ctl_table[i * entries_per_dev]; + /* init the psdev and psdev_entries with the prototypes */ + *psdev = proto_channel_entry; + memcpy(psdev_entries, proto_psdev_table, + sizeof(proto_psdev_table)); + /* now specialize them ... */ + /* the psdev has to point to psdev_entries, and fix the number */ + psdev->ctl_name = psdev->ctl_name + i + 1; /* sorry */ + + PRESTO_ALLOC(p, PROCNAME_SIZE); + psdev->procname = p; + if (!psdev->procname) { + PRESTO_FREE(dev_ctl_table, + sizeof(ctl_table) * total_entries); + return -ENOMEM; + } + sprintf((char *) psdev->procname, "intermezzo%d", i); + /* hook presto into */ + psdev->child = psdev_entries; + + /* now for each psdev entry ... 
*/ + psdev_entries[0].data = &(izo_channels[i].uc_hard); + psdev_entries[1].data = &(izo_channels[i].uc_no_filter); + psdev_entries[2].data = &(izo_channels[i].uc_no_journal); + psdev_entries[3].data = &(izo_channels[i].uc_no_upcall); + psdev_entries[4].data = &(izo_channels[i].uc_timeout); +#ifdef PRESTO_DEBUG + psdev_entries[5].data = &(izo_channels[i].uc_errorval); +#endif + } + + +#ifdef CONFIG_SYSCTL + if ( !intermezzo_table_header ) + intermezzo_table_header = + register_sysctl_table(intermezzo_table, 0); +#endif +#ifdef CONFIG_PROC_FS + proc_fs_intermezzo = proc_mkdir("intermezzo", proc_root_fs); + proc_fs_intermezzo->owner = THIS_MODULE; + create_proc_info_entry("mounts", 0, proc_fs_intermezzo, + intermezzo_mount_get_info); +#endif + return 0; +} + +void cleanup_intermezzo_sysctl(void) +{ + int total_dev = MAX_CHANNEL; + int entries_per_dev = sizeof(proto_psdev_table) / + sizeof(proto_psdev_table[0]); + int total_entries = entries_per_dev * total_dev; + int i; + +#ifdef CONFIG_SYSCTL + if ( intermezzo_table_header ) + unregister_sysctl_table(intermezzo_table_header); + intermezzo_table_header = NULL; +#endif + for(i = 0; i < total_dev; i++) { + /* entry for this /proc/sys/intermezzo/intermezzo"i" */ + ctl_table *psdev = &presto_table[i + PRESTO_PRIMARY_CTLCNT]; + PRESTO_FREE(psdev->procname, PROCNAME_SIZE); + } + /* presto_table[PRESTO_PRIMARY_CTLCNT].child points to the + * dev_ctl_table previously allocated in init_intermezzo_psdev() + */ + PRESTO_FREE(presto_table[PRESTO_PRIMARY_CTLCNT].child, sizeof(ctl_table) * total_entries); + +#ifdef CONFIG_PROC_FS + remove_proc_entry("mounts", proc_fs_intermezzo); + remove_proc_entry("intermezzo", proc_root_fs); +#endif +} + diff --git a/fs/intermezzo/upcall.c b/fs/intermezzo/upcall.c new file mode 100644 index 000000000..8019157dd --- /dev/null +++ b/fs/intermezzo/upcall.c @@ -0,0 +1,559 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright 
(C) 2001, 2002 Cluster File Systems, Inc. + * Copyright (C) 2001 Tacit Networks, Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Mostly platform independent upcall operations to a cache manager: + * -- upcalls + * -- upcall routines + * + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "intermezzo_lib.h" +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + +#include "intermezzo_idl.h" + +/* + At present: + -- Asynchronous calls: + - kml: give a "more" kml indication to userland + - kml_truncate: initiate KML truncation + - release_permit: kernel is done with permit + -- Synchronous + - open: fetch file + - permit: get a permit + + Errors returned by user level code are positive + + */ + +static struct izo_upcall_hdr *upc_pack(__u32 opcode, int pathlen, char *path, + char *fsetname, int reclen, char *rec, + int *size) +{ + struct izo_upcall_hdr *hdr; + char *ptr; + ENTRY; + + *size = sizeof(struct izo_upcall_hdr); + if ( fsetname ) { + *size += round_strlen(fsetname); + } + if ( path ) { + *size += round_strlen(path); + } + if ( rec ) { + *size += size_round(reclen); + } + PRESTO_ALLOC(hdr, *size); + if (!hdr) { + CERROR("intermezzo upcall: out of memory (opc %d)\n", 
opcode); + EXIT; + return NULL; + } + memset(hdr, 0, *size); + + ptr = (char *)hdr + sizeof(*hdr); + + /* XXX do we need fsuid ? */ + hdr->u_len = *size; + hdr->u_version = IZO_UPC_VERSION; + hdr->u_opc = opcode; + hdr->u_pid = current->pid; + hdr->u_uid = current->fsuid; + + if (path) { + /*XXX Robert: please review what len to pass in for + NUL terminated strings */ + hdr->u_pathlen = strlen(path); + LOGL0(path, hdr->u_pathlen, ptr); + } + if (fsetname) { + hdr->u_fsetlen = strlen(fsetname); + LOGL0(fsetname, strlen(fsetname), ptr); + } + if (rec) { + hdr->u_reclen = reclen; + LOGL(rec, reclen, ptr); + } + + EXIT; + return hdr; +} + +/* the upcalls */ +int izo_upc_kml(int minor, __u64 offset, __u32 first_recno, __u64 length, __u32 last_recno, char *fsetname) +{ + int size; + int error; + struct izo_upcall_hdr *hdr; + + ENTRY; + if (!presto_lento_up(minor)) { + EXIT; + return 0; + } + + hdr = upc_pack(IZO_UPC_KML, 0, NULL, fsetname, 0, NULL, &size); + if (!hdr || IS_ERR(hdr)) { + EXIT; + return -PTR_ERR(hdr); + } + + hdr->u_offset = offset; + hdr->u_first_recno = first_recno; + hdr->u_length = length; + hdr->u_last_recno = last_recno; + + CDEBUG(D_UPCALL, "KML: fileset %s, offset %Lu, length %Lu, " + "first %u, last %d; minor %d\n", + fsetname, + (unsigned long long) hdr->u_offset, + (unsigned long long) hdr->u_length, + hdr->u_first_recno, + hdr->u_last_recno, minor); + + error = izo_upc_upcall(minor, &size, hdr, ASYNCHRONOUS); + + EXIT; + return -error; +} + +int izo_upc_kml_truncate(int minor, __u64 length, __u32 last_recno, char *fsetname) +{ + int size; + int error; + struct izo_upcall_hdr *hdr; + + ENTRY; + if (!presto_lento_up(minor)) { + EXIT; + return 0; + } + + hdr = upc_pack(IZO_UPC_KML_TRUNC, 0, NULL, fsetname, 0, NULL, &size); + if (!hdr || IS_ERR(hdr)) { + EXIT; + return -PTR_ERR(hdr); + } + + hdr->u_length = length; + hdr->u_last_recno = last_recno; + + CDEBUG(D_UPCALL, "KML TRUNCATE: fileset %s, length %Lu, " + "last recno %d, minor %d\n", + 
fsetname, + (unsigned long long) hdr->u_length, + hdr->u_last_recno, minor); + + error = izo_upc_upcall(minor, &size, hdr, ASYNCHRONOUS); + + EXIT; + return error; +} + +int izo_upc_open(int minor, __u32 pathlen, char *path, char *fsetname, struct lento_vfs_context *info) +{ + int size; + int error; + struct izo_upcall_hdr *hdr; + ENTRY; + + if (!presto_lento_up(minor)) { + EXIT; + return -EIO; + } + + hdr = upc_pack(IZO_UPC_OPEN, pathlen, path, fsetname, + sizeof(*info), (char*)info, &size); + if (!hdr || IS_ERR(hdr)) { + EXIT; + return -PTR_ERR(hdr); + } + + CDEBUG(D_UPCALL, "path %s\n", path); + + error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); + if (error) + CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error); + + EXIT; + return -error; +} + +int izo_upc_get_fileid(int minor, __u32 reclen, char *rec, + __u32 pathlen, char *path, char *fsetname) +{ + int size; + int error; + struct izo_upcall_hdr *hdr; + ENTRY; + + if (!presto_lento_up(minor)) { + EXIT; + return -EIO; + } + + hdr = upc_pack(IZO_UPC_GET_FILEID, pathlen, path, fsetname, reclen, rec, &size); + if (!hdr || IS_ERR(hdr)) { + EXIT; + return -PTR_ERR(hdr); + } + + CDEBUG(D_UPCALL, "path %s\n", path); + + error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); + if (error) + CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error); + + EXIT; + return -error; +} + +int izo_upc_backfetch(int minor, char *path, char *fsetname, struct lento_vfs_context *info) +{ + int size; + int error; + struct izo_upcall_hdr *hdr; + ENTRY; + + if (!presto_lento_up(minor)) { + EXIT; + return -EIO; + } + + hdr = upc_pack(IZO_UPC_BACKFETCH, strlen(path), path, fsetname, + sizeof(*info), (char *)info, &size); + if (!hdr || IS_ERR(hdr)) { + EXIT; + return -PTR_ERR(hdr); + } + + /* This is currently synchronous, kml_reint_record blocks */ + error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); + if (error) + CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error); + + EXIT; + return -error; +} + +int 
izo_upc_permit(int minor, struct dentry *dentry, __u32 pathlen, char *path, + char *fsetname) +{ + int size; + int error; + struct izo_upcall_hdr *hdr; + + ENTRY; + + hdr = upc_pack(IZO_UPC_PERMIT, pathlen, path, fsetname, 0, NULL, &size); + if (!hdr || IS_ERR(hdr)) { + EXIT; + return -PTR_ERR(hdr); + } + + CDEBUG(D_UPCALL, "Permit minor %d path %s\n", minor, path); + + error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); + + if (error == -EROFS) { + int err; + CERROR("InterMezzo: ERROR - requested permit for read-only " + "fileset.\n Setting \"%s\" read-only!\n", path); + err = izo_mark_cache(dentry, 0xFFFFFFFF, CACHE_CLIENT_RO, NULL); + if (err) + CERROR("InterMezzo ERROR: mark_cache %d\n", err); + } else if (error) { + CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error); + } + + EXIT; + return error; +} + +/* This is a ping-pong upcall handled on the server when a client (uuid) + * requests the permit for itself. */ +int izo_upc_revoke_permit(int minor, char *fsetname, __u8 uuid[16]) +{ + int size; + int error; + struct izo_upcall_hdr *hdr; + + ENTRY; + + hdr = upc_pack(IZO_UPC_REVOKE_PERMIT, 0, NULL, fsetname, 0, NULL, &size); + if (!hdr || IS_ERR(hdr)) { + EXIT; + return -PTR_ERR(hdr); + } + + memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid)); + + error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); + + if (error) + CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error); + + EXIT; + return -error; +} + +int izo_upc_go_fetch_kml(int minor, char *fsetname, __u8 uuid[16], + __u64 kmlsize) +{ + int size; + int error; + struct izo_upcall_hdr *hdr; + ENTRY; + + if (!presto_lento_up(minor)) { + EXIT; + return -EIO; + } + + hdr = upc_pack(IZO_UPC_GO_FETCH_KML, 0, NULL, fsetname, 0, NULL, &size); + if (!hdr || IS_ERR(hdr)) { + EXIT; + return -PTR_ERR(hdr); + } + + hdr->u_offset = kmlsize; + memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid)); + + error = izo_upc_upcall(minor, &size, hdr, ASYNCHRONOUS); + if (error) + CERROR("%s: error %d\n", __FUNCTION__, 
error); + + EXIT; + return -error; +} + +int izo_upc_connect(int minor, __u64 ip_address, __u64 port, __u8 uuid[16], + int client_flag) +{ + int size; + int error; + struct izo_upcall_hdr *hdr; + ENTRY; + + if (!presto_lento_up(minor)) { + EXIT; + return -EIO; + } + + hdr = upc_pack(IZO_UPC_CONNECT, 0, NULL, NULL, 0, NULL, &size); + if (!hdr || IS_ERR(hdr)) { + EXIT; + return -PTR_ERR(hdr); + } + + hdr->u_offset = ip_address; + hdr->u_length = port; + memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid)); + hdr->u_first_recno = client_flag; + + error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); + if (error) { + CERROR("%s: error %d\n", __FUNCTION__, error); + } + + EXIT; + return -error; +} + +int izo_upc_set_kmlsize(int minor, char *fsetname, __u8 uuid[16], __u64 kmlsize) +{ + int size; + int error; + struct izo_upcall_hdr *hdr; + ENTRY; + + if (!presto_lento_up(minor)) { + EXIT; + return -EIO; + } + + hdr = upc_pack(IZO_UPC_SET_KMLSIZE, 0, NULL, fsetname, 0, NULL, &size); + if (!hdr || IS_ERR(hdr)) { + EXIT; + return -PTR_ERR(hdr); + } + + memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid)); + hdr->u_length = kmlsize; + + error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); + if (error) + CERROR("%s: error %d\n", __FUNCTION__, error); + + EXIT; + return -error; +} + +int izo_upc_repstatus(int minor, char * fsetname, struct izo_rcvd_rec *lr_server) +{ + int size; + int error; + struct izo_upcall_hdr *hdr; + ENTRY; + + if (!presto_lento_up(minor)) { + EXIT; + return -EIO; + } + + hdr = upc_pack(IZO_UPC_REPSTATUS, 0, NULL, fsetname, + sizeof(*lr_server), (char*)lr_server, + &size); + if (!hdr || IS_ERR(hdr)) { + EXIT; + return -PTR_ERR(hdr); + } + + error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); + if (error) + CERROR("%s: error %d\n", __FUNCTION__, error); + + EXIT; + return -error; +} + + +#if 0 +int izo_upc_client_make_branch(int minor, char *fsetname, char *tagname, + char *branchname) +{ + int size, error; + struct izo_upcall_hdr *hdr; + int pathlen; + char 
*path; + ENTRY; + + hdr = upc_pack(IZO_UPC_CLIENT_MAKE_BRANCH, strlen(tagname), tagname, + fsetname, strlen(branchname) + 1, branchname, &size); + if (!hdr || IS_ERR(hdr)) { + error = -PTR_ERR(hdr); + goto error; + } + + error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); + if (error) + CERROR("InterMezzo: error %d\n", error); + + error: + PRESTO_FREE(path, pathlen); + EXIT; + return error; +} +#endif + +int izo_upc_server_make_branch(int minor, char *fsetname) +{ + int size, error; + struct izo_upcall_hdr *hdr; + ENTRY; + + hdr = upc_pack(IZO_UPC_SERVER_MAKE_BRANCH, 0, NULL, fsetname, 0, NULL, &size); + if (!hdr || IS_ERR(hdr)) { + error = -PTR_ERR(hdr); + goto error; + } + + error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); + if (error) + CERROR("InterMezzo: error %d\n", error); + + error: + EXIT; + return -error; +} + +int izo_upc_branch_undo(int minor, char *fsetname, char *branchname) +{ + int size; + int error; + struct izo_upcall_hdr *hdr; + ENTRY; + + if (!presto_lento_up(minor)) { + EXIT; + return -EIO; + } + + hdr = upc_pack(IZO_UPC_BRANCH_UNDO, strlen(branchname), branchname, + fsetname, 0, NULL, &size); + if (!hdr || IS_ERR(hdr)) { + EXIT; + return -PTR_ERR(hdr); + } + + error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); + if (error) + CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error); + + EXIT; + return -error; +} + +int izo_upc_branch_redo(int minor, char *fsetname, char *branchname) +{ + int size; + int error; + struct izo_upcall_hdr *hdr; + ENTRY; + + if (!presto_lento_up(minor)) { + EXIT; + return -EIO; + } + + hdr = upc_pack(IZO_UPC_BRANCH_REDO, strlen(branchname) + 1, branchname, + fsetname, 0, NULL, &size); + if (!hdr || IS_ERR(hdr)) { + EXIT; + return -PTR_ERR(hdr); + } + + error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS); + if (error) + CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error); + + EXIT; + return -error; +} diff --git a/fs/intermezzo/vfs.c b/fs/intermezzo/vfs.c new file mode 100644 index 
000000000..84b5882a5 --- /dev/null +++ b/fs/intermezzo/vfs.c @@ -0,0 +1,2416 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001, 2002 Cluster File Systems, Inc. + * Copyright (C) 2000 Stelias Computing, Inc. + * Copyright (C) 2000 Red Hat, Inc. + * + * This file is part of InterMezzo, http://www.inter-mezzo.org. + * + * InterMezzo is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * InterMezzo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with InterMezzo; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * vfs.c + * + * This file implements kernel downcalls from lento. + * + * Author: Rob Simmonds + * Andreas Dilger + * Copyright (C) 2000 Stelias Computing Inc + * Copyright (C) 2000 Red Hat Inc. + * + * Extended attribute support + * Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc. + * + * This code is based on code from namei.c in the linux file system; + * see copyright notice below. + */ + +/** namei.c copyright **/ + +/* + * linux/fs/namei.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ +/* + * Some corrections by tytso. + */ + +/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname + * lookup logic. 
+ */ + +/** end of namei.c copyright **/ + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "intermezzo_fs.h" +#include "intermezzo_psdev.h" + +#ifdef CONFIG_FS_EXT_ATTR +# include + +# if 0 /* was a broken check for Posix ACLs */ +# include +# endif +#endif + +extern struct inode_operations presto_sym_iops; + +/* Write the last_rcvd values to the last_rcvd file. We don't know what the + * UUID or last_ctime values are, so we have to read from the file first + * (sigh). + * exported for branch_reinter in kml_reint.c*/ +int presto_write_last_rcvd(struct rec_info *recinfo, + struct presto_file_set *fset, + struct lento_vfs_context *info) +{ + int rc; + struct izo_rcvd_rec rcvd_rec; + + ENTRY; + + memset(&rcvd_rec, 0, sizeof(rcvd_rec)); + memcpy(rcvd_rec.lr_uuid, info->uuid, sizeof(rcvd_rec.lr_uuid)); + rcvd_rec.lr_remote_recno = HTON__u64(info->recno); + rcvd_rec.lr_remote_offset = HTON__u64(info->kml_offset); + rcvd_rec.lr_local_recno = HTON__u64(recinfo->recno); + rcvd_rec.lr_local_offset = HTON__u64(recinfo->offset + recinfo->size); + + rc = izo_rcvd_write(fset, &rcvd_rec); + if (rc < 0) { + /* izo_rcvd_write returns negative errors and non-negative + * offsets */ + CERROR("InterMezzo: izo_rcvd_write failed: %d\n", rc); + EXIT; + return rc; + } + EXIT; + return 0; +} + +/* + * It's inline, so penalty for filesystems that don't use sticky bit is + * minimal. 
+ */ +static inline int check_sticky(struct inode *dir, struct inode *inode) +{ + if (!(dir->i_mode & S_ISVTX)) + return 0; + if (inode->i_uid == current->fsuid) + return 0; + if (dir->i_uid == current->fsuid) + return 0; + return !capable(CAP_FOWNER); +} + +/* from linux/fs/namei.c */ +static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir) +{ + int error; + if (!victim->d_inode || victim->d_parent->d_inode != dir) + return -ENOENT; + error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); + if (error) + return error; + if (IS_APPEND(dir)) + return -EPERM; + if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| + IS_IMMUTABLE(victim->d_inode)) + return -EPERM; + if (isdir) { + if (!S_ISDIR(victim->d_inode->i_mode)) + return -ENOTDIR; + if (IS_ROOT(victim)) + return -EBUSY; + } else if (S_ISDIR(victim->d_inode->i_mode)) + return -EISDIR; + return 0; +} + +/* from linux/fs/namei.c */ +static inline int may_create(struct inode *dir, struct dentry *child) { + if (child->d_inode) + return -EEXIST; + if (IS_DEADDIR(dir)) + return -ENOENT; + return permission(dir,MAY_WRITE | MAY_EXEC, NULL); +} + +#ifdef PRESTO_DEBUG +/* The loop_discard_io() function is available via a kernel patch to the + * loop block device. It "works" by accepting writes, but throwing them + * away, rather than trying to write them to disk. The old method worked + * by setting the underlying device read-only, but that has the problem + * that dirty buffers are kept in memory, and ext3 didn't like that at all. + */ +#ifdef CONFIG_LOOP_DISCARD +#define BLKDEV_FAIL(dev,fail) loop_discard_io(dev,fail) +#else +#define BLKDEV_FAIL(dev,fail) set_device_ro(dev, 1) +#endif + +/* If a breakpoint has been set via /proc/sys/intermezzo/intermezzoX/errorval, + * that is the same as "value", the underlying device will "fail" now. 
 */
inline void presto_debug_fail_blkdev(struct presto_file_set *fset,
                                     unsigned long value)
{
        int minor = presto_f2m(fset);
        int errorval = izo_channels[minor].uc_errorval;
        struct block_device *bdev = fset->fset_dentry->d_inode->i_sb->s_bdev;
        char b[BDEVNAME_SIZE];

        if (errorval && errorval == (long)value && !bdev_read_only(bdev)) {
                CDEBUG(D_SUPER, "setting device %s read only\n",
                       bdevname(bdev, b));
                BLKDEV_FAIL(bdev, 1);
                /* Remember the failed device by storing its negated dev_t
                 * in the channel's errorval. */
                izo_channels[minor].uc_errorval = -bdev->bd_dev;
        }
}
#else
#define presto_debug_fail_blkdev(dev,value) do {} while (0)
#endif


/* Should this operation be logged to the KML?  Yes only when the caller
 * asked for it (LENTO_FL_KML) and journaling is not suppressed on this
 * dentry (PRESTO_DONT_JOURNAL). */
static inline int presto_do_kml(struct lento_vfs_context *info,
                                struct dentry *dentry)
{
        if ( ! (info->flags & LENTO_FL_KML) )
                return 0;
        if ( presto_chk(dentry, PRESTO_DONT_JOURNAL) )
                return 0;
        return 1;
}

/* Should last_rcvd be updated for this operation?  Yes only when the
 * caller expects it (LENTO_FL_EXPECT) and journaling is not suppressed
 * on this dentry (PRESTO_DONT_JOURNAL). */
static inline int presto_do_rcvd(struct lento_vfs_context *info,
                                 struct dentry *dentry)
{
        if ( ! (info->flags & LENTO_FL_EXPECT) )
                return 0;
        if ( presto_chk(dentry, PRESTO_DONT_JOURNAL) )
                return 0;
        return 1;
}


/* XXX fixme: this should not fail, all these dentries are in memory
   when _we_ call this */
/* Set ctime/mtime (from ctx->updated_time, as selected by the "valid"
 * iattr mask) on up to three dentries: the parent (if
 * LENTO_FL_TOUCH_PARENT), the new object (if LENTO_FL_TOUCH_NEWOBJ) and
 * the target.  A no-op when LENTO_FL_IGNORE_TIME is set.  Returns 0 or a
 * negative errno. */
int presto_settime(struct presto_file_set *fset,
                   struct dentry *newobj,
                   struct dentry *parent,
                   struct dentry *target,
                   struct lento_vfs_context *ctx,
                   int valid)
{
        int error = 0;
        struct dentry *dentry;
        struct inode *inode;
        struct inode_operations *iops;
        struct iattr iattr;

        ENTRY;
        if (ctx->flags & LENTO_FL_IGNORE_TIME ) {
                EXIT;
                return 0;
        }

        iattr.ia_ctime = ctx->updated_time;
        iattr.ia_mtime = ctx->updated_time;
        iattr.ia_valid = valid;

        /* Visit each requested dentry exactly once; each arm clears its
         * pointer so the loop terminates. */
        while (1) {
                if (parent && ctx->flags & LENTO_FL_TOUCH_PARENT) {
                        dentry = parent;
                        parent = NULL;
                } else if (newobj && ctx->flags & LENTO_FL_TOUCH_NEWOBJ) {
                        dentry = newobj;
                        newobj = NULL;
                } else if (target) {
                        dentry = target;
                        target = NULL;
                } else
                        break;

                inode = dentry->d_inode;

                error = -EROFS;
                if (IS_RDONLY(inode)) {
                        EXIT;
                        return -EROFS;
                }

                if
(IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + EXIT; + return -EPERM; + } + + error = -EPERM; + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops) { + EXIT; + return error; + } + + if (iops->setattr != NULL) + error = iops->setattr(dentry, &iattr); + else { + error = 0; + inode_setattr(dentry->d_inode, &iattr); + } + } + EXIT; + return error; +} + +void izo_get_rollback_data(struct inode *inode, struct izo_rollback_data *rb) +{ + rb->rb_mode = (__u32)inode->i_mode; + rb->rb_rdev = (__u32)old_encode_dev(inode->i_rdev); + rb->rb_uid = (__u64)inode->i_uid; + rb->rb_gid = (__u64)inode->i_gid; +} + + +int presto_do_close(struct presto_file_set *fset, struct file *file) +{ + struct rec_info rec; + int rc = -ENOSPC; + void *handle; + struct inode *inode = file->f_dentry->d_inode; + struct presto_file_data *fdata = + (struct presto_file_data *)file->private_data; + + ENTRY; + presto_getversion(&fdata->fd_info.remote_version, inode); + + rc = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); + if (rc) { + EXIT; + return rc; + } + + handle = presto_trans_start(fset, file->f_dentry->d_inode, + KML_OPCODE_RELEASE); + if ( IS_ERR(handle) ) { + CERROR("presto_release: no space for transaction\n"); + return rc; + } + + if (fdata->fd_info.flags & LENTO_FL_KML) + rc = presto_journal_close(&rec, fset, fdata, file->f_dentry, + &fdata->fd_version, + &fdata->fd_info.remote_version); + if (rc) { + CERROR("presto_close: cannot journal close\n"); + goto out; + } + + if (fdata->fd_info.flags & LENTO_FL_EXPECT) + rc = presto_write_last_rcvd(&rec, fset, &fdata->fd_info); + + if (rc) { + CERROR("presto_close: cannot journal last_rcvd\n"); + goto out; + } + presto_trans_commit(fset, handle); + + /* cancel the LML record */ + handle = presto_trans_start(fset, inode, KML_OPCODE_WRITE); + if ( IS_ERR(handle) ) { + CERROR("presto_release: no space for clear\n"); + return -ENOSPC; + } + + rc = presto_clear_lml_close(fset, fdata->fd_lml_offset); + if (rc < 0 ) { + 
CERROR("presto_close: cannot journal close\n"); + goto out; + } + presto_truncate_lml(fset); + + out: + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + presto_trans_commit(fset, handle); + EXIT; + return rc; +} + +int presto_do_setattr(struct presto_file_set *fset, struct dentry *dentry, + struct iattr *iattr, struct lento_vfs_context *info) +{ + struct rec_info rec; + struct inode *inode = dentry->d_inode; + struct inode_operations *iops; + int error; + struct presto_version old_ver, new_ver; + struct izo_rollback_data rb; + void *handle; + loff_t old_size=inode->i_size; + + ENTRY; + error = -EROFS; + if (IS_RDONLY(inode)) { + EXIT; + return -EROFS; + } + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + EXIT; + return -EPERM; + } + + presto_getversion(&old_ver, dentry->d_inode); + izo_get_rollback_data(dentry->d_inode, &rb); + error = -EPERM; + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + + error = presto_reserve_space(fset->fset_cache, 2*PRESTO_REQHIGH); + if (error) { + EXIT; + return error; + } + + if (iattr->ia_valid & ATTR_SIZE) { + if (izo_mark_dentry(dentry, ~PRESTO_DATA, 0, NULL) != 0) + CERROR("izo_mark_dentry(inode %ld, ~PRESTO_DATA) " + "failed\n", dentry->d_inode->i_ino); + handle = presto_trans_start(fset, dentry->d_inode, + KML_OPCODE_TRUNC); + } else { + handle = presto_trans_start(fset, dentry->d_inode, + KML_OPCODE_SETATTR); + } + + if ( IS_ERR(handle) ) { + CERROR("presto_do_setattr: no space for transaction\n"); + presto_release_space(fset->fset_cache, 2*PRESTO_REQHIGH); + return -ENOSPC; + } + + if (dentry->d_inode && iops && iops->setattr) { + error = iops->setattr(dentry, iattr); + } else { + error = inode_change_ok(dentry->d_inode, iattr); + if (!error) + inode_setattr(inode, iattr); + } + + if (!error && (iattr->ia_valid & ATTR_SIZE)) + vmtruncate(inode, iattr->ia_size); + + if (error) { + EXIT; + goto exit; + } + + presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x10); + + if ( presto_do_kml(info, dentry) ) { + 
if ((iattr->ia_valid & ATTR_SIZE) && (old_size != inode->i_size)) { + /* Journal a close whenever we see a potential truncate + * At the receiving end, lento should explicitly remove + * ATTR_SIZE from the list of valid attributes */ + presto_getversion(&new_ver, inode); + error = presto_journal_close(&rec, fset, NULL, dentry, + &old_ver, &new_ver); + } + + if (!error) + error = presto_journal_setattr(&rec, fset, dentry, + &old_ver, &rb, iattr); + } + + presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x20); + if ( presto_do_rcvd(info, dentry) ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x30); + + EXIT; +exit: + presto_release_space(fset->fset_cache, 2*PRESTO_REQHIGH); + presto_trans_commit(fset, handle); + return error; +} + +int lento_setattr(const char *name, struct iattr *iattr, + struct lento_vfs_context *info) +{ + struct nameidata nd; + struct dentry *dentry; + struct presto_file_set *fset; + int error; +#if 0 /* was a broken check for Posix ACLs */ + int (*set_posix_acl)(struct inode *, int type, posix_acl_t *)=NULL; +#endif + + ENTRY; + CDEBUG(D_PIOCTL,"name %s, valid %#x, mode %#o, uid %d, gid %d, size %Ld\n", + name, iattr->ia_valid, iattr->ia_mode, iattr->ia_uid, + iattr->ia_gid, iattr->ia_size); + CDEBUG(D_PIOCTL, "atime %#lx, mtime %#lx, ctime %#lx, attr_flags %#x\n", + iattr->ia_atime.tv_sec, iattr->ia_mtime.tv_sec, iattr->ia_ctime.tv_sec, + iattr->ia_attr_flags); + CDEBUG(D_PIOCTL, "offset %d, recno %d, flags %#x\n", + info->slot_offset, info->recno, info->flags); + + lock_kernel(); + error = presto_walk(name, &nd); + if (error) { + EXIT; + goto exit; + } + dentry = nd.dentry; + + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + CERROR("No fileset!\n"); + EXIT; + goto exit_lock; + } + + /* NOTE: this prevents us from changing the filetype on setattr, + * as we normally only want to change permission bits. 
+ * If this is not correct, then we need to fix the perl code + * to always send the file type OR'ed with the permission. + */ + if (iattr->ia_valid & ATTR_MODE) { + int set_mode = iattr->ia_mode; + iattr->ia_mode = (iattr->ia_mode & S_IALLUGO) | + (dentry->d_inode->i_mode & ~S_IALLUGO); + CDEBUG(D_PIOCTL, "chmod: orig %#o, set %#o, result %#o\n", + dentry->d_inode->i_mode, set_mode, iattr->ia_mode); +#if 0 /* was a broken check for Posix ACLs */ + /* ACl code interacts badly with setattr + * since it tries to modify the ACL using + * set_ext_attr which recurses back into presto. + * This only happens if ATTR_MODE is set. + * Here we are doing a "forced" mode set + * (initiated by lento), so we disable the + * set_posix_acl operation which + * prevents such recursion. -SHP + * + * This will probably still be required when native + * acl journalling is in place. + */ + set_posix_acl=dentry->d_inode->i_op->set_posix_acl; + dentry->d_inode->i_op->set_posix_acl=NULL; +#endif + } + + error = presto_do_setattr(fset, dentry, iattr, info); + + if (info->flags & LENTO_FL_SET_DDFILEID) { + struct presto_dentry_data *dd = presto_d2d(dentry); + if (dd) { + dd->remote_ino = info->remote_ino; + dd->remote_generation = info->remote_generation; + } + } + +#if 0 /* was a broken check for Posix ACLs */ + /* restore the inode_operations if we changed them*/ + if (iattr->ia_valid & ATTR_MODE) + dentry->d_inode->i_op->set_posix_acl=set_posix_acl; +#endif + + + EXIT; +exit_lock: + path_release(&nd); +exit: + unlock_kernel(); + return error; +} + +int presto_do_create(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, int mode, + struct lento_vfs_context *info) +{ + struct rec_info rec; + int error; + struct presto_version tgt_dir_ver, new_file_ver; + struct inode_operations *iops; + void *handle; + + ENTRY; + mode &= S_IALLUGO; + mode |= S_IFREG; + + // down(&dir->d_inode->i_zombie); + error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); + if (error) { 
+ EXIT; + // up(&dir->d_inode->i_zombie); + return error; + } + + error = may_create(dir->d_inode, dentry); + if (error) { + EXIT; + goto exit_pre_lock; + } + + error = -EPERM; + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops->create) { + EXIT; + goto exit_pre_lock; + } + + presto_getversion(&tgt_dir_ver, dir->d_inode); + handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_CREATE); + if ( IS_ERR(handle) ) { + EXIT; + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + CERROR("presto_do_create: no space for transaction\n"); + error=-ENOSPC; + goto exit_pre_lock; + } + DQUOT_INIT(dir->d_inode); + lock_kernel(); + error = iops->create(dir->d_inode, dentry, mode, NULL); + if (error) { + EXIT; + goto exit_lock; + } + + if (dentry->d_inode) { + struct presto_cache *cache = fset->fset_cache; + /* was this already done? */ + presto_set_ops(dentry->d_inode, cache->cache_filter); + + filter_setup_dentry_ops(cache->cache_filter, + dentry->d_op, + &presto_dentry_ops); + dentry->d_op = filter_c2udops(cache->cache_filter); + + /* if Lento creates this file, we won't have data */ + if ( ISLENTO(presto_c2m(cache)) ) { + presto_set(dentry, PRESTO_ATTR); + } else { + presto_set(dentry, PRESTO_ATTR | PRESTO_DATA); + } + } + + info->flags |= LENTO_FL_TOUCH_PARENT; + error = presto_settime(fset, NULL, dir, dentry, + info, ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + goto exit_lock; + } + + presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x10); + + if ( presto_do_kml(info, dentry) ) { + presto_getversion(&new_file_ver, dentry->d_inode); + error = presto_journal_create(&rec, fset, dentry, &tgt_dir_ver, + &new_file_ver, + dentry->d_inode->i_mode); + } + + presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x20); + + if ( presto_do_rcvd(info, dentry) ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x30); + + /* add inode dentry */ + if 
(fset->fset_cache->cache_filter->o_trops->tr_add_ilookup ) { + struct dentry *d; + d = fset->fset_cache->cache_filter->o_trops->tr_add_ilookup + (dir->d_inode->i_sb->s_root, dentry); + } + + EXIT; + + exit_lock: + unlock_kernel(); + presto_trans_commit(fset, handle); + exit_pre_lock: + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + // up(&dir->d_inode->i_zombie); + return error; +} + +int lento_create(const char *name, int mode, struct lento_vfs_context *info) +{ + int error; + struct nameidata nd; + char * pathname; + struct dentry *dentry; + struct presto_file_set *fset; + + ENTRY; + pathname = getname(name); + error = PTR_ERR(pathname); + if (IS_ERR(pathname)) { + EXIT; + goto exit; + } + + /* this looks up the parent */ + error = path_lookup(pathname, LOOKUP_PARENT, &nd); + if (error) { + EXIT; + goto exit; + } + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) { + EXIT; + goto exit_lock; + } + + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + CERROR("No fileset!\n"); + EXIT; + goto exit_lock; + } + error = presto_do_create(fset, dentry->d_parent, dentry, (mode&S_IALLUGO)|S_IFREG, + info); + + EXIT; + + exit_lock: + path_release (&nd); + dput(dentry); + up(&dentry->d_parent->d_inode->i_sem); + putname(pathname); +exit: + return error; +} + +int presto_do_link(struct presto_file_set *fset, struct dentry *old_dentry, + struct dentry *dir, struct dentry *new_dentry, + struct lento_vfs_context *info) +{ + struct rec_info rec; + struct inode *inode; + int error; + struct inode_operations *iops; + struct presto_version tgt_dir_ver; + struct presto_version new_link_ver; + void *handle; + + // down(&dir->d_inode->i_zombie); + error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); + if (error) { + EXIT; + // up(&dir->d_inode->i_zombie); + return error; + } + error = -ENOENT; + inode = old_dentry->d_inode; + if (!inode) + goto exit_lock; + + error = may_create(dir->d_inode, new_dentry); + if (error) + 
goto exit_lock; + + error = -EXDEV; + if (dir->d_inode->i_sb != inode->i_sb) + goto exit_lock; + + /* + * A link to an append-only or immutable file cannot be created. + */ + error = -EPERM; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { + EXIT; + goto exit_lock; + } + + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops->link) { + EXIT; + goto exit_lock; + } + + + presto_getversion(&tgt_dir_ver, dir->d_inode); + handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_LINK); + if ( IS_ERR(handle) ) { + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + CERROR("presto_do_link: no space for transaction\n"); + return -ENOSPC; + } + + DQUOT_INIT(dir->d_inode); + lock_kernel(); + error = iops->link(old_dentry, dir->d_inode, new_dentry); + unlock_kernel(); + if (error) { + EXIT; + goto exit_lock; + } + + /* link dd data to that of existing dentry */ + old_dentry->d_op->d_release(new_dentry); + if (!presto_d2d(old_dentry)) + BUG(); + presto_d2d(old_dentry)->dd_count++; + + new_dentry->d_fsdata = presto_d2d(old_dentry); + + info->flags |= LENTO_FL_TOUCH_PARENT; + error = presto_settime(fset, NULL, dir, new_dentry, + info, ATTR_CTIME); + if (error) { + EXIT; + goto exit_lock; + } + + presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x10); + presto_getversion(&new_link_ver, new_dentry->d_inode); + if ( presto_do_kml(info, old_dentry) ) + error = presto_journal_link(&rec, fset, old_dentry, new_dentry, + &tgt_dir_ver, &new_link_ver); + + presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x20); + if ( presto_do_rcvd(info, old_dentry) ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x30); + EXIT; + presto_trans_commit(fset, handle); +exit_lock: + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + // up(&dir->d_inode->i_zombie); + return error; +} + + +int lento_link(const char * oldname, const char * newname, + struct lento_vfs_context *info) +{ + int error; + char * to; + struct 
presto_file_set *fset; + + to = getname(newname); + error = PTR_ERR(to); + if (!IS_ERR(to)) { + struct dentry *new_dentry; + struct nameidata nd, old_nd; + + error = __user_walk(oldname, 0, &old_nd); + if (error) + goto exit; + error = path_lookup(to, LOOKUP_PARENT, &nd); + if (error) + goto out; + error = -EXDEV; + if (old_nd.mnt != nd.mnt) + goto out; + new_dentry = lookup_create(&nd, 0); + error = PTR_ERR(new_dentry); + + if (!IS_ERR(new_dentry)) { + fset = presto_fset(new_dentry); + error = -EINVAL; + if ( !fset ) { + CERROR("No fileset!\n"); + EXIT; + goto out2; + } + error = presto_do_link(fset, old_nd.dentry, + nd.dentry, + new_dentry, info); + dput(new_dentry); + } + out2: + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); + out: + path_release(&old_nd); + exit: + putname(to); + } + return error; +} + +int presto_do_unlink(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, struct lento_vfs_context *info) +{ + struct rec_info rec; + struct inode_operations *iops; + struct presto_version tgt_dir_ver, old_file_ver; + struct izo_rollback_data rb; + void *handle; + int do_kml = 0, do_rcvd = 0, linkno = 0, error, old_targetlen = 0; + char *old_target = NULL; + + ENTRY; + // down(&dir->d_inode->i_zombie); + error = may_delete(dir->d_inode, dentry, 0); + if (error) { + EXIT; + // up(&dir->d_inode->i_zombie); + return error; + } + + error = -EPERM; + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops->unlink) { + EXIT; + // up(&dir->d_inode->i_zombie); + return error; + } + + error = presto_reserve_space(fset->fset_cache, PRESTO_REQLOW); + if (error) { + EXIT; + // up(&dir->d_inode->i_zombie); + return error; + } + + + if (presto_d2d(dentry)) { + struct presto_dentry_data *dd = presto_d2d(dentry); + struct dentry *de = dd->dd_inodentry; + if (de && dentry->d_inode->i_nlink == 1) { + dd->dd_count--; + dd->dd_inodentry = NULL; + de->d_fsdata = NULL; + atomic_dec(&de->d_inode->i_count); + de->d_inode = NULL; + dput(de); + } 
+ } + + presto_getversion(&tgt_dir_ver, dir->d_inode); + presto_getversion(&old_file_ver, dentry->d_inode); + izo_get_rollback_data(dentry->d_inode, &rb); + handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_UNLINK); + if ( IS_ERR(handle) ) { + presto_release_space(fset->fset_cache, PRESTO_REQLOW); + CERROR("ERROR: presto_do_unlink: no space for transaction. Tell Peter.\n"); + // up(&dir->d_inode->i_zombie); + return -ENOSPC; + } + DQUOT_INIT(dir->d_inode); + if (d_mountpoint(dentry)) + error = -EBUSY; + else { + lock_kernel(); + linkno = dentry->d_inode->i_nlink; + if (linkno > 1) { + dget(dentry); + } + + if (S_ISLNK(dentry->d_inode->i_mode)) { + mm_segment_t old_fs; + struct inode_operations *riops; + riops = filter_c2csiops(fset->fset_cache->cache_filter); + + PRESTO_ALLOC(old_target, PATH_MAX); + if (old_target == NULL) { + error = -ENOMEM; + EXIT; + goto exit; + } + + old_fs = get_fs(); + set_fs(get_ds()); + + if (riops->readlink == NULL) + CERROR("InterMezzo %s: no readlink iops.\n", + __FUNCTION__); + else + old_targetlen = + riops->readlink(dentry, old_target, + PATH_MAX); + if (old_targetlen < 0) { + CERROR("InterMezzo: readlink failed: %ld\n", + PTR_ERR(old_target)); + PRESTO_FREE(old_target, PATH_MAX); + old_target = NULL; + old_targetlen = 0; + } + set_fs(old_fs); + } + + do_kml = presto_do_kml(info, dir); + do_rcvd = presto_do_rcvd(info, dir); + error = iops->unlink(dir->d_inode, dentry); + unlock_kernel(); + } + + if (linkno > 1) { + /* FIXME: Combine this with the next call? 
*/ + error = presto_settime(fset, NULL, NULL, dentry, + info, ATTR_CTIME); + dput(dentry); + if (error) { + EXIT; + goto exit; + } + } + + error = presto_settime(fset, NULL, NULL, dir, + info, ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + goto exit; + } + + // up(&dir->d_inode->i_zombie); + + presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x10); + if ( do_kml ) + error = presto_journal_unlink(&rec, fset, dir, &tgt_dir_ver, + &old_file_ver, &rb, dentry, + old_target, old_targetlen); + presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x20); + if ( do_rcvd ) { + error = presto_write_last_rcvd(&rec, fset, info); + } + presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x30); + EXIT; +exit: + presto_release_space(fset->fset_cache, PRESTO_REQLOW); + presto_trans_commit(fset, handle); + if (old_target != NULL) + PRESTO_FREE(old_target, PATH_MAX); + return error; +} + + +int lento_unlink(const char *pathname, struct lento_vfs_context *info) +{ + int error = 0; + char * name; + struct dentry *dentry; + struct nameidata nd; + struct presto_file_set *fset; + + ENTRY; + + name = getname(pathname); + if(IS_ERR(name)) + return PTR_ERR(name); + + error = path_lookup(name, LOOKUP_PARENT, &nd); + if (error) + goto exit; + error = -EISDIR; + if (nd.last_type != LAST_NORM) + goto exit1; + down(&nd.dentry->d_inode->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + CERROR("No fileset!\n"); + EXIT; + goto exit2; + } + /* Why not before? Because we want correct error value */ + if (nd.last.name[nd.last.len]) + goto slashes; + error = presto_do_unlink(fset, nd.dentry, dentry, info); + if (!error) + d_delete(dentry); + exit2: + EXIT; + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); +exit1: + path_release(&nd); +exit: + putname(name); + + return error; + +slashes: + error = !dentry->d_inode ? -ENOENT : + S_ISDIR(dentry->d_inode->i_mode) ? 
-EISDIR : -ENOTDIR; + goto exit2; +} + +int presto_do_symlink(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, const char *oldname, + struct lento_vfs_context *info) +{ + struct rec_info rec; + int error; + struct presto_version tgt_dir_ver, new_link_ver; + struct inode_operations *iops; + void *handle; + + ENTRY; + // down(&dir->d_inode->i_zombie); + /* record + max path len + space to free */ + error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH + 4096); + if (error) { + EXIT; + // up(&dir->d_inode->i_zombie); + return error; + } + + error = may_create(dir->d_inode, dentry); + if (error) { + EXIT; + goto exit_lock; + } + + error = -EPERM; + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops->symlink) { + EXIT; + goto exit_lock; + } + + presto_getversion(&tgt_dir_ver, dir->d_inode); + handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_SYMLINK); + if ( IS_ERR(handle) ) { + presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); + CERROR("ERROR: presto_do_symlink: no space for transaction. Tell Peter.\n"); + EXIT; + // up(&dir->d_inode->i_zombie); + return -ENOSPC; + } + DQUOT_INIT(dir->d_inode); + lock_kernel(); + error = iops->symlink(dir->d_inode, dentry, oldname); + if (error) { + EXIT; + goto exit; + } + + if (dentry->d_inode) { + struct presto_cache *cache = fset->fset_cache; + + presto_set_ops(dentry->d_inode, cache->cache_filter); + + filter_setup_dentry_ops(cache->cache_filter, dentry->d_op, + &presto_dentry_ops); + dentry->d_op = filter_c2udops(cache->cache_filter); + /* XXX ? Cache state ? 
if Lento creates a symlink */ + if ( ISLENTO(presto_c2m(cache)) ) { + presto_set(dentry, PRESTO_ATTR); + } else { + presto_set(dentry, PRESTO_ATTR | PRESTO_DATA); + } + } + + info->flags |= LENTO_FL_TOUCH_PARENT; + error = presto_settime(fset, NULL, dir, dentry, + info, ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + goto exit; + } + + presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x10); + presto_getversion(&new_link_ver, dentry->d_inode); + if ( presto_do_kml(info, dentry) ) + error = presto_journal_symlink(&rec, fset, dentry, oldname, + &tgt_dir_ver, &new_link_ver); + + presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x20); + if ( presto_do_rcvd(info, dentry) ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x30); + EXIT; +exit: + unlock_kernel(); + presto_trans_commit(fset, handle); + exit_lock: + presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); + // up(&dir->d_inode->i_zombie); + return error; +} + +int lento_symlink(const char *oldname, const char *newname, + struct lento_vfs_context *info) +{ + int error; + char *from; + char *to; + struct dentry *dentry; + struct presto_file_set *fset; + struct nameidata nd; + + ENTRY; + lock_kernel(); + from = getname(oldname); + error = PTR_ERR(from); + if (IS_ERR(from)) { + EXIT; + goto exit; + } + + to = getname(newname); + error = PTR_ERR(to); + if (IS_ERR(to)) { + EXIT; + goto exit_from; + } + + error = path_lookup(to, LOOKUP_PARENT, &nd); + if (error) { + EXIT; + goto exit_to; + } + + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) { + path_release(&nd); + EXIT; + goto exit_to; + } + + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + CERROR("No fileset!\n"); + path_release(&nd); + EXIT; + goto exit_lock; + } + error = presto_do_symlink(fset, nd.dentry, + dentry, from, info); + path_release(&nd); + EXIT; + exit_lock: + up(&nd.dentry->d_inode->i_sem); + dput(dentry); + exit_to: + 
putname(to); + exit_from: + putname(from); + exit: + unlock_kernel(); + return error; +} + +int presto_do_mkdir(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, int mode, + struct lento_vfs_context *info) +{ + struct rec_info rec; + int error; + struct presto_version tgt_dir_ver, new_dir_ver; + void *handle; + + ENTRY; + // down(&dir->d_inode->i_zombie); + + /* one journal record + directory block + room for removals*/ + error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH + 4096); + if (error) { + EXIT; + // up(&dir->d_inode->i_zombie); + return error; + } + + error = may_create(dir->d_inode, dentry); + if (error) { + EXIT; + goto exit_lock; + } + + error = -EPERM; + if (!filter_c2cdiops(fset->fset_cache->cache_filter)->mkdir) { + EXIT; + goto exit_lock; + } + + error = -ENOSPC; + presto_getversion(&tgt_dir_ver, dir->d_inode); + handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_MKDIR); + if ( IS_ERR(handle) ) { + presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); + CERROR("presto_do_mkdir: no space for transaction\n"); + goto exit_lock; + } + + DQUOT_INIT(dir->d_inode); + mode &= (S_IRWXUGO|S_ISVTX); + lock_kernel(); + error = filter_c2cdiops(fset->fset_cache->cache_filter)->mkdir(dir->d_inode, dentry, mode); + if (error) { + EXIT; + goto exit; + } + + if ( dentry->d_inode && !error) { + struct presto_cache *cache = fset->fset_cache; + + presto_set_ops(dentry->d_inode, cache->cache_filter); + + filter_setup_dentry_ops(cache->cache_filter, + dentry->d_op, + &presto_dentry_ops); + dentry->d_op = filter_c2udops(cache->cache_filter); + /* if Lento does this, we won't have data */ + if ( ISLENTO(presto_c2m(cache)) ) { + presto_set(dentry, PRESTO_ATTR); + } else { + presto_set(dentry, PRESTO_ATTR | PRESTO_DATA); + } + } + + info->flags |= LENTO_FL_TOUCH_PARENT; + error = presto_settime(fset, NULL, dir, dentry, + info, ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + goto exit; + } + + 
presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x10); + presto_getversion(&new_dir_ver, dentry->d_inode); + if ( presto_do_kml(info, dir) ) + error = presto_journal_mkdir(&rec, fset, dentry, &tgt_dir_ver, + &new_dir_ver, + dentry->d_inode->i_mode); + + presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x20); + if ( presto_do_rcvd(info, dentry) ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x30); + EXIT; +exit: + unlock_kernel(); + presto_trans_commit(fset, handle); + exit_lock: + presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096); + // up(&dir->d_inode->i_zombie); + return error; +} + +/* + * Look out: this function may change a normal dentry + * into a directory dentry (different size).. + */ +int lento_mkdir(const char *name, int mode, struct lento_vfs_context *info) +{ + int error; + char *pathname; + struct dentry *dentry; + struct presto_file_set *fset; + struct nameidata nd; + + ENTRY; + CDEBUG(D_PIOCTL, "name: %s, mode %o, offset %d, recno %d, flags %x\n", + name, mode, info->slot_offset, info->recno, info->flags); + pathname = getname(name); + error = PTR_ERR(pathname); + if (IS_ERR(pathname)) { + EXIT; + return error; + } + + error = path_lookup(pathname, LOOKUP_PARENT, &nd); + if (error) + goto out_name; + + dentry = lookup_create(&nd, 1); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + fset = presto_fset(dentry); + error = -EINVAL; + if (!fset) { + CERROR("No fileset!\n"); + EXIT; + goto out_dput; + } + + error = presto_do_mkdir(fset, nd.dentry, dentry, + mode & S_IALLUGO, info); +out_dput: + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); +out_name: + EXIT; + putname(pathname); + CDEBUG(D_PIOCTL, "error: %d\n", error); + return error; +} + +static void d_unhash(struct dentry *dentry) +{ + dget(dentry); + switch (atomic_read(&dentry->d_count)) { + default: + shrink_dcache_parent(dentry); + if (atomic_read(&dentry->d_count) != 2) + break; + case 2: + 
d_drop(dentry); + } +} + +int presto_do_rmdir(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, struct lento_vfs_context *info) +{ + struct rec_info rec; + int error; + struct presto_version tgt_dir_ver, old_dir_ver; + struct izo_rollback_data rb; + struct inode_operations *iops; + void *handle; + int do_kml, do_rcvd; + int size; + + ENTRY; + error = may_delete(dir->d_inode, dentry, 1); + if (error) + return error; + + error = -EPERM; + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops->rmdir) { + EXIT; + return error; + } + + size = PRESTO_REQHIGH - dentry->d_inode->i_size; + error = presto_reserve_space(fset->fset_cache, size); + if (error) { + EXIT; + return error; + } + + presto_getversion(&tgt_dir_ver, dir->d_inode); + presto_getversion(&old_dir_ver, dentry->d_inode); + izo_get_rollback_data(dentry->d_inode, &rb); + handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_RMDIR); + if ( IS_ERR(handle) ) { + presto_release_space(fset->fset_cache, size); + CERROR("ERROR: presto_do_rmdir: no space for transaction. 
Tell Peter.\n"); + return -ENOSPC; + } + + DQUOT_INIT(dir->d_inode); + + do_kml = presto_do_kml(info, dir); + do_rcvd = presto_do_rcvd(info, dir); + + // double_down(&dir->d_inode->i_zombie, &dentry->d_inode->i_zombie); + d_unhash(dentry); + if (IS_DEADDIR(dir->d_inode)) + error = -ENOENT; + else if (d_mountpoint(dentry)) { + CERROR("foo: d_mountpoint(dentry): ino %ld\n", + dentry->d_inode->i_ino); + error = -EBUSY; + } else { + lock_kernel(); + error = iops->rmdir(dir->d_inode, dentry); + unlock_kernel(); + if (!error) { + dentry->d_inode->i_flags |= S_DEAD; + error = presto_settime(fset, NULL, NULL, dir, info, + ATTR_CTIME | ATTR_MTIME); + } + } + // double_up(&dir->d_inode->i_zombie, &dentry->d_inode->i_zombie); + if (!error) + d_delete(dentry); + dput(dentry); + + presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x10); + if ( !error && do_kml ) + error = presto_journal_rmdir(&rec, fset, dir, &tgt_dir_ver, + &old_dir_ver, &rb, + dentry->d_name.len, + dentry->d_name.name); + + presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x20); + if ( !error && do_rcvd ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x30); + EXIT; + + presto_trans_commit(fset, handle); + presto_release_space(fset->fset_cache, size); + return error; +} + +int lento_rmdir(const char *pathname, struct lento_vfs_context *info) +{ + int error = 0; + char * name; + struct dentry *dentry; + struct presto_file_set *fset; + struct nameidata nd; + + ENTRY; + name = getname(pathname); + if(IS_ERR(name)) { + EXIT; + return PTR_ERR(name); + } + + error = path_lookup(name, LOOKUP_PARENT, &nd); + if (error) { + EXIT; + goto exit; + } + switch(nd.last_type) { + case LAST_DOTDOT: + error = -ENOTEMPTY; + EXIT; + goto exit1; + case LAST_ROOT: + case LAST_DOT: + error = -EBUSY; + EXIT; + goto exit1; + } + down(&nd.dentry->d_inode->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + fset = 
presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + CERROR("No fileset!\n"); + EXIT; + goto exit_put; + } + error = presto_do_rmdir(fset, nd.dentry, dentry, info); + exit_put: + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); +exit1: + path_release(&nd); +exit: + putname(name); + EXIT; + return error; +} + +int presto_do_mknod(struct presto_file_set *fset, struct dentry *dir, + struct dentry *dentry, int mode, dev_t dev, + struct lento_vfs_context *info) +{ + struct rec_info rec; + int error = -EPERM; + struct presto_version tgt_dir_ver, new_node_ver; + struct inode_operations *iops; + void *handle; + + ENTRY; + + // down(&dir->d_inode->i_zombie); + /* one KML entry */ + error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); + if (error) { + EXIT; + // up(&dir->d_inode->i_zombie); + return error; + } + + if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) { + EXIT; + goto exit_lock; + } + + error = may_create(dir->d_inode, dentry); + if (error) { + EXIT; + goto exit_lock; + } + + error = -EPERM; + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops->mknod) { + EXIT; + goto exit_lock; + } + + DQUOT_INIT(dir->d_inode); + lock_kernel(); + + error = -ENOSPC; + presto_getversion(&tgt_dir_ver, dir->d_inode); + handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_MKNOD); + if ( IS_ERR(handle) ) { + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + CERROR("presto_do_mknod: no space for transaction\n"); + goto exit_lock2; + } + + error = iops->mknod(dir->d_inode, dentry, mode, dev); + if (error) { + EXIT; + goto exit_commit; + } + if ( dentry->d_inode) { + struct presto_cache *cache = fset->fset_cache; + + presto_set_ops(dentry->d_inode, cache->cache_filter); + + filter_setup_dentry_ops(cache->cache_filter, dentry->d_op, + &presto_dentry_ops); + dentry->d_op = filter_c2udops(cache->cache_filter); + + /* if Lento does this, we won't have data */ + if ( ISLENTO(presto_c2m(cache)) ) { + presto_set(dentry, PRESTO_ATTR); 
+ } else { + presto_set(dentry, PRESTO_ATTR | PRESTO_DATA); + } + } + + error = presto_settime(fset, NULL, NULL, dir, + info, ATTR_MTIME); + if (error) { + EXIT; + } + error = presto_settime(fset, NULL, NULL, dentry, + info, ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + } + + presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x10); + presto_getversion(&new_node_ver, dentry->d_inode); + if ( presto_do_kml(info, dentry) ) + error = presto_journal_mknod(&rec, fset, dentry, &tgt_dir_ver, + &new_node_ver, + dentry->d_inode->i_mode, + MAJOR(dev), MINOR(dev) ); + + presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x20); + if ( presto_do_rcvd(info, dentry) ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x30); + EXIT; + exit_commit: + presto_trans_commit(fset, handle); + exit_lock2: + unlock_kernel(); + exit_lock: + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + // up(&dir->d_inode->i_zombie); + return error; +} + +int lento_mknod(const char *filename, int mode, dev_t dev, + struct lento_vfs_context *info) +{ + int error = 0; + char * tmp; + struct dentry * dentry; + struct nameidata nd; + struct presto_file_set *fset; + + ENTRY; + + if (S_ISDIR(mode)) + return -EPERM; + tmp = getname(filename); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + error = path_lookup(tmp, LOOKUP_PARENT, &nd); + if (error) + goto out; + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + fset = presto_fset(dentry); + error = -EINVAL; + if ( !fset ) { + CERROR("No fileset!\n"); + EXIT; + goto exit_put; + } + switch (mode & S_IFMT) { + case 0: case S_IFREG: + error = -EOPNOTSUPP; + break; + case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: + error = presto_do_mknod(fset, nd.dentry, dentry, + mode, dev, info); + break; + case S_IFDIR: + error = -EPERM; + break; + default: + error = -EINVAL; + } + exit_put: + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); 
+out: + putname(tmp); + + return error; +} + +int do_rename(struct presto_file_set *fset, + struct dentry *old_parent, struct dentry *old_dentry, + struct dentry *new_parent, struct dentry *new_dentry, + struct lento_vfs_context *info) +{ + struct rec_info rec; + int error; + struct inode_operations *iops; + struct presto_version src_dir_ver, tgt_dir_ver; + void *handle; + int new_inode_unlink = 0; + struct inode *old_dir = old_parent->d_inode; + struct inode *new_dir = new_parent->d_inode; + + ENTRY; + presto_getversion(&src_dir_ver, old_dir); + presto_getversion(&tgt_dir_ver, new_dir); + + error = -EPERM; + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + if (!iops || !iops->rename) { + EXIT; + return error; + } + + error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); + if (error) { + EXIT; + return error; + } + handle = presto_trans_start(fset, old_dir, KML_OPCODE_RENAME); + if ( IS_ERR(handle) ) { + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + CERROR("presto_do_rename: no space for transaction\n"); + return -ENOSPC; + } + if (new_dentry->d_inode && new_dentry->d_inode->i_nlink > 1) { + dget(new_dentry); + new_inode_unlink = 1; + } + + error = iops->rename(old_dir, old_dentry, new_dir, new_dentry); + + if (error) { + EXIT; + goto exit; + } + + if (new_inode_unlink) { + error = presto_settime(fset, NULL, NULL, old_dentry, + info, ATTR_CTIME); + dput(old_dentry); + if (error) { + EXIT; + goto exit; + } + } + info->flags |= LENTO_FL_TOUCH_PARENT; + error = presto_settime(fset, NULL, new_parent, old_parent, + info, ATTR_CTIME | ATTR_MTIME); + if (error) { + EXIT; + goto exit; + } + + /* XXX make a distinction between cross file set + * and intra file set renames here + */ + presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x10); + if ( presto_do_kml(info, old_dentry) ) + error = presto_journal_rename(&rec, fset, old_dentry, + new_dentry, + &src_dir_ver, &tgt_dir_ver); + + presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x20); + 
+ if ( presto_do_rcvd(info, old_dentry) ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x30); + EXIT; +exit: + presto_trans_commit(fset, handle); + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + return error; +} + +static +int presto_rename_dir(struct presto_file_set *fset, struct dentry *old_parent, + struct dentry *old_dentry, struct dentry *new_parent, + struct dentry *new_dentry, struct lento_vfs_context *info) +{ + int error; + struct inode *target; + struct inode *old_dir = old_parent->d_inode; + struct inode *new_dir = new_parent->d_inode; + + if (old_dentry->d_inode == new_dentry->d_inode) + return 0; + + error = may_delete(old_dir, old_dentry, 1); + if (error) + return error; + + if (new_dir->i_sb != old_dir->i_sb) + return -EXDEV; + + if (!new_dentry->d_inode) + error = may_create(new_dir, new_dentry); + else + error = may_delete(new_dir, new_dentry, 1); + if (error) + return error; + + if (!old_dir->i_op || !old_dir->i_op->rename) + return -EPERM; + + /* + * If we are going to change the parent - check write permissions, + * we'll need to flip '..'. + */ + if (new_dir != old_dir) { + error = permission(old_dentry->d_inode, MAY_WRITE, NULL); + } + if (error) + return error; + + DQUOT_INIT(old_dir); + DQUOT_INIT(new_dir); + down(&old_dir->i_sb->s_vfs_rename_sem); + error = -EINVAL; + if (is_subdir(new_dentry, old_dentry)) + goto out_unlock; + target = new_dentry->d_inode; + if (target) { /* Hastur! Hastur! Hastur! 
*/ + // triple_down(&old_dir->i_zombie, + // &new_dir->i_zombie, + // &target->i_zombie); + d_unhash(new_dentry); + } else + // double_down(&old_dir->i_zombie, + // &new_dir->i_zombie); + if (IS_DEADDIR(old_dir)||IS_DEADDIR(new_dir)) + error = -ENOENT; + else if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) + error = -EBUSY; + else + error = do_rename(fset, old_parent, old_dentry, + new_parent, new_dentry, info); + if (target) { + if (!error) + target->i_flags |= S_DEAD; + // triple_up(&old_dir->i_zombie, + // &new_dir->i_zombie, + // &target->i_zombie); + if (d_unhashed(new_dentry)) + d_rehash(new_dentry); + dput(new_dentry); + } else + // double_up(&old_dir->i_zombie, + // &new_dir->i_zombie); + + if (!error) + d_move(old_dentry,new_dentry); +out_unlock: + up(&old_dir->i_sb->s_vfs_rename_sem); + return error; +} + +static +int presto_rename_other(struct presto_file_set *fset, struct dentry *old_parent, + struct dentry *old_dentry, struct dentry *new_parent, + struct dentry *new_dentry, struct lento_vfs_context *info) +{ + struct inode *old_dir = old_parent->d_inode; + struct inode *new_dir = new_parent->d_inode; + int error; + + if (old_dentry->d_inode == new_dentry->d_inode) + return 0; + + error = may_delete(old_dir, old_dentry, 0); + if (error) + return error; + + if (new_dir->i_sb != old_dir->i_sb) + return -EXDEV; + + if (!new_dentry->d_inode) + error = may_create(new_dir, new_dentry); + else + error = may_delete(new_dir, new_dentry, 0); + if (error) + return error; + + if (!old_dir->i_op || !old_dir->i_op->rename) + return -EPERM; + + DQUOT_INIT(old_dir); + DQUOT_INIT(new_dir); + // double_down(&old_dir->i_zombie, &new_dir->i_zombie); + if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) + error = -EBUSY; + else + error = do_rename(fset, old_parent, old_dentry, + new_parent, new_dentry, info); + // double_up(&old_dir->i_zombie, &new_dir->i_zombie); + if (error) + return error; + /* The following d_move() should become unconditional */ + if 
(!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) { + d_move(old_dentry, new_dentry); + } + return 0; +} + +int presto_do_rename(struct presto_file_set *fset, + struct dentry *old_parent, struct dentry *old_dentry, + struct dentry *new_parent, struct dentry *new_dentry, + struct lento_vfs_context *info) +{ + if (S_ISDIR(old_dentry->d_inode->i_mode)) + return presto_rename_dir(fset, old_parent,old_dentry,new_parent, + new_dentry, info); + else + return presto_rename_other(fset, old_parent, old_dentry, + new_parent,new_dentry, info); +} + + +int lento_do_rename(const char *oldname, const char *newname, + struct lento_vfs_context *info) +{ + int error = 0; + struct dentry * old_dir, * new_dir; + struct dentry * old_dentry, *new_dentry; + struct nameidata oldnd, newnd; + struct presto_file_set *fset; + + ENTRY; + + error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); + if (error) + goto exit; + + error = path_lookup(newname, LOOKUP_PARENT, &newnd); + if (error) + goto exit1; + + error = -EXDEV; + if (oldnd.mnt != newnd.mnt) + goto exit2; + + old_dir = oldnd.dentry; + error = -EBUSY; + if (oldnd.last_type != LAST_NORM) + goto exit2; + + new_dir = newnd.dentry; + if (newnd.last_type != LAST_NORM) + goto exit2; + + lock_rename(new_dir, old_dir); + + old_dentry = lookup_hash(&oldnd.last, old_dir); + error = PTR_ERR(old_dentry); + if (IS_ERR(old_dentry)) + goto exit3; + /* source must exist */ + error = -ENOENT; + if (!old_dentry->d_inode) + goto exit4; + fset = presto_fset(old_dentry); + error = -EINVAL; + if ( !fset ) { + CERROR("No fileset!\n"); + EXIT; + goto exit4; + } + /* unless the source is a directory trailing slashes give -ENOTDIR */ + if (!S_ISDIR(old_dentry->d_inode->i_mode)) { + error = -ENOTDIR; + if (oldnd.last.name[oldnd.last.len]) + goto exit4; + if (newnd.last.name[newnd.last.len]) + goto exit4; + } + new_dentry = lookup_hash(&newnd.last, new_dir); + error = PTR_ERR(new_dentry); + if (IS_ERR(new_dentry)) + goto exit4; + + lock_kernel(); + error = 
presto_do_rename(fset, old_dir, old_dentry, + new_dir, new_dentry, info); + unlock_kernel(); + + dput(new_dentry); +exit4: + dput(old_dentry); +exit3: + unlock_rename(new_dir, old_dir); +exit2: + path_release(&newnd); +exit1: + path_release(&oldnd); +exit: + return error; +} + +int lento_rename(const char * oldname, const char * newname, + struct lento_vfs_context *info) +{ + int error; + char * from; + char * to; + + from = getname(oldname); + if(IS_ERR(from)) + return PTR_ERR(from); + to = getname(newname); + error = PTR_ERR(to); + if (!IS_ERR(to)) { + error = lento_do_rename(from,to, info); + putname(to); + } + putname(from); + return error; +} + +struct dentry *presto_iopen(struct dentry *dentry, + ino_t ino, unsigned int generation) +{ + struct presto_file_set *fset; + char name[48]; + int error; + + ENTRY; + /* see if we already have the dentry we want */ + if (dentry->d_inode && dentry->d_inode->i_ino == ino && + dentry->d_inode->i_generation == generation) { + EXIT; + return dentry; + } + + /* Make sure we have a cache beneath us. We should always find at + * least one dentry inside the cache (if it exists), otherwise not + * even the cache root exists, or we passed in a bad name. 
+ */ + fset = presto_fset(dentry); + error = -EINVAL; + if (!fset) { + CERROR("No fileset for %*s!\n", + dentry->d_name.len, dentry->d_name.name); + EXIT; + dput(dentry); + return ERR_PTR(error); + } + dput(dentry); + + sprintf(name, "%s%#lx%c%#x", + PRESTO_ILOOKUP_MAGIC, ino, PRESTO_ILOOKUP_SEP, generation); + CDEBUG(D_PIOCTL, "opening %ld by number (as %s)\n", ino, name); + return lookup_one_len(name, fset->fset_dentry, strlen(name)); +} + +static struct file *presto_filp_dopen(struct dentry *dentry, int flags) +{ + struct file *f; + struct inode *inode; + int flag, error; + + ENTRY; + error = -ENFILE; + f = get_empty_filp(); + if (!f) { + CDEBUG(D_PIOCTL, "error getting file pointer\n"); + EXIT; + goto out; + } + f->f_flags = flag = flags; + f->f_mode = (flag+1) & O_ACCMODE; + inode = dentry->d_inode; + if (f->f_mode & FMODE_WRITE) { + error = get_write_access(inode); + if (error) { + CDEBUG(D_PIOCTL, "error getting write access\n"); + EXIT; goto cleanup_file; + } + } + + /* XXX: where the fuck is ->f_vfsmnt? */ + f->f_dentry = dentry; + f->f_mapping = dentry->d_inode->i_mapping; + f->f_pos = 0; + //f->f_reada = 0; + f->f_op = NULL; + if (inode->i_op) + /* XXX should we set to presto ops, or leave at cache ops? */ + f->f_op = inode->i_fop; + if (f->f_op && f->f_op->open) { + error = f->f_op->open(inode, f); + if (error) { + CDEBUG(D_PIOCTL, "error calling cache 'open'\n"); + EXIT; + goto cleanup_all; + } + } + f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); + + return f; + +cleanup_all: + if (f->f_mode & FMODE_WRITE) + put_write_access(inode); +cleanup_file: + put_filp(f); +out: + return ERR_PTR(error); +} + + +/* Open an inode by number. We pass in the cache root name (or a subdirectory + * from the cache that is guaranteed to exist) to be able to access the cache. 
+ */ +int lento_iopen(const char *name, ino_t ino, unsigned int generation, + int flags) +{ + char * tmp; + struct dentry *dentry; + struct nameidata nd; + int fd; + int error; + + ENTRY; + CDEBUG(D_PIOCTL, + "open %s:inode %#lx (%ld), generation %x (%d), flags %d \n", + name, ino, ino, generation, generation, flags); + /* We don't allow creation of files by number only, as it would + * lead to a dangling files not in any directory. We could also + * just turn off the flag and ignore it. + */ + if (flags & O_CREAT) { + CERROR("%s: create file by inode number (%ld) not allowed\n", + __FUNCTION__, ino); + EXIT; + return -EACCES; + } + + tmp = getname(name); + if (IS_ERR(tmp)) { + EXIT; + return PTR_ERR(tmp); + } + + lock_kernel(); +again: /* look the named file or a parent directory so we can get the cache */ + error = presto_walk(tmp, &nd); + if ( error && error != -ENOENT ) { + EXIT; + unlock_kernel(); + putname(tmp); + return error; + } + if (error == -ENOENT) + dentry = NULL; + else + dentry = nd.dentry; + + /* we didn't find the named file, so see if a parent exists */ + if (!dentry) { + char *slash; + + slash = strrchr(tmp, '/'); + if (slash && slash != tmp) { + *slash = '\0'; + path_release(&nd); + goto again; + } + /* we should never get here... */ + CDEBUG(D_PIOCTL, "no more path components to try!\n"); + fd = -ENOENT; + goto exit; + } + CDEBUG(D_PIOCTL, "returned dentry %p\n", dentry); + + dentry = presto_iopen(dentry, ino, generation); + fd = PTR_ERR(dentry); + if (IS_ERR(dentry)) { + EXIT; + goto exit; + } + + /* XXX start of code that might be replaced by something like: + * if (flags & (O_WRONLY | O_RDWR)) { + * error = get_write_access(dentry->d_inode); + * if (error) { + * EXIT; + * goto cleanup_dput; + * } + * } + * fd = open_dentry(dentry, flags); + * + * including the presto_filp_dopen() function (check dget counts!) 
+ */ + fd = get_unused_fd(); + if (fd < 0) { + EXIT; + goto exit; + } + + { + int error; + struct file * f = presto_filp_dopen(dentry, flags); + error = PTR_ERR(f); + if (IS_ERR(f)) { + put_unused_fd(fd); + fd = error; + } else { + fd_install(fd, f); + } + } + /* end of code that might be replaced by open_dentry */ + + EXIT; +exit: + unlock_kernel(); + path_release(&nd); + putname(tmp); + return fd; +} + +#ifdef CONFIG_FS_EXT_ATTR + +#if 0 /* was a broken check for Posix ACLs */ +/* Posix ACL code changes i_mode without using a notify_change (or + * a mark_inode_dirty!). We need to duplicate this at the reintegrator + * which is done by this function. This function also takes care of + * resetting the cached posix acls in this inode. If we don't reset these + * VFS continues using the old acl information, which by now may be out of + * date. + */ +int presto_setmode(struct presto_file_set *fset, struct dentry *dentry, + mode_t mode) +{ + struct inode *inode = dentry->d_inode; + + ENTRY; + /* The extended attributes for this inode were modified. + * At this point we can not be sure if any of the ACL + * information for this inode was updated. So we will + * force VFS to reread the acls. Note that we do this + * only when called from the SETEXTATTR ioctl, which is why we + * do this while setting the mode of the file. Also note + * that mark_inode_dirty is not be needed for i_*acl only + * to force i_mode info to disk, and should be removed once + * we use notify_change to update the mode. + * XXX: is mode setting really needed? Just setting acl's should + * be enough! VFS should change the i_mode as needed? 
SHP + */ + if (inode->i_acl && + inode->i_acl != POSIX_ACL_NOT_CACHED) + posix_acl_release(inode->i_acl); + if (inode->i_default_acl && + inode->i_default_acl != POSIX_ACL_NOT_CACHED) + posix_acl_release(inode->i_default_acl); + inode->i_acl = POSIX_ACL_NOT_CACHED; + inode->i_default_acl = POSIX_ACL_NOT_CACHED; + inode->i_mode = mode; + /* inode should already be dirty...but just in case */ + mark_inode_dirty(inode); + return 0; + +#if 0 + /* XXX: The following code is the preferred way to set mode, + * however, I need to carefully go through possible recursion + * paths back into presto. See comments in presto_do_setattr. + */ + { + int error=0; + struct super_operations *sops; + struct iattr iattr; + + iattr.ia_mode = mode; + iattr.ia_valid = ATTR_MODE|ATTR_FORCE; + + error = -EPERM; + sops = filter_c2csops(fset->fset_cache->cache_filter); + if (!sops && + !sops->notify_change) { + EXIT; + return error; + } + + error = sops->notify_change(dentry, &iattr); + + EXIT; + return error; + } +#endif +} +#endif + +/* setextattr Interface to cache filesystem */ +int presto_do_set_ext_attr(struct presto_file_set *fset, + struct dentry *dentry, + const char *name, void *buffer, + size_t buffer_len, int flags, mode_t *mode, + struct lento_vfs_context *info) +{ + struct rec_info rec; + struct inode *inode = dentry->d_inode; + struct inode_operations *iops; + int error; + struct presto_version ver; + void *handle; + char temp[PRESTO_EXT_ATTR_NAME_MAX+1]; + + ENTRY; + error = -EROFS; + if (IS_RDONLY(inode)) { + EXIT; + return -EROFS; + } + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + EXIT; + return -EPERM; + } + + presto_getversion(&ver, inode); + error = -EPERM; + /* We need to invoke different filters based on whether + * this dentry is a regular file, directory or symlink. 
+ */ + switch (inode->i_mode & S_IFMT) { + case S_IFLNK: /* symlink */ + iops = filter_c2csiops(fset->fset_cache->cache_filter); + break; + case S_IFDIR: /* directory */ + iops = filter_c2cdiops(fset->fset_cache->cache_filter); + break; + case S_IFREG: + default: /* everything else including regular files */ + iops = filter_c2cfiops(fset->fset_cache->cache_filter); + } + + if (!iops && !iops->set_ext_attr) { + EXIT; + return error; + } + + error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH); + if (error) { + EXIT; + return error; + } + + + handle = presto_trans_start(fset,dentry->d_inode,KML_OPCODE_SETEXTATTR); + if ( IS_ERR(handle) ) { + CERROR("presto_do_set_ext_attr: no space for transaction\n"); + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + return -ENOSPC; + } + + /* We first "truncate" name to the maximum allowable in presto */ + /* This simulates the strncpy_from_use code in fs/ext_attr.c */ + strlcpy(temp,name,sizeof(temp)); + + /* Pass down to cache*/ + error = iops->set_ext_attr(inode,temp,buffer,buffer_len,flags); + if (error) { + EXIT; + goto exit; + } + +#if 0 /* was a broken check for Posix ACLs */ + /* Reset mode if specified*/ + /* XXX: when we do native acl support, move this code out! */ + if (mode != NULL) { + error = presto_setmode(fset, dentry, *mode); + if (error) { + EXIT; + goto exit; + } + } +#endif + + /* Reset ctime. Only inode change time (ctime) is affected */ + error = presto_settime(fset, NULL, NULL, dentry, info, ATTR_CTIME); + if (error) { + EXIT; + goto exit; + } + + if (flags & EXT_ATTR_FLAG_USER) { + CERROR(" USER flag passed to presto_do_set_ext_attr!\n"); + BUG(); + } + + /* We are here, so set_ext_attr succeeded. We no longer need to keep + * track of EXT_ATTR_FLAG_{EXISTS,CREATE}, instead, we will force + * the attribute value during log replay. 
-SHP + */ + flags &= ~(EXT_ATTR_FLAG_EXISTS | EXT_ATTR_FLAG_CREATE); + + presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x10); + if ( presto_do_kml(info, dentry) ) + error = presto_journal_set_ext_attr + (&rec, fset, dentry, &ver, name, buffer, + buffer_len, flags); + + presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x20); + if ( presto_do_rcvd(info, dentry) ) + error = presto_write_last_rcvd(&rec, fset, info); + + presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x30); + EXIT; +exit: + presto_release_space(fset->fset_cache, PRESTO_REQHIGH); + presto_trans_commit(fset, handle); + + return error; +} +#endif diff --git a/fs/xfs/linux/kmem.h b/fs/xfs/linux/kmem.h new file mode 100644 index 000000000..c9df16472 --- /dev/null +++ b/fs/xfs/linux/kmem.h @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_KMEM_H__ +#define __XFS_SUPPORT_KMEM_H__ + +#include +#include +#include +#include + +/* + * Cutoff point to use vmalloc instead of kmalloc. + */ +#define MAX_SLAB_SIZE 0x10000 + +/* + * XFS uses slightly different names for these due to the + * IRIX heritage. + */ +#define kmem_zone kmem_cache_s +#define kmem_zone_t kmem_cache_t + +#define KM_SLEEP 0x0001 +#define KM_NOSLEEP 0x0002 +#define KM_NOFS 0x0004 + +typedef unsigned long xfs_pflags_t; + +#define PFLAGS_TEST_FSTRANS() (current->flags & PF_FSTRANS) + +/* these could be nested, so we save state */ +#define PFLAGS_SET_FSTRANS(STATEP) do { \ + *(STATEP) = current->flags; \ + current->flags |= PF_FSTRANS; \ +} while (0) + +#define PFLAGS_CLEAR_FSTRANS(STATEP) do { \ + *(STATEP) = current->flags; \ + current->flags &= ~PF_FSTRANS; \ +} while (0) + +/* Restore the PF_FSTRANS state to what was saved in STATEP */ +#define PFLAGS_RESTORE_FSTRANS(STATEP) do { \ + current->flags = ((current->flags & ~PF_FSTRANS) | \ + (*(STATEP) & PF_FSTRANS)); \ +} while (0) + +#define PFLAGS_DUP(OSTATEP, NSTATEP) do { \ + *(NSTATEP) = *(OSTATEP); \ +} while (0) + +/* + * XXX get rid of the unconditional __GFP_NOFAIL by adding + * a KM_FAIL flag and using it where we're allowed to fail. + */ +static __inline unsigned int +kmem_flags_convert(int flags) +{ + int lflags; + +#if DEBUG + if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS))) { + printk(KERN_WARNING + "XFS: memory allocation with wrong flags (%x)\n", flags); + BUG(); + } +#endif + + lflags = (flags & KM_NOSLEEP) ? 
GFP_ATOMIC : (GFP_KERNEL|__GFP_NOFAIL); + + /* avoid recusive callbacks to filesystem during transactions */ + if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS)) + lflags &= ~__GFP_FS; + + return lflags; +} + +static __inline void * +kmem_alloc(size_t size, int flags) +{ + if (unlikely(MAX_SLAB_SIZE < size)) + /* Avoid doing filesystem sensitive stuff to get this */ + return __vmalloc(size, kmem_flags_convert(flags), PAGE_KERNEL); + return kmalloc(size, kmem_flags_convert(flags)); +} + +static __inline void * +kmem_zalloc(size_t size, int flags) +{ + void *ptr = kmem_alloc(size, flags); + if (likely(ptr != NULL)) + memset(ptr, 0, size); + return ptr; +} + +static __inline void +kmem_free(void *ptr, size_t size) +{ + if (unlikely((unsigned long)ptr < VMALLOC_START || + (unsigned long)ptr >= VMALLOC_END)) + kfree(ptr); + else + vfree(ptr); +} + +static __inline void * +kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags) +{ + void *new = kmem_alloc(newsize, flags); + + if (likely(ptr != NULL)) { + if (likely(new != NULL)) + memcpy(new, ptr, min(oldsize, newsize)); + kmem_free(ptr, oldsize); + } + + return new; +} + +static __inline kmem_zone_t * +kmem_zone_init(int size, char *zone_name) +{ + return kmem_cache_create(zone_name, size, 0, 0, NULL, NULL); +} + +static __inline void * +kmem_zone_alloc(kmem_zone_t *zone, int flags) +{ + return kmem_cache_alloc(zone, kmem_flags_convert(flags)); +} + +static __inline void * +kmem_zone_zalloc(kmem_zone_t *zone, int flags) +{ + void *ptr = kmem_zone_alloc(zone, flags); + if (likely(ptr != NULL)) + memset(ptr, 0, kmem_cache_size(zone)); + return ptr; +} + +static __inline void +kmem_zone_free(kmem_zone_t *zone, void *ptr) +{ + kmem_cache_free(zone, ptr); +} + +typedef struct shrinker *kmem_shaker_t; +typedef int (*kmem_shake_func_t)(int, unsigned int); + +static __inline kmem_shaker_t +kmem_shake_register(kmem_shake_func_t sfunc) +{ + return set_shrinker(DEFAULT_SEEKS, sfunc); +} + +static __inline void 
+kmem_shake_deregister(kmem_shaker_t shrinker) +{ + remove_shrinker(shrinker); +} + +static __inline int +kmem_shake_allow(unsigned int gfp_mask) +{ + return (gfp_mask & __GFP_WAIT); +} + +#endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/linux/mrlock.h b/fs/xfs/linux/mrlock.h new file mode 100644 index 000000000..d2c11a098 --- /dev/null +++ b/fs/xfs/linux/mrlock.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_MRLOCK_H__ +#define __XFS_SUPPORT_MRLOCK_H__ + +#include + +enum { MR_NONE, MR_ACCESS, MR_UPDATE }; + +typedef struct { + struct rw_semaphore mr_lock; + int mr_writer; +} mrlock_t; + +#define mrinit(mrp, name) \ + ( (mrp)->mr_writer = 0, init_rwsem(&(mrp)->mr_lock) ) +#define mrlock_init(mrp, t,n,s) mrinit(mrp, n) +#define mrfree(mrp) do { } while (0) +#define mraccess(mrp) mraccessf(mrp, 0) +#define mrupdate(mrp) mrupdatef(mrp, 0) + +static inline void mraccessf(mrlock_t *mrp, int flags) +{ + down_read(&mrp->mr_lock); +} + +static inline void mrupdatef(mrlock_t *mrp, int flags) +{ + down_write(&mrp->mr_lock); + mrp->mr_writer = 1; +} + +static inline int mrtryaccess(mrlock_t *mrp) +{ + return down_read_trylock(&mrp->mr_lock); +} + +static inline int mrtryupdate(mrlock_t *mrp) +{ + if (!down_write_trylock(&mrp->mr_lock)) + return 0; + mrp->mr_writer = 1; + return 1; +} + +static inline void mrunlock(mrlock_t *mrp) +{ + if (mrp->mr_writer) { + mrp->mr_writer = 0; + up_write(&mrp->mr_lock); + } else { + up_read(&mrp->mr_lock); + } +} + +static inline void mrdemote(mrlock_t *mrp) +{ + mrp->mr_writer = 0; + downgrade_write(&mrp->mr_lock); +} + +#ifdef DEBUG +/* + * Debug-only routine, without some platform-specific asm code, we can + * now only answer requests regarding whether we hold the lock for write + * (reader state is outside our visibility, we only track writer state). + * Note: means !ismrlocked would give false positivies, so don't do that. 
+ */ +static inline int ismrlocked(mrlock_t *mrp, int type) +{ + if (mrp && type == MR_UPDATE) + return mrp->mr_writer; + return 1; +} +#endif + +#endif /* __XFS_SUPPORT_MRLOCK_H__ */ diff --git a/fs/xfs/linux/mutex.h b/fs/xfs/linux/mutex.h new file mode 100644 index 000000000..0b296bb94 --- /dev/null +++ b/fs/xfs/linux/mutex.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_MUTEX_H__ +#define __XFS_SUPPORT_MUTEX_H__ + +#include +#include + +/* + * Map the mutex'es from IRIX to Linux semaphores. + * + * Destroy just simply initializes to -99 which should block all other + * callers. 
+ */ +#define MUTEX_DEFAULT 0x0 +typedef struct semaphore mutex_t; + +#define mutex_init(lock, type, name) sema_init(lock, 1) +#define mutex_destroy(lock) sema_init(lock, -99) +#define mutex_lock(lock, num) down(lock) +#define mutex_trylock(lock) (down_trylock(lock) ? 0 : 1) +#define mutex_unlock(lock) up(lock) + +#endif /* __XFS_SUPPORT_MUTEX_H__ */ diff --git a/fs/xfs/linux/sema.h b/fs/xfs/linux/sema.h new file mode 100644 index 000000000..30b67b4e1 --- /dev/null +++ b/fs/xfs/linux/sema.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_SEMA_H__ +#define __XFS_SUPPORT_SEMA_H__ + +#include +#include +#include +#include + +/* + * sema_t structure just maps to struct semaphore in Linux kernel. + */ + +typedef struct semaphore sema_t; + +#define init_sema(sp, val, c, d) sema_init(sp, val) +#define initsema(sp, val) sema_init(sp, val) +#define initnsema(sp, val, name) sema_init(sp, val) +#define psema(sp, b) down(sp) +#define vsema(sp) up(sp) +#define valusema(sp) (atomic_read(&(sp)->count)) +#define freesema(sema) + +/* + * Map cpsema (try to get the sema) to down_trylock. We need to switch + * the return values since cpsema returns 1 (acquired) 0 (failed) and + * down_trylock returns the reverse 0 (acquired) 1 (failed). + */ + +#define cpsema(sp) (down_trylock(sp) ? 0 : 1) + +/* + * Didn't do cvsema(sp). Not sure how to map this to up/down/... + * It does a vsema if the values is < 0 other wise nothing. + */ + +#endif /* __XFS_SUPPORT_SEMA_H__ */ diff --git a/fs/xfs/linux/spin.h b/fs/xfs/linux/spin.h new file mode 100644 index 000000000..80a3a6bae --- /dev/null +++ b/fs/xfs/linux/spin.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_SPIN_H__ +#define __XFS_SUPPORT_SPIN_H__ + +#include /* preempt needs this */ +#include + +/* + * Map lock_t from IRIX to Linux spinlocks. + * + * Note that linux turns on/off spinlocks depending on CONFIG_SMP. + * We don't need to worry about SMP or not here. + */ + +#define SPLDECL(s) unsigned long s + +typedef spinlock_t lock_t; + +#define spinlock_init(lock, name) spin_lock_init(lock) +#define spinlock_destroy(lock) + +static inline unsigned long mutex_spinlock(lock_t *lock) +{ + spin_lock(lock); + return 0; +} + +/*ARGSUSED*/ +static inline void mutex_spinunlock(lock_t *lock, unsigned long s) +{ + spin_unlock(lock); +} + +static inline void nested_spinlock(lock_t *lock) +{ + spin_lock(lock); +} + +static inline void nested_spinunlock(lock_t *lock) +{ + spin_unlock(lock); +} + +#endif /* __XFS_SUPPORT_SPIN_H__ */ diff --git a/fs/xfs/linux/sv.h b/fs/xfs/linux/sv.h new file mode 100644 index 000000000..821d3167e --- /dev/null +++ b/fs/xfs/linux/sv.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_SV_H__ +#define __XFS_SUPPORT_SV_H__ + +#include +#include +#include + +/* + * Synchronisation variables. 
+ * + * (Parameters "pri", "svf" and "rts" are not implemented) + */ + +typedef struct sv_s { + wait_queue_head_t waiters; +} sv_t; + +#define SV_FIFO 0x0 /* sv_t is FIFO type */ +#define SV_LIFO 0x2 /* sv_t is LIFO type */ +#define SV_PRIO 0x4 /* sv_t is PRIO type */ +#define SV_KEYED 0x6 /* sv_t is KEYED type */ +#define SV_DEFAULT SV_FIFO + + +static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state, + unsigned long timeout) +{ + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue_exclusive(&sv->waiters, &wait); + __set_current_state(state); + spin_unlock(lock); + + schedule_timeout(timeout); + + remove_wait_queue(&sv->waiters, &wait); +} + +#define init_sv(sv,type,name,flag) \ + init_waitqueue_head(&(sv)->waiters) +#define sv_init(sv,flag,name) \ + init_waitqueue_head(&(sv)->waiters) +#define sv_destroy(sv) \ + /*NOTHING*/ +#define sv_wait(sv, pri, lock, s) \ + _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) +#define sv_wait_sig(sv, pri, lock, s) \ + _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) +#define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \ + _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts)) +#define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \ + _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts)) +#define sv_signal(sv) \ + wake_up(&(sv)->waiters) +#define sv_broadcast(sv) \ + wake_up_all(&(sv)->waiters) + +#endif /* __XFS_SUPPORT_SV_H__ */ diff --git a/fs/xfs/linux/time.h b/fs/xfs/linux/time.h new file mode 100644 index 000000000..109b5c083 --- /dev/null +++ b/fs/xfs/linux/time.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_TIME_H__ +#define __XFS_SUPPORT_TIME_H__ + +#include +#include + +typedef struct timespec timespec_t; + +static inline void delay(long ticks) +{ + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(ticks); +} + +static inline void nanotime(struct timespec *tvp) +{ + *tvp = CURRENT_TIME; +} + +#endif /* __XFS_SUPPORT_TIME_H__ */ diff --git a/fs/xfs/linux/xfs_aops.c b/fs/xfs/linux/xfs_aops.c new file mode 100644 index 000000000..3afc61d10 --- /dev/null +++ b/fs/xfs/linux/xfs_aops.c @@ -0,0 +1,1276 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_sb.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_trans.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_alloc.h" +#include "xfs_btree.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_error.h" +#include "xfs_rw.h" +#include "xfs_iomap.h" +#include + +STATIC void xfs_count_page_state(struct page *, int *, int *, int *); +STATIC void xfs_convert_page(struct inode *, struct page *, + xfs_iomap_t *, void *, int, int); + +#if defined(XFS_RW_TRACE) +void +xfs_page_trace( + int tag, + struct inode *inode, + struct page *page, + int mask) +{ + xfs_inode_t *ip; + bhv_desc_t *bdp; + vnode_t *vp = 
LINVFS_GET_VP(inode); + loff_t isize = i_size_read(inode); + loff_t offset = page->index << PAGE_CACHE_SHIFT; + int delalloc = -1, unmapped = -1, unwritten = -1; + + if (page_has_buffers(page)) + xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); + + bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); + ip = XFS_BHVTOI(bdp); + if (!ip->i_rwtrace) + return; + + ktrace_enter(ip->i_rwtrace, + (void *)((unsigned long)tag), + (void *)ip, + (void *)inode, + (void *)page, + (void *)((unsigned long)mask), + (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), + (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), + (void *)((unsigned long)((isize >> 32) & 0xffffffff)), + (void *)((unsigned long)(isize & 0xffffffff)), + (void *)((unsigned long)((offset >> 32) & 0xffffffff)), + (void *)((unsigned long)(offset & 0xffffffff)), + (void *)((unsigned long)delalloc), + (void *)((unsigned long)unmapped), + (void *)((unsigned long)unwritten), + (void *)NULL, + (void *)NULL); +} +#else +#define xfs_page_trace(tag, inode, page, mask) +#endif + +void +linvfs_unwritten_done( + struct buffer_head *bh, + int uptodate) +{ + xfs_buf_t *pb = (xfs_buf_t *)bh->b_private; + + ASSERT(buffer_unwritten(bh)); + bh->b_end_io = NULL; + clear_buffer_unwritten(bh); + if (!uptodate) + pagebuf_ioerror(pb, EIO); + if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { + pagebuf_iodone(pb, 1, 1); + } + end_buffer_async_write(bh, uptodate); +} + +/* + * Issue transactions to convert a buffer range from unwritten + * to written extents (buffered IO). 
+ */ +STATIC void +linvfs_unwritten_convert( + xfs_buf_t *bp) +{ + vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *); + int error; + + BUG_ON(atomic_read(&bp->pb_hold) < 1); + VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp), + BMAPI_UNWRITTEN, NULL, NULL, error); + XFS_BUF_SET_FSPRIVATE(bp, NULL); + XFS_BUF_CLR_IODONE_FUNC(bp); + XFS_BUF_UNDATAIO(bp); + iput(LINVFS_GET_IP(vp)); + pagebuf_iodone(bp, 0, 0); +} + +/* + * Issue transactions to convert a buffer range from unwritten + * to written extents (direct IO). + */ +STATIC void +linvfs_unwritten_convert_direct( + struct inode *inode, + loff_t offset, + ssize_t size, + void *private) +{ + ASSERT(!private || inode == (struct inode *)private); + + /* private indicates an unwritten extent lay beneath this IO, + * see linvfs_get_block_core. + */ + if (private && size > 0) { + vnode_t *vp = LINVFS_GET_VP(inode); + int error; + + VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error); + } +} + +STATIC int +xfs_map_blocks( + struct inode *inode, + loff_t offset, + ssize_t count, + xfs_iomap_t *iomapp, + int flags) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + int error, niomaps = 1; + + if (((flags & (BMAPI_DIRECT|BMAPI_SYNC)) == BMAPI_DIRECT) && + (offset >= i_size_read(inode))) + count = max_t(ssize_t, count, XFS_WRITE_IO_LOG); +retry: + VOP_BMAP(vp, offset, count, flags, iomapp, &niomaps, error); + if ((error == EAGAIN) || (error == EIO)) + return -error; + if (unlikely((flags & (BMAPI_WRITE|BMAPI_DIRECT)) == + (BMAPI_WRITE|BMAPI_DIRECT) && niomaps && + (iomapp->iomap_flags & IOMAP_DELAY))) { + flags = BMAPI_ALLOCATE; + goto retry; + } + if (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) { + VMODIFY(vp); + } + return -error; +} + +/* + * Finds the corresponding mapping in block @map array of the + * given @offset within a @page. 
+ */ +STATIC xfs_iomap_t * +xfs_offset_to_map( + struct page *page, + xfs_iomap_t *iomapp, + unsigned long offset) +{ + loff_t full_offset; /* offset from start of file */ + + ASSERT(offset < PAGE_CACHE_SIZE); + + full_offset = page->index; /* NB: using 64bit number */ + full_offset <<= PAGE_CACHE_SHIFT; /* offset from file start */ + full_offset += offset; /* offset from page start */ + + if (full_offset < iomapp->iomap_offset) + return NULL; + if (iomapp->iomap_offset + iomapp->iomap_bsize > full_offset) + return iomapp; + return NULL; +} + +STATIC void +xfs_map_at_offset( + struct page *page, + struct buffer_head *bh, + unsigned long offset, + int block_bits, + xfs_iomap_t *iomapp) +{ + xfs_daddr_t bn; + loff_t delta; + int sector_shift; + + ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); + ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY)); + ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL); + + delta = page->index; + delta <<= PAGE_CACHE_SHIFT; + delta += offset; + delta -= iomapp->iomap_offset; + delta >>= block_bits; + + sector_shift = block_bits - BBSHIFT; + bn = iomapp->iomap_bn >> sector_shift; + bn += delta; + ASSERT((bn << sector_shift) >= iomapp->iomap_bn); + + lock_buffer(bh); + bh->b_blocknr = bn; + bh->b_bdev = iomapp->iomap_target->pbr_bdev; + set_buffer_mapped(bh); + clear_buffer_delay(bh); +} + +/* + * Look for a page at index which is unlocked and contains our + * unwritten extent flagged buffers at its head. Returns page + * locked and with an extra reference count, and length of the + * unwritten extent component on this page that we can write, + * in units of filesystem blocks. 
+ */ +STATIC struct page * +xfs_probe_unwritten_page( + struct address_space *mapping, + pgoff_t index, + xfs_iomap_t *iomapp, + xfs_buf_t *pb, + unsigned long max_offset, + unsigned long *fsbs, + unsigned int bbits) +{ + struct page *page; + + page = find_trylock_page(mapping, index); + if (!page) + return 0; + if (PageWriteback(page)) + goto out; + + if (page->mapping && page_has_buffers(page)) { + struct buffer_head *bh, *head; + unsigned long p_offset = 0; + + *fsbs = 0; + bh = head = page_buffers(page); + do { + if (!buffer_unwritten(bh)) + break; + if (!xfs_offset_to_map(page, iomapp, p_offset)) + break; + if (p_offset >= max_offset) + break; + xfs_map_at_offset(page, bh, p_offset, bbits, iomapp); + set_buffer_unwritten_io(bh); + bh->b_private = pb; + p_offset += bh->b_size; + (*fsbs)++; + } while ((bh = bh->b_this_page) != head); + + if (p_offset) + return page; + } + +out: + unlock_page(page); + return NULL; +} + +/* + * Look for a page at index which is unlocked and not mapped + * yet - clustering for mmap write case. 
+ */ +STATIC unsigned int +xfs_probe_unmapped_page( + struct address_space *mapping, + pgoff_t index, + unsigned int pg_offset) +{ + struct page *page; + int ret = 0; + + page = find_trylock_page(mapping, index); + if (!page) + return 0; + if (PageWriteback(page)) + goto out; + + if (page->mapping && PageDirty(page)) { + if (page_has_buffers(page)) { + struct buffer_head *bh, *head; + + bh = head = page_buffers(page); + do { + if (buffer_mapped(bh) || !buffer_uptodate(bh)) + break; + ret += bh->b_size; + if (ret >= pg_offset) + break; + } while ((bh = bh->b_this_page) != head); + } else + ret = PAGE_CACHE_SIZE; + } + +out: + unlock_page(page); + return ret; +} + +STATIC unsigned int +xfs_probe_unmapped_cluster( + struct inode *inode, + struct page *startpage, + struct buffer_head *bh, + struct buffer_head *head) +{ + pgoff_t tindex, tlast, tloff; + unsigned int pg_offset, len, total = 0; + struct address_space *mapping = inode->i_mapping; + + /* First sum forwards in this page */ + do { + if (buffer_mapped(bh)) + break; + total += bh->b_size; + } while ((bh = bh->b_this_page) != head); + + /* If we reached the end of the page, sum forwards in + * following pages. + */ + if (bh == head) { + tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; + /* Prune this back to avoid pathological behavior */ + tloff = min(tlast, startpage->index + 64); + for (tindex = startpage->index + 1; tindex < tloff; tindex++) { + len = xfs_probe_unmapped_page(mapping, tindex, + PAGE_CACHE_SIZE); + if (!len) + return total; + total += len; + } + if (tindex == tlast && + (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { + total += xfs_probe_unmapped_page(mapping, + tindex, pg_offset); + } + } + return total; +} + +/* + * Probe for a given page (index) in the inode and test if it is delayed + * and without unwritten buffers. Returns page locked and with an extra + * reference count. 
+ */ +STATIC struct page * +xfs_probe_delalloc_page( + struct inode *inode, + pgoff_t index) +{ + struct page *page; + + page = find_trylock_page(inode->i_mapping, index); + if (!page) + return NULL; + if (PageWriteback(page)) + goto out; + + if (page->mapping && page_has_buffers(page)) { + struct buffer_head *bh, *head; + int acceptable = 0; + + bh = head = page_buffers(page); + do { + if (buffer_unwritten(bh)) { + acceptable = 0; + break; + } else if (buffer_delay(bh)) { + acceptable = 1; + } + } while ((bh = bh->b_this_page) != head); + + if (acceptable) + return page; + } + +out: + unlock_page(page); + return NULL; +} + +STATIC int +xfs_map_unwritten( + struct inode *inode, + struct page *start_page, + struct buffer_head *head, + struct buffer_head *curr, + unsigned long p_offset, + int block_bits, + xfs_iomap_t *iomapp, + int startio, + int all_bh) +{ + struct buffer_head *bh = curr; + xfs_iomap_t *tmp; + xfs_buf_t *pb; + loff_t offset, size; + unsigned long nblocks = 0; + + offset = start_page->index; + offset <<= PAGE_CACHE_SHIFT; + offset += p_offset; + + /* get an "empty" pagebuf to manage IO completion + * Proper values will be set before returning */ + pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0); + if (!pb) + return -EAGAIN; + + /* Take a reference to the inode to prevent it from + * being reclaimed while we have outstanding unwritten + * extent IO on it. + */ + if ((igrab(inode)) != inode) { + pagebuf_free(pb); + return -EAGAIN; + } + + /* Set the count to 1 initially, this will stop an I/O + * completion callout which happens before we have started + * all the I/O from calling pagebuf_iodone too early. 
+ */ + atomic_set(&pb->pb_io_remaining, 1); + + /* First map forwards in the page consecutive buffers + * covering this unwritten extent + */ + do { + if (!buffer_unwritten(bh)) + break; + tmp = xfs_offset_to_map(start_page, iomapp, p_offset); + if (!tmp) + break; + xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp); + set_buffer_unwritten_io(bh); + bh->b_private = pb; + p_offset += bh->b_size; + nblocks++; + } while ((bh = bh->b_this_page) != head); + + atomic_add(nblocks, &pb->pb_io_remaining); + + /* If we reached the end of the page, map forwards in any + * following pages which are also covered by this extent. + */ + if (bh == head) { + struct address_space *mapping = inode->i_mapping; + pgoff_t tindex, tloff, tlast; + unsigned long bs; + unsigned int pg_offset, bbits = inode->i_blkbits; + struct page *page; + + tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; + tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT; + tloff = min(tlast, tloff); + for (tindex = start_page->index + 1; tindex < tloff; tindex++) { + page = xfs_probe_unwritten_page(mapping, + tindex, iomapp, pb, + PAGE_CACHE_SIZE, &bs, bbits); + if (!page) + break; + nblocks += bs; + atomic_add(bs, &pb->pb_io_remaining); + xfs_convert_page(inode, page, iomapp, pb, + startio, all_bh); + /* stop if converting the next page might add + * enough blocks that the corresponding byte + * count won't fit in our ulong page buf length */ + if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) + goto enough; + } + + if (tindex == tlast && + (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) { + page = xfs_probe_unwritten_page(mapping, + tindex, iomapp, pb, + pg_offset, &bs, bbits); + if (page) { + nblocks += bs; + atomic_add(bs, &pb->pb_io_remaining); + xfs_convert_page(inode, page, iomapp, pb, + startio, all_bh); + if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits)) + goto enough; + } + } + } + +enough: + size = nblocks; /* NB: using 64bit number here */ + size 
<<= block_bits; /* convert fsb's to byte range */ + + XFS_BUF_DATAIO(pb); + XFS_BUF_ASYNC(pb); + XFS_BUF_SET_SIZE(pb, size); + XFS_BUF_SET_COUNT(pb, size); + XFS_BUF_SET_OFFSET(pb, offset); + XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode)); + XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert); + + if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { + pagebuf_iodone(pb, 1, 1); + } + + return 0; +} + +STATIC void +xfs_submit_page( + struct page *page, + struct buffer_head *bh_arr[], + int cnt) +{ + struct buffer_head *bh; + int i; + + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + clear_page_dirty(page); + unlock_page(page); + + if (cnt) { + for (i = 0; i < cnt; i++) { + bh = bh_arr[i]; + mark_buffer_async_write(bh); + if (buffer_unwritten(bh)) + set_buffer_unwritten_io(bh); + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + } + + for (i = 0; i < cnt; i++) + submit_bh(WRITE, bh_arr[i]); + } else + end_page_writeback(page); +} + +/* + * Allocate & map buffers for page given the extent map. Write it out. + * except for the original page of a writepage, this is called on + * delalloc/unwritten pages only, for the original page it is possible + * that the page has no mapping at all. 
+ */ +STATIC void +xfs_convert_page( + struct inode *inode, + struct page *page, + xfs_iomap_t *iomapp, + void *private, + int startio, + int all_bh) +{ + struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; + xfs_iomap_t *mp = iomapp, *tmp; + unsigned long end, offset; + pgoff_t end_index; + int i = 0, index = 0; + int bbits = inode->i_blkbits; + + end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT; + if (page->index < end_index) { + end = PAGE_CACHE_SIZE; + } else { + end = i_size_read(inode) & (PAGE_CACHE_SIZE-1); + } + bh = head = page_buffers(page); + do { + offset = i << bbits; + if (!(PageUptodate(page) || buffer_uptodate(bh))) + continue; + if (buffer_mapped(bh) && all_bh && + !buffer_unwritten(bh) && !buffer_delay(bh)) { + if (startio && (offset < end)) { + lock_buffer(bh); + bh_arr[index++] = bh; + } + continue; + } + tmp = xfs_offset_to_map(page, mp, offset); + if (!tmp) + continue; + ASSERT(!(tmp->iomap_flags & IOMAP_HOLE)); + ASSERT(!(tmp->iomap_flags & IOMAP_DELAY)); + + /* If this is a new unwritten extent buffer (i.e. one + * that we haven't passed in private data for, we must + * now map this buffer too. + */ + if (buffer_unwritten(bh) && !bh->b_end_io) { + ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN); + xfs_map_unwritten(inode, page, head, bh, + offset, bbits, tmp, startio, all_bh); + } else if (! (buffer_unwritten(bh) && buffer_locked(bh))) { + xfs_map_at_offset(page, bh, offset, bbits, tmp); + if (buffer_unwritten(bh)) { + set_buffer_unwritten_io(bh); + bh->b_private = private; + ASSERT(private); + } + } + if (startio && (offset < end)) { + bh_arr[index++] = bh; + } else { + set_buffer_dirty(bh); + unlock_buffer(bh); + mark_buffer_dirty(bh); + } + } while (i++, (bh = bh->b_this_page) != head); + + if (startio) { + xfs_submit_page(page, bh_arr, index); + } else { + unlock_page(page); + } +} + +/* + * Convert & write out a cluster of pages in the same extent as defined + * by mp and following the start page. 
+ */ +STATIC void +xfs_cluster_write( + struct inode *inode, + pgoff_t tindex, + xfs_iomap_t *iomapp, + int startio, + int all_bh) +{ + pgoff_t tlast; + struct page *page; + + tlast = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT; + for (; tindex < tlast; tindex++) { + page = xfs_probe_delalloc_page(inode, tindex); + if (!page) + break; + xfs_convert_page(inode, page, iomapp, NULL, startio, all_bh); + } +} + +/* + * Calling this without startio set means we are being asked to make a dirty + * page ready for freeing it's buffers. When called with startio set then + * we are coming from writepage. + * + * When called with startio set it is important that we write the WHOLE + * page if possible. + * The bh->b_state's cannot know if any of the blocks or which block for + * that matter are dirty due to mmap writes, and therefore bh uptodate is + * only vaild if the page itself isn't completely uptodate. Some layers + * may clear the page dirty flag prior to calling write page, under the + * assumption the entire page will be written out; by not writing out the + * whole page the page can be reused before all valid dirty data is + * written out. Note: in the case of a page that has been dirty'd by + * mapwrite and but partially setup by block_prepare_write the + * bh->b_states's will not agree and only ones setup by BPW/BCW will have + * valid state, thus the whole page must be written out thing. + */ + +STATIC int +xfs_page_state_convert( + struct inode *inode, + struct page *page, + int startio, + int unmapped) /* also implies page uptodate */ +{ + struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; + xfs_iomap_t *iomp, iomap; + unsigned long p_offset = 0; + pgoff_t end_index; + loff_t offset; + unsigned long long end_offset; + int len, err, i, cnt = 0, uptodate = 1; + int flags = startio ? 0 : BMAPI_TRYLOCK; + int page_dirty = 1; + + + /* Are we off the end of the file ? 
*/ + end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT; + if (page->index >= end_index) { + if ((page->index >= end_index + 1) || + !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { + err = -EIO; + goto error; + } + } + + offset = (loff_t)page->index << PAGE_CACHE_SHIFT; + end_offset = min_t(unsigned long long, + offset + PAGE_CACHE_SIZE, i_size_read(inode)); + + bh = head = page_buffers(page); + iomp = NULL; + + len = bh->b_size; + do { + if (offset >= end_offset) + break; + if (!buffer_uptodate(bh)) + uptodate = 0; + if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) + continue; + + if (iomp) { + iomp = xfs_offset_to_map(page, &iomap, p_offset); + } + + /* + * First case, map an unwritten extent and prepare for + * extent state conversion transaction on completion. + */ + if (buffer_unwritten(bh)) { + if (!iomp) { + err = xfs_map_blocks(inode, offset, len, &iomap, + BMAPI_READ|BMAPI_IGNSTATE); + if (err) { + goto error; + } + iomp = xfs_offset_to_map(page, &iomap, + p_offset); + } + if (iomp && startio) { + if (!bh->b_end_io) { + err = xfs_map_unwritten(inode, page, + head, bh, p_offset, + inode->i_blkbits, iomp, + startio, unmapped); + if (err) { + goto error; + } + } + bh_arr[cnt++] = bh; + page_dirty = 0; + } + /* + * Second case, allocate space for a delalloc buffer. + * We can return EAGAIN here in the release page case. 
+ */ + } else if (buffer_delay(bh)) { + if (!iomp) { + err = xfs_map_blocks(inode, offset, len, &iomap, + BMAPI_ALLOCATE | flags); + if (err) { + goto error; + } + iomp = xfs_offset_to_map(page, &iomap, + p_offset); + } + if (iomp) { + xfs_map_at_offset(page, bh, p_offset, + inode->i_blkbits, iomp); + if (startio) { + bh_arr[cnt++] = bh; + } else { + set_buffer_dirty(bh); + unlock_buffer(bh); + mark_buffer_dirty(bh); + } + page_dirty = 0; + } + } else if ((buffer_uptodate(bh) || PageUptodate(page)) && + (unmapped || startio)) { + + if (!buffer_mapped(bh)) { + int size; + + /* + * Getting here implies an unmapped buffer + * was found, and we are in a path where we + * need to write the whole page out. + */ + if (!iomp) { + size = xfs_probe_unmapped_cluster( + inode, page, bh, head); + err = xfs_map_blocks(inode, offset, + size, &iomap, + BMAPI_WRITE|BMAPI_MMAP); + if (err) { + goto error; + } + iomp = xfs_offset_to_map(page, &iomap, + p_offset); + } + if (iomp) { + xfs_map_at_offset(page, + bh, p_offset, + inode->i_blkbits, iomp); + if (startio) { + bh_arr[cnt++] = bh; + } else { + set_buffer_dirty(bh); + unlock_buffer(bh); + mark_buffer_dirty(bh); + } + page_dirty = 0; + } + } else if (startio) { + if (buffer_uptodate(bh) && + !test_and_set_bit(BH_Lock, &bh->b_state)) { + bh_arr[cnt++] = bh; + page_dirty = 0; + } + } + } + } while (offset += len, p_offset += len, + ((bh = bh->b_this_page) != head)); + + if (uptodate && bh == head) + SetPageUptodate(page); + + if (startio) + xfs_submit_page(page, bh_arr, cnt); + + if (iomp) + xfs_cluster_write(inode, page->index + 1, iomp, startio, unmapped); + + return page_dirty; + +error: + for (i = 0; i < cnt; i++) { + unlock_buffer(bh_arr[i]); + } + + /* + * If it's delalloc and we have nowhere to put it, + * throw it away, unless the lower layers told + * us to try again. 
+ */ + if (err != -EAGAIN) { + if (!unmapped) { + block_invalidatepage(page, 0); + } + ClearPageUptodate(page); + } + return err; +} + +STATIC int +linvfs_get_block_core( + struct inode *inode, + sector_t iblock, + unsigned long blocks, + struct buffer_head *bh_result, + int create, + int direct, + bmapi_flags_t flags) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + xfs_iomap_t iomap; + int retpbbm = 1; + int error; + ssize_t size; + loff_t offset = (loff_t)iblock << inode->i_blkbits; + + /* If we are doing writes at the end of the file, + * allocate in chunks + */ + if (blocks) + size = blocks << inode->i_blkbits; + else if (create && (offset >= i_size_read(inode))) + size = 1 << XFS_WRITE_IO_LOG; + else + size = 1 << inode->i_blkbits; + + VOP_BMAP(vp, offset, size, + create ? flags : BMAPI_READ, &iomap, &retpbbm, error); + if (error) + return -error; + + if (retpbbm == 0) + return 0; + + if (iomap.iomap_bn != IOMAP_DADDR_NULL) { + xfs_daddr_t bn; + loff_t delta; + + /* For unwritten extents do not report a disk address on + * the read case (treat as if we're reading into a hole). + */ + if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) { + delta = offset - iomap.iomap_offset; + delta >>= inode->i_blkbits; + + bn = iomap.iomap_bn >> (inode->i_blkbits - BBSHIFT); + bn += delta; + + bh_result->b_blocknr = bn; + bh_result->b_bdev = iomap.iomap_target->pbr_bdev; + set_buffer_mapped(bh_result); + } + if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) { + if (direct) + bh_result->b_private = inode; + set_buffer_unwritten(bh_result); + set_buffer_delay(bh_result); + } + } + + /* If this is a realtime file, data might be on a new device */ + bh_result->b_bdev = iomap.iomap_target->pbr_bdev; + + /* If we previously allocated a block out beyond eof and + * we are now coming back to use it then we will need to + * flag it as new even if it has a disk address. 
+ */ + if (create && + ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || + (offset >= i_size_read(inode)) || (iomap.iomap_flags & IOMAP_NEW))) { + set_buffer_new(bh_result); + } + + if (iomap.iomap_flags & IOMAP_DELAY) { + if (unlikely(direct)) + BUG(); + if (create) { + set_buffer_mapped(bh_result); + set_buffer_uptodate(bh_result); + } + bh_result->b_bdev = iomap.iomap_target->pbr_bdev; + set_buffer_delay(bh_result); + } + + if (blocks) { + loff_t iosize; + iosize = (iomap.iomap_bsize - iomap.iomap_delta); + bh_result->b_size = + (ssize_t)min(iosize, (loff_t)(blocks << inode->i_blkbits)); + } + + return 0; +} + +int +linvfs_get_block( + struct inode *inode, + sector_t iblock, + struct buffer_head *bh_result, + int create) +{ + return linvfs_get_block_core(inode, iblock, 0, bh_result, + create, 0, BMAPI_WRITE); +} + +STATIC int +linvfs_get_block_sync( + struct inode *inode, + sector_t iblock, + struct buffer_head *bh_result, + int create) +{ + return linvfs_get_block_core(inode, iblock, 0, bh_result, + create, 0, BMAPI_SYNC|BMAPI_WRITE); +} + +STATIC int +linvfs_get_blocks_direct( + struct inode *inode, + sector_t iblock, + unsigned long max_blocks, + struct buffer_head *bh_result, + int create) +{ + return linvfs_get_block_core(inode, iblock, max_blocks, bh_result, + create, 1, BMAPI_WRITE|BMAPI_DIRECT); +} + +STATIC ssize_t +linvfs_direct_IO( + int rw, + struct kiocb *iocb, + const struct iovec *iov, + loff_t offset, + unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + vnode_t *vp = LINVFS_GET_VP(inode); + xfs_iomap_t iomap; + int maps = 1; + int error; + + VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error); + if (error) + return -error; + + return blockdev_direct_IO_no_locking(rw, iocb, inode, + iomap.iomap_target->pbr_bdev, + iov, offset, nr_segs, + linvfs_get_blocks_direct, + linvfs_unwritten_convert_direct); +} + + +STATIC sector_t +linvfs_bmap( + struct address_space 
*mapping, + sector_t block) +{ + struct inode *inode = (struct inode *)mapping->host; + vnode_t *vp = LINVFS_GET_VP(inode); + int error; + + vn_trace_entry(vp, "linvfs_bmap", (inst_t *)__return_address); + + VOP_RWLOCK(vp, VRWLOCK_READ); + VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error); + VOP_RWUNLOCK(vp, VRWLOCK_READ); + return generic_block_bmap(mapping, block, linvfs_get_block); +} + +STATIC int +linvfs_readpage( + struct file *unused, + struct page *page) +{ + return mpage_readpage(page, linvfs_get_block); +} + +STATIC int +linvfs_readpages( + struct file *unused, + struct address_space *mapping, + struct list_head *pages, + unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block); +} + +STATIC void +xfs_count_page_state( + struct page *page, + int *delalloc, + int *unmapped, + int *unwritten) +{ + struct buffer_head *bh, *head; + + *delalloc = *unmapped = *unwritten = 0; + + bh = head = page_buffers(page); + do { + if (buffer_uptodate(bh) && !buffer_mapped(bh)) + (*unmapped) = 1; + else if (buffer_unwritten(bh) && !buffer_delay(bh)) + clear_buffer_unwritten(bh); + else if (buffer_unwritten(bh)) + (*unwritten) = 1; + else if (buffer_delay(bh)) + (*delalloc) = 1; + } while ((bh = bh->b_this_page) != head); +} + + +/* + * writepage: Called from one of two places: + * + * 1. we are flushing a delalloc buffer head. + * + * 2. we are writing out a dirty page. Typically the page dirty + * state is cleared before we get here. In this case is it + * conceivable we have no buffer heads. + * + * For delalloc space on the page we need to allocate space and + * flush it. For unmapped buffer heads on the page we should + * allocate space if the page is uptodate. For any other dirty + * buffer heads on the page we should flush them. 
+ * + * If we detect that a transaction would be required to flush + * the page, we have to check the process flags first, if we + * are already in a transaction or disk I/O during allocations + * is off, we need to fail the writepage and redirty the page. + */ + +STATIC int +linvfs_writepage( + struct page *page, + struct writeback_control *wbc) +{ + int error; + int need_trans; + int delalloc, unmapped, unwritten; + struct inode *inode = page->mapping->host; + + xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0); + + /* + * We need a transaction if: + * 1. There are delalloc buffers on the page + * 2. The page is uptodate and we have unmapped buffers + * 3. The page is uptodate and we have no buffers + * 4. There are unwritten buffers on the page + */ + + if (!page_has_buffers(page)) { + unmapped = 1; + need_trans = 1; + } else { + xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); + if (!PageUptodate(page)) + unmapped = 0; + need_trans = delalloc + unmapped + unwritten; + } + + /* + * If we need a transaction and the process flags say + * we are already in a transaction, or no IO is allowed + * then mark the page dirty again and leave the page + * as is. + */ + if (PFLAGS_TEST_FSTRANS() && need_trans) + goto out_fail; + + /* + * Delay hooking up buffer heads until we have + * made our go/no-go decision. + */ + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << inode->i_blkbits, 0); + + /* + * Convert delayed allocate, unwritten or unmapped space + * to real space and flush out to disk. + */ + error = xfs_page_state_convert(inode, page, 1, unmapped); + if (error == -EAGAIN) + goto out_fail; + if (unlikely(error < 0)) + goto out_unlock; + + return 0; + +out_fail: + set_page_dirty(page); + unlock_page(page); + return 0; +out_unlock: + unlock_page(page); + return error; +} + +/* + * Called to move a page into cleanable state - and from there + * to be released. Possibly the page is already clean. We always + * have buffer heads in this call. 
+ * + * Returns 0 if the page is ok to release, 1 otherwise. + * + * Possible scenarios are: + * + * 1. We are being called to release a page which has been written + * to via regular I/O. buffer heads will be dirty and possibly + * delalloc. If no delalloc buffer heads in this case then we + * can just return zero. + * + * 2. We are called to release a page which has been written via + * mmap, all we need to do is ensure there is no delalloc + * state in the buffer heads, if not we can let the caller + * free them and we should come back later via writepage. + */ +STATIC int +linvfs_release_page( + struct page *page, + int gfp_mask) +{ + struct inode *inode = page->mapping->host; + int dirty, delalloc, unmapped, unwritten; + + xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask); + + xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); + if (!delalloc && !unwritten) + goto free_buffers; + + if (!(gfp_mask & __GFP_FS)) + return 0; + + /* If we are already inside a transaction or the thread cannot + * do I/O, we cannot release this page. + */ + if (PFLAGS_TEST_FSTRANS()) + return 0; + + /* + * Convert delalloc space to real space, do not flush the + * data out to disk, that will be done by the caller. + * Never need to allocate space here - we will always + * come back to writepage in that case. 
+ */ + dirty = xfs_page_state_convert(inode, page, 0, 0); + if (dirty == 0 && !unwritten) + goto free_buffers; + return 0; + +free_buffers: + return try_to_free_buffers(page); +} + +STATIC int +linvfs_prepare_write( + struct file *file, + struct page *page, + unsigned int from, + unsigned int to) +{ + if (file && (file->f_flags & O_SYNC)) { + return block_prepare_write(page, from, to, + linvfs_get_block_sync); + } else { + return block_prepare_write(page, from, to, + linvfs_get_block); + } +} + +struct address_space_operations linvfs_aops = { + .readpage = linvfs_readpage, + .readpages = linvfs_readpages, + .writepage = linvfs_writepage, + .sync_page = block_sync_page, + .releasepage = linvfs_release_page, + .prepare_write = linvfs_prepare_write, + .commit_write = generic_commit_write, + .bmap = linvfs_bmap, + .direct_IO = linvfs_direct_IO, +}; diff --git a/fs/xfs/linux/xfs_buf.c b/fs/xfs/linux/xfs_buf.c new file mode 100644 index 000000000..69050a0de --- /dev/null +++ b/fs/xfs/linux/xfs_buf.c @@ -0,0 +1,1811 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * The xfs_buf.c code provides an abstract buffer cache model on top + * of the Linux page cache. Cached metadata blocks for a file system + * are hashed to the inode for the block device. xfs_buf.c assembles + * buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O. + * + * Written by Steve Lord, Jim Mostek, Russell Cattelan + * and Rajagopal Ananthanarayanan ("ananth") at SGI. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xfs_linux.h" + +#ifndef GFP_READAHEAD +#define GFP_READAHEAD (__GFP_NOWARN|__GFP_NORETRY) +#endif + +/* + * File wide globals + */ + +STATIC kmem_cache_t *pagebuf_cache; +STATIC void pagebuf_daemon_wakeup(void); +STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); +STATIC struct workqueue_struct *pagebuf_logio_workqueue; +STATIC struct workqueue_struct *pagebuf_dataio_workqueue; + +/* + * Pagebuf debugging + */ + +#ifdef PAGEBUF_TRACE +void +pagebuf_trace( + xfs_buf_t *pb, + char *id, + void *data, + void *ra) +{ + ktrace_enter(pagebuf_trace_buf, + pb, id, + (void *)(unsigned long)pb->pb_flags, + (void *)(unsigned long)pb->pb_hold.counter, + (void *)(unsigned long)pb->pb_sema.count.counter, + (void *)current, + data, ra, + (void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff), + (void *)(unsigned long)(pb->pb_file_offset & 0xffffffff), + (void *)(unsigned long)pb->pb_buffer_length, + NULL, NULL, NULL, NULL, NULL); +} +ktrace_t *pagebuf_trace_buf; 
+#define PAGEBUF_TRACE_SIZE 4096 +#define PB_TRACE(pb, id, data) \ + pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0)) +#else +#define PB_TRACE(pb, id, data) do { } while (0) +#endif + +#ifdef PAGEBUF_LOCK_TRACKING +# define PB_SET_OWNER(pb) ((pb)->pb_last_holder = current->pid) +# define PB_CLEAR_OWNER(pb) ((pb)->pb_last_holder = -1) +# define PB_GET_OWNER(pb) ((pb)->pb_last_holder) +#else +# define PB_SET_OWNER(pb) do { } while (0) +# define PB_CLEAR_OWNER(pb) do { } while (0) +# define PB_GET_OWNER(pb) do { } while (0) +#endif + +/* + * Pagebuf allocation / freeing. + */ + +#define pb_to_gfp(flags) \ + (((flags) & PBF_READ_AHEAD) ? GFP_READAHEAD : \ + ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) + +#define pb_to_km(flags) \ + (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) + + +#define pagebuf_allocate(flags) \ + kmem_zone_alloc(pagebuf_cache, pb_to_km(flags)) +#define pagebuf_deallocate(pb) \ + kmem_zone_free(pagebuf_cache, (pb)); + +/* + * Pagebuf hashing + */ + +#define NBITS 8 +#define NHASH (1<pb_hash_index] + +STATIC int +_bhash( + struct block_device *bdev, + loff_t base) +{ + int bit, hval; + + base >>= 9; + base ^= (unsigned long)bdev / L1_CACHE_BYTES; + for (bit = hval = 0; base && bit < sizeof(base) * 8; bit += NBITS) { + hval ^= (int)base & (NHASH-1); + base >>= NBITS; + } + return hval; +} + +/* + * Mapping of multi-page buffers into contiguous virtual space + */ + +typedef struct a_list { + void *vm_addr; + struct a_list *next; +} a_list_t; + +STATIC a_list_t *as_free_head; +STATIC int as_list_len; +STATIC spinlock_t as_lock = SPIN_LOCK_UNLOCKED; + +/* + * Try to batch vunmaps because they are costly. 
+ */ +STATIC void +free_address( + void *addr) +{ + a_list_t *aentry; + + aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC); + if (aentry) { + spin_lock(&as_lock); + aentry->next = as_free_head; + aentry->vm_addr = addr; + as_free_head = aentry; + as_list_len++; + spin_unlock(&as_lock); + } else { + vunmap(addr); + } +} + +STATIC void +purge_addresses(void) +{ + a_list_t *aentry, *old; + + if (as_free_head == NULL) + return; + + spin_lock(&as_lock); + aentry = as_free_head; + as_free_head = NULL; + as_list_len = 0; + spin_unlock(&as_lock); + + while ((old = aentry) != NULL) { + vunmap(aentry->vm_addr); + aentry = aentry->next; + kfree(old); + } +} + +/* + * Internal pagebuf object manipulation + */ + +STATIC void +_pagebuf_initialize( + xfs_buf_t *pb, + xfs_buftarg_t *target, + loff_t range_base, + size_t range_length, + page_buf_flags_t flags) +{ + /* + * We don't want certain flags to appear in pb->pb_flags. + */ + flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD); + + memset(pb, 0, sizeof(xfs_buf_t)); + atomic_set(&pb->pb_hold, 1); + init_MUTEX_LOCKED(&pb->pb_iodonesema); + INIT_LIST_HEAD(&pb->pb_list); + INIT_LIST_HEAD(&pb->pb_hash_list); + init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */ + PB_SET_OWNER(pb); + pb->pb_target = target; + pb->pb_file_offset = range_base; + /* + * Set buffer_length and count_desired to the same value initially. + * I/O routines should use count_desired, which will be the same in + * most cases but may be reset (e.g. XFS recovery). + */ + pb->pb_buffer_length = pb->pb_count_desired = range_length; + pb->pb_flags = flags | PBF_NONE; + pb->pb_bn = XFS_BUF_DADDR_NULL; + atomic_set(&pb->pb_pin_count, 0); + init_waitqueue_head(&pb->pb_waiters); + + XFS_STATS_INC(pb_create); + PB_TRACE(pb, "initialize", target); +} + +/* + * Allocate a page array capable of holding a specified number + * of pages, and point the page buf at it. 
+ */ +STATIC int +_pagebuf_get_pages( + xfs_buf_t *pb, + int page_count, + page_buf_flags_t flags) +{ + /* Make sure that we have a page list */ + if (pb->pb_pages == NULL) { + pb->pb_offset = page_buf_poff(pb->pb_file_offset); + pb->pb_page_count = page_count; + if (page_count <= PB_PAGES) { + pb->pb_pages = pb->pb_page_array; + } else { + pb->pb_pages = kmem_alloc(sizeof(struct page *) * + page_count, pb_to_km(flags)); + if (pb->pb_pages == NULL) + return -ENOMEM; + } + memset(pb->pb_pages, 0, sizeof(struct page *) * page_count); + } + return 0; +} + +/* + * Frees pb_pages if it was malloced. + */ +STATIC void +_pagebuf_free_pages( + xfs_buf_t *bp) +{ + if (bp->pb_pages != bp->pb_page_array) { + kmem_free(bp->pb_pages, + bp->pb_page_count * sizeof(struct page *)); + } +} + +/* + * Releases the specified buffer. + * + * The modification state of any associated pages is left unchanged. + * The buffer most not be on any hash - use pagebuf_rele instead for + * hashed and refcounted buffers + */ +void +pagebuf_free( + xfs_buf_t *bp) +{ + PB_TRACE(bp, "free", 0); + + ASSERT(list_empty(&bp->pb_hash_list)); + + if (bp->pb_flags & _PBF_PAGE_CACHE) { + uint i; + + if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1)) + free_address(bp->pb_addr - bp->pb_offset); + + for (i = 0; i < bp->pb_page_count; i++) + page_cache_release(bp->pb_pages[i]); + _pagebuf_free_pages(bp); + } else if (bp->pb_flags & _PBF_KMEM_ALLOC) { + /* + * XXX(hch): bp->pb_count_desired might be incorrect (see + * pagebuf_associate_memory for details), but fortunately + * the Linux version of kmem_free ignores the len argument.. + */ + kmem_free(bp->pb_addr, bp->pb_count_desired); + _pagebuf_free_pages(bp); + } + + pagebuf_deallocate(bp); +} + +/* + * Finds all pages for buffer in question and builds it's page list. 
+ */ +STATIC int +_pagebuf_lookup_pages( + xfs_buf_t *bp, + uint flags) +{ + struct address_space *mapping = bp->pb_target->pbr_mapping; + unsigned int sectorshift = bp->pb_target->pbr_sshift; + size_t blocksize = bp->pb_target->pbr_bsize; + size_t size = bp->pb_count_desired; + size_t nbytes, offset; + int gfp_mask = pb_to_gfp(flags); + unsigned short page_count, i; + pgoff_t first; + loff_t end; + int error; + + end = bp->pb_file_offset + bp->pb_buffer_length; + page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset); + + error = _pagebuf_get_pages(bp, page_count, flags); + if (unlikely(error)) + return error; + + offset = bp->pb_offset; + first = bp->pb_file_offset >> PAGE_CACHE_SHIFT; + + for (i = 0; i < bp->pb_page_count; i++) { + struct page *page; + uint retries = 0; + + retry: + page = find_or_create_page(mapping, first + i, gfp_mask); + if (unlikely(page == NULL)) { + if (flags & PBF_READ_AHEAD) + return -ENOMEM; + + /* + * This could deadlock. + * + * But until all the XFS lowlevel code is revamped to + * handle buffer allocation failures we can't do much. 
+ */ + if (!(++retries % 100)) { + printk(KERN_ERR "possibly deadlocking in %s\n", + __FUNCTION__); + } + + XFS_STATS_INC(pb_page_retries); + pagebuf_daemon_wakeup(); + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(10); + goto retry; + } + + XFS_STATS_INC(pb_page_found); + + nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); + size -= nbytes; + + if (!PageUptodate(page)) { + page_count--; + if (blocksize == PAGE_CACHE_SIZE) { + if (flags & PBF_READ) + bp->pb_locked = 1; + } else if (!PagePrivate(page)) { + unsigned long j, range; + + /* + * In this case page->private holds a bitmap + * of uptodate sectors within the page + */ + ASSERT(blocksize < PAGE_CACHE_SIZE); + range = (offset + nbytes) >> sectorshift; + for (j = offset >> sectorshift; j < range; j++) + if (!test_bit(j, &page->private)) + break; + if (j == range) + page_count++; + } + } + + bp->pb_pages[i] = page; + offset = 0; + } + + if (!bp->pb_locked) { + for (i = 0; i < bp->pb_page_count; i++) + unlock_page(bp->pb_pages[i]); + } + + bp->pb_flags |= _PBF_PAGE_CACHE; + + if (page_count) { + /* if we have any uptodate pages, mark that in the buffer */ + bp->pb_flags &= ~PBF_NONE; + + /* if some pages aren't uptodate, mark that in the buffer */ + if (page_count != bp->pb_page_count) + bp->pb_flags |= PBF_PARTIAL; + } + + PB_TRACE(bp, "lookup_pages", (long)page_count); + return error; +} + +/* + * Map buffer into kernel address-space if nessecary. 
+ */ +STATIC int +_pagebuf_map_pages( + xfs_buf_t *bp, + uint flags) +{ + /* A single page buffer is always mappable */ + if (bp->pb_page_count == 1) { + bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset; + bp->pb_flags |= PBF_MAPPED; + } else if (flags & PBF_MAPPED) { + if (as_list_len > 64) + purge_addresses(); + bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count, + VM_MAP, PAGE_KERNEL); + if (unlikely(bp->pb_addr == NULL)) + return -ENOMEM; + bp->pb_addr += bp->pb_offset; + bp->pb_flags |= PBF_MAPPED; + } + + return 0; +} + +/* + * Finding and Reading Buffers + */ + +/* + * _pagebuf_find + * + * Looks up, and creates if absent, a lockable buffer for + * a given range of an inode. The buffer is returned + * locked. If other overlapping buffers exist, they are + * released before the new buffer is created and locked, + * which may imply that this call will block until those buffers + * are unlocked. No I/O is implied by this call. + */ +STATIC xfs_buf_t * +_pagebuf_find( /* find buffer for block */ + xfs_buftarg_t *target,/* target for block */ + loff_t ioff, /* starting offset of range */ + size_t isize, /* length of range */ + page_buf_flags_t flags, /* PBF_TRYLOCK */ + xfs_buf_t *new_pb)/* newly allocated buffer */ +{ + loff_t range_base; + size_t range_length; + int hval; + pb_hash_t *h; + xfs_buf_t *pb, *n; + int not_locked; + + range_base = (ioff << BBSHIFT); + range_length = (isize << BBSHIFT); + + /* Ensure we never do IOs smaller than the sector size */ + BUG_ON(range_length < (1 << target->pbr_sshift)); + + /* Ensure we never do IOs that are not sector aligned */ + BUG_ON(range_base & (loff_t)target->pbr_smask); + + hval = _bhash(target->pbr_bdev, range_base); + h = &pbhash[hval]; + + spin_lock(&h->pb_hash_lock); + list_for_each_entry_safe(pb, n, &h->pb_hash, pb_hash_list) { + if (pb->pb_target == target && + pb->pb_file_offset == range_base && + pb->pb_buffer_length == range_length) { + /* If we look at something bring it to the + * front of 
the list for next time + */ + atomic_inc(&pb->pb_hold); + list_move(&pb->pb_hash_list, &h->pb_hash); + goto found; + } + } + + /* No match found */ + if (new_pb) { + _pagebuf_initialize(new_pb, target, range_base, + range_length, flags); + new_pb->pb_hash_index = hval; + list_add(&new_pb->pb_hash_list, &h->pb_hash); + } else { + XFS_STATS_INC(pb_miss_locked); + } + + spin_unlock(&h->pb_hash_lock); + return (new_pb); + +found: + spin_unlock(&h->pb_hash_lock); + + /* Attempt to get the semaphore without sleeping, + * if this does not work then we need to drop the + * spinlock and do a hard attempt on the semaphore. + */ + not_locked = down_trylock(&pb->pb_sema); + if (not_locked) { + if (!(flags & PBF_TRYLOCK)) { + /* wait for buffer ownership */ + PB_TRACE(pb, "get_lock", 0); + pagebuf_lock(pb); + XFS_STATS_INC(pb_get_locked_waited); + } else { + /* We asked for a trylock and failed, no need + * to look at file offset and length here, we + * know that this pagebuf at least overlaps our + * pagebuf and is locked, therefore our buffer + * either does not exist, or is this buffer + */ + + pagebuf_rele(pb); + XFS_STATS_INC(pb_busy_locked); + return (NULL); + } + } else { + /* trylock worked */ + PB_SET_OWNER(pb); + } + + if (pb->pb_flags & PBF_STALE) + pb->pb_flags &= PBF_MAPPED; + PB_TRACE(pb, "got_lock", 0); + XFS_STATS_INC(pb_get_locked); + return (pb); +} + + +/* + * pagebuf_find + * + * pagebuf_find returns a buffer matching the specified range of + * data for the specified target, if any of the relevant blocks + * are in memory. The buffer may have unallocated holes, if + * some, but not all, of the blocks are in memory. Even where + * pages are present in the buffer, not all of every page may be + * valid. 
+ */ +xfs_buf_t * +pagebuf_find( /* find buffer for block */ + /* if the block is in memory */ + xfs_buftarg_t *target,/* target for block */ + loff_t ioff, /* starting offset of range */ + size_t isize, /* length of range */ + page_buf_flags_t flags) /* PBF_TRYLOCK */ +{ + return _pagebuf_find(target, ioff, isize, flags, NULL); +} + +/* + * pagebuf_get + * + * pagebuf_get assembles a buffer covering the specified range. + * Some or all of the blocks in the range may be valid. Storage + * in memory for all portions of the buffer will be allocated, + * although backing storage may not be. If PBF_READ is set in + * flags, pagebuf_iostart is called also. + */ +xfs_buf_t * +pagebuf_get( /* allocate a buffer */ + xfs_buftarg_t *target,/* target for buffer */ + loff_t ioff, /* starting offset of range */ + size_t isize, /* length of range */ + page_buf_flags_t flags) /* PBF_TRYLOCK */ +{ + xfs_buf_t *pb, *new_pb; + int error = 0, i; + + new_pb = pagebuf_allocate(flags); + if (unlikely(!new_pb)) + return NULL; + + pb = _pagebuf_find(target, ioff, isize, flags, new_pb); + if (pb == new_pb) { + error = _pagebuf_lookup_pages(pb, flags); + if (unlikely(error)) { + printk(KERN_WARNING + "pagebuf_get: failed to lookup pages\n"); + goto no_buffer; + } + } else { + pagebuf_deallocate(new_pb); + if (unlikely(pb == NULL)) + return NULL; + } + + for (i = 0; i < pb->pb_page_count; i++) + mark_page_accessed(pb->pb_pages[i]); + + if (!(pb->pb_flags & PBF_MAPPED)) { + error = _pagebuf_map_pages(pb, flags); + if (unlikely(error)) { + printk(KERN_WARNING + "pagebuf_get: failed to map pages\n"); + goto no_buffer; + } + } + + XFS_STATS_INC(pb_get); + + /* + * Always fill in the block number now, the mapped cases can do + * their own overlay of this later. 
+ */ + pb->pb_bn = ioff; + pb->pb_count_desired = pb->pb_buffer_length; + + if (flags & PBF_READ) { + if (PBF_NOT_DONE(pb)) { + PB_TRACE(pb, "get_read", (unsigned long)flags); + XFS_STATS_INC(pb_get_read); + pagebuf_iostart(pb, flags); + } else if (flags & PBF_ASYNC) { + PB_TRACE(pb, "get_read_async", (unsigned long)flags); + /* + * Read ahead call which is already satisfied, + * drop the buffer + */ + goto no_buffer; + } else { + PB_TRACE(pb, "get_read_done", (unsigned long)flags); + /* We do not want read in the flags */ + pb->pb_flags &= ~PBF_READ; + } + } else { + PB_TRACE(pb, "get_write", (unsigned long)flags); + } + + return pb; + +no_buffer: + if (flags & (PBF_LOCK | PBF_TRYLOCK)) + pagebuf_unlock(pb); + pagebuf_rele(pb); + return NULL; +} + +/* + * Create a skeletal pagebuf (no pages associated with it). + */ +xfs_buf_t * +pagebuf_lookup( + xfs_buftarg_t *target, + loff_t ioff, + size_t isize, + page_buf_flags_t flags) +{ + xfs_buf_t *pb; + + pb = pagebuf_allocate(flags); + if (pb) { + _pagebuf_initialize(pb, target, ioff, isize, flags); + } + return pb; +} + +/* + * If we are not low on memory then do the readahead in a deadlock + * safe manner. 
+ */ +void +pagebuf_readahead( + xfs_buftarg_t *target, + loff_t ioff, + size_t isize, + page_buf_flags_t flags) +{ + struct backing_dev_info *bdi; + + bdi = target->pbr_mapping->backing_dev_info; + if (bdi_read_congested(bdi)) + return; + if (bdi_write_congested(bdi)) + return; + + flags |= (PBF_TRYLOCK|PBF_READ|PBF_ASYNC|PBF_READ_AHEAD); + pagebuf_get(target, ioff, isize, flags); +} + +xfs_buf_t * +pagebuf_get_empty( + size_t len, + xfs_buftarg_t *target) +{ + xfs_buf_t *pb; + + pb = pagebuf_allocate(0); + if (pb) + _pagebuf_initialize(pb, target, 0, len, 0); + return pb; +} + +static inline struct page * +mem_to_page( + void *addr) +{ + if (((unsigned long)addr < VMALLOC_START) || + ((unsigned long)addr >= VMALLOC_END)) { + return virt_to_page(addr); + } else { + return vmalloc_to_page(addr); + } +} + +int +pagebuf_associate_memory( + xfs_buf_t *pb, + void *mem, + size_t len) +{ + int rval; + int i = 0; + size_t ptr; + size_t end, end_cur; + off_t offset; + int page_count; + + page_count = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT; + offset = (off_t) mem - ((off_t)mem & PAGE_CACHE_MASK); + if (offset && (len > PAGE_CACHE_SIZE)) + page_count++; + + /* Free any previous set of page pointers */ + if (pb->pb_pages) + _pagebuf_free_pages(pb); + + pb->pb_pages = NULL; + pb->pb_addr = mem; + + rval = _pagebuf_get_pages(pb, page_count, 0); + if (rval) + return rval; + + pb->pb_offset = offset; + ptr = (size_t) mem & PAGE_CACHE_MASK; + end = PAGE_CACHE_ALIGN((size_t) mem + len); + end_cur = end; + /* set up first page */ + pb->pb_pages[0] = mem_to_page(mem); + + ptr += PAGE_CACHE_SIZE; + pb->pb_page_count = ++i; + while (ptr < end) { + pb->pb_pages[i] = mem_to_page((void *)ptr); + pb->pb_page_count = ++i; + ptr += PAGE_CACHE_SIZE; + } + pb->pb_locked = 0; + + pb->pb_count_desired = pb->pb_buffer_length = len; + pb->pb_flags |= PBF_MAPPED; + + return 0; +} + +xfs_buf_t * +pagebuf_get_no_daddr( + size_t len, + xfs_buftarg_t *target) +{ + size_t malloc_len = len; + 
xfs_buf_t *bp; + void *data; + int error; + + if (unlikely(len > 0x20000)) + goto fail; + + bp = pagebuf_allocate(0); + if (unlikely(bp == NULL)) + goto fail; + _pagebuf_initialize(bp, target, 0, len, PBF_FORCEIO); + + try_again: + data = kmem_alloc(malloc_len, KM_SLEEP); + if (unlikely(data == NULL)) + goto fail_free_buf; + + /* check whether alignment matches.. */ + if ((__psunsigned_t)data != + ((__psunsigned_t)data & ~target->pbr_smask)) { + /* .. else double the size and try again */ + kmem_free(data, malloc_len); + malloc_len <<= 1; + goto try_again; + } + + error = pagebuf_associate_memory(bp, data, len); + if (error) + goto fail_free_mem; + bp->pb_flags |= _PBF_KMEM_ALLOC; + + pagebuf_unlock(bp); + + PB_TRACE(bp, "no_daddr", data); + return bp; + fail_free_mem: + kmem_free(data, malloc_len); + fail_free_buf: + pagebuf_free(bp); + fail: + return NULL; +} + +/* + * pagebuf_hold + * + * Increment reference count on buffer, to hold the buffer concurrently + * with another thread which may release (free) the buffer asynchronously. + * + * Must hold the buffer already to call this function. + */ +void +pagebuf_hold( + xfs_buf_t *pb) +{ + atomic_inc(&pb->pb_hold); + PB_TRACE(pb, "hold", 0); +} + +/* + * pagebuf_rele + * + * pagebuf_rele releases a hold on the specified buffer. If the + * the hold count is 1, pagebuf_rele calls pagebuf_free. 
+ */ +void +pagebuf_rele( + xfs_buf_t *pb) +{ + pb_hash_t *hash = pb_hash(pb); + + PB_TRACE(pb, "rele", pb->pb_relse); + + if (atomic_dec_and_lock(&pb->pb_hold, &hash->pb_hash_lock)) { + int do_free = 1; + + if (pb->pb_relse) { + atomic_inc(&pb->pb_hold); + spin_unlock(&hash->pb_hash_lock); + (*(pb->pb_relse)) (pb); + spin_lock(&hash->pb_hash_lock); + do_free = 0; + } + + if (pb->pb_flags & PBF_DELWRI) { + pb->pb_flags |= PBF_ASYNC; + atomic_inc(&pb->pb_hold); + pagebuf_delwri_queue(pb, 0); + do_free = 0; + } else if (pb->pb_flags & PBF_FS_MANAGED) { + do_free = 0; + } + + if (do_free) { + list_del_init(&pb->pb_hash_list); + spin_unlock(&hash->pb_hash_lock); + pagebuf_free(pb); + } else { + spin_unlock(&hash->pb_hash_lock); + } + } +} + + +/* + * Mutual exclusion on buffers. Locking model: + * + * Buffers associated with inodes for which buffer locking + * is not enabled are not protected by semaphores, and are + * assumed to be exclusively owned by the caller. There is a + * spinlock in the buffer, used by the caller when concurrent + * access is possible. + */ + +/* + * pagebuf_cond_lock + * + * pagebuf_cond_lock locks a buffer object, if it is not already locked. + * Note that this in no way + * locks the underlying pages, so it is only useful for synchronizing + * concurrent use of page buffer objects, not for synchronizing independent + * access to the underlying pages. + */ +int +pagebuf_cond_lock( /* lock buffer, if not locked */ + /* returns -EBUSY if locked) */ + xfs_buf_t *pb) +{ + int locked; + + locked = down_trylock(&pb->pb_sema) == 0; + if (locked) { + PB_SET_OWNER(pb); + } + PB_TRACE(pb, "cond_lock", (long)locked); + return(locked ? 0 : -EBUSY); +} + +/* + * pagebuf_lock_value + * + * Return lock value for a pagebuf + */ +int +pagebuf_lock_value( + xfs_buf_t *pb) +{ + return(atomic_read(&pb->pb_sema.count)); +} + +/* + * pagebuf_lock + * + * pagebuf_lock locks a buffer object. 
Note that this in no way + * locks the underlying pages, so it is only useful for synchronizing + * concurrent use of page buffer objects, not for synchronizing independent + * access to the underlying pages. + */ +int +pagebuf_lock( + xfs_buf_t *pb) +{ + PB_TRACE(pb, "lock", 0); + if (atomic_read(&pb->pb_io_remaining)) + blk_run_address_space(pb->pb_target->pbr_mapping); + down(&pb->pb_sema); + PB_SET_OWNER(pb); + PB_TRACE(pb, "locked", 0); + return 0; +} + +/* + * pagebuf_unlock + * + * pagebuf_unlock releases the lock on the buffer object created by + * pagebuf_lock or pagebuf_cond_lock (not any + * pinning of underlying pages created by pagebuf_pin). + */ +void +pagebuf_unlock( /* unlock buffer */ + xfs_buf_t *pb) /* buffer to unlock */ +{ + PB_CLEAR_OWNER(pb); + up(&pb->pb_sema); + PB_TRACE(pb, "unlock", 0); +} + + +/* + * Pinning Buffer Storage in Memory + */ + +/* + * pagebuf_pin + * + * pagebuf_pin locks all of the memory represented by a buffer in + * memory. Multiple calls to pagebuf_pin and pagebuf_unpin, for + * the same or different buffers affecting a given page, will + * properly count the number of outstanding "pin" requests. The + * buffer may be released after the pagebuf_pin and a different + * buffer used when calling pagebuf_unpin, if desired. + * pagebuf_pin should be used by the file system when it wants be + * assured that no attempt will be made to force the affected + * memory to disk. It does not assure that a given logical page + * will not be moved to a different physical page. + */ +void +pagebuf_pin( + xfs_buf_t *pb) +{ + atomic_inc(&pb->pb_pin_count); + PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter); +} + +/* + * pagebuf_unpin + * + * pagebuf_unpin reverses the locking of memory performed by + * pagebuf_pin. Note that both functions affected the logical + * pages associated with the buffer, not the buffer itself. 
+ */ +void +pagebuf_unpin( + xfs_buf_t *pb) +{ + if (atomic_dec_and_test(&pb->pb_pin_count)) { + wake_up_all(&pb->pb_waiters); + } + PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter); +} + +int +pagebuf_ispin( + xfs_buf_t *pb) +{ + return atomic_read(&pb->pb_pin_count); +} + +/* + * pagebuf_wait_unpin + * + * pagebuf_wait_unpin waits until all of the memory associated + * with the buffer is not longer locked in memory. It returns + * immediately if none of the affected pages are locked. + */ +static inline void +_pagebuf_wait_unpin( + xfs_buf_t *pb) +{ + DECLARE_WAITQUEUE (wait, current); + + if (atomic_read(&pb->pb_pin_count) == 0) + return; + + add_wait_queue(&pb->pb_waiters, &wait); + for (;;) { + current->state = TASK_UNINTERRUPTIBLE; + if (atomic_read(&pb->pb_pin_count) == 0) + break; + if (atomic_read(&pb->pb_io_remaining)) + blk_run_address_space(pb->pb_target->pbr_mapping); + schedule(); + } + remove_wait_queue(&pb->pb_waiters, &wait); + current->state = TASK_RUNNING; +} + +/* + * Buffer Utility Routines + */ + +/* + * pagebuf_iodone + * + * pagebuf_iodone marks a buffer for which I/O is in progress + * done with respect to that I/O. The pb_iodone routine, if + * present, will be called as a side-effect. + */ +void +pagebuf_iodone_work( + void *v) +{ + xfs_buf_t *bp = (xfs_buf_t *)v; + + if (bp->pb_iodone) + (*(bp->pb_iodone))(bp); + else if (bp->pb_flags & PBF_ASYNC) + xfs_buf_relse(bp); +} + +void +pagebuf_iodone( + xfs_buf_t *pb, + int dataio, + int schedule) +{ + pb->pb_flags &= ~(PBF_READ | PBF_WRITE); + if (pb->pb_error == 0) { + pb->pb_flags &= ~(PBF_PARTIAL | PBF_NONE); + } + + PB_TRACE(pb, "iodone", pb->pb_iodone); + + if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) { + if (schedule) { + INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb); + queue_work(dataio ? 
pagebuf_dataio_workqueue : + pagebuf_logio_workqueue, &pb->pb_iodone_work); + } else { + pagebuf_iodone_work(pb); + } + } else { + up(&pb->pb_iodonesema); + } +} + +/* + * pagebuf_ioerror + * + * pagebuf_ioerror sets the error code for a buffer. + */ +void +pagebuf_ioerror( /* mark/clear buffer error flag */ + xfs_buf_t *pb, /* buffer to mark */ + int error) /* error to store (0 if none) */ +{ + ASSERT(error >= 0 && error <= 0xffff); + pb->pb_error = (unsigned short)error; + PB_TRACE(pb, "ioerror", (unsigned long)error); +} + +/* + * pagebuf_iostart + * + * pagebuf_iostart initiates I/O on a buffer, based on the flags supplied. + * If necessary, it will arrange for any disk space allocation required, + * and it will break up the request if the block mappings require it. + * The pb_iodone routine in the buffer supplied will only be called + * when all of the subsidiary I/O requests, if any, have been completed. + * pagebuf_iostart calls the pagebuf_ioinitiate routine or + * pagebuf_iorequest, if the former routine is not defined, to start + * the I/O on a given low-level request. + */ +int +pagebuf_iostart( /* start I/O on a buffer */ + xfs_buf_t *pb, /* buffer to start */ + page_buf_flags_t flags) /* PBF_LOCK, PBF_ASYNC, PBF_READ, */ + /* PBF_WRITE, PBF_DELWRI, */ + /* PBF_DONT_BLOCK */ +{ + int status = 0; + + PB_TRACE(pb, "iostart", (unsigned long)flags); + + if (flags & PBF_DELWRI) { + pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC); + pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC); + pagebuf_delwri_queue(pb, 1); + return status; + } + + pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \ + PBF_READ_AHEAD | _PBF_RUN_QUEUES); + pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \ + PBF_READ_AHEAD | _PBF_RUN_QUEUES); + + BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL); + + /* For writes allow an alternate strategy routine to precede + * the actual I/O request (which may not be issued at all in + * a shutdown situation, for example). 
+ */ + status = (flags & PBF_WRITE) ? + pagebuf_iostrategy(pb) : pagebuf_iorequest(pb); + + /* Wait for I/O if we are not an async request. + * Note: async I/O request completion will release the buffer, + * and that can already be done by this point. So using the + * buffer pointer from here on, after async I/O, is invalid. + */ + if (!status && !(flags & PBF_ASYNC)) + status = pagebuf_iowait(pb); + + return status; +} + +/* + * Helper routine for pagebuf_iorequest + */ + +STATIC __inline__ int +_pagebuf_iolocked( + xfs_buf_t *pb) +{ + ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE)); + if (pb->pb_flags & PBF_READ) + return pb->pb_locked; + return 0; +} + +STATIC __inline__ void +_pagebuf_iodone( + xfs_buf_t *pb, + int schedule) +{ + if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { + pb->pb_locked = 0; + pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), schedule); + } +} + +STATIC int +bio_end_io_pagebuf( + struct bio *bio, + unsigned int bytes_done, + int error) +{ + xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private; + unsigned int i, blocksize = pb->pb_target->pbr_bsize; + unsigned int sectorshift = pb->pb_target->pbr_sshift; + struct bio_vec *bvec = bio->bi_io_vec; + + if (bio->bi_size) + return 1; + + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + pb->pb_error = EIO; + + for (i = 0; i < bio->bi_vcnt; i++, bvec++) { + struct page *page = bvec->bv_page; + + if (pb->pb_error) { + SetPageError(page); + } else if (blocksize == PAGE_CACHE_SIZE) { + SetPageUptodate(page); + } else if (!PagePrivate(page) && + (pb->pb_flags & _PBF_PAGE_CACHE)) { + unsigned long j, range; + + ASSERT(blocksize < PAGE_CACHE_SIZE); + range = (bvec->bv_offset + bvec->bv_len) >> sectorshift; + for (j = bvec->bv_offset >> sectorshift; j < range; j++) + set_bit(j, &page->private); + if (page->private == (unsigned long)(PAGE_CACHE_SIZE-1)) + SetPageUptodate(page); + } + + if (_pagebuf_iolocked(pb)) { + unlock_page(page); + } + } + + _pagebuf_iodone(pb, 1); + bio_put(bio); + return 0; +} + +void 
+_pagebuf_ioapply( + xfs_buf_t *pb) +{ + int i, map_i, total_nr_pages, nr_pages; + struct bio *bio; + int offset = pb->pb_offset; + int size = pb->pb_count_desired; + sector_t sector = pb->pb_bn; + unsigned int blocksize = pb->pb_target->pbr_bsize; + int locking = _pagebuf_iolocked(pb); + + total_nr_pages = pb->pb_page_count; + map_i = 0; + + /* Special code path for reading a sub page size pagebuf in -- + * we populate up the whole page, and hence the other metadata + * in the same page. This optimization is only valid when the + * filesystem block size and the page size are equal. + */ + if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) && + (pb->pb_flags & PBF_READ) && locking && + (blocksize == PAGE_CACHE_SIZE)) { + bio = bio_alloc(GFP_NOIO, 1); + + bio->bi_bdev = pb->pb_target->pbr_bdev; + bio->bi_sector = sector - (offset >> BBSHIFT); + bio->bi_end_io = bio_end_io_pagebuf; + bio->bi_private = pb; + + bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0); + size = 0; + + atomic_inc(&pb->pb_io_remaining); + + goto submit_io; + } + + /* Lock down the pages which we need to for the request */ + if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) { + for (i = 0; size; i++) { + int nbytes = PAGE_CACHE_SIZE - offset; + struct page *page = pb->pb_pages[i]; + + if (nbytes > size) + nbytes = size; + + lock_page(page); + + size -= nbytes; + offset = 0; + } + offset = pb->pb_offset; + size = pb->pb_count_desired; + } + +next_chunk: + atomic_inc(&pb->pb_io_remaining); + nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); + if (nr_pages > total_nr_pages) + nr_pages = total_nr_pages; + + bio = bio_alloc(GFP_NOIO, nr_pages); + bio->bi_bdev = pb->pb_target->pbr_bdev; + bio->bi_sector = sector; + bio->bi_end_io = bio_end_io_pagebuf; + bio->bi_private = pb; + + for (; size && nr_pages; nr_pages--, map_i++) { + int nbytes = PAGE_CACHE_SIZE - offset; + + if (nbytes > size) + nbytes = size; + + if (bio_add_page(bio, pb->pb_pages[map_i], + nbytes, offset) < nbytes) + 
break; + + offset = 0; + sector += nbytes >> BBSHIFT; + size -= nbytes; + total_nr_pages--; + } + +submit_io: + if (likely(bio->bi_size)) { + submit_bio((pb->pb_flags & PBF_READ) ? READ : WRITE, bio); + if (size) + goto next_chunk; + } else { + bio_put(bio); + pagebuf_ioerror(pb, EIO); + } + + if (pb->pb_flags & _PBF_RUN_QUEUES) { + pb->pb_flags &= ~_PBF_RUN_QUEUES; + if (atomic_read(&pb->pb_io_remaining) > 1) + blk_run_address_space(pb->pb_target->pbr_mapping); + } +} + +/* + * pagebuf_iorequest -- the core I/O request routine. + */ +int +pagebuf_iorequest( /* start real I/O */ + xfs_buf_t *pb) /* buffer to convey to device */ +{ + PB_TRACE(pb, "iorequest", 0); + + if (pb->pb_flags & PBF_DELWRI) { + pagebuf_delwri_queue(pb, 1); + return 0; + } + + if (pb->pb_flags & PBF_WRITE) { + _pagebuf_wait_unpin(pb); + } + + pagebuf_hold(pb); + + /* Set the count to 1 initially, this will stop an I/O + * completion callout which happens before we have started + * all the I/O from calling pagebuf_iodone too early. + */ + atomic_set(&pb->pb_io_remaining, 1); + _pagebuf_ioapply(pb); + _pagebuf_iodone(pb, 0); + + pagebuf_rele(pb); + return 0; +} + +/* + * pagebuf_iowait + * + * pagebuf_iowait waits for I/O to complete on the buffer supplied. + * It returns immediately if no I/O is pending. In any case, it returns + * the error code, if any, or 0 if there is no error. + */ +int +pagebuf_iowait( + xfs_buf_t *pb) +{ + PB_TRACE(pb, "iowait", 0); + if (atomic_read(&pb->pb_io_remaining)) + blk_run_address_space(pb->pb_target->pbr_mapping); + down(&pb->pb_iodonesema); + PB_TRACE(pb, "iowaited", (long)pb->pb_error); + return pb->pb_error; +} + +caddr_t +pagebuf_offset( + xfs_buf_t *pb, + size_t offset) +{ + struct page *page; + + offset += pb->pb_offset; + + page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT]; + return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1)); +} + +/* + * pagebuf_iomove + * + * Move data into or out of a buffer. 
+ */ +void +pagebuf_iomove( + xfs_buf_t *pb, /* buffer to process */ + size_t boff, /* starting buffer offset */ + size_t bsize, /* length to copy */ + caddr_t data, /* data address */ + page_buf_rw_t mode) /* read/write flag */ +{ + size_t bend, cpoff, csize; + struct page *page; + + bend = boff + bsize; + while (boff < bend) { + page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)]; + cpoff = page_buf_poff(boff + pb->pb_offset); + csize = min_t(size_t, + PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff); + + ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); + + switch (mode) { + case PBRW_ZERO: + memset(page_address(page) + cpoff, 0, csize); + break; + case PBRW_READ: + memcpy(data, page_address(page) + cpoff, csize); + break; + case PBRW_WRITE: + memcpy(page_address(page) + cpoff, data, csize); + } + + boff += csize; + data += csize; + } +} + +/* + * Handling of buftargs. + */ + +void +xfs_free_buftarg( + xfs_buftarg_t *btp, + int external) +{ + xfs_flush_buftarg(btp, 1); + if (external) + xfs_blkdev_put(btp->pbr_bdev); + kmem_free(btp, sizeof(*btp)); +} + +void +xfs_incore_relse( + xfs_buftarg_t *btp, + int delwri_only, + int wait) +{ + invalidate_bdev(btp->pbr_bdev, 1); + truncate_inode_pages(btp->pbr_mapping, 0LL); +} + +void +xfs_setsize_buftarg( + xfs_buftarg_t *btp, + unsigned int blocksize, + unsigned int sectorsize) +{ + btp->pbr_bsize = blocksize; + btp->pbr_sshift = ffs(sectorsize) - 1; + btp->pbr_smask = sectorsize - 1; + + if (set_blocksize(btp->pbr_bdev, sectorsize)) { + printk(KERN_WARNING + "XFS: Cannot set_blocksize to %u on device %s\n", + sectorsize, XFS_BUFTARG_NAME(btp)); + } +} + +xfs_buftarg_t * +xfs_alloc_buftarg( + struct block_device *bdev) +{ + xfs_buftarg_t *btp; + + btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); + + btp->pbr_dev = bdev->bd_dev; + btp->pbr_bdev = bdev; + btp->pbr_mapping = bdev->bd_inode->i_mapping; + xfs_setsize_buftarg(btp, PAGE_CACHE_SIZE, bdev_hardsect_size(bdev)); + + return btp; +} + + +/* + * Pagebuf delayed write 
buffer handling + */ + +STATIC LIST_HEAD(pbd_delwrite_queue); +STATIC spinlock_t pbd_delwrite_lock = SPIN_LOCK_UNLOCKED; + +STATIC void +pagebuf_delwri_queue( + xfs_buf_t *pb, + int unlock) +{ + PB_TRACE(pb, "delwri_q", (long)unlock); + ASSERT(pb->pb_flags & PBF_DELWRI); + + spin_lock(&pbd_delwrite_lock); + /* If already in the queue, dequeue and place at tail */ + if (!list_empty(&pb->pb_list)) { + if (unlock) { + atomic_dec(&pb->pb_hold); + } + list_del(&pb->pb_list); + } + + list_add_tail(&pb->pb_list, &pbd_delwrite_queue); + pb->pb_queuetime = jiffies; + spin_unlock(&pbd_delwrite_lock); + + if (unlock) + pagebuf_unlock(pb); +} + +void +pagebuf_delwri_dequeue( + xfs_buf_t *pb) +{ + PB_TRACE(pb, "delwri_uq", 0); + spin_lock(&pbd_delwrite_lock); + list_del_init(&pb->pb_list); + pb->pb_flags &= ~PBF_DELWRI; + spin_unlock(&pbd_delwrite_lock); +} + +STATIC void +pagebuf_runall_queues( + struct workqueue_struct *queue) +{ + flush_workqueue(queue); +} + +/* Defines for pagebuf daemon */ +STATIC DECLARE_COMPLETION(pagebuf_daemon_done); +STATIC struct task_struct *pagebuf_daemon_task; +STATIC int pagebuf_daemon_active; +STATIC int force_flush; + +STATIC void +pagebuf_daemon_wakeup(void) +{ + force_flush = 1; + barrier(); + wake_up_process(pagebuf_daemon_task); +} + +STATIC int +pagebuf_daemon( + void *data) +{ + struct list_head tmp; + xfs_buf_t *pb, *n; + + /* Set up the thread */ + daemonize("xfsbufd"); + current->flags |= PF_MEMALLOC; + + pagebuf_daemon_task = current; + pagebuf_daemon_active = 1; + barrier(); + + INIT_LIST_HEAD(&tmp); + do { + /* swsusp */ + if (current->flags & PF_FREEZE) + refrigerator(PF_FREEZE); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(xfs_flush_interval); + + spin_lock(&pbd_delwrite_lock); + list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { + PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); + ASSERT(pb->pb_flags & PBF_DELWRI); + + if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) { + if (!force_flush && + 
time_before(jiffies, + pb->pb_queuetime + + xfs_age_buffer)) { + pagebuf_unlock(pb); + break; + } + + pb->pb_flags &= ~PBF_DELWRI; + pb->pb_flags |= PBF_WRITE; + list_move(&pb->pb_list, &tmp); + } + } + spin_unlock(&pbd_delwrite_lock); + + while (!list_empty(&tmp)) { + pb = list_entry(tmp.next, xfs_buf_t, pb_list); + list_del_init(&pb->pb_list); + pagebuf_iostrategy(pb); + blk_run_address_space(pb->pb_target->pbr_mapping); + } + + if (as_list_len > 0) + purge_addresses(); + + force_flush = 0; + } while (pagebuf_daemon_active); + + complete_and_exit(&pagebuf_daemon_done, 0); +} + +/* + * Go through all incore buffers, and release buffers if they belong to + * the given device. This is used in filesystem error handling to + * preserve the consistency of its metadata. + */ +int +xfs_flush_buftarg( + xfs_buftarg_t *target, + int wait) +{ + struct list_head tmp; + xfs_buf_t *pb, *n; + int pincount = 0; + + pagebuf_runall_queues(pagebuf_dataio_workqueue); + pagebuf_runall_queues(pagebuf_logio_workqueue); + + INIT_LIST_HEAD(&tmp); + spin_lock(&pbd_delwrite_lock); + list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { + + if (pb->pb_target != target) + continue; + + ASSERT(pb->pb_flags & PBF_DELWRI); + PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); + if (pagebuf_ispin(pb)) { + pincount++; + continue; + } + + pb->pb_flags &= ~PBF_DELWRI; + pb->pb_flags |= PBF_WRITE; + list_move(&pb->pb_list, &tmp); + } + spin_unlock(&pbd_delwrite_lock); + + /* + * Dropped the delayed write list lock, now walk the temporary list + */ + list_for_each_entry_safe(pb, n, &tmp, pb_list) { + if (wait) + pb->pb_flags &= ~PBF_ASYNC; + else + list_del_init(&pb->pb_list); + + pagebuf_lock(pb); + pagebuf_iostrategy(pb); + } + + /* + * Remaining list items must be flushed before returning + */ + while (!list_empty(&tmp)) { + pb = list_entry(tmp.next, xfs_buf_t, pb_list); + + list_del_init(&pb->pb_list); + xfs_iowait(pb); + xfs_buf_relse(pb); + } + + if (wait) + 
blk_run_address_space(target->pbr_mapping); + + return pincount; +} + +STATIC int +pagebuf_daemon_start(void) +{ + int rval; + + pagebuf_logio_workqueue = create_workqueue("xfslogd"); + if (!pagebuf_logio_workqueue) + return -ENOMEM; + + pagebuf_dataio_workqueue = create_workqueue("xfsdatad"); + if (!pagebuf_dataio_workqueue) { + destroy_workqueue(pagebuf_logio_workqueue); + return -ENOMEM; + } + + rval = kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES); + if (rval < 0) { + destroy_workqueue(pagebuf_logio_workqueue); + destroy_workqueue(pagebuf_dataio_workqueue); + } + + return rval; +} + +/* + * pagebuf_daemon_stop + * + * Note: do not mark as __exit, it is called from pagebuf_terminate. + */ +STATIC void +pagebuf_daemon_stop(void) +{ + pagebuf_daemon_active = 0; + barrier(); + wait_for_completion(&pagebuf_daemon_done); + + destroy_workqueue(pagebuf_logio_workqueue); + destroy_workqueue(pagebuf_dataio_workqueue); +} + +/* + * Initialization and Termination + */ + +int __init +pagebuf_init(void) +{ + int i; + + pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (pagebuf_cache == NULL) { + printk("pagebuf: couldn't init pagebuf cache\n"); + pagebuf_terminate(); + return -ENOMEM; + } + + for (i = 0; i < NHASH; i++) { + spin_lock_init(&pbhash[i].pb_hash_lock); + INIT_LIST_HEAD(&pbhash[i].pb_hash); + } + +#ifdef PAGEBUF_TRACE + pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP); +#endif + + pagebuf_daemon_start(); + return 0; +} + + +/* + * pagebuf_terminate. + * + * Note: do not mark as __exit, this is also called from the __init code. 
+ */ +void +pagebuf_terminate(void) +{ + pagebuf_daemon_stop(); + +#ifdef PAGEBUF_TRACE + ktrace_free(pagebuf_trace_buf); +#endif + + kmem_cache_destroy(pagebuf_cache); +} diff --git a/fs/xfs/linux/xfs_buf.h b/fs/xfs/linux/xfs_buf.h new file mode 100644 index 000000000..f97e6c0cd --- /dev/null +++ b/fs/xfs/linux/xfs_buf.h @@ -0,0 +1,594 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * Written by Steve Lord, Jim Mostek, Russell Cattelan at SGI + */ + +#ifndef __XFS_BUF_H__ +#define __XFS_BUF_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Base types + */ + +#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) + +#define page_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) +#define page_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) +#define page_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) +#define page_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) + +typedef enum page_buf_rw_e { + PBRW_READ = 1, /* transfer into target memory */ + PBRW_WRITE = 2, /* transfer from target memory */ + PBRW_ZERO = 3 /* Zero target memory */ +} page_buf_rw_t; + + +typedef enum page_buf_flags_e { /* pb_flags values */ + PBF_READ = (1 << 0), /* buffer intended for reading from device */ + PBF_WRITE = (1 << 1), /* buffer intended for writing to device */ + PBF_MAPPED = (1 << 2), /* buffer mapped (pb_addr valid) */ + PBF_PARTIAL = (1 << 3), /* buffer partially read */ + PBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ + PBF_NONE = (1 << 5), /* buffer not read at all */ + PBF_DELWRI = (1 << 6), /* buffer has dirty pages */ + PBF_STALE = (1 << 7), /* buffer has been staled, do not find it */ + PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ + PBF_FS_DATAIOD = (1 << 9), /* schedule IO completion on fs datad */ + PBF_FORCEIO = (1 << 10), /* ignore any cache state */ + PBF_FLUSH = (1 << 11), /* flush disk write cache */ + PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ + + /* flags used only as arguments to access routines */ + PBF_LOCK = (1 << 14), /* lock requested */ + PBF_TRYLOCK = (1 << 15), /* lock 
requested, but do not wait */ + PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */ + + /* flags used only internally */ + _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ + _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ + _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ +} page_buf_flags_t; + +#define PBF_UPDATE (PBF_READ | PBF_WRITE) +#define PBF_NOT_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) != 0) +#define PBF_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) == 0) + +typedef struct xfs_buftarg { + dev_t pbr_dev; + struct block_device *pbr_bdev; + struct address_space *pbr_mapping; + unsigned int pbr_bsize; + unsigned int pbr_sshift; + size_t pbr_smask; +} xfs_buftarg_t; + +/* + * xfs_buf_t: Buffer structure for page cache-based buffers + * + * This buffer structure is used by the page cache buffer management routines + * to refer to an assembly of pages forming a logical buffer. The actual + * I/O is performed with buffer_head or bio structures, as required by drivers, + * for drivers which do not understand this structure. The buffer structure is + * used on temporary basis only, and discarded when released. + * + * The real data storage is recorded in the page cache. Metadata is + * hashed to the inode for the block device on which the file system resides. + * File data is hashed to the inode for the file. Pages which are only + * partially filled with data have bits set in their block_map entry + * to indicate which disk blocks in the page are not valid. 
+ */ + +struct xfs_buf; +typedef void (*page_buf_iodone_t)(struct xfs_buf *); + /* call-back function on I/O completion */ +typedef void (*page_buf_relse_t)(struct xfs_buf *); + /* call-back function on I/O completion */ +typedef int (*page_buf_bdstrat_t)(struct xfs_buf *); + +#define PB_PAGES 4 + +typedef struct xfs_buf { + struct semaphore pb_sema; /* semaphore for lockables */ + unsigned long pb_queuetime; /* time buffer was queued */ + atomic_t pb_pin_count; /* pin count */ + wait_queue_head_t pb_waiters; /* unpin waiters */ + struct list_head pb_list; + page_buf_flags_t pb_flags; /* status flags */ + struct list_head pb_hash_list; + xfs_buftarg_t *pb_target; /* logical object */ + atomic_t pb_hold; /* reference count */ + xfs_daddr_t pb_bn; /* block number for I/O */ + loff_t pb_file_offset; /* offset in file */ + size_t pb_buffer_length; /* size of buffer in bytes */ + size_t pb_count_desired; /* desired transfer size */ + void *pb_addr; /* virtual address of buffer */ + struct work_struct pb_iodone_work; + atomic_t pb_io_remaining;/* #outstanding I/O requests */ + page_buf_iodone_t pb_iodone; /* I/O completion function */ + page_buf_relse_t pb_relse; /* releasing function */ + page_buf_bdstrat_t pb_strat; /* pre-write function */ + struct semaphore pb_iodonesema; /* Semaphore for I/O waiters */ + void *pb_fspriv; + void *pb_fspriv2; + void *pb_fspriv3; + unsigned short pb_error; /* error code on I/O */ + unsigned short pb_page_count; /* size of page array */ + unsigned short pb_offset; /* page offset in first page */ + unsigned char pb_locked; /* page array is locked */ + unsigned char pb_hash_index; /* hash table index */ + struct page **pb_pages; /* array of page pointers */ + struct page *pb_page_array[PB_PAGES]; /* inline pages */ +#ifdef PAGEBUF_LOCK_TRACKING + int pb_last_holder; +#endif +} xfs_buf_t; + + +/* Finding and Reading Buffers */ + +extern xfs_buf_t *pagebuf_find( /* find buffer for block if */ + /* the block is in memory */ + xfs_buftarg_t 
*, /* inode for block */ + loff_t, /* starting offset of range */ + size_t, /* length of range */ + page_buf_flags_t); /* PBF_LOCK */ + +extern xfs_buf_t *pagebuf_get( /* allocate a buffer */ + xfs_buftarg_t *, /* inode for buffer */ + loff_t, /* starting offset of range */ + size_t, /* length of range */ + page_buf_flags_t); /* PBF_LOCK, PBF_READ, */ + /* PBF_ASYNC */ + +extern xfs_buf_t *pagebuf_lookup( + xfs_buftarg_t *, + loff_t, /* starting offset of range */ + size_t, /* length of range */ + page_buf_flags_t); /* PBF_READ, PBF_WRITE, */ + /* PBF_FORCEIO, */ + +extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */ + /* no memory or disk address */ + size_t len, + xfs_buftarg_t *); /* mount point "fake" inode */ + +extern xfs_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */ + /* without disk address */ + size_t len, + xfs_buftarg_t *); /* mount point "fake" inode */ + +extern int pagebuf_associate_memory( + xfs_buf_t *, + void *, + size_t); + +extern void pagebuf_hold( /* increment reference count */ + xfs_buf_t *); /* buffer to hold */ + +extern void pagebuf_readahead( /* read ahead into cache */ + xfs_buftarg_t *, /* target for buffer (or NULL) */ + loff_t, /* starting offset of range */ + size_t, /* length of range */ + page_buf_flags_t); /* additional read flags */ + +/* Releasing Buffers */ + +extern void pagebuf_free( /* deallocate a buffer */ + xfs_buf_t *); /* buffer to deallocate */ + +extern void pagebuf_rele( /* release hold on a buffer */ + xfs_buf_t *); /* buffer to release */ + +/* Locking and Unlocking Buffers */ + +extern int pagebuf_cond_lock( /* lock buffer, if not locked */ + /* (returns -EBUSY if locked) */ + xfs_buf_t *); /* buffer to lock */ + +extern int pagebuf_lock_value( /* return count on lock */ + xfs_buf_t *); /* buffer to check */ + +extern int pagebuf_lock( /* lock buffer */ + xfs_buf_t *); /* buffer to lock */ + +extern void pagebuf_unlock( /* unlock buffer */ + xfs_buf_t *); /* buffer to unlock */ + 
+/* Buffer Read and Write Routines */ + +extern void pagebuf_iodone( /* mark buffer I/O complete */ + xfs_buf_t *, /* buffer to mark */ + int, /* use data/log helper thread. */ + int); /* run completion locally, or in + * a helper thread. */ + +extern void pagebuf_ioerror( /* mark buffer in error (or not) */ + xfs_buf_t *, /* buffer to mark */ + int); /* error to store (0 if none) */ + +extern int pagebuf_iostart( /* start I/O on a buffer */ + xfs_buf_t *, /* buffer to start */ + page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC, */ + /* PBF_READ, PBF_WRITE, */ + /* PBF_DELWRI */ + +extern int pagebuf_iorequest( /* start real I/O */ + xfs_buf_t *); /* buffer to convey to device */ + +extern int pagebuf_iowait( /* wait for buffer I/O done */ + xfs_buf_t *); /* buffer to wait on */ + +extern void pagebuf_iomove( /* move data in/out of pagebuf */ + xfs_buf_t *, /* buffer to manipulate */ + size_t, /* starting buffer offset */ + size_t, /* length in buffer */ + caddr_t, /* data pointer */ + page_buf_rw_t); /* direction */ + +static inline int pagebuf_iostrategy(xfs_buf_t *pb) +{ + return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb); +} + +static inline int pagebuf_geterror(xfs_buf_t *pb) +{ + return pb ? 
pb->pb_error : ENOMEM; +} + +/* Buffer Utility Routines */ + +extern caddr_t pagebuf_offset( /* pointer at offset in buffer */ + xfs_buf_t *, /* buffer to offset into */ + size_t); /* offset */ + +/* Pinning Buffer Storage in Memory */ + +extern void pagebuf_pin( /* pin buffer in memory */ + xfs_buf_t *); /* buffer to pin */ + +extern void pagebuf_unpin( /* unpin buffered data */ + xfs_buf_t *); /* buffer to unpin */ + +extern int pagebuf_ispin( /* check if buffer is pinned */ + xfs_buf_t *); /* buffer to check */ + +/* Delayed Write Buffer Routines */ + +extern void pagebuf_delwri_dequeue(xfs_buf_t *); + +/* Buffer Daemon Setup Routines */ + +extern int pagebuf_init(void); +extern void pagebuf_terminate(void); + + +#ifdef PAGEBUF_TRACE +extern ktrace_t *pagebuf_trace_buf; +extern void pagebuf_trace( + xfs_buf_t *, /* buffer being traced */ + char *, /* description of operation */ + void *, /* arbitrary diagnostic value */ + void *); /* return address */ +#else +# define pagebuf_trace(pb, id, ptr, ra) do { } while (0) +#endif + +#define pagebuf_target_name(target) \ + ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; }) + + + + + +/* These are just for xfs_syncsub... 
it sets an internal variable + * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t + */ +#define XFS_B_ASYNC PBF_ASYNC +#define XFS_B_DELWRI PBF_DELWRI +#define XFS_B_READ PBF_READ +#define XFS_B_WRITE PBF_WRITE +#define XFS_B_STALE PBF_STALE + +#define XFS_BUF_TRYLOCK PBF_TRYLOCK +#define XFS_INCORE_TRYLOCK PBF_TRYLOCK +#define XFS_BUF_LOCK PBF_LOCK +#define XFS_BUF_MAPPED PBF_MAPPED + +#define BUF_BUSY PBF_DONT_BLOCK + +#define XFS_BUF_BFLAGS(x) ((x)->pb_flags) +#define XFS_BUF_ZEROFLAGS(x) \ + ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI)) + +#define XFS_BUF_STALE(x) ((x)->pb_flags |= XFS_B_STALE) +#define XFS_BUF_UNSTALE(x) ((x)->pb_flags &= ~XFS_B_STALE) +#define XFS_BUF_ISSTALE(x) ((x)->pb_flags & XFS_B_STALE) +#define XFS_BUF_SUPER_STALE(x) do { \ + XFS_BUF_STALE(x); \ + xfs_buf_undelay(x); \ + XFS_BUF_DONE(x); \ + } while (0) + +#define XFS_BUF_MANAGE PBF_FS_MANAGED +#define XFS_BUF_UNMANAGE(x) ((x)->pb_flags &= ~PBF_FS_MANAGED) + +static inline void xfs_buf_undelay(xfs_buf_t *pb) +{ + if (pb->pb_flags & PBF_DELWRI) { + if (pb->pb_list.next != &pb->pb_list) { + pagebuf_delwri_dequeue(pb); + pagebuf_rele(pb); + } else { + pb->pb_flags &= ~PBF_DELWRI; + } + } +} + +#define XFS_BUF_DELAYWRITE(x) ((x)->pb_flags |= PBF_DELWRI) +#define XFS_BUF_UNDELAYWRITE(x) xfs_buf_undelay(x) +#define XFS_BUF_ISDELAYWRITE(x) ((x)->pb_flags & PBF_DELWRI) + +#define XFS_BUF_ERROR(x,no) pagebuf_ioerror(x,no) +#define XFS_BUF_GETERROR(x) pagebuf_geterror(x) +#define XFS_BUF_ISERROR(x) (pagebuf_geterror(x)?1:0) + +#define XFS_BUF_DONE(x) ((x)->pb_flags &= ~(PBF_PARTIAL|PBF_NONE)) +#define XFS_BUF_UNDONE(x) ((x)->pb_flags |= PBF_PARTIAL|PBF_NONE) +#define XFS_BUF_ISDONE(x) (!(PBF_NOT_DONE(x))) + +#define XFS_BUF_BUSY(x) ((x)->pb_flags |= PBF_FORCEIO) +#define XFS_BUF_UNBUSY(x) ((x)->pb_flags &= ~PBF_FORCEIO) +#define XFS_BUF_ISBUSY(x) (1) + +#define XFS_BUF_ASYNC(x) ((x)->pb_flags |= PBF_ASYNC) +#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= 
~PBF_ASYNC) +#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC) + +#define XFS_BUF_FLUSH(x) ((x)->pb_flags |= PBF_FLUSH) +#define XFS_BUF_UNFLUSH(x) ((x)->pb_flags &= ~PBF_FLUSH) +#define XFS_BUF_ISFLUSH(x) ((x)->pb_flags & PBF_FLUSH) + +#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n") +#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n") +#define XFS_BUF_ISSHUT(x) (0) + +#define XFS_BUF_HOLD(x) pagebuf_hold(x) +#define XFS_BUF_READ(x) ((x)->pb_flags |= PBF_READ) +#define XFS_BUF_UNREAD(x) ((x)->pb_flags &= ~PBF_READ) +#define XFS_BUF_ISREAD(x) ((x)->pb_flags & PBF_READ) + +#define XFS_BUF_WRITE(x) ((x)->pb_flags |= PBF_WRITE) +#define XFS_BUF_UNWRITE(x) ((x)->pb_flags &= ~PBF_WRITE) +#define XFS_BUF_ISWRITE(x) ((x)->pb_flags & PBF_WRITE) + +#define XFS_BUF_ISUNINITIAL(x) (0) +#define XFS_BUF_UNUNINITIAL(x) (0) + +#define XFS_BUF_BP_ISMAPPED(bp) 1 + +#define XFS_BUF_DATAIO(x) ((x)->pb_flags |= PBF_FS_DATAIOD) +#define XFS_BUF_UNDATAIO(x) ((x)->pb_flags &= ~PBF_FS_DATAIOD) + +#define XFS_BUF_IODONE_FUNC(buf) (buf)->pb_iodone +#define XFS_BUF_SET_IODONE_FUNC(buf, func) \ + (buf)->pb_iodone = (func) +#define XFS_BUF_CLR_IODONE_FUNC(buf) \ + (buf)->pb_iodone = NULL +#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func) \ + (buf)->pb_strat = (func) +#define XFS_BUF_CLR_BDSTRAT_FUNC(buf) \ + (buf)->pb_strat = NULL + +#define XFS_BUF_FSPRIVATE(buf, type) \ + ((type)(buf)->pb_fspriv) +#define XFS_BUF_SET_FSPRIVATE(buf, value) \ + (buf)->pb_fspriv = (void *)(value) +#define XFS_BUF_FSPRIVATE2(buf, type) \ + ((type)(buf)->pb_fspriv2) +#define XFS_BUF_SET_FSPRIVATE2(buf, value) \ + (buf)->pb_fspriv2 = (void *)(value) +#define XFS_BUF_FSPRIVATE3(buf, type) \ + ((type)(buf)->pb_fspriv3) +#define XFS_BUF_SET_FSPRIVATE3(buf, value) \ + (buf)->pb_fspriv3 = (void *)(value) +#define XFS_BUF_SET_START(buf) + +#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \ + (buf)->pb_relse = (value) + +#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr) + +extern 
inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset) +{ + if (bp->pb_flags & PBF_MAPPED) + return XFS_BUF_PTR(bp) + offset; + return (xfs_caddr_t) pagebuf_offset(bp, offset); +} + +#define XFS_BUF_SET_PTR(bp, val, count) \ + pagebuf_associate_memory(bp, val, count) +#define XFS_BUF_ADDR(bp) ((bp)->pb_bn) +#define XFS_BUF_SET_ADDR(bp, blk) \ + ((bp)->pb_bn = (blk)) +#define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset) +#define XFS_BUF_SET_OFFSET(bp, off) \ + ((bp)->pb_file_offset = (off)) +#define XFS_BUF_COUNT(bp) ((bp)->pb_count_desired) +#define XFS_BUF_SET_COUNT(bp, cnt) \ + ((bp)->pb_count_desired = (cnt)) +#define XFS_BUF_SIZE(bp) ((bp)->pb_buffer_length) +#define XFS_BUF_SET_SIZE(bp, cnt) \ + ((bp)->pb_buffer_length = (cnt)) +#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) +#define XFS_BUF_SET_VTYPE(bp, type) +#define XFS_BUF_SET_REF(bp, ref) + +#define XFS_BUF_ISPINNED(bp) pagebuf_ispin(bp) + +#define XFS_BUF_VALUSEMA(bp) pagebuf_lock_value(bp) +#define XFS_BUF_CPSEMA(bp) (pagebuf_cond_lock(bp) == 0) +#define XFS_BUF_VSEMA(bp) pagebuf_unlock(bp) +#define XFS_BUF_PSEMA(bp,x) pagebuf_lock(bp) +#define XFS_BUF_V_IODONESEMA(bp) up(&bp->pb_iodonesema); + +/* setup the buffer target from a buftarg structure */ +#define XFS_BUF_SET_TARGET(bp, target) \ + (bp)->pb_target = (target) +#define XFS_BUF_TARGET(bp) ((bp)->pb_target) +#define XFS_BUFTARG_NAME(target) \ + pagebuf_target_name(target) + +#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) +#define XFS_BUF_SET_VTYPE(bp, type) +#define XFS_BUF_SET_REF(bp, ref) + +#define xfs_buf_read(target, blkno, len, flags) \ + pagebuf_get((target), (blkno), (len), \ + PBF_LOCK | PBF_READ | PBF_MAPPED) +#define xfs_buf_get(target, blkno, len, flags) \ + pagebuf_get((target), (blkno), (len), \ + PBF_LOCK | PBF_MAPPED) + +#define xfs_buf_read_flags(target, blkno, len, flags) \ + pagebuf_get((target), (blkno), (len), PBF_READ | (flags)) +#define xfs_buf_get_flags(target, blkno, len, flags) \ + pagebuf_get((target), (blkno), 
(len), (flags)) + +static inline int xfs_bawrite(void *mp, xfs_buf_t *bp) +{ + bp->pb_fspriv3 = mp; + bp->pb_strat = xfs_bdstrat_cb; + xfs_buf_undelay(bp); + return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES); +} + +static inline void xfs_buf_relse(xfs_buf_t *bp) +{ + if (!bp->pb_relse) + pagebuf_unlock(bp); + pagebuf_rele(bp); +} + +#define xfs_bpin(bp) pagebuf_pin(bp) +#define xfs_bunpin(bp) pagebuf_unpin(bp) + +#define xfs_buftrace(id, bp) \ + pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0)) + +#define xfs_biodone(pb) \ + pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), 0) + +#define xfs_incore(buftarg,blkno,len,lockit) \ + pagebuf_find(buftarg, blkno ,len, lockit) + + +#define xfs_biomove(pb, off, len, data, rw) \ + pagebuf_iomove((pb), (off), (len), (data), \ + ((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ) + +#define xfs_biozero(pb, off, len) \ + pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO) + + +static inline int XFS_bwrite(xfs_buf_t *pb) +{ + int iowait = (pb->pb_flags & PBF_ASYNC) == 0; + int error = 0; + + if (!iowait) + pb->pb_flags |= _PBF_RUN_QUEUES; + + xfs_buf_undelay(pb); + pagebuf_iostrategy(pb); + if (iowait) { + error = pagebuf_iowait(pb); + xfs_buf_relse(pb); + } + return error; +} + +#define XFS_bdwrite(pb) \ + pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC) + +static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp) +{ + bp->pb_strat = xfs_bdstrat_cb; + bp->pb_fspriv3 = mp; + + return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC); +} + +#define XFS_bdstrat(bp) pagebuf_iorequest(bp) + +#define xfs_iowait(pb) pagebuf_iowait(pb) + +#define xfs_baread(target, rablkno, ralen) \ + pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK) + +#define xfs_buf_get_empty(len, target) pagebuf_get_empty((len), (target)) +#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target)) +#define xfs_buf_free(bp) pagebuf_free(bp) + + +/* + * Handling of buftargs. 
+ */ + +extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *); +extern void xfs_free_buftarg(xfs_buftarg_t *, int); +extern void xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); +extern void xfs_incore_relse(xfs_buftarg_t *, int, int); +extern int xfs_flush_buftarg(xfs_buftarg_t *, int); + +#define xfs_getsize_buftarg(buftarg) \ + block_size((buftarg)->pbr_bdev) +#define xfs_readonly_buftarg(buftarg) \ + bdev_read_only((buftarg)->pbr_bdev) +#define xfs_binval(buftarg) \ + xfs_flush_buftarg(buftarg, 1) +#define XFS_bflush(buftarg) \ + xfs_flush_buftarg(buftarg, 1) + +#endif /* __XFS_BUF_H__ */ diff --git a/fs/xfs/linux/xfs_cred.h b/fs/xfs/linux/xfs_cred.h new file mode 100644 index 000000000..00c45849d --- /dev/null +++ b/fs/xfs/linux/xfs_cred.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_CRED_H__ +#define __XFS_CRED_H__ + +/* + * Credentials + */ +typedef struct cred { + /* EMPTY */ +} cred_t; + +extern struct cred *sys_cred; + +/* this is a hack.. (assums sys_cred is the only cred_t in the system) */ +static __inline int capable_cred(cred_t *cr, int cid) +{ + return (cr == sys_cred) ? 1 : capable(cid); +} + +#endif /* __XFS_CRED_H__ */ diff --git a/fs/xfs/linux/xfs_file.c b/fs/xfs/linux/xfs_file.c new file mode 100644 index 000000000..8d9f3b55f --- /dev/null +++ b/fs/xfs/linux/xfs_file.c @@ -0,0 +1,546 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_sb.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_trans.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_alloc.h" +#include "xfs_btree.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_error.h" +#include "xfs_rw.h" + +#include + +static struct vm_operations_struct linvfs_file_vm_ops; + + +STATIC inline ssize_t +__linvfs_read( + struct kiocb *iocb, + char __user *buf, + int ioflags, + size_t count, + loff_t pos) +{ + struct iovec iov = {buf, count}; + struct file *file = iocb->ki_filp; + vnode_t *vp = LINVFS_GET_VP(file->f_dentry->d_inode); + ssize_t rval; + + BUG_ON(iocb->ki_pos != pos); + + if (unlikely(file->f_flags & O_DIRECT)) + ioflags |= IO_ISDIRECT; + VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval); + return rval; +} + + +STATIC ssize_t +linvfs_read( + struct kiocb *iocb, + char __user *buf, + size_t count, + loff_t pos) +{ + return __linvfs_read(iocb, buf, 0, count, pos); +} + +STATIC ssize_t +linvfs_read_invis( + struct kiocb *iocb, + char __user *buf, + size_t count, + loff_t pos) +{ + return __linvfs_read(iocb, buf, IO_INVIS, count, pos); +} + + +STATIC inline ssize_t +__linvfs_write( + struct kiocb *iocb, + const char *buf, + int ioflags, + size_t count, + loff_t pos) +{ + struct iovec iov = {(void *)buf, count}; + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + vnode_t *vp = LINVFS_GET_VP(inode); + ssize_t rval; + + BUG_ON(iocb->ki_pos != pos); + if 
(unlikely(file->f_flags & O_DIRECT)) { + ioflags |= IO_ISDIRECT; + VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, + ioflags, NULL, rval); + } else { + down(&inode->i_sem); + VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, + ioflags, NULL, rval); + up(&inode->i_sem); + } + + return rval; +} + + +STATIC ssize_t +linvfs_write( + struct kiocb *iocb, + const char __user *buf, + size_t count, + loff_t pos) +{ + return __linvfs_write(iocb, buf, 0, count, pos); +} + +STATIC ssize_t +linvfs_write_invis( + struct kiocb *iocb, + const char __user *buf, + size_t count, + loff_t pos) +{ + return __linvfs_write(iocb, buf, IO_INVIS, count, pos); +} + + +STATIC inline ssize_t +__linvfs_readv( + struct file *file, + const struct iovec *iov, + int ioflags, + unsigned long nr_segs, + loff_t *ppos) +{ + struct inode *inode = file->f_mapping->host; + vnode_t *vp = LINVFS_GET_VP(inode); + struct kiocb kiocb; + ssize_t rval; + + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = *ppos; + + if (unlikely(file->f_flags & O_DIRECT)) + ioflags |= IO_ISDIRECT; + VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval); + if (rval == -EIOCBQUEUED) + rval = wait_on_sync_kiocb(&kiocb); + + *ppos = kiocb.ki_pos; + return rval; +} + +STATIC ssize_t +linvfs_readv( + struct file *file, + const struct iovec *iov, + unsigned long nr_segs, + loff_t *ppos) +{ + return __linvfs_readv(file, iov, 0, nr_segs, ppos); +} + +STATIC ssize_t +linvfs_readv_invis( + struct file *file, + const struct iovec *iov, + unsigned long nr_segs, + loff_t *ppos) +{ + return __linvfs_readv(file, iov, IO_INVIS, nr_segs, ppos); +} + + +STATIC inline ssize_t +__linvfs_writev( + struct file *file, + const struct iovec *iov, + int ioflags, + unsigned long nr_segs, + loff_t *ppos) +{ + struct inode *inode = file->f_mapping->host; + vnode_t *vp = LINVFS_GET_VP(inode); + struct kiocb kiocb; + ssize_t rval; + + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = *ppos; + if (unlikely(file->f_flags & O_DIRECT)) { + ioflags |= 
IO_ISDIRECT; + VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, + ioflags, NULL, rval); + } else { + down(&inode->i_sem); + VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, + ioflags, NULL, rval); + up(&inode->i_sem); + } + + if (rval == -EIOCBQUEUED) + rval = wait_on_sync_kiocb(&kiocb); + + *ppos = kiocb.ki_pos; + return rval; +} + + +STATIC ssize_t +linvfs_writev( + struct file *file, + const struct iovec *iov, + unsigned long nr_segs, + loff_t *ppos) +{ + return __linvfs_writev(file, iov, 0, nr_segs, ppos); +} + +STATIC ssize_t +linvfs_writev_invis( + struct file *file, + const struct iovec *iov, + unsigned long nr_segs, + loff_t *ppos) +{ + return __linvfs_writev(file, iov, IO_INVIS, nr_segs, ppos); +} + +STATIC ssize_t +linvfs_sendfile( + struct file *filp, + loff_t *ppos, + size_t count, + read_actor_t actor, + void *target) +{ + vnode_t *vp = LINVFS_GET_VP(filp->f_dentry->d_inode); + int error; + + VOP_SENDFILE(vp, filp, ppos, 0, count, actor, target, NULL, error); + return error; +} + + +STATIC int +linvfs_open( + struct inode *inode, + struct file *filp) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + int error; + + if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) + return -EFBIG; + + ASSERT(vp); + VOP_OPEN(vp, NULL, error); + return -error; +} + + +STATIC int +linvfs_release( + struct inode *inode, + struct file *filp) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + int error = 0; + + if (vp) + VOP_RELEASE(vp, error); + return -error; +} + + +STATIC int +linvfs_fsync( + struct file *filp, + struct dentry *dentry, + int datasync) +{ + struct inode *inode = dentry->d_inode; + vnode_t *vp = LINVFS_GET_VP(inode); + int error; + int flags = FSYNC_WAIT; + + if (datasync) + flags |= FSYNC_DATA; + + ASSERT(vp); + VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error); + return -error; +} + +/* + * linvfs_readdir maps to VOP_READDIR(). + * We need to build a uio, cred, ... 
+ */ + +#define nextdp(dp) ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen)) + +STATIC int +linvfs_readdir( + struct file *filp, + void *dirent, + filldir_t filldir) +{ + int error = 0; + vnode_t *vp; + uio_t uio; + iovec_t iov; + int eof = 0; + caddr_t read_buf; + int namelen, size = 0; + size_t rlen = PAGE_CACHE_SIZE; + xfs_off_t start_offset, curr_offset; + xfs_dirent_t *dbp = NULL; + + vp = LINVFS_GET_VP(filp->f_dentry->d_inode); + ASSERT(vp); + + /* Try fairly hard to get memory */ + do { + if ((read_buf = (caddr_t)kmalloc(rlen, GFP_KERNEL))) + break; + rlen >>= 1; + } while (rlen >= 1024); + + if (read_buf == NULL) + return -ENOMEM; + + uio.uio_iov = &iov; + uio.uio_segflg = UIO_SYSSPACE; + curr_offset = filp->f_pos; + if (filp->f_pos != 0x7fffffff) + uio.uio_offset = filp->f_pos; + else + uio.uio_offset = 0xffffffff; + + while (!eof) { + uio.uio_resid = iov.iov_len = rlen; + iov.iov_base = read_buf; + uio.uio_iovcnt = 1; + + start_offset = uio.uio_offset; + + VOP_READDIR(vp, &uio, NULL, &eof, error); + if ((uio.uio_offset == start_offset) || error) { + size = 0; + break; + } + + size = rlen - uio.uio_resid; + dbp = (xfs_dirent_t *)read_buf; + while (size > 0) { + namelen = strlen(dbp->d_name); + + if (filldir(dirent, dbp->d_name, namelen, + (loff_t) curr_offset & 0x7fffffff, + (ino_t) dbp->d_ino, + DT_UNKNOWN)) { + goto done; + } + size -= dbp->d_reclen; + curr_offset = (loff_t)dbp->d_off /* & 0x7fffffff */; + dbp = nextdp(dbp); + } + } +done: + if (!error) { + if (size == 0) + filp->f_pos = uio.uio_offset & 0x7fffffff; + else if (dbp) + filp->f_pos = curr_offset; + } + + kfree(read_buf); + return -error; +} + + +STATIC int +linvfs_file_mmap( + struct file *filp, + struct vm_area_struct *vma) +{ + struct inode *ip = filp->f_dentry->d_inode; + vnode_t *vp = LINVFS_GET_VP(ip); + vattr_t va = { .va_mask = XFS_AT_UPDATIME }; + int error; + + if ((vp->v_type == VREG) && (vp->v_vfsp->vfs_flag & VFS_DMI)) { + xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); + + error 
= -XFS_SEND_MMAP(mp, vma, 0); + if (error) + return error; + } + + vma->vm_ops = &linvfs_file_vm_ops; + + VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error); + return 0; +} + + +STATIC int +linvfs_ioctl( + struct inode *inode, + struct file *filp, + unsigned int cmd, + unsigned long arg) +{ + int error; + vnode_t *vp = LINVFS_GET_VP(inode); + + ASSERT(vp); + VOP_IOCTL(vp, inode, filp, 0, cmd, arg, error); + VMODIFY(vp); + + /* NOTE: some of the ioctl's return positive #'s as a + * byte count indicating success, such as + * readlink_by_handle. So we don't "sign flip" + * like most other routines. This means true + * errors need to be returned as a negative value. + */ + return error; +} + +STATIC int +linvfs_ioctl_invis( + struct inode *inode, + struct file *filp, + unsigned int cmd, + unsigned long arg) +{ + int error; + vnode_t *vp = LINVFS_GET_VP(inode); + + ASSERT(vp); + VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, arg, error); + VMODIFY(vp); + + /* NOTE: some of the ioctl's return positive #'s as a + * byte count indicating success, such as + * readlink_by_handle. So we don't "sign flip" + * like most other routines. This means true + * errors need to be returned as a negative value. 
+ */ + return error; +} + +#ifdef HAVE_VMOP_MPROTECT +STATIC int +linvfs_mprotect( + struct vm_area_struct *vma, + unsigned int newflags) +{ + vnode_t *vp = LINVFS_GET_VP(vma->vm_file->f_dentry->d_inode); + int error = 0; + + if ((vp->v_type == VREG) && (vp->v_vfsp->vfs_flag & VFS_DMI)) { + if ((vma->vm_flags & VM_MAYSHARE) && + (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE)) { + xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp); + + error = XFS_SEND_MMAP(mp, vma, VM_WRITE); + } + } + return error; +} +#endif /* HAVE_VMOP_MPROTECT */ + + +struct file_operations linvfs_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .readv = linvfs_readv, + .writev = linvfs_writev, + .aio_read = linvfs_read, + .aio_write = linvfs_write, + .sendfile = linvfs_sendfile, + .ioctl = linvfs_ioctl, + .mmap = linvfs_file_mmap, + .open = linvfs_open, + .release = linvfs_release, + .fsync = linvfs_fsync, +}; + +struct file_operations linvfs_invis_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .readv = linvfs_readv_invis, + .writev = linvfs_writev_invis, + .aio_read = linvfs_read_invis, + .aio_write = linvfs_write_invis, + .sendfile = linvfs_sendfile, + .ioctl = linvfs_ioctl_invis, + .mmap = linvfs_file_mmap, + .open = linvfs_open, + .release = linvfs_release, + .fsync = linvfs_fsync, +}; + + +struct file_operations linvfs_dir_operations = { + .read = generic_read_dir, + .readdir = linvfs_readdir, + .ioctl = linvfs_ioctl, + .fsync = linvfs_fsync, +}; + +static struct vm_operations_struct linvfs_file_vm_ops = { + .nopage = filemap_nopage, +#ifdef HAVE_VMOP_MPROTECT + .mprotect = linvfs_mprotect, +#endif +}; diff --git a/fs/xfs/linux/xfs_fs_subr.c b/fs/xfs/linux/xfs_fs_subr.c new file mode 100644 index 000000000..afad97018 --- /dev/null +++ b/fs/xfs/linux/xfs_fs_subr.c @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + +/* + * Stub for no-op vnode operations that return error status. + */ +int +fs_noerr() +{ + return 0; +} + +/* + * Operation unsupported under this file system. + */ +int +fs_nosys() +{ + return ENOSYS; +} + +/* + * Stub for inactive, strategy, and read/write lock/unlock. Does nothing. + */ +/* ARGSUSED */ +void +fs_noval() +{ +} + +/* + * vnode pcache layer for vnode_tosspages. 
+ * 'last' parameter unused but left in for IRIX compatibility + */ +void +fs_tosspages( + bhv_desc_t *bdp, + xfs_off_t first, + xfs_off_t last, + int fiopt) +{ + vnode_t *vp = BHV_TO_VNODE(bdp); + struct inode *ip = LINVFS_GET_IP(vp); + + if (VN_CACHED(vp)) + truncate_inode_pages(ip->i_mapping, first); +} + + +/* + * vnode pcache layer for vnode_flushinval_pages. + * 'last' parameter unused but left in for IRIX compatibility + */ +void +fs_flushinval_pages( + bhv_desc_t *bdp, + xfs_off_t first, + xfs_off_t last, + int fiopt) +{ + vnode_t *vp = BHV_TO_VNODE(bdp); + struct inode *ip = LINVFS_GET_IP(vp); + + if (VN_CACHED(vp)) { + filemap_fdatawrite(ip->i_mapping); + filemap_fdatawait(ip->i_mapping); + + truncate_inode_pages(ip->i_mapping, first); + } +} + +/* + * vnode pcache layer for vnode_flush_pages. + * 'last' parameter unused but left in for IRIX compatibility + */ +int +fs_flush_pages( + bhv_desc_t *bdp, + xfs_off_t first, + xfs_off_t last, + uint64_t flags, + int fiopt) +{ + vnode_t *vp = BHV_TO_VNODE(bdp); + struct inode *ip = LINVFS_GET_IP(vp); + + if (VN_CACHED(vp)) { + filemap_fdatawrite(ip->i_mapping); + filemap_fdatawait(ip->i_mapping); + } + + return 0; +} diff --git a/fs/xfs/linux/xfs_fs_subr.h b/fs/xfs/linux/xfs_fs_subr.h new file mode 100644 index 000000000..198b8dd78 --- /dev/null +++ b/fs/xfs/linux/xfs_fs_subr.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2000, 2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUBR_H__ +#define __XFS_SUBR_H__ + +/* + * Utilities shared among file system implementations. + */ + +struct cred; + +extern int fs_noerr(void); +extern int fs_nosys(void); +extern int fs_nodev(void); +extern void fs_noval(void); +extern void fs_tosspages(bhv_desc_t *, xfs_off_t, xfs_off_t, int); +extern void fs_flushinval_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, int); +extern int fs_flush_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, uint64_t, int); + +#endif /* __XFS_FS_SUBR_H__ */ diff --git a/fs/xfs/linux/xfs_globals.c b/fs/xfs/linux/xfs_globals.c new file mode 100644 index 000000000..1144a8b9f --- /dev/null +++ b/fs/xfs/linux/xfs_globals.c @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * This file contains globals needed by XFS that were normally defined + * somewhere else in IRIX. + */ + +#include "xfs.h" +#include "xfs_cred.h" +#include "xfs_sysctl.h" + +/* + * System memory size - used to scale certain data structures in XFS. + */ +unsigned long xfs_physmem; + +/* + * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, + * other XFS code uses these values. 
+ */ + +xfs_param_t xfs_params = { + /* MIN DFLT MAX */ + .restrict_chown = { 0, 1, 1 }, + .sgid_inherit = { 0, 0, 1 }, + .symlink_mode = { 0, 0, 1 }, + .panic_mask = { 0, 0, 127 }, + .error_level = { 0, 3, 11 }, + .sync_interval = { USER_HZ, 30*USER_HZ, 7200*USER_HZ }, + .stats_clear = { 0, 0, 1 }, + .inherit_sync = { 0, 1, 1 }, + .inherit_nodump = { 0, 1, 1 }, + .inherit_noatim = { 0, 1, 1 }, + .flush_interval = { USER_HZ/2, USER_HZ, 30*USER_HZ }, + .age_buffer = { 1*USER_HZ, 15*USER_HZ, 7200*USER_HZ }, +}; + +/* + * Global system credential structure. + */ +cred_t sys_cred_val, *sys_cred = &sys_cred_val; + diff --git a/fs/xfs/linux/xfs_globals.h b/fs/xfs/linux/xfs_globals.h new file mode 100644 index 000000000..e81e2f38a --- /dev/null +++ b/fs/xfs/linux/xfs_globals.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_GLOBALS_H__ +#define __XFS_GLOBALS_H__ + +/* + * This file declares globals needed by XFS that were normally defined + * somewhere else in IRIX. + */ + +extern uint64_t xfs_panic_mask; /* set to cause more panics */ +extern unsigned long xfs_physmem; +extern struct cred *sys_cred; + +#endif /* __XFS_GLOBALS_H__ */ diff --git a/fs/xfs/linux/xfs_ioctl.c b/fs/xfs/linux/xfs_ioctl.c new file mode 100644 index 000000000..d6402d746 --- /dev/null +++ b/fs/xfs/linux/xfs_ioctl.c @@ -0,0 +1,1236 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + +#include "xfs_fs.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_bit.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_cap.h" +#include "xfs_mac.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_dfrag.h" +#include "xfs_fsops.h" + +#include +#include +#include +#include + +/* + * ioctl commands that are used by Linux filesystems + */ +#define XFS_IOC_GETXFLAGS _IOR('f', 1, long) +#define XFS_IOC_SETXFLAGS _IOW('f', 2, long) +#define XFS_IOC_GETVERSION _IOR('v', 1, long) + + +/* + * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to + * a file or fs handle. 
+ * + * XFS_IOC_PATH_TO_FSHANDLE + * returns fs handle for a mount point or path within that mount point + * XFS_IOC_FD_TO_HANDLE + * returns full handle for a FD opened in user space + * XFS_IOC_PATH_TO_HANDLE + * returns full handle for a path + */ +STATIC int +xfs_find_handle( + unsigned int cmd, + unsigned long arg) +{ + int hsize; + xfs_handle_t handle; + xfs_fsop_handlereq_t hreq; + struct inode *inode; + struct vnode *vp; + + if (copy_from_user(&hreq, (xfs_fsop_handlereq_t *)arg, sizeof(hreq))) + return -XFS_ERROR(EFAULT); + + memset((char *)&handle, 0, sizeof(handle)); + + switch (cmd) { + case XFS_IOC_PATH_TO_FSHANDLE: + case XFS_IOC_PATH_TO_HANDLE: { + struct nameidata nd; + int error; + + error = user_path_walk_link(hreq.path, &nd); + if (error) + return error; + + ASSERT(nd.dentry); + ASSERT(nd.dentry->d_inode); + inode = igrab(nd.dentry->d_inode); + path_release(&nd); + break; + } + + case XFS_IOC_FD_TO_HANDLE: { + struct file *file; + + file = fget(hreq.fd); + if (!file) + return -EBADF; + + ASSERT(file->f_dentry); + ASSERT(file->f_dentry->d_inode); + inode = igrab(file->f_dentry->d_inode); + fput(file); + break; + } + + default: + ASSERT(0); + return -XFS_ERROR(EINVAL); + } + + if (inode->i_sb->s_magic != XFS_SB_MAGIC) { + /* we're not in XFS anymore, Toto */ + iput(inode); + return -XFS_ERROR(EINVAL); + } + + /* we need the vnode */ + vp = LINVFS_GET_VP(inode); + if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { + iput(inode); + return -XFS_ERROR(EBADF); + } + + /* now we can grab the fsid */ + memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t)); + hsize = sizeof(xfs_fsid_t); + + if (cmd != XFS_IOC_PATH_TO_FSHANDLE) { + xfs_inode_t *ip; + bhv_desc_t *bhv; + int lock_mode; + + /* need to get access to the xfs_inode to read the generation */ + bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops); + ASSERT(bhv); + ip = XFS_BHVTOI(bhv); + ASSERT(ip); + lock_mode = xfs_ilock_map_shared(ip); + + /* fill in fid 
section of handle from inode */ + handle.ha_fid.xfs_fid_len = sizeof(xfs_fid_t) - + sizeof(handle.ha_fid.xfs_fid_len); + handle.ha_fid.xfs_fid_pad = 0; + handle.ha_fid.xfs_fid_gen = ip->i_d.di_gen; + handle.ha_fid.xfs_fid_ino = ip->i_ino; + + xfs_iunlock_map_shared(ip, lock_mode); + + hsize = XFS_HSIZE(handle); + } + + /* now copy our handle into the user buffer & write out the size */ + if (copy_to_user((xfs_handle_t *)hreq.ohandle, &handle, hsize) || + copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) { + iput(inode); + return -XFS_ERROR(EFAULT); + } + + iput(inode); + return 0; +} + + +/* + * Convert userspace handle data into vnode (and inode). + * We [ab]use the fact that all the fsop_handlereq ioctl calls + * have a data structure argument whose first component is always + * a xfs_fsop_handlereq_t, so we can cast to and from this type. + * This allows us to optimise the copy_from_user calls and gives + * a handy, shared routine. + * + * If no error, caller must always VN_RELE the returned vp. + */ +STATIC int +xfs_vget_fsop_handlereq( + xfs_mount_t *mp, + struct inode *parinode, /* parent inode pointer */ + int cap, /* capability level for op */ + unsigned long arg, /* userspace data pointer */ + unsigned long size, /* size of expected struct */ + /* output arguments */ + xfs_fsop_handlereq_t *hreq, + vnode_t **vp, + struct inode **inode) +{ + void *hanp; + size_t hlen; + xfs_fid_t *xfid; + xfs_handle_t *handlep; + xfs_handle_t handle; + xfs_inode_t *ip; + struct inode *inodep; + vnode_t *vpp; + xfs_ino_t ino; + __u32 igen; + int error; + + if (!capable(cap)) + return XFS_ERROR(EPERM); + + /* + * Only allow handle opens under a directory. + */ + if (!S_ISDIR(parinode->i_mode)) + return XFS_ERROR(ENOTDIR); + + /* + * Copy the handle down from the user and validate + * that it looks to be in the correct format. 
+ */ + if (copy_from_user(hreq, (struct xfs_fsop_handlereq *)arg, size)) + return XFS_ERROR(EFAULT); + + hanp = hreq->ihandle; + hlen = hreq->ihandlen; + handlep = &handle; + + if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep)) + return XFS_ERROR(EINVAL); + if (copy_from_user(handlep, hanp, hlen)) + return XFS_ERROR(EFAULT); + if (hlen < sizeof(*handlep)) + memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen); + if (hlen > sizeof(handlep->ha_fsid)) { + if (handlep->ha_fid.xfs_fid_len != + (hlen - sizeof(handlep->ha_fsid) + - sizeof(handlep->ha_fid.xfs_fid_len)) + || handlep->ha_fid.xfs_fid_pad) + return XFS_ERROR(EINVAL); + } + + /* + * Crack the handle, obtain the inode # & generation # + */ + xfid = (struct xfs_fid *)&handlep->ha_fid; + if (xfid->xfs_fid_len == sizeof(*xfid) - sizeof(xfid->xfs_fid_len)) { + ino = xfid->xfs_fid_ino; + igen = xfid->xfs_fid_gen; + } else { + return XFS_ERROR(EINVAL); + } + + /* + * Get the XFS inode, building a vnode to go with it. + */ + error = xfs_iget(mp, NULL, ino, XFS_ILOCK_SHARED, &ip, 0); + if (error) + return error; + if (ip == NULL) + return XFS_ERROR(EIO); + if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) { + xfs_iput_new(ip, XFS_ILOCK_SHARED); + return XFS_ERROR(ENOENT); + } + + vpp = XFS_ITOV(ip); + inodep = LINVFS_GET_IP(vpp); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + *vp = vpp; + *inode = inodep; + return 0; +} + +STATIC int +xfs_open_by_handle( + xfs_mount_t *mp, + unsigned long arg, + struct file *parfilp, + struct inode *parinode) +{ + int error; + int new_fd; + int permflag; + struct file *filp; + struct inode *inode; + struct dentry *dentry; + vnode_t *vp; + xfs_fsop_handlereq_t hreq; + + error = xfs_vget_fsop_handlereq(mp, parinode, CAP_SYS_ADMIN, arg, + sizeof(xfs_fsop_handlereq_t), + &hreq, &vp, &inode); + if (error) + return -error; + + /* Restrict xfs_open_by_handle to directories & regular files. 
*/ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { + iput(inode); + return -XFS_ERROR(EINVAL); + } + +#if BITS_PER_LONG != 32 + hreq.oflags |= O_LARGEFILE; +#endif + /* Put open permission in namei format. */ + permflag = hreq.oflags; + if ((permflag+1) & O_ACCMODE) + permflag++; + if (permflag & O_TRUNC) + permflag |= 2; + + if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && + (permflag & FMODE_WRITE) && IS_APPEND(inode)) { + iput(inode); + return -XFS_ERROR(EPERM); + } + + if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) { + iput(inode); + return -XFS_ERROR(EACCES); + } + + /* Can't write directories. */ + if ( S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) { + iput(inode); + return -XFS_ERROR(EISDIR); + } + + if ((new_fd = get_unused_fd()) < 0) { + iput(inode); + return new_fd; + } + + dentry = d_alloc_anon(inode); + if (dentry == NULL) { + iput(inode); + put_unused_fd(new_fd); + return -XFS_ERROR(ENOMEM); + } + + /* Ensure umount returns EBUSY on umounts while this file is open. */ + mntget(parfilp->f_vfsmnt); + + /* Create file pointer. */ + filp = dentry_open(dentry, parfilp->f_vfsmnt, hreq.oflags); + if (IS_ERR(filp)) { + put_unused_fd(new_fd); + return -XFS_ERROR(-PTR_ERR(filp)); + } + if (inode->i_mode & S_IFREG) + filp->f_op = &linvfs_invis_file_operations; + + fd_install(new_fd, filp); + return new_fd; +} + +STATIC int +xfs_readlink_by_handle( + xfs_mount_t *mp, + unsigned long arg, + struct file *parfilp, + struct inode *parinode) +{ + int error; + struct iovec aiov; + struct uio auio; + struct inode *inode; + xfs_fsop_handlereq_t hreq; + vnode_t *vp; + __u32 olen; + + error = xfs_vget_fsop_handlereq(mp, parinode, CAP_SYS_ADMIN, arg, + sizeof(xfs_fsop_handlereq_t), + &hreq, &vp, &inode); + if (error) + return -error; + + /* Restrict this handle operation to symlinks only. 
*/ + if (vp->v_type != VLNK) { + VN_RELE(vp); + return -XFS_ERROR(EINVAL); + } + + if (copy_from_user(&olen, hreq.ohandlen, sizeof(__u32))) { + VN_RELE(vp); + return -XFS_ERROR(EFAULT); + } + aiov.iov_len = olen; + aiov.iov_base = hreq.ohandle; + + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = 0; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_resid = olen; + + VOP_READLINK(vp, &auio, IO_INVIS, NULL, error); + + VN_RELE(vp); + return (olen - auio.uio_resid); +} + +STATIC int +xfs_fssetdm_by_handle( + xfs_mount_t *mp, + unsigned long arg, + struct file *parfilp, + struct inode *parinode) +{ + int error; + struct fsdmidata fsd; + xfs_fsop_setdm_handlereq_t dmhreq; + struct inode *inode; + bhv_desc_t *bdp; + vnode_t *vp; + + error = xfs_vget_fsop_handlereq(mp, parinode, CAP_MKNOD, arg, + sizeof(xfs_fsop_setdm_handlereq_t), + (xfs_fsop_handlereq_t *)&dmhreq, + &vp, &inode); + if (error) + return -error; + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + VN_RELE(vp); + return -XFS_ERROR(EPERM); + } + + if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) { + VN_RELE(vp); + return -XFS_ERROR(EFAULT); + } + + bdp = bhv_base_unlocked(VN_BHV_HEAD(vp)); + error = xfs_set_dmattrs(bdp, fsd.fsd_dmevmask, fsd.fsd_dmstate, NULL); + + VN_RELE(vp); + if (error) + return -error; + return 0; +} + +STATIC int +xfs_attrlist_by_handle( + xfs_mount_t *mp, + unsigned long arg, + struct file *parfilp, + struct inode *parinode) +{ + int error; + attrlist_cursor_kern_t *cursor; + xfs_fsop_attrlist_handlereq_t al_hreq; + struct inode *inode; + vnode_t *vp; + + error = xfs_vget_fsop_handlereq(mp, parinode, CAP_SYS_ADMIN, arg, + sizeof(xfs_fsop_attrlist_handlereq_t), + (xfs_fsop_handlereq_t *)&al_hreq, + &vp, &inode); + if (error) + return -error; + + cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; + VOP_ATTR_LIST(vp, al_hreq.buffer, al_hreq.buflen, al_hreq.flags, + cursor, NULL, error); + VN_RELE(vp); + if (error) + return -error; + return 0; +} + +STATIC int 
+xfs_attrmulti_by_handle( + xfs_mount_t *mp, + unsigned long arg, + struct file *parfilp, + struct inode *parinode) +{ + int error; + xfs_attr_multiop_t *ops; + xfs_fsop_attrmulti_handlereq_t am_hreq; + struct inode *inode; + vnode_t *vp; + int i, size; + + error = xfs_vget_fsop_handlereq(mp, parinode, CAP_SYS_ADMIN, arg, + sizeof(xfs_fsop_attrmulti_handlereq_t), + (xfs_fsop_handlereq_t *)&am_hreq, + &vp, &inode); + if (error) + return -error; + + size = am_hreq.opcount * sizeof(attr_multiop_t); + ops = (xfs_attr_multiop_t *)kmalloc(size, GFP_KERNEL); + if (!ops) { + VN_RELE(vp); + return -XFS_ERROR(ENOMEM); + } + + if (copy_from_user(ops, am_hreq.ops, size)) { + kfree(ops); + VN_RELE(vp); + return -XFS_ERROR(EFAULT); + } + + for (i = 0; i < am_hreq.opcount; i++) { + switch(ops[i].am_opcode) { + case ATTR_OP_GET: + VOP_ATTR_GET(vp,ops[i].am_attrname, ops[i].am_attrvalue, + &ops[i].am_length, ops[i].am_flags, + NULL, ops[i].am_error); + break; + case ATTR_OP_SET: + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + ops[i].am_error = EPERM; + break; + } + VOP_ATTR_SET(vp,ops[i].am_attrname, ops[i].am_attrvalue, + ops[i].am_length, ops[i].am_flags, + NULL, ops[i].am_error); + break; + case ATTR_OP_REMOVE: + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + ops[i].am_error = EPERM; + break; + } + VOP_ATTR_REMOVE(vp, ops[i].am_attrname, ops[i].am_flags, + NULL, ops[i].am_error); + break; + default: + ops[i].am_error = EINVAL; + } + } + + if (copy_to_user(am_hreq.ops, ops, size)) + error = -XFS_ERROR(EFAULT); + + kfree(ops); + VN_RELE(vp); + return error; +} + +/* prototypes for a few of the stack-hungry cases that have + * their own functions. 
Functions are defined after their use + * so gcc doesn't get fancy and inline them with -03 */ + +STATIC int +xfs_ioc_space( + bhv_desc_t *bdp, + vnode_t *vp, + struct file *filp, + int flags, + unsigned int cmd, + unsigned long arg); + +STATIC int +xfs_ioc_bulkstat( + xfs_mount_t *mp, + unsigned int cmd, + unsigned long arg); + +STATIC int +xfs_ioc_fsgeometry_v1( + xfs_mount_t *mp, + unsigned long arg); + +STATIC int +xfs_ioc_fsgeometry( + xfs_mount_t *mp, + unsigned long arg); + +STATIC int +xfs_ioc_xattr( + vnode_t *vp, + xfs_inode_t *ip, + struct file *filp, + unsigned int cmd, + unsigned long arg); + +STATIC int +xfs_ioc_getbmap( + bhv_desc_t *bdp, + struct file *filp, + int flags, + unsigned int cmd, + unsigned long arg); + +STATIC int +xfs_ioc_getbmapx( + bhv_desc_t *bdp, + unsigned long arg); + +int +xfs_ioctl( + bhv_desc_t *bdp, + struct inode *inode, + struct file *filp, + int ioflags, + unsigned int cmd, + unsigned long arg) +{ + int error; + vnode_t *vp; + xfs_inode_t *ip; + xfs_mount_t *mp; + + vp = LINVFS_GET_VP(inode); + + vn_trace_entry(vp, "xfs_ioctl", (inst_t *)__return_address); + + ip = XFS_BHVTOI(bdp); + mp = ip->i_mount; + + switch (cmd) { + + case XFS_IOC_ALLOCSP: + case XFS_IOC_FREESP: + case XFS_IOC_RESVSP: + case XFS_IOC_UNRESVSP: + case XFS_IOC_ALLOCSP64: + case XFS_IOC_FREESP64: + case XFS_IOC_RESVSP64: + case XFS_IOC_UNRESVSP64: + /* + * Only allow the sys admin to reserve space unless + * unwritten extents are enabled. + */ + if (!XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) && + !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return xfs_ioc_space(bdp, vp, filp, ioflags, cmd, arg); + + case XFS_IOC_DIOINFO: { + struct dioattr da; + xfs_buftarg_t *target = + (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? 
+ mp->m_rtdev_targp : mp->m_ddev_targp; + + da.d_mem = da.d_miniosz = 1 << target->pbr_sshift; + /* The size dio will do in one go */ + da.d_maxiosz = 64 * PAGE_CACHE_SIZE; + + if (copy_to_user((struct dioattr *)arg, &da, sizeof(da))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_FSBULKSTAT_SINGLE: + case XFS_IOC_FSBULKSTAT: + case XFS_IOC_FSINUMBERS: + return xfs_ioc_bulkstat(mp, cmd, arg); + + case XFS_IOC_FSGEOMETRY_V1: + return xfs_ioc_fsgeometry_v1(mp, arg); + + case XFS_IOC_FSGEOMETRY: + return xfs_ioc_fsgeometry(mp, arg); + + case XFS_IOC_GETVERSION: + case XFS_IOC_GETXFLAGS: + case XFS_IOC_SETXFLAGS: + case XFS_IOC_FSGETXATTR: + case XFS_IOC_FSSETXATTR: + case XFS_IOC_FSGETXATTRA: + return xfs_ioc_xattr(vp, ip, filp, cmd, arg); + + case XFS_IOC_FSSETDM: { + struct fsdmidata dmi; + + if (copy_from_user(&dmi, (struct fsdmidata *)arg, sizeof(dmi))) + return -XFS_ERROR(EFAULT); + + error = xfs_set_dmattrs(bdp, dmi.fsd_dmevmask, dmi.fsd_dmstate, + NULL); + return -error; + } + + case XFS_IOC_GETBMAP: + case XFS_IOC_GETBMAPA: + return xfs_ioc_getbmap(bdp, filp, ioflags, cmd, arg); + + case XFS_IOC_GETBMAPX: + return xfs_ioc_getbmapx(bdp, arg); + + case XFS_IOC_FD_TO_HANDLE: + case XFS_IOC_PATH_TO_HANDLE: + case XFS_IOC_PATH_TO_FSHANDLE: + return xfs_find_handle(cmd, arg); + + case XFS_IOC_OPEN_BY_HANDLE: + return xfs_open_by_handle(mp, arg, filp, inode); + + case XFS_IOC_FSSETDM_BY_HANDLE: + return xfs_fssetdm_by_handle(mp, arg, filp, inode); + + case XFS_IOC_READLINK_BY_HANDLE: + return xfs_readlink_by_handle(mp, arg, filp, inode); + + case XFS_IOC_ATTRLIST_BY_HANDLE: + return xfs_attrlist_by_handle(mp, arg, filp, inode); + + case XFS_IOC_ATTRMULTI_BY_HANDLE: + return xfs_attrmulti_by_handle(mp, arg, filp, inode); + + case XFS_IOC_SWAPEXT: { + error = xfs_swapext((struct xfs_swapext *)arg); + return -error; + } + + case XFS_IOC_FSCOUNTS: { + xfs_fsop_counts_t out; + + error = xfs_fs_counts(mp, &out); + if (error) + return -error; + + if 
(copy_to_user((char *)arg, &out, sizeof(out))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_SET_RESBLKS: { + xfs_fsop_resblks_t inout; + __uint64_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&inout, (char *)arg, sizeof(inout))) + return -XFS_ERROR(EFAULT); + + /* input parameter is passed in resblks field of structure */ + in = inout.resblks; + error = xfs_reserve_blocks(mp, &in, &inout); + if (error) + return -error; + + if (copy_to_user((char *)arg, &inout, sizeof(inout))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_GET_RESBLKS: { + xfs_fsop_resblks_t out; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + error = xfs_reserve_blocks(mp, NULL, &out); + if (error) + return -error; + + if (copy_to_user((char *)arg, &out, sizeof(out))) + return -XFS_ERROR(EFAULT); + + return 0; + } + + case XFS_IOC_FSGROWFSDATA: { + xfs_growfs_data_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&in, (char *)arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_growfs_data(mp, &in); + return -error; + } + + case XFS_IOC_FSGROWFSLOG: { + xfs_growfs_log_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&in, (char *)arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_growfs_log(mp, &in); + return -error; + } + + case XFS_IOC_FSGROWFSRT: { + xfs_growfs_rt_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&in, (char *)arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_growfs_rt(mp, &in); + return -error; + } + + case XFS_IOC_FREEZE: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + freeze_bdev(inode->i_sb->s_bdev); + return 0; + + case XFS_IOC_THAW: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + thaw_bdev(inode->i_sb->s_bdev, inode->i_sb); + return 0; + + case XFS_IOC_GOINGDOWN: { + __uint32_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (get_user(in, (__uint32_t *)arg)) + 
return -XFS_ERROR(EFAULT); + + error = xfs_fs_goingdown(mp, in); + return -error; + } + + case XFS_IOC_ERROR_INJECTION: { + xfs_error_injection_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&in, (char *)arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_errortag_add(in.errtag, mp); + return -error; + } + + case XFS_IOC_ERROR_CLEARALL: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + error = xfs_errortag_clearall(mp); + return -error; + + default: + return -ENOTTY; + } +} + +STATIC int +xfs_ioc_space( + bhv_desc_t *bdp, + vnode_t *vp, + struct file *filp, + int ioflags, + unsigned int cmd, + unsigned long arg) +{ + xfs_flock64_t bf; + int attr_flags = 0; + int error; + + if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND)) + return -XFS_ERROR(EPERM); + + if (!(filp->f_flags & FMODE_WRITE)) + return -XFS_ERROR(EBADF); + + if (vp->v_type != VREG) + return -XFS_ERROR(EINVAL); + + if (copy_from_user(&bf, (xfs_flock64_t *)arg, sizeof(bf))) + return -XFS_ERROR(EFAULT); + + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) + attr_flags |= ATTR_NONBLOCK; + if (ioflags & IO_INVIS) + attr_flags |= ATTR_DMI; + + error = xfs_change_file_space(bdp, cmd, &bf, filp->f_pos, + NULL, attr_flags); + return -error; +} + +STATIC int +xfs_ioc_bulkstat( + xfs_mount_t *mp, + unsigned int cmd, + unsigned long arg) +{ + xfs_fsop_bulkreq_t bulkreq; + int count; /* # of records returned */ + xfs_ino_t inlast; /* last inode number */ + int done; + int error; + + /* done = 1 if there are more stats to get and if bulkstat */ + /* should be called again (unused here, but used in dmapi) */ + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -XFS_ERROR(EIO); + + if (copy_from_user(&bulkreq, (xfs_fsop_bulkreq_t *)arg, + sizeof(xfs_fsop_bulkreq_t))) + return -XFS_ERROR(EFAULT); + + if (copy_from_user(&inlast, (__s64 *)bulkreq.lastip, + sizeof(__s64))) + return -XFS_ERROR(EFAULT); + + if ((count = bulkreq.icount) <= 0) + 
return -XFS_ERROR(EINVAL); + + if (cmd == XFS_IOC_FSINUMBERS) + error = xfs_inumbers(mp, NULL, &inlast, &count, + bulkreq.ubuffer); + else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) + error = xfs_bulkstat_single(mp, &inlast, + bulkreq.ubuffer, &done); + else { /* XFS_IOC_FSBULKSTAT */ + if (count == 1 && inlast != 0) { + inlast++; + error = xfs_bulkstat_single(mp, &inlast, + bulkreq.ubuffer, &done); + } else { + error = xfs_bulkstat(mp, NULL, &inlast, &count, + (bulkstat_one_pf)xfs_bulkstat_one, NULL, + sizeof(xfs_bstat_t), bulkreq.ubuffer, + BULKSTAT_FG_QUICK, &done); + } + } + + if (error) + return -error; + + if (bulkreq.ocount != NULL) { + if (copy_to_user((xfs_ino_t *)bulkreq.lastip, &inlast, + sizeof(xfs_ino_t))) + return -XFS_ERROR(EFAULT); + + if (copy_to_user((__s32 *)bulkreq.ocount, &count, + sizeof(count))) + return -XFS_ERROR(EFAULT); + } + + return 0; +} + +STATIC int +xfs_ioc_fsgeometry_v1( + xfs_mount_t *mp, + unsigned long arg) +{ + xfs_fsop_geom_v1_t fsgeo; + int error; + + error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3); + if (error) + return -error; + + if (copy_to_user((xfs_fsop_geom_t *)arg, &fsgeo, sizeof(fsgeo))) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_ioc_fsgeometry( + xfs_mount_t *mp, + unsigned long arg) +{ + xfs_fsop_geom_t fsgeo; + int error; + + error = xfs_fs_geometry(mp, &fsgeo, 4); + if (error) + return -error; + + if (copy_to_user((xfs_fsop_geom_t *)arg, &fsgeo, sizeof(fsgeo))) + return -XFS_ERROR(EFAULT); + return 0; +} + +/* + * Linux extended inode flags interface. 
+ */ +#define LINUX_XFLAG_SYNC 0x00000008 /* Synchronous updates */ +#define LINUX_XFLAG_IMMUTABLE 0x00000010 /* Immutable file */ +#define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */ +#define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */ +#define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */ + +STATIC unsigned int +xfs_merge_ioc_xflags( + unsigned int flags, + unsigned int start) +{ + unsigned int xflags = start; + + if (flags & LINUX_XFLAG_IMMUTABLE) + xflags |= XFS_XFLAG_IMMUTABLE; + else + xflags &= ~XFS_XFLAG_IMMUTABLE; + if (flags & LINUX_XFLAG_APPEND) + xflags |= XFS_XFLAG_APPEND; + else + xflags &= ~XFS_XFLAG_APPEND; + if (flags & LINUX_XFLAG_SYNC) + xflags |= XFS_XFLAG_SYNC; + else + xflags &= ~XFS_XFLAG_SYNC; + if (flags & LINUX_XFLAG_NOATIME) + xflags |= XFS_XFLAG_NOATIME; + else + xflags &= ~XFS_XFLAG_NOATIME; + if (flags & LINUX_XFLAG_NODUMP) + xflags |= XFS_XFLAG_NODUMP; + else + xflags &= ~XFS_XFLAG_NODUMP; + + return xflags; +} + +STATIC int +xfs_ioc_xattr( + vnode_t *vp, + xfs_inode_t *ip, + struct file *filp, + unsigned int cmd, + unsigned long arg) +{ + struct fsxattr fa; + vattr_t va; + int error; + int attr_flags; + unsigned int flags; + + switch (cmd) { + case XFS_IOC_FSGETXATTR: { + va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS; + VOP_GETATTR(vp, &va, 0, NULL, error); + if (error) + return -error; + + fa.fsx_xflags = va.va_xflags; + fa.fsx_extsize = va.va_extsize; + fa.fsx_nextents = va.va_nextents; + + if (copy_to_user((struct fsxattr *)arg, &fa, sizeof(fa))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_FSSETXATTR: { + if (copy_from_user(&fa, (struct fsxattr *)arg, sizeof(fa))) + return -XFS_ERROR(EFAULT); + + attr_flags = 0; + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) + attr_flags |= ATTR_NONBLOCK; + + va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE; + va.va_xflags = fa.fsx_xflags; + va.va_extsize = fa.fsx_extsize; + + VOP_SETATTR(vp, &va, attr_flags, NULL, error); + if 
(!error) + vn_revalidate(vp); /* update Linux inode flags */ + return -error; + } + + case XFS_IOC_FSGETXATTRA: { + va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_ANEXTENTS; + VOP_GETATTR(vp, &va, 0, NULL, error); + if (error) + return -error; + + fa.fsx_xflags = va.va_xflags; + fa.fsx_extsize = va.va_extsize; + fa.fsx_nextents = va.va_anextents; + + if (copy_to_user((struct fsxattr *)arg, &fa, sizeof(fa))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_GETXFLAGS: { + flags = 0; + if (ip->i_d.di_flags & XFS_XFLAG_IMMUTABLE) + flags |= LINUX_XFLAG_IMMUTABLE; + if (ip->i_d.di_flags & XFS_XFLAG_APPEND) + flags |= LINUX_XFLAG_APPEND; + if (ip->i_d.di_flags & XFS_XFLAG_SYNC) + flags |= LINUX_XFLAG_SYNC; + if (ip->i_d.di_flags & XFS_XFLAG_NOATIME) + flags |= LINUX_XFLAG_NOATIME; + if (ip->i_d.di_flags & XFS_XFLAG_NODUMP) + flags |= LINUX_XFLAG_NODUMP; + if (copy_to_user((unsigned int *)arg, &flags, sizeof(flags))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_SETXFLAGS: { + if (copy_from_user(&flags, (unsigned int *)arg, sizeof(flags))) + return -XFS_ERROR(EFAULT); + + if (flags & ~(LINUX_XFLAG_IMMUTABLE | LINUX_XFLAG_APPEND | \ + LINUX_XFLAG_NOATIME | LINUX_XFLAG_NODUMP | \ + LINUX_XFLAG_SYNC)) + return -XFS_ERROR(EOPNOTSUPP); + + attr_flags = 0; + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) + attr_flags |= ATTR_NONBLOCK; + + va.va_mask = XFS_AT_XFLAGS; + va.va_xflags = xfs_merge_ioc_xflags(flags, ip->i_d.di_flags); + + VOP_SETATTR(vp, &va, attr_flags, NULL, error); + if (!error) + vn_revalidate(vp); /* update Linux inode flags */ + return -error; + } + + case XFS_IOC_GETVERSION: { + flags = LINVFS_GET_IP(vp)->i_generation; + if (copy_to_user((unsigned int *)arg, &flags, sizeof(flags))) + return -XFS_ERROR(EFAULT); + return 0; + } + + default: + return -ENOTTY; + } +} + +STATIC int +xfs_ioc_getbmap( + bhv_desc_t *bdp, + struct file *filp, + int ioflags, + unsigned int cmd, + unsigned long arg) +{ + struct getbmap bm; + int iflags; + int 
error; + + if (copy_from_user(&bm, (struct getbmap *)arg, sizeof(bm))) + return -XFS_ERROR(EFAULT); + + if (bm.bmv_count < 2) + return -XFS_ERROR(EINVAL); + + iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); + if (ioflags & IO_INVIS) + iflags |= BMV_IF_NO_DMAPI_READ; + + error = xfs_getbmap(bdp, &bm, (struct getbmap *)arg+1, iflags); + if (error) + return -error; + + if (copy_to_user((struct getbmap *)arg, &bm, sizeof(bm))) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_ioc_getbmapx( + bhv_desc_t *bdp, + unsigned long arg) +{ + struct getbmapx bmx; + struct getbmap bm; + int iflags; + int error; + + if (copy_from_user(&bmx, (struct getbmapx *)arg, sizeof(bmx))) + return -XFS_ERROR(EFAULT); + + if (bmx.bmv_count < 2) + return -XFS_ERROR(EINVAL); + + /* + * Map input getbmapx structure to a getbmap + * structure for xfs_getbmap. + */ + GETBMAP_CONVERT(bmx, bm); + + iflags = bmx.bmv_iflags; + + if (iflags & (~BMV_IF_VALID)) + return -XFS_ERROR(EINVAL); + + iflags |= BMV_IF_EXTENDED; + + error = xfs_getbmap(bdp, &bm, (struct getbmapx *)arg+1, iflags); + if (error) + return -error; + + GETBMAP_CONVERT(bm, bmx); + + if (copy_to_user((struct getbmapx *)arg, &bmx, sizeof(bmx))) + return -XFS_ERROR(EFAULT); + + return 0; +} diff --git a/fs/xfs/linux/xfs_iops.c b/fs/xfs/linux/xfs_iops.c new file mode 100644 index 000000000..4b3e61d6c --- /dev/null +++ b/fs/xfs/linux/xfs_iops.c @@ -0,0 +1,708 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_bit.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_cap.h" +#include "xfs_mac.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" + +#include + + +/* + * Pull the link count and size up from the xfs inode to the linux inode + */ +STATIC void +validate_fields( + struct inode *ip) +{ + vnode_t *vp = LINVFS_GET_VP(ip); + vattr_t va; + int error; + + va.va_mask = 
XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS; + VOP_GETATTR(vp, &va, ATTR_LAZY, NULL, error); + if (likely(!error)) { + ip->i_nlink = va.va_nlink; + ip->i_blocks = va.va_nblocks; + + /* we're under i_sem so i_size can't change under us */ + if (i_size_read(ip) != va.va_size) + i_size_write(ip, va.va_size); + } +} + +/* + * Determine whether a process has a valid fs_struct (kernel daemons + * like knfsd don't have an fs_struct). + * + * XXX(hch): nfsd is broken, better fix it instead. + */ +STATIC inline int +has_fs_struct(struct task_struct *task) +{ + return (task->fs != init_task.fs); +} + +STATIC int +linvfs_mknod( + struct inode *dir, + struct dentry *dentry, + int mode, + dev_t rdev) +{ + struct inode *ip; + vattr_t va; + vnode_t *vp = NULL, *dvp = LINVFS_GET_VP(dir); + xfs_acl_t *default_acl = NULL; + attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; + int error; + + /* + * Irix uses Missed'em'V split, but doesn't want to see + * the upper 5 bits of (14bit) major. + */ + if (!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff) + return -EINVAL; + + if (test_default_acl && test_default_acl(dvp)) { + if (!_ACL_ALLOC(default_acl)) + return -ENOMEM; + if (!_ACL_GET_DEFAULT(dvp, default_acl)) { + _ACL_FREE(default_acl); + default_acl = NULL; + } + } + + if (IS_POSIXACL(dir) && !default_acl && has_fs_struct(current)) + mode &= ~current->fs->umask; + + memset(&va, 0, sizeof(va)); + va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; + va.va_type = IFTOVT(mode); + va.va_mode = mode; + + switch (mode & S_IFMT) { + case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: + va.va_rdev = sysv_encode_dev(rdev); + va.va_mask |= XFS_AT_RDEV; + /*FALLTHROUGH*/ + case S_IFREG: + VOP_CREATE(dvp, dentry, &va, &vp, NULL, error); + break; + case S_IFDIR: + VOP_MKDIR(dvp, dentry, &va, &vp, NULL, error); + break; + default: + error = EINVAL; + break; + } + + if (default_acl) { + if (!error) { + error = _ACL_INHERIT(vp, &va, default_acl); + if (!error) { + VMODIFY(vp); + } else { + struct dentry teardown 
= {}; + int err2; + + /* Oh, the horror. + * If we can't add the ACL we must back out. + * ENOSPC can hit here, among other things. + */ + teardown.d_inode = ip = LINVFS_GET_IP(vp); + teardown.d_name = dentry->d_name; + remove_inode_hash(ip); + make_bad_inode(ip); + if (S_ISDIR(mode)) + VOP_RMDIR(dvp, &teardown, NULL, err2); + else + VOP_REMOVE(dvp, &teardown, NULL, err2); + VN_RELE(vp); + } + } + _ACL_FREE(default_acl); + } + + if (!error) { + ASSERT(vp); + ip = LINVFS_GET_IP(vp); + + if (S_ISCHR(mode) || S_ISBLK(mode)) + ip->i_rdev = rdev; + else if (S_ISDIR(mode)) + validate_fields(ip); + d_instantiate(dentry, ip); + validate_fields(dir); + } + return -error; +} + +STATIC int +linvfs_create( + struct inode *dir, + struct dentry *dentry, + int mode, + struct nameidata *nd) +{ + return linvfs_mknod(dir, dentry, mode, 0); +} + +STATIC int +linvfs_mkdir( + struct inode *dir, + struct dentry *dentry, + int mode) +{ + return linvfs_mknod(dir, dentry, mode|S_IFDIR, 0); +} + +STATIC struct dentry * +linvfs_lookup( + struct inode *dir, + struct dentry *dentry, + struct nameidata *nd) +{ + struct inode *ip = NULL; + vnode_t *vp, *cvp = NULL; + int error; + + if (dentry->d_name.len >= MAXNAMELEN) + return ERR_PTR(-ENAMETOOLONG); + + vp = LINVFS_GET_VP(dir); + VOP_LOOKUP(vp, dentry, &cvp, 0, NULL, NULL, error); + if (!error) { + ASSERT(cvp); + ip = LINVFS_GET_IP(cvp); + if (!ip) { + VN_RELE(cvp); + return ERR_PTR(-EACCES); + } + } + if (error && (error != ENOENT)) + return ERR_PTR(-error); + return d_splice_alias(ip, dentry); +} + +STATIC int +linvfs_link( + struct dentry *old_dentry, + struct inode *dir, + struct dentry *dentry) +{ + struct inode *ip; /* inode of guy being linked to */ + vnode_t *tdvp; /* target directory for new name/link */ + vnode_t *vp; /* vp of name being linked */ + int error; + + ip = old_dentry->d_inode; /* inode being linked to */ + if (S_ISDIR(ip->i_mode)) + return -EPERM; + + tdvp = LINVFS_GET_VP(dir); + vp = LINVFS_GET_VP(ip); + + 
VOP_LINK(tdvp, vp, dentry, NULL, error); + if (!error) { + VMODIFY(tdvp); + VN_HOLD(vp); + validate_fields(ip); + d_instantiate(dentry, ip); + } + return -error; +} + +STATIC int +linvfs_unlink( + struct inode *dir, + struct dentry *dentry) +{ + struct inode *inode; + vnode_t *dvp; /* directory containing name to remove */ + int error; + + inode = dentry->d_inode; + dvp = LINVFS_GET_VP(dir); + + VOP_REMOVE(dvp, dentry, NULL, error); + if (!error) { + validate_fields(dir); /* For size only */ + validate_fields(inode); + } + + return -error; +} + +STATIC int +linvfs_symlink( + struct inode *dir, + struct dentry *dentry, + const char *symname) +{ + struct inode *ip; + vattr_t va; + vnode_t *dvp; /* directory containing name to remove */ + vnode_t *cvp; /* used to lookup symlink to put in dentry */ + int error; + + dvp = LINVFS_GET_VP(dir); + cvp = NULL; + + memset(&va, 0, sizeof(va)); + va.va_type = VLNK; + va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO; + va.va_mask = XFS_AT_TYPE|XFS_AT_MODE; + + error = 0; + VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error); + if (!error && cvp) { + ASSERT(cvp->v_type == VLNK); + ip = LINVFS_GET_IP(cvp); + d_instantiate(dentry, ip); + validate_fields(dir); + validate_fields(ip); /* size needs update */ + } + return -error; +} + +STATIC int +linvfs_rmdir( + struct inode *dir, + struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + vnode_t *dvp = LINVFS_GET_VP(dir); + int error; + + VOP_RMDIR(dvp, dentry, NULL, error); + if (!error) { + validate_fields(inode); + validate_fields(dir); + } + return -error; +} + +STATIC int +linvfs_rename( + struct inode *odir, + struct dentry *odentry, + struct inode *ndir, + struct dentry *ndentry) +{ + struct inode *new_inode = ndentry->d_inode; + vnode_t *fvp; /* from directory */ + vnode_t *tvp; /* target directory */ + int error; + + fvp = LINVFS_GET_VP(odir); + tvp = LINVFS_GET_VP(ndir); + + VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, 
error); + if (error) + return -error; + + if (new_inode) + validate_fields(new_inode); + + validate_fields(odir); + if (ndir != odir) + validate_fields(ndir); + return 0; +} + +STATIC int +linvfs_readlink( + struct dentry *dentry, + char *buf, + int size) +{ + vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); + uio_t uio; + iovec_t iov; + int error; + + iov.iov_base = buf; + iov.iov_len = size; + + uio.uio_iov = &iov; + uio.uio_offset = 0; + uio.uio_segflg = UIO_USERSPACE; + uio.uio_resid = size; + uio.uio_iovcnt = 1; + + VOP_READLINK(vp, &uio, 0, NULL, error); + if (error) + return -error; + + return (size - uio.uio_resid); +} + +/* + * careful here - this function can get called recursively, so + * we need to be very careful about how much stack we use. + * uio is kmalloced for this reason... + */ +STATIC int +linvfs_follow_link( + struct dentry *dentry, + struct nameidata *nd) +{ + vnode_t *vp; + uio_t *uio; + iovec_t iov; + int error; + char *link; + + ASSERT(dentry); + ASSERT(nd); + + link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL); + if (!link) + return -ENOMEM; + + uio = (uio_t *)kmalloc(sizeof(uio_t), GFP_KERNEL); + if (!uio) { + kfree(link); + return -ENOMEM; + } + + vp = LINVFS_GET_VP(dentry->d_inode); + + iov.iov_base = link; + iov.iov_len = MAXNAMELEN; + + uio->uio_iov = &iov; + uio->uio_offset = 0; + uio->uio_segflg = UIO_SYSSPACE; + uio->uio_resid = MAXNAMELEN; + uio->uio_iovcnt = 1; + + VOP_READLINK(vp, uio, 0, NULL, error); + if (error) { + kfree(uio); + kfree(link); + return -error; + } + + link[MAXNAMELEN - uio->uio_resid] = '\0'; + kfree(uio); + + /* vfs_follow_link returns (-) errors */ + error = vfs_follow_link(nd, link); + kfree(link); + return error; +} + +#ifdef CONFIG_XFS_POSIX_ACL +STATIC int +linvfs_permission( + struct inode *inode, + int mode, + struct nameidata *nd) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + int error; + + mode <<= 6; /* convert from linux to vnode access bits */ + VOP_ACCESS(vp, mode, NULL, error); + return -error; +} 
+#else +#define linvfs_permission NULL +#endif + +STATIC int +linvfs_getattr( + struct vfsmount *mnt, + struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + vnode_t *vp = LINVFS_GET_VP(inode); + int error = 0; + + if (unlikely(vp->v_flag & VMODIFIED)) + error = vn_revalidate(vp); + if (!error) + generic_fillattr(inode, stat); + return 0; +} + +STATIC int +linvfs_setattr( + struct dentry *dentry, + struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + unsigned int ia_valid = attr->ia_valid; + vnode_t *vp = LINVFS_GET_VP(inode); + vattr_t vattr; + int flags = 0; + int error; + + memset(&vattr, 0, sizeof(vattr_t)); + if (ia_valid & ATTR_UID) { + vattr.va_mask |= XFS_AT_UID; + vattr.va_uid = attr->ia_uid; + } + if (ia_valid & ATTR_GID) { + vattr.va_mask |= XFS_AT_GID; + vattr.va_gid = attr->ia_gid; + } + if (ia_valid & ATTR_SIZE) { + vattr.va_mask |= XFS_AT_SIZE; + vattr.va_size = attr->ia_size; + } + if (ia_valid & ATTR_ATIME) { + vattr.va_mask |= XFS_AT_ATIME; + vattr.va_atime = attr->ia_atime; + } + if (ia_valid & ATTR_MTIME) { + vattr.va_mask |= XFS_AT_MTIME; + vattr.va_mtime = attr->ia_mtime; + } + if (ia_valid & ATTR_CTIME) { + vattr.va_mask |= XFS_AT_CTIME; + vattr.va_ctime = attr->ia_ctime; + } + if (ia_valid & ATTR_MODE) { + vattr.va_mask |= XFS_AT_MODE; + vattr.va_mode = attr->ia_mode; + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + inode->i_mode &= ~S_ISGID; + } + + if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) + flags = ATTR_UTIME; +#ifdef ATTR_NO_BLOCK + if ((ia_valid & ATTR_NO_BLOCK)) + flags |= ATTR_NONBLOCK; +#endif + + VOP_SETATTR(vp, &vattr, flags, NULL, error); + if (error) + return(-error); /* Positive error up from XFS */ + if (ia_valid & ATTR_SIZE) { + error = vmtruncate(inode, attr->ia_size); + } + + if (!error) { + vn_revalidate(vp); + } + return error; +} + +STATIC void +linvfs_truncate( + struct inode *inode) +{ + block_truncate_page(inode->i_mapping, inode->i_size, 
linvfs_get_block); +} + +STATIC int +linvfs_setxattr( + struct dentry *dentry, + const char *name, + const void *data, + size_t size, + int flags) +{ + vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); + char *attr = (char *)name; + attrnames_t *namesp; + int xflags = 0; + int error; + + namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT); + if (!namesp) + return -EOPNOTSUPP; + attr += namesp->attr_namelen; + error = namesp->attr_capable(vp, NULL); + if (error) + return error; + + /* Convert Linux syscall to XFS internal ATTR flags */ + if (flags & XATTR_CREATE) + xflags |= ATTR_CREATE; + if (flags & XATTR_REPLACE) + xflags |= ATTR_REPLACE; + xflags |= namesp->attr_flag; + return namesp->attr_set(vp, attr, (void *)data, size, xflags); +} + +STATIC ssize_t +linvfs_getxattr( + struct dentry *dentry, + const char *name, + void *data, + size_t size) +{ + vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); + char *attr = (char *)name; + attrnames_t *namesp; + int xflags = 0; + ssize_t error; + + namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT); + if (!namesp) + return -EOPNOTSUPP; + attr += namesp->attr_namelen; + error = namesp->attr_capable(vp, NULL); + if (error) + return error; + + /* Convert Linux syscall to XFS internal ATTR flags */ + if (!size) { + xflags |= ATTR_KERNOVAL; + data = NULL; + } + xflags |= namesp->attr_flag; + return namesp->attr_get(vp, attr, (void *)data, size, xflags); +} + +STATIC ssize_t +linvfs_listxattr( + struct dentry *dentry, + char *data, + size_t size) +{ + vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); + int error, xflags = ATTR_KERNAMELS; + ssize_t result; + + if (!size) + xflags |= ATTR_KERNOVAL; + xflags |= capable(CAP_SYS_ADMIN) ? 
ATTR_KERNFULLS : ATTR_KERNORMALS; + + error = attr_generic_list(vp, data, size, xflags, &result); + if (error < 0) + return error; + return result; +} + +STATIC int +linvfs_removexattr( + struct dentry *dentry, + const char *name) +{ + vnode_t *vp = LINVFS_GET_VP(dentry->d_inode); + char *attr = (char *)name; + attrnames_t *namesp; + int xflags = 0; + int error; + + namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT); + if (!namesp) + return -EOPNOTSUPP; + attr += namesp->attr_namelen; + error = namesp->attr_capable(vp, NULL); + if (error) + return error; + xflags |= namesp->attr_flag; + return namesp->attr_remove(vp, attr, xflags); +} + + +struct inode_operations linvfs_file_inode_operations = { + .permission = linvfs_permission, + .truncate = linvfs_truncate, + .getattr = linvfs_getattr, + .setattr = linvfs_setattr, + .setxattr = linvfs_setxattr, + .getxattr = linvfs_getxattr, + .listxattr = linvfs_listxattr, + .removexattr = linvfs_removexattr, +}; + +struct inode_operations linvfs_dir_inode_operations = { + .create = linvfs_create, + .lookup = linvfs_lookup, + .link = linvfs_link, + .unlink = linvfs_unlink, + .symlink = linvfs_symlink, + .mkdir = linvfs_mkdir, + .rmdir = linvfs_rmdir, + .mknod = linvfs_mknod, + .rename = linvfs_rename, + .permission = linvfs_permission, + .getattr = linvfs_getattr, + .setattr = linvfs_setattr, + .setxattr = linvfs_setxattr, + .getxattr = linvfs_getxattr, + .listxattr = linvfs_listxattr, + .removexattr = linvfs_removexattr, +}; + +struct inode_operations linvfs_symlink_inode_operations = { + .readlink = linvfs_readlink, + .follow_link = linvfs_follow_link, + .permission = linvfs_permission, + .getattr = linvfs_getattr, + .setattr = linvfs_setattr, + .setxattr = linvfs_setxattr, + .getxattr = linvfs_getxattr, + .listxattr = linvfs_listxattr, + .removexattr = linvfs_removexattr, +}; diff --git a/fs/xfs/linux/xfs_iops.h b/fs/xfs/linux/xfs_iops.h new file mode 100644 index 000000000..f0f5c870f --- /dev/null +++ 
b/fs/xfs/linux/xfs_iops.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_IOPS_H__ +#define __XFS_IOPS_H__ + +extern struct inode_operations linvfs_file_inode_operations; +extern struct inode_operations linvfs_dir_inode_operations; +extern struct inode_operations linvfs_symlink_inode_operations; + +extern struct file_operations linvfs_file_operations; +extern struct file_operations linvfs_invis_file_operations; +extern struct file_operations linvfs_dir_operations; + +extern struct address_space_operations linvfs_aops; + +extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int); +extern void linvfs_unwritten_done(struct buffer_head *, int); + +extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *, + int, unsigned int, unsigned long); + +#endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/linux/xfs_linux.h b/fs/xfs/linux/xfs_linux.h new file mode 100644 index 000000000..70481f85f --- /dev/null +++ b/fs/xfs/linux/xfs_linux.h @@ -0,0 +1,367 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_LINUX__ +#define __XFS_LINUX__ + +#include +#include + +/* + * Some types are conditional depending on the target system. + * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. + * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well + * as requiring XFS_BIG_BLKNOS to be set. + */ +#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) +# define XFS_BIG_BLKNOS 1 +# if BITS_PER_LONG == 64 +# define XFS_BIG_INUMS 1 +# else +# define XFS_BIG_INUMS 0 +# endif +#else +# define XFS_BIG_BLKNOS 0 +# define XFS_BIG_INUMS 0 +#endif + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Feature macros (disable/enable) + */ +#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ +#define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */ + +/* + * State flag for unwritten extent buffers. 
+ * + * We need to be able to distinguish between these and delayed + * allocate buffers within XFS. The generic IO path code does + * not need to distinguish - we use the BH_Delay flag for both + * delalloc and these ondisk-uninitialised buffers. + */ +BUFFER_FNS(PrivateStart, unwritten); +static inline void set_buffer_unwritten_io(struct buffer_head *bh) +{ + bh->b_end_io = linvfs_unwritten_done; +} + +#define xfs_refcache_size xfs_params.refcache_size.val +#define xfs_refcache_purge_count xfs_params.refcache_purge.val +#define restricted_chown xfs_params.restrict_chown.val +#define irix_sgid_inherit xfs_params.sgid_inherit.val +#define irix_symlink_mode xfs_params.symlink_mode.val +#define xfs_panic_mask xfs_params.panic_mask.val +#define xfs_error_level xfs_params.error_level.val +#define xfs_syncd_interval (xfs_params.sync_interval.val * HZ / USER_HZ) +#define xfs_stats_clear xfs_params.stats_clear.val +#define xfs_inherit_sync xfs_params.inherit_sync.val +#define xfs_inherit_nodump xfs_params.inherit_nodump.val +#define xfs_inherit_noatime xfs_params.inherit_noatim.val +#define xfs_flush_interval (xfs_params.flush_interval.val * HZ / USER_HZ) +#define xfs_age_buffer (xfs_params.age_buffer.val * HZ / USER_HZ) + +#define current_cpu() smp_processor_id() +#define current_pid() (current->pid) +#define current_fsuid(cred) (current->fsuid) +#define current_fsgid(cred) (current->fsgid) + +#define NBPP PAGE_SIZE +#define DPPSHFT (PAGE_SHIFT - 9) +#define NDPP (1 << (PAGE_SHIFT - 9)) +#define dtop(DD) (((DD) + NDPP - 1) >> DPPSHFT) +#define dtopt(DD) ((DD) >> DPPSHFT) +#define dpoff(DD) ((DD) & (NDPP-1)) + +#define NBBY 8 /* number of bits per byte */ +#define NBPC PAGE_SIZE /* Number of bytes per click */ +#define BPCSHIFT PAGE_SHIFT /* LOG2(NBPC) if exact */ + +/* + * Size of block device i/o is parameterized here. + * Currently the system supports page-sized i/o. 
+ */ +#define BLKDEV_IOSHIFT BPCSHIFT +#define BLKDEV_IOSIZE (1<>BPCSHIFT) +#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) +#define btoc64(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) +#define btoct64(x) ((__uint64_t)(x)>>BPCSHIFT) +#define io_btoc(x) (((__psunsigned_t)(x)+(IO_NBPC-1))>>IO_BPCSHIFT) +#define io_btoct(x) ((__psunsigned_t)(x)>>IO_BPCSHIFT) + +/* off_t bytes to clicks */ +#define offtoc(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) +#define offtoct(x) ((xfs_off_t)(x)>>BPCSHIFT) + +/* clicks to off_t bytes */ +#define ctooff(x) ((xfs_off_t)(x)<>BPCSHIFT) +#define ctob64(x) ((__uint64_t)(x)<>BPCSHIFT) + +#ifndef CELL_CAPABLE +#define FSC_NOTIFY_NAME_CHANGED(vp) +#endif + +#ifndef ENOATTR +#define ENOATTR ENODATA /* Attribute not found */ +#endif + +/* Note: EWRONGFS never visible outside the kernel */ +#define EWRONGFS EINVAL /* Mount with wrong filesystem type */ + +/* + * XXX EFSCORRUPTED needs a real value in errno.h. asm-i386/errno.h won't + * return codes out of its known range in errno. + * XXX Also note: needs to be < 1000 and fairly unique on Linux (mustn't + * conflict with any code we use already or any code a driver may use) + * XXX Some options (currently we do #2): + * 1/ New error code ["Filesystem is corrupted", _after_ glibc updated] + * 2/ 990 ["Unknown error 990"] + * 3/ EUCLEAN ["Structure needs cleaning"] + * 4/ Convert EFSCORRUPTED to EIO [just prior to return into userspace] + */ +#define EFSCORRUPTED 990 /* Filesystem is corrupted */ + +#define SYNCHRONIZE() barrier() +#define __return_address __builtin_return_address(0) + +/* + * IRIX (BSD) quotactl makes use of separate commands for user/group, + * whereas on Linux the syscall encodes this information into the cmd + * field (see the QCMD macro in quota.h). These macros help keep the + * code portable - they are not visible from the syscall interface. 
+ */ +#define Q_XSETGQLIM XQM_CMD(0x8) /* set groups disk limits */ +#define Q_XGETGQUOTA XQM_CMD(0x9) /* get groups disk limits */ + +/* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */ +/* we may well need to fine-tune this if it ever becomes an issue. */ +#define DQUOT_MAX_HEURISTIC 1024 /* NR_DQUOTS */ +#define ndquot DQUOT_MAX_HEURISTIC + +/* IRIX uses the current size of the name cache to guess a good value */ +/* - this isn't the same but is a good enough starting point for now. */ +#define DQUOT_HASH_HEURISTIC files_stat.nr_files + +/* IRIX inodes maintain the project ID also, zero this field on Linux */ +#define DEFAULT_PROJID 0 +#define dfltprid DEFAULT_PROJID + +#define MAXPATHLEN 1024 + +#define MIN(a,b) (min(a,b)) +#define MAX(a,b) (max(a,b)) +#define howmany(x, y) (((x)+((y)-1))/(y)) +#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) + +#define xfs_stack_trace() dump_stack() + +#define xfs_itruncate_data(ip, off) \ + (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) + + +/* Move the kernel do_div definition off to one side */ + +#if defined __i386__ +/* For ia32 we need to pull some tricks to get past various versions + * of the compiler which do not like us using do_div in the middle + * of large functions. 
+ */ +static inline __u32 xfs_do_div(void *a, __u32 b, int n) +{ + __u32 mod; + + switch (n) { + case 4: + mod = *(__u32 *)a % b; + *(__u32 *)a = *(__u32 *)a / b; + return mod; + case 8: + { + unsigned long __upper, __low, __high, __mod; + __u64 c = *(__u64 *)a; + __upper = __high = c >> 32; + __low = c; + if (__high) { + __upper = __high % (b); + __high = __high / (b); + } + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); + asm("":"=A" (c):"a" (__low),"d" (__high)); + *(__u64 *)a = c; + return __mod; + } + } + + /* NOTREACHED */ + return 0; +} + +/* Side effect free 64 bit mod operation */ +static inline __u32 xfs_do_mod(void *a, __u32 b, int n) +{ + switch (n) { + case 4: + return *(__u32 *)a % b; + case 8: + { + unsigned long __upper, __low, __high, __mod; + __u64 c = *(__u64 *)a; + __upper = __high = c >> 32; + __low = c; + if (__high) { + __upper = __high % (b); + __high = __high / (b); + } + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); + asm("":"=A" (c):"a" (__low),"d" (__high)); + return __mod; + } + } + + /* NOTREACHED */ + return 0; +} +#else +static inline __u32 xfs_do_div(void *a, __u32 b, int n) +{ + __u32 mod; + + switch (n) { + case 4: + mod = *(__u32 *)a % b; + *(__u32 *)a = *(__u32 *)a / b; + return mod; + case 8: + mod = do_div(*(__u64 *)a, b); + return mod; + } + + /* NOTREACHED */ + return 0; +} + +/* Side effect free 64 bit mod operation */ +static inline __u32 xfs_do_mod(void *a, __u32 b, int n) +{ + switch (n) { + case 4: + return *(__u32 *)a % b; + case 8: + { + __u64 c = *(__u64 *)a; + return do_div(c, b); + } + } + + /* NOTREACHED */ + return 0; +} +#endif + +#undef do_div +#define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a)) +#define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a)) + +static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) +{ + x += y - 1; + do_div(x, y); + return(x * y); +} + +#endif /* __XFS_LINUX__ */ diff --git a/fs/xfs/linux/xfs_lrw.c 
b/fs/xfs/linux/xfs_lrw.c new file mode 100644 index 000000000..4bacdb76a --- /dev/null +++ b/fs/xfs/linux/xfs_lrw.c @@ -0,0 +1,1028 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +/* + * fs/xfs/linux/xfs_lrw.c (Linux Read Write stuff) + * + */ + +#include "xfs.h" + +#include "xfs_fs.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_bit.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_cap.h" +#include "xfs_mac.h" +#include "xfs_attr.h" +#include "xfs_inode_item.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_iomap.h" + +#include + + +#if defined(XFS_RW_TRACE) +void +xfs_rw_enter_trace( + int tag, + xfs_iocore_t *io, + const struct iovec *iovp, + size_t segs, + loff_t offset, + int ioflags) +{ + xfs_inode_t *ip = XFS_IO_INODE(io); + + if (ip->i_rwtrace == NULL) + return; + ktrace_enter(ip->i_rwtrace, + (void *)(unsigned long)tag, + (void *)ip, + (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), + (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), + (void *)(__psint_t)iovp, + (void *)((unsigned long)segs), + (void *)((unsigned long)((offset >> 32) & 0xffffffff)), + (void *)((unsigned long)(offset & 0xffffffff)), + (void *)((unsigned long)ioflags), + (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)), + (void *)((unsigned long)(io->io_new_size & 
0xffffffff)), + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL); +} + +void +xfs_inval_cached_trace( + xfs_iocore_t *io, + xfs_off_t offset, + xfs_off_t len, + xfs_off_t first, + xfs_off_t last) +{ + xfs_inode_t *ip = XFS_IO_INODE(io); + + if (ip->i_rwtrace == NULL) + return; + ktrace_enter(ip->i_rwtrace, + (void *)(__psint_t)XFS_INVAL_CACHED, + (void *)ip, + (void *)((unsigned long)((offset >> 32) & 0xffffffff)), + (void *)((unsigned long)(offset & 0xffffffff)), + (void *)((unsigned long)((len >> 32) & 0xffffffff)), + (void *)((unsigned long)(len & 0xffffffff)), + (void *)((unsigned long)((first >> 32) & 0xffffffff)), + (void *)((unsigned long)(first & 0xffffffff)), + (void *)((unsigned long)((last >> 32) & 0xffffffff)), + (void *)((unsigned long)(last & 0xffffffff)), + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL, + (void *)NULL); +} +#endif + +/* + * xfs_iozero + * + * xfs_iozero clears the specified range of buffer supplied, + * and marks all the affected blocks as valid and modified. If + * an affected block is not allocated, it will be allocated. If + * an affected block is not completely overwritten, and is not + * valid before the operation, it will be read from disk before + * being partially zeroed. 
+ */ +STATIC int +xfs_iozero( + struct inode *ip, /* inode */ + loff_t pos, /* offset in file */ + size_t count, /* size of data to zero */ + loff_t end_size) /* max file size to set */ +{ + unsigned bytes; + struct page *page; + struct address_space *mapping; + char *kaddr; + int status; + + mapping = ip->i_mapping; + do { + unsigned long index, offset; + + offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ + index = pos >> PAGE_CACHE_SHIFT; + bytes = PAGE_CACHE_SIZE - offset; + if (bytes > count) + bytes = count; + + status = -ENOMEM; + page = grab_cache_page(mapping, index); + if (!page) + break; + + kaddr = kmap(page); + status = mapping->a_ops->prepare_write(NULL, page, offset, + offset + bytes); + if (status) { + goto unlock; + } + + memset((void *) (kaddr + offset), 0, bytes); + flush_dcache_page(page); + status = mapping->a_ops->commit_write(NULL, page, offset, + offset + bytes); + if (!status) { + pos += bytes; + count -= bytes; + if (pos > i_size_read(ip)) + i_size_write(ip, pos < end_size ? pos : end_size); + } + +unlock: + kunmap(page); + unlock_page(page); + page_cache_release(page); + if (status) + break; + } while (count); + + return (-status); +} + +/* + * xfs_inval_cached_pages + * + * This routine is responsible for keeping direct I/O and buffered I/O + * somewhat coherent. From here we make sure that we're at least + * temporarily holding the inode I/O lock exclusively and then call + * the page cache to flush and invalidate any cached pages. If there + * are no cached pages this routine will be very quick. + */ +void +xfs_inval_cached_pages( + vnode_t *vp, + xfs_iocore_t *io, + xfs_off_t offset, + int write, + int relock) +{ + xfs_mount_t *mp; + + if (!VN_CACHED(vp)) { + return; + } + + mp = io->io_mount; + + /* + * We need to get the I/O lock exclusively in order + * to safely invalidate pages and mappings. 
+ */ + if (relock) { + XFS_IUNLOCK(mp, io, XFS_IOLOCK_SHARED); + XFS_ILOCK(mp, io, XFS_IOLOCK_EXCL); + } + + /* Writing beyond EOF creates a hole that must be zeroed */ + if (write && (offset > XFS_SIZE(mp, io))) { + xfs_fsize_t isize; + + XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + isize = XFS_SIZE(mp, io); + if (offset > isize) { + xfs_zero_eof(vp, io, offset, isize, offset); + } + XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + } + + xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1); + VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED); + if (relock) { + XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL); + } +} + +ssize_t /* bytes read, or (-) error */ +xfs_read( + bhv_desc_t *bdp, + struct kiocb *iocb, + const struct iovec *iovp, + unsigned int segs, + loff_t *offset, + int ioflags, + cred_t *credp) +{ + struct file *file = iocb->ki_filp; + size_t size = 0; + ssize_t ret; + xfs_fsize_t n; + xfs_inode_t *ip; + xfs_mount_t *mp; + vnode_t *vp; + unsigned long seg; + + ip = XFS_BHVTOI(bdp); + vp = BHV_TO_VNODE(bdp); + mp = ip->i_mount; + + XFS_STATS_INC(xs_read_calls); + + /* START copy & waste from filemap.c */ + for (seg = 0; seg < segs; seg++) { + const struct iovec *iv = &iovp[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. + */ + size += iv->iov_len; + if (unlikely((ssize_t)(size|iv->iov_len) < 0)) + return XFS_ERROR(-EINVAL); + } + /* END copy & waste from filemap.c */ + + if (ioflags & IO_ISDIRECT) { + xfs_buftarg_t *target = + (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? 
+ mp->m_rtdev_targp : mp->m_ddev_targp; + if ((*offset & target->pbr_smask) || + (size & target->pbr_smask)) { + if (*offset == ip->i_d.di_size) { + return (0); + } + return -XFS_ERROR(EINVAL); + } + } + + n = XFS_MAXIOFFSET(mp) - *offset; + if ((n <= 0) || (size == 0)) + return 0; + + if (n < size) + size = n; + + if (XFS_FORCED_SHUTDOWN(mp)) { + return -EIO; + } + + /* OK so we are holding the I/O lock for the duration + * of the submission, then what happens if the I/O + * does not really happen here, but is scheduled + * later? + */ + xfs_ilock(ip, XFS_IOLOCK_SHARED); + + if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && + !(ioflags & IO_INVIS)) { + vrwlock_t locktype = VRWLOCK_READ; + + ret = XFS_SEND_DATA(mp, DM_EVENT_READ, + BHV_TO_VNODE(bdp), *offset, size, + FILP_DELAY_FLAG(file), &locktype); + if (ret) { + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + return -ret; + } + } + + xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, + iovp, segs, *offset, ioflags); + ret = __generic_file_aio_read(iocb, iovp, segs, offset); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + + if (ret > 0) + XFS_STATS_ADD(xs_read_bytes, ret); + + if (likely(!(ioflags & IO_INVIS))) + xfs_ichgtime(ip, XFS_ICHGTIME_ACC); + + return ret; +} + +ssize_t +xfs_sendfile( + bhv_desc_t *bdp, + struct file *filp, + loff_t *offset, + int ioflags, + size_t count, + read_actor_t actor, + void *target, + cred_t *credp) +{ + ssize_t ret; + xfs_fsize_t n; + xfs_inode_t *ip; + xfs_mount_t *mp; + vnode_t *vp; + + ip = XFS_BHVTOI(bdp); + vp = BHV_TO_VNODE(bdp); + mp = ip->i_mount; + + XFS_STATS_INC(xs_read_calls); + + n = XFS_MAXIOFFSET(mp) - *offset; + if ((n <= 0) || (count == 0)) + return 0; + + if (n < count) + count = n; + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; + + xfs_ilock(ip, XFS_IOLOCK_SHARED); + + if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && + (!(ioflags & IO_INVIS))) { + vrwlock_t locktype = VRWLOCK_READ; + int error; + + error = XFS_SEND_DATA(mp, DM_EVENT_READ, 
BHV_TO_VNODE(bdp), *offset, count, + FILP_DELAY_FLAG(filp), &locktype); + if (error) { + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + return -error; + } + } + xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore, + target, count, *offset, ioflags); + ret = generic_file_sendfile(filp, offset, count, actor, target); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + + XFS_STATS_ADD(xs_read_bytes, ret); + xfs_ichgtime(ip, XFS_ICHGTIME_ACC); + return ret; +} + +/* + * This routine is called to handle zeroing any space in the last + * block of the file that is beyond the EOF. We do this since the + * size is being increased without writing anything to that block + * and we don't want anyone to read the garbage on the disk. + */ +STATIC int /* error (positive) */ +xfs_zero_last_block( + struct inode *ip, + xfs_iocore_t *io, + xfs_off_t offset, + xfs_fsize_t isize, + xfs_fsize_t end_size) +{ + xfs_fileoff_t last_fsb; + xfs_mount_t *mp; + int nimaps; + int zero_offset; + int zero_len; + int isize_fsb_offset; + int error = 0; + xfs_bmbt_irec_t imap; + loff_t loff; + size_t lsize; + + ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0); + ASSERT(offset > isize); + + mp = io->io_mount; + + isize_fsb_offset = XFS_B_FSB_OFFSET(mp, isize); + if (isize_fsb_offset == 0) { + /* + * There are no extra bytes in the last block on disk to + * zero, so return. + */ + return 0; + } + + last_fsb = XFS_B_TO_FSBT(mp, isize); + nimaps = 1; + error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap, + &nimaps, NULL); + if (error) { + return error; + } + ASSERT(nimaps > 0); + /* + * If the block underlying isize is just a hole, then there + * is nothing to zero. + */ + if (imap.br_startblock == HOLESTARTBLOCK) { + return 0; + } + /* + * Zero the part of the last block beyond the EOF, and write it + * out sync. We need to drop the ilock while we do this so we + * don't deadlock when the buffer cache calls back to us. 
+ */ + XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); + loff = XFS_FSB_TO_B(mp, last_fsb); + lsize = XFS_FSB_TO_B(mp, 1); + + zero_offset = isize_fsb_offset; + zero_len = mp->m_sb.sb_blocksize - isize_fsb_offset; + + error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size); + + XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + ASSERT(error >= 0); + return error; +} + +/* + * Zero any on disk space between the current EOF and the new, + * larger EOF. This handles the normal case of zeroing the remainder + * of the last block in the file and the unusual case of zeroing blocks + * out beyond the size of the file. This second case only happens + * with fixed size extents and when the system crashes before the inode + * size was updated but after blocks were allocated. If fill is set, + * then any holes in the range are filled and zeroed. If not, the holes + * are left alone as holes. + */ + +int /* error (positive) */ +xfs_zero_eof( + vnode_t *vp, + xfs_iocore_t *io, + xfs_off_t offset, /* starting I/O offset */ + xfs_fsize_t isize, /* current inode size */ + xfs_fsize_t end_size) /* terminal inode size */ +{ + struct inode *ip = LINVFS_GET_IP(vp); + xfs_fileoff_t start_zero_fsb; + xfs_fileoff_t end_zero_fsb; + xfs_fileoff_t prev_zero_fsb; + xfs_fileoff_t zero_count_fsb; + xfs_fileoff_t last_fsb; + xfs_extlen_t buf_len_fsb; + xfs_extlen_t prev_zero_count; + xfs_mount_t *mp; + int nimaps; + int error = 0; + xfs_bmbt_irec_t imap; + loff_t loff; + size_t lsize; + + ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); + ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + + mp = io->io_mount; + + /* + * First handle zeroing the block on which isize resides. + * We only zero a part of that block so it is handled specially. 
+ */ + error = xfs_zero_last_block(ip, io, offset, isize, end_size); + if (error) { + ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); + ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + return error; + } + + /* + * Calculate the range between the new size and the old + * where blocks needing to be zeroed may exist. To get the + * block where the last byte in the file currently resides, + * we need to subtract one from the size and truncate back + * to a block boundary. We subtract 1 in case the size is + * exactly on a block boundary. + */ + last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; + start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); + end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); + ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); + if (last_fsb == end_zero_fsb) { + /* + * The size was only incremented on its last block. + * We took care of that above, so just return. + */ + return 0; + } + + ASSERT(start_zero_fsb <= end_zero_fsb); + prev_zero_fsb = NULLFILEOFF; + prev_zero_count = 0; + while (start_zero_fsb <= end_zero_fsb) { + nimaps = 1; + zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; + error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb, + 0, NULL, 0, &imap, &nimaps, NULL); + if (error) { + ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); + ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + return error; + } + ASSERT(nimaps > 0); + + if (imap.br_state == XFS_EXT_UNWRITTEN || + imap.br_startblock == HOLESTARTBLOCK) { + /* + * This loop handles initializing pages that were + * partially initialized by the code below this + * loop. It basically zeroes the part of the page + * that sits on a hole and sets the page as P_HOLE + * and calls remapf if it is a mapped file. + */ + prev_zero_fsb = NULLFILEOFF; + prev_zero_count = 0; + start_zero_fsb = imap.br_startoff + + imap.br_blockcount; + ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); + continue; + } + + /* + * There are blocks in the range requested. 
+ * Zero them a single write at a time. We actually + * don't zero the entire range returned if it is + * too big and simply loop around to get the rest. + * That is not the most efficient thing to do, but it + * is simple and this path should not be exercised often. + */ + buf_len_fsb = XFS_FILBLKS_MIN(imap.br_blockcount, + mp->m_writeio_blocks << 8); + /* + * Drop the inode lock while we're doing the I/O. + * We'll still have the iolock to protect us. + */ + XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + + loff = XFS_FSB_TO_B(mp, start_zero_fsb); + lsize = XFS_FSB_TO_B(mp, buf_len_fsb); + + error = xfs_iozero(ip, loff, lsize, end_size); + + if (error) { + goto out_lock; + } + + prev_zero_fsb = start_zero_fsb; + prev_zero_count = buf_len_fsb; + start_zero_fsb = imap.br_startoff + buf_len_fsb; + ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); + + XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + } + + return 0; + +out_lock: + + XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + ASSERT(error >= 0); + return error; +} + +ssize_t /* bytes written, or (-) error */ +xfs_write( + bhv_desc_t *bdp, + struct kiocb *iocb, + const struct iovec *iovp, + unsigned int segs, + loff_t *offset, + int ioflags, + cred_t *credp) +{ + struct file *file = iocb->ki_filp; + size_t size = 0; + xfs_inode_t *xip; + xfs_mount_t *mp; + ssize_t ret; + int error = 0; + xfs_fsize_t isize, new_size; + xfs_fsize_t n, limit; + xfs_iocore_t *io; + vnode_t *vp; + unsigned long seg; + int iolock; + int eventsent = 0; + vrwlock_t locktype; + + XFS_STATS_INC(xs_write_calls); + + vp = BHV_TO_VNODE(bdp); + xip = XFS_BHVTOI(bdp); + + /* START copy & waste from filemap.c */ + for (seg = 0; seg < segs; seg++) { + const struct iovec *iv = &iovp[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. 
+ */ + size += iv->iov_len; + if (unlikely((ssize_t)(size|iv->iov_len) < 0)) + return XFS_ERROR(-EINVAL); + } + /* END copy & waste from filemap.c */ + + if (size == 0) + return 0; + + io = &xip->i_iocore; + mp = io->io_mount; + + if (XFS_FORCED_SHUTDOWN(mp)) { + return -EIO; + } + + if (ioflags & IO_ISDIRECT) { + xfs_buftarg_t *target = + (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? + mp->m_rtdev_targp : mp->m_ddev_targp; + + if ((*offset & target->pbr_smask) || + (size & target->pbr_smask)) { + return XFS_ERROR(-EINVAL); + } + iolock = XFS_IOLOCK_SHARED; + locktype = VRWLOCK_WRITE_DIRECT; + } else { + iolock = XFS_IOLOCK_EXCL; + locktype = VRWLOCK_WRITE; + } + + xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); + + isize = xip->i_d.di_size; + limit = XFS_MAXIOFFSET(mp); + + if (file->f_flags & O_APPEND) + *offset = isize; + +start: + n = limit - *offset; + if (n <= 0) { + xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); + return -EFBIG; + } + + if (n < size) + size = n; + + new_size = *offset + size; + if (new_size > isize) { + io->io_new_size = new_size; + } + + if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && + !(ioflags & IO_INVIS) && !eventsent)) { + loff_t savedsize = *offset; + int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); + + xfs_iunlock(xip, XFS_ILOCK_EXCL); + error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, + *offset, size, + dmflags, &locktype); + if (error) { + xfs_iunlock(xip, iolock); + return -error; + } + xfs_ilock(xip, XFS_ILOCK_EXCL); + eventsent = 1; + + /* + * The iolock was dropped and reaquired in XFS_SEND_DATA + * so we have to recheck the size when appending. + * We will only "goto start;" once, since having sent the + * event prevents another call to XFS_SEND_DATA, which is + * what allows the size to change in the first place. 
+ */ + if ((file->f_flags & O_APPEND) && + savedsize != xip->i_d.di_size) { + *offset = isize = xip->i_d.di_size; + goto start; + } + } + + /* + * On Linux, generic_file_write updates the times even if + * no data is copied in so long as the write had a size. + * + * We must update xfs' times since revalidate will overcopy xfs. + */ + if (size && !(ioflags & IO_INVIS)) + xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + + /* + * If the offset is beyond the size of the file, we have a couple + * of things to do. First, if there is already space allocated + * we need to either create holes or zero the disk or ... + * + * If there is a page where the previous size lands, we need + * to zero it out up to the new size. + */ + + if (!(ioflags & IO_ISDIRECT) && (*offset > isize && isize)) { + error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset, + isize, *offset + size); + if (error) { + xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); + return(-error); + } + } + xfs_iunlock(xip, XFS_ILOCK_EXCL); + + /* + * If we're writing the file then make sure to clear the + * setuid and setgid bits if the process is not being run + * by root. This keeps people from modifying setuid and + * setgid binaries. 
+ */ + + if (((xip->i_d.di_mode & S_ISUID) || + ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == + (S_ISGID | S_IXGRP))) && + !capable(CAP_FSETID)) { + error = xfs_write_clear_setuid(xip); + if (error) { + xfs_iunlock(xip, iolock); + return -error; + } + } + +retry: + if (ioflags & IO_ISDIRECT) { + xfs_inval_cached_pages(vp, io, *offset, 1, 1); + xfs_rw_enter_trace(XFS_DIOWR_ENTER, + io, iovp, segs, *offset, ioflags); + } else { + xfs_rw_enter_trace(XFS_WRITE_ENTER, + io, iovp, segs, *offset, ioflags); + } + ret = generic_file_aio_write_nolock(iocb, iovp, segs, offset); + + if ((ret == -ENOSPC) && + DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && + !(ioflags & IO_INVIS)) { + + xfs_rwunlock(bdp, locktype); + error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, + DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, + 0, 0, 0); /* Delay flag intentionally unused */ + if (error) + return -error; + xfs_rwlock(bdp, locktype); + *offset = xip->i_d.di_size; + goto retry; + } + + if (*offset > xip->i_d.di_size) { + xfs_ilock(xip, XFS_ILOCK_EXCL); + if (*offset > xip->i_d.di_size) { + struct inode *inode = LINVFS_GET_IP(vp); + + xip->i_d.di_size = *offset; + i_size_write(inode, *offset); + xip->i_update_core = 1; + xip->i_update_size = 1; + } + xfs_iunlock(xip, XFS_ILOCK_EXCL); + } + + if (ret <= 0) { + xfs_rwunlock(bdp, locktype); + return ret; + } + + XFS_STATS_ADD(xs_write_bytes, ret); + + /* Handle various SYNC-type writes */ + if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_dentry->d_inode)) { + + /* + * If we're treating this as O_DSYNC and we have not updated the + * size, force the log. + */ + + if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) + && !(xip->i_update_size)) { + /* + * If an allocation transaction occurred + * without extending the size, then we have to force + * the log up the proper point to ensure that the + * allocation is permanent. 
We can't count on + * the fact that buffered writes lock out direct I/O + * writes - the direct I/O write could have extended + * the size nontransactionally, then finished before + * we started. xfs_write_file will think that the file + * didn't grow but the update isn't safe unless the + * size change is logged. + * + * Force the log if we've committed a transaction + * against the inode or if someone else has and + * the commit record hasn't gone to disk (e.g. + * the inode is pinned). This guarantees that + * all changes affecting the inode are permanent + * when we return. + */ + + xfs_inode_log_item_t *iip; + xfs_lsn_t lsn; + + iip = xip->i_itemp; + if (iip && iip->ili_last_lsn) { + lsn = iip->ili_last_lsn; + xfs_log_force(mp, lsn, + XFS_LOG_FORCE | XFS_LOG_SYNC); + } else if (xfs_ipincount(xip) > 0) { + xfs_log_force(mp, (xfs_lsn_t)0, + XFS_LOG_FORCE | XFS_LOG_SYNC); + } + + } else { + xfs_trans_t *tp; + + /* + * O_SYNC or O_DSYNC _with_ a size update are handled + * the same way. + * + * If the write was synchronous then we need to make + * sure that the inode modification time is permanent. + * We'll have updated the timestamp above, so here + * we use a synchronous transaction to log the inode. + * It's not fast, but it's necessary. + * + * If this a dsync write and the size got changed + * non-transactionally, then we need to ensure that + * the size change gets logged in a synchronous + * transaction. 
+ */ + + tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); + if ((error = xfs_trans_reserve(tp, 0, + XFS_SWRITE_LOG_RES(mp), + 0, 0, 0))) { + /* Transaction reserve failed */ + xfs_trans_cancel(tp, 0); + } else { + /* Transaction reserve successful */ + xfs_ilock(xip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, xip); + xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE); + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0, (xfs_lsn_t)0); + xfs_iunlock(xip, XFS_ILOCK_EXCL); + } + } + } /* (ioflags & O_SYNC) */ + + xfs_rwunlock(bdp, locktype); + return(ret); +} + +/* + * All xfs metadata buffers except log state machine buffers + * get this attached as their b_bdstrat callback function. + * This is so that we can catch a buffer + * after prematurely unpinning it to forcibly shutdown the filesystem. + */ +int +xfs_bdstrat_cb(struct xfs_buf *bp) +{ + xfs_mount_t *mp; + + mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); + if (!XFS_FORCED_SHUTDOWN(mp)) { + pagebuf_iorequest(bp); + return 0; + } else { + xfs_buftrace("XFS__BDSTRAT IOERROR", bp); + /* + * Metadata write that didn't get logged but + * written delayed anyway. These aren't associated + * with a transaction, and can be ignored. + */ + if (XFS_BUF_IODONE_FUNC(bp) == NULL && + (XFS_BUF_ISREAD(bp)) == 0) + return (xfs_bioerror_relse(bp)); + else + return (xfs_bioerror(bp)); + } +} + + +int +xfs_bmap(bhv_desc_t *bdp, + xfs_off_t offset, + ssize_t count, + int flags, + xfs_iomap_t *iomapp, + int *niomaps) +{ + xfs_inode_t *ip = XFS_BHVTOI(bdp); + xfs_iocore_t *io = &ip->i_iocore; + + ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); + ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) == + ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0)); + + return xfs_iomap(io, offset, count, flags, iomapp, niomaps); +} + +/* + * Wrapper around bdstrat so that we can stop data + * from going to disk in case we are shutting down the filesystem. 
+ * Typically user data goes thru this path; one of the exceptions + * is the superblock. + */ +int +xfsbdstrat( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + ASSERT(mp); + if (!XFS_FORCED_SHUTDOWN(mp)) { + /* Grio redirection would go here + * if (XFS_BUF_IS_GRIO(bp)) { + */ + + pagebuf_iorequest(bp); + return 0; + } + + xfs_buftrace("XFSBDSTRAT IOERROR", bp); + return (xfs_bioerror_relse(bp)); +} + +/* + * If the underlying (data/log/rt) device is readonly, there are some + * operations that cannot proceed. + */ +int +xfs_dev_is_read_only( + xfs_mount_t *mp, + char *message) +{ + if (xfs_readonly_buftarg(mp->m_ddev_targp) || + xfs_readonly_buftarg(mp->m_logdev_targp) || + (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { + cmn_err(CE_NOTE, + "XFS: %s required on read-only device.", message); + cmn_err(CE_NOTE, + "XFS: write access unavailable, cannot proceed."); + return EROFS; + } + return 0; +} diff --git a/fs/xfs/linux/xfs_lrw.h b/fs/xfs/linux/xfs_lrw.h new file mode 100644 index 000000000..faf0afc70 --- /dev/null +++ b/fs/xfs/linux/xfs_lrw.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_LRW_H__ +#define __XFS_LRW_H__ + +struct vnode; +struct bhv_desc; +struct xfs_mount; +struct xfs_iocore; +struct xfs_inode; +struct xfs_bmbt_irec; +struct xfs_buf; +struct xfs_iomap; + +#if defined(XFS_RW_TRACE) +/* + * Defines for the trace mechanisms in xfs_lrw.c. + */ +#define XFS_RW_KTRACE_SIZE 128 + +#define XFS_READ_ENTER 1 +#define XFS_WRITE_ENTER 2 +#define XFS_IOMAP_READ_ENTER 3 +#define XFS_IOMAP_WRITE_ENTER 4 +#define XFS_IOMAP_READ_MAP 5 +#define XFS_IOMAP_WRITE_MAP 6 +#define XFS_IOMAP_WRITE_NOSPACE 7 +#define XFS_ITRUNC_START 8 +#define XFS_ITRUNC_FINISH1 9 +#define XFS_ITRUNC_FINISH2 10 +#define XFS_CTRUNC1 11 +#define XFS_CTRUNC2 12 +#define XFS_CTRUNC3 13 +#define XFS_CTRUNC4 14 +#define XFS_CTRUNC5 15 +#define XFS_CTRUNC6 16 +#define XFS_BUNMAPI 17 +#define XFS_INVAL_CACHED 18 +#define XFS_DIORD_ENTER 19 +#define XFS_DIOWR_ENTER 20 +#define XFS_SENDFILE_ENTER 21 +#define XFS_WRITEPAGE_ENTER 22 +#define XFS_RELEASEPAGE_ENTER 23 +#define XFS_IOMAP_ALLOC_ENTER 24 +#define XFS_IOMAP_ALLOC_MAP 25 +#define XFS_IOMAP_UNWRITTEN 26 +extern void xfs_rw_enter_trace(int, struct xfs_iocore *, + const struct iovec *, size_t, loff_t, int); +extern void xfs_inval_cached_trace(struct xfs_iocore *, + xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t); +#else +#define xfs_rw_enter_trace(tag, io, iovec, segs, offset, ioflags) +#define xfs_inval_cached_trace(io, offset, len, first, last) +#endif + +/* + * Maximum count of bmaps used by read and write paths. 
+ */ +#define XFS_MAX_RW_NBMAPS 4 + +extern int xfs_bmap(struct bhv_desc *, xfs_off_t, ssize_t, int, + struct xfs_iomap *, int *); +extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *); +extern int xfs_bdstrat_cb(struct xfs_buf *); + +extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t, + xfs_fsize_t, xfs_fsize_t); +extern void xfs_inval_cached_pages(struct vnode *, struct xfs_iocore *, + xfs_off_t, int, int); +extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *, + const struct iovec *, unsigned int, + loff_t *, int, struct cred *); +extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *, + const struct iovec *, unsigned int, + loff_t *, int, struct cred *); +extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *, + loff_t *, int, size_t, read_actor_t, + void *, struct cred *); + +extern int xfs_dev_is_read_only(struct xfs_mount *, char *); + +#define XFS_FSB_TO_DB_IO(io,fsb) \ + (((io)->io_flags & XFS_IOCORE_RT) ? \ + XFS_FSB_TO_BB((io)->io_mount, (fsb)) : \ + XFS_FSB_TO_DADDR((io)->io_mount, (fsb))) + +#endif /* __XFS_LRW_H__ */ diff --git a/fs/xfs/linux/xfs_stats.c b/fs/xfs/linux/xfs_stats.c new file mode 100644 index 000000000..b7de296e1 --- /dev/null +++ b/fs/xfs/linux/xfs_stats.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. 
Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" +#include + +DEFINE_PER_CPU(struct xfsstats, xfsstats); + +STATIC int +xfs_read_xfsstats( + char *buffer, + char **start, + off_t offset, + int count, + int *eof, + void *data) +{ + int c, i, j, len, val; + __uint64_t xs_xstrat_bytes = 0; + __uint64_t xs_write_bytes = 0; + __uint64_t xs_read_bytes = 0; + + static struct xstats_entry { + char *desc; + int endpoint; + } xstats[] = { + { "extent_alloc", XFSSTAT_END_EXTENT_ALLOC }, + { "abt", XFSSTAT_END_ALLOC_BTREE }, + { "blk_map", XFSSTAT_END_BLOCK_MAPPING }, + { "bmbt", XFSSTAT_END_BLOCK_MAP_BTREE }, + { "dir", XFSSTAT_END_DIRECTORY_OPS }, + { "trans", XFSSTAT_END_TRANSACTIONS }, + { "ig", XFSSTAT_END_INODE_OPS }, + { "log", XFSSTAT_END_LOG_OPS }, + { "push_ail", XFSSTAT_END_TAIL_PUSHING }, + { "xstrat", XFSSTAT_END_WRITE_CONVERT }, + { "rw", XFSSTAT_END_READ_WRITE_OPS }, + { "attr", XFSSTAT_END_ATTRIBUTE_OPS }, + { "icluster", XFSSTAT_END_INODE_CLUSTER }, + { "vnodes", XFSSTAT_END_VNODE_OPS }, + { "buf", XFSSTAT_END_BUF }, + }; + + /* Loop over all stats groups */ + for (i=j=len = 0; i < sizeof(xstats)/sizeof(struct xstats_entry); i++) { + len += sprintf(buffer + len, xstats[i].desc); + /* inner loop does each group */ + while (j < xstats[i].endpoint) { + val = 0; + /* sum over all 
cpus */ + for (c = 0; c < NR_CPUS; c++) { + if (!cpu_possible(c)) continue; + val += *(((__u32*)&per_cpu(xfsstats, c) + j)); + } + len += sprintf(buffer + len, " %u", val); + j++; + } + buffer[len++] = '\n'; + } + /* extra precision counters */ + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_possible(i)) continue; + xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes; + xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes; + xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; + } + + len += sprintf(buffer + len, "xpc %Lu %Lu %Lu\n", + xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); + len += sprintf(buffer + len, "debug %u\n", +#if defined(XFSDEBUG) + 1); +#else + 0); +#endif + + if (offset >= len) { + *start = buffer; + *eof = 1; + return 0; + } + *start = buffer + offset; + if ((len -= offset) > count) + return count; + *eof = 1; + + return len; +} + +void +xfs_init_procfs(void) +{ + if (!proc_mkdir("fs/xfs", 0)) + return; + create_proc_read_entry("fs/xfs/stat", 0, 0, xfs_read_xfsstats, NULL); +} + +void +xfs_cleanup_procfs(void) +{ + remove_proc_entry("fs/xfs/stat", NULL); + remove_proc_entry("fs/xfs", NULL); +} diff --git a/fs/xfs/linux/xfs_stats.h b/fs/xfs/linux/xfs_stats.h new file mode 100644 index 000000000..04566006f --- /dev/null +++ b/fs/xfs/linux/xfs_stats.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. 
Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_STATS_H__ +#define __XFS_STATS_H__ + + +#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) + +#include + +/* + * XFS global statistics + */ +struct xfsstats { +# define XFSSTAT_END_EXTENT_ALLOC 4 + __uint32_t xs_allocx; + __uint32_t xs_allocb; + __uint32_t xs_freex; + __uint32_t xs_freeb; +# define XFSSTAT_END_ALLOC_BTREE (XFSSTAT_END_EXTENT_ALLOC+4) + __uint32_t xs_abt_lookup; + __uint32_t xs_abt_compare; + __uint32_t xs_abt_insrec; + __uint32_t xs_abt_delrec; +# define XFSSTAT_END_BLOCK_MAPPING (XFSSTAT_END_ALLOC_BTREE+7) + __uint32_t xs_blk_mapr; + __uint32_t xs_blk_mapw; + __uint32_t xs_blk_unmap; + __uint32_t xs_add_exlist; + __uint32_t xs_del_exlist; + __uint32_t xs_look_exlist; + __uint32_t xs_cmp_exlist; +# define XFSSTAT_END_BLOCK_MAP_BTREE (XFSSTAT_END_BLOCK_MAPPING+4) + __uint32_t xs_bmbt_lookup; + __uint32_t xs_bmbt_compare; + __uint32_t xs_bmbt_insrec; + __uint32_t xs_bmbt_delrec; +# define XFSSTAT_END_DIRECTORY_OPS (XFSSTAT_END_BLOCK_MAP_BTREE+4) + __uint32_t xs_dir_lookup; + __uint32_t xs_dir_create; + __uint32_t xs_dir_remove; + __uint32_t xs_dir_getdents; +# define XFSSTAT_END_TRANSACTIONS (XFSSTAT_END_DIRECTORY_OPS+3) + __uint32_t xs_trans_sync; + __uint32_t xs_trans_async; + __uint32_t xs_trans_empty; +# define 
XFSSTAT_END_INODE_OPS (XFSSTAT_END_TRANSACTIONS+7) + __uint32_t xs_ig_attempts; + __uint32_t xs_ig_found; + __uint32_t xs_ig_frecycle; + __uint32_t xs_ig_missed; + __uint32_t xs_ig_dup; + __uint32_t xs_ig_reclaims; + __uint32_t xs_ig_attrchg; +# define XFSSTAT_END_LOG_OPS (XFSSTAT_END_INODE_OPS+5) + __uint32_t xs_log_writes; + __uint32_t xs_log_blocks; + __uint32_t xs_log_noiclogs; + __uint32_t xs_log_force; + __uint32_t xs_log_force_sleep; +# define XFSSTAT_END_TAIL_PUSHING (XFSSTAT_END_LOG_OPS+10) + __uint32_t xs_try_logspace; + __uint32_t xs_sleep_logspace; + __uint32_t xs_push_ail; + __uint32_t xs_push_ail_success; + __uint32_t xs_push_ail_pushbuf; + __uint32_t xs_push_ail_pinned; + __uint32_t xs_push_ail_locked; + __uint32_t xs_push_ail_flushing; + __uint32_t xs_push_ail_restarts; + __uint32_t xs_push_ail_flush; +# define XFSSTAT_END_WRITE_CONVERT (XFSSTAT_END_TAIL_PUSHING+2) + __uint32_t xs_xstrat_quick; + __uint32_t xs_xstrat_split; +# define XFSSTAT_END_READ_WRITE_OPS (XFSSTAT_END_WRITE_CONVERT+2) + __uint32_t xs_write_calls; + __uint32_t xs_read_calls; +# define XFSSTAT_END_ATTRIBUTE_OPS (XFSSTAT_END_READ_WRITE_OPS+4) + __uint32_t xs_attr_get; + __uint32_t xs_attr_set; + __uint32_t xs_attr_remove; + __uint32_t xs_attr_list; +# define XFSSTAT_END_INODE_CLUSTER (XFSSTAT_END_ATTRIBUTE_OPS+3) + __uint32_t xs_iflush_count; + __uint32_t xs_icluster_flushcnt; + __uint32_t xs_icluster_flushinode; +# define XFSSTAT_END_VNODE_OPS (XFSSTAT_END_INODE_CLUSTER+8) + __uint32_t vn_active; /* # vnodes not on free lists */ + __uint32_t vn_alloc; /* # times vn_alloc called */ + __uint32_t vn_get; /* # times vn_get called */ + __uint32_t vn_hold; /* # times vn_hold called */ + __uint32_t vn_rele; /* # times vn_rele called */ + __uint32_t vn_reclaim; /* # times vn_reclaim called */ + __uint32_t vn_remove; /* # times vn_remove called */ + __uint32_t vn_free; /* # times vn_free called */ +#define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9) + __uint32_t pb_get; + __uint32_t 
pb_create; + __uint32_t pb_get_locked; + __uint32_t pb_get_locked_waited; + __uint32_t pb_busy_locked; + __uint32_t pb_miss_locked; + __uint32_t pb_page_retries; + __uint32_t pb_page_found; + __uint32_t pb_get_read; +/* Extra precision counters */ + __uint64_t xs_xstrat_bytes; + __uint64_t xs_write_bytes; + __uint64_t xs_read_bytes; +}; + +DECLARE_PER_CPU(struct xfsstats, xfsstats); + +/* We don't disable preempt, not too worried about poking the + * wrong cpu's stat for now */ +#define XFS_STATS_INC(count) (__get_cpu_var(xfsstats).count++) +#define XFS_STATS_DEC(count) (__get_cpu_var(xfsstats).count--) +#define XFS_STATS_ADD(count, inc) (__get_cpu_var(xfsstats).count += (inc)) + +extern void xfs_init_procfs(void); +extern void xfs_cleanup_procfs(void); + + +#else /* !CONFIG_PROC_FS */ + +# define XFS_STATS_INC(count) +# define XFS_STATS_DEC(count) +# define XFS_STATS_ADD(count, inc) + +static __inline void xfs_init_procfs(void) { }; +static __inline void xfs_cleanup_procfs(void) { }; + +#endif /* !CONFIG_PROC_FS */ + +#endif /* __XFS_STATS_H__ */ diff --git a/fs/xfs/linux/xfs_super.c b/fs/xfs/linux/xfs_super.c new file mode 100644 index 000000000..bbaf61bee --- /dev/null +++ b/fs/xfs/linux/xfs_super.c @@ -0,0 +1,850 @@ +/* + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_clnt.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_bit.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_cap.h" +#include "xfs_mac.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_version.h" + +#include +#include +#include +#include + +STATIC struct quotactl_ops linvfs_qops; +STATIC struct super_operations linvfs_sops; +STATIC struct export_operations linvfs_export_ops; +STATIC kmem_cache_t * linvfs_inode_cachep; + +STATIC struct xfs_mount_args * +xfs_args_allocate( + struct super_block *sb) +{ + struct xfs_mount_args *args; + + args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP); + args->logbufs = args->logbufsize = -1; + strncpy(args->fsname, sb->s_id, MAXNAMELEN); + + /* Copy the already-parsed 
mount(2) flags we're interested in */ + if (sb->s_flags & MS_NOATIME) + args->flags |= XFSMNT_NOATIME; + + /* Default to 32 bit inodes on Linux all the time */ + args->flags |= XFSMNT_32BITINODES; + + return args; +} + +__uint64_t +xfs_max_file_offset( + unsigned int blockshift) +{ + unsigned int pagefactor = 1; + unsigned int bitshift = BITS_PER_LONG - 1; + + /* Figure out maximum filesize, on Linux this can depend on + * the filesystem blocksize (on 32 bit platforms). + * __block_prepare_write does this in an [unsigned] long... + * page->index << (PAGE_CACHE_SHIFT - bbits) + * So, for page sized blocks (4K on 32 bit platforms), + * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is + * (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) + * but for smaller blocksizes it is less (bbits = log2 bsize). + * Note1: get_block_t takes a long (implicit cast from above) + * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch + * can optionally convert the [unsigned] long from above into + * an [unsigned] long long. 
+ */ + +#if BITS_PER_LONG == 32 +# if defined(CONFIG_LBD) + ASSERT(sizeof(sector_t) == 8); + pagefactor = PAGE_CACHE_SIZE; + bitshift = BITS_PER_LONG; +# else + pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift); +# endif +#endif + + return (((__uint64_t)pagefactor) << bitshift) - 1; +} + +STATIC __inline__ void +xfs_set_inodeops( + struct inode *inode) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + + if (vp->v_type == VNON) { + make_bad_inode(inode); + } else if (S_ISREG(inode->i_mode)) { + inode->i_op = &linvfs_file_inode_operations; + inode->i_fop = &linvfs_file_operations; + inode->i_mapping->a_ops = &linvfs_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &linvfs_dir_inode_operations; + inode->i_fop = &linvfs_dir_operations; + } else if (S_ISLNK(inode->i_mode)) { + inode->i_op = &linvfs_symlink_inode_operations; + if (inode->i_blocks) + inode->i_mapping->a_ops = &linvfs_aops; + } else { + inode->i_op = &linvfs_file_inode_operations; + init_special_inode(inode, inode->i_mode, inode->i_rdev); + } +} + +STATIC __inline__ void +xfs_revalidate_inode( + xfs_mount_t *mp, + vnode_t *vp, + xfs_inode_t *ip) +{ + struct inode *inode = LINVFS_GET_IP(vp); + + inode->i_mode = (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type); + inode->i_nlink = ip->i_d.di_nlink; + inode->i_uid = ip->i_d.di_uid; + inode->i_gid = ip->i_d.di_gid; + if (((1 << vp->v_type) & ((1<i_rdev = 0; + } else { + xfs_dev_t dev = ip->i_df.if_u2.if_rdev; + inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev)); + } + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_generation = ip->i_d.di_gen; + i_size_write(inode, ip->i_d.di_size); + inode->i_blocks = + XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); + inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; + inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; + inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; + inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; + inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; + 
inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; + if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) + inode->i_flags |= S_APPEND; + else + inode->i_flags &= ~S_APPEND; + if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) + inode->i_flags |= S_SYNC; + else + inode->i_flags &= ~S_SYNC; + if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) + inode->i_flags |= S_NOATIME; + else + inode->i_flags &= ~S_NOATIME; + vp->v_flag &= ~VMODIFIED; +} + +void +xfs_initialize_vnode( + bhv_desc_t *bdp, + vnode_t *vp, + bhv_desc_t *inode_bhv, + int unlock) +{ + xfs_inode_t *ip = XFS_BHVTOI(inode_bhv); + struct inode *inode = LINVFS_GET_IP(vp); + + if (!inode_bhv->bd_vobj) { + vp->v_vfsp = bhvtovfs(bdp); + bhv_desc_init(inode_bhv, ip, vp, &xfs_vnodeops); + bhv_insert(VN_BHV_HEAD(vp), inode_bhv); + } + + vp->v_type = IFTOVT(ip->i_d.di_mode); + + /* Have we been called during the new inode create process, + * in which case we are too early to fill in the Linux inode. + */ + if (vp->v_type == VNON) + return; + + xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip); + + /* For new inodes we need to set the ops vectors, + * and unlock the inode. 
+ */ + if (unlock && (inode->i_state & I_NEW)) { + xfs_set_inodeops(inode); + unlock_new_inode(inode); + } +} + +void +xfs_flush_inode( + xfs_inode_t *ip) +{ + struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip)); + + filemap_flush(inode->i_mapping); +} + +void +xfs_flush_device( + xfs_inode_t *ip) +{ + sync_blockdev(XFS_ITOV(ip)->v_vfsp->vfs_super->s_bdev); + xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); +} + +int +xfs_blkdev_get( + xfs_mount_t *mp, + const char *name, + struct block_device **bdevp) +{ + int error = 0; + + *bdevp = open_bdev_excl(name, 0, mp); + if (IS_ERR(*bdevp)) { + error = PTR_ERR(*bdevp); + printk("XFS: Invalid device [%s], error=%d\n", name, error); + } + + return -error; +} + +void +xfs_blkdev_put( + struct block_device *bdev) +{ + if (bdev) + close_bdev_excl(bdev); +} + + +STATIC struct inode * +linvfs_alloc_inode( + struct super_block *sb) +{ + vnode_t *vp; + + vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_cachep, + kmem_flags_convert(KM_SLEEP)); + if (!vp) + return NULL; + return LINVFS_GET_IP(vp); +} + +STATIC void +linvfs_destroy_inode( + struct inode *inode) +{ + kmem_cache_free(linvfs_inode_cachep, LINVFS_GET_VP(inode)); +} + +STATIC void +init_once( + void *data, + kmem_cache_t *cachep, + unsigned long flags) +{ + vnode_t *vp = (vnode_t *)data; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) + inode_init_once(LINVFS_GET_IP(vp)); +} + +STATIC int +init_inodecache( void ) +{ + linvfs_inode_cachep = kmem_cache_create("linvfs_icache", + sizeof(vnode_t), 0, + SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, + init_once, NULL); + + if (linvfs_inode_cachep == NULL) + return -ENOMEM; + return 0; +} + +STATIC void +destroy_inodecache( void ) +{ + if (kmem_cache_destroy(linvfs_inode_cachep)) + printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__); +} + +/* + * Attempt to flush the inode, this will actually fail + * if the inode is pinned, but we dirty the inode again + * at the 
point when it is unpinned after a log write, + * since this is when the inode itself becomes flushable. + */ +STATIC void +linvfs_write_inode( + struct inode *inode, + int sync) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + int error, flags = FLUSH_INODE; + + if (vp) { + vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); + if (sync) + flags |= FLUSH_SYNC; + VOP_IFLUSH(vp, flags, error); + } +} + +STATIC void +linvfs_clear_inode( + struct inode *inode) +{ + vnode_t *vp = LINVFS_GET_VP(inode); + + if (vp) { + vn_rele(vp); + vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); + /* + * Do all our cleanup, and remove this vnode. + */ + vn_remove(vp); + } +} + + +#define SYNCD_FLAGS (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR) + +STATIC int +xfssyncd( + void *arg) +{ + vfs_t *vfsp = (vfs_t *) arg; + int error; + + daemonize("xfssyncd"); + + vfsp->vfs_sync_task = current; + wmb(); + wake_up(&vfsp->vfs_wait_sync_task); + + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(xfs_syncd_interval); + /* swsusp */ + if (current->flags & PF_FREEZE) + refrigerator(PF_FREEZE); + if (vfsp->vfs_flag & VFS_UMOUNT) + break; + if (vfsp->vfs_flag & VFS_RDONLY) + continue; + VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error); + } + + vfsp->vfs_sync_task = NULL; + wmb(); + wake_up(&vfsp->vfs_wait_sync_task); + + return 0; +} + +STATIC int +linvfs_start_syncd( + vfs_t *vfsp) +{ + int pid; + + pid = kernel_thread(xfssyncd, (void *) vfsp, + CLONE_VM | CLONE_FS | CLONE_FILES); + if (pid < 0) + return -pid; + wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task); + return 0; +} + +STATIC void +linvfs_stop_syncd( + vfs_t *vfsp) +{ + vfsp->vfs_flag |= VFS_UMOUNT; + wmb(); + + wake_up_process(vfsp->vfs_sync_task); + wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task); +} + +STATIC void +linvfs_put_super( + struct super_block *sb) +{ + vfs_t *vfsp = LINVFS_GET_VFS(sb); + int error; + + linvfs_stop_syncd(vfsp); + VFS_SYNC(vfsp, SYNC_ATTR|SYNC_DELWRI, NULL, error); + if 
(!error) + VFS_UNMOUNT(vfsp, 0, NULL, error); + if (error) { + printk("XFS unmount got error %d\n", error); + printk("%s: vfsp/0x%p left dangling!\n", __FUNCTION__, vfsp); + return; + } + + vfs_deallocate(vfsp); +} + +STATIC void +linvfs_write_super( + struct super_block *sb) +{ + vfs_t *vfsp = LINVFS_GET_VFS(sb); + int error; + + if (sb->s_flags & MS_RDONLY) { + sb->s_dirt = 0; /* paranoia */ + return; + } + /* Push the log and superblock a little */ + VFS_SYNC(vfsp, SYNC_FSDATA, NULL, error); + sb->s_dirt = 0; +} + +STATIC int +linvfs_sync_super( + struct super_block *sb, + int wait) +{ + vfs_t *vfsp = LINVFS_GET_VFS(sb); + int error; + int flags = SYNC_FSDATA; + + if (wait) + flags |= SYNC_WAIT; + + VFS_SYNC(vfsp, flags, NULL, error); + sb->s_dirt = 0; + + return -error; +} + +STATIC int +linvfs_statfs( + struct super_block *sb, + struct kstatfs *statp) +{ + vfs_t *vfsp = LINVFS_GET_VFS(sb); + int error; + + VFS_STATVFS(vfsp, statp, NULL, error); + return -error; +} + +STATIC int +linvfs_remount( + struct super_block *sb, + int *flags, + char *options) +{ + vfs_t *vfsp = LINVFS_GET_VFS(sb); + struct xfs_mount_args *args = xfs_args_allocate(sb); + int error; + + VFS_PARSEARGS(vfsp, options, args, 1, error); + if (!error) + VFS_MNTUPDATE(vfsp, flags, args, error); + kmem_free(args, sizeof(*args)); + return -error; +} + +STATIC void +linvfs_freeze_fs( + struct super_block *sb) +{ + VFS_FREEZE(LINVFS_GET_VFS(sb)); +} + +STATIC struct dentry * +linvfs_get_parent( + struct dentry *child) +{ + int error; + vnode_t *vp, *cvp; + struct dentry *parent; + struct inode *ip = NULL; + struct dentry dotdot; + + dotdot.d_name.name = ".."; + dotdot.d_name.len = 2; + dotdot.d_inode = 0; + + cvp = NULL; + vp = LINVFS_GET_VP(child->d_inode); + VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error); + + if (!error) { + ASSERT(cvp); + ip = LINVFS_GET_IP(cvp); + if (!ip) { + VN_RELE(cvp); + return ERR_PTR(-EACCES); + } + } + if (error) + return ERR_PTR(-error); + parent = 
d_alloc_anon(ip); + if (!parent) { + VN_RELE(cvp); + parent = ERR_PTR(-ENOMEM); + } + return parent; +} + +STATIC struct dentry * +linvfs_get_dentry( + struct super_block *sb, + void *data) +{ + vnode_t *vp; + struct inode *inode; + struct dentry *result; + xfs_fid2_t xfid; + vfs_t *vfsp = LINVFS_GET_VFS(sb); + int error; + + xfid.fid_len = sizeof(xfs_fid2_t) - sizeof(xfid.fid_len); + xfid.fid_pad = 0; + xfid.fid_gen = ((__u32 *)data)[1]; + xfid.fid_ino = ((__u32 *)data)[0]; + + VFS_VGET(vfsp, &vp, (fid_t *)&xfid, error); + if (error || vp == NULL) + return ERR_PTR(-ESTALE) ; + + inode = LINVFS_GET_IP(vp); + result = d_alloc_anon(inode); + if (!result) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + return result; +} + +STATIC int +linvfs_show_options( + struct seq_file *m, + struct vfsmount *mnt) +{ + struct vfs *vfsp = LINVFS_GET_VFS(mnt->mnt_sb); + int error; + + VFS_SHOWARGS(vfsp, m, error); + return error; +} + +STATIC int +linvfs_getxstate( + struct super_block *sb, + struct fs_quota_stat *fqs) +{ + struct vfs *vfsp = LINVFS_GET_VFS(sb); + int error; + + VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error); + return -error; +} + +STATIC int +linvfs_setxstate( + struct super_block *sb, + unsigned int flags, + int op) +{ + struct vfs *vfsp = LINVFS_GET_VFS(sb); + int error; + + VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error); + return -error; +} + +STATIC int +linvfs_getxquota( + struct super_block *sb, + int type, + qid_t id, + struct fs_disk_quota *fdq) +{ + struct vfs *vfsp = LINVFS_GET_VFS(sb); + int error, getmode; + + getmode = (type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETQUOTA; + VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error); + return -error; +} + +STATIC int +linvfs_setxquota( + struct super_block *sb, + int type, + qid_t id, + struct fs_disk_quota *fdq) +{ + struct vfs *vfsp = LINVFS_GET_VFS(sb); + int error, setmode; + + setmode = (type == GRPQUOTA) ? 
Q_XSETGQLIM : Q_XSETQLIM; + VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error); + return -error; +} + +STATIC int +linvfs_fill_super( + struct super_block *sb, + void *data, + int silent) +{ + vnode_t *rootvp; + struct vfs *vfsp = vfs_allocate(); + struct xfs_mount_args *args = xfs_args_allocate(sb); + struct kstatfs statvfs; + int error, error2; + + vfsp->vfs_super = sb; + LINVFS_SET_VFS(sb, vfsp); + if (sb->s_flags & MS_RDONLY) + vfsp->vfs_flag |= VFS_RDONLY; + bhv_insert_all_vfsops(vfsp); + + VFS_PARSEARGS(vfsp, (char *)data, args, 0, error); + if (error) { + bhv_remove_all_vfsops(vfsp, 1); + goto fail_vfsop; + } + + sb_min_blocksize(sb, BBSIZE); + sb->s_export_op = &linvfs_export_ops; + sb->s_qcop = &linvfs_qops; + sb->s_op = &linvfs_sops; + + VFS_MOUNT(vfsp, args, NULL, error); + if (error) { + bhv_remove_all_vfsops(vfsp, 1); + goto fail_vfsop; + } + + VFS_STATVFS(vfsp, &statvfs, NULL, error); + if (error) + goto fail_unmount; + + sb->s_dirt = 1; + sb->s_magic = statvfs.f_type; + sb->s_blocksize = statvfs.f_bsize; + sb->s_blocksize_bits = ffs(statvfs.f_bsize) - 1; + sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); + set_posix_acl_flag(sb); + + VFS_ROOT(vfsp, &rootvp, error); + if (error) + goto fail_unmount; + + sb->s_root = d_alloc_root(LINVFS_GET_IP(rootvp)); + if (!sb->s_root) { + error = ENOMEM; + goto fail_vnrele; + } + if (is_bad_inode(sb->s_root->d_inode)) { + error = EINVAL; + goto fail_vnrele; + } + if ((error = linvfs_start_syncd(vfsp))) + goto fail_vnrele; + vn_trace_exit(rootvp, __FUNCTION__, (inst_t *)__return_address); + + kmem_free(args, sizeof(*args)); + return 0; + +fail_vnrele: + if (sb->s_root) { + dput(sb->s_root); + sb->s_root = NULL; + } else { + VN_RELE(rootvp); + } + +fail_unmount: + VFS_UNMOUNT(vfsp, 0, NULL, error2); + +fail_vfsop: + vfs_deallocate(vfsp); + kmem_free(args, sizeof(*args)); + return -error; +} + +STATIC struct super_block * +linvfs_get_sb( + struct file_system_type *fs_type, + int flags, + const char 
*dev_name, + void *data) +{ + return get_sb_bdev(fs_type, flags, dev_name, data, linvfs_fill_super); +} + + +STATIC struct export_operations linvfs_export_ops = { + .get_parent = linvfs_get_parent, + .get_dentry = linvfs_get_dentry, +}; + +STATIC struct super_operations linvfs_sops = { + .alloc_inode = linvfs_alloc_inode, + .destroy_inode = linvfs_destroy_inode, + .write_inode = linvfs_write_inode, + .clear_inode = linvfs_clear_inode, + .put_super = linvfs_put_super, + .write_super = linvfs_write_super, + .sync_fs = linvfs_sync_super, + .write_super_lockfs = linvfs_freeze_fs, + .statfs = linvfs_statfs, + .remount_fs = linvfs_remount, + .show_options = linvfs_show_options, +}; + +STATIC struct quotactl_ops linvfs_qops = { + .get_xstate = linvfs_getxstate, + .set_xstate = linvfs_setxstate, + .get_xquota = linvfs_getxquota, + .set_xquota = linvfs_setxquota, +}; + +STATIC struct file_system_type xfs_fs_type = { + .owner = THIS_MODULE, + .name = "xfs", + .get_sb = linvfs_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + + +STATIC int __init +init_xfs_fs( void ) +{ + int error; + struct sysinfo si; + static char message[] __initdata = KERN_INFO \ + XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n"; + + printk(message); + + si_meminfo(&si); + xfs_physmem = si.totalram; + + ktrace_init(64); + + error = init_inodecache(); + if (error < 0) + goto undo_inodecache; + + error = pagebuf_init(); + if (error < 0) + goto undo_pagebuf; + + vn_init(); + xfs_init(); + uuid_init(); + vfs_initdmapi(); + vfs_initquota(); + + error = register_filesystem(&xfs_fs_type); + if (error) + goto undo_register; + return 0; + +undo_register: + pagebuf_terminate(); + +undo_pagebuf: + destroy_inodecache(); + +undo_inodecache: + return error; +} + +STATIC void __exit +exit_xfs_fs( void ) +{ + vfs_exitquota(); + vfs_exitdmapi(); + unregister_filesystem(&xfs_fs_type); + xfs_cleanup(); + pagebuf_terminate(); + destroy_inodecache(); + ktrace_uninit(); +} + 
+module_init(init_xfs_fs); +module_exit(exit_xfs_fs); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled"); +MODULE_LICENSE("GPL"); diff --git a/fs/xfs/linux/xfs_super.h b/fs/xfs/linux/xfs_super.h new file mode 100644 index 000000000..557626919 --- /dev/null +++ b/fs/xfs/linux/xfs_super.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPER_H__ +#define __XFS_SUPER_H__ + +#ifdef CONFIG_XFS_DMAPI +# define vfs_insertdmapi(vfs) vfs_insertops(vfsp, &xfs_dmops) +# define vfs_initdmapi() dmapi_init() +# define vfs_exitdmapi() dmapi_uninit() +#else +# define vfs_insertdmapi(vfs) do { } while (0) +# define vfs_initdmapi() do { } while (0) +# define vfs_exitdmapi() do { } while (0) +#endif + +#ifdef CONFIG_XFS_QUOTA +# define vfs_insertquota(vfs) vfs_insertops(vfsp, &xfs_qmops) +extern void xfs_qm_init(void); +extern void xfs_qm_exit(void); +# define vfs_initquota() xfs_qm_init() +# define vfs_exitquota() xfs_qm_exit() +#else +# define vfs_insertquota(vfs) do { } while (0) +# define vfs_initquota() do { } while (0) +# define vfs_exitquota() do { } while (0) +#endif + +#ifdef CONFIG_XFS_POSIX_ACL +# define XFS_ACL_STRING "ACLs, " +# define set_posix_acl_flag(sb) ((sb)->s_flags |= MS_POSIXACL) +#else +# define XFS_ACL_STRING +# define set_posix_acl_flag(sb) do { } while (0) +#endif + +#ifdef CONFIG_XFS_SECURITY +# define XFS_SECURITY_STRING "security attributes, " +# define ENOSECURITY 0 +#else +# define XFS_SECURITY_STRING +# define ENOSECURITY EOPNOTSUPP +#endif + +#ifdef CONFIG_XFS_RT +# define XFS_REALTIME_STRING "realtime, " +#else +# define XFS_REALTIME_STRING +#endif + +#if XFS_BIG_BLKNOS +# if XFS_BIG_INUMS +# define XFS_BIGFS_STRING "large block/inode numbers, " +# else +# define XFS_BIGFS_STRING "large block numbers, " +# endif +#else +# define XFS_BIGFS_STRING +#endif + +#ifdef CONFIG_XFS_TRACE +# define XFS_TRACE_STRING "tracing, " +#else +# define XFS_TRACE_STRING +#endif + +#ifdef XFSDEBUG +# define XFS_DBG_STRING "debug" +#else +# define XFS_DBG_STRING "no debug" +#endif + +#define XFS_BUILD_OPTIONS XFS_ACL_STRING 
\ + XFS_SECURITY_STRING \ + XFS_REALTIME_STRING \ + XFS_BIGFS_STRING \ + XFS_TRACE_STRING \ + XFS_DBG_STRING /* DBG must be last */ + +#define LINVFS_GET_VFS(s) \ + (vfs_t *)((s)->s_fs_info) +#define LINVFS_SET_VFS(s, vfsp) \ + ((s)->s_fs_info = vfsp) + +struct xfs_inode; +struct xfs_mount; +struct xfs_buftarg; +struct block_device; + +extern __uint64_t xfs_max_file_offset(unsigned int); + +extern void xfs_initialize_vnode(bhv_desc_t *, vnode_t *, bhv_desc_t *, int); + +extern void xfs_flush_inode(struct xfs_inode *); +extern void xfs_flush_device(struct xfs_inode *); + +extern int xfs_blkdev_get(struct xfs_mount *, const char *, + struct block_device **); +extern void xfs_blkdev_put(struct block_device *); + +#endif /* __XFS_SUPER_H__ */ diff --git a/fs/xfs/linux/xfs_sysctl.c b/fs/xfs/linux/xfs_sysctl.c new file mode 100644 index 000000000..b9a97c9d7 --- /dev/null +++ b/fs/xfs/linux/xfs_sysctl.c @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" +#include "xfs_rw.h" +#include +#include + + +static struct ctl_table_header *xfs_table_header; + + +#ifdef CONFIG_PROC_FS +STATIC int +xfs_stats_clear_proc_handler( + ctl_table *ctl, + int write, + struct file *filp, + void *buffer, + size_t *lenp) +{ + int c, ret, *valp = ctl->data; + __uint32_t vn_active; + + ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp); + + if (!ret && write && *valp) { + printk("XFS Clearing xfsstats\n"); + for (c = 0; c < NR_CPUS; c++) { + if (!cpu_possible(c)) continue; + preempt_disable(); + /* save vn_active, it's a universal truth! 
*/ + vn_active = per_cpu(xfsstats, c).vn_active; + memset(&per_cpu(xfsstats, c), 0, + sizeof(struct xfsstats)); + per_cpu(xfsstats, c).vn_active = vn_active; + preempt_enable(); + } + xfs_stats_clear = 0; + } + + return ret; +} +#endif /* CONFIG_PROC_FS */ + +STATIC ctl_table xfs_table[] = { + {XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max}, + + {XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max}, + + {XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max}, + + {XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.panic_mask.min, &xfs_params.panic_mask.max}, + + {XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.error_level.min, &xfs_params.error_level.max}, + + {XFS_SYNC_INTERVAL, "sync_interval", &xfs_params.sync_interval.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.sync_interval.min, &xfs_params.sync_interval.max}, + + {XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.inherit_sync.min, &xfs_params.inherit_sync.max}, + + {XFS_INHERIT_NODUMP, "inherit_nodump", &xfs_params.inherit_nodump.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.inherit_nodump.min, &xfs_params.inherit_nodump.max}, + + {XFS_INHERIT_NOATIME, 
"inherit_noatime", &xfs_params.inherit_noatim.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max}, + + {XFS_FLUSH_INTERVAL, "flush_interval", &xfs_params.flush_interval.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.flush_interval.min, &xfs_params.flush_interval.max}, + + {XFS_AGE_BUFFER, "age_buffer", &xfs_params.age_buffer.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.age_buffer.min, &xfs_params.age_buffer.max}, + + /* please keep this the last entry */ +#ifdef CONFIG_PROC_FS + {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val, + sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler, + &sysctl_intvec, NULL, + &xfs_params.stats_clear.min, &xfs_params.stats_clear.max}, +#endif /* CONFIG_PROC_FS */ + + {0} +}; + +STATIC ctl_table xfs_dir_table[] = { + {FS_XFS, "xfs", NULL, 0, 0555, xfs_table}, + {0} +}; + +STATIC ctl_table xfs_root_table[] = { + {CTL_FS, "fs", NULL, 0, 0555, xfs_dir_table}, + {0} +}; + +void +xfs_sysctl_register(void) +{ + xfs_table_header = register_sysctl_table(xfs_root_table, 1); +} + +void +xfs_sysctl_unregister(void) +{ + if (xfs_table_header) + unregister_sysctl_table(xfs_table_header); +} diff --git a/fs/xfs/linux/xfs_sysctl.h b/fs/xfs/linux/xfs_sysctl.h new file mode 100644 index 000000000..0532d4012 --- /dev/null +++ b/fs/xfs/linux/xfs_sysctl.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#ifndef __XFS_SYSCTL_H__ +#define __XFS_SYSCTL_H__ + +#include + +/* + * Tunable xfs parameters + */ + +typedef struct xfs_sysctl_val { + int min; + int val; + int max; +} xfs_sysctl_val_t; + +typedef struct xfs_param { + xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/ + xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID bit if process' GID + * is not a member of the parent dir + * GID */ + xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */ + xfs_sysctl_val_t panic_mask; /* bitmask to cause panic on errors. */ + xfs_sysctl_val_t error_level; /* Degree of reporting for problems */ + xfs_sysctl_val_t sync_interval; /* time between sync calls */ + xfs_sysctl_val_t stats_clear; /* Reset all XFS statistics to zero. */ + xfs_sysctl_val_t inherit_sync; /* Inherit the "sync" inode flag. */ + xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */ + xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */ + xfs_sysctl_val_t flush_interval;/* interval between runs of the + * delwri flush daemon. 
*/ + xfs_sysctl_val_t age_buffer; /* time for buffer to age before + * we flush it. */ +} xfs_param_t; + +/* + * xfs_error_level: + * + * How much error reporting will be done when internal problems are + * encountered. These problems normally return an EFSCORRUPTED to their + * caller, with no other information reported. + * + * 0 No error reports + * 1 Report EFSCORRUPTED errors that will cause a filesystem shutdown + * 5 Report all EFSCORRUPTED errors (all of the above errors, plus any + * additional errors that are known to not cause shutdowns) + * + * xfs_panic_mask bit 0x8 turns the error reports into panics + */ + +enum { + XFS_RESTRICT_CHOWN = 3, + XFS_SGID_INHERIT = 4, + XFS_SYMLINK_MODE = 5, + XFS_PANIC_MASK = 6, + XFS_ERRLEVEL = 7, + XFS_SYNC_INTERVAL = 8, + XFS_STATS_CLEAR = 12, + XFS_INHERIT_SYNC = 13, + XFS_INHERIT_NODUMP = 14, + XFS_INHERIT_NOATIME = 15, + XFS_FLUSH_INTERVAL = 16, + XFS_AGE_BUFFER = 17, +}; + +extern xfs_param_t xfs_params; + +#ifdef CONFIG_SYSCTL +extern void xfs_sysctl_register(void); +extern void xfs_sysctl_unregister(void); +#else +# define xfs_sysctl_register() do { } while (0) +# define xfs_sysctl_unregister() do { } while (0) +#endif /* CONFIG_SYSCTL */ + +#endif /* __XFS_SYSCTL_H__ */ diff --git a/fs/xfs/linux/xfs_version.h b/fs/xfs/linux/xfs_version.h new file mode 100644 index 000000000..96f963944 --- /dev/null +++ b/fs/xfs/linux/xfs_version.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * Dummy file that can contain a timestamp to put into the + * XFS init string, to help users keep track of what they're + * running + */ + +#ifndef __XFS_VERSION_H__ +#define __XFS_VERSION_H__ + +#define XFS_VERSION_STRING "SGI XFS" + +#endif /* __XFS_VERSION_H__ */ diff --git a/fs/xfs/linux/xfs_vfs.c b/fs/xfs/linux/xfs_vfs.c new file mode 100644 index 000000000..2b75cccdf --- /dev/null +++ b/fs/xfs/linux/xfs_vfs.c @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. 
Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_macros.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_clnt.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_imap.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_quota.h" + +int +vfs_mount( + struct bhv_desc *bdp, + struct xfs_mount_args *args, + struct cred *cr) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_mount) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_mount)(next, args, cr)); +} + +int +vfs_parseargs( + struct bhv_desc *bdp, + char *s, + struct xfs_mount_args *args, + int f) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_parseargs) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_parseargs)(next, s, args, f)); +} + +int +vfs_showargs( + struct bhv_desc *bdp, + struct seq_file *m) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! 
(bhvtovfsops(next))->vfs_showargs) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_showargs)(next, m)); +} + +int +vfs_unmount( + struct bhv_desc *bdp, + int fl, + struct cred *cr) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_unmount) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_unmount)(next, fl, cr)); +} + +int +vfs_mntupdate( + struct bhv_desc *bdp, + int *fl, + struct xfs_mount_args *args) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_mntupdate) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_mntupdate)(next, fl, args)); +} + +int +vfs_root( + struct bhv_desc *bdp, + struct vnode **vpp) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_root) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_root)(next, vpp)); +} + +int +vfs_statvfs( + struct bhv_desc *bdp, + xfs_statfs_t *sp, + struct vnode *vp) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_statvfs) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_statvfs)(next, sp, vp)); +} + +int +vfs_sync( + struct bhv_desc *bdp, + int fl, + struct cred *cr) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_sync) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_sync)(next, fl, cr)); +} + +int +vfs_vget( + struct bhv_desc *bdp, + struct vnode **vpp, + struct fid *fidp) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_vget) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_vget)(next, vpp, fidp)); +} + +int +vfs_dmapiops( + struct bhv_desc *bdp, + caddr_t addr) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! 
(bhvtovfsops(next))->vfs_dmapiops) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_dmapiops)(next, addr)); +} + +int +vfs_quotactl( + struct bhv_desc *bdp, + int cmd, + int id, + caddr_t addr) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_quotactl) + next = BHV_NEXT(next); + return ((*bhvtovfsops(next)->vfs_quotactl)(next, cmd, id, addr)); +} + +void +vfs_init_vnode( + struct bhv_desc *bdp, + struct vnode *vp, + struct bhv_desc *bp, + int unlock) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_init_vnode) + next = BHV_NEXT(next); + ((*bhvtovfsops(next)->vfs_init_vnode)(next, vp, bp, unlock)); +} + +void +vfs_force_shutdown( + struct bhv_desc *bdp, + int fl, + char *file, + int line) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! (bhvtovfsops(next))->vfs_force_shutdown) + next = BHV_NEXT(next); + ((*bhvtovfsops(next)->vfs_force_shutdown)(next, fl, file, line)); +} + +void +vfs_freeze( + struct bhv_desc *bdp) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (! 
(bhvtovfsops(next))->vfs_freeze) + next = BHV_NEXT(next); + ((*bhvtovfsops(next)->vfs_freeze)(next)); +} + +vfs_t * +vfs_allocate( void ) +{ + struct vfs *vfsp; + + vfsp = kmem_zalloc(sizeof(vfs_t), KM_SLEEP); + bhv_head_init(VFS_BHVHEAD(vfsp), "vfs"); + init_waitqueue_head(&vfsp->vfs_wait_sync_task); + return vfsp; +} + +void +vfs_deallocate( + struct vfs *vfsp) +{ + bhv_head_destroy(VFS_BHVHEAD(vfsp)); + kmem_free(vfsp, sizeof(vfs_t)); +} + +void +vfs_insertops( + struct vfs *vfsp, + struct bhv_vfsops *vfsops) +{ + struct bhv_desc *bdp; + + bdp = kmem_alloc(sizeof(struct bhv_desc), KM_SLEEP); + bhv_desc_init(bdp, NULL, vfsp, vfsops); + bhv_insert(&vfsp->vfs_bh, bdp); +} + +void +vfs_insertbhv( + struct vfs *vfsp, + struct bhv_desc *bdp, + struct vfsops *vfsops, + void *mount) +{ + bhv_desc_init(bdp, mount, vfsp, vfsops); + bhv_insert_initial(&vfsp->vfs_bh, bdp); +} + +void +bhv_remove_vfsops( + struct vfs *vfsp, + int pos) +{ + struct bhv_desc *bhv; + + bhv = bhv_lookup_range(&vfsp->vfs_bh, pos, pos); + if (!bhv) + return; + bhv_remove(&vfsp->vfs_bh, bhv); + kmem_free(bhv, sizeof(*bhv)); +} + +void +bhv_remove_all_vfsops( + struct vfs *vfsp, + int freebase) +{ + struct xfs_mount *mp; + + bhv_remove_vfsops(vfsp, VFS_POSITION_QM); + bhv_remove_vfsops(vfsp, VFS_POSITION_DM); + if (!freebase) + return; + mp = XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfsp), &xfs_vfsops)); + VFS_REMOVEBHV(vfsp, &mp->m_bhv); + xfs_mount_free(mp, 0); +} + +void +bhv_insert_all_vfsops( + struct vfs *vfsp) +{ + struct xfs_mount *mp; + + mp = xfs_mount_init(); + vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp); + vfs_insertdmapi(vfsp); + vfs_insertquota(vfsp); +} diff --git a/fs/xfs/linux/xfs_vfs.h b/fs/xfs/linux/xfs_vfs.h new file mode 100644 index 000000000..dc1cd1973 --- /dev/null +++ b/fs/xfs/linux/xfs_vfs.h @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_VFS_H__ +#define __XFS_VFS_H__ + +#include +#include "xfs_fs.h" + +struct fid; +struct cred; +struct vnode; +struct kstatfs; +struct seq_file; +struct super_block; +struct xfs_mount_args; + +typedef struct kstatfs xfs_statfs_t; + +typedef struct vfs { + u_int vfs_flag; /* flags */ + xfs_fsid_t vfs_fsid; /* file system ID */ + xfs_fsid_t *vfs_altfsid; /* An ID fixed for life of FS */ + bhv_head_t vfs_bh; /* head of vfs behavior chain */ + struct super_block *vfs_super; /* Linux superblock structure */ + struct task_struct *vfs_sync_task; + wait_queue_head_t vfs_wait_sync_task; +} vfs_t; + +#define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */ + +#define bhvtovfs(bdp) ( (struct vfs *)BHV_VOBJ(bdp) ) +#define bhvtovfsops(bdp) ( (struct vfsops *)BHV_OPS(bdp) ) +#define VFS_BHVHEAD(vfs) ( &(vfs)->vfs_bh ) +#define VFS_REMOVEBHV(vfs, bdp) ( bhv_remove(VFS_BHVHEAD(vfs), bdp) ) + +#define VFS_POSITION_BASE BHV_POSITION_BASE /* chain bottom */ +#define VFS_POSITION_TOP BHV_POSITION_TOP /* chain top */ +#define VFS_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. 
num */ + +typedef enum { + VFS_BHV_UNKNOWN, /* not specified */ + VFS_BHV_XFS, /* xfs */ + VFS_BHV_DM, /* data migration */ + VFS_BHV_QM, /* quota manager */ + VFS_BHV_IO, /* IO path */ + VFS_BHV_END /* housekeeping end-of-range */ +} vfs_bhv_t; + +#define VFS_POSITION_XFS (BHV_POSITION_BASE) +#define VFS_POSITION_DM (VFS_POSITION_BASE+10) +#define VFS_POSITION_QM (VFS_POSITION_BASE+20) +#define VFS_POSITION_IO (VFS_POSITION_BASE+30) + +#define VFS_RDONLY 0x0001 /* read-only vfs */ +#define VFS_GRPID 0x0002 /* group-ID assigned from directory */ +#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */ +#define VFS_UMOUNT 0x0008 /* unmount in progress */ +#define VFS_END 0x0008 /* max flag */ + +#define SYNC_ATTR 0x0001 /* sync attributes */ +#define SYNC_CLOSE 0x0002 /* close file system down */ +#define SYNC_DELWRI 0x0004 /* look at delayed writes */ +#define SYNC_WAIT 0x0008 /* wait for i/o to complete */ +#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */ +#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. 
superblocks) */ +#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ +#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ + +typedef int (*vfs_mount_t)(bhv_desc_t *, + struct xfs_mount_args *, struct cred *); +typedef int (*vfs_parseargs_t)(bhv_desc_t *, char *, + struct xfs_mount_args *, int); +typedef int (*vfs_showargs_t)(bhv_desc_t *, struct seq_file *); +typedef int (*vfs_unmount_t)(bhv_desc_t *, int, struct cred *); +typedef int (*vfs_mntupdate_t)(bhv_desc_t *, int *, + struct xfs_mount_args *); +typedef int (*vfs_root_t)(bhv_desc_t *, struct vnode **); +typedef int (*vfs_statvfs_t)(bhv_desc_t *, xfs_statfs_t *, struct vnode *); +typedef int (*vfs_sync_t)(bhv_desc_t *, int, struct cred *); +typedef int (*vfs_vget_t)(bhv_desc_t *, struct vnode **, struct fid *); +typedef int (*vfs_dmapiops_t)(bhv_desc_t *, caddr_t); +typedef int (*vfs_quotactl_t)(bhv_desc_t *, int, int, caddr_t); +typedef void (*vfs_init_vnode_t)(bhv_desc_t *, + struct vnode *, bhv_desc_t *, int); +typedef void (*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int); +typedef void (*vfs_freeze_t)(bhv_desc_t *); + +typedef struct vfsops { + bhv_position_t vf_position; /* behavior chain position */ + vfs_mount_t vfs_mount; /* mount file system */ + vfs_parseargs_t vfs_parseargs; /* parse mount options */ + vfs_showargs_t vfs_showargs; /* unparse mount options */ + vfs_unmount_t vfs_unmount; /* unmount file system */ + vfs_mntupdate_t vfs_mntupdate; /* update file system options */ + vfs_root_t vfs_root; /* get root vnode */ + vfs_statvfs_t vfs_statvfs; /* file system statistics */ + vfs_sync_t vfs_sync; /* flush files */ + vfs_vget_t vfs_vget; /* get vnode from fid */ + vfs_dmapiops_t vfs_dmapiops; /* data migration */ + vfs_quotactl_t vfs_quotactl; /* disk quota */ + vfs_init_vnode_t vfs_init_vnode; /* initialize a new vnode */ + vfs_force_shutdown_t vfs_force_shutdown; /* crash and burn */ + vfs_freeze_t vfs_freeze; /* freeze fs for snapshot */ +} vfsops_t; + +/* + 
* VFS's. Operates on vfs structure pointers (starts at bhv head). + */ +#define VHEAD(v) ((v)->vfs_fbhv) +#define VFS_MOUNT(v, ma,cr, rv) ((rv) = vfs_mount(VHEAD(v), ma,cr)) +#define VFS_PARSEARGS(v, o,ma,f, rv) ((rv) = vfs_parseargs(VHEAD(v), o,ma,f)) +#define VFS_SHOWARGS(v, m, rv) ((rv) = vfs_showargs(VHEAD(v), m)) +#define VFS_UNMOUNT(v, f, cr, rv) ((rv) = vfs_unmount(VHEAD(v), f,cr)) +#define VFS_MNTUPDATE(v, fl, args, rv) ((rv) = vfs_mntupdate(VHEAD(v), fl, args)) +#define VFS_ROOT(v, vpp, rv) ((rv) = vfs_root(VHEAD(v), vpp)) +#define VFS_STATVFS(v, sp,vp, rv) ((rv) = vfs_statvfs(VHEAD(v), sp,vp)) +#define VFS_SYNC(v, flag,cr, rv) ((rv) = vfs_sync(VHEAD(v), flag,cr)) +#define VFS_VGET(v, vpp,fidp, rv) ((rv) = vfs_vget(VHEAD(v), vpp,fidp)) +#define VFS_DMAPIOPS(v, p, rv) ((rv) = vfs_dmapiops(VHEAD(v), p)) +#define VFS_QUOTACTL(v, c,id,p, rv) ((rv) = vfs_quotactl(VHEAD(v), c,id,p)) +#define VFS_INIT_VNODE(v, vp,b,ul) ( vfs_init_vnode(VHEAD(v), vp,b,ul) ) +#define VFS_FORCE_SHUTDOWN(v, fl,f,l) ( vfs_force_shutdown(VHEAD(v), fl,f,l) ) +#define VFS_FREEZE(v) ( vfs_freeze(VHEAD(v)) ) + +/* + * PVFS's. Operates on behavior descriptor pointers. 
+ */ +#define PVFS_MOUNT(b, ma,cr, rv) ((rv) = vfs_mount(b, ma,cr)) +#define PVFS_PARSEARGS(b, o,ma,f, rv) ((rv) = vfs_parseargs(b, o,ma,f)) +#define PVFS_SHOWARGS(b, m, rv) ((rv) = vfs_showargs(b, m)) +#define PVFS_UNMOUNT(b, f,cr, rv) ((rv) = vfs_unmount(b, f,cr)) +#define PVFS_MNTUPDATE(b, fl, args, rv) ((rv) = vfs_mntupdate(b, fl, args)) +#define PVFS_ROOT(b, vpp, rv) ((rv) = vfs_root(b, vpp)) +#define PVFS_STATVFS(b, sp,vp, rv) ((rv) = vfs_statvfs(b, sp,vp)) +#define PVFS_SYNC(b, flag,cr, rv) ((rv) = vfs_sync(b, flag,cr)) +#define PVFS_VGET(b, vpp,fidp, rv) ((rv) = vfs_vget(b, vpp,fidp)) +#define PVFS_DMAPIOPS(b, p, rv) ((rv) = vfs_dmapiops(b, p)) +#define PVFS_QUOTACTL(b, c,id,p, rv) ((rv) = vfs_quotactl(b, c,id,p)) +#define PVFS_INIT_VNODE(b, vp,b2,ul) ( vfs_init_vnode(b, vp,b2,ul) ) +#define PVFS_FORCE_SHUTDOWN(b, fl,f,l) ( vfs_force_shutdown(b, fl,f,l) ) +#define PVFS_FREEZE(b) ( vfs_freeze(b) ) + +extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *); +extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int); +extern int vfs_showargs(bhv_desc_t *, struct seq_file *); +extern int vfs_unmount(bhv_desc_t *, int, struct cred *); +extern int vfs_mntupdate(bhv_desc_t *, int *, struct xfs_mount_args *); +extern int vfs_root(bhv_desc_t *, struct vnode **); +extern int vfs_statvfs(bhv_desc_t *, xfs_statfs_t *, struct vnode *); +extern int vfs_sync(bhv_desc_t *, int, struct cred *); +extern int vfs_vget(bhv_desc_t *, struct vnode **, struct fid *); +extern int vfs_dmapiops(bhv_desc_t *, caddr_t); +extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t); +extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int); +extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int); +extern void vfs_freeze(bhv_desc_t *); + +typedef struct bhv_vfsops { + struct vfsops bhv_common; + void * bhv_custom; +} bhv_vfsops_t; + +#define vfs_bhv_lookup(v, id) ( bhv_lookup_range(&(v)->vfs_bh, (id), (id)) ) +#define 
vfs_bhv_custom(b) ( ((bhv_vfsops_t *)BHV_OPS(b))->bhv_custom ) +#define vfs_bhv_set_custom(b,o) ( (b)->bhv_custom = (void *)(o)) +#define vfs_bhv_clr_custom(b) ( (b)->bhv_custom = NULL ) + +extern vfs_t *vfs_allocate(void); +extern void vfs_deallocate(vfs_t *); +extern void vfs_insertops(vfs_t *, bhv_vfsops_t *); +extern void vfs_insertbhv(vfs_t *, bhv_desc_t *, vfsops_t *, void *); + +extern void bhv_insert_all_vfsops(struct vfs *); +extern void bhv_remove_all_vfsops(struct vfs *, int); +extern void bhv_remove_vfsops(struct vfs *, int); + +#endif /* __XFS_VFS_H__ */ diff --git a/fs/xfs/linux/xfs_vnode.c b/fs/xfs/linux/xfs_vnode.c new file mode 100644 index 000000000..9240efb2b --- /dev/null +++ b/fs/xfs/linux/xfs_vnode.c @@ -0,0 +1,442 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + + +uint64_t vn_generation; /* vnode generation number */ +spinlock_t vnumber_lock = SPIN_LOCK_UNLOCKED; + +/* + * Dedicated vnode inactive/reclaim sync semaphores. + * Prime number of hash buckets since address is used as the key. + */ +#define NVSYNC 37 +#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) +sv_t vsync[NVSYNC]; + +/* + * Translate stat(2) file types to vnode types and vice versa. + * Aware of numeric order of S_IFMT and vnode type values. + */ +enum vtype iftovt_tab[] = { + VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, + VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON +}; + +u_short vttoif_tab[] = { + 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK +}; + + +void +vn_init(void) +{ + register sv_t *svp; + register int i; + + for (svp = vsync, i = 0; i < NVSYNC; i++, svp++) + init_sv(svp, SV_DEFAULT, "vsy", i); +} + +/* + * Clean a vnode of filesystem-specific data and prepare it for reuse. + */ +STATIC int +vn_reclaim( + struct vnode *vp) +{ + int error; + + XFS_STATS_INC(vn_reclaim); + vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address); + + /* + * Only make the VOP_RECLAIM call if there are behaviors + * to call. 
+ */ + if (vp->v_fbhv) { + VOP_RECLAIM(vp, error); + if (error) + return -error; + } + ASSERT(vp->v_fbhv == NULL); + + VN_LOCK(vp); + vp->v_flag &= (VRECLM|VWAIT); + VN_UNLOCK(vp, 0); + + vp->v_type = VNON; + vp->v_fbhv = NULL; + +#ifdef XFS_VNODE_TRACE + ktrace_free(vp->v_trace); + vp->v_trace = NULL; +#endif + + return 0; +} + +STATIC void +vn_wakeup( + struct vnode *vp) +{ + VN_LOCK(vp); + if (vp->v_flag & VWAIT) + sv_broadcast(vptosync(vp)); + vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED); + VN_UNLOCK(vp, 0); +} + +int +vn_wait( + struct vnode *vp) +{ + VN_LOCK(vp); + if (vp->v_flag & (VINACT | VRECLM)) { + vp->v_flag |= VWAIT; + sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0); + return 1; + } + VN_UNLOCK(vp, 0); + return 0; +} + +struct vnode * +vn_initialize( + struct inode *inode) +{ + struct vnode *vp = LINVFS_GET_VP(inode); + + XFS_STATS_INC(vn_active); + XFS_STATS_INC(vn_alloc); + + vp->v_flag = VMODIFIED; + spinlock_init(&vp->v_lock, "v_lock"); + + spin_lock(&vnumber_lock); + if (!++vn_generation) /* v_number shouldn't be zero */ + vn_generation++; + vp->v_number = vn_generation; + spin_unlock(&vnumber_lock); + + ASSERT(VN_CACHED(vp) == 0); + + /* Initialize the first behavior and the behavior chain head. */ + vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode"); + +#ifdef XFS_VNODE_TRACE + vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP); + printk("Allocated VNODE_TRACE at 0x%p\n", vp->v_trace); +#endif /* XFS_VNODE_TRACE */ + + vn_trace_exit(vp, "vn_initialize", (inst_t *)__return_address); + return vp; +} + +/* + * Get a reference on a vnode. 
+ */ +vnode_t * +vn_get( + struct vnode *vp, + vmap_t *vmap) +{ + struct inode *inode; + + XFS_STATS_INC(vn_get); + inode = LINVFS_GET_IP(vp); + if (inode->i_state & I_FREEING) + return NULL; + + inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino); + if (!inode) /* Inode not present */ + return NULL; + + vn_trace_exit(vp, "vn_get", (inst_t *)__return_address); + + return vp; +} + +/* + * Revalidate the Linux inode from the vnode. + */ +int +vn_revalidate( + struct vnode *vp) +{ + struct inode *inode; + vattr_t va; + int error; + + vn_trace_entry(vp, "vn_revalidate", (inst_t *)__return_address); + ASSERT(vp->v_fbhv != NULL); + + va.va_mask = XFS_AT_STAT|XFS_AT_XFLAGS; + VOP_GETATTR(vp, &va, 0, NULL, error); + if (!error) { + inode = LINVFS_GET_IP(vp); + inode->i_mode = VTTOIF(va.va_type) | va.va_mode; + inode->i_nlink = va.va_nlink; + inode->i_uid = va.va_uid; + inode->i_gid = va.va_gid; + inode->i_blocks = va.va_nblocks; + inode->i_mtime = va.va_mtime; + inode->i_ctime = va.va_ctime; + inode->i_atime = va.va_atime; + if (va.va_xflags & XFS_XFLAG_IMMUTABLE) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; + if (va.va_xflags & XFS_XFLAG_APPEND) + inode->i_flags |= S_APPEND; + else + inode->i_flags &= ~S_APPEND; + if (va.va_xflags & XFS_XFLAG_SYNC) + inode->i_flags |= S_SYNC; + else + inode->i_flags &= ~S_SYNC; + if (va.va_xflags & XFS_XFLAG_NOATIME) + inode->i_flags |= S_NOATIME; + else + inode->i_flags &= ~S_NOATIME; + VUNMODIFY(vp); + } + return -error; +} + +/* + * purge a vnode from the cache + * At this point the vnode is guaranteed to have no references (vn_count == 0) + * The caller has to make sure that there are no ways someone could + * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock). 
+ */ +void +vn_purge( + struct vnode *vp, + vmap_t *vmap) +{ + vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address); + +again: + /* + * Check whether vp has already been reclaimed since our caller + * sampled its version while holding a filesystem cache lock that + * its VOP_RECLAIM function acquires. + */ + VN_LOCK(vp); + if (vp->v_number != vmap->v_number) { + VN_UNLOCK(vp, 0); + return; + } + + /* + * If vp is being reclaimed or inactivated, wait until it is inert, + * then proceed. Can't assume that vnode is actually reclaimed + * just because the reclaimed flag is asserted -- a vn_alloc + * reclaim can fail. + */ + if (vp->v_flag & (VINACT | VRECLM)) { + ASSERT(vn_count(vp) == 0); + vp->v_flag |= VWAIT; + sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0); + goto again; + } + + /* + * Another process could have raced in and gotten this vnode... + */ + if (vn_count(vp) > 0) { + VN_UNLOCK(vp, 0); + return; + } + + XFS_STATS_DEC(vn_active); + vp->v_flag |= VRECLM; + VN_UNLOCK(vp, 0); + + /* + * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells + * vp's filesystem to flush and invalidate all cached resources. + * When vn_reclaim returns, vp should have no private data, + * either in a system cache or attached to v_data. + */ + if (vn_reclaim(vp) != 0) + panic("vn_purge: cannot reclaim"); + + /* + * Wakeup anyone waiting for vp to be reclaimed. + */ + vn_wakeup(vp); +} + +/* + * Add a reference to a referenced vnode. + */ +struct vnode * +vn_hold( + struct vnode *vp) +{ + struct inode *inode; + + XFS_STATS_INC(vn_hold); + + VN_LOCK(vp); + inode = igrab(LINVFS_GET_IP(vp)); + ASSERT(inode); + VN_UNLOCK(vp, 0); + + return vp; +} + +/* + * Call VOP_INACTIVE on last reference. 
+ */ +void +vn_rele( + struct vnode *vp) +{ + int vcnt; + int cache; + + XFS_STATS_INC(vn_rele); + + VN_LOCK(vp); + + vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address); + vcnt = vn_count(vp); + + /* + * Since we always get called from put_inode we know + * that i_count won't be decremented after we + * return. + */ + if (!vcnt) { + /* + * As soon as we turn this on, noone can find us in vn_get + * until we turn off VINACT or VRECLM + */ + vp->v_flag |= VINACT; + VN_UNLOCK(vp, 0); + + /* + * Do not make the VOP_INACTIVE call if there + * are no behaviors attached to the vnode to call. + */ + if (vp->v_fbhv) + VOP_INACTIVE(vp, NULL, cache); + + VN_LOCK(vp); + if (vp->v_flag & VWAIT) + sv_broadcast(vptosync(vp)); + + vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED); + } + + VN_UNLOCK(vp, 0); + + vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address); +} + +/* + * Finish the removal of a vnode. + */ +void +vn_remove( + struct vnode *vp) +{ + vmap_t vmap; + + /* Make sure we don't do this to the same vnode twice */ + if (!(vp->v_fbhv)) + return; + + XFS_STATS_INC(vn_remove); + vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address); + + /* + * After the following purge the vnode + * will no longer exist. + */ + VMAP(vp, vmap); + vn_purge(vp, &vmap); +} + + +#ifdef XFS_VNODE_TRACE + +#define KTRACE_ENTER(vp, vk, s, line, ra) \ + ktrace_enter( (vp)->v_trace, \ +/* 0 */ (void *)(__psint_t)(vk), \ +/* 1 */ (void *)(s), \ +/* 2 */ (void *)(__psint_t) line, \ +/* 3 */ (void *)(vn_count(vp)), \ +/* 4 */ (void *)(ra), \ +/* 5 */ (void *)(__psunsigned_t)(vp)->v_flag, \ +/* 6 */ (void *)(__psint_t)smp_processor_id(), \ +/* 7 */ (void *)(__psint_t)(current->pid), \ +/* 8 */ (void *)__return_address, \ +/* 9 */ 0, 0, 0, 0, 0, 0, 0) + +/* + * Vnode tracing code. 
+ */ +void +vn_trace_entry(vnode_t *vp, char *func, inst_t *ra) +{ + KTRACE_ENTER(vp, VNODE_KTRACE_ENTRY, func, 0, ra); +} + +void +vn_trace_exit(vnode_t *vp, char *func, inst_t *ra) +{ + KTRACE_ENTER(vp, VNODE_KTRACE_EXIT, func, 0, ra); +} + +void +vn_trace_hold(vnode_t *vp, char *file, int line, inst_t *ra) +{ + KTRACE_ENTER(vp, VNODE_KTRACE_HOLD, file, line, ra); +} + +void +vn_trace_ref(vnode_t *vp, char *file, int line, inst_t *ra) +{ + KTRACE_ENTER(vp, VNODE_KTRACE_REF, file, line, ra); +} + +void +vn_trace_rele(vnode_t *vp, char *file, int line, inst_t *ra) +{ + KTRACE_ENTER(vp, VNODE_KTRACE_RELE, file, line, ra); +} +#endif /* XFS_VNODE_TRACE */ diff --git a/fs/xfs/linux/xfs_vnode.h b/fs/xfs/linux/xfs_vnode.h new file mode 100644 index 000000000..af0b65fe5 --- /dev/null +++ b/fs/xfs/linux/xfs_vnode.h @@ -0,0 +1,651 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + * + * Portions Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#ifndef __XFS_VNODE_H__ +#define __XFS_VNODE_H__ + +struct uio; +struct file; +struct vattr; +struct xfs_iomap; +struct attrlist_cursor_kern; + +/* + * Vnode types. VNON means no type. + */ +enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK }; + +typedef xfs_ino_t vnumber_t; +typedef struct dentry vname_t; +typedef bhv_head_t vn_bhv_head_t; + +/* + * MP locking protocols: + * v_flag, v_vfsp VN_LOCK/VN_UNLOCK + * v_type read-only or fs-dependent + */ +typedef struct vnode { + __u32 v_flag; /* vnode flags (see below) */ + enum vtype v_type; /* vnode type */ + struct vfs *v_vfsp; /* ptr to containing VFS */ + vnumber_t v_number; /* in-core vnode number */ + vn_bhv_head_t v_bh; /* behavior head */ + spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */ + struct inode v_inode; /* Linux inode */ +#ifdef XFS_VNODE_TRACE + struct ktrace *v_trace; /* trace header structure */ +#endif +} vnode_t; + +#define v_fbhv v_bh.bh_first /* first behavior */ +#define v_fops v_bh.bh_first->bd_ops /* first behavior ops */ + +#define VNODE_POSITION_BASE BHV_POSITION_BASE /* chain bottom */ +#define VNODE_POSITION_TOP BHV_POSITION_TOP /* chain top */ +#define VNODE_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. num */ + +typedef enum { + VN_BHV_UNKNOWN, /* not specified */ + VN_BHV_XFS, /* xfs */ + VN_BHV_DM, /* data migration */ + VN_BHV_QM, /* quota manager */ + VN_BHV_IO, /* IO path */ + VN_BHV_END /* housekeeping end-of-range */ +} vn_bhv_t; + +#define VNODE_POSITION_XFS (VNODE_POSITION_BASE) +#define VNODE_POSITION_DM (VNODE_POSITION_BASE+10) +#define VNODE_POSITION_QM (VNODE_POSITION_BASE+20) +#define VNODE_POSITION_IO (VNODE_POSITION_BASE+30) + +/* + * Macros for dealing with the behavior descriptor inside of the vnode. 
+ */ +#define BHV_TO_VNODE(bdp) ((vnode_t *)BHV_VOBJ(bdp)) +#define BHV_TO_VNODE_NULL(bdp) ((vnode_t *)BHV_VOBJNULL(bdp)) + +#define VN_BHV_HEAD(vp) ((bhv_head_t *)(&((vp)->v_bh))) +#define vn_bhv_head_init(bhp,name) bhv_head_init(bhp,name) +#define vn_bhv_remove(bhp,bdp) bhv_remove(bhp,bdp) +#define vn_bhv_lookup(bhp,ops) bhv_lookup(bhp,ops) +#define vn_bhv_lookup_unlocked(bhp,ops) bhv_lookup_unlocked(bhp,ops) + +/* + * Vnode to Linux inode mapping. + */ +#define LINVFS_GET_VP(inode) ((vnode_t *)list_entry(inode, vnode_t, v_inode)) +#define LINVFS_GET_IP(vp) (&(vp)->v_inode) + +/* + * Convert between vnode types and inode formats (since POSIX.1 + * defines mode word of stat structure in terms of inode formats). + */ +extern enum vtype iftovt_tab[]; +extern u_short vttoif_tab[]; +#define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) +#define VTTOIF(indx) (vttoif_tab[(int)(indx)]) +#define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) + + +/* + * Vnode flags. + */ +#define VINACT 0x1 /* vnode is being inactivated */ +#define VRECLM 0x2 /* vnode is being reclaimed */ +#define VWAIT 0x4 /* waiting for VINACT/VRECLM to end */ +#define VMODIFIED 0x8 /* XFS inode state possibly differs */ + /* to the Linux inode state. */ + +/* + * Values for the VOP_RWLOCK and VOP_RWUNLOCK flags parameter. + */ +typedef enum vrwlock { + VRWLOCK_NONE, + VRWLOCK_READ, + VRWLOCK_WRITE, + VRWLOCK_WRITE_DIRECT, + VRWLOCK_TRY_READ, + VRWLOCK_TRY_WRITE +} vrwlock_t; + +/* + * Return values for VOP_INACTIVE. A return value of + * VN_INACTIVE_NOCACHE implies that the file system behavior + * has disassociated its state and bhv_desc_t from the vnode. + */ +#define VN_INACTIVE_CACHE 0 +#define VN_INACTIVE_NOCACHE 1 + +/* + * Values for the cmd code given to VOP_VNODE_CHANGE. 
+ */ +typedef enum vchange { + VCHANGE_FLAGS_FRLOCKS = 0, + VCHANGE_FLAGS_ENF_LOCKING = 1, + VCHANGE_FLAGS_TRUNCATED = 2, + VCHANGE_FLAGS_PAGE_DIRTY = 3, + VCHANGE_FLAGS_IOEXCL_COUNT = 4 +} vchange_t; + + +typedef int (*vop_open_t)(bhv_desc_t *, struct cred *); +typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *, + const struct iovec *, unsigned int, + loff_t *, int, struct cred *); +typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *, + const struct iovec *, unsigned int, + loff_t *, int, struct cred *); +typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *, + loff_t *, int, size_t, read_actor_t, + void *, struct cred *); +typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *, + int, unsigned int, unsigned long); +typedef int (*vop_getattr_t)(bhv_desc_t *, struct vattr *, int, + struct cred *); +typedef int (*vop_setattr_t)(bhv_desc_t *, struct vattr *, int, + struct cred *); +typedef int (*vop_access_t)(bhv_desc_t *, int, struct cred *); +typedef int (*vop_lookup_t)(bhv_desc_t *, vname_t *, vnode_t **, + int, vnode_t *, struct cred *); +typedef int (*vop_create_t)(bhv_desc_t *, vname_t *, struct vattr *, + vnode_t **, struct cred *); +typedef int (*vop_remove_t)(bhv_desc_t *, vname_t *, struct cred *); +typedef int (*vop_link_t)(bhv_desc_t *, vnode_t *, vname_t *, + struct cred *); +typedef int (*vop_rename_t)(bhv_desc_t *, vname_t *, vnode_t *, vname_t *, + struct cred *); +typedef int (*vop_mkdir_t)(bhv_desc_t *, vname_t *, struct vattr *, + vnode_t **, struct cred *); +typedef int (*vop_rmdir_t)(bhv_desc_t *, vname_t *, struct cred *); +typedef int (*vop_readdir_t)(bhv_desc_t *, struct uio *, struct cred *, + int *); +typedef int (*vop_symlink_t)(bhv_desc_t *, vname_t *, struct vattr *, + char *, vnode_t **, struct cred *); +typedef int (*vop_readlink_t)(bhv_desc_t *, struct uio *, int, + struct cred *); +typedef int (*vop_fsync_t)(bhv_desc_t *, int, struct cred *, + xfs_off_t, xfs_off_t); +typedef int 
(*vop_inactive_t)(bhv_desc_t *, struct cred *); +typedef int (*vop_fid2_t)(bhv_desc_t *, struct fid *); +typedef int (*vop_release_t)(bhv_desc_t *); +typedef int (*vop_rwlock_t)(bhv_desc_t *, vrwlock_t); +typedef void (*vop_rwunlock_t)(bhv_desc_t *, vrwlock_t); +typedef int (*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int, + struct xfs_iomap *, int *); +typedef int (*vop_reclaim_t)(bhv_desc_t *); +typedef int (*vop_attr_get_t)(bhv_desc_t *, char *, char *, int *, int, + struct cred *); +typedef int (*vop_attr_set_t)(bhv_desc_t *, char *, char *, int, int, + struct cred *); +typedef int (*vop_attr_remove_t)(bhv_desc_t *, char *, int, struct cred *); +typedef int (*vop_attr_list_t)(bhv_desc_t *, char *, int, int, + struct attrlist_cursor_kern *, struct cred *); +typedef void (*vop_link_removed_t)(bhv_desc_t *, vnode_t *, int); +typedef void (*vop_vnode_change_t)(bhv_desc_t *, vchange_t, __psint_t); +typedef void (*vop_ptossvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); +typedef void (*vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int); +typedef int (*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, + uint64_t, int); +typedef int (*vop_iflush_t)(bhv_desc_t *, int); + + +typedef struct vnodeops { + bhv_position_t vn_position; /* position within behavior chain */ + vop_open_t vop_open; + vop_read_t vop_read; + vop_write_t vop_write; + vop_sendfile_t vop_sendfile; + vop_ioctl_t vop_ioctl; + vop_getattr_t vop_getattr; + vop_setattr_t vop_setattr; + vop_access_t vop_access; + vop_lookup_t vop_lookup; + vop_create_t vop_create; + vop_remove_t vop_remove; + vop_link_t vop_link; + vop_rename_t vop_rename; + vop_mkdir_t vop_mkdir; + vop_rmdir_t vop_rmdir; + vop_readdir_t vop_readdir; + vop_symlink_t vop_symlink; + vop_readlink_t vop_readlink; + vop_fsync_t vop_fsync; + vop_inactive_t vop_inactive; + vop_fid2_t vop_fid2; + vop_rwlock_t vop_rwlock; + vop_rwunlock_t vop_rwunlock; + vop_bmap_t vop_bmap; + vop_reclaim_t vop_reclaim; + vop_attr_get_t vop_attr_get; 
+ vop_attr_set_t vop_attr_set; + vop_attr_remove_t vop_attr_remove; + vop_attr_list_t vop_attr_list; + vop_link_removed_t vop_link_removed; + vop_vnode_change_t vop_vnode_change; + vop_ptossvp_t vop_tosspages; + vop_pflushinvalvp_t vop_flushinval_pages; + vop_pflushvp_t vop_flush_pages; + vop_release_t vop_release; + vop_iflush_t vop_iflush; +} vnodeops_t; + +/* + * VOP's. + */ +#define _VOP_(op, vp) (*((vnodeops_t *)(vp)->v_fops)->op) + +#define VOP_READ(vp,file,iov,segs,offset,ioflags,cr,rv) \ + rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr) +#define VOP_WRITE(vp,file,iov,segs,offset,ioflags,cr,rv) \ + rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr) +#define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv) \ + rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr) +#define VOP_BMAP(vp,of,sz,rw,b,n,rv) \ + rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n) +#define VOP_OPEN(vp, cr, rv) \ + rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr) +#define VOP_GETATTR(vp, vap, f, cr, rv) \ + rv = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr) +#define VOP_SETATTR(vp, vap, f, cr, rv) \ + rv = _VOP_(vop_setattr, vp)((vp)->v_fbhv, vap, f, cr) +#define VOP_ACCESS(vp, mode, cr, rv) \ + rv = _VOP_(vop_access, vp)((vp)->v_fbhv, mode, cr) +#define VOP_LOOKUP(vp,d,vpp,f,rdir,cr,rv) \ + rv = _VOP_(vop_lookup, vp)((vp)->v_fbhv,d,vpp,f,rdir,cr) +#define VOP_CREATE(dvp,d,vap,vpp,cr,rv) \ + rv = _VOP_(vop_create, dvp)((dvp)->v_fbhv,d,vap,vpp,cr) +#define VOP_REMOVE(dvp,d,cr,rv) \ + rv = _VOP_(vop_remove, dvp)((dvp)->v_fbhv,d,cr) +#define VOP_LINK(tdvp,fvp,d,cr,rv) \ + rv = _VOP_(vop_link, tdvp)((tdvp)->v_fbhv,fvp,d,cr) +#define VOP_RENAME(fvp,fnm,tdvp,tnm,cr,rv) \ + rv = _VOP_(vop_rename, fvp)((fvp)->v_fbhv,fnm,tdvp,tnm,cr) +#define VOP_MKDIR(dp,d,vap,vpp,cr,rv) \ + rv = _VOP_(vop_mkdir, dp)((dp)->v_fbhv,d,vap,vpp,cr) +#define VOP_RMDIR(dp,d,cr,rv) \ + rv = _VOP_(vop_rmdir, dp)((dp)->v_fbhv,d,cr) +#define 
VOP_READDIR(vp,uiop,cr,eofp,rv) \ + rv = _VOP_(vop_readdir, vp)((vp)->v_fbhv,uiop,cr,eofp) +#define VOP_SYMLINK(dvp,d,vap,tnm,vpp,cr,rv) \ + rv = _VOP_(vop_symlink, dvp) ((dvp)->v_fbhv,d,vap,tnm,vpp,cr) +#define VOP_READLINK(vp,uiop,fl,cr,rv) \ + rv = _VOP_(vop_readlink, vp)((vp)->v_fbhv,uiop,fl,cr) +#define VOP_FSYNC(vp,f,cr,b,e,rv) \ + rv = _VOP_(vop_fsync, vp)((vp)->v_fbhv,f,cr,b,e) +#define VOP_INACTIVE(vp, cr, rv) \ + rv = _VOP_(vop_inactive, vp)((vp)->v_fbhv, cr) +#define VOP_RELEASE(vp, rv) \ + rv = _VOP_(vop_release, vp)((vp)->v_fbhv) +#define VOP_FID2(vp, fidp, rv) \ + rv = _VOP_(vop_fid2, vp)((vp)->v_fbhv, fidp) +#define VOP_RWLOCK(vp,i) \ + (void)_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i) +#define VOP_RWLOCK_TRY(vp,i) \ + _VOP_(vop_rwlock, vp)((vp)->v_fbhv, i) +#define VOP_RWUNLOCK(vp,i) \ + (void)_VOP_(vop_rwunlock, vp)((vp)->v_fbhv, i) +#define VOP_FRLOCK(vp,c,fl,flags,offset,fr,rv) \ + rv = _VOP_(vop_frlock, vp)((vp)->v_fbhv,c,fl,flags,offset,fr) +#define VOP_RECLAIM(vp, rv) \ + rv = _VOP_(vop_reclaim, vp)((vp)->v_fbhv) +#define VOP_ATTR_GET(vp, name, val, vallenp, fl, cred, rv) \ + rv = _VOP_(vop_attr_get, vp)((vp)->v_fbhv,name,val,vallenp,fl,cred) +#define VOP_ATTR_SET(vp, name, val, vallen, fl, cred, rv) \ + rv = _VOP_(vop_attr_set, vp)((vp)->v_fbhv,name,val,vallen,fl,cred) +#define VOP_ATTR_REMOVE(vp, name, flags, cred, rv) \ + rv = _VOP_(vop_attr_remove, vp)((vp)->v_fbhv,name,flags,cred) +#define VOP_ATTR_LIST(vp, buf, buflen, fl, cursor, cred, rv) \ + rv = _VOP_(vop_attr_list, vp)((vp)->v_fbhv,buf,buflen,fl,cursor,cred) +#define VOP_LINK_REMOVED(vp, dvp, linkzero) \ + (void)_VOP_(vop_link_removed, vp)((vp)->v_fbhv, dvp, linkzero) +#define VOP_VNODE_CHANGE(vp, cmd, val) \ + (void)_VOP_(vop_vnode_change, vp)((vp)->v_fbhv,cmd,val) +/* + * These are page cache functions that now go thru VOPs. 
+ * 'last' parameter is unused and left in for IRIX compatibility + */ +#define VOP_TOSS_PAGES(vp, first, last, fiopt) \ + _VOP_(vop_tosspages, vp)((vp)->v_fbhv,first, last, fiopt) +/* + * 'last' parameter is unused and left in for IRIX compatibility + */ +#define VOP_FLUSHINVAL_PAGES(vp, first, last, fiopt) \ + _VOP_(vop_flushinval_pages, vp)((vp)->v_fbhv,first,last,fiopt) +/* + * 'last' parameter is unused and left in for IRIX compatibility + */ +#define VOP_FLUSH_PAGES(vp, first, last, flags, fiopt, rv) \ + rv = _VOP_(vop_flush_pages, vp)((vp)->v_fbhv,first,last,flags,fiopt) +#define VOP_IOCTL(vp, inode, filp, fl, cmd, arg, rv) \ + rv = _VOP_(vop_ioctl, vp)((vp)->v_fbhv,inode,filp,fl,cmd,arg) +#define VOP_IFLUSH(vp, flags, rv) \ + rv = _VOP_(vop_iflush, vp)((vp)->v_fbhv, flags) + +/* + * Flags for read/write calls - same values as IRIX + */ +#define IO_ISDIRECT 0x00004 /* bypass page cache */ +#define IO_INVIS 0x00020 /* don't update inode timestamps */ + +/* + * Flags for VOP_IFLUSH call + */ +#define FLUSH_SYNC 1 /* wait for flush to complete */ +#define FLUSH_INODE 2 /* flush the inode itself */ +#define FLUSH_LOG 4 /* force the last log entry for + * this inode out to disk */ + +/* + * Flush/Invalidate options for VOP_TOSS_PAGES, VOP_FLUSHINVAL_PAGES and + * VOP_FLUSH_PAGES. + */ +#define FI_NONE 0 /* none */ +#define FI_REMAPF 1 /* Do a remapf prior to the operation */ +#define FI_REMAPF_LOCKED 2 /* Do a remapf prior to the operation. + Prevent VM access to the pages until + the operation completes. */ + +/* + * Vnode attributes. va_mask indicates those attributes the caller + * wants to set or extract. 
+ */ +typedef struct vattr { + int va_mask; /* bit-mask of attributes present */ + enum vtype va_type; /* vnode type (for create) */ + mode_t va_mode; /* file access mode and type */ + nlink_t va_nlink; /* number of references to file */ + uid_t va_uid; /* owner user id */ + gid_t va_gid; /* owner group id */ + xfs_ino_t va_nodeid; /* file id */ + xfs_off_t va_size; /* file size in bytes */ + u_long va_blocksize; /* blocksize preferred for i/o */ + struct timespec va_atime; /* time of last access */ + struct timespec va_mtime; /* time of last modification */ + struct timespec va_ctime; /* time file changed */ + u_int va_gen; /* generation number of file */ + xfs_dev_t va_rdev; /* device the special file represents */ + __int64_t va_nblocks; /* number of blocks allocated */ + u_long va_xflags; /* random extended file flags */ + u_long va_extsize; /* file extent size */ + u_long va_nextents; /* number of extents in file */ + u_long va_anextents; /* number of attr extents in file */ + int va_projid; /* project id */ +} vattr_t; + +/* + * setattr or getattr attributes + */ +#define XFS_AT_TYPE 0x00000001 +#define XFS_AT_MODE 0x00000002 +#define XFS_AT_UID 0x00000004 +#define XFS_AT_GID 0x00000008 +#define XFS_AT_FSID 0x00000010 +#define XFS_AT_NODEID 0x00000020 +#define XFS_AT_NLINK 0x00000040 +#define XFS_AT_SIZE 0x00000080 +#define XFS_AT_ATIME 0x00000100 +#define XFS_AT_MTIME 0x00000200 +#define XFS_AT_CTIME 0x00000400 +#define XFS_AT_RDEV 0x00000800 +#define XFS_AT_BLKSIZE 0x00001000 +#define XFS_AT_NBLOCKS 0x00002000 +#define XFS_AT_VCODE 0x00004000 +#define XFS_AT_MAC 0x00008000 +#define XFS_AT_UPDATIME 0x00010000 +#define XFS_AT_UPDMTIME 0x00020000 +#define XFS_AT_UPDCTIME 0x00040000 +#define XFS_AT_ACL 0x00080000 +#define XFS_AT_CAP 0x00100000 +#define XFS_AT_INF 0x00200000 +#define XFS_AT_XFLAGS 0x00400000 +#define XFS_AT_EXTSIZE 0x00800000 +#define XFS_AT_NEXTENTS 0x01000000 +#define XFS_AT_ANEXTENTS 0x02000000 +#define XFS_AT_PROJID 0x04000000 +#define 
XFS_AT_SIZE_NOPERM 0x08000000 +#define XFS_AT_GENCOUNT 0x10000000 + +#define XFS_AT_ALL (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\ + XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\ + XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\ + XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\ + XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\ + XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT) + +#define XFS_AT_STAT (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\ + XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\ + XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\ + XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_PROJID) + +#define XFS_AT_TIMES (XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME) + +#define XFS_AT_UPDTIMES (XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME) + +#define XFS_AT_NOSET (XFS_AT_NLINK|XFS_AT_RDEV|XFS_AT_FSID|XFS_AT_NODEID|\ + XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\ + XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT) + +/* + * Modes. + */ +#define VSUID S_ISUID /* set user id on execution */ +#define VSGID S_ISGID /* set group id on execution */ +#define VSVTX S_ISVTX /* save swapped text even after use */ +#define VREAD S_IRUSR /* read, write, execute permissions */ +#define VWRITE S_IWUSR +#define VEXEC S_IXUSR + +#define MODEMASK S_IALLUGO /* mode bits plus permission bits */ + +/* + * Check whether mandatory file locking is enabled. 
+ */ +#define MANDLOCK(vp, mode) \ + ((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID) + +extern void vn_init(void); +extern int vn_wait(struct vnode *); +extern vnode_t *vn_initialize(struct inode *); + +/* + * Acquiring and invalidating vnodes: + * + * if (vn_get(vp, version, 0)) + * ...; + * vn_purge(vp, version); + * + * vn_get and vn_purge must be called with vmap_t arguments, sampled + * while a lock that the vnode's VOP_RECLAIM function acquires is + * held, to ensure that the vnode sampled with the lock held isn't + * recycled (VOP_RECLAIMed) or deallocated between the release of the lock + * and the subsequent vn_get or vn_purge. + */ + +/* + * vnode_map structures _must_ match vn_epoch and vnode structure sizes. + */ +typedef struct vnode_map { + vfs_t *v_vfsp; + vnumber_t v_number; /* in-core vnode number */ + xfs_ino_t v_ino; /* inode # */ +} vmap_t; + +#define VMAP(vp, vmap) {(vmap).v_vfsp = (vp)->v_vfsp, \ + (vmap).v_number = (vp)->v_number, \ + (vmap).v_ino = (vp)->v_inode.i_ino; } + +extern void vn_purge(struct vnode *, vmap_t *); +extern vnode_t *vn_get(struct vnode *, vmap_t *); +extern int vn_revalidate(struct vnode *); +extern void vn_remove(struct vnode *); + +static inline int vn_count(struct vnode *vp) +{ + return atomic_read(&LINVFS_GET_IP(vp)->i_count); +} + +/* + * Vnode reference counting functions (and macros for compatibility). + */ +extern vnode_t *vn_hold(struct vnode *); +extern void vn_rele(struct vnode *); + +#if defined(XFS_VNODE_TRACE) +#define VN_HOLD(vp) \ + ((void)vn_hold(vp), \ + vn_trace_hold(vp, __FILE__, __LINE__, (inst_t *)__return_address)) +#define VN_RELE(vp) \ + (vn_trace_rele(vp, __FILE__, __LINE__, (inst_t *)__return_address), \ + iput(LINVFS_GET_IP(vp))) +#else +#define VN_HOLD(vp) ((void)vn_hold(vp)) +#define VN_RELE(vp) (iput(LINVFS_GET_IP(vp))) +#endif + +/* + * Vname handling macros. 
+ */ +#define VNAME(dentry) ((char *) (dentry)->d_name.name) +#define VNAMELEN(dentry) ((dentry)->d_name.len) +#define VNAME_TO_VNODE(dentry) (LINVFS_GET_VP((dentry)->d_inode)) + +/* + * Vnode spinlock manipulation. + */ +#define VN_LOCK(vp) mutex_spinlock(&(vp)->v_lock) +#define VN_UNLOCK(vp, s) mutex_spinunlock(&(vp)->v_lock, s) +#define VN_FLAGSET(vp,b) vn_flagset(vp,b) +#define VN_FLAGCLR(vp,b) vn_flagclr(vp,b) + +static __inline__ void vn_flagset(struct vnode *vp, uint flag) +{ + spin_lock(&vp->v_lock); + vp->v_flag |= flag; + spin_unlock(&vp->v_lock); +} + +static __inline__ void vn_flagclr(struct vnode *vp, uint flag) +{ + spin_lock(&vp->v_lock); + vp->v_flag &= ~flag; + spin_unlock(&vp->v_lock); +} + +/* + * Update modify/access/change times on the vnode + */ +#define VN_MTIMESET(vp, tvp) (LINVFS_GET_IP(vp)->i_mtime = *(tvp)) +#define VN_ATIMESET(vp, tvp) (LINVFS_GET_IP(vp)->i_atime = *(tvp)) +#define VN_CTIMESET(vp, tvp) (LINVFS_GET_IP(vp)->i_ctime = *(tvp)) + +/* + * Some useful predicates. + */ +#define VN_MAPPED(vp) mapping_mapped(LINVFS_GET_IP(vp)->i_mapping) +#define VN_CACHED(vp) (LINVFS_GET_IP(vp)->i_mapping->nrpages) +#define VN_DIRTY(vp) mapping_tagged(LINVFS_GET_IP(vp)->i_mapping, \ + PAGECACHE_TAG_DIRTY) +#define VMODIFY(vp) VN_FLAGSET(vp, VMODIFIED) +#define VUNMODIFY(vp) VN_FLAGCLR(vp, VMODIFIED) + +/* + * Flags to VOP_SETATTR/VOP_GETATTR. + */ +#define ATTR_UTIME 0x01 /* non-default utime(2) request */ +#define ATTR_DMI 0x08 /* invocation from a DMI function */ +#define ATTR_LAZY 0x80 /* set/get attributes lazily */ +#define ATTR_NONBLOCK 0x100 /* return EAGAIN if operation would block */ + +/* + * Flags to VOP_FSYNC and VOP_RECLAIM. + */ +#define FSYNC_NOWAIT 0 /* asynchronous flush */ +#define FSYNC_WAIT 0x1 /* synchronous fsync or forced reclaim */ +#define FSYNC_INVAL 0x2 /* flush and invalidate cached data */ +#define FSYNC_DATA 0x4 /* synchronous fsync of data only */ + +/* + * Tracking vnode activity. 
+ */ +#if defined(XFS_VNODE_TRACE) + +#define VNODE_TRACE_SIZE 16 /* number of trace entries */ +#define VNODE_KTRACE_ENTRY 1 +#define VNODE_KTRACE_EXIT 2 +#define VNODE_KTRACE_HOLD 3 +#define VNODE_KTRACE_REF 4 +#define VNODE_KTRACE_RELE 5 + +extern void vn_trace_entry(struct vnode *, char *, inst_t *); +extern void vn_trace_exit(struct vnode *, char *, inst_t *); +extern void vn_trace_hold(struct vnode *, char *, int, inst_t *); +extern void vn_trace_ref(struct vnode *, char *, int, inst_t *); +extern void vn_trace_rele(struct vnode *, char *, int, inst_t *); + +#define VN_TRACE(vp) \ + vn_trace_ref(vp, __FILE__, __LINE__, (inst_t *)__return_address) +#else +#define vn_trace_entry(a,b,c) +#define vn_trace_exit(a,b,c) +#define vn_trace_hold(a,b,c,d) +#define vn_trace_ref(a,b,c,d) +#define vn_trace_rele(a,b,c,d) +#define VN_TRACE(vp) +#endif + +#endif /* __XFS_VNODE_H__ */ diff --git a/include/asm-alpha/rmap.h b/include/asm-alpha/rmap.h new file mode 100644 index 000000000..08b2236ef --- /dev/null +++ b/include/asm-alpha/rmap.h @@ -0,0 +1,7 @@ +#ifndef _ALPHA_RMAP_H +#define _ALPHA_RMAP_H + +/* nothing to see, move along */ +#include + +#endif diff --git a/include/asm-arm/arch-cl7500/ide.h b/include/asm-arm/arch-cl7500/ide.h new file mode 100644 index 000000000..78f97a3b2 --- /dev/null +++ b/include/asm-arm/arch-cl7500/ide.h @@ -0,0 +1,50 @@ +/* + * linux/include/asm-arm/arch-cl7500/ide.h + * + * Copyright (c) 1997 Russell King + * + * Modifications: + * 29-07-1998 RMK Major re-work of IDE architecture specific code + */ +#include +#include + +/* + * Set up a hw structure for a specified data port, control port and IRQ. + * This should follow whatever the default interface uses. 
+ */ +static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, + unsigned long ctrl_port, int *irq) +{ + unsigned long reg = data_port; + int i; + + memset(hw, 0, sizeof(*hw)); + + for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { + hw->io_ports[i] = reg; + reg += 1; + } + if (ctrl_port) { + hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; + } else { + hw->io_ports[IDE_CONTROL_OFFSET] = data_port + 0x206; + } + if (irq != NULL) + *irq = 0; + hw->io_ports[IDE_IRQ_OFFSET] = 0; +} + +/* + * This registers the standard ports for this architecture with the IDE + * driver. + */ +static __inline__ void +ide_init_default_hwifs(void) +{ + hw_regs_t hw; + + ide_init_hwif_ports(&hw, ISASLOT_IO + 0x1f0, ISASLOT_IO + 0x3f6, NULL); + hw.irq = IRQ_ISA_14; + ide_register_hw(&hw); +} diff --git a/include/asm-arm/arch-cl7500/keyboard.h b/include/asm-arm/arch-cl7500/keyboard.h new file mode 100644 index 000000000..660b31a0e --- /dev/null +++ b/include/asm-arm/arch-cl7500/keyboard.h @@ -0,0 +1,16 @@ +/* + * linux/include/asm-arm/arch-cl7500/keyboard.h + * from linux/include/asm-arm/arch-rpc/keyboard.h + * + * Keyboard driver definitions for CL7500 architecture + * + * Copyright (C) 1998-2001 Russell King + */ +#include +#define NR_SCANCODES 128 + +extern int ps2kbd_init_hw(void); + +#define kbd_disable_irq() disable_irq(IRQ_KEYBOARDRX) +#define kbd_enable_irq() enable_irq(IRQ_KEYBOARDRX) +#define kbd_init_hw() ps2kbd_init_hw() diff --git a/include/asm-arm/arch-clps711x/keyboard.h b/include/asm-arm/arch-clps711x/keyboard.h new file mode 100644 index 000000000..30ab2199f --- /dev/null +++ b/include/asm-arm/arch-clps711x/keyboard.h @@ -0,0 +1,26 @@ +/* + * linux/include/asm-arm/arch-clps711x/keyboard.h + * + * Copyright (C) 1998-2001 Russell King + */ +#include + +#define NR_SCANCODES 128 + +#define kbd_disable_irq() do { } while (0) +#define kbd_enable_irq() do { } while (0) + +/* + * EDB7211 keyboard driver + */ +extern void edb7211_kbd_init_hw(void); +extern 
void clps711x_kbd_init_hw(void); + +static inline void kbd_init_hw(void) +{ + if (machine_is_edb7211()) + edb7211_kbd_init_hw(); + + if (machine_is_autcpu12()) + clps711x_kbd_init_hw(); +} diff --git a/include/asm-arm/arch-ebsa110/ide.h b/include/asm-arm/arch-ebsa110/ide.h new file mode 100644 index 000000000..35eff5c28 --- /dev/null +++ b/include/asm-arm/arch-ebsa110/ide.h @@ -0,0 +1 @@ +/* no ide */ diff --git a/include/asm-arm/arch-ebsa285/ide.h b/include/asm-arm/arch-ebsa285/ide.h new file mode 100644 index 000000000..09c0310b6 --- /dev/null +++ b/include/asm-arm/arch-ebsa285/ide.h @@ -0,0 +1,49 @@ +/* + * linux/include/asm-arm/arch-ebsa285/ide.h + * + * Copyright (C) 1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Modifications: + * 29-07-1998 RMK Major re-work of IDE architecture specific code + */ +#include + +/* + * Set up a hw structure for a specified data port, control port and IRQ. + * This should follow whatever the default interface uses. + */ +static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, + unsigned long ctrl_port, int *irq) +{ + unsigned long reg = data_port; + int i; + + for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { + hw->io_ports[i] = reg; + reg += 1; + } + hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; + if (irq) + *irq = 0; +} + +/* + * This registers the standard ports for this architecture with the IDE + * driver. 
+ */ +static __inline__ void ide_init_default_hwifs(void) +{ +#if 0 + hw_regs_t hw; + + memset(hw, 0, sizeof(*hw)); + + ide_init_hwif_ports(&hw, 0x1f0, 0x3f6, NULL); + hw.irq = IRQ_HARDDISK; + ide_register_hw(&hw); +#endif +} diff --git a/include/asm-arm/arch-iop3xx/ide.h b/include/asm-arm/arch-iop3xx/ide.h new file mode 100644 index 000000000..c2b0265dd --- /dev/null +++ b/include/asm-arm/arch-iop3xx/ide.h @@ -0,0 +1,49 @@ +/* + * include/asm-arm/arch-iop3xx/ide.h + * + * Generic IDE functions for IOP310 systems + * + * Author: Deepak Saxena + * + * Copyright 2001 MontaVista Software Inc. + * + * 09/26/2001 - Sharon Baartmans + * Fixed so it actually works. + */ + +#ifndef _ASM_ARCH_IDE_H_ +#define _ASM_ARCH_IDE_H_ + +/* + * Set up a hw structure for a specified data port, control port and IRQ. + * This should follow whatever the default interface uses. + */ +static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, + unsigned long ctrl_port, int *irq) +{ + unsigned long reg = data_port; + int i; + int regincr = 1; + + memset(hw, 0, sizeof(*hw)); + + for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { + hw->io_ports[i] = reg; + reg += regincr; + } + + hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; + + if (irq) *irq = 0; +} + +/* + * This registers the standard ports for this architecture with the IDE + * driver. + */ +static __inline__ void ide_init_default_hwifs(void) +{ + /* There are no standard ports */ +} + +#endif diff --git a/include/asm-arm/arch-l7200/ide.h b/include/asm-arm/arch-l7200/ide.h new file mode 100644 index 000000000..62ee12ada --- /dev/null +++ b/include/asm-arm/arch-l7200/ide.h @@ -0,0 +1,27 @@ +/* + * linux/include/asm-arm/arch-l7200/ide.h + * + * Copyright (c) 2000 Steve Hill (sjhill@cotw.com) + * + * Changelog: + * 03-29-2000 SJH Created file placeholder + */ +#include + +/* + * Set up a hw structure for a specified data port, control port and IRQ. + * This should follow whatever the default interface uses. 
+ */ +static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, + unsigned long ctrl_port, int *irq) +{ +} + +/* + * This registers the standard ports for this architecture with the IDE + * driver. + */ +static __inline__ void +ide_init_default_hwifs(void) +{ +} diff --git a/include/asm-arm/arch-l7200/keyboard.h b/include/asm-arm/arch-l7200/keyboard.h new file mode 100644 index 000000000..6628bd381 --- /dev/null +++ b/include/asm-arm/arch-l7200/keyboard.h @@ -0,0 +1,51 @@ +/* + * linux/include/asm-arm/arch-l7200/keyboard.h + * + * Keyboard driver definitions for LinkUp Systems L7200 architecture + * + * Copyright (C) 2000 Scott A McConnell (samcconn@cotw.com) + * Steve Hill (sjhill@cotw.com) + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive for + * more details. + * + * Changelog: + * 07-18-2000 SAM Created file + * 07-28-2000 SJH Complete rewrite + */ + +#include + +#error This needs fixing --rmk + +/* + * Layout of L7200 keyboard registers + */ +struct KBD_Port { + unsigned int KBDR; + unsigned int KBDMR; + unsigned int KBSBSR; + unsigned int Reserved; + unsigned int KBKSR; +}; + +#define KBD_BASE IO_BASE_2 + 0x4000 +#define l7200kbd_hwregs ((volatile struct KBD_Port *) (KBD_BASE)) + +extern void l7200kbd_init_hw(void); +extern int l7200kbd_translate(unsigned char scancode, unsigned char *keycode, + char raw_mode); + +#define kbd_setkeycode(sc,kc) (-EINVAL) +#define kbd_getkeycode(sc) (-EINVAL) + +#define kbd_translate(sc, kcp, rm) ({ *(kcp) = (sc); 1; }) +#define kbd_unexpected_up(kc) (0200) +#define kbd_leds(leds) do {} while (0) +#define kbd_init_hw() l7200kbd_init_hw() +#define kbd_sysrq_xlate ((unsigned char *)NULL) +#define kbd_disable_irq() disable_irq(IRQ_GCTC2) +#define kbd_enable_irq() enable_irq(IRQ_GCTC2) + +#define SYSRQ_KEY 13 diff --git a/include/asm-arm/arch-nexuspci/ide.h b/include/asm-arm/arch-nexuspci/ide.h new file mode 
100644 index 000000000..5514808d5 --- /dev/null +++ b/include/asm-arm/arch-nexuspci/ide.h @@ -0,0 +1,37 @@ +/* + * linux/include/asm-arm/arch-nexuspci/ide.h + * + * Copyright (c) 1998 Russell King + * + * Modifications: + * 29-07-1998 RMK Major re-work of IDE architecture specific code + */ +#include + +/* + * Set up a hw structure for a specified data port, control port and IRQ. + * This should follow whatever the default interface uses. + */ +static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, + unsigned long ctrl_port, int *irq) +{ + unsigned long reg = data_port; + int i; + + for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { + hw->io_ports[i] = reg; + reg += 1; + } + hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; + if (irq) + *irq = 0; +} + +/* + * This registers the standard ports for this architecture with the IDE + * driver. + */ +static __inline__ void ide_init_default_hwifs(void) +{ + /* There are no standard ports */ +} diff --git a/include/asm-arm/arch-pxa/ide.h b/include/asm-arm/arch-pxa/ide.h new file mode 100644 index 000000000..a9efdce2b --- /dev/null +++ b/include/asm-arm/arch-pxa/ide.h @@ -0,0 +1,54 @@ +/* + * linux/include/asm-arm/arch-pxa/ide.h + * + * Author: George Davis + * Created: Jan 10, 2002 + * Copyright: MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * + * Originally based upon linux/include/asm-arm/arch-sa1100/ide.h + * + */ + +#include +#include +#include + + +/* + * Set up a hw structure for a specified data port, control port and IRQ. + * This should follow whatever the default interface uses. 
+ */ +static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, + unsigned long ctrl_port, int *irq) +{ + unsigned long reg = data_port; + int i; + int regincr = 1; + + memset(hw, 0, sizeof(*hw)); + + for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { + hw->io_ports[i] = reg; + reg += regincr; + } + + hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; + + if (irq) + *irq = 0; +} + + +/* + * Register the standard ports for this architecture with the IDE driver. + */ +static __inline__ void +ide_init_default_hwifs(void) +{ + /* Nothing to declare... */ +} diff --git a/include/asm-arm/arch-pxa/keyboard.h b/include/asm-arm/arch-pxa/keyboard.h new file mode 100644 index 000000000..7bec3179b --- /dev/null +++ b/include/asm-arm/arch-pxa/keyboard.h @@ -0,0 +1,28 @@ +/* + * linux/include/asm-arm/arch-pxa/keyboard.h + * + * This file contains the architecture specific keyboard definitions + */ + +#ifndef _PXA_KEYBOARD_H +#define _PXA_KEYBOARD_H + +#include +#include + +extern struct kbd_ops_struct *kbd_ops; + +#define kbd_disable_irq() do { } while(0); +#define kbd_enable_irq() do { } while(0); + +extern int sa1111_kbd_init_hw(void); + +static inline void kbd_init_hw(void) +{ + if (machine_is_lubbock()) + sa1111_kbd_init_hw(); +} + + +#endif /* _PXA_KEYBOARD_H */ + diff --git a/include/asm-arm/arch-rpc/ide.h b/include/asm-arm/arch-rpc/ide.h new file mode 100644 index 000000000..92c7030ab --- /dev/null +++ b/include/asm-arm/arch-rpc/ide.h @@ -0,0 +1,48 @@ +/* + * linux/include/asm-arm/arch-rpc/ide.h + * + * Copyright (C) 1997 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Modifications: + * 29-07-1998 RMK Major re-work of IDE architecture specific code + */ +#include + +/* + * Set up a hw structure for a specified data port, control port and IRQ. 
+ * This should follow whatever the default interface uses. + */ +static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, + unsigned long ctrl_port, int *irq) +{ + unsigned long reg = data_port; + int i; + + memset(hw, 0, sizeof(*hw)); + + for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { + hw->io_ports[i] = reg; + reg += 1; + } + hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; + if (irq) + *irq = 0; +} + +/* + * This registers the standard ports for this architecture with the IDE + * driver. + */ +static __inline__ void +ide_init_default_hwifs(void) +{ + hw_regs_t hw; + + ide_init_hwif_ports(&hw, 0x1f0, 0x3f6, NULL); + hw.irq = IRQ_HARDDISK; + ide_register_hw(&hw, NULL); +} diff --git a/include/asm-arm/arch-s3c2410/ide.h b/include/asm-arm/arch-s3c2410/ide.h new file mode 100644 index 000000000..de651e75d --- /dev/null +++ b/include/asm-arm/arch-s3c2410/ide.h @@ -0,0 +1,49 @@ +/* linux/include/asm-arm/arch-s3c2410/ide.h + * + * Copyright (C) 1997 Russell King + * Copyright (C) 2003 Simtec Electronics + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Modifications: + * 29-07-1998 RMK Major re-work of IDE architecture specific code + * 16-05-2003 BJD Changed to work with BAST IDE ports + * 04-09-2003 BJD Modifications for V2.6 + */ + +#ifndef __ASM_ARCH_IDE_H +#define __ASM_ARCH_IDE_H + +#include + +/* + * Set up a hw structure for a specified data port, control port and IRQ. + * This should follow whatever the default interface uses. 
+ */ + +static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, + unsigned long ctrl_port, int *irq) +{ + unsigned long reg = data_port; + int i; + + memset(hw, 0, sizeof(*hw)); + + for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { + hw->io_ports[i] = reg; + reg += 1; + } + hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; + if (irq) + *irq = 0; +} + +/* we initialise our ide devices from the main ide core, due to problems + * with doing it in this function +*/ + +#define ide_init_default_hwifs() do { } while(0) + +#endif /* __ASM_ARCH_IDE_H */ diff --git a/include/asm-arm/arch-sa1100/keyboard.h b/include/asm-arm/arch-sa1100/keyboard.h new file mode 100644 index 000000000..3dacd71d9 --- /dev/null +++ b/include/asm-arm/arch-sa1100/keyboard.h @@ -0,0 +1,23 @@ +/* + * linux/include/asm-arm/arch-sa1100/keyboard.h + * Created 16 Dec 1999 by Nicolas Pitre + * This file contains the SA1100 architecture specific keyboard definitions + */ +#ifndef _SA1100_KEYBOARD_H +#define _SA1100_KEYBOARD_H + +#include +#include + +extern void gc_kbd_init_hw(void); +extern void smartio_kbd_init_hw(void); + +static inline void kbd_init_hw(void) +{ + if (machine_is_graphicsclient()) + gc_kbd_init_hw(); + if (machine_is_adsbitsy()) + smartio_kbd_init_hw(); +} + +#endif /* _SA1100_KEYBOARD_H */ diff --git a/include/asm-arm/arch-shark/ide.h b/include/asm-arm/arch-shark/ide.h new file mode 100644 index 000000000..f6a99b22f --- /dev/null +++ b/include/asm-arm/arch-shark/ide.h @@ -0,0 +1,47 @@ +/* + * linux/include/asm-arm/arch-shark/ide.h + * + * by Alexander Schulz + * + * derived from: + * linux/include/asm-arm/arch-ebsa285/ide.h + * Copyright (c) 1998 Russell King + */ + +#include + +/* + * Set up a hw structure for a specified data port, control port and IRQ. + * This should follow whatever the default interface uses. 
+ */ +static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, + unsigned long ctrl_port, int *irq) +{ + unsigned long reg = data_port; + int i; + + memset(hw, 0, sizeof(*hw)); + + for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { + hw->io_ports[i] = reg; + reg += 1; + } + hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; + if (irq) + *irq = 0; +} + +/* + * This registers the standard ports for this architecture with the IDE + * driver. + */ +static __inline__ void +ide_init_default_hwifs(void) +{ + hw_regs_t hw; + + ide_init_hwif_ports(&hw, 0x1f0, 0x3f6, NULL); + hw.irq = 14; + ide_register_hw(&hw,NULL); +} + diff --git a/include/asm-arm/arch-shark/keyboard.h b/include/asm-arm/arch-shark/keyboard.h new file mode 100644 index 000000000..52b5ed6e1 --- /dev/null +++ b/include/asm-arm/arch-shark/keyboard.h @@ -0,0 +1,68 @@ +/* + * linux/include/asm-arm/arch-shark/keyboard.h + * by Alexander Schulz + * + * Derived from linux/include/asm-arm/arch-ebsa285/keyboard.h + * (C) 1998 Russell King + * (C) 1998 Phil Blundell + */ +#include +#include +#include +#include +#include + +#define KEYBOARD_IRQ IRQ_ISA_KEYBOARD +#define NR_SCANCODES 128 + +#define kbd_disable_irq() do { } while (0) +#define kbd_enable_irq() do { } while (0) + +extern int pckbd_setkeycode(unsigned int scancode, unsigned int keycode); +extern int pckbd_getkeycode(unsigned int scancode); +extern int pckbd_translate(unsigned char scancode, unsigned char *keycode, + char raw_mode); +extern char pckbd_unexpected_up(unsigned char keycode); +extern void pckbd_leds(unsigned char leds); +extern void pckbd_init_hw(void); +extern unsigned char pckbd_sysrq_xlate[128]; + +static inline void kbd_init_hw(void) +{ + k_setkeycode = pckbd_setkeycode; + k_getkeycode = pckbd_getkeycode; + k_translate = pckbd_translate; + k_unexpected_up = pckbd_unexpected_up; + k_leds = pckbd_leds; +#ifdef CONFIG_MAGIC_SYSRQ + k_sysrq_key = 0x54; + k_sysrq_xlate = pckbd_sysrq_xlate; +#endif + pckbd_init_hw(); +} + 
+/* + * PC Keyboard specifics + */ + +/* resource allocation */ +#define kbd_request_region() request_region(0x60, 16, "keyboard") +#define kbd_request_irq(handler) request_irq(KEYBOARD_IRQ, handler, 0, \ + "keyboard", NULL) + +/* How to access the keyboard macros on this platform. */ +#define kbd_read_input() inb(KBD_DATA_REG) +#define kbd_read_status() inb(KBD_STATUS_REG) +#define kbd_write_output(val) outb(val, KBD_DATA_REG) +#define kbd_write_command(val) outb(val, KBD_CNTL_REG) + +/* Some stoneage hardware needs delays after some operations. */ +#define kbd_pause() do { } while(0) + +/* + * Machine specific bits for the PS/2 driver + */ +#define aux_request_irq(hand, dev_id) \ + request_irq(AUX_IRQ, hand, SA_SHIRQ, "PS/2 Mouse", dev_id) + +#define aux_free_irq(dev_id) free_irq(AUX_IRQ, dev_id) diff --git a/include/asm-arm/arch-tbox/ide.h b/include/asm-arm/arch-tbox/ide.h new file mode 100644 index 000000000..d66e67c94 --- /dev/null +++ b/include/asm-arm/arch-tbox/ide.h @@ -0,0 +1,3 @@ +/* + * linux/include/asm-arm/arch-tbox/ide.h + */ diff --git a/include/asm-arm/rmap.h b/include/asm-arm/rmap.h new file mode 100644 index 000000000..bb9ee93c1 --- /dev/null +++ b/include/asm-arm/rmap.h @@ -0,0 +1,6 @@ +#ifndef _ARM_RMAP_H +#define _ARM_RMAP_H + +#include + +#endif /* _ARM_RMAP_H */ diff --git a/include/asm-arm26/rmap.h b/include/asm-arm26/rmap.h new file mode 100644 index 000000000..6d5b6e092 --- /dev/null +++ b/include/asm-arm26/rmap.h @@ -0,0 +1,66 @@ +#ifndef _ARM_RMAP_H +#define _ARM_RMAP_H + +/* + * linux/include/asm-arm26/proc-armv/rmap.h + * + * Architecture dependant parts of the reverse mapping code, + * + * ARM is different since hardware page tables are smaller than + * the page size and Linux uses a "duplicate" one with extra info. + * For rmap this means that the first 2 kB of a page are the hardware + * page tables and the last 2 kB are the software page tables. 
+ */ + +static inline void pgtable_add_rmap(struct page *page, struct mm_struct * mm, unsigned long address) +{ + page->mapping = (void *)mm; + page->index = address & ~((PTRS_PER_PTE * PAGE_SIZE) - 1); + inc_page_state(nr_page_table_pages); +} + +static inline void pgtable_remove_rmap(struct page *page) +{ + page->mapping = NULL; + page->index = 0; + dec_page_state(nr_page_table_pages); +} + +static inline struct mm_struct * ptep_to_mm(pte_t * ptep) +{ + struct page * page = virt_to_page(ptep); + return (struct mm_struct *)page->mapping; +} + +/* The page table takes half of the page */ +#define PTE_MASK ((PAGE_SIZE / 2) - 1) + +static inline unsigned long ptep_to_address(pte_t * ptep) +{ + struct page * page = virt_to_page(ptep); + unsigned long low_bits; + + low_bits = ((unsigned long)ptep & PTE_MASK) * PTRS_PER_PTE; + return page->index + low_bits; +} + +//FIXME!!! IS these correct? +static inline pte_addr_t ptep_to_paddr(pte_t *ptep) +{ + return (pte_addr_t)ptep; +} + +static inline pte_t *rmap_ptep_map(pte_addr_t pte_paddr) +{ + return (pte_t *)pte_paddr; +} + +static inline void rmap_ptep_unmap(pte_t *pte) +{ + return; +} + + +//#include + +#endif /* _ARM_RMAP_H */ diff --git a/include/asm-cris/rmap.h b/include/asm-cris/rmap.h new file mode 100644 index 000000000..c5bf2a811 --- /dev/null +++ b/include/asm-cris/rmap.h @@ -0,0 +1,7 @@ +#ifndef _CRIS_RMAP_H +#define _CRIS_RMAP_H + +/* nothing to see, move along :) */ +#include + +#endif diff --git a/include/asm-generic/rmap.h b/include/asm-generic/rmap.h new file mode 100644 index 000000000..f743d9f80 --- /dev/null +++ b/include/asm-generic/rmap.h @@ -0,0 +1,91 @@ +#ifndef _GENERIC_RMAP_H +#define _GENERIC_RMAP_H +/* + * linux/include/asm-generic/rmap.h + * + * Architecture dependent parts of the reverse mapping code, + * this version should work for most architectures with a + * 'normal' page table layout. 
+ * + * We use the struct page of the page table page to find out + * the process and full address of a page table entry: + * - page->mapping points to the process' mm_struct + * - page->index has the high bits of the address + * - the lower bits of the address are calculated from the + * offset of the page table entry within the page table page + * + * For CONFIG_HIGHPTE, we need to represent the address of a pte in a + * scalar pte_addr_t. The pfn of the pte's page is shifted left by PAGE_SIZE + * bits and is then ORed with the byte offset of the pte within its page. + * + * For CONFIG_HIGHMEM4G, the pte_addr_t is 32 bits. 20 for the pfn, 12 for + * the offset. + * + * For CONFIG_HIGHMEM64G, the pte_addr_t is 64 bits. 52 for the pfn, 12 for + * the offset. + */ +#include + +static inline void pgtable_add_rmap(struct page * page, struct mm_struct * mm, unsigned long address) +{ +#ifdef BROKEN_PPC_PTE_ALLOC_ONE + /* OK, so PPC calls pte_alloc() before mem_map[] is setup ... ;( */ + extern int mem_init_done; + + if (!mem_init_done) + return; +#endif + page->mapping = (void *)mm; + page->index = address & ~((PTRS_PER_PTE * PAGE_SIZE) - 1); + inc_page_state(nr_page_table_pages); +} + +static inline void pgtable_remove_rmap(struct page * page) +{ + page->mapping = NULL; + page->index = 0; + dec_page_state(nr_page_table_pages); +} + +static inline struct mm_struct * ptep_to_mm(pte_t * ptep) +{ + struct page * page = kmap_atomic_to_page(ptep); + return (struct mm_struct *) page->mapping; +} + +static inline unsigned long ptep_to_address(pte_t * ptep) +{ + struct page * page = kmap_atomic_to_page(ptep); + unsigned long low_bits; + low_bits = ((unsigned long)ptep & (PTRS_PER_PTE*sizeof(pte_t) - 1)) + * (PAGE_SIZE/sizeof(pte_t)); + return page->index + low_bits; +} + +#ifdef CONFIG_HIGHPTE +static inline pte_addr_t ptep_to_paddr(pte_t *ptep) +{ + pte_addr_t paddr; + paddr = ((pte_addr_t)page_to_pfn(kmap_atomic_to_page(ptep))) << PAGE_SHIFT; + return paddr + 
(pte_addr_t)((unsigned long)ptep & ~PAGE_MASK); +} +#else +static inline pte_addr_t ptep_to_paddr(pte_t *ptep) +{ + return (pte_addr_t)ptep; +} +#endif + +#ifndef CONFIG_HIGHPTE +static inline pte_t *rmap_ptep_map(pte_addr_t pte_paddr) +{ + return (pte_t *)pte_paddr; +} + +static inline void rmap_ptep_unmap(pte_t *pte) +{ + return; +} +#endif + +#endif /* _GENERIC_RMAP_H */ diff --git a/include/asm-h8300/aki3068net/machine-depend.h b/include/asm-h8300/aki3068net/machine-depend.h new file mode 100644 index 000000000..510b86b5d --- /dev/null +++ b/include/asm-h8300/aki3068net/machine-depend.h @@ -0,0 +1,29 @@ +/* AE-3068 board depend header */ + +/* TIMER rate define */ +#ifdef H8300_TIMER_DEFINE +#define H8300_TIMER_COUNT_DATA 20000*10/8192 +#define H8300_TIMER_FREQ 20000*1000/8192 +#endif + +/* AE-3068 RTL8019AS Config */ +#ifdef H8300_NE_DEFINE + +#define NE2000_ADDR 0x200000 +#define NE2000_IRQ 5 +#define NE2000_BYTE volatile unsigned short + +#define WCRL 0xfee023 +#define MAR0A 0xffff20 +#define ETCR0A 0xffff24 +#define DTCR0A 0xffff27 +#define MAR0B 0xffff28 +#define DTCR0B 0xffff2f + +#define H8300_INIT_NE() \ +do { \ + wordlength = 1; \ + outb_p(0x48, ioaddr + EN0_DCFG); \ +} while(0) + +#endif diff --git a/include/asm-h8300/edosk2674/machine-depend.h b/include/asm-h8300/edosk2674/machine-depend.h new file mode 100644 index 000000000..1e98b40e5 --- /dev/null +++ b/include/asm-h8300/edosk2674/machine-depend.h @@ -0,0 +1,70 @@ +/* EDOSK2674 board depend header */ + +/* TIMER rate define */ +#ifdef H8300_TIMER_DEFINE +#define H8300_TIMER_COUNT_DATA 33000*10/8192 +#define H8300_TIMER_FREQ 33000*1000/8192 +#endif + +/* EDOSK-2674R SMSC Network Controler Target Depend impliments */ +#ifdef H8300_SMSC_DEFINE + +#define SMSC_BASE 0xf80000 +#define SMSC_IRQ 16 + +/* sorry quick hack */ +#if defined(outw) +# undef outw +#endif +#define outw(d,a) edosk2674_smsc_outw(d,(volatile unsigned short *)(a)) +#if defined(inw) +# undef inw +#endif +#define inw(a) 
edosk2674_smsc_inw((volatile unsigned short *)(a)) +#if defined(outsw) +# undef outsw +#endif +#define outsw(a,p,l) edosk2674_smsc_outsw((volatile unsigned short *)(a),p,l) +#if defined(insw) +# undef insw +#endif +#define insw(a,p,l) edosk2674_smsc_insw((volatile unsigned short *)(a),p,l) + +static inline void edosk2674_smsc_outw( + unsigned short d, + volatile unsigned short *a + ) +{ + *a = (d >> 8) | (d << 8); +} + +static inline unsigned short edosk2674_smsc_inw( + volatile unsigned short *a + ) +{ + unsigned short d; + d = *a; + return (d >> 8) | (d << 8); +} + +static inline void edosk2674_smsc_outsw( + volatile unsigned short *a, + unsigned short *p, + unsigned long l + ) +{ + for (; l != 0; --l, p++) + *a = *p; +} + +static inline void edosk2674_smsc_insw( + volatile unsigned short *a, + unsigned short *p, + unsigned long l + ) +{ + for (; l != 0; --l, p++) + *p = *a; +} + +#endif diff --git a/include/asm-h8300/generic/machine-depend.h b/include/asm-h8300/generic/machine-depend.h new file mode 100644 index 000000000..2d78096e5 --- /dev/null +++ b/include/asm-h8300/generic/machine-depend.h @@ -0,0 +1,17 @@ +/* machine depend header */ + +/* TIMER rate define */ +#ifdef H8300_TIMER_DEFINE +#include +#if defined(CONFIG_H83007) || defined(CONFIG_H83068) || defined(CONFIG_H8S2678) +#define H8300_TIMER_COUNT_DATA CONFIG_CPU_CLOCK*10/8192 +#define H8300_TIMER_FREQ CONFIG_CPU_CLOCK*1000/8192 +#endif + +#if defined(CONFIG_H8_3002) || defined(CONFIG_H83048) +#define H8300_TIMER_COUNT_DATA CONFIG_CPU_CLOCK*10/8 +#define H8300_TIMER_FREQ CONFIG_CPU_CLOCK*1000/8 +#endif + +#endif + diff --git a/include/asm-h8300/generic/timer_rate.h b/include/asm-h8300/generic/timer_rate.h new file mode 100644 index 000000000..0f6f4190e --- /dev/null +++ b/include/asm-h8300/generic/timer_rate.h @@ -0,0 +1,15 @@ +#include + +#if defined(CONFIG_H83007) || defined(CONFIG_H83068) || defined(CONFIG_H8S2678) +#define H8300_TIMER_COUNT_DATA CONFIG_CPU_CLOCK*10/8192 +#define H8300_TIMER_FREQ 
CONFIG_CPU_CLOCK*1000/8192 +#endif + +#if defined(H8_3002) || defined(CONFIG_H83048) +#define H8300_TIMER_COUNT_DATA CONFIG_CPU_CLOCK*10/8 +#define H8300_TIMER_FREQ CONFIG_CPU_CLOCK*1000/8 +#endif + +#if !defined(H8300_TIMER_COUNT_DATA) +#error illigal configuration +#endif diff --git a/include/asm-h8300/h8300_smsc.h b/include/asm-h8300/h8300_smsc.h new file mode 100644 index 000000000..f8fa7f9cc --- /dev/null +++ b/include/asm-h8300/h8300_smsc.h @@ -0,0 +1,20 @@ +/****************************************************************************/ + +/* + * h8300_smsc.h -- SMSC in H8/300H and H8S Evalution Board. + * + * (C) Copyright 2003, Yoshinori Sato + */ + +/****************************************************************************/ +#ifndef h8300smsc_h +#define h8300smsc_h +/****************************************************************************/ + +/* Such a description is OK ? */ +#define H8300_SMSC_DEFINE +#include +#undef H8300_SMSC_DEFINE + +/****************************************************************************/ +#endif /* h8300smsc_h */ diff --git a/include/asm-h8300/h8max/machine-depend.h b/include/asm-h8300/h8max/machine-depend.h new file mode 100644 index 000000000..e87d22e6d --- /dev/null +++ b/include/asm-h8300/h8max/machine-depend.h @@ -0,0 +1,100 @@ +/* H8MAX board depend header */ + +/* TIMER rate define */ +#ifdef H8300_TIMER_DEFINE +#define H8300_TIMER_COUNT_DATA 25000*10/8192 +#define H8300_TIMER_FREQ 25000*1000/8192 +#endif + +/* H8MAX RTL8019AS Config */ +#ifdef H8300_NE_DEFINE + +#define NE2000_ADDR 0x800600 +#define NE2000_IRQ 4 +#define NE2000_IRQ_VECTOR (12 + NE2000_IRQ) +#define NE2000_BYTE volatile unsigned short + +/* sorry quick hack */ +#if defined(outb) +# undef outb +#endif +#define outb(d,a) h8max_outb((d),(a) - NE2000_ADDR) +#if defined(inb) +# undef inb +#endif +#define inb(a) h8max_inb((a) - NE2000_ADDR) +#if defined(outb_p) +# undef outb_p +#endif +#define outb_p(d,a) h8max_outb((d),(a) - NE2000_ADDR) +#if 
defined(inb_p) +# undef inb_p +#endif +#define inb_p(a) h8max_inb((a) - NE2000_ADDR) +#if defined(outsw) +# undef outsw +#endif +#define outsw(a,p,l) h8max_outsw((a) - NE2000_ADDR,(unsigned short *)p,l) +#if defined(insw) +# undef insw +#endif +#define insw(a,p,l) h8max_insw((a) - NE2000_ADDR,(unsigned short *)p,l) +#if defined(outsb) +# undef outsb +#endif +#define outsb(a,p,l) h8max_outsb((a) - NE2000_ADDR,(unsigned char *)p,l) +#if defined(insb) +# undef insb +#endif +#define insb(a,p,l) h8max_insb((a) - NE2000_ADDR,(unsigned char *)p,l) + +#define H8300_INIT_NE() \ +do { \ + wordlength = 2; \ + h8max_outb(0x49, ioaddr + EN0_DCFG); \ + SA_prom[14] = SA_prom[15] = 0x57;\ +} while(0) + +static inline void h8max_outb(unsigned char d,unsigned char a) +{ + *(unsigned short *)(NE2000_ADDR + (a << 1)) = d; +} + +static inline unsigned char h8max_inb(unsigned char a) +{ + return *(unsigned char *)(NE2000_ADDR + (a << 1) +1); +} + +static inline void h8max_outsw(unsigned char a,unsigned short *p,unsigned long l) +{ + unsigned short d; + for (; l != 0; --l, p++) { + d = (((*p) >> 8) & 0xff) | ((*p) << 8); + *(unsigned short *)(NE2000_ADDR + (a << 1)) = d; + } +} + +static inline void h8max_insw(unsigned char a,unsigned short *p,unsigned long l) +{ + unsigned short d; + for (; l != 0; --l, p++) { + d = *(unsigned short *)(NE2000_ADDR + (a << 1)); + *p = (d << 8)|((d >> 8) & 0xff); + } +} + +static inline void h8max_outsb(unsigned char a,unsigned char *p,unsigned long l) +{ + for (; l != 0; --l, p++) { + *(unsigned short *)(NE2000_ADDR + (a << 1)) = *p; + } +} + +static inline void h8max_insb(unsigned char a,unsigned char *p,unsigned long l) +{ + for (; l != 0; --l, p++) { + *p = *((unsigned char *)(NE2000_ADDR + (a << 1))+1); + } +} + +#endif diff --git a/include/asm-i386/rmap.h b/include/asm-i386/rmap.h new file mode 100644 index 000000000..353afee85 --- /dev/null +++ b/include/asm-i386/rmap.h @@ -0,0 +1,21 @@ +#ifndef _I386_RMAP_H +#define _I386_RMAP_H + +/* nothing to 
see, move along */ +#include + +#ifdef CONFIG_HIGHPTE +static inline pte_t *rmap_ptep_map(pte_addr_t pte_paddr) +{ + unsigned long pfn = (unsigned long)(pte_paddr >> PAGE_SHIFT); + unsigned long off = ((unsigned long)pte_paddr) & ~PAGE_MASK; + return (pte_t *)((char *)kmap_atomic(pfn_to_page(pfn), KM_PTE2) + off); +} + +static inline void rmap_ptep_unmap(pte_t *pte) +{ + kunmap_atomic(pte, KM_PTE2); +} +#endif + +#endif diff --git a/include/asm-ia64/rmap.h b/include/asm-ia64/rmap.h new file mode 100644 index 000000000..179c565dd --- /dev/null +++ b/include/asm-ia64/rmap.h @@ -0,0 +1,7 @@ +#ifndef _ASM_IA64_RMAP_H +#define _ASM_IA64_RMAP_H + +/* nothing to see, move along */ +#include + +#endif /* _ASM_IA64_RMAP_H */ diff --git a/include/asm-m68k/rmap.h b/include/asm-m68k/rmap.h new file mode 100644 index 000000000..85119e414 --- /dev/null +++ b/include/asm-m68k/rmap.h @@ -0,0 +1,7 @@ +#ifndef _M68K_RMAP_H +#define _M68K_RMAP_H + +/* nothing to see, move along */ +#include + +#endif diff --git a/include/asm-m68knommu/rmap.h b/include/asm-m68knommu/rmap.h new file mode 100644 index 000000000..b3664ccd5 --- /dev/null +++ b/include/asm-m68knommu/rmap.h @@ -0,0 +1,2 @@ +/* Do not need anything here */ + diff --git a/include/asm-mips/rmap.h b/include/asm-mips/rmap.h new file mode 100644 index 000000000..c9efd7b98 --- /dev/null +++ b/include/asm-mips/rmap.h @@ -0,0 +1,7 @@ +#ifndef __ASM_RMAP_H +#define __ASM_RMAP_H + +/* nothing to see, move along */ +#include + +#endif /* __ASM_RMAP_H */ diff --git a/include/asm-parisc/rmap.h b/include/asm-parisc/rmap.h new file mode 100644 index 000000000..4ea8eb454 --- /dev/null +++ b/include/asm-parisc/rmap.h @@ -0,0 +1,7 @@ +#ifndef _PARISC_RMAP_H +#define _PARISC_RMAP_H + +/* nothing to see, move along */ +#include + +#endif diff --git a/include/asm-ppc/rmap.h b/include/asm-ppc/rmap.h new file mode 100644 index 000000000..50556b5ff --- /dev/null +++ b/include/asm-ppc/rmap.h @@ -0,0 +1,9 @@ +#ifndef _PPC_RMAP_H +#define _PPC_RMAP_H 
+ +/* PPC calls pte_alloc() before mem_map[] is setup ... */ +#define BROKEN_PPC_PTE_ALLOC_ONE + +#include + +#endif diff --git a/include/asm-ppc64/rmap.h b/include/asm-ppc64/rmap.h new file mode 100644 index 000000000..cf58a01ec --- /dev/null +++ b/include/asm-ppc64/rmap.h @@ -0,0 +1,9 @@ +#ifndef _PPC64_RMAP_H +#define _PPC64_RMAP_H + +/* PPC64 calls pte_alloc() before mem_map[] is setup ... */ +#define BROKEN_PPC_PTE_ALLOC_ONE + +#include + +#endif diff --git a/include/asm-s390/rmap.h b/include/asm-s390/rmap.h new file mode 100644 index 000000000..43d6a87b6 --- /dev/null +++ b/include/asm-s390/rmap.h @@ -0,0 +1,7 @@ +#ifndef _S390_RMAP_H +#define _S390_RMAP_H + +/* nothing to see, move along */ +#include + +#endif diff --git a/include/asm-sh/rmap.h b/include/asm-sh/rmap.h new file mode 100644 index 000000000..31db8cc07 --- /dev/null +++ b/include/asm-sh/rmap.h @@ -0,0 +1,7 @@ +#ifndef _SH_RMAP_H +#define _SH_RMAP_H + +/* nothing to see, move along */ +#include + +#endif diff --git a/include/asm-sparc/rmap.h b/include/asm-sparc/rmap.h new file mode 100644 index 000000000..06063cffe --- /dev/null +++ b/include/asm-sparc/rmap.h @@ -0,0 +1,7 @@ +#ifndef _SPARC_RMAP_H +#define _SPARC_RMAP_H + +/* nothing to see, move along */ +#include + +#endif diff --git a/include/asm-sparc64/rmap.h b/include/asm-sparc64/rmap.h new file mode 100644 index 000000000..681849b2d --- /dev/null +++ b/include/asm-sparc64/rmap.h @@ -0,0 +1,7 @@ +#ifndef _SPARC64_RMAP_H +#define _SPARC64_RMAP_H + +/* nothing to see, move along */ +#include + +#endif diff --git a/include/asm-um/rmap.h b/include/asm-um/rmap.h new file mode 100644 index 000000000..a244d486b --- /dev/null +++ b/include/asm-um/rmap.h @@ -0,0 +1,6 @@ +#ifndef __UM_RMAP_H +#define __UM_RMAP_H + +#include "asm/arch/rmap.h" + +#endif diff --git a/include/asm-v850/rmap.h b/include/asm-v850/rmap.h new file mode 100644 index 000000000..c0ebee6f4 --- /dev/null +++ b/include/asm-v850/rmap.h @@ -0,0 +1 @@ +/* Do not need anything here */ 
diff --git a/include/asm-x86_64/rmap.h b/include/asm-x86_64/rmap.h new file mode 100644 index 000000000..24c1783ed --- /dev/null +++ b/include/asm-x86_64/rmap.h @@ -0,0 +1,7 @@ +#ifndef _X8664_RMAP_H +#define _X8664_RMAP_H + +/* nothing to see, move along */ +#include + +#endif diff --git a/include/linux/ninline.h b/include/linux/ninline.h new file mode 100644 index 000000000..d3f752516 --- /dev/null +++ b/include/linux/ninline.h @@ -0,0 +1,151 @@ +#ifndef _NX_INLINE_H +#define _NX_INLINE_H + + +// #define NX_DEBUG + +#include +#include + +#include "vserver/network.h" + +#if defined(NX_DEBUG) +#define nxdprintk(x...) printk("nxd: " x) +#else +#define nxdprintk(x...) +#endif + + +void free_nx_info(struct nx_info *); + +extern int proc_pid_nx_info(struct task_struct *, char *); + + +#define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__) + +static __inline__ struct nx_info *__get_nx_info(struct nx_info *nxi, const char *_file, int _line) +{ + if (!nxi) + return NULL; + nxdprintk("get_nx_info(%p[%d.%d])\t%s:%d\n", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_refcount):0, + _file, _line); + atomic_inc(&nxi->nx_refcount); + return nxi; +} + +#define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__) + +static __inline__ void __put_nx_info(struct nx_info *nxi, const char *_file, int _line) +{ + if (!nxi) + return; + nxdprintk("put_nx_info(%p[%d.%d])\t%s:%d\n", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_refcount):0, + _file, _line); + if (atomic_dec_and_lock(&nxi->nx_refcount, &nxlist_lock)) { + list_del(&nxi->nx_list); + spin_unlock(&nxlist_lock); + free_nx_info(nxi); + } +} + + +#define set_nx_info(p,i) __set_nx_info(p,i,__FILE__,__LINE__) + +static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi, + const char *_file, int _line) +{ + BUG_ON(*nxp); + if (!nxi) + return; + nxdprintk("set_nx_info(%p[#%d.%d])\t%s:%d\n", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_refcount):0, + _file, _line); + *nxp = __get_nx_info(nxi, _file, _line); 
+} + +#define clr_nx_info(p) __clr_nx_info(p,__FILE__,__LINE__) + +static inline void __clr_nx_info(struct nx_info **nxp, + const char *_file, int _line) +{ + struct nx_info *nxo = *nxp; + + if (!nxo) + return; + nxdprintk("clr_nx_info(%p[#%d.%d])\t%s:%d\n", + nxo, nxo?nxo->nx_id:0, nxo?atomic_read(&nxo->nx_refcount):0, + _file, _line); + *nxp = NULL; + wmb(); + __put_nx_info(nxo, _file, _line); +} + + +#define task_get_nx_info(i) __task_get_nx_info(i,__FILE__,__LINE__) + +static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p, + const char *_file, int _line) +{ + struct nx_info *nxi; + + task_lock(p); + nxi = __get_nx_info(p->nx_info, _file, _line); + task_unlock(p); + return nxi; +} + +#define nx_verify_info(p,i) \ + __nx_verify_info((p)->nx_info,i,__FILE__,__LINE__) + +static __inline__ void __nx_verify_info( + struct nx_info *ipa, struct nx_info *ipb, + const char *_file, int _line) +{ + if (ipa == ipb) + return; + printk(KERN_ERR "ip bad assumption (%p==%p) at %s:%d\n", + ipa, ipb, _file, _line); +} + + +#define nx_task_nid(t) ((t)->nid) + +#define nx_current_nid() nx_task_nid(current) + +#define nx_check(c,m) __nx_check(nx_current_nid(),c,m) + +#define nx_weak_check(c,m) ((m) ? nx_check(c,m) : 1) + +#undef nxdprintk +#define nxdprintk(x...) + + +#define __nx_flags(v,m,f) (((v) & (m)) ^ (f)) + +#define __nx_task_flags(t,m,f) \ + (((t) && ((t)->nx_info)) ? \ + __nx_flags((t)->nx_info->nx_flags,(m),(f)) : 0) + +#define nx_current_flags() \ + ((current->nx_info) ? current->nx_info->nx_flags : 0) + +#define nx_flags(m,f) __nx_flags(nx_current_flags(),(m),(f)) + + +#define nx_current_ncaps() \ + ((current->nx_info) ? 
current->nx_info->nx_ncaps : 0) + +#define nx_ncaps(c) (nx_current_ncaps() & (c)) + + + +#define sock_nx_init(s) do { \ + (s)->sk_nid = 0; \ + (s)->sk_nx_info = NULL; \ + } while (0) + + + +#endif diff --git a/include/linux/vinline.h b/include/linux/vinline.h new file mode 100644 index 000000000..07bb3698a --- /dev/null +++ b/include/linux/vinline.h @@ -0,0 +1,462 @@ +#ifndef _VX_INLINE_H +#define _VX_INLINE_H + + +// #define VX_DEBUG + +#include +#include + +#include "vserver/context.h" +#include "vserver/limit.h" +#include "vserver/cvirt.h" + +#if defined(VX_DEBUG) +#define vxdprintk(x...) printk("vxd: " x) +#else +#define vxdprintk(x...) +#endif + + + +void free_vx_info(struct vx_info *); + +extern int proc_pid_vx_info(struct task_struct *, char *); + + +#define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__) + +static __inline__ struct vx_info *__get_vx_info(struct vx_info *vxi, + const char *_file, int _line) +{ + if (!vxi) + return NULL; + vxdprintk("get_vx_info(%p[#%d.%d])\t%s:%d\n", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_refcount):0, + _file, _line); + atomic_inc(&vxi->vx_refcount); + return vxi; +} + +#define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__) + +static __inline__ void __put_vx_info(struct vx_info *vxi, const char *_file, int _line) +{ + if (!vxi) + return; + vxdprintk("put_vx_info(%p[#%d.%d])\t%s:%d\n", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_refcount):0, + _file, _line); + if (atomic_dec_and_lock(&vxi->vx_refcount, &vxlist_lock)) { + list_del(&vxi->vx_list); + spin_unlock(&vxlist_lock); + free_vx_info(vxi); + } +} + +#define set_vx_info(p,i) __set_vx_info(p,i,__FILE__,__LINE__) + +static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi, + const char *_file, int _line) +{ + BUG_ON(*vxp); + if (!vxi) + return; + vxdprintk("set_vx_info(%p[#%d.%d])\t%s:%d\n", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_refcount):0, + _file, _line); + *vxp = __get_vx_info(vxi, _file, _line); +} + +#define 
clr_vx_info(p) __clr_vx_info(p,__FILE__,__LINE__) + +static inline void __clr_vx_info(struct vx_info **vxp, + const char *_file, int _line) +{ + struct vx_info *vxo = *vxp; + + vxdprintk("clr_vx_info(%p[#%d.%d])\t%s:%d\n", + vxo, vxo?vxo->vx_id:0, vxo?atomic_read(&vxo->vx_refcount):0, + _file, _line); + *vxp = NULL; + wmb(); + __put_vx_info(vxo, _file, _line); +} + + +#define task_get_vx_info(i) __task_get_vx_info(i,__FILE__,__LINE__) + +static __inline__ struct vx_info *__task_get_vx_info(struct task_struct *p, + const char *_file, int _line) +{ + struct vx_info *vxi; + + task_lock(p); + vxi = __get_vx_info(p->vx_info, _file, _line); + task_unlock(p); + return vxi; +} + + +#define vx_verify_info(p,i) \ + __vx_verify_info((p)->vx_info,i,__FILE__,__LINE__) + +static __inline__ void __vx_verify_info( + struct vx_info *vxa, struct vx_info *vxb, + const char *_file, int _line) +{ + if (vxa == vxb) + return; + printk(KERN_ERR "vx bad assumption (%p==%p) at %s:%d\n", + vxa, vxb, _file, _line); +} + + +#define vx_task_xid(t) ((t)->xid) + +#define vx_current_xid() vx_task_xid(current) + +#define vx_check(c,m) __vx_check(vx_current_xid(),c,m) + +#define vx_weak_check(c,m) ((m) ? vx_check(c,m) : 1) + + +/* + * check current context for ADMIN/WATCH and + * optionally agains supplied argument + */ +static __inline__ int __vx_check(xid_t cid, xid_t id, unsigned int mode) +{ + if (mode & VX_ARG_MASK) { + if ((mode & VX_IDENT) && + (id == cid)) + return 1; + } + if (mode & VX_ATR_MASK) { + if ((mode & VX_DYNAMIC) && + (id >= MIN_D_CONTEXT) && + (id <= MAX_S_CONTEXT)) + return 1; + if ((mode & VX_STATIC) && + (id > 1) && (id < MIN_D_CONTEXT)) + return 1; + } + return (((mode & VX_ADMIN) && (cid == 0)) || + ((mode & VX_WATCH) && (cid == 1))); +} + + +#define __vx_flags(v,m,f) (((v) & (m)) ^ (f)) + +#define __vx_task_flags(t,m,f) \ + (((t) && ((t)->vx_info)) ? \ + __vx_flags((t)->vx_info->vx_flags,(m),(f)) : 0) + +#define vx_current_flags() \ + ((current->vx_info) ? 
current->vx_info->vx_flags : 0) + +#define vx_flags(m,f) __vx_flags(vx_current_flags(),(m),(f)) + + +#define vx_current_ccaps() \ + ((current->vx_info) ? current->vx_info->vx_ccaps : 0) + +#define vx_ccaps(c) (vx_current_ccaps() & (c)) + +#define vx_current_bcaps() \ + (((current->vx_info) && !vx_flags(VXF_STATE_SETUP, 0)) ? \ + current->vx_info->vx_bcaps : cap_bset) + + +#define VX_DEBUG_ACC_RSS 0 +#define VX_DEBUG_ACC_VM 0 +#define VX_DEBUG_ACC_VML 0 + +#undef vxdprintk +#if (VX_DEBUG_ACC_RSS) || (VX_DEBUG_ACC_VM) || (VX_DEBUG_ACC_VML) +#define vxdprintk(x...) printk("vxd: " x) +#else +#define vxdprintk(x...) +#endif + +#define vx_acc_page(m, d, v, r) \ + __vx_acc_page(&(m->v), m->mm_vx_info, r, d, __FILE__, __LINE__) + +static inline void __vx_acc_page(unsigned long *v, struct vx_info *vxi, + int res, int dir, char *file, int line) +{ + if (v) { + if (dir > 0) + ++(*v); + else + --(*v); + } + if (vxi) { + if (dir > 0) + atomic_inc(&vxi->limit.res[res]); + else + atomic_dec(&vxi->limit.res[res]); + } +} + + +#define vx_acc_pages(m, p, v, r) \ + __vx_acc_pages(&(m->v), m->mm_vx_info, r, p, __FILE__, __LINE__) + +static inline void __vx_acc_pages(unsigned long *v, struct vx_info *vxi, + int res, int pages, char *file, int line) +{ + if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) || + (res == RLIMIT_AS && VX_DEBUG_ACC_VM) || + (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML)) + vxdprintk("vx_acc_pages [%5d,%2d]: %5d += %5d in %s:%d\n", + (vxi?vxi->vx_id:-1), res, + (vxi?atomic_read(&vxi->limit.res[res]):0), + pages, file, line); + if (pages == 0) + return; + if (v) + *v += pages; + if (vxi) + atomic_add(pages, &vxi->limit.res[res]); +} + + + +#define vx_acc_vmpage(m,d) vx_acc_page(m, d, total_vm, RLIMIT_AS) +#define vx_acc_vmlpage(m,d) vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_rsspage(m,d) vx_acc_page(m, d, rss, RLIMIT_RSS) + +#define vx_acc_vmpages(m,p) vx_acc_pages(m, p, total_vm, RLIMIT_AS) +#define vx_acc_vmlpages(m,p) vx_acc_pages(m, p, locked_vm, 
RLIMIT_MEMLOCK) +#define vx_acc_rsspages(m,p) vx_acc_pages(m, p, rss, RLIMIT_RSS) + +#define vx_pages_add(s,r,p) __vx_acc_pages(0, s, r, p, __FILE__, __LINE__) +#define vx_pages_sub(s,r,p) __vx_pages_add(s, r, -(p)) + +#define vx_vmpages_inc(m) vx_acc_vmpage(m, 1) +#define vx_vmpages_dec(m) vx_acc_vmpage(m,-1) +#define vx_vmpages_add(m,p) vx_acc_vmpages(m, p) +#define vx_vmpages_sub(m,p) vx_acc_vmpages(m,-(p)) + +#define vx_vmlocked_inc(m) vx_acc_vmlpage(m, 1) +#define vx_vmlocked_dec(m) vx_acc_vmlpage(m,-1) +#define vx_vmlocked_add(m,p) vx_acc_vmlpages(m, p) +#define vx_vmlocked_sub(m,p) vx_acc_vmlpages(m,-(p)) + +#define vx_rsspages_inc(m) vx_acc_rsspage(m, 1) +#define vx_rsspages_dec(m) vx_acc_rsspage(m,-1) +#define vx_rsspages_add(m,p) vx_acc_rsspages(m, p) +#define vx_rsspages_sub(m,p) vx_acc_rsspages(m,-(p)) + + + +#define vx_pages_avail(m, p, r) \ + __vx_pages_avail((m)->mm_vx_info, (r), (p), __FILE__, __LINE__) + +static inline int __vx_pages_avail(struct vx_info *vxi, + int res, int pages, char *file, int line) +{ + if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) || + (res == RLIMIT_AS && VX_DEBUG_ACC_VM) || + (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML)) + printk("vx_pages_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n", + (vxi?vxi->vx_id:-1), res, + (vxi?vxi->limit.rlim[res]:1), + (vxi?atomic_read(&vxi->limit.res[res]):0), + pages, file, line); + if (!vxi) + return 1; + if (vxi->limit.rlim[res] == RLIM_INFINITY) + return 1; + if (atomic_read(&vxi->limit.res[res]) + pages < vxi->limit.rlim[res]) + return 1; + return 0; +} + +#define vx_vmpages_avail(m,p) vx_pages_avail(m, p, RLIMIT_AS) +#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK) +#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS) + +/* file limits */ + +#define VX_DEBUG_ACC_FILE 0 +#define VX_DEBUG_ACC_OPENFD 0 + +#undef vxdprintk +#if (VX_DEBUG_ACC_FILE) || (VX_DEBUG_ACC_OPENFD) +#define vxdprintk(x...) printk("vxd: " x) +#else +#define vxdprintk(x...) 
+#endif + + +#define vx_acc_cres(v,d,r) \ + __vx_acc_cres((v), (r), (d), __FILE__, __LINE__) + +static inline void __vx_acc_cres(struct vx_info *vxi, + int res, int dir, char *file, int line) +{ + if (vxi) { + if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) || + (res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD)) + printk("vx_acc_cres[%5d,%2d]: %5d%s in %s:%d\n", + (vxi?vxi->vx_id:-1), res, + (vxi?atomic_read(&vxi->limit.res[res]):0), + (dir>0)?"++":"--", file, line); + if (dir > 0) + atomic_inc(&vxi->limit.res[res]); + else + atomic_dec(&vxi->limit.res[res]); + } +} + +#define vx_files_inc(f) vx_acc_cres(current->vx_info, 1, RLIMIT_NOFILE) +#define vx_files_dec(f) vx_acc_cres(current->vx_info,-1, RLIMIT_NOFILE) + +#define vx_openfd_inc(f) vx_acc_cres(current->vx_info, 1, RLIMIT_OPENFD) +#define vx_openfd_dec(f) vx_acc_cres(current->vx_info,-1, RLIMIT_OPENFD) + +#define vx_cres_avail(v,n,r) \ + __vx_cres_avail((v), (r), (n), __FILE__, __LINE__) + +static inline int __vx_cres_avail(struct vx_info *vxi, + int res, int num, char *file, int line) +{ + if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) || + (res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD)) + printk("vx_cres_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n", + (vxi?vxi->vx_id:-1), res, + (vxi?vxi->limit.rlim[res]:1), + (vxi?atomic_read(&vxi->limit.res[res]):0), + num, file, line); + if (!vxi) + return 1; + if (vxi->limit.rlim[res] == RLIM_INFINITY) + return 1; + if (vxi->limit.rlim[res] < atomic_read(&vxi->limit.res[res]) + num) + return 0; + return 1; +} + +#define vx_files_avail(n) \ + vx_cres_avail(current->vx_info, (n), RLIMIT_NOFILE) + +#define vx_openfd_avail(n) \ + vx_cres_avail(current->vx_info, (n), RLIMIT_OPENFD) + +/* socket limits */ + +#define vx_sock_inc(f) vx_acc_cres(current->vx_info, 1, VLIMIT_SOCK) +#define vx_sock_dec(f) vx_acc_cres(current->vx_info,-1, VLIMIT_SOCK) + +#define vx_sock_avail(n) \ + vx_cres_avail(current->vx_info, (n), VLIMIT_SOCK) + +/* procfs ioctls */ + +#define FIOC_GETXFLG 
_IOR('x', 5, long) +#define FIOC_SETXFLG _IOW('x', 6, long) + +/* utsname virtualization */ + +static inline struct new_utsname *vx_new_utsname(void) +{ + if (current->vx_info) + return ¤t->vx_info->cvirt.utsname; + return &system_utsname; +} + +#define vx_new_uts(x) ((vx_new_utsname())->x) + +/* generic flag merging */ + +#define vx_mask_flags(v,f,m) (((v) & ~(m)) | ((f) & (m))) + +#define vx_mask_mask(v,f,m) (((v) & ~(m)) | ((v) & (f) & (m))) + + +/* socket accounting */ + +#include + +static inline int vx_sock_type(int family) +{ + int type = 4; + + if (family > 0 && family < 3) + type = family; + else if (family == PF_INET6) + type = 3; + return type; +} + +#define vx_acc_sock(v,f,p,s) \ + __vx_acc_sock((v), (f), (p), (s), __FILE__, __LINE__) + +static inline void __vx_acc_sock(struct vx_info *vxi, + int family, int pos, int size, char *file, int line) +{ + if (vxi) { + int type = vx_sock_type(family); + + atomic_inc(&vxi->cacct.sock[type][pos].count); + atomic_add(size, &vxi->cacct.sock[type][pos].total); + } +} + +#define vx_sock_recv(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, (s)) +#define vx_sock_send(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, (s)) +#define vx_sock_fail(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, (s)) + + +#define sock_vx_init(s) do { \ + (s)->sk_xid = 0; \ + (s)->sk_vx_info = NULL; \ + } while (0) + + +/* pid faking stuff */ + + +#define vx_map_tgid(v,p) \ + __vx_map_tgid((v), (p), __FILE__, __LINE__) + +static inline int __vx_map_tgid(struct vx_info *vxi, int pid, + char *file, int line) +{ + if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) { + vxdprintk("vx_map_tgid: %p/%llx: %d -> %d in %s:%d\n", + vxi, vxi->vx_flags, pid, + (pid == vxi->vx_initpid)?1:pid, + file, line); + if (pid == vxi->vx_initpid) + return 1; + } + return pid; +} + +#define vx_rmap_tgid(v,p) \ + __vx_rmap_tgid((v), (p), __FILE__, __LINE__) + +static inline int __vx_rmap_tgid(struct vx_info *vxi, int pid, + char 
*file, int line) +{ + if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) { + vxdprintk("vx_rmap_tgid: %p/%llx: %d -> %d in %s:%d\n", + vxi, vxi->vx_flags, pid, + (pid == 1)?vxi->vx_initpid:pid, + file, line); + if ((pid == 1) && vxi->vx_initpid) + return vxi->vx_initpid; + } + return pid; +} + +#undef vxdprintk +#define vxdprintk(x...) + +#endif diff --git a/include/linux/vserver.h b/include/linux/vserver.h new file mode 100644 index 000000000..2c39ebbe0 --- /dev/null +++ b/include/linux/vserver.h @@ -0,0 +1,9 @@ +#ifndef _LINUX_VSERVER_H +#define _LINUX_VSERVER_H + +#include +#include +#include +#include + +#endif diff --git a/include/linux/vserver/context.h b/include/linux/vserver/context.h new file mode 100644 index 000000000..76926038e --- /dev/null +++ b/include/linux/vserver/context.h @@ -0,0 +1,176 @@ +#ifndef _VX_CONTEXT_H +#define _VX_CONTEXT_H + +#include + +#define MAX_S_CONTEXT 65535 /* Arbitrary limit */ +#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */ + +#define VX_DYNAMIC_ID ((uint32_t)-1) /* id for dynamic context */ + +#ifdef __KERNEL__ + +#include +#include + +#define _VX_INFO_DEF_ +#include "cvirt.h" +#include "limit.h" +#include "sched.h" +#undef _VX_INFO_DEF_ + +struct vx_info { + struct list_head vx_list; /* linked list of contexts */ + xid_t vx_id; /* context id */ + atomic_t vx_refcount; /* refcount */ + struct vx_info *vx_parent; /* parent context */ + + struct namespace *vx_namespace; /* private namespace */ + struct fs_struct *vx_fs; /* private namespace fs */ + uint64_t vx_flags; /* VX_INFO_xxx */ + uint64_t vx_bcaps; /* bounding caps (system) */ + uint64_t vx_ccaps; /* context caps (vserver) */ + + pid_t vx_initpid; /* PID of fake init process */ + + struct _vx_limit limit; /* vserver limits */ + struct _vx_sched sched; /* vserver scheduler */ + struct _vx_cvirt cvirt; /* virtual/bias stuff */ + struct _vx_cacct cacct; /* context accounting */ + + char vx_name[65]; /* vserver name */ +}; + + +extern spinlock_t 
vxlist_lock; +extern struct list_head vx_infos; + + +#define VX_ADMIN 0x0001 +#define VX_WATCH 0x0002 +#define VX_DUMMY 0x0008 + +#define VX_IDENT 0x0010 +#define VX_EQUIV 0x0020 +#define VX_PARENT 0x0040 +#define VX_CHILD 0x0080 + +#define VX_ARG_MASK 0x00F0 + +#define VX_DYNAMIC 0x0100 +#define VX_STATIC 0x0200 + +#define VX_ATR_MASK 0x0F00 + + +void free_vx_info(struct vx_info *); + +extern struct vx_info *find_vx_info(int); +extern struct vx_info *find_or_create_vx_info(int); +extern int vx_info_id_valid(int); + +extern int vx_migrate_task(struct task_struct *, struct vx_info *); + +#endif /* __KERNEL__ */ + +#include "switch.h" + +/* vinfo commands */ + +#define VCMD_task_xid VC_CMD(VINFO, 1, 0) +#define VCMD_task_nid VC_CMD(VINFO, 2, 0) + +#ifdef __KERNEL__ +extern int vc_task_xid(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_vx_info VC_CMD(VINFO, 5, 0) +#define VCMD_nx_info VC_CMD(VINFO, 6, 0) + +struct vcmd_vx_info_v0 { + uint32_t xid; + uint32_t initpid; + /* more to come */ +}; + +#ifdef __KERNEL__ +extern int vc_vx_info(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_ctx_create VC_CMD(VPROC, 1, 0) +#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 0) + +#ifdef __KERNEL__ +extern int vc_ctx_create(uint32_t, void __user *); +extern int vc_ctx_migrate(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0) +#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0) + +struct vcmd_ctx_flags_v0 { + uint64_t flagword; + uint64_t mask; +}; + +#ifdef __KERNEL__ +extern int vc_get_cflags(uint32_t, void __user *); +extern int vc_set_cflags(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VXF_INFO_LOCK 0x00000001 +#define VXF_INFO_SCHED 0x00000002 +#define VXF_INFO_NPROC 0x00000004 +#define VXF_INFO_PRIVATE 0x00000008 + +#define VXF_INFO_INIT 0x00000010 +#define VXF_INFO_HIDE 0x00000020 +#define VXF_INFO_ULIMIT 0x00000040 +#define VXF_INFO_NSPACE 0x00000080 + +#define VXF_SCHED_HARD 
0x00000100 +#define VXF_SCHED_PRIO 0x00000200 +#define VXF_SCHED_PAUSE 0x00000400 + +#define VXF_VIRT_MEM 0x00010000 +#define VXF_VIRT_UPTIME 0x00020000 +#define VXF_VIRT_CPU 0x00040000 + +#define VXF_HIDE_MOUNT 0x01000000 +#define VXF_HIDE_NETIF 0x02000000 + +#define VXF_STATE_SETUP (1ULL<<32) +#define VXF_STATE_INIT (1ULL<<33) + +#define VXF_FORK_RSS (1ULL<<48) + +#define VXF_ONE_TIME (0x0003ULL<<32) + +#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 0) +#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 0) + +struct vcmd_ctx_caps_v0 { + uint64_t bcaps; + uint64_t ccaps; + uint64_t cmask; +}; + +#ifdef __KERNEL__ +extern int vc_get_ccaps(uint32_t, void __user *); +extern int vc_set_ccaps(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VXC_SET_UTSNAME 0x00000001 +#define VXC_SET_RLIMIT 0x00000002 + +#define VXC_ICMP_PING 0x00000100 + +#define VXC_SECURE_MOUNT 0x00010000 + + +#endif /* _VX_CONTEXT_H */ diff --git a/include/linux/vserver/cvirt.h b/include/linux/vserver/cvirt.h new file mode 100644 index 000000000..ba3a25356 --- /dev/null +++ b/include/linux/vserver/cvirt.h @@ -0,0 +1,133 @@ +#if defined(__KERNEL__) && defined(_VX_INFO_DEF_) + +#include +#include +#include +#include +#include + +/* context sub struct */ + +struct _vx_cvirt { + int max_threads; + + unsigned int bias_cswtch; + struct timespec bias_idle; + struct timespec bias_tp; + uint64_t bias_jiffies; + + struct new_utsname utsname; +}; + +struct sock_acc { + atomic_t count; + atomic_t total; +}; + +struct _vx_cacct { + atomic_t nr_threads; + int nr_running; + + unsigned long total_forks; + + struct sock_acc sock[5][3]; +}; + + +static inline long vx_sock_count(struct _vx_cacct *cacct, int type, int pos) +{ + return atomic_read(&cacct->sock[type][pos].count); +} + + +static inline long vx_sock_total(struct _vx_cacct *cacct, int type, int pos) +{ + return atomic_read(&cacct->sock[type][pos].total); +} + + +extern uint64_t vx_idle_jiffies(void); + +static inline void vx_info_init_cvirt(struct _vx_cvirt 
*cvirt) +{ + uint64_t idle_jiffies = vx_idle_jiffies(); + + // new->virt.bias_cswtch = kstat.context_swtch; + cvirt->bias_jiffies = get_jiffies_64(); + + jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle); + do_posix_clock_monotonic_gettime(&cvirt->bias_tp); + + down_read(&uts_sem); + cvirt->utsname = system_utsname; + up_read(&uts_sem); +} + +static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt) +{ + return; +} + +static inline void vx_info_init_cacct(struct _vx_cacct *cacct) +{ + int i,j; + + atomic_set(&cacct->nr_threads, 1); + for (i=0; i<5; i++) { + for (j=0; j<3; j++) { + atomic_set(&cacct->sock[i][j].count, 0); + atomic_set(&cacct->sock[i][j].total, 0); + } + } +} + +static inline void vx_info_exit_cacct(struct _vx_cacct *cacct) +{ + return; +} + +static inline int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer) +{ + int length = 0; + return length; +} + +static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer) +{ + int i,j, length = 0; + static char *type[] = { "UNSPEC", "UNIX", "INET", "INET6", "OTHER" }; + + for (i=0; i<5; i++) { + length += sprintf(buffer + length, + "%s:", type[i]); + for (j=0; j<3; j++) { + length += sprintf(buffer + length, + "\t%12lu/%-12lu" + ,vx_sock_count(cacct, i, j) + ,vx_sock_total(cacct, i, j) + ); + } + buffer[length++] = '\n'; + } + return length; +} + +#else /* _VX_INFO_DEF_ */ +#ifndef _VX_CVIRT_H +#define _VX_CVIRT_H + +#include "switch.h" + +/* cvirt vserver commands */ + + +#ifdef __KERNEL__ + +struct timespec; + +void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle); + +#endif /* __KERNEL__ */ + +#endif /* _VX_CVIRT_H */ +#endif diff --git a/include/linux/vserver/inode.h b/include/linux/vserver/inode.h new file mode 100644 index 000000000..aa8852f43 --- /dev/null +++ b/include/linux/vserver/inode.h @@ -0,0 +1,67 @@ +#ifndef _VX_INODE_H +#define _VX_INODE_H + +#include "switch.h" + +/* inode vserver commands */ + +#define VCMD_get_iattr_v0 VC_CMD(INODE, 1, 0) +#define 
VCMD_set_iattr_v0 VC_CMD(INODE, 2, 0) + +#define VCMD_get_iattr VC_CMD(INODE, 1, 1) +#define VCMD_set_iattr VC_CMD(INODE, 2, 1) + +struct vcmd_ctx_iattr_v0 { + /* device handle in id */ + uint64_t ino; + uint32_t xid; + uint32_t flags; + uint32_t mask; +}; + +struct vcmd_ctx_iattr_v1 { + const char __user *name; + uint32_t xid; + uint32_t flags; + uint32_t mask; +}; + + +#define IATTR_XID 0x01000000 + +#define IATTR_ADMIN 0x00000001 +#define IATTR_WATCH 0x00000002 +#define IATTR_HIDE 0x00000004 +#define IATTR_FLAGS 0x00000007 + +#define IATTR_BARRIER 0x00010000 +#define IATTR_IUNLINK 0x00020000 +#define IATTR_IMMUTABLE 0x00040000 + + +#ifdef CONFIG_PROC_SECURE +#define IATTR_PROC_DEFAULT ( IATTR_ADMIN | IATTR_HIDE ) +#define IATTR_PROC_SYMLINK ( IATTR_ADMIN ) +#else +#define IATTR_PROC_DEFAULT ( IATTR_ADMIN ) +#define IATTR_PROC_SYMLINK ( IATTR_ADMIN ) +#endif + +#ifdef __KERNEL__ + +#define vx_hide_check(c,m) (((m) & IATTR_HIDE) ? vx_check(c,m) : 1) + +extern int vc_get_iattr_v0(uint32_t, void __user *); +extern int vc_set_iattr_v0(uint32_t, void __user *); + +extern int vc_get_iattr(uint32_t, void __user *); +extern int vc_set_iattr(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +/* inode ioctls */ + +#define FIOC_GETXFLG _IOR('x', 5, long) +#define FIOC_SETXFLG _IOW('x', 6, long) + +#endif /* _VX_INODE_H */ diff --git a/include/linux/vserver/legacy.h b/include/linux/vserver/legacy.h new file mode 100644 index 000000000..1372c0fa6 --- /dev/null +++ b/include/linux/vserver/legacy.h @@ -0,0 +1,54 @@ +#ifndef _VX_LEGACY_H +#define _VX_LEGACY_H + +#include "switch.h" +#include "network.h" + +/* compatibiliy vserver commands */ + +#define VCMD_new_s_context VC_CMD(COMPAT, 1, 1) +#define VCMD_set_ipv4root VC_CMD(COMPAT, 2, 3) + +#define VCMD_create_context VC_CMD(VSETUP, 1, 0) + +/* compatibiliy vserver arguments */ + +struct vcmd_new_s_context_v1 { + uint32_t remove_cap; + uint32_t flags; +}; + +struct vcmd_set_ipv4root_v3 { + /* number of pairs in id */ + 
uint32_t broadcast; + struct { + uint32_t ip; + uint32_t mask; + } nx_mask_pair[NB_IPV4ROOT]; +}; + + +#define VX_INFO_LOCK 1 /* Can't request a new vx_id */ +#define VX_INFO_NPROC 4 /* Limit number of processes in a context */ +#define VX_INFO_PRIVATE 8 /* Noone can join this security context */ +#define VX_INFO_INIT 16 /* This process wants to become the */ + /* logical process 1 of the security */ + /* context */ +#define VX_INFO_HIDEINFO 32 /* Hide some information in /proc */ +#define VX_INFO_ULIMIT 64 /* Use ulimit of the current process */ + /* to become the global limits */ + /* of the context */ +#define VX_INFO_NAMESPACE 128 /* save private namespace */ + + +#define NB_S_CONTEXT 16 + +#define NB_IPV4ROOT 16 + + +#ifdef __KERNEL__ +extern int vc_new_s_context(uint32_t, void __user *); +extern int vc_set_ipv4root(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_LEGACY_H */ diff --git a/include/linux/vserver/limit.h b/include/linux/vserver/limit.h new file mode 100644 index 000000000..27496c1f2 --- /dev/null +++ b/include/linux/vserver/limit.h @@ -0,0 +1,117 @@ +#if defined(__KERNEL__) && defined(_VX_INFO_DEF_) + +#include +#include + +/* context sub struct */ + +#define RLIMIT_OPENFD 12 + +#define NUM_RLIMITS 16 + +#define VLIMIT_SOCK 16 + + +struct _vx_limit { + atomic_t ticks; + + unsigned long rlim[NUM_RLIMITS]; /* Per context limit */ + atomic_t res[NUM_RLIMITS]; /* Current value */ +}; + +static inline void vx_info_init_limit(struct _vx_limit *limit) +{ + int lim; + + for (lim=0; lim<NUM_RLIMITS; lim++) { + limit->rlim[lim] = RLIM_INFINITY; + atomic_set(&limit->res[lim], 0); + } +} + +extern unsigned int vx_debug_limit; + +static inline void vx_info_exit_limit(struct _vx_limit *limit) +{ + int lim, value; + + for (lim=0; lim<NUM_RLIMITS; lim++) { + value = atomic_read(&limit->res[lim]); + if (value && vx_debug_limit) + printk("!!! 
limit: %p[%d] = %d on exit.\n", + limit, lim, value); + } +} + + +static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer) +{ + return sprintf(buffer, + "PROC:\t%8d/%ld\n" + "VM:\t%8d/%ld\n" + "VML:\t%8d/%ld\n" + "RSS:\t%8d/%ld\n" + "FILES:\t%8d/%ld\n" + "OFD:\t%8d/%ld\n" + ,atomic_read(&limit->res[RLIMIT_NPROC]) + ,limit->rlim[RLIMIT_NPROC] + ,atomic_read(&limit->res[RLIMIT_AS]) + ,limit->rlim[RLIMIT_AS] + ,atomic_read(&limit->res[RLIMIT_MEMLOCK]) + ,limit->rlim[RLIMIT_MEMLOCK] + ,atomic_read(&limit->res[RLIMIT_RSS]) + ,limit->rlim[RLIMIT_RSS] + ,atomic_read(&limit->res[RLIMIT_NOFILE]) + ,limit->rlim[RLIMIT_NOFILE] + ,atomic_read(&limit->res[RLIMIT_OPENFD]) + ,limit->rlim[RLIMIT_OPENFD] + ); +} + +#else /* _VX_INFO_DEF_ */ +#ifndef _VX_LIMIT_H +#define _VX_LIMIT_H + +#include "switch.h" + +/* rlimit vserver commands */ + +#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0) +#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0) +#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0) + +struct vcmd_ctx_rlimit_v0 { + uint32_t id; + uint64_t minimum; + uint64_t softlimit; + uint64_t maximum; +}; + +struct vcmd_ctx_rlimit_mask_v0 { + uint32_t minimum; + uint32_t softlimit; + uint32_t maximum; +}; + +#define CRLIM_UNSET (0ULL) +#define CRLIM_INFINITY (~0ULL) +#define CRLIM_KEEP (~1ULL) + +#ifdef __KERNEL__ + +#include + +extern int vc_get_rlimit(uint32_t, void __user *); +extern int vc_set_rlimit(uint32_t, void __user *); +extern int vc_get_rlimit_mask(uint32_t, void __user *); + +struct sysinfo; + +void vx_vsi_meminfo(struct sysinfo *); +void vx_vsi_swapinfo(struct sysinfo *); + + +#endif /* __KERNEL__ */ + +#endif /* _VX_LIMIT_H */ +#endif diff --git a/include/linux/vserver/namespace.h b/include/linux/vserver/namespace.h new file mode 100644 index 000000000..140fc79f2 --- /dev/null +++ b/include/linux/vserver/namespace.h @@ -0,0 +1,55 @@ +#ifndef _VX_NAMESPACE_H +#define _VX_NAMESPACE_H + +#include + + +/* virtual host info names */ + +#define VCMD_vx_set_vhi_name 
VC_CMD(VHOST, 1, 0) +#define VCMD_vx_get_vhi_name VC_CMD(VHOST, 2, 0) + +struct vcmd_vx_vhi_name_v0 { + uint32_t field; + char name[65]; +}; + + +enum vx_vhi_name_field { + VHIN_CONTEXT=0, + VHIN_SYSNAME, + VHIN_NODENAME, + VHIN_RELEASE, + VHIN_VERSION, + VHIN_MACHINE, + VHIN_DOMAINNAME, +}; + + +#ifdef __KERNEL__ + +#include + +extern int vc_set_vhi_name(uint32_t, void __user *); +extern int vc_get_vhi_name(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_enter_namespace VC_CMD(PROCALT, 1, 0) +#define VCMD_cleanup_namespace VC_CMD(PROCALT, 2, 0) +#define VCMD_set_namespace VC_CMD(PROCALT, 3, 0) + +#ifdef __KERNEL__ + +struct vx_info; +struct namespace; +struct fs_struct; + +extern int vx_set_namespace(struct vx_info *, struct namespace *, struct fs_struct *); + +extern int vc_enter_namespace(uint32_t, void __user *); +extern int vc_cleanup_namespace(uint32_t, void __user *); +extern int vc_set_namespace(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_NAMESPACE_H */ diff --git a/include/linux/vserver/network.h b/include/linux/vserver/network.h new file mode 100644 index 000000000..b3c39b062 --- /dev/null +++ b/include/linux/vserver/network.h @@ -0,0 +1,142 @@ +#ifndef _VX_NETWORK_H +#define _VX_NETWORK_H + +#define MAX_N_CONTEXT 65535 /* Arbitrary limit */ + +#define IP_DYNAMIC_ID ((uint32_t)-1) /* id for dynamic context */ + +#define NB_IPV4ROOT 16 + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + + +struct nx_info { + struct list_head nx_list; /* linked list of nxinfos */ + nid_t nx_id; /* vnet id */ + atomic_t nx_refcount; + + uint64_t nx_flags; /* network flag word */ + uint64_t nx_ncaps; /* network capabilities */ + + int nbipv4; + __u32 ipv4[NB_IPV4ROOT]; /* Process can only bind to these IPs */ + /* The first one is used to connect */ + /* and for bind any service */ + /* The other must be used explicity */ + __u32 mask[NB_IPV4ROOT]; /* Netmask for each ipv4 */ + /* Used to select the proper 
source */ + /* address for sockets */ + __u32 v4_bcast; /* Broadcast address to receive UDP */ + + char nx_name[65]; /* network context name */ +}; + + +extern spinlock_t nxlist_lock; +extern struct list_head nx_infos; + + +void free_nx_info(struct nx_info *); +struct nx_info *create_nx_info(void); + +extern struct nx_info *find_nx_info(int); +extern int nx_info_id_valid(int); + +struct in_ifaddr; +struct net_device; + +int ifa_in_nx_info(struct in_ifaddr *, struct nx_info *); +int dev_in_nx_info(struct net_device *, struct nx_info *); + + +#endif /* __KERNEL__ */ + +#include "switch.h" + +/* vinfo commands */ + +#define VCMD_task_nid VC_CMD(VINFO, 2, 0) + +#ifdef __KERNEL__ +extern int vc_task_nid(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_nx_info VC_CMD(VINFO, 6, 0) + +struct vcmd_nx_info_v0 { + uint32_t nid; + /* more to come */ +}; + +#ifdef __KERNEL__ +extern int vc_nx_info(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_net_create VC_CMD(VNET, 1, 0) +#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0) + +#define VCMD_net_add VC_CMD(NETALT, 1, 0) +#define VCMD_net_remove VC_CMD(NETALT, 2, 0) + +struct vcmd_net_nx_v0 { + uint16_t type; + uint16_t count; + uint32_t ip[4]; + uint32_t mask[4]; + /* more to come */ +}; + +// IPN_TYPE_IPV4 + + +#ifdef __KERNEL__ +extern int vc_net_create(uint32_t, void __user *); +extern int vc_net_migrate(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0) +#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0) + +struct vcmd_net_flags_v0 { + uint64_t flagword; + uint64_t mask; +}; + +#ifdef __KERNEL__ +extern int vc_get_nflags(uint32_t, void __user *); +extern int vc_set_nflags(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define IPF_STATE_SETUP (1ULL<<32) + + +#define IPF_ONE_TIME (0x0001ULL<<32) + +#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0) +#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0) + +struct vcmd_net_caps_v0 { + uint64_t ncaps; + uint64_t 
cmask; +}; + +#ifdef __KERNEL__ +extern int vc_get_ncaps(uint32_t, void __user *); +extern int vc_set_ncaps(uint32_t, void __user *); + +#endif /* __KERNEL__ */ + +#define IPC_WOSSNAME 0x00000001 + + +#endif /* _VX_NETWORK_H */ diff --git a/include/linux/vserver/sched.h b/include/linux/vserver/sched.h new file mode 100644 index 000000000..d1a206800 --- /dev/null +++ b/include/linux/vserver/sched.h @@ -0,0 +1,139 @@ +#if defined(__KERNEL__) && defined(_VX_INFO_DEF_) + +#include +#include +#include +#include +#include + +/* context sub struct */ + +struct _vx_sched { + spinlock_t tokens_lock; /* lock for this structure */ + + int fill_rate; /* Fill rate: add X tokens... */ + int interval; /* Divisor: per Y jiffies */ + atomic_t tokens; /* number of CPU tokens in this context */ + int tokens_min; /* Limit: minimum for unhold */ + int tokens_max; /* Limit: no more than N tokens */ + uint32_t jiffies; /* add an integral multiple of Y to this */ + + uint64_t ticks; /* token tick events */ + cpumask_t cpus_allowed; /* cpu mask for context */ +}; + +static inline void vx_info_init_sched(struct _vx_sched *sched) +{ + /* scheduling; hard code starting values as constants */ + sched->fill_rate = 1; + sched->interval = 4; + sched->tokens_min = HZ >> 4; + sched->tokens_max = HZ >> 1; + sched->jiffies = jiffies; + sched->tokens_lock = SPIN_LOCK_UNLOCKED; + + atomic_set(&sched->tokens, HZ >> 2); + sched->cpus_allowed = CPU_MASK_ALL; +} + +static inline void vx_info_exit_sched(struct _vx_sched *sched) +{ + return; +} + +static inline int vx_info_proc_sched(struct _vx_sched *sched, char *buffer) +{ + return sprintf(buffer, + "Ticks:\t%16lld\n" + "Token:\t\t%8d\n" + "FillRate:\t%8d\n" + "Interval:\t%8d\n" + "TokensMin:\t%8d\n" + "TokensMax:\t%8d\n" + ,sched->ticks + ,atomic_read(&sched->tokens) + ,sched->fill_rate + ,sched->interval + ,sched->tokens_min + ,sched->tokens_max + ); +} + + +#else /* _VX_INFO_DEF_ */ +#ifndef _VX_SCHED_H +#define _VX_SCHED_H + +#include "switch.h" + +/* 
sched vserver commands */ + +#define VCMD_set_sched VC_CMD(SCHED, 1, 2) + +struct vcmd_set_sched_v2 { + int32_t fill_rate; + int32_t interval; + int32_t tokens; + int32_t tokens_min; + int32_t tokens_max; + uint64_t cpu_mask; +}; + +#define SCHED_KEEP (-2) + +#ifdef __KERNEL__ + +extern int vc_set_sched_v1(uint32_t, void __user *); +extern int vc_set_sched(uint32_t, void __user *); + + +#define VAVAVOOM_RATIO 50 + +#include "context.h" + + +/* scheduling stuff */ + +int effective_vavavoom(struct task_struct *, int); + +int vx_tokens_recalc(struct vx_info *); + +/* new stuff ;) */ + +static inline int vx_tokens_avail(struct vx_info *vxi) +{ + return atomic_read(&vxi->sched.tokens); +} + +static inline void vx_consume_token(struct vx_info *vxi) +{ + atomic_dec(&vxi->sched.tokens); +} + +static inline int vx_need_resched(struct task_struct *p) +{ +#ifdef CONFIG_VSERVER_HARDCPU + struct vx_info *vxi = p->vx_info; + + if (vxi) { + int tokens; + + p->time_slice--; + if (atomic_read(&vxi->vx_refcount) < 1) + printk("need_resched: p=%p, s=%ld, ref=%d, id=%d/%d\n", + p, p->state, atomic_read(&vxi->vx_refcount), + vxi->vx_id, p->xid); + if ((tokens = vx_tokens_avail(vxi)) > 0) + vx_consume_token(vxi); + return ((p->time_slice == 0) || (tokens < 1)); + } +#endif + p->time_slice--; + return (p->time_slice == 0); +} + + +#endif /* __KERNEL__ */ + +#endif /* _VX_SCHED_H */ +#endif diff --git a/include/linux/vserver/signal.h b/include/linux/vserver/signal.h new file mode 100644 index 000000000..391112768 --- /dev/null +++ b/include/linux/vserver/signal.h @@ -0,0 +1,19 @@ +#ifndef _VX_SIGNAL_H +#define _VX_SIGNAL_H + +#include "switch.h" + +/* context signalling */ + +#define VCMD_ctx_kill VC_CMD(PROCTRL, 1, 0) + +struct vcmd_ctx_kill_v0 { + int32_t pid; + int32_t sig; +}; + +#ifdef __KERNEL__ +extern int vc_ctx_kill(uint32_t, void __user *); + +#endif /* __KERNEL__ */ +#endif /* _VX_SIGNAL_H */ diff --git a/include/linux/vserver/switch.h b/include/linux/vserver/switch.h new file 
mode 100644 index 000000000..5fef6907b --- /dev/null +++ b/include/linux/vserver/switch.h @@ -0,0 +1,95 @@ +#ifndef _VX_SWITCH_H +#define _VX_SWITCH_H + +#include + +#define VC_CATEGORY(c) (((c) >> 24) & 0x3F) +#define VC_COMMAND(c) (((c) >> 16) & 0xFF) +#define VC_VERSION(c) ((c) & 0xFFF) + +#define VC_CMD(c,i,v) ((((VC_CAT_ ## c) & 0x3F) << 24) \ + | (((i) & 0xFF) << 16) | ((v) & 0xFFF)) + +/* + + Syscall Matrix V2.6 + + |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL| + |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | | + |INFO |SETUP | |MOVE | | | | | | + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + SYSTEM |VERSION|VSETUP |VHOST | | | | |DEVICES| | + HOST | 00| 01| 02| 03| 04| 05| | 06| 07| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + CPU | |VPROC |PROCALT|PROCMIG|PROCTRL| | |SCHED. | | + PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + MEMORY | | | | | | | |SWAP | | + | 16| 17| 18| 19| 20| 21| | 22| 23| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + NETWORK| |VNET |NETALT |NETMIG |NETCTL | | |SERIAL | | + | 24| 25| 26| 27| 28| 29| | 30| 31| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + DISK | | | | | | | |INODE | | + VFS | 32| 33| 34| 35| 36| 37| | 38| 39| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + OTHER | | | | | | | |VINFO | | + | 40| 41| 42| 43| 44| 45| | 46| 47| + =======+=======+=======+=======+=======+=======+=======+ +=======+=======+ + SPECIAL| | | | |FLAGS | | | | | + | 48| 49| 50| 51| 52| 53| | 54| 55| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + SPECIAL| | | | |RLIMIT |SYSCALL| | |COMPAT | + | 56| 57| 58| 59| 60|TEST 61| | 62| 63| + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+ + +*/ + +#define VC_CAT_VERSION 0 + +#define 
VC_CAT_VSETUP 1 +#define VC_CAT_VHOST 2 + +#define VC_CAT_VPROC 9 +#define VC_CAT_PROCALT 10 +#define VC_CAT_PROCMIG 11 +#define VC_CAT_PROCTRL 12 + +#define VC_CAT_SCHED 14 + +#define VC_CAT_VNET 25 +#define VC_CAT_NETALT 26 +#define VC_CAT_NETMIG 27 +#define VC_CAT_NETCTRL 28 + +#define VC_CAT_INODE 38 + +#define VC_CAT_VINFO 46 + +#define VC_CAT_FLAGS 52 +#define VC_CAT_RLIMIT 60 + +#define VC_CAT_SYSTEST 61 +#define VC_CAT_COMPAT 63 + +/* interface version */ + +#define VCI_VERSION 0x00010016 + + +/* query version */ + +#define VCMD_get_version VC_CMD(VERSION, 0, 0) + + +#ifdef __KERNEL__ + +#include + +#define ENOTSUP -EOPNOTSUPP + +#else /* __KERNEL__ */ +#define __user +#endif /* __KERNEL__ */ + +#endif /* _VX_SWITCH_H */ diff --git a/include/linux/vserver/xid.h b/include/linux/vserver/xid.h new file mode 100644 index 000000000..ba52c2588 --- /dev/null +++ b/include/linux/vserver/xid.h @@ -0,0 +1,94 @@ +#ifndef _LINUX_XID_H_ +#define _LINUX_XID_H_ + +#ifdef CONFIG_INOXID_NONE + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0xFFFFFFFF + +#define INOXID_XID(uid, gid, xid) (0) + +#define XIDINO_UID(uid, xid) (uid) +#define XIDINO_GID(gid, xid) (gid) + +#endif + + +#ifdef CONFIG_INOXID_GID16 + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0x0000FFFF + +#define INOXID_XID(uid, gid, xid) (((gid) >> 16) & 0xFFFF) + +#define XIDINO_UID(uid, xid) (uid) +#define XIDINO_GID(gid, xid) (((gid) & 0xFFFF) | ((xid) << 16)) + + +#endif + + +#ifdef CONFIG_INOXID_GID24 + +#define MAX_UID 0x00FFFFFF +#define MAX_GID 0x00FFFFFF + +#define INOXID_XID(uid, gid, xid) ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) + +#define XIDINO_UID(uid, xid) (((uid) & 0xFFFFFF) | (((xid) & 0xFF00) << 16)) +#define XIDINO_GID(gid, xid) (((gid) & 0xFFFFFF) | (((xid) & 0x00FF) << 24)) + +#endif + + +#ifdef CONFIG_INOXID_GID32 + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0xFFFFFFFF + +#define INOXID_XID(uid, gid, xid) (xid) + +#define XIDINO_UID(uid, xid) (uid) +#define XIDINO_GID(gid, xid) 
(gid) + +#endif + + +#ifdef CONFIG_INOXID_RUNTIME + +#define MAX_UID 0xFFFFFFFF +#define MAX_GID 0xFFFFFFFF + +#define INOXID_XID(uid, gid, xid) (0) + +#define XIDINO_UID(uid, xid) (uid) +#define XIDINO_GID(gid, xid) (gid) + +#endif + + +#define INOXID_UID(uid, gid) ((uid) & MAX_UID) +#define INOXID_GID(uid, gid) ((gid) & MAX_GID) + +static inline uid_t vx_map_uid(uid_t uid) +{ + if ((uid > MAX_UID) && (uid != -1)) + uid = -2; + return (uid & MAX_UID); +} + +static inline gid_t vx_map_gid(gid_t gid) +{ + if ((gid > MAX_GID) && (gid != -1)) + gid = -2; + return (gid & MAX_GID); +} + + +#ifdef CONFIG_VSERVER_LEGACY +#define FIOC_GETXID _IOR('x', 1, long) +#define FIOC_SETXID _IOW('x', 2, long) +#define FIOC_SETXIDJ _IOW('x', 3, long) +#endif + +#endif /* _LINUX_XID_H_ */ diff --git a/kernel/vserver/Kconfig b/kernel/vserver/Kconfig new file mode 100644 index 000000000..635d8d488 --- /dev/null +++ b/kernel/vserver/Kconfig @@ -0,0 +1,72 @@ +# +# Linux VServer configuration +# + +menu "Linux VServer" + +config VSERVER_LEGACY + bool "Enable Legacy Kernel API" + default y + help + This enables the legacy API used in vs1.xx, which allows + to use older tools (for migration purposes). + +config PROC_SECURE + bool "Enable Proc Security" + depends on PROC_FS + default y + help + Hide proc entries by default for xid>1 + +config VSERVER_HARDCPU + bool "Enable Hard CPU Limits" + depends on EXPERIMENTAL + default n + help + Activate the Hard CPU Limits + +choice + prompt "Persistent Inode Context Tagging" + default INOXID_GID24 + help + This adds persistent context information to filesystems + mounted with the tagxid option. Tagging is a requirement + for per context disk limits and per context quota. + + +config INOXID_NONE + bool "Disabled" + help + no context information is store for inodes + +config INOXID_GID16 + bool "UID32/GID16" + help + reduces GID to 16 bit, but leaves UID at 32 bit. 
+ +config INOXID_GID24 + bool "UID24/GID24" + help + uses the upper 8bit from UID and GID for XID tagging + which leaves 24bit for UID/GID each, which should be + more than sufficient for normal use. + +config INOXID_GID32 + bool "UID32/GID32" + help + this uses otherwise reserved inode fields in the on + disk representation, which limits the use to a few + filesystems (currently ext2 and ext3) + +config INOXID_MAGIC + bool "Runtime" + depends on EXPERIMENTAL + help + inodes are tagged when first accessed, this doesn't + require any persistant information, but might give + funny results for mixed access. + +endchoice + +endmenu + diff --git a/kernel/vserver/Makefile b/kernel/vserver/Makefile new file mode 100644 index 000000000..c035a77cd --- /dev/null +++ b/kernel/vserver/Makefile @@ -0,0 +1,12 @@ +# +# Makefile for the Linux vserver routines. +# + + +obj-y += vserver.o + +vserver-y := switch.o context.o namespace.o sched.o network.o inode.o \ + limit.o cvirt.o signal.o proc.o sysctl.o init.o + +vserver-$(CONFIG_VSERVER_LEGACY) += legacy.o + diff --git a/kernel/vserver/context.c b/kernel/vserver/context.c new file mode 100644 index 000000000..538834c57 --- /dev/null +++ b/kernel/vserver/context.c @@ -0,0 +1,558 @@ +/* + * linux/kernel/vserver/context.c + * + * Virtual Server: Context Support + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 context helper + * V0.02 vx_ctx_kill syscall command + * V0.03 replaced context_info calls + * V0.04 redesign of struct (de)alloc + * V0.05 rlimit basic implementation + * V0.06 task_xid and info commands + * V0.07 context flags and caps + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + + +/* system functions */ + + +LIST_HEAD(vx_infos); + +spinlock_t vxlist_lock + __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; + + +/* + * struct vx_info allocation and deallocation + */ + +static struct vx_info *alloc_vx_info(int id) +{ + struct vx_info *new = NULL; + + 
vxdprintk("alloc_vx_info(%d)\n", id); + /* would this benefit from a slab cache? */ + new = kmalloc(sizeof(struct vx_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct vx_info)); + new->vx_id = id; + INIT_LIST_HEAD(&new->vx_list); + /* rest of init goes here */ + + vx_info_init_limit(&new->limit); + vx_info_init_sched(&new->sched); + vx_info_init_cvirt(&new->cvirt); + vx_info_init_cacct(&new->cacct); + + new->vx_flags = VXF_STATE_SETUP|VXF_STATE_INIT; + new->vx_bcaps = CAP_INIT_EFF_SET; + new->vx_ccaps = 0; + + vxdprintk("alloc_vx_info(%d) = %p\n", id, new); + return new; +} + +void free_vx_info(struct vx_info *vxi) +{ + vxdprintk("free_vx_info(%p)\n", vxi); + if (vxi->vx_namespace) + put_namespace(vxi->vx_namespace); + if (vxi->vx_fs) + put_fs_struct(vxi->vx_fs); + + vx_info_exit_limit(&vxi->limit); + vx_info_exit_sched(&vxi->sched); + vx_info_exit_cvirt(&vxi->cvirt); + vx_info_exit_cacct(&vxi->cacct); + + BUG_ON(atomic_read(&vxi->vx_refcount)); + vxi->vx_id = -1; + + kfree(vxi); +} + + +/* + * struct vx_info search by id + * assumes vxlist_lock is held + */ + +static __inline__ struct vx_info *__find_vx_info(int id) +{ + struct vx_info *vxi; + + list_for_each_entry(vxi, &vx_infos, vx_list) + if (vxi->vx_id == id) + return vxi; + return 0; +} + + +/* + * struct vx_info ref stuff + */ + +struct vx_info *find_vx_info(int id) +{ + struct vx_info *vxi; + + if (id < 0) { + vxi = current->vx_info; + get_vx_info(vxi); + } else { + spin_lock(&vxlist_lock); + if ((vxi = __find_vx_info(id))) + get_vx_info(vxi); + spin_unlock(&vxlist_lock); + } + return vxi; +} + +/* + * verify that id is a valid xid + */ + +int vx_info_id_valid(int id) +{ + int valid; + + spin_lock(&vxlist_lock); + valid = (__find_vx_info(id) != NULL); + spin_unlock(&vxlist_lock); + return valid; +} + + +/* + * dynamic context id ... 
+ */ + +static __inline__ xid_t __vx_dynamic_id(void) +{ + static xid_t seq = MAX_S_CONTEXT; + xid_t barrier = seq; + + do { + if (++seq > MAX_S_CONTEXT) + seq = MIN_D_CONTEXT; + if (!__find_vx_info(seq)) + return seq; + } while (barrier != seq); + return 0; +} + +static struct vx_info * __foc_vx_info(int id, int *err) +{ + struct vx_info *new, *vxi = NULL; + + vxdprintk("foc_vx_info(%d)\n", id); + if (!(new = alloc_vx_info(id))) { + *err = -ENOMEM; + return NULL; + } + + /* dirty hack until Spectator becomes a cap */ + if (id == 0 || id == 1) { + *err = -EBUSY; + return NULL; + } + + spin_lock(&vxlist_lock); + + /* dynamic context requested */ + if (id == VX_DYNAMIC_ID) { + id = __vx_dynamic_id(); + if (!id) { + printk(KERN_ERR "no dynamic context available.\n"); + goto out_unlock; + } + new->vx_id = id; + } + /* existing context requested */ + else if ((vxi = __find_vx_info(id))) { + /* context in setup is not available */ + if (vxi->vx_flags & VXF_STATE_SETUP) { + vxdprintk("foc_vx_info(%d) = %p (not available)\n", id, vxi); + vxi = NULL; + *err = -EBUSY; + } else { + vxdprintk("foc_vx_info(%d) = %p (found)\n", id, vxi); + get_vx_info(vxi); + *err = 0; + } + goto out_unlock; + } + + /* new context requested */ + vxdprintk("foc_vx_info(%d) = %p (new)\n", id, new); + atomic_set(&new->vx_refcount, 1); + list_add(&new->vx_list, &vx_infos); + vxi = new, new = NULL; + *err = 1; + +out_unlock: + spin_unlock(&vxlist_lock); + if (new) + free_vx_info(new); + return vxi; +} + + +struct vx_info *find_or_create_vx_info(int id) +{ + int err; + + return __foc_vx_info(id, &err); +} + + +int vx_migrate_user(struct task_struct *p, struct vx_info *vxi) +{ + struct user_struct *new_user, *old_user; + + if (!p || !vxi) + BUG(); + new_user = alloc_uid(vxi->vx_id, p->uid); + if (!new_user) + return -ENOMEM; + + old_user = p->user; + if (new_user != old_user) { + atomic_inc(&new_user->processes); + atomic_dec(&old_user->processes); + p->user = new_user; + } + free_uid(old_user); + 
return 0; +} + +void vx_mask_bcaps(struct task_struct *p) +{ + struct vx_info *vxi = p->vx_info; + + p->cap_effective &= vxi->vx_bcaps; + p->cap_inheritable &= vxi->vx_bcaps; + p->cap_permitted &= vxi->vx_bcaps; +} + + +#include + +static inline int vx_nofiles_task(struct task_struct *tsk) +{ + struct files_struct *files = tsk->files; + const unsigned long *obptr, *cbptr; + int count, total; + + spin_lock(&files->file_lock); + obptr = files->open_fds->fds_bits; + cbptr = files->close_on_exec->fds_bits; + count = files->max_fds / (sizeof(unsigned long) * 8); + for (total = 0; count > 0; count--) { + if (*obptr) + total += hweight_long(*obptr); + obptr++; + /* if (*cbptr) + total += hweight_long(*cbptr); + cbptr++; */ + } + spin_unlock(&files->file_lock); + return total; +} + +static inline int vx_openfd_task(struct task_struct *tsk) +{ + struct files_struct *files = tsk->files; + const unsigned long *bptr; + int count, total; + + spin_lock(&files->file_lock); + bptr = files->open_fds->fds_bits; + count = files->max_fds / (sizeof(unsigned long) * 8); + for (total = 0; count > 0; count--) { + if (*bptr) + total += hweight_long(*bptr); + bptr++; + } + spin_unlock(&files->file_lock); + return total; +} + +/* + * migrate task to new context + * gets vxi, puts old_vxi on change + */ + +int vx_migrate_task(struct task_struct *p, struct vx_info *vxi) +{ + struct vx_info *old_vxi = task_get_vx_info(p); + int ret = 0; + + if (!p || !vxi) + BUG(); + + vxdprintk("vx_migrate_task(%p,%p[#%d.%d)\n", p, vxi, + vxi->vx_id, atomic_read(&vxi->vx_refcount)); + if (old_vxi == vxi) + goto out; + + if (!(ret = vx_migrate_user(p, vxi))) { + task_lock(p); + if (old_vxi) { + atomic_dec(&old_vxi->cacct.nr_threads); + atomic_dec(&old_vxi->limit.res[RLIMIT_NPROC]); + } + atomic_inc(&vxi->cacct.nr_threads); + atomic_inc(&vxi->limit.res[RLIMIT_NPROC]); + atomic_add(vx_nofiles_task(p), &vxi->limit.res[RLIMIT_NOFILE]); + atomic_add(vx_openfd_task(p), &vxi->limit.res[RLIMIT_OPENFD]); + 
set_vx_info(&p->vx_info, vxi); + p->xid = vxi->vx_id; + vx_mask_bcaps(p); + task_unlock(p); + + put_vx_info(old_vxi); + } +out: + put_vx_info(old_vxi); + return ret; +} + +int vx_set_init(struct vx_info *vxi, struct task_struct *p) +{ + if (!vxi) + return -EINVAL; + if (vxi->vx_initpid) + return -EPERM; + + vxi->vx_initpid = p->tgid; + return 0; +} + + +/* vserver syscall commands below here */ + +/* taks xid and vx_info functions */ + +#include + + +int vc_task_xid(uint32_t id, void __user *data) +{ + xid_t xid; + + if (id) { + struct task_struct *tsk; + + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return -EPERM; + + read_lock(&tasklist_lock); + tsk = find_task_by_pid(id); + xid = (tsk) ? tsk->xid : -ESRCH; + read_unlock(&tasklist_lock); + } + else + xid = current->xid; + return xid; +} + + +int vc_vx_info(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_vx_info_v0 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + + vc_data.xid = vxi->vx_id; + vc_data.initpid = vxi->vx_initpid; + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + + +/* context functions */ + +int vc_ctx_create(uint32_t xid, void __user *data) +{ + // int ret = -ENOMEM; + struct vx_info *new_vxi; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((xid >= MIN_D_CONTEXT) && (xid != VX_DYNAMIC_ID)) + return -EINVAL; + + if (xid < 1) + return -EINVAL; + + new_vxi = __foc_vx_info(xid, &ret); + if (!new_vxi) + return ret; + if (!(new_vxi->vx_flags & VXF_STATE_SETUP)) { + ret = -EEXIST; + goto out_put; + } + + ret = new_vxi->vx_id; + vx_migrate_task(current, new_vxi); +out_put: + put_vx_info(new_vxi); + return ret; +} + + +int vc_ctx_migrate(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* dirty hack 
until Spectator becomes a cap */ + if (id == 1) { + current->xid = 1; + return 0; + } + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + vx_migrate_task(current, vxi); + put_vx_info(vxi); + return 0; +} + + +int vc_get_cflags(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_flags_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + + vc_data.flagword = vxi->vx_flags; + + // vc_data.mask = ~0UL; + /* special STATE flag handling */ + vc_data.mask = vx_mask_flags(~0UL, vxi->vx_flags, VXF_ONE_TIME); + + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_cflags(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_flags_v0 vc_data; + uint64_t mask, trigger; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + + /* special STATE flag handling */ + mask = vx_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME); + trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword); + + if (trigger & VXF_STATE_SETUP) + vx_mask_bcaps(current); + if (trigger & VXF_STATE_INIT) + if (vxi == current->vx_info) + vx_set_init(vxi, current); + + vxi->vx_flags = vx_mask_flags(vxi->vx_flags, + vc_data.flagword, mask); + put_vx_info(vxi); + return 0; +} + +int vc_get_ccaps(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_caps_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + + vc_data.bcaps = vxi->vx_bcaps; + vc_data.ccaps = vxi->vx_ccaps; + vc_data.cmask = ~0UL; + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_ccaps(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_caps_v0 
vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + + vxi->vx_bcaps &= vc_data.bcaps; + vxi->vx_ccaps = vx_mask_flags(vxi->vx_ccaps, + vc_data.ccaps, vc_data.cmask); + put_vx_info(vxi); + return 0; +} + +#include + +EXPORT_SYMBOL_GPL(free_vx_info); +EXPORT_SYMBOL_GPL(vxlist_lock); + diff --git a/kernel/vserver/cvirt.c b/kernel/vserver/cvirt.c new file mode 100644 index 000000000..2b5c81e35 --- /dev/null +++ b/kernel/vserver/cvirt.c @@ -0,0 +1,41 @@ +/* + * linux/kernel/vserver/cvirt.c + * + * Virtual Server: Context Virtualization + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 broken out from limit.c + * + */ + +#include +#include +#include +#include +#include + +#include +#include + + +void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle) +{ + struct vx_info *vxi = current->vx_info; + + set_normalized_timespec(uptime, + uptime->tv_sec - vxi->cvirt.bias_tp.tv_sec, + uptime->tv_nsec - vxi->cvirt.bias_tp.tv_nsec); + if (!idle) + return; + set_normalized_timespec(idle, + idle->tv_sec - vxi->cvirt.bias_idle.tv_sec, + idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec); + return; +} + +uint64_t vx_idle_jiffies() +{ + return init_task.utime + init_task.stime; +} + diff --git a/kernel/vserver/init.c b/kernel/vserver/init.c new file mode 100644 index 000000000..8afd1fc64 --- /dev/null +++ b/kernel/vserver/init.c @@ -0,0 +1,42 @@ +/* + * linux/kernel/init.c + * + * Virtual Server Init + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 basic structure + * + */ + +#include +#include +#include +// #include +#include +#include + +int vserver_register_sysctl(void); +void vserver_unregister_sysctl(void); + + +static int __init init_vserver(void) +{ + int ret = 0; + + vserver_register_sysctl(); + return ret; +} + + +static void __exit exit_vserver(void) +{ + + vserver_unregister_sysctl(); + return; +} + + 
+module_init(init_vserver); +module_exit(exit_vserver); + diff --git a/kernel/vserver/inode.c b/kernel/vserver/inode.c new file mode 100644 index 000000000..87e2849f3 --- /dev/null +++ b/kernel/vserver/inode.c @@ -0,0 +1,220 @@ +/* + * linux/kernel/vserver/inode.c + * + * Virtual Server: File System Support + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 separated from vcontext V0.05 + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + + +static int __vc_get_iattr(struct inode *in, uint32_t *xid, uint32_t *flags, uint32_t *mask) +{ + if (!in || !in->i_sb) + return -ESRCH; + + *flags = IATTR_XID + | (IS_BARRIER(in) ? IATTR_BARRIER : 0) + | (IS_IUNLINK(in) ? IATTR_IUNLINK : 0) + | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0); + *mask = IATTR_IUNLINK | IATTR_IMMUTABLE; + + if (S_ISDIR(in->i_mode)) + *mask |= IATTR_BARRIER; + + if (in->i_sb->s_flags & MS_TAGXID) { + *xid = in->i_xid; + *mask |= IATTR_XID; + } + + if (in->i_sb->s_magic == PROC_SUPER_MAGIC) { + struct proc_dir_entry *entry = PROC_I(in)->pde; + + // check for specific inodes ? 
+ if (entry) + *mask |= IATTR_FLAGS; + if (entry) + *flags |= (entry->vx_flags & IATTR_FLAGS); + else + *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS); + } + return 0; +} + +int vc_get_iattr(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_iattr_v1 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + ret = __vc_get_iattr(nd.dentry->d_inode, + &vc_data.xid, &vc_data.flags, &vc_data.mask); + path_release(&nd); + } + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + +static int __vc_set_iattr(struct dentry *de, uint32_t *xid, uint32_t *flags, uint32_t *mask) +{ + struct inode *in = de->d_inode; + int error = 0, is_proc = 0; + + if (!in || !in->i_sb) + return -ESRCH; + + is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC); + if ((*mask & IATTR_FLAGS) && !is_proc) + return -EINVAL; + if ((*mask & IATTR_XID) && !(in->i_sb->s_flags & MS_TAGXID)) + return -EINVAL; + + down(&in->i_sem); + if (*mask & IATTR_XID) + in->i_xid = *xid; + + if (*mask & IATTR_FLAGS) { + struct proc_dir_entry *entry = PROC_I(in)->pde; + unsigned int iflags = PROC_I(in)->vx_flags; + + iflags = (iflags & ~(*mask & IATTR_FLAGS)) + | (*flags & IATTR_FLAGS); + PROC_I(in)->vx_flags = iflags; + if (entry) + entry->vx_flags = iflags; + } + + if (*mask & (IATTR_BARRIER | IATTR_IUNLINK | IATTR_IMMUTABLE)) { + struct iattr attr; + + attr.ia_valid = ATTR_ATTR_FLAG; + attr.ia_attr_flags = + (IS_IMMUTABLE(in) ? ATTR_FLAG_IMMUTABLE : 0) | + (IS_IUNLINK(in) ? ATTR_FLAG_IUNLINK : 0) | + (IS_BARRIER(in) ? 
ATTR_FLAG_BARRIER : 0); + + if (*mask & IATTR_IMMUTABLE) { + if (*flags & IATTR_IMMUTABLE) + attr.ia_attr_flags |= ATTR_FLAG_IMMUTABLE; + else + attr.ia_attr_flags &= ~ATTR_FLAG_IMMUTABLE; + } + if (*mask & IATTR_IUNLINK) { + if (*flags & IATTR_IUNLINK) + attr.ia_attr_flags |= ATTR_FLAG_IUNLINK; + else + attr.ia_attr_flags &= ~ATTR_FLAG_IUNLINK; + } + if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) { + if (*flags & IATTR_BARRIER) + attr.ia_attr_flags |= ATTR_FLAG_BARRIER; + else + attr.ia_attr_flags &= ~ATTR_FLAG_BARRIER; + } + if (in->i_op && in->i_op->setattr) + error = in->i_op->setattr(de, &attr); + else { + error = inode_change_ok(in, &attr); + if (!error) + error = inode_setattr(in, &attr); + } + } + + mark_inode_dirty(in); + up(&in->i_sem); + return 0; +} + +int vc_set_iattr(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_iattr_v1 vc_data; + int ret; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + ret = __vc_set_iattr(nd.dentry, + &vc_data.xid, &vc_data.flags, &vc_data.mask); + path_release(&nd); + } + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + ret = -EFAULT; + return ret; +} + + +#ifdef CONFIG_VSERVER_LEGACY +#include + +#define PROC_DYNAMIC_FIRST 0xF0000000UL + +int vx_proc_ioctl(struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg) +{ + struct proc_dir_entry *entry; + int error = 0; + int flags; + + if (inode->i_ino < PROC_DYNAMIC_FIRST) + return -ENOTTY; + + entry = PROC_I(inode)->pde; + + switch(cmd) { + case FIOC_GETXFLG: { + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + flags = entry->vx_flags; + if (capable(CAP_CONTEXT)) + error = put_user(flags, (int *) arg); + break; + } + case FIOC_SETXFLG: { + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (!capable(CAP_CONTEXT)) + break; + 
error = -EROFS; + if (IS_RDONLY(inode)) + break; + error = -EFAULT; + if (get_user(flags, (int *) arg)) + break; + error = 0; + entry->vx_flags = flags; + break; + } + default: + return -ENOTTY; + } + return error; +} +#endif + diff --git a/kernel/vserver/legacy.c b/kernel/vserver/legacy.c new file mode 100644 index 000000000..a620ae3b5 --- /dev/null +++ b/kernel/vserver/legacy.c @@ -0,0 +1,161 @@ +/* + * linux/kernel/vserver/legacy.c + * + * Virtual Server: Legacy Funtions + * + * Copyright (C) 2001-2003 Jacques Gelinas + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 broken out from vcontext.c V0.05 + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + + +static int vx_set_initpid(struct vx_info *vxi, int pid) +{ + if (vxi->vx_initpid) + return -EPERM; + + vxi->vx_initpid = pid; + return 0; +} + +int vc_new_s_context(uint32_t ctx, void __user *data) +{ + int ret = -ENOMEM; + struct vcmd_new_s_context_v1 vc_data; + struct vx_info *new_vxi; + + if (copy_from_user(&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + /* legacy hack, will be removed soon */ + if (ctx == -2) { + /* assign flags and initpid */ + if (!current->vx_info) + return -EINVAL; + ret = 0; + if (vc_data.flags & VX_INFO_INIT) + ret = vx_set_initpid(current->vx_info, current->tgid); + if (ret == 0) { + /* We keep the same vx_id, but lower the capabilities */ + current->vx_info->vx_bcaps &= (~vc_data.remove_cap); + // current->cap_bset &= (~vc_data.remove_cap); + ret = vx_current_xid(); + current->vx_info->vx_flags |= vc_data.flags; + } + return ret; + } + + if (!vx_check(0, VX_ADMIN) || + !capable(CAP_SYS_ADMIN) || vx_flags(VX_INFO_LOCK, 0)) + return -EPERM; + + /* ugly hack for Spectator */ + if (ctx == 1) { + current->xid = 1; + return 0; + } + + if (((ctx > MAX_S_CONTEXT) && (ctx != VX_DYNAMIC_ID)) || + (ctx == 0)) + return -EINVAL; + + if ((ctx == VX_DYNAMIC_ID) || (ctx < MIN_D_CONTEXT)) + new_vxi = find_or_create_vx_info(ctx); + 
else + new_vxi = find_vx_info(ctx); + + if (!new_vxi) + return -EINVAL; + new_vxi->vx_flags &= ~(VXF_STATE_SETUP|VXF_STATE_INIT); + + ret = vx_migrate_task(current, new_vxi); + if (ret == 0) { + current->vx_info->vx_bcaps &= (~vc_data.remove_cap); + // current->cap_bset &= (~vc_data.remove_cap); + new_vxi->vx_flags |= vc_data.flags; + if (vc_data.flags & VX_INFO_INIT) + vx_set_initpid(new_vxi, current->tgid); + if (vc_data.flags & VX_INFO_NAMESPACE) + vx_set_namespace(new_vxi, + current->namespace, current->fs); + if (vc_data.flags & VX_INFO_NPROC) + new_vxi->limit.rlim[RLIMIT_NPROC] = + current->rlim[RLIMIT_NPROC].rlim_max; + ret = new_vxi->vx_id; + } + put_vx_info(new_vxi); + return ret; +} + + + +/* set ipv4 root (syscall) */ + +int vc_set_ipv4root(uint32_t nbip, void __user *data) +{ + int i, err = -EPERM; + struct vcmd_set_ipv4root_v3 vc_data; + struct nx_info *new_nxi, *nxi = current->nx_info; + + if (nbip < 0 || nbip > NB_IPV4ROOT) + return -EINVAL; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + if (!nxi || nxi->ipv4[0] == 0 || capable(CAP_NET_ADMIN)) + // We are allowed to change everything + err = 0; + else if (nxi) { + int found = 0; + + // We are allowed to select a subset of the currently + // installed IP numbers. 
No new one allowed + // We can't change the broadcast address though + for (i=0; inbipv4; j++) { + if (nxip == nxi->ipv4[j]) { + found++; + break; + } + } + } + if ((found == nbip) && + (vc_data.broadcast == nxi->v4_bcast)) + err = 0; + } + if (err) + return err; + + new_nxi = create_nx_info(); + if (!new_nxi) + return -EINVAL; + + new_nxi->nbipv4 = nbip; + for (i=0; iipv4[i] = vc_data.nx_mask_pair[i].ip; + new_nxi->mask[i] = vc_data.nx_mask_pair[i].mask; + } + new_nxi->v4_bcast = vc_data.broadcast; + current->nx_info = new_nxi; + current->nid = new_nxi->nx_id; + put_nx_info(nxi); + return 0; +} + + diff --git a/kernel/vserver/limit.c b/kernel/vserver/limit.c new file mode 100644 index 000000000..5bd2fdcb9 --- /dev/null +++ b/kernel/vserver/limit.c @@ -0,0 +1,149 @@ +/* + * linux/kernel/vserver/limit.c + * + * Virtual Server: Context Limits + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * + */ + +#include +#include +#include +#include +#include + +#include +#include + + +static int is_valid_rlimit(int id) +{ + int valid = 0; + + switch (id) { + case RLIMIT_NPROC: + case RLIMIT_AS: + case RLIMIT_RSS: + case RLIMIT_MEMLOCK: + case RLIMIT_NOFILE: + valid = 1; + break; + } + return valid; +} + +static inline uint64_t vc_get_rlim(struct vx_info *vxi, int id) +{ + unsigned long limit; + + limit = vxi->limit.rlim[id]; + if (limit == RLIM_INFINITY) + return CRLIM_INFINITY; + return limit; +} + +int vc_get_rlimit(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_rlimit_v0 vc_data; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + if (!is_valid_rlimit(vc_data.id)) + return -ENOTSUPP; + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + + vc_data.maximum = vc_get_rlim(vxi, vc_data.id); + vc_data.minimum = CRLIM_UNSET; + vc_data.softlimit = CRLIM_UNSET; + put_vx_info(vxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int 
vc_set_rlimit(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_ctx_rlimit_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + if (!is_valid_rlimit(vc_data.id)) + return -ENOTSUPP; + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + + if (vc_data.maximum != CRLIM_KEEP) + vxi->limit.rlim[vc_data.id] = vc_data.maximum; + printk("setting [%d] = %d\n", vc_data.id, (int)vc_data.maximum); + put_vx_info(vxi); + + return 0; +} + +int vc_get_rlimit_mask(uint32_t id, void __user *data) +{ + static struct vcmd_ctx_rlimit_mask_v0 mask = { + /* minimum */ + 0 + , /* softlimit */ + 0 + , /* maximum */ + (1 << RLIMIT_NPROC) | + (1 << RLIMIT_NOFILE) | + (1 << RLIMIT_MEMLOCK) | + (1 << RLIMIT_AS) | + (1 << RLIMIT_RSS) + }; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + if (copy_to_user(data, &mask, sizeof(mask))) + return -EFAULT; + return 0; +} + + +void vx_vsi_meminfo(struct sysinfo *val) +{ + struct vx_info *vxi = current->vx_info; + unsigned long v; + + v = vxi->limit.rlim[RLIMIT_RSS]; + if (v != RLIM_INFINITY) + val->totalram = min(val->totalram, v); + v = atomic_read(&vxi->limit.res[RLIMIT_RSS]); + val->freeram = (v < val->totalram) ? val->totalram - v : 0; + val->bufferram = 0; + val->totalhigh = 0; + val->freehigh = 0; + return; +} + +void vx_vsi_swapinfo(struct sysinfo *val) +{ + struct vx_info *vxi = current->vx_info; + unsigned long w,v; + + v = vxi->limit.rlim[RLIMIT_RSS]; + w = vxi->limit.rlim[RLIMIT_AS]; + if (w != RLIM_INFINITY) + val->totalswap = min(val->totalswap, w - + ((v != RLIM_INFINITY) ? v : 0)); + w = atomic_read(&vxi->limit.res[RLIMIT_AS]); + val->freeswap = (w < val->totalswap) ? 
val->totalswap - w : 0; + return; +} + diff --git a/kernel/vserver/namespace.c b/kernel/vserver/namespace.c new file mode 100644 index 000000000..2c76c6fb4 --- /dev/null +++ b/kernel/vserver/namespace.c @@ -0,0 +1,195 @@ +/* + * linux/kernel/vserver/namespace.c + * + * Virtual Server: Context Namespace Support + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 broken out from context.c 0.07 + * V0.02 added task locking for namespace + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + + +/* virtual host info names */ + +static char * vx_vhi_name(struct vx_info *vxi, int id) +{ + switch (id) { + case VHIN_CONTEXT: + return vxi->vx_name; + case VHIN_SYSNAME: + return vxi->cvirt.utsname.sysname; + case VHIN_NODENAME: + return vxi->cvirt.utsname.nodename; + case VHIN_RELEASE: + return vxi->cvirt.utsname.release; + case VHIN_VERSION: + return vxi->cvirt.utsname.version; + case VHIN_MACHINE: + return vxi->cvirt.utsname.machine; + case VHIN_DOMAINNAME: + return vxi->cvirt.utsname.domainname; + default: + return NULL; + } + return NULL; +} + +int vc_set_vhi_name(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_vx_vhi_name_v0 vc_data; + char *name; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + + name = vx_vhi_name(vxi, vc_data.field); + if (name) + memcpy(name, vc_data.name, 65); + put_vx_info(vxi); + return (name ? 
0 : -EFAULT); +} + +int vc_get_vhi_name(uint32_t id, void __user *data) +{ + struct vx_info *vxi; + struct vcmd_vx_vhi_name_v0 vc_data; + char *name; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + + name = vx_vhi_name(vxi, vc_data.field); + if (!name) + goto out_put; + + memcpy(vc_data.name, name, 65); + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; +out_put: + put_vx_info(vxi); + return (name ? 0 : -EFAULT); +} + +/* namespace functions */ + +#include + +int vx_set_namespace(struct vx_info *vxi, struct namespace *ns, struct fs_struct *fs) +{ + struct fs_struct *fs_copy; + + if (vxi->vx_namespace) + return -EPERM; + if (!ns || !fs) + return -EINVAL; + + fs_copy = copy_fs_struct(fs); + if (!fs_copy) + return -ENOMEM; + + get_namespace(ns); + vxi->vx_namespace = ns; + vxi->vx_fs = fs_copy; + return 0; +} + +int vc_enter_namespace(uint32_t id, void *data) +{ + struct vx_info *vxi; + struct fs_struct *old_fs, *fs; + struct namespace *old_ns; + int ret = 0; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + + ret = -EINVAL; + if (!vxi->vx_namespace) + goto out_put; + + ret = -ENOMEM; + fs = copy_fs_struct(vxi->vx_fs); + if (!fs) + goto out_put; + + ret = 0; + task_lock(current); + old_ns = current->namespace; + old_fs = current->fs; + get_namespace(vxi->vx_namespace); + current->namespace = vxi->vx_namespace; + current->fs = fs; + task_unlock(current); + + put_namespace(old_ns); + put_fs_struct(old_fs); +out_put: + put_vx_info(vxi); + return ret; +} + +int vc_cleanup_namespace(uint32_t id, void *data) +{ + down_write(¤t->namespace->sem); + // spin_lock(&dcache_lock); + spin_lock(&vfsmount_lock); + umount_unused(current->namespace->root, current->fs); + spin_unlock(&vfsmount_lock); + // spin_unlock(&dcache_lock); + up_write(¤t->namespace->sem); + return 0; +} + +int vc_set_namespace(uint32_t id, void 
__user *data) +{ + struct fs_struct *fs; + struct namespace *ns; + struct vx_info *vxi; + int ret; + + if (vx_check(0, VX_ADMIN|VX_WATCH)) + return -ENOSYS; + + task_lock(current); + vxi = get_vx_info(current->vx_info); + fs = current->fs; + atomic_inc(&fs->count); + ns = current->namespace; + get_namespace(current->namespace); + task_unlock(current); + + ret = vx_set_namespace(vxi, ns, fs); + + put_namespace(ns); + put_fs_struct(fs); + put_vx_info(vxi); + return ret; +} + diff --git a/kernel/vserver/network.c b/kernel/vserver/network.c new file mode 100644 index 000000000..479a19b47 --- /dev/null +++ b/kernel/vserver/network.c @@ -0,0 +1,513 @@ +/* + * linux/kernel/vserver/network.c + * + * Virtual Server: Network Support + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * V0.02 cleaned up implementation + * V0.03 added equiv nx commands + * + */ + +#include +#include +#include +#include + +#include + + +LIST_HEAD(nx_infos); + +spinlock_t nxlist_lock + __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; + + +/* + * struct nx_info allocation and deallocation + */ + +static struct nx_info *alloc_nx_info(void) +{ + struct nx_info *new = NULL; + + nxdprintk("alloc_nx_info()\n"); + /* would this benefit from a slab cache? 
*/ + new = kmalloc(sizeof(struct nx_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct nx_info)); + /* rest of init goes here */ + + nxdprintk("alloc_nx_info() = %p\n", new); + return new; +} + +void free_nx_info(struct nx_info *nxi) +{ + nxdprintk("free_nx_info(%p)\n", nxi); + kfree(nxi); +} + +struct nx_info *create_nx_info(void) +{ + struct nx_info *new; + static int gnid = 1; + + nxdprintk("create_nx_info()\n"); + if (!(new = alloc_nx_info())) + return 0; + + spin_lock(&nxlist_lock); + + /* new ip info */ + atomic_set(&new->nx_refcount, 1); + new->nx_id = gnid++; + list_add(&new->nx_list, &nx_infos); + + spin_unlock(&nxlist_lock); + return new; +} + + +/* + * struct nx_info search by id + * assumes nxlist_lock is held + */ + +static __inline__ struct nx_info *__find_nx_info(int id) +{ + struct nx_info *nxi; + + list_for_each_entry(nxi, &nx_infos, nx_list) + if (nxi->nx_id == id) + return nxi; + return 0; +} + + +/* + * struct nx_info ref stuff + */ + +struct nx_info *find_nx_info(int id) +{ + struct nx_info *nxi; + + if (id < 0) { + nxi = current->nx_info; + get_nx_info(nxi); + } else { + spin_lock(&nxlist_lock); + if ((nxi = __find_nx_info(id))) + get_nx_info(nxi); + spin_unlock(&nxlist_lock); + } + return nxi; +} + +/* + * verify that id is a valid nid + */ + +int nx_info_id_valid(int id) +{ + int valid; + + spin_lock(&nxlist_lock); + valid = (__find_nx_info(id) != NULL); + spin_unlock(&nxlist_lock); + return valid; +} + + +/* + * dynamic context id ... 
+ */ + +static __inline__ nid_t __nx_dynamic_id(void) +{ + static nid_t seq = MAX_N_CONTEXT; + nid_t barrier = seq; + + do { + if (++seq > MAX_N_CONTEXT) + seq = MIN_D_CONTEXT; + if (!__find_nx_info(seq)) + return seq; + } while (barrier != seq); + return 0; +} + +static struct nx_info * __foc_nx_info(int id, int *err) +{ + struct nx_info *new, *nxi = NULL; + + nxdprintk("foc_nx_info(%d)\n", id); + // if (!(new = alloc_nx_info(id))) { + if (!(new = alloc_nx_info())) { + *err = -ENOMEM; + return NULL; + } + + spin_lock(&nxlist_lock); + + /* dynamic context requested */ + if (id == IP_DYNAMIC_ID) { + id = __nx_dynamic_id(); + if (!id) { + printk(KERN_ERR "no dynamic context available.\n"); + goto out_unlock; + } + new->nx_id = id; + } + /* existing context requested */ + else if ((nxi = __find_nx_info(id))) { + /* context in setup is not available */ + if (nxi->nx_flags & VXF_STATE_SETUP) { + nxdprintk("foc_nx_info(%d) = %p (not available)\n", id, nxi); + nxi = NULL; + *err = -EBUSY; + } else { + nxdprintk("foc_nx_info(%d) = %p (found)\n", id, nxi); + get_nx_info(nxi); + *err = 0; + } + goto out_unlock; + } + + /* new context requested */ + nxdprintk("foc_nx_info(%d) = %p (new)\n", id, new); + atomic_set(&new->nx_refcount, 1); + list_add(&new->nx_list, &nx_infos); + nxi = new, new = NULL; + *err = 1; + +out_unlock: + spin_unlock(&nxlist_lock); + if (new) + free_nx_info(new); + return nxi; +} + + +struct nx_info *find_or_create_nx_info(int id) +{ + int err; + + return __foc_nx_info(id, &err); +} + +/* + * migrate task to new network + */ + +int nx_migrate_task(struct task_struct *p, struct nx_info *nxi) +{ + struct nx_info *old_nxi = task_get_nx_info(p); + int ret = 0; + + if (!p || !nxi) + BUG(); + + nxdprintk("nx_migrate_task(%p,%p[#%d.%d)\n", p, nxi, + nxi->nx_id, atomic_read(&nxi->nx_refcount)); + if (old_nxi == nxi) + goto out; + + task_lock(p); + set_nx_info(&p->nx_info, nxi); + p->nid = nxi->nx_id; + task_unlock(p); + + put_nx_info(old_nxi); +out: + 
put_nx_info(old_nxi); + return ret; +} + + +#include +#include + +static inline int __addr_in_nx_info(u32 addr, struct nx_info *nxi) +{ + int i, nbip; + + nbip = nxi->nbipv4; + for (i=0; iipv4[i] == addr) + return 1; + return 0; +} + +int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi) +{ + if (!nxi) + return 1; + + return __addr_in_nx_info(ifa->ifa_address, nxi); +} + +int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) +{ + struct in_device *in_dev = __in_dev_get(dev); + struct in_ifaddr **ifap = NULL; + struct in_ifaddr *ifa = NULL; + + if (!nxi) + return 1; + if (!in_dev) + return 0; + + for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; + ifap = &ifa->ifa_next) { + if (__addr_in_nx_info(ifa->ifa_address, nxi)) + return 1; + } + return 0; +} + + + + +/* vserver syscall commands below here */ + +/* taks nid and nx_info functions */ + +#include + + +int vc_task_nid(uint32_t id, void __user *data) +{ + nid_t nid; + + if (id) { + struct task_struct *tsk; + + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return -EPERM; + + read_lock(&tasklist_lock); + tsk = find_task_by_pid(id); + nid = (tsk) ? 
tsk->nid : -ESRCH; + read_unlock(&tasklist_lock); + } + else + nid = current->nid; + return nid; +} + + +int vc_nx_info(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_nx_info_v0 vc_data; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) + return -EPERM; + + nxi = find_nx_info(id); + if (!nxi) + return -ESRCH; + + vc_data.nid = nxi->nx_id; + put_nx_info(nxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + + +/* network functions */ + +int vc_net_create(uint32_t nid, void __user *data) +{ + // int ret = -ENOMEM; + struct nx_info *new_nxi; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((nid >= MIN_D_CONTEXT) && (nid != VX_DYNAMIC_ID)) + return -EINVAL; + + if (nid < 1) + return -EINVAL; + + new_nxi = __foc_nx_info(nid, &ret); + if (!new_nxi) + return ret; + if (!(new_nxi->nx_flags & VXF_STATE_SETUP)) { + ret = -EEXIST; + goto out_put; + } + + ret = new_nxi->nx_id; + nx_migrate_task(current, new_nxi); +out_put: + put_nx_info(new_nxi); + return ret; +} + + +int vc_net_migrate(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + nxi = find_nx_info(id); + if (!nxi) + return -ESRCH; + nx_migrate_task(current, nxi); + put_nx_info(nxi); + return 0; +} + +int vc_net_add(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_nx_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + nxi = find_nx_info(id); + if (!nxi) + return -ESRCH; + + // add ip to net context here + put_nx_info(nxi); + return 0; +} + +int vc_net_remove(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_nx_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + nxi = find_nx_info(id); + if 
(!nxi) + return -ESRCH; + + // rem ip from net context here + put_nx_info(nxi); + return 0; +} + + + +int vc_get_nflags(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_flags_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + nxi = find_nx_info(id); + if (!nxi) + return -ESRCH; + + vc_data.flagword = nxi->nx_flags; + + // vc_data.mask = ~0UL; + /* special STATE flag handling */ + vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, IPF_ONE_TIME); + + put_nx_info(nxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_nflags(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_flags_v0 vc_data; + uint64_t mask, trigger; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + nxi = find_nx_info(id); + if (!nxi) + return -ESRCH; + + /* special STATE flag handling */ + mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, IPF_ONE_TIME); + trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword); + // if (trigger & IPF_STATE_SETUP) + + nxi->nx_flags = vx_mask_flags(nxi->nx_flags, + vc_data.flagword, mask); + put_nx_info(nxi); + return 0; +} + +int vc_get_ncaps(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_caps_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + nxi = find_nx_info(id); + if (!nxi) + return -ESRCH; + + vc_data.ncaps = nxi->nx_ncaps; + vc_data.cmask = ~0UL; + put_nx_info(nxi); + + if (copy_to_user (data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_ncaps(uint32_t id, void __user *data) +{ + struct nx_info *nxi; + struct vcmd_net_caps_v0 vc_data; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + nxi = find_nx_info(id); + if (!nxi) + return -ESRCH; + + nxi->nx_ncaps = vx_mask_flags(nxi->nx_ncaps, + vc_data.ncaps, 
vc_data.cmask); + put_nx_info(nxi); + return 0; +} + + +#include + +EXPORT_SYMBOL_GPL(free_nx_info); +EXPORT_SYMBOL_GPL(nxlist_lock); + diff --git a/kernel/vserver/proc.c b/kernel/vserver/proc.c new file mode 100644 index 000000000..42bc18200 --- /dev/null +++ b/kernel/vserver/proc.c @@ -0,0 +1,905 @@ +/* + * linux/kernel/vserver/proc.c + * + * Virtual Context Support + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 basic structure + * V0.02 adaptation vs1.3.0 + * V0.03 proc permissions + * V0.04 locking/generic + * V0.05 next generation procfs + * V0.06 inode validation + * V0.07 generic rewrite vid + * + */ + +#include +#include +#include +#include + +#include +#include + + +static struct proc_dir_entry *proc_virtual; + +static struct proc_dir_entry *proc_vnet; + + +enum vid_directory_inos { + PROC_XID_INO = 32, + PROC_XID_INFO, + PROC_XID_STATUS, + PROC_XID_LIMIT, + PROC_XID_SCHED, + PROC_XID_CVIRT, + PROC_XID_CACCT, + + PROC_NID_INO = 64, + PROC_NID_INFO, + PROC_NID_STATUS, +}; + +#define PROC_VID_MASK 0x60 + + +/* first the actual feeds */ + + +static int proc_virtual_info(int vid, char *buffer) +{ + return sprintf(buffer, + "VCIVersion:\t%04x:%04x\n" + "VCISyscall:\t%d\n" + ,VCI_VERSION >> 16 + ,VCI_VERSION & 0xFFFF + ,__NR_vserver + ); +} + + +int proc_xid_info (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = find_vx_info(vid); + if (!vxi) + return 0; + length = sprintf(buffer, + "ID:\t%d\n" + "Info:\t%p\n" + "Init:\t%d\n" + ,vxi->vx_id + ,vxi + ,vxi->vx_initpid + ); + put_vx_info(vxi); + return length; +} + +int proc_xid_status (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = find_vx_info(vid); + if (!vxi) + return 0; + length = sprintf(buffer, + "RefC:\t%d\n" + "Flags:\t%016llx\n" + "BCaps:\t%016llx\n" + "CCaps:\t%016llx\n" + "Ticks:\t%d\n" + ,atomic_read(&vxi->vx_refcount) + ,vxi->vx_flags + ,vxi->vx_bcaps + ,vxi->vx_ccaps + ,atomic_read(&vxi->limit.ticks) + ); + put_vx_info(vxi); + return 
length; +} + +int proc_xid_limit (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = find_vx_info(vid); + if (!vxi) + return 0; + length = vx_info_proc_limit(&vxi->limit, buffer); + put_vx_info(vxi); + return length; +} + +int proc_xid_sched (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = find_vx_info(vid); + if (!vxi) + return 0; + length = vx_info_proc_sched(&vxi->sched, buffer); + put_vx_info(vxi); + return length; +} + +int proc_xid_cvirt (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = find_vx_info(vid); + if (!vxi) + return 0; + length = vx_info_proc_cvirt(&vxi->cvirt, buffer); + put_vx_info(vxi); + return length; +} + +int proc_xid_cacct (int vid, char *buffer) +{ + struct vx_info *vxi; + int length; + + vxi = find_vx_info(vid); + if (!vxi) + return 0; + length = vx_info_proc_cacct(&vxi->cacct, buffer); + put_vx_info(vxi); + return length; +} + + +static int proc_vnet_info(int vid, char *buffer) +{ + return sprintf(buffer, + "VCIVersion:\t%04x:%04x\n" + "VCISyscall:\t%d\n" + ,VCI_VERSION >> 16 + ,VCI_VERSION & 0xFFFF + ,__NR_vserver + ); +} + +#define atoquad(a) \ + (((a)>>0) & 0xff), (((a)>>8) & 0xff), \ + (((a)>>16) & 0xff), (((a)>>24) & 0xff) + +int proc_nid_info (int vid, char *buffer) +{ + struct nx_info *nxi; + int length, i; + + nxi = find_nx_info(vid); + if (!nxi) + return 0; + length = sprintf(buffer, + "ID:\t%d\n" + "Info:\t%p\n" + ,nxi->nx_id + ,nxi + ); + for (i=0; inbipv4; i++) { + length += sprintf(buffer + length, + "%d:\t%d.%d.%d.%d/%d.%d.%d.%d\n", i, + atoquad(nxi->ipv4[i]), + atoquad(nxi->mask[i])); + } + put_nx_info(nxi); + return length; +} + +int proc_nid_status (int vid, char *buffer) +{ + struct nx_info *nxi; + int length; + + nxi = find_nx_info(vid); + if (!nxi) + return 0; + length = sprintf(buffer, + "RefC:\t%d\n" + ,atomic_read(&nxi->nx_refcount) + ); + put_nx_info(nxi); + return length; +} + +/* here the inode helpers */ + + + +#define fake_ino(id,ino) 
(((id)<<16)|(ino)) + +#define inode_vid(i) ((i)->i_ino >> 16) +#define inode_type(i) ((i)->i_ino & 0xFFFF) + +#define MAX_MULBY10 ((~0U-9)/10) + + +static struct inode *proc_vid_make_inode(struct super_block * sb, + int vid, int ino) +{ + struct inode *inode = new_inode(sb); + + if (!inode) + goto out; + + inode->i_mtime = inode->i_atime = + inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(vid, ino); + + inode->i_uid = 0; + inode->i_gid = 0; + // inode->i_xid = xid; +out: + return inode; +} + +static int proc_vid_revalidate(struct dentry * dentry, struct nameidata *nd) +{ + struct inode * inode = dentry->d_inode; + int vid, valid=0; + + vid = inode_vid(inode); + switch (inode_type(inode) & PROC_VID_MASK) { + case PROC_XID_INO: + valid = vx_info_id_valid(vid); + break; + case PROC_NID_INO: + valid = nx_info_id_valid(vid); + break; + } + if (valid) + return 1; + d_drop(dentry); + return 0; +} + +/* +static int proc_vid_delete_dentry(struct dentry * dentry) +{ + return 1; +} +*/ + + +#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) + +static ssize_t proc_vid_info_read(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + unsigned long page; + ssize_t length; + ssize_t end; + int vid; + + if (count > PROC_BLOCK_SIZE) + count = PROC_BLOCK_SIZE; + if (!(page = __get_free_page(GFP_KERNEL))) + return -ENOMEM; + + vid = inode_vid(inode); + length = PROC_I(inode)->op.proc_vid_read(vid, (char*)page); + + if (length < 0) { + free_page(page); + return length; + } + /* Static 4kB (or whatever) block capacity */ + if (*ppos >= length) { + free_page(page); + return 0; + } + if (count + *ppos > length) + count = length - *ppos; + end = count + *ppos; + copy_to_user(buf, (char *) page + *ppos, count); + *ppos = end; + free_page(page); + return count; +} + + + + + +/* here comes the lower level (vid) */ + +static struct file_operations proc_vid_info_file_operations = { + read: proc_vid_info_read, +}; + +static struct 
dentry_operations proc_vid_dentry_operations = { + d_revalidate: proc_vid_revalidate, +// d_delete: proc_vid_delete_dentry, +}; + + +struct vid_entry { + int type; + int len; + char *name; + mode_t mode; +}; + +#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} + +static struct vid_entry vx_base_stuff[] = { + E(PROC_XID_INFO, "info", S_IFREG|S_IRUGO), + E(PROC_XID_STATUS, "status", S_IFREG|S_IRUGO), + E(PROC_XID_LIMIT, "limit", S_IFREG|S_IRUGO), + E(PROC_XID_SCHED, "sched", S_IFREG|S_IRUGO), + E(PROC_XID_CVIRT, "cvirt", S_IFREG|S_IRUGO), + E(PROC_XID_CACCT, "cacct", S_IFREG|S_IRUGO), + {0,0,NULL,0} +}; + +static struct vid_entry vn_base_stuff[] = { + E(PROC_NID_INFO, "info", S_IFREG|S_IRUGO), + E(PROC_NID_STATUS, "status", S_IFREG|S_IRUGO), + {0,0,NULL,0} +}; + + + +static struct dentry *proc_vid_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode; + struct vid_entry *p; + int error; + + error = -ENOENT; + inode = NULL; + + switch (inode_type(dir)) { + case PROC_XID_INO: + p = vx_base_stuff; + break; + case PROC_NID_INO: + p = vn_base_stuff; + break; + default: + goto out; + } + + for (; p->name; p++) { + if (p->len != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, p->name, p->len)) + break; + } + if (!p->name) + goto out; + + error = -EINVAL; + inode = proc_vid_make_inode(dir->i_sb, inode_vid(dir), p->type); + if (!inode) + goto out; + + switch(p->type) { + case PROC_XID_INFO: + PROC_I(inode)->op.proc_vid_read = proc_xid_info; + break; + case PROC_XID_STATUS: + PROC_I(inode)->op.proc_vid_read = proc_xid_status; + break; + case PROC_XID_LIMIT: + PROC_I(inode)->op.proc_vid_read = proc_xid_limit; + break; + case PROC_XID_SCHED: + PROC_I(inode)->op.proc_vid_read = proc_xid_sched; + break; + case PROC_XID_CVIRT: + PROC_I(inode)->op.proc_vid_read = proc_xid_cvirt; + break; + case PROC_XID_CACCT: + PROC_I(inode)->op.proc_vid_read = proc_xid_cacct; + break; + + case PROC_NID_INFO: + 
PROC_I(inode)->op.proc_vid_read = proc_nid_info; + break; + case PROC_NID_STATUS: + PROC_I(inode)->op.proc_vid_read = proc_nid_status; + break; + + default: + printk("procfs: impossible type (%d)",p->type); + iput(inode); + return ERR_PTR(-EINVAL); + } + inode->i_mode = p->mode; +// inode->i_op = &proc_vid_info_inode_operations; + inode->i_fop = &proc_vid_info_file_operations; + inode->i_nlink = 1; + inode->i_flags|=S_IMMUTABLE; + + dentry->d_op = &proc_vid_dentry_operations; + d_add(dentry, inode); + error = 0; +out: + return ERR_PTR(error); +} + + +static int proc_vid_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + int i, size; + struct inode *inode = filp->f_dentry->d_inode; + struct vid_entry *p; + + i = filp->f_pos; + switch (i) { + case 0: + if (filldir(dirent, ".", 1, i, + inode->i_ino, DT_DIR) < 0) + return 0; + i++; + filp->f_pos++; + /* fall through */ + case 1: + if (filldir(dirent, "..", 2, i, + PROC_ROOT_INO, DT_DIR) < 0) + return 0; + i++; + filp->f_pos++; + /* fall through */ + default: + i -= 2; + switch (inode_type(inode)) { + case PROC_XID_INO: + size = sizeof(vx_base_stuff); + p = vx_base_stuff + i; + break; + case PROC_NID_INO: + size = sizeof(vn_base_stuff); + p = vn_base_stuff + i; + break; + default: + return 1; + } + if (i >= size/sizeof(struct vid_entry)) + return 1; + while (p->name) { + if (filldir(dirent, p->name, p->len, + filp->f_pos, fake_ino(inode_vid(inode), + p->type), p->mode >> 12) < 0) + return 0; + filp->f_pos++; + p++; + } + } + return 1; +} + + + + +/* now the upper level (virtual) */ + +static struct file_operations proc_vid_file_operations = { + read: generic_read_dir, + readdir: proc_vid_readdir, +}; + +static struct inode_operations proc_vid_inode_operations = { + lookup: proc_vid_lookup, +}; + + + +static __inline__ int atovid(const char *str, int len) +{ + int vid, c; + + vid = 0; + while (len-- > 0) { + c = *str - '0'; + str++; + if (c > 9) + return -1; + if (vid >= MAX_MULBY10) + return -1; + vid 
*= 10; + vid += c; + if (!vid) + return -1; + } + return vid; +} + + +struct dentry *proc_virtual_lookup(struct inode *dir, + struct dentry * dentry, struct nameidata *nd) +{ + int xid, len, ret; + struct vx_info *vxi; + const char *name; + struct inode *inode; + + name = dentry->d_name.name; + len = dentry->d_name.len; + ret = -ENOMEM; + + if (len == 7 && !memcmp(name, "current", 7)) { + inode = new_inode(dir->i_sb); + if (!inode) + goto out; + inode->i_mtime = inode->i_atime = + inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(1, PROC_XID_INO); + inode->i_mode = S_IFLNK|S_IRWXUGO; + inode->i_uid = inode->i_gid = 0; + inode->i_size = 64; +// inode->i_op = &proc_current_inode_operations; + d_add(dentry, inode); + return NULL; + } + if (len == 4 && !memcmp(name, "info", 4)) { + inode = proc_vid_make_inode(dir->i_sb, 0, PROC_XID_INFO); + if (!inode) + goto out; + inode->i_fop = &proc_vid_info_file_operations; + PROC_I(inode)->op.proc_vid_read = proc_virtual_info; + inode->i_mode = S_IFREG|S_IRUGO; +// inode->i_size = 64; +// inode->i_op = &proc_current_inode_operations; + d_add(dentry, inode); + return NULL; + } + + ret = -ENOENT; + xid = atovid(name, len); + if (xid < 0) + goto out; + vxi = find_vx_info(xid); + if (!vxi) + goto out; + + inode = NULL; + if (vx_check(xid, VX_ADMIN|VX_WATCH|VX_IDENT)) + inode = proc_vid_make_inode(dir->i_sb, + vxi->vx_id, PROC_XID_INO); + if (!inode) + goto out_release; + + inode->i_mode = S_IFDIR|S_IRUGO; + inode->i_op = &proc_vid_inode_operations; + inode->i_fop = &proc_vid_file_operations; + inode->i_nlink = 2; + inode->i_flags|=S_IMMUTABLE; + + dentry->d_op = &proc_vid_dentry_operations; + d_add(dentry, inode); + ret = 0; + +out_release: + put_vx_info(vxi); +out: + return ERR_PTR(ret); +} + + +struct dentry *proc_vnet_lookup(struct inode *dir, + struct dentry * dentry, struct nameidata *nd) +{ + int nid, len, ret; + struct nx_info *nxi; + const char *name; + struct inode *inode; + + name = dentry->d_name.name; + len = 
dentry->d_name.len; + ret = -ENOMEM; + if (len == 7 && !memcmp(name, "current", 7)) { + inode = new_inode(dir->i_sb); + if (!inode) + goto out; + inode->i_mtime = inode->i_atime = + inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(1, PROC_NID_INO); + inode->i_mode = S_IFLNK|S_IRWXUGO; + inode->i_uid = inode->i_gid = 0; + inode->i_size = 64; +// inode->i_op = &proc_current_inode_operations; + d_add(dentry, inode); + return NULL; + } + if (len == 4 && !memcmp(name, "info", 4)) { + inode = proc_vid_make_inode(dir->i_sb, 0, PROC_NID_INFO); + if (!inode) + goto out; + inode->i_fop = &proc_vid_info_file_operations; + PROC_I(inode)->op.proc_vid_read = proc_vnet_info; + inode->i_mode = S_IFREG|S_IRUGO; +// inode->i_size = 64; +// inode->i_op = &proc_current_inode_operations; + d_add(dentry, inode); + return NULL; + } + + ret = -ENOENT; + nid = atovid(name, len); + if (nid < 0) + goto out; + nxi = find_nx_info(nid); + if (!nxi) + goto out; + + inode = NULL; + if (1) + inode = proc_vid_make_inode(dir->i_sb, + nxi->nx_id, PROC_NID_INO); + if (!inode) + goto out_release; + + inode->i_mode = S_IFDIR|S_IRUGO; + inode->i_op = &proc_vid_inode_operations; + inode->i_fop = &proc_vid_file_operations; + inode->i_nlink = 2; + inode->i_flags|=S_IMMUTABLE; + + dentry->d_op = &proc_vid_dentry_operations; + d_add(dentry, inode); + ret = 0; + +out_release: + put_nx_info(nxi); +out: + return ERR_PTR(ret); +} + + + + +#define PROC_NUMBUF 10 +#define PROC_MAXVIDS 32 + + +static int get_xid_list(int index, unsigned int *xids) +{ + struct vx_info *p; + int nr_xids = 0; + + index--; + spin_lock(&vxlist_lock); + list_for_each_entry(p, &vx_infos, vx_list) { + int xid = p->vx_id; + + if (--index >= 0) + continue; + xids[nr_xids] = xid; + if (++nr_xids >= PROC_MAXVIDS) + break; + } + spin_unlock(&vxlist_lock); + return nr_xids; +} + +int proc_virtual_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + unsigned int xid_array[PROC_MAXVIDS]; + char buf[PROC_NUMBUF]; + unsigned 
int nr = filp->f_pos-3; + unsigned int nr_xids, i; + ino_t ino; + + switch ((long)filp->f_pos) { + case 0: + ino = fake_ino(0, PROC_XID_INO); + if (filldir(dirent, ".", 1, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 1: + ino = filp->f_dentry->d_parent->d_inode->i_ino; + if (filldir(dirent, "..", 2, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 2: + ino = fake_ino(0, PROC_XID_INFO); + if (filldir(dirent, "info", 4, + filp->f_pos, ino, DT_LNK) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 3: + if (current->xid > 1) { + ino = fake_ino(1, PROC_XID_INO); + if (filldir(dirent, "current", 7, + filp->f_pos, ino, DT_LNK) < 0) + return 0; + } + filp->f_pos++; + } + + nr_xids = get_xid_list(nr, xid_array); + + for (i = 0; i < nr_xids; i++) { + int xid = xid_array[i]; + ino_t ino = fake_ino(xid, PROC_XID_INO); + unsigned long j = PROC_NUMBUF; + + do buf[--j] = '0' + (xid % 10); while (xid/=10); + + if (filldir(dirent, buf+j, PROC_NUMBUF-j, + filp->f_pos, ino, DT_DIR) < 0) + break; + filp->f_pos++; + } + return 0; +} + + +static struct file_operations proc_virtual_dir_operations = { + read: generic_read_dir, + readdir: proc_virtual_readdir, +}; + +static struct inode_operations proc_virtual_dir_inode_operations = { + lookup: proc_virtual_lookup, +}; + + + +static int get_nid_list(int index, unsigned int *nids) +{ + struct nx_info *p; + int nr_nids = 0; + + index--; + spin_lock(&nxlist_lock); + list_for_each_entry(p, &nx_infos, nx_list) { + int nid = p->nx_id; + + if (--index >= 0) + continue; + nids[nr_nids] = nid; + if (++nr_nids >= PROC_MAXVIDS) + break; + } + spin_unlock(&nxlist_lock); + return nr_nids; +} + +int proc_vnet_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + unsigned int nid_array[PROC_MAXVIDS]; + char buf[PROC_NUMBUF]; + unsigned int nr = filp->f_pos-3; + unsigned int nr_nids, i; + ino_t ino; + + switch ((long)filp->f_pos) { + case 0: + 
ino = fake_ino(0, PROC_NID_INO); + if (filldir(dirent, ".", 1, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 1: + ino = filp->f_dentry->d_parent->d_inode->i_ino; + if (filldir(dirent, "..", 2, + filp->f_pos, ino, DT_DIR) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 2: + ino = fake_ino(0, PROC_NID_INFO); + if (filldir(dirent, "info", 4, + filp->f_pos, ino, DT_LNK) < 0) + return 0; + filp->f_pos++; + /* fall through */ + case 3: + if (current->xid > 1) { + ino = fake_ino(1, PROC_NID_INO); + if (filldir(dirent, "current", 7, + filp->f_pos, ino, DT_LNK) < 0) + return 0; + } + filp->f_pos++; + } + + nr_nids = get_nid_list(nr, nid_array); + + for (i = 0; i < nr_nids; i++) { + int nid = nid_array[i]; + ino_t ino = fake_ino(nid, PROC_NID_INO); + unsigned long j = PROC_NUMBUF; + + do buf[--j] = '0' + (nid % 10); while (nid/=10); + + if (filldir(dirent, buf+j, PROC_NUMBUF-j, + filp->f_pos, ino, DT_DIR) < 0) + break; + filp->f_pos++; + } + return 0; +} + + +static struct file_operations proc_vnet_dir_operations = { + read: generic_read_dir, + readdir: proc_vnet_readdir, +}; + +static struct inode_operations proc_vnet_dir_inode_operations = { + lookup: proc_vnet_lookup, +}; + + + +void proc_vx_init(void) +{ + struct proc_dir_entry *ent; + + ent = proc_mkdir("virtual", 0); + if (ent) { + ent->proc_fops = &proc_virtual_dir_operations; + ent->proc_iops = &proc_virtual_dir_inode_operations; + } + proc_virtual = ent; + + ent = proc_mkdir("vnet", 0); + if (ent) { + ent->proc_fops = &proc_vnet_dir_operations; + ent->proc_iops = &proc_vnet_dir_inode_operations; + } + proc_vnet = ent; +} + + + + +/* per pid info */ + + +char *task_vx_info(struct task_struct *p, char *buffer) +{ + return buffer + sprintf(buffer, + "XID:\t%d\n" + ,p->xid); +} + +int proc_pid_vx_info(struct task_struct *p, char *buffer) +{ + char * orig = buffer; + + buffer = task_vx_info(p, buffer); + return buffer - orig; +} + +char *task_nx_info(struct 
task_struct *p, char *buffer) +{ + return buffer + sprintf(buffer, + "NID:\t%d\n" + ,p->nid); +} + +int proc_pid_nx_info(struct task_struct *p, char *buffer) +{ + char * orig = buffer; + + buffer = task_nx_info(p, buffer); + return buffer - orig; +} + diff --git a/kernel/vserver/sched.c b/kernel/vserver/sched.c new file mode 100644 index 000000000..a75195a19 --- /dev/null +++ b/kernel/vserver/sched.c @@ -0,0 +1,162 @@ +/* + * linux/kernel/vserver/sched.c + * + * Virtual Server: Scheduler Support + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 adapted Sam Vilains version to 2.6.3 + * V0.02 removed legacy interface + * + */ + +#include +#include +#include +#include +#include + +#include +#include + + +/* + * recalculate the context's scheduling tokens + * + * ret > 0 : number of tokens available + * ret = 0 : context is paused + * ret < 0 : number of jiffies until new tokens arrive + * + */ +int vx_tokens_recalc(struct vx_info *vxi) +{ + long delta, tokens = 0; + + if (__vx_flags(vxi->vx_flags, VXF_SCHED_PAUSE, 0)) + /* we are paused */ + return 0; + + delta = jiffies - vxi->sched.jiffies; + + if (delta >= vxi->sched.interval) { + /* lockdown scheduler info */ + spin_lock(&vxi->sched.tokens_lock); + + /* calc integral token part */ + delta = jiffies - vxi->sched.jiffies; + tokens = delta / vxi->sched.interval; + delta = tokens * vxi->sched.interval; + tokens *= vxi->sched.fill_rate; + + atomic_add(tokens, &vxi->sched.tokens); + vxi->sched.jiffies += delta; + tokens = atomic_read(&vxi->sched.tokens); + + if (tokens > vxi->sched.tokens_max) { + tokens = vxi->sched.tokens_max; + atomic_set(&vxi->sched.tokens, tokens); + } + spin_unlock(&vxi->sched.tokens_lock); + } else { + /* no new tokens */ + if ((tokens = vx_tokens_avail(vxi)) < vxi->sched.tokens_min) { + /* enough tokens will be available in */ + if (vxi->sched.tokens_min == 0) + return delta - vxi->sched.interval; + return delta - vxi->sched.interval * + vxi->sched.tokens_min / vxi->sched.fill_rate; + } + } 
+ /* we have some tokens left */ + return tokens; +} + +/* + * effective_prio - return the priority that is based on the static + * priority but is modified by bonuses/penalties. + * + * We scale the actual sleep average [0 .... MAX_SLEEP_AVG] + * into a -4 ... 0 ... +4 bonus/penalty range. + * + * Additionally, we scale another amount based on the number of + * CPU tokens currently held by the context, if the process is + * part of a context (and the appropriate SCHED flag is set). + * This ranges from -5 ... 0 ... +15, quadratically. + * + * So, the total bonus is -9 .. 0 .. +19 + * We use ~50% of the full 0...39 priority range so that: + * + * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs. + * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks. + * unless that context is far exceeding its CPU allocation. + * + * Both properties are important to certain workloads. + */ +int effective_vavavoom(task_t *p, int max_prio) +{ + struct vx_info *vxi = p->vx_info; + int vavavoom, max; + + /* lots of tokens = lots of vavavoom + * no tokens = no vavavoom */ + if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) { + max = vxi->sched.tokens_max; + vavavoom = max - vavavoom; + max = max * max; + vavavoom = max_prio * VAVAVOOM_RATIO / 100 + * (vavavoom*vavavoom - (max >> 2)) / max; + /* alternative, geometric mapping + vavavoom = -( MAX_USER_PRIO*VAVAVOOM_RATIO/100 * vavavoom + / vxi->sched.tokens_max - + MAX_USER_PRIO*VAVAVOOM_RATIO/100/2); */ + } else + vavavoom = 0; + /* vavavoom = ( MAX_USER_PRIO*VAVAVOOM_RATIO/100*tokens_left(p) - + MAX_USER_PRIO*VAVAVOOM_RATIO/100/2); */ + + return vavavoom; +} + + +int vc_set_sched(uint32_t xid, void __user *data) +{ + struct vcmd_set_sched_v2 vc_data; + struct vx_info *vxi; + + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi = find_vx_info(xid); + if (!vxi) + return -EINVAL; + + spin_lock(&vxi->sched.tokens_lock); + + if (vc_data.interval != SCHED_KEEP) + vxi->sched.interval = 
vc_data.interval; + if (vc_data.fill_rate != SCHED_KEEP) + vxi->sched.fill_rate = vc_data.fill_rate; + if (vc_data.tokens_min != SCHED_KEEP) + vxi->sched.tokens_min = vc_data.tokens_min; + if (vc_data.tokens_max != SCHED_KEEP) + vxi->sched.tokens_max = vc_data.tokens_max; + if (vc_data.tokens != SCHED_KEEP) + atomic_set(&vxi->sched.tokens, vc_data.tokens); + + /* Sanity check the resultant values */ + if (vxi->sched.fill_rate <= 0) + vxi->sched.fill_rate = 1; + if (vxi->sched.interval <= 0) + vxi->sched.interval = HZ; + if (vxi->sched.tokens_max == 0) + vxi->sched.tokens_max = 1; + if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max) + atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max); + if (vxi->sched.tokens_min > vxi->sched.tokens_max) + vxi->sched.tokens_min = vxi->sched.tokens_max; + + spin_unlock(&vxi->sched.tokens_lock); + put_vx_info(vxi); + return 0; +} + diff --git a/kernel/vserver/signal.c b/kernel/vserver/signal.c new file mode 100644 index 000000000..464ea1be4 --- /dev/null +++ b/kernel/vserver/signal.c @@ -0,0 +1,85 @@ +/* + * linux/kernel/vserver/signal.c + * + * Virtual Server: Signal Support + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 broken out from vcontext V0.05 + * + */ + +#include +#include + +#include +#include + +#include +#include + + +int vc_ctx_kill(uint32_t id, void __user *data) +{ + int retval, count=0; + struct vcmd_ctx_kill_v0 vc_data; + struct siginfo info; + struct task_struct *p; + struct vx_info *vxi; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + info.si_signo = vc_data.sig; + info.si_errno = 0; + info.si_code = SI_USER; + info.si_pid = current->pid; + info.si_uid = current->uid; + + vxi = find_vx_info(id); + if (!vxi) + return -ESRCH; + + retval = -ESRCH; + read_lock(&tasklist_lock); + switch (vc_data.pid) { + case -1: + case 0: + for_each_process(p) { + int err = 0; + + if (vx_task_xid(p) != id || p->pid <= 1 || + 
(vc_data.pid && vxi->vx_initpid == p->pid) || + !thread_group_leader(p)) + continue; + + err = send_sig_info(vc_data.sig, &info, p); + ++count; + if (err != -EPERM) + retval = err; + } + break; + + default: + p = find_task_by_pid(vc_data.pid); + if (p) { + if (!thread_group_leader(p)) { + struct task_struct *tg; + + tg = find_task_by_pid(p->tgid); + if (tg) + p = tg; + } + if ((id == -1) || (vx_task_xid(p) == id)) + retval = send_sig_info(vc_data.sig, &info, p); + } + break; + } + read_unlock(&tasklist_lock); + put_vx_info(vxi); + return retval; +} + + diff --git a/kernel/vserver/switch.c b/kernel/vserver/switch.c new file mode 100644 index 000000000..90fee1412 --- /dev/null +++ b/kernel/vserver/switch.c @@ -0,0 +1,170 @@ +/* + * linux/kernel/vserver/switch.c + * + * Virtual Server: Syscall Switch + * + * Copyright (C) 2003-2004 Herbert Pötzl + * + * V0.01 syscall switch + * V0.02 added signal to context + * V0.03 added rlimit functions + * V0.04 added iattr, task/xid functions + * + */ + +#include +#include +#include + +#include +#include + + +static inline int +vc_get_version(uint32_t id) +{ + return VCI_VERSION; +} + + +#include +#include +#include +#include +#include +#include +#include +#include + + +extern unsigned int vx_debug_switch; + + +extern asmlinkage long +sys_vserver(uint32_t cmd, uint32_t id, void __user *data) +{ + + if (vx_debug_switch) + printk( "vc: VCMD_%02d_%d[%d], %d\n", + VC_CATEGORY(cmd), VC_COMMAND(cmd), + VC_VERSION(cmd), id); + + switch (cmd) { + case VCMD_get_version: + return vc_get_version(id); + +#ifdef CONFIG_VSERVER_LEGACY + case VCMD_new_s_context: + return vc_new_s_context(id, data); + case VCMD_set_ipv4root: + return vc_set_ipv4root(id, data); +#endif + + case VCMD_task_xid: + return vc_task_xid(id, data); + case VCMD_vx_info: + return vc_vx_info(id, data); + + case VCMD_task_nid: + return vc_task_nid(id, data); + case VCMD_nx_info: + return vc_nx_info(id, data); + + case VCMD_set_namespace: + return vc_set_namespace(id, data); 
+ case VCMD_cleanup_namespace: + return vc_cleanup_namespace(id, data); + } + + /* those are allowed while in setup too */ + if (!vx_check(0, VX_ADMIN|VX_WATCH) && + !vx_flags(VXF_STATE_SETUP,0)) + return -EPERM; + +#ifdef CONFIG_VSERVER_LEGACY + switch (cmd) { + case VCMD_set_cflags: + case VCMD_set_ccaps: + if (vx_check(0, VX_WATCH)) + return 0; + } +#endif + + switch (cmd) { + case VCMD_get_rlimit: + return vc_get_rlimit(id, data); + case VCMD_set_rlimit: + return vc_set_rlimit(id, data); + case VCMD_get_rlimit_mask: + return vc_get_rlimit_mask(id, data); + + case VCMD_vx_get_vhi_name: + return vc_get_vhi_name(id, data); + case VCMD_vx_set_vhi_name: + return vc_set_vhi_name(id, data); + + case VCMD_set_cflags: + return vc_set_cflags(id, data); + case VCMD_get_cflags: + return vc_get_cflags(id, data); + + case VCMD_set_ccaps: + return vc_set_ccaps(id, data); + case VCMD_get_ccaps: + return vc_get_ccaps(id, data); + + case VCMD_set_nflags: + return vc_set_nflags(id, data); + case VCMD_get_nflags: + return vc_get_nflags(id, data); + + case VCMD_set_ncaps: + return vc_set_ncaps(id, data); + case VCMD_get_ncaps: + return vc_get_ncaps(id, data); + + case VCMD_set_sched: + return vc_set_sched(id, data); + } + + /* below here only with VX_ADMIN */ + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return -EPERM; + + switch (cmd) { + case VCMD_ctx_kill: + return vc_ctx_kill(id, data); + +#ifdef CONFIG_VSERVER_LEGACY + case VCMD_create_context: + return vc_ctx_create(id, data); +#endif + + case VCMD_get_iattr: + return vc_get_iattr(id, data); + case VCMD_set_iattr: + return vc_set_iattr(id, data); + + case VCMD_enter_namespace: + return vc_enter_namespace(id, data); + + case VCMD_ctx_create: +#ifdef CONFIG_VSERVER_LEGACY + if (id == 1) { + current->xid = 1; + return 1; + } +#endif + return vc_ctx_create(id, data); + case VCMD_ctx_migrate: + return vc_ctx_migrate(id, data); + + case VCMD_net_create: + return vc_net_create(id, data); + case VCMD_net_migrate: + return 
vc_net_migrate(id, data); + + } + return -ENOSYS; +} + diff --git a/kernel/vserver/sysctl.c b/kernel/vserver/sysctl.c new file mode 100644 index 000000000..562fc0eab --- /dev/null +++ b/kernel/vserver/sysctl.c @@ -0,0 +1,150 @@ +/* + * linux/kernel/sysctl.c + * + * Virtual Context Support + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 basic structure + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +#define CTL_VSERVER 4242 /* unused? */ + +enum { + CTL_DEBUG_SWITCH = 1, + CTL_DEBUG_LIMIT, +}; + + +unsigned int vx_debug_switch = 0; +unsigned int vx_debug_limit = 0; + + +static struct ctl_table_header *vserver_table_header; +static ctl_table vserver_table[]; + + +void vserver_register_sysctl(void) +{ + if (!vserver_table_header) { + vserver_table_header = register_sysctl_table(vserver_table, 1); +#ifdef CONFIG_PROC_FS +// if (vserver_table[0].de) +// vserver_table[0].de->owner = THIS_MODULE; +#endif + } + +} + +void vserver_unregister_sysctl(void) +{ + if (vserver_table_header) { + unregister_sysctl_table(vserver_table_header); + vserver_table_header = NULL; + } +} + + +static int proc_dodebug(ctl_table *table, int write, + struct file *file, void *buffer, size_t *lenp) +{ + char tmpbuf[20], *p, c; + unsigned int value; + size_t left, len; + + if ((file->f_pos && !write) || !*lenp) { + *lenp = 0; + return 0; + } + + left = *lenp; + + if (write) { + if (!access_ok(VERIFY_READ, buffer, left)) + return -EFAULT; + p = (char *) buffer; + while (left && __get_user(c, p) >= 0 && isspace(c)) + left--, p++; + if (!left) + goto done; + + if (left > sizeof(tmpbuf) - 1) + return -EINVAL; + if (copy_from_user(tmpbuf, p, left)) + return -EFAULT; + tmpbuf[left] = '\0'; + + for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--) + value = 10 * value + (*p - '0'); + if (*p && !isspace(*p)) + return -EINVAL; + while (left && isspace(*p)) + left--, p++; + *(unsigned int *) table->data = value; + } 
else { + if (!access_ok(VERIFY_WRITE, buffer, left)) + return -EFAULT; + len = sprintf(tmpbuf, "%d", *(unsigned int *) table->data); + if (len > left) + len = left; + if (__copy_to_user(buffer, tmpbuf, len)) + return -EFAULT; + if ((left -= len) > 0) { + if (put_user('\n', (char *)buffer + len)) + return -EFAULT; + left--; + } + } + +done: + *lenp -= left; + file->f_pos += *lenp; + return 0; +} + + + +static ctl_table debug_table[] = { + { + .ctl_name = CTL_DEBUG_SWITCH, + .procname = "debug_switch", + .data = &vx_debug_switch, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_LIMIT, + .procname = "debug_limit", + .data = &vx_debug_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { .ctl_name = 0 } +}; + +static ctl_table vserver_table[] = { + { + .ctl_name = CTL_VSERVER, + .procname = "vserver", + .mode = 0555, + .child = debug_table + }, + { .ctl_name = 0 } +}; + diff --git a/net/bluetooth/syms.c b/net/bluetooth/syms.c new file mode 100644 index 000000000..20d81017f --- /dev/null +++ b/net/bluetooth/syms.c @@ -0,0 +1,84 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2000-2001 Qualcomm Incorporated + + Written 2000,2001 by Maxim Krasnyansky + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
+ IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +/* + * Bluetooth symbols. + * + * $Id: syms.c,v 1.1 2002/03/08 21:06:59 maxk Exp $ + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +/* HCI Core */ +EXPORT_SYMBOL(hci_alloc_dev); +EXPORT_SYMBOL(hci_free_dev); +EXPORT_SYMBOL(hci_register_dev); +EXPORT_SYMBOL(hci_unregister_dev); +EXPORT_SYMBOL(hci_suspend_dev); +EXPORT_SYMBOL(hci_resume_dev); + +EXPORT_SYMBOL(hci_register_proto); +EXPORT_SYMBOL(hci_unregister_proto); + +EXPORT_SYMBOL(hci_get_route); +EXPORT_SYMBOL(hci_connect); +EXPORT_SYMBOL(hci_dev_get); +EXPORT_SYMBOL(hci_conn_auth); +EXPORT_SYMBOL(hci_conn_encrypt); + +EXPORT_SYMBOL(hci_send_acl); +EXPORT_SYMBOL(hci_send_sco); +EXPORT_SYMBOL(hci_send_cmd); +EXPORT_SYMBOL(hci_si_event); + +/* Bluetooth lib */ +EXPORT_SYMBOL(bt_dump); +EXPORT_SYMBOL(baswap); +EXPORT_SYMBOL(batostr); +EXPORT_SYMBOL(bt_err); + +/* Bluetooth sockets */ +EXPORT_SYMBOL(bt_sock_register); +EXPORT_SYMBOL(bt_sock_unregister); +EXPORT_SYMBOL(bt_sock_alloc); +EXPORT_SYMBOL(bt_sock_link); +EXPORT_SYMBOL(bt_sock_unlink); +EXPORT_SYMBOL(bt_sock_recvmsg); +EXPORT_SYMBOL(bt_sock_poll); +EXPORT_SYMBOL(bt_accept_enqueue); +EXPORT_SYMBOL(bt_accept_dequeue); +EXPORT_SYMBOL(bt_sock_wait_state); + +EXPORT_SYMBOL(proc_bt); diff --git a/sound/pci/ice1712/prodigy.c b/sound/pci/ice1712/prodigy.c new file mode 100644 index 000000000..eee13e644 --- /dev/null +++ b/sound/pci/ice1712/prodigy.c @@ -0,0 +1,663 @@ +/* + * ALSA 
driver for ICEnsemble VT1724 (Envy24HT) + * + * Lowlevel functions for AudioTrak Prodigy 7.1 (and possibly 192) cards + * Copyright (c) 2003 Dimitromanolakis Apostolos + * based on the aureon.c code (c) 2003 by Takashi Iwai + * + * version 0.82: Stable / not all features work yet (no communication with AC97 secondary) + * added 64x/128x oversampling switch (should be 64x only for 96khz) + * fixed some recording labels (still need to check the rest) + * recording is working probably thanks to correct wm8770 initialization + * + * version 0.5: Initial release: + * working: analog output, mixer, headphone amplifier switch + * not working: prety much everything else, at least i could verify that + * we have no digital output, no capture, pretty bad clicks and poops + * on mixer switch and other coll stuff. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * NOTES: + * + * + * + * - we reuse the akm4xxx_t record for storing the wm8770 codec data. + * both wm and akm codecs are pretty similar, so we can integrate + * both controls in the future, once if wm codecs are reused in + * many boards. + * + * - writing over SPI is implemented but reading is not yet. + * the SPDIF-in channel status, etc. can be read from CS chip. + * + * - DAC digital volumes are not implemented in the mixer. 
+ * if they show better response than DAC analog volumes, we can use them + * instead. + * + * - Prodigy boards are equipped with AC97 STAC9744 chip , too. it's used to do + * the analog mixing but not easily controllable (it's not connected + * directly from envy24ht chip). so let's leave it as it is. + * + */ + +#define REVISION 0.82b + +#include +#include +#include +#include +#include +#include +#include + +#include "ice1712.h" +#include "envy24ht.h" +#include "prodigy.h" + + +static int prodigy_set_headphone_amp(ice1712_t *ice, int enable) +{ + unsigned int tmp, tmp2; + + tmp2 = tmp = snd_ice1712_gpio_read(ice); + if (enable) + tmp |= PRODIGY_HP_AMP_EN; + else + tmp &= ~ PRODIGY_HP_AMP_EN; + if (tmp != tmp2) { + snd_ice1712_gpio_write(ice, tmp); + return 1; + } + return 0; +} + + +static int prodigy_get_headphone_amp(ice1712_t *ice) +{ + unsigned int tmp = snd_ice1712_gpio_read(ice); + + return ( tmp & PRODIGY_HP_AMP_EN )!= 0; +} + + +/* + * write data in the SPI mode + */ +static void prodigy_spi_write(ice1712_t *ice, unsigned int cs, unsigned int data, int bits) +{ + unsigned int tmp; + int i; + + tmp = snd_ice1712_gpio_read(ice); + + snd_ice1712_gpio_set_mask(ice, ~(PRODIGY_WM_RW|PRODIGY_WM_DATA|PRODIGY_WM_CLK| + PRODIGY_WM_CS|PRODIGY_CS8415_CS|PRODIGY_HP_AMP_EN)); + tmp |= PRODIGY_WM_RW; + tmp &= ~cs; + snd_ice1712_gpio_write(ice, tmp); + udelay(1); + + for (i = bits - 1; i >= 0; i--) { + tmp &= ~PRODIGY_WM_CLK; + snd_ice1712_gpio_write(ice, tmp); + udelay(1); + if (data & (1 << i)) + tmp |= PRODIGY_WM_DATA; + else + tmp &= ~PRODIGY_WM_DATA; + snd_ice1712_gpio_write(ice, tmp); + udelay(1); + tmp |= PRODIGY_WM_CLK; + snd_ice1712_gpio_write(ice, tmp); + udelay(1); + } + + tmp &= ~PRODIGY_WM_CLK; + tmp |= cs; + snd_ice1712_gpio_write(ice, tmp); + udelay(1); + tmp |= PRODIGY_WM_CLK; + snd_ice1712_gpio_write(ice, tmp); + udelay(1); +} + + +/* + * get the current register value of WM codec + */ +static unsigned short wm_get(ice1712_t *ice, int reg) +{ + reg <<= 
1; + return ((unsigned short)ice->akm[0].images[reg] << 8) | + ice->akm[0].images[reg + 1]; +} + +/* + * set the register value of WM codec and remember it + */ +static void wm_put(ice1712_t *ice, int reg, unsigned short val) +{ + prodigy_spi_write(ice, PRODIGY_WM_CS, (reg << 9) | (val & 0x1ff), 16); + reg <<= 1; + ice->akm[0].images[reg] = val >> 8; + ice->akm[0].images[reg + 1] = val; +} + + +/********************************* + ********* Controls section ****** + *********************************/ + +#define PRODIGY_CON_HPAMP \ + { \ + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .name = "Headphone Amplifier", \ + .info = prodigy_hpamp_info, \ + .get = prodigy_hpamp_get, \ + .put = prodigy_hpamp_put \ + } + +static int prodigy_hpamp_info(snd_kcontrol_t *k, snd_ctl_elem_info_t *uinfo) +{ + static char *texts[2] = { + "Off", "On" + }; + + uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; + uinfo->count = 1; + uinfo->value.enumerated.items = 2; + + if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items) + uinfo->value.enumerated.item = uinfo->value.enumerated.items - 1; + strcpy(uinfo->value.enumerated.name, texts[uinfo->value.enumerated.item]); + + return 0; +} + + +static int prodigy_hpamp_get(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol) +{ + ice1712_t *ice = snd_kcontrol_chip(kcontrol); + + ucontrol->value.integer.value[0] = prodigy_get_headphone_amp(ice); + return 0; +} + + +static int prodigy_hpamp_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol) +{ + ice1712_t *ice = snd_kcontrol_chip(kcontrol); + + return prodigy_set_headphone_amp(ice,ucontrol->value.integer.value[0]); +} + + + +#define PRODIGY_CON_DEEMP \ + { \ + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .name = "DAC De-emphasis", \ + .info = prodigy_deemp_info, \ + .get = prodigy_deemp_get, \ + .put = prodigy_deemp_put \ + } + +static int prodigy_deemp_info(snd_kcontrol_t *k, snd_ctl_elem_info_t *uinfo) +{ + static char *texts[2] = { "Off", "On" }; + + uinfo->type = 
SNDRV_CTL_ELEM_TYPE_ENUMERATED; + uinfo->count = 1; + uinfo->value.enumerated.items = 2; + + if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items) + uinfo->value.enumerated.item = uinfo->value.enumerated.items - 1; + strcpy(uinfo->value.enumerated.name, texts[uinfo->value.enumerated.item]); + + return 0; +} + +static int prodigy_deemp_get(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol) +{ + ice1712_t *ice = snd_kcontrol_chip(kcontrol); + ucontrol->value.integer.value[0] = (wm_get(ice, 0x15) & 0xf) == 0xf; + return 0; +} + +static int prodigy_deemp_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol) +{ + ice1712_t *ice = snd_kcontrol_chip(kcontrol); + int temp, temp2; + temp2 = temp = wm_get(ice, 0x15); + temp = (temp & ~0xf) | ((ucontrol->value.integer.value[0])*0xf); + if (temp != temp2) { + wm_put(ice,0x15,temp); + return 1; + } + return 0; +} + + +#define PRODIGY_CON_OVERSAMPLING \ + { \ + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .name = "ADC Oversampling", \ + .info = prodigy_oversampling_info, \ + .get = prodigy_oversampling_get, \ + .put = prodigy_oversampling_put \ + } + +static int prodigy_oversampling_info(snd_kcontrol_t *k, snd_ctl_elem_info_t *uinfo) +{ + static char *texts[2] = { "128x", "64x" }; + + uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; + uinfo->count = 1; + uinfo->value.enumerated.items = 2; + + if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items) + uinfo->value.enumerated.item = uinfo->value.enumerated.items - 1; + strcpy(uinfo->value.enumerated.name, texts[uinfo->value.enumerated.item]); + + return 0; +} + +static int prodigy_oversampling_get(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol) +{ + ice1712_t *ice = snd_kcontrol_chip(kcontrol); + ucontrol->value.integer.value[0] = (wm_get(ice, 0x17) & 0x8) == 0x8; + return 0; +} + +static int prodigy_oversampling_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol) +{ + int temp, temp2; + ice1712_t *ice = 
snd_kcontrol_chip(kcontrol); + + temp2 = temp = wm_get(ice, 0x17); + + if( ucontrol->value.integer.value[0] ) { + temp |= 0x8; + } else { + temp &= ~0x8; + } + + if (temp != temp2) { + wm_put(ice,0x17,temp); + return 1; + } + return 0; +} + + + + +/* + * DAC volume attenuation mixer control + */ +static int wm_dac_vol_info(snd_kcontrol_t *kcontrol, snd_ctl_elem_info_t *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 1; + uinfo->value.integer.min = 0; /* mute */ + uinfo->value.integer.max = 101; /* 0dB */ + return 0; +} + +static int wm_dac_vol_get(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol) +{ + ice1712_t *ice = snd_kcontrol_chip(kcontrol); + int idx; + unsigned short vol; + + down(&ice->gpio_mutex); + if (kcontrol->private_value) + idx = WM_DAC_MASTER_ATTEN; + else + idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id) + WM_DAC_ATTEN; + vol = wm_get(ice, idx) & 0x7f; + if (vol <= 0x1a) + ucontrol->value.integer.value[0] = 0; + else + ucontrol->value.integer.value[0] = vol - 0x1a; + up(&ice->gpio_mutex); + + return 0; +} + +static int wm_dac_vol_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol) +{ + ice1712_t *ice = snd_kcontrol_chip(kcontrol); + int idx; + unsigned short ovol, nvol; + int change; + + snd_ice1712_save_gpio_status(ice); + if (kcontrol->private_value) + idx = WM_DAC_MASTER_ATTEN; + else + idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id) + WM_DAC_ATTEN; + nvol = ucontrol->value.integer.value[0] + 0x1a; + ovol = wm_get(ice, idx) & 0x7f; + change = (ovol != nvol); + if (change) { + if (nvol <= 0x1a && ovol <= 0x1a) + change = 0; + else + wm_put(ice, idx, nvol | 0x180); /* update on zero detect */ + } + snd_ice1712_restore_gpio_status(ice); + return change; +} + +/* + * ADC gain mixer control + */ +static int wm_adc_vol_info(snd_kcontrol_t *kcontrol, snd_ctl_elem_info_t *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 1; + uinfo->value.integer.min = 0; /* -12dB */ + 
uinfo->value.integer.max = 0x1f; /* 19dB */ + return 0; +} + +static int wm_adc_vol_get(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol) +{ + ice1712_t *ice = snd_kcontrol_chip(kcontrol); + int idx; + unsigned short vol; + + down(&ice->gpio_mutex); + idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id) + WM_ADC_GAIN; + vol = wm_get(ice, idx) & 0x1f; + ucontrol->value.integer.value[0] = vol; + up(&ice->gpio_mutex); + return 0; +} + +static int wm_adc_vol_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol) +{ + ice1712_t *ice = snd_kcontrol_chip(kcontrol); + int idx; + unsigned short ovol, nvol; + int change; + + snd_ice1712_save_gpio_status(ice); + idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id) + WM_ADC_GAIN; + nvol = ucontrol->value.integer.value[0]; + ovol = wm_get(ice, idx) & 0x1f; + change = (ovol != nvol); + if (change) + wm_put(ice, idx, nvol); + snd_ice1712_restore_gpio_status(ice); + return change; +} + +/* + * ADC input mux mixer control + */ +static int wm_adc_mux_info(snd_kcontrol_t *kcontrol, snd_ctl_elem_info_t *uinfo) +{ + static char *texts[] = { + "CD Left", + "CD Right", + "Line Left", + "Line Right", + "Aux Left", + "Aux Right", + "Mic Left", + "Mic Right", + }; + uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; + uinfo->count = 2; + uinfo->value.enumerated.items = 8; + if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items) + uinfo->value.enumerated.item = uinfo->value.enumerated.items - 1; + strcpy(uinfo->value.enumerated.name, texts[uinfo->value.enumerated.item]); + return 0; +} + +static int wm_adc_mux_get(snd_kcontrol_t * kcontrol, snd_ctl_elem_value_t *ucontrol) +{ + ice1712_t *ice = snd_kcontrol_chip(kcontrol); + unsigned short val; + + down(&ice->gpio_mutex); + val = wm_get(ice, WM_ADC_MUX); + ucontrol->value.integer.value[0] = val & 7; + ucontrol->value.integer.value[1] = (val >> 4) & 7; + up(&ice->gpio_mutex); + return 0; +} + +static int wm_adc_mux_put(snd_kcontrol_t * kcontrol, snd_ctl_elem_value_t 
*ucontrol) +{ + ice1712_t *ice = snd_kcontrol_chip(kcontrol); + unsigned short oval, nval; + int change; + + snd_ice1712_save_gpio_status(ice); + oval = wm_get(ice, WM_ADC_MUX); + nval = oval & ~0x77; + nval |= ucontrol->value.integer.value[0] & 7; + nval |= (ucontrol->value.integer.value[1] & 7) << 4; + change = (oval != nval); + if (change) + wm_put(ice, WM_ADC_MUX, nval); + snd_ice1712_restore_gpio_status(ice); + return 0; +} + +/* + * mixers + */ + +static snd_kcontrol_new_t prodigy71_dac_control __devinitdata = { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "DAC Volume", + .count = 8, + .info = wm_dac_vol_info, + .get = wm_dac_vol_get, + .put = wm_dac_vol_put, +}; + +static snd_kcontrol_new_t wm_controls[] __devinitdata = { + { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "Master Playback Volume", + .info = wm_dac_vol_info, + .get = wm_dac_vol_get, + .put = wm_dac_vol_put, + .private_value = 1, + }, + { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "ADC Volume", + .count = 2, + .info = wm_adc_vol_info, + .get = wm_adc_vol_get, + .put = wm_adc_vol_put, + }, + { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "Capture Route", + .info = wm_adc_mux_info, + .get = wm_adc_mux_get, + .put = wm_adc_mux_put, + }, + PRODIGY_CON_HPAMP , + PRODIGY_CON_DEEMP , + PRODIGY_CON_OVERSAMPLING +}; + + +static int __devinit prodigy_add_controls(ice1712_t *ice) +{ + unsigned int i; + int err; + + err = snd_ctl_add(ice->card, snd_ctl_new1(&prodigy71_dac_control, ice)); + if (err < 0) + return err; + + for (i = 0; i < ARRAY_SIZE(wm_controls); i++) { + err = snd_ctl_add(ice->card, snd_ctl_new1(&wm_controls[i], ice)); + if (err < 0) + return err; + } + return 0; +} + + +/* + * initialize the chip + */ +static int __devinit prodigy_init(ice1712_t *ice) +{ + static unsigned short wm_inits[] = { + + /* These come first to reduce init pop noise */ + 0x1b, 0x000, /* ADC Mux */ + 0x1c, 0x009, /* Out Mux1 */ + 0x1d, 0x009, /* Out Mux2 */ + + 0x18, 0x000, /* All power-up */ + + 
0x16, 0x022, /* I2S, normal polarity, 24bit, high-pass on */ + 0x17, 0x006, /* 128fs, slave mode */ + + 0x00, 0, /* DAC1 analog mute */ + 0x01, 0, /* DAC2 analog mute */ + 0x02, 0, /* DAC3 analog mute */ + 0x03, 0, /* DAC4 analog mute */ + 0x04, 0, /* DAC5 analog mute */ + 0x05, 0, /* DAC6 analog mute */ + 0x06, 0, /* DAC7 analog mute */ + 0x07, 0, /* DAC8 analog mute */ + 0x08, 0x100, /* master analog mute */ + + 0x09, 0x7f, /* DAC1 digital full */ + 0x0a, 0x7f, /* DAC2 digital full */ + 0x0b, 0x7f, /* DAC3 digital full */ + 0x0c, 0x7f, /* DAC4 digital full */ + 0x0d, 0x7f, /* DAC5 digital full */ + 0x0e, 0x7f, /* DAC6 digital full */ + 0x0f, 0x7f, /* DAC7 digital full */ + 0x10, 0x7f, /* DAC8 digital full */ + 0x11, 0x1FF, /* master digital full */ + + 0x12, 0x000, /* phase normal */ + 0x13, 0x090, /* unmute DAC L/R */ + 0x14, 0x000, /* all unmute */ + 0x15, 0x000, /* no deemphasis, no ZFLG */ + + 0x19, 0x000, /* -12dB ADC/L */ + 0x1a, 0x000 /* -12dB ADC/R */ + + }; + + static unsigned short cs_inits[] = { + 0x0441, /* RUN */ + 0x0100, /* no mute */ + 0x0200, /* */ + 0x0600, /* slave, 24bit */ + }; + + unsigned int tmp; + unsigned int i; + + printk(KERN_INFO "ice1724: AudioTrak Prodigy 7.1 driver rev. 0.82b\n"); + printk(KERN_INFO "ice1724: This driver is in beta stage. Forsuccess/failure reporting contact\n"); + printk(KERN_INFO "ice1724: Apostolos Dimitromanolakis \n"); + + ice->num_total_dacs = 8; + ice->num_total_adcs = 8; + + /* to remeber the register values */ + ice->akm = snd_kcalloc(sizeof(akm4xxx_t), GFP_KERNEL); + if (! 
ice->akm) + return -ENOMEM; + ice->akm_codecs = 1; + + snd_ice1712_gpio_set_dir(ice, 0xbfffff); /* fix this for the time being */ + + /* reset the wm codec as the SPI mode */ + snd_ice1712_save_gpio_status(ice); + snd_ice1712_gpio_set_mask(ice,~( PRODIGY_WM_RESET|PRODIGY_WM_CS| + PRODIGY_CS8415_CS|PRODIGY_HP_AMP_EN )); + + tmp = snd_ice1712_gpio_read(ice); + tmp &= ~PRODIGY_WM_RESET; + snd_ice1712_gpio_write(ice, tmp); + udelay(1); + tmp |= PRODIGY_WM_CS | PRODIGY_CS8415_CS; + snd_ice1712_gpio_write(ice, tmp); + udelay(1); + tmp |= PRODIGY_WM_RESET; + snd_ice1712_gpio_write(ice, tmp); + udelay(1); + + /* initialize WM8770 codec */ + for (i = 0; i < ARRAY_SIZE(wm_inits); i += 2) + wm_put(ice, wm_inits[i], wm_inits[i+1]); + + /* initialize CS8415A codec */ + for (i = 0; i < ARRAY_SIZE(cs_inits); i++) + prodigy_spi_write(ice, PRODIGY_CS8415_CS, + cs_inits[i] | 0x200000, 24); + + + prodigy_set_headphone_amp(ice, 1); + + snd_ice1712_restore_gpio_status(ice); + + return 0; +} + +/* + * Prodigy boards don't provide the EEPROM data except for the vendor IDs. + * hence the driver needs to sets up it properly. 
+ */ + +static unsigned char prodigy71_eeprom[] __devinitdata = { + 0x2b, /* SYSCONF: clock 512, mpu401, spdif-in/ADC, 4DACs */ + 0x80, /* ACLINK: I2S */ + 0xf8, /* I2S: vol, 96k, 24bit, 192k */ + 0xc3, /* SPDIF: out-en, out-int, spdif-in */ + 0xff, /* GPIO_DIR */ + 0xff, /* GPIO_DIR1 */ + 0xbf, /* GPIO_DIR2 */ + 0x00, /* GPIO_MASK */ + 0x00, /* GPIO_MASK1 */ + 0x00, /* GPIO_MASK2 */ + 0x00, /* GPIO_STATE */ + 0x00, /* GPIO_STATE1 */ + 0x00, /* GPIO_STATE2 */ +}; + +/* entry point */ +struct snd_ice1712_card_info snd_vt1724_prodigy_cards[] __devinitdata = { + { + .subvendor = VT1724_SUBDEVICE_PRODIGY71, + .name = "Audiotrak Prodigy 7.1", + .chip_init = prodigy_init, + .build_controls = prodigy_add_controls, + .eeprom_size = sizeof(prodigy71_eeprom), + .eeprom_data = prodigy71_eeprom, + }, + { } /* terminator */ +}; diff --git a/sound/pci/ice1712/prodigy.h b/sound/pci/ice1712/prodigy.h new file mode 100644 index 000000000..1ff29fee2 --- /dev/null +++ b/sound/pci/ice1712/prodigy.h @@ -0,0 +1,67 @@ +#ifndef __SOUND_PRODIGY_H +#define __SOUND_PRODIGY_H + +/* + * ALSA driver for VIA VT1724 (Envy24HT) + * + * Lowlevel functions for Terratec PRODIGY cards + * + * Copyright (c) 2003 Takashi Iwai + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#define PRODIGY_DEVICE_DESC "{AudioTrak,Prodigy 7.1}," + +#define VT1724_SUBDEVICE_PRODIGY71 0x33495345 /* PRODIGY 7.1 */ + +extern struct snd_ice1712_card_info snd_vt1724_prodigy_cards[]; + +/* GPIO bits */ +#define PRODIGY_CS8415_CS (1 << 23) +#define PRODIGY_CS8415_CDTO (1 << 22) +#define PRODIGY_WM_RESET (1 << 20) +#define PRODIGY_WM_CLK (1 << 19) +#define PRODIGY_WM_DATA (1 << 18) +#define PRODIGY_WM_RW (1 << 17) +#define PRODIGY_AC97_RESET (1 << 16) +#define PRODIGY_DIGITAL_SEL1 (1 << 15) +// #define PRODIGY_HP_SEL (1 << 14) +#define PRODIGY_WM_CS (1 << 12) + +#define PRODIGY_HP_AMP_EN (1 << 14) + + +/* WM8770 registers */ +#define WM_DAC_ATTEN 0x00 /* DAC1-8 analog attenuation */ +#define WM_DAC_MASTER_ATTEN 0x08 /* DAC master analog attenuation */ +#define WM_DAC_DIG_ATTEN 0x09 /* DAC1-8 digital attenuation */ +#define WM_DAC_DIG_MATER_ATTEN 0x11 /* DAC master digital attenuation */ +#define WM_PHASE_SWAP 0x12 /* DAC phase */ +#define WM_DAC_CTRL1 0x13 /* DAC control bits */ +#define WM_MUTE 0x14 /* mute controls */ +#define WM_DAC_CTRL2 0x15 /* de-emphasis and zefo-flag */ +#define WM_INT_CTRL 0x16 /* interface control */ +#define WM_MASTER 0x17 /* master clock and mode */ +#define WM_POWERDOWN 0x18 /* power-down controls */ +#define WM_ADC_GAIN 0x19 /* ADC gain L(19)/R(1a) */ +#define WM_ADC_MUX 0x1b /* input MUX */ +#define WM_OUT_MUX1 0x1c /* output MUX */ +#define WM_OUT_MUX2 0x1e /* output MUX */ +#define WM_RESET 0x1f /* software reset */ + + +#endif /* __SOUND_PRODIGY_H */ -- 2.43.0