drivers/atm/ambassador.c

   1 /*
   2   Madge Ambassador ATM Adapter driver.
   3   Copyright (C) 1995-1999  Madge Networks Ltd.
   4
   5   This program is free software; you can redistribute it and/or modify
   6   it under the terms of the GNU General Public License as published by
   7   the Free Software Foundation; either version 2 of the License, or
   8   (at your option) any later version.
   9
  10   This program is distributed in the hope that it will be useful,
  11   but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13   GNU General Public License for more details.
  14
  15   You should have received a copy of the GNU General Public License
  16   along with this program; if not, write to the Free Software
  17   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18
  19   The GNU GPL is contained in /usr/doc/copyright/GPL on a Debian
  20   system and in the file COPYING in the Linux kernel source.
  21 */
  22
  23 /* * dedicated to the memory of Graham Gordon 1971-1998 * */
  24
  25 #include <linux/module.h>
  26 #include <linux/types.h>
  27 #include <linux/pci.h>
  28 #include <linux/kernel.h>
  29 #include <linux/init.h>
  30 #include <linux/ioport.h>
  31 #include <linux/atmdev.h>
  32 #include <linux/delay.h>
  33 #include <linux/interrupt.h>
  34
  35 #include <asm/atomic.h>
  36 #include <asm/io.h>
  37 #include <asm/byteorder.h>
  38
  39 #include "ambassador.h"
  40
  41 #define maintainer_string "Giuliano Procida at Madge Networks <gprocida@madge.com>"
  42 #define description_string "Madge ATM Ambassador driver"
  43 #define version_string "1.2.4"
  44
  45 static inline void __init show_version (void) {
  46   printk ("%s version %s\n", description_string, version_string);
  47 }
  48
  49 /*
  50
  51   Theory of Operation
  52
  53   I Hardware, detection, initialisation and shutdown.
  54
  55   1. Supported Hardware
  56
  57   This driver is for the PCI ATMizer-based Ambassador card (except
  58   very early versions). It is not suitable for the similar EISA "TR7"
  59   card. Commercially, both cards are known as Collage Server ATM
  60   adapters.
  61
   62   The loader supports image transfer to the card, image start and a few
   63   other miscellaneous commands.
  64
  65   Only AAL5 is supported with vpi = 0 and vci in the range 0 to 1023.
  66
  67   The cards are big-endian.
  68
  69   2. Detection
  70
  71   Standard PCI stuff, the early cards are detected and rejected.
  72
  73   3. Initialisation
  74
  75   The cards are reset and the self-test results are checked. The
  76   microcode image is then transferred and started. This waits for a
  77   pointer to a descriptor containing details of the host-based queues
  78   and buffers and various parameters etc. Once they are processed
  79   normal operations may begin. The BIA is read using a microcode
  80   command.
  81
  82   4. Shutdown
  83
  84   This may be accomplished either by a card reset or via the microcode
  85   shutdown command. Further investigation required.
  86
  87   5. Persistent state
  88
  89   The card reset does not affect PCI configuration (good) or the
  90   contents of several other "shared run-time registers" (bad) which
  91   include doorbell and interrupt control as well as EEPROM and PCI
  92   control. The driver must be careful when modifying these registers
  93   not to touch bits it does not use and to undo any changes at exit.
  94
  95   II Driver software
  96
  97   0. Generalities
  98
  99   The adapter is quite intelligent (fast) and has a simple interface
 100   (few features). VPI is always zero, 1024 VCIs are supported. There
 101   is limited cell rate support. UBR channels can be capped and ABR
 102   (explicit rate, but not EFCI) is supported. There is no CBR or VBR
 103   support.
 104
 105   1. Driver <-> Adapter Communication
 106
 107   Apart from the basic loader commands, the driver communicates
 108   through three entities: the command queue (CQ), the transmit queue
 109   pair (TXQ) and the receive queue pairs (RXQ). These three entities
 110   are set up by the host and passed to the microcode just after it has
 111   been started.
 112
 113   All queues are host-based circular queues. They are contiguous and
 114   (due to hardware limitations) have some restrictions as to their
 115   locations in (bus) memory. They are of the "full means the same as
 116   empty so don't do that" variety since the adapter uses pointers
 117   internally.
 118
 119   The queue pairs work as follows: one queue is for supply to the
 120   adapter, items in it are pending and are owned by the adapter; the
 121   other is the queue for return from the adapter, items in it have
 122   been dealt with by the adapter. The host adds items to the supply
 123   (TX descriptors and free RX buffer descriptors) and removes items
 124   from the return (TX and RX completions). The adapter deals with out
 125   of order completions.
 126
 127   Interrupts (card to host) and the doorbell (host to card) are used
 128   for signalling.
 129
 130   1. CQ
 131
 132   This is to communicate "open VC", "close VC", "get stats" etc. to
 133   the adapter. At most one command is retired every millisecond by the
 134   card. There is no out of order completion or notification. The
 135   driver needs to check the return code of the command, waiting as
 136   appropriate.
 137
 138   2. TXQ
 139
 140   TX supply items are of variable length (scatter gather support) and
 141   so the queue items are (more or less) pointers to the real thing.
 142   Each TX supply item contains a unique, host-supplied handle (the skb
 143   bus address seems most sensible as this works for Alphas as well,
 144   there is no need to do any endian conversions on the handles).
 145
 146   TX return items consist of just the handles above.
 147
 148   3. RXQ (up to 4 of these with different lengths and buffer sizes)
 149
 150   RX supply items consist of a unique, host-supplied handle (the skb
 151   bus address again) and a pointer to the buffer data area.
 152
 153   RX return items consist of the handle above, the VC, length and a
 154   status word. This just screams "oh so easy" doesn't it?
 155
 156   Note on RX pool sizes:
 157
 158   Each pool should have enough buffers to handle a back-to-back stream
 159   of minimum sized frames on a single VC. For example:
 160
 161     frame spacing = 3us (about right)
 162
 163     delay = IRQ lat + RX handling + RX buffer replenish = 20 (us)  (a guess)
 164
 165     min number of buffers for one VC = 1 + delay/spacing (buffers)
 166
 167     delay/spacing = latency = (20+2)/3 = 7 (buffers)  (rounding up)
 168
 169   The 20us delay assumes that there is no need to sleep; if we need to
 170   sleep to get buffers we are going to drop frames anyway.
 171
 172   In fact, each pool should have enough buffers to support the
 173   simultaneous reassembly of a separate frame on each VC and cope with
 174   the case in which frames complete in round robin cell fashion on
 175   each VC.
 176
 177   Only one frame can complete at each cell arrival, so if "n" VCs are
 178   open, the worst case is to have them all complete frames together
 179   followed by all starting new frames together.
 180
 181     desired number of buffers = n + delay/spacing
 182
 183   These are the extreme requirements, however, they are "n+k" for some
 184   "k" so we have only the constant to choose. This is the argument
  185   rx_lats which currently defaults to 7.
 186
 187   Actually, "n ? n+k : 0" is better and this is what is implemented,
 188   subject to the limit given by the pool size.
 189
 190   4. Driver locking
 191
 192   Simple spinlocks are used around the TX and RX queue mechanisms.
 193   Anyone with a faster, working method is welcome to implement it.
 194
 195   The adapter command queue is protected with a spinlock. We always
 196   wait for commands to complete.
 197
 198   A more complex form of locking is used around parts of the VC open
 199   and close functions. There are three reasons for a lock: 1. we need
 200   to do atomic rate reservation and release (not used yet), 2. Opening
 201   sometimes involves two adapter commands which must not be separated
 202   by another command on the same VC, 3. the changes to RX pool size
 203   must be atomic. The lock needs to work over context switches, so we
 204   use a semaphore.
 205
 206   III Hardware Features and Microcode Bugs
 207
 208   1. Byte Ordering
 209
 210   *%^"$&%^$*&^"$(%^$#&^%$(&#%$*(&^#%!"!"!*!
 211
 212   2. Memory access
 213
 214   All structures that are not accessed using DMA must be 4-byte
 215   aligned (not a problem) and must not cross 4MB boundaries.
 216
 217   There is a DMA memory hole at E0000000-E00000FF (groan).
 218
 219   TX fragments (DMA read) must not cross 4MB boundaries (would be 16MB
 220   but for a hardware bug).
 221
 222   RX buffers (DMA write) must not cross 16MB boundaries and must
 223   include spare trailing bytes up to the next 4-byte boundary; they
 224   will be written with rubbish.
 225
 226   The PLX likes to prefetch; if reading up to 4 u32 past the end of
 227   each TX fragment is not a problem, then TX can be made to go a
 228   little faster by passing a flag at init that disables a prefetch
 229   workaround. We do not pass this flag. (new microcode only)
 230
 231   Now we:
 232   . Note that alloc_skb rounds up size to a 16byte boundary.
 233   . Ensure all areas do not traverse 4MB boundaries.
 234   . Ensure all areas do not start at a E00000xx bus address.
 235   (I cannot be certain, but this may always hold with Linux)
 236   . Make all failures cause a loud message.
 237   . Discard non-conforming SKBs (causes TX failure or RX fill delay).
 238   . Discard non-conforming TX fragment descriptors (the TX fails).
 239   In the future we could:
 240   . Allow RX areas that traverse 4MB (but not 16MB) boundaries.
 241   . Segment TX areas into some/more fragments, when necessary.
 242   . Relax checks for non-DMA items (ignore hole).
 243   . Give scatter-gather (iovec) requirements using ???. (?)
 244
 245   3. VC close is broken (only for new microcode)
 246
 247   The VC close adapter microcode command fails to do anything if any
 248   frames have been received on the VC but none have been transmitted.
 249   Frames continue to be reassembled and passed (with IRQ) to the
 250   driver.
 251
 252   IV To Do List
 253
 254   . Fix bugs!
 255
 256   . Timer code may be broken.
 257
 258   . Deal with buggy VC close (somehow) in microcode 12.
 259
 260   . Handle interrupted and/or non-blocking writes - is this a job for
 261     the protocol layer?
 262
 263   . Add code to break up TX fragments when they span 4MB boundaries.
 264
 265   . Add SUNI phy layer (need to know where SUNI lives on card).
 266
 267   . Implement a tx_alloc fn to (a) satisfy TX alignment etc. and (b)
 268     leave extra headroom space for Ambassador TX descriptors.
 269
 270   . Understand these elements of struct atm_vcc: recvq (proto?),
 271     sleep, callback, listenq, backlog_quota, reply and user_back.
 272
 273   . Adjust TX/RX skb allocation to favour IP with LANE/CLIP (configurable).
 274
 275   . Impose a TX-pending limit (2?) on each VC, help avoid TX q overflow.
 276
 277   . Decide whether RX buffer recycling is or can be made completely safe;
 278     turn it back on. It looks like Werner is going to axe this.
 279
 280   . Implement QoS changes on open VCs (involves extracting parts of VC open
 281     and close into separate functions and using them to make changes).
 282
 283   . Hack on command queue so that someone can issue multiple commands and wait
 284     on the last one (OR only "no-op" or "wait" commands are waited for).
 285
 286   . Eliminate need for while-schedule around do_command.
 287
 288 */
 289
 290 /********** microcode **********/
 291
 292 #ifdef AMB_NEW_MICROCODE
 293 #define UCODE(x) UCODE2(atmsar12.x)
 294 #else
 295 #define UCODE(x) UCODE2(atmsar11.x)
 296 #endif
 297 #define UCODE2(x) #x
 298
 299 static u32 __initdata ucode_start =
 300 #include UCODE(start)
 301 ;
 302
 303 static region __initdata ucode_regions[] = {
 304 #include UCODE(regions)
 305   { 0, 0 }
 306 };
 307
 308 static u32 __initdata ucode_data[] = {
 309 #include UCODE(data)
 310   0xdeadbeef
 311 };
 312
 313 static void do_housekeeping (unsigned long arg);
 314 /********** globals **********/
 315
 316 static unsigned short debug = 0;
 317 static unsigned int cmds = 8;
 318 static unsigned int txs = 32;
 319 static unsigned int rxs[NUM_RX_POOLS] = { 64, 64, 64, 64 };
 320 static unsigned int rxs_bs[NUM_RX_POOLS] = { 4080, 12240, 36720, 65535 };
 321 static unsigned int rx_lats = 7;
 322 static unsigned char pci_lat = 0;
 323
 324 static const unsigned long onegigmask = -1 << 30;
 325
 326 /********** access to adapter **********/
 327
 328 static inline void wr_plain (const amb_dev * dev, size_t addr, u32 data) {
 329   PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x", addr, data);
 330 #ifdef AMB_MMIO
 331   dev->membase[addr / sizeof(u32)] = data;
 332 #else
 333   outl (data, dev->iobase + addr);
 334 #endif
 335 }
 336
 337 static inline u32 rd_plain (const amb_dev * dev, size_t addr) {
 338 #ifdef AMB_MMIO
 339   u32 data = dev->membase[addr / sizeof(u32)];
 340 #else
 341   u32 data = inl (dev->iobase + addr);
 342 #endif
 343   PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x", addr, data);
 344   return data;
 345 }
 346
 347 static inline void wr_mem (const amb_dev * dev, size_t addr, u32 data) {
 348   u32 be = cpu_to_be32 (data);
 349   PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x b[%08x]", addr, data, be);
 350 #ifdef AMB_MMIO
 351   dev->membase[addr / sizeof(u32)] = be;
 352 #else
 353   outl (be, dev->iobase + addr);
 354 #endif
 355 }
 356
 357 static inline u32 rd_mem (const amb_dev * dev, size_t addr) {
 358 #ifdef AMB_MMIO
 359   u32 be = dev->membase[addr / sizeof(u32)];
 360 #else
 361   u32 be = inl (dev->iobase + addr);
 362 #endif
 363   u32 data = be32_to_cpu (be);
 364   PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x b[%08x]", addr, data, be);
 365   return data;
 366 }
 367
 368 /********** dump routines **********/
 369
 370 static inline void dump_registers (const amb_dev * dev) {
 371 #ifdef DEBUG_AMBASSADOR
 372   if (debug & DBG_REGS) {
 373     size_t i;
 374     PRINTD (DBG_REGS, "reading PLX control: ");
 375     for (i = 0x00; i < 0x30; i += sizeof(u32))
 376       rd_mem (dev, i);
 377     PRINTD (DBG_REGS, "reading mailboxes: ");
 378     for (i = 0x40; i < 0x60; i += sizeof(u32))
 379       rd_mem (dev, i);
 380     PRINTD (DBG_REGS, "reading doorb irqev irqen reset:");
 381     for (i = 0x60; i < 0x70; i += sizeof(u32))
 382       rd_mem (dev, i);
 383   }
 384 #else
 385   (void) dev;
 386 #endif
 387   return;
 388 }
 389
 390 static inline void dump_loader_block (volatile loader_block * lb) {
 391 #ifdef DEBUG_AMBASSADOR
 392   unsigned int i;
 393   PRINTDB (DBG_LOAD, "lb @ %p; res: %d, cmd: %d, pay:",
 394            lb, be32_to_cpu (lb->result), be32_to_cpu (lb->command));
 395   for (i = 0; i < MAX_COMMAND_DATA; ++i)
 396     PRINTDM (DBG_LOAD, " %08x", be32_to_cpu (lb->payload.data[i]));
 397   PRINTDE (DBG_LOAD, ", vld: %08x", be32_to_cpu (lb->valid));
 398 #else
 399   (void) lb;
 400 #endif
 401   return;
 402 }
 403
 404 static inline void dump_command (command * cmd) {
 405 #ifdef DEBUG_AMBASSADOR
 406   unsigned int i;
 407   PRINTDB (DBG_CMD, "cmd @ %p, req: %08x, pars:",
 408            cmd, /*be32_to_cpu*/ (cmd->request));
 409   for (i = 0; i < 3; ++i)
 410     PRINTDM (DBG_CMD, " %08x", /*be32_to_cpu*/ (cmd->args.par[i]));
 411   PRINTDE (DBG_CMD, "");
 412 #else
 413   (void) cmd;
 414 #endif
 415   return;
 416 }
 417
 418 static inline void dump_skb (char * prefix, unsigned int vc, struct sk_buff * skb) {
 419 #ifdef DEBUG_AMBASSADOR
 420   unsigned int i;
 421   unsigned char * data = skb->data;
 422   PRINTDB (DBG_DATA, "%s(%u) ", prefix, vc);
 423   for (i=0; i<skb->len && i < 256;i++)
 424     PRINTDM (DBG_DATA, "%02x ", data[i]);
 425   PRINTDE (DBG_DATA,"");
 426 #else
 427   (void) prefix;
 428   (void) vc;
 429   (void) skb;
 430 #endif
 431   return;
 432 }
 433
 434 /********** check memory areas for use by Ambassador **********/
 435
 436 /* see limitations under Hardware Features */
 437
 438 static inline int check_area (void * start, size_t length) {
 439   // assumes length > 0
 440   const u32 fourmegmask = -1 << 22;
 441   const u32 twofivesixmask = -1 << 8;
 442   const u32 starthole = 0xE0000000;
 443   u32 startaddress = virt_to_bus (start);
 444   u32 lastaddress = startaddress+length-1;
 445   if ((startaddress ^ lastaddress) & fourmegmask ||
 446       (startaddress & twofivesixmask) == starthole) {
 447     PRINTK (KERN_ERR, "check_area failure: [%x,%x] - mail maintainer!",
 448             startaddress, lastaddress);
 449     return -1;
 450   } else {
 451     return 0;
 452   }
 453 }
 454
 455 /********** free an skb (as per ATM device driver documentation) **********/
 456
 457 static inline void amb_kfree_skb (struct sk_buff * skb) {
 458   if (ATM_SKB(skb)->vcc->pop) {
 459     ATM_SKB(skb)->vcc->pop (ATM_SKB(skb)->vcc, skb);
 460   } else {
 461     dev_kfree_skb_any (skb);
 462   }
 463 }
 464
 465 /********** TX completion **********/
 466
 467 static inline void tx_complete (amb_dev * dev, tx_out * tx) {
 468   tx_simple * tx_descr = bus_to_virt (tx->handle);
 469   struct sk_buff * skb = tx_descr->skb;
 470
 471   PRINTD (DBG_FLOW|DBG_TX, "tx_complete %p %p", dev, tx);
 472
 473   // VC layer stats
 474   atomic_inc(&ATM_SKB(skb)->vcc->stats->tx);
 475
 476   // free the descriptor
 477   kfree (tx_descr);
 478
 479   // free the skb
 480   amb_kfree_skb (skb);
 481
 482   dev->stats.tx_ok++;
 483   return;
 484 }
 485
 486 /********** RX completion **********/
 487
 488 static void rx_complete (amb_dev * dev, rx_out * rx) {
 489   struct sk_buff * skb = bus_to_virt (rx->handle);
 490   u16 vc = be16_to_cpu (rx->vc);
 491   // unused: u16 lec_id = be16_to_cpu (rx->lec_id);
 492   u16 status = be16_to_cpu (rx->status);
 493   u16 rx_len = be16_to_cpu (rx->length);
 494
 495   PRINTD (DBG_FLOW|DBG_RX, "rx_complete %p %p (len=%hu)", dev, rx, rx_len);
 496
 497   // XXX move this in and add to VC stats ???
 498   if (!status) {
 499     struct atm_vcc * atm_vcc = dev->rxer[vc];
 500     dev->stats.rx.ok++;
 501
 502     if (atm_vcc) {
 503
 504       if (rx_len <= atm_vcc->qos.rxtp.max_sdu) {
 505
 506         if (atm_charge (atm_vcc, skb->truesize)) {
 507
 508           // prepare socket buffer
 509           ATM_SKB(skb)->vcc = atm_vcc;
 510           skb_put (skb, rx_len);
 511
 512           dump_skb ("<<<", vc, skb);
 513
 514           // VC layer stats
 515           atomic_inc(&atm_vcc->stats->rx);
 516           do_gettimeofday(&skb->stamp);
 517           // end of our responsability
 518           atm_vcc->push (atm_vcc, skb);
 519           return;
 520
 521         } else {
 522           // someone fix this (message), please!
 523           PRINTD (DBG_INFO|DBG_RX, "dropped thanks to atm_charge (vc %hu, truesize %u)", vc, skb->truesize);
 524           // drop stats incremented in atm_charge
 525         }
 526
 527       } else {
 528         PRINTK (KERN_INFO, "dropped over-size frame");
 529         // should we count this?
 530         atomic_inc(&atm_vcc->stats->rx_drop);
 531       }
 532
 533     } else {
 534       PRINTD (DBG_WARN|DBG_RX, "got frame but RX closed for channel %hu", vc);
 535       // this is an adapter bug, only in new version of microcode
 536     }
 537
 538   } else {
 539     dev->stats.rx.error++;
 540     if (status & CRC_ERR)
 541       dev->stats.rx.badcrc++;
 542     if (status & LEN_ERR)
 543       dev->stats.rx.toolong++;
 544     if (status & ABORT_ERR)
 545       dev->stats.rx.aborted++;
 546     if (status & UNUSED_ERR)
 547       dev->stats.rx.unused++;
 548   }
 549
 550   dev_kfree_skb_any (skb);
 551   return;
 552 }
 553
 554 /*
 555
 556   Note on queue handling.
 557
 558   Here "give" and "take" refer to queue entries and a queue (pair)
 559   rather than frames to or from the host or adapter. Empty frame
 560   buffers are given to the RX queue pair and returned unused or
 561   containing RX frames. TX frames (well, pointers to TX fragment
 562   lists) are given to the TX queue pair, completions are returned.
 563
 564 */
 565
 566 /********** command queue **********/
 567
 568 // I really don't like this, but it's the best I can do at the moment
 569
 570 // also, the callers are responsible for byte order as the microcode
 571 // sometimes does 16-bit accesses (yuk yuk yuk)
 572
 573 static int command_do (amb_dev * dev, command * cmd) {
 574   amb_cq * cq = &dev->cq;
 575   volatile amb_cq_ptrs * ptrs = &cq->ptrs;
 576   command * my_slot;
 577   unsigned long timeout;
 578
 579   PRINTD (DBG_FLOW|DBG_CMD, "command_do %p", dev);
 580
 581   if (test_bit (dead, &dev->flags))
 582     return 0;
 583
 584   spin_lock (&cq->lock);
 585
 586   // if not full...
 587   if (cq->pending < cq->maximum) {
 588     // remember my slot for later
 589     my_slot = ptrs->in;
 590     PRINTD (DBG_CMD, "command in slot %p", my_slot);
 591
 592     dump_command (cmd);
 593
 594     // copy command in
 595     *ptrs->in = *cmd;
 596     cq->pending++;
 597     ptrs->in = NEXTQ (ptrs->in, ptrs->start, ptrs->limit);
 598
 599     // mail the command
 600     wr_mem (dev, offsetof(amb_mem, mb.adapter.cmd_address), virt_to_bus (ptrs->in));
 601
 602     // prepare to wait for cq->pending milliseconds
 603     // effectively one centisecond on i386
 604     timeout = (cq->pending*HZ+999)/1000;
 605
 606     if (cq->pending > cq->high)
 607       cq->high = cq->pending;
 608     spin_unlock (&cq->lock);
 609
 610     while (timeout) {
 611       // go to sleep
 612       // PRINTD (DBG_CMD, "wait: sleeping %lu for command", timeout);
 613       set_current_state(TASK_UNINTERRUPTIBLE);
 614       timeout = schedule_timeout (timeout);
 615     }
 616
 617     // wait for my slot to be reached (all waiters are here or above, until...)
 618     while (ptrs->out != my_slot) {
 619       PRINTD (DBG_CMD, "wait: command slot (now at %p)", ptrs->out);
 620       set_current_state(TASK_UNINTERRUPTIBLE);
 621       schedule();
 622     }
 623
 624     // wait on my slot (... one gets to its slot, and... )
 625     while (ptrs->out->request != cpu_to_be32 (SRB_COMPLETE)) {
 626       PRINTD (DBG_CMD, "wait: command slot completion");
 627       set_current_state(TASK_UNINTERRUPTIBLE);
 628       schedule();
 629     }
 630
 631     PRINTD (DBG_CMD, "command complete");
 632     // update queue (... moves the queue along to the next slot)
 633     spin_lock (&cq->lock);
 634     cq->pending--;
 635     // copy command out
 636     *cmd = *ptrs->out;
 637     ptrs->out = NEXTQ (ptrs->out, ptrs->start, ptrs->limit);
 638     spin_unlock (&cq->lock);
 639
 640     return 0;
 641   } else {
 642     cq->filled++;
 643     spin_unlock (&cq->lock);
 644     return -EAGAIN;
 645   }
 646
 647 }
 648
 649 /********** TX queue pair **********/
 650
 651 static inline int tx_give (amb_dev * dev, tx_in * tx) {
 652   amb_txq * txq = &dev->txq;
 653   unsigned long flags;
 654
 655   PRINTD (DBG_FLOW|DBG_TX, "tx_give %p", dev);
 656
 657   if (test_bit (dead, &dev->flags))
 658     return 0;
 659
 660   spin_lock_irqsave (&txq->lock, flags);
 661
 662   if (txq->pending < txq->maximum) {
 663     PRINTD (DBG_TX, "TX in slot %p", txq->in.ptr);
 664
 665     *txq->in.ptr = *tx;
 666     txq->pending++;
 667     txq->in.ptr = NEXTQ (txq->in.ptr, txq->in.start, txq->in.limit);
 668     // hand over the TX and ring the bell
 669     wr_mem (dev, offsetof(amb_mem, mb.adapter.tx_address), virt_to_bus (txq->in.ptr));
 670     wr_mem (dev, offsetof(amb_mem, doorbell), TX_FRAME);
 671
 672     if (txq->pending > txq->high)
 673       txq->high = txq->pending;
 674     spin_unlock_irqrestore (&txq->lock, flags);
 675     return 0;
 676   } else {
 677     txq->filled++;
 678     spin_unlock_irqrestore (&txq->lock, flags);
 679     return -EAGAIN;
 680   }
 681 }
 682
 683 static inline int tx_take (amb_dev * dev) {
 684   amb_txq * txq = &dev->txq;
 685   unsigned long flags;
 686
 687   PRINTD (DBG_FLOW|DBG_TX, "tx_take %p", dev);
 688
 689   spin_lock_irqsave (&txq->lock, flags);
 690
 691   if (txq->pending && txq->out.ptr->handle) {
 692     // deal with TX completion
 693     tx_complete (dev, txq->out.ptr);
 694     // mark unused again
 695     txq->out.ptr->handle = 0;
 696     // remove item
 697     txq->pending--;
 698     txq->out.ptr = NEXTQ (txq->out.ptr, txq->out.start, txq->out.limit);
 699
 700     spin_unlock_irqrestore (&txq->lock, flags);
 701     return 0;
 702   } else {
 703
 704     spin_unlock_irqrestore (&txq->lock, flags);
 705     return -1;
 706   }
 707 }
 708
 709 /********** RX queue pairs **********/
 710
 711 static inline int rx_give (amb_dev * dev, rx_in * rx, unsigned char pool) {
 712   amb_rxq * rxq = &dev->rxq[pool];
 713   unsigned long flags;
 714
 715   PRINTD (DBG_FLOW|DBG_RX, "rx_give %p[%hu]", dev, pool);
 716
 717   spin_lock_irqsave (&rxq->lock, flags);
 718
 719   if (rxq->pending < rxq->maximum) {
 720     PRINTD (DBG_RX, "RX in slot %p", rxq->in.ptr);
 721
 722     *rxq->in.ptr = *rx;
 723     rxq->pending++;
 724     rxq->in.ptr = NEXTQ (rxq->in.ptr, rxq->in.start, rxq->in.limit);
 725     // hand over the RX buffer
 726     wr_mem (dev, offsetof(amb_mem, mb.adapter.rx_address[pool]), virt_to_bus (rxq->in.ptr));
 727
 728     spin_unlock_irqrestore (&rxq->lock, flags);
 729     return 0;
 730   } else {
 731     spin_unlock_irqrestore (&rxq->lock, flags);
 732     return -1;
 733   }
 734 }
 735
 736 static inline int rx_take (amb_dev * dev, unsigned char pool) {
 737   amb_rxq * rxq = &dev->rxq[pool];
 738   unsigned long flags;
 739
 740   PRINTD (DBG_FLOW|DBG_RX, "rx_take %p[%hu]", dev, pool);
 741
 742   spin_lock_irqsave (&rxq->lock, flags);
 743
 744   if (rxq->pending && (rxq->out.ptr->status || rxq->out.ptr->length)) {
 745     // deal with RX completion
 746     rx_complete (dev, rxq->out.ptr);
 747     // mark unused again
 748     rxq->out.ptr->status = 0;
 749     rxq->out.ptr->length = 0;
 750     // remove item
 751     rxq->pending--;
 752     rxq->out.ptr = NEXTQ (rxq->out.ptr, rxq->out.start, rxq->out.limit);
 753
 754     if (rxq->pending < rxq->low)
 755       rxq->low = rxq->pending;
 756     spin_unlock_irqrestore (&rxq->lock, flags);
 757     return 0;
 758   } else {
 759     if (!rxq->pending && rxq->buffers_wanted)
 760       rxq->emptied++;
 761     spin_unlock_irqrestore (&rxq->lock, flags);
 762     return -1;
 763   }
 764 }
 765
 766 /********** RX Pool handling **********/
 767
 768 /* pre: buffers_wanted = 0, post: pending = 0 */
 769 static inline void drain_rx_pool (amb_dev * dev, unsigned char pool) {
 770   amb_rxq * rxq = &dev->rxq[pool];
 771
 772   PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pool %p %hu", dev, pool);
 773
 774   if (test_bit (dead, &dev->flags))
 775     return;
 776
 777   /* we are not quite like the fill pool routines as we cannot just
 778      remove one buffer, we have to remove all of them, but we might as
 779      well pretend... */
 780   if (rxq->pending > rxq->buffers_wanted) {
 781     command cmd;
 782     cmd.request = cpu_to_be32 (SRB_FLUSH_BUFFER_Q);
 783     cmd.args.flush.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
 784     while (command_do (dev, &cmd))
 785       schedule();
 786     /* the pool may also be emptied via the interrupt handler */
 787     while (rxq->pending > rxq->buffers_wanted)
 788       if (rx_take (dev, pool))
 789         schedule();
 790   }
 791
 792   return;
 793 }
 794
 795 static void drain_rx_pools (amb_dev * dev) {
 796   unsigned char pool;
 797
 798   PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pools %p", dev);
 799
 800   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 801     drain_rx_pool (dev, pool);
 802 }
 803
 804 static inline void fill_rx_pool (amb_dev * dev, unsigned char pool, int priority) {
 805   rx_in rx;
 806   amb_rxq * rxq;
 807
 808   PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pool %p %hu %x", dev, pool, priority);
 809
 810   if (test_bit (dead, &dev->flags))
 811     return;
 812
 813   rxq = &dev->rxq[pool];
 814   while (rxq->pending < rxq->maximum && rxq->pending < rxq->buffers_wanted) {
 815
 816     struct sk_buff * skb = alloc_skb (rxq->buffer_size, priority);
 817     if (!skb) {
 818       PRINTD (DBG_SKB|DBG_POOL, "failed to allocate skb for RX pool %hu", pool);
 819       return;
 820     }
 821     if (check_area (skb->data, skb->truesize)) {
 822       dev_kfree_skb_any (skb);
 823       return;
 824     }
 825     // cast needed as there is no %? for pointer differences
 826     PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li",
 827             skb, skb->head, (long) (skb->end - skb->head));
 828     rx.handle = virt_to_bus (skb);
 829     rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
 830     if (rx_give (dev, &rx, pool))
 831       dev_kfree_skb_any (skb);
 832
 833   }
 834
 835   return;
 836 }
 837
 838 // top up all RX pools (can also be called as a bottom half)
 839 static void fill_rx_pools (amb_dev * dev) {
 840   unsigned char pool;
 841
 842   PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pools %p", dev);
 843
 844   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 845     fill_rx_pool (dev, pool, GFP_ATOMIC);
 846
 847   return;
 848 }
 849
 850 /********** enable host interrupts **********/
 851
 852 static inline void interrupts_on (amb_dev * dev) {
 853   wr_plain (dev, offsetof(amb_mem, interrupt_control),
 854             rd_plain (dev, offsetof(amb_mem, interrupt_control))
 855             | AMB_INTERRUPT_BITS);
 856 }
 857
 858 /********** disable host interrupts **********/
 859
 860 static inline void interrupts_off (amb_dev * dev) {
 861   wr_plain (dev, offsetof(amb_mem, interrupt_control),
 862             rd_plain (dev, offsetof(amb_mem, interrupt_control))
 863             &~ AMB_INTERRUPT_BITS);
 864 }
 865
 866 /********** interrupt handling **********/
 867
 868 static irqreturn_t interrupt_handler(int irq, void *dev_id,
 869                                         struct pt_regs *pt_regs) {
 870   amb_dev * dev = (amb_dev *) dev_id;
 871   (void) pt_regs;
 872
 873   PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler: %p", dev_id);
 874
 875   if (!dev_id) {
 876     PRINTD (DBG_IRQ|DBG_ERR, "irq with NULL dev_id: %d", irq);
 877     return IRQ_NONE;
 878   }
 879
 880   {
 881     u32 interrupt = rd_plain (dev, offsetof(amb_mem, interrupt));
 882
 883     // for us or someone else sharing the same interrupt
 884     if (!interrupt) {
 885       PRINTD (DBG_IRQ, "irq not for me: %d", irq);
 886       return IRQ_NONE;
 887     }
 888
 889     // definitely for us
 890     PRINTD (DBG_IRQ, "FYI: interrupt was %08x", interrupt);
 891     wr_plain (dev, offsetof(amb_mem, interrupt), -1);
 892   }
 893
 894   {
 895     unsigned int irq_work = 0;
 896     unsigned char pool;
 897     for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 898       while (!rx_take (dev, pool))
 899         ++irq_work;
 900     while (!tx_take (dev))
 901       ++irq_work;
 902
 903     if (irq_work) {
 904 #ifdef FILL_RX_POOLS_IN_BH
 905       schedule_work (&dev->bh);
 906 #else
 907       fill_rx_pools (dev);
 908 #endif
 909
 910       PRINTD (DBG_IRQ, "work done: %u", irq_work);
 911     } else {
 912       PRINTD (DBG_IRQ|DBG_WARN, "no work done");
 913     }
 914   }
 915
 916   PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler done: %p", dev_id);
 917   return IRQ_HANDLED;
 918 }
 919
 920 /********** make rate (not quite as much fun as Horizon) **********/
 921
 922 static unsigned int make_rate (unsigned int rate, rounding r,
 923                                u16 * bits, unsigned int * actual) {
 924   unsigned char exp = -1; // hush gcc
 925   unsigned int man = -1;  // hush gcc
 926
 927   PRINTD (DBG_FLOW|DBG_QOS, "make_rate %u", rate);
 928
 929   // rates in cells per second, ITU format (nasty 16-bit floating-point)
 930   // given 5-bit e and 9-bit m:
 931   // rate = EITHER (1+m/2^9)*2^e    OR 0
 932   // bits = EITHER 1<<14 | e<<9 | m OR 0
 933   // (bit 15 is "reserved", bit 14 "non-zero")
 934   // smallest rate is 0 (special representation)
 935   // largest rate is (1+511/512)*2^31 = 4290772992 (< 2^32-1)
 936   // smallest non-zero rate is (1+0/512)*2^0 = 1 (> 0)
 937   // simple algorithm:
 938   // find position of top bit, this gives e
 939   // remove top bit and shift (rounding if feeling clever) by 9-e
 940
 941   // ucode bug: please don't set bit 14! so 0 rate not representable
 942
 943   if (rate > 0xffc00000U) {
 944     // larger than largest representable rate
 945
 946     if (r == round_up) {
 947         return -EINVAL;
 948     } else {
 949       exp = 31;
 950       man = 511;
 951     }
 952
 953   } else if (rate) {
 954     // representable rate
 955
 956     exp = 31;
 957     man = rate;
 958
 959     // invariant: rate = man*2^(exp-31)
 960     while (!(man & (1<<31))) {
 961       exp = exp - 1;
 962       man = man<<1;
 963     }
 964
 965     // man has top bit set
 966     // rate = (2^31+(man-2^31))*2^(exp-31)
 967     // rate = (1+(man-2^31)/2^31)*2^exp
 968     man = man<<1;
 969     man &= 0xffffffffU; // a nop on 32-bit systems
 970     // rate = (1+man/2^32)*2^exp
 971
 972     // exp is in the range 0 to 31, man is in the range 0 to 2^32-1
 973     // time to lose significance... we want m in the range 0 to 2^9-1
 974     // rounding presents a minor problem... we first decide which way
 975     // we are rounding (based on given rounding direction and possibly
 976     // the bits of the mantissa that are to be discarded).
 977
 978     switch (r) {
 979       case round_down: {
 980         // just truncate
 981         man = man>>(32-9);
 982         break;
 983       }
 984       case round_up: {
 985         // check all bits that we are discarding
 986         if (man & (-1>>9)) {
 987           man = (man>>(32-9)) + 1;
 988           if (man == (1<<9)) {
 989             // no need to check for round up outside of range
 990             man = 0;
 991             exp += 1;
 992           }
 993         } else {
 994           man = (man>>(32-9));
 995         }
 996         break;
 997       }
 998       case round_nearest: {
 999         // check msb that we are discarding
1000         if (man & (1<<(32-9-1))) {
1001           man = (man>>(32-9)) + 1;
1002           if (man == (1<<9)) {
1003             // no need to check for round up outside of range
1004             man = 0;
1005             exp += 1;
1006           }
1007         } else {
1008           man = (man>>(32-9));
1009         }
1010         break;
1011       }
1012     }
1013
1014   } else {
1015     // zero rate - not representable
1016
1017     if (r == round_down) {
1018       return -EINVAL;
1019     } else {
1020       exp = 0;
1021       man = 0;
1022     }
1023
1024   }
1025
1026   PRINTD (DBG_QOS, "rate: man=%u, exp=%hu", man, exp);
1027
1028   if (bits)
1029     *bits = /* (1<<14) | */ (exp<<9) | man;
1030
1031   if (actual)
1032     *actual = (exp >= 9)
1033       ? (1 << exp) + (man << (exp-9))
1034       : (1 << exp) + ((man + (1<<(9-exp-1))) >> (9-exp));
1035
1036   return 0;
1037 }
1038
1039 /********** Linux ATM Operations **********/
1040
1041 // some are not yet implemented while others do not make sense for
1042 // this device
1043
1044 /********** Open a VC **********/
1045
1046 static int amb_open (struct atm_vcc * atm_vcc)
1047 {
1048   int error;
1049
1050   struct atm_qos * qos;
1051   struct atm_trafprm * txtp;
1052   struct atm_trafprm * rxtp;
1053   u16 tx_rate_bits;
1054   u16 tx_vc_bits = -1; // hush gcc
1055   u16 tx_frame_bits = -1; // hush gcc
1056
1057   amb_dev * dev = AMB_DEV(atm_vcc->dev);
1058   amb_vcc * vcc;
1059   unsigned char pool = -1; // hush gcc
1060   short vpi = atm_vcc->vpi;
1061   int vci = atm_vcc->vci;
1062
1063   PRINTD (DBG_FLOW|DBG_VCC, "amb_open %x %x", vpi, vci);
1064
1065 #ifdef ATM_VPI_UNSPEC
1066   // UNSPEC is deprecated, remove this code eventually
1067   if (vpi == ATM_VPI_UNSPEC || vci == ATM_VCI_UNSPEC) {
1068     PRINTK (KERN_WARNING, "rejecting open with unspecified VPI/VCI (deprecated)");
1069     return -EINVAL;
1070   }
1071 #endif
1072
1073   if (!(0 <= vpi && vpi < (1<<NUM_VPI_BITS) &&
1074         0 <= vci && vci < (1<<NUM_VCI_BITS))) {
1075     PRINTD (DBG_WARN|DBG_VCC, "VPI/VCI out of range: %hd/%d", vpi, vci);
1076     return -EINVAL;
1077   }
1078
1079   qos = &atm_vcc->qos;
1080
1081   if (qos->aal != ATM_AAL5) {
1082     PRINTD (DBG_QOS, "AAL not supported");
1083     return -EINVAL;
1084   }
1085
1086   // traffic parameters
1087
1088   PRINTD (DBG_QOS, "TX:");
1089   txtp = &qos->txtp;
1090   if (txtp->traffic_class != ATM_NONE) {
1091     switch (txtp->traffic_class) {
1092       case ATM_UBR: {
1093         // we take "the PCR" as a rate-cap
1094         int pcr = atm_pcr_goal (txtp);
1095         if (!pcr) {
1096           // no rate cap
1097           tx_rate_bits = 0;
1098           tx_vc_bits = TX_UBR;
1099           tx_frame_bits = TX_FRAME_NOTCAP;
1100         } else {
1101           rounding r;
1102           if (pcr < 0) {
1103             r = round_down;
1104             pcr = -pcr;
1105           } else {
1106             r = round_up;
1107           }
1108           error = make_rate (pcr, r, &tx_rate_bits, NULL);
1109           tx_vc_bits = TX_UBR_CAPPED;
1110           tx_frame_bits = TX_FRAME_CAPPED;
1111         }
1112         break;
1113       }
1114 #if 0
1115       case ATM_ABR: {
1116         pcr = atm_pcr_goal (txtp);
1117         PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1118         break;
1119       }
1120 #endif
1121       default: {
1122         // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1123         PRINTD (DBG_QOS, "request for non-UBR denied");
1124         return -EINVAL;
1125       }
1126     }
1127     PRINTD (DBG_QOS, "tx_rate_bits=%hx, tx_vc_bits=%hx",
1128             tx_rate_bits, tx_vc_bits);
1129   }
1130
1131   PRINTD (DBG_QOS, "RX:");
1132   rxtp = &qos->rxtp;
1133   if (rxtp->traffic_class == ATM_NONE) {
1134     // do nothing
1135   } else {
1136     // choose an RX pool (arranged in increasing size)
1137     for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1138       if ((unsigned int) rxtp->max_sdu <= dev->rxq[pool].buffer_size) {
1139         PRINTD (DBG_VCC|DBG_QOS|DBG_POOL, "chose pool %hu (max_sdu %u <= %u)",
1140                 pool, rxtp->max_sdu, dev->rxq[pool].buffer_size);
1141         break;
1142       }
1143     if (pool == NUM_RX_POOLS) {
1144       PRINTD (DBG_WARN|DBG_VCC|DBG_QOS|DBG_POOL,
1145               "no pool suitable for VC (RX max_sdu %d is too large)",
1146               rxtp->max_sdu);
1147       return -EINVAL;
1148     }
1149
1150     switch (rxtp->traffic_class) {
1151       case ATM_UBR: {
1152         break;
1153       }
1154 #if 0
1155       case ATM_ABR: {
1156         pcr = atm_pcr_goal (rxtp);
1157         PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1158         break;
1159       }
1160 #endif
1161       default: {
1162         // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1163         PRINTD (DBG_QOS, "request for non-UBR denied");
1164         return -EINVAL;
1165       }
1166     }
1167   }
1168
1169   // get space for our vcc stuff
1170   vcc = kmalloc (sizeof(amb_vcc), GFP_KERNEL);
1171   if (!vcc) {
1172     PRINTK (KERN_ERR, "out of memory!");
1173     return -ENOMEM;
1174   }
1175   atm_vcc->dev_data = (void *) vcc;
1176
1177   // no failures beyond this point
1178
1179   // we are not really "immediately before allocating the connection
1180   // identifier in hardware", but it will just have to do!
1181   set_bit(ATM_VF_ADDR,&atm_vcc->flags);
1182
1183   if (txtp->traffic_class != ATM_NONE) {
1184     command cmd;
1185
1186     vcc->tx_frame_bits = tx_frame_bits;
1187
1188     down (&dev->vcc_sf);
1189     if (dev->rxer[vci]) {
1190       // RXer on the channel already, just modify rate...
1191       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1192       cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1193       cmd.args.modify_rate.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1194       while (command_do (dev, &cmd))
1195         schedule();
1196       // ... and TX flags, preserving the RX pool
1197       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1198       cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1199       cmd.args.modify_flags.flags = cpu_to_be32
1200         ( (AMB_VCC(dev->rxer[vci])->rx_info.pool << SRB_POOL_SHIFT)
1201           | (tx_vc_bits << SRB_FLAGS_SHIFT) );
1202       while (command_do (dev, &cmd))
1203         schedule();
1204     } else {
1205       // no RXer on the channel, just open (with pool zero)
1206       cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1207       cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1208       cmd.args.open.flags = cpu_to_be32 (tx_vc_bits << SRB_FLAGS_SHIFT);
1209       cmd.args.open.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1210       while (command_do (dev, &cmd))
1211         schedule();
1212     }
1213     dev->txer[vci].tx_present = 1;
1214     up (&dev->vcc_sf);
1215   }
1216
1217   if (rxtp->traffic_class != ATM_NONE) {
1218     command cmd;
1219
1220     vcc->rx_info.pool = pool;
1221
1222     down (&dev->vcc_sf);
1223     /* grow RX buffer pool */
1224     if (!dev->rxq[pool].buffers_wanted)
1225       dev->rxq[pool].buffers_wanted = rx_lats;
1226     dev->rxq[pool].buffers_wanted += 1;
1227     fill_rx_pool (dev, pool, GFP_KERNEL);
1228
1229     if (dev->txer[vci].tx_present) {
1230       // TXer on the channel already
1231       // switch (from pool zero) to this pool, preserving the TX bits
1232       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1233       cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1234       cmd.args.modify_flags.flags = cpu_to_be32
1235         ( (pool << SRB_POOL_SHIFT)
1236           | (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT) );
1237     } else {
1238       // no TXer on the channel, open the VC (with no rate info)
1239       cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1240       cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1241       cmd.args.open.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
1242       cmd.args.open.rate = cpu_to_be32 (0);
1243     }
1244     while (command_do (dev, &cmd))
1245       schedule();
1246     // this link allows RX frames through
1247     dev->rxer[vci] = atm_vcc;
1248     up (&dev->vcc_sf);
1249   }
1250
1251   // indicate readiness
1252   set_bit(ATM_VF_READY,&atm_vcc->flags);
1253
1254   return 0;
1255 }
1256
1257 /********** Close a VC **********/
1258
1259 static void amb_close (struct atm_vcc * atm_vcc) {
1260   amb_dev * dev = AMB_DEV (atm_vcc->dev);
1261   amb_vcc * vcc = AMB_VCC (atm_vcc);
1262   u16 vci = atm_vcc->vci;
1263
1264   PRINTD (DBG_VCC|DBG_FLOW, "amb_close");
1265
1266   // indicate unreadiness
1267   clear_bit(ATM_VF_READY,&atm_vcc->flags);
1268
1269   // disable TXing
1270   if (atm_vcc->qos.txtp.traffic_class != ATM_NONE) {
1271     command cmd;
1272
1273     down (&dev->vcc_sf);
1274     if (dev->rxer[vci]) {
1275       // RXer still on the channel, just modify rate... XXX not really needed
1276       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1277       cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1278       cmd.args.modify_rate.rate = cpu_to_be32 (0);
1279       // ... and clear TX rate flags (XXX to stop RM cell output?), preserving RX pool
1280     } else {
1281       // no RXer on the channel, close channel
1282       cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1283       cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1284     }
1285     dev->txer[vci].tx_present = 0;
1286     while (command_do (dev, &cmd))
1287       schedule();
1288     up (&dev->vcc_sf);
1289   }
1290
1291   // disable RXing
1292   if (atm_vcc->qos.rxtp.traffic_class != ATM_NONE) {
1293     command cmd;
1294
1295     // this is (the?) one reason why we need the amb_vcc struct
1296     unsigned char pool = vcc->rx_info.pool;
1297
1298     down (&dev->vcc_sf);
1299     if (dev->txer[vci].tx_present) {
1300       // TXer still on the channel, just go to pool zero XXX not really needed
1301       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1302       cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1303       cmd.args.modify_flags.flags = cpu_to_be32
1304         (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT);
1305     } else {
1306       // no TXer on the channel, close the VC
1307       cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1308       cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1309     }
1310     // forget the rxer - no more skbs will be pushed
1311     if (atm_vcc != dev->rxer[vci])
1312       PRINTK (KERN_ERR, "%s vcc=%p rxer[vci]=%p",
1313               "arghhh! we're going to die!",
1314               vcc, dev->rxer[vci]);
1315     dev->rxer[vci] = NULL;
1316     while (command_do (dev, &cmd))
1317       schedule();
1318
1319     /* shrink RX buffer pool */
1320     dev->rxq[pool].buffers_wanted -= 1;
1321     if (dev->rxq[pool].buffers_wanted == rx_lats) {
1322       dev->rxq[pool].buffers_wanted = 0;
1323       drain_rx_pool (dev, pool);
1324     }
1325     up (&dev->vcc_sf);
1326   }
1327
1328   // free our structure
1329   kfree (vcc);
1330
1331   // say the VPI/VCI is free again
1332   clear_bit(ATM_VF_ADDR,&atm_vcc->flags);
1333
1334   return;
1335 }
1336
/********** Get socket options for a VC **********/
1338
1339 // int amb_getsockopt (struct atm_vcc * atm_vcc, int level, int optname, void * optval, int optlen);
1340
1341 /********** Set socket options for a VC **********/
1342
1343 // int amb_setsockopt (struct atm_vcc * atm_vcc, int level, int optname, void * optval, int optlen);
1344
1345 /********** Send **********/
1346
1347 static int amb_send (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
1348   amb_dev * dev = AMB_DEV(atm_vcc->dev);
1349   amb_vcc * vcc = AMB_VCC(atm_vcc);
1350   u16 vc = atm_vcc->vci;
1351   unsigned int tx_len = skb->len;
1352   unsigned char * tx_data = skb->data;
1353   tx_simple * tx_descr;
1354   tx_in tx;
1355
1356   if (test_bit (dead, &dev->flags))
1357     return -EIO;
1358
1359   PRINTD (DBG_FLOW|DBG_TX, "amb_send vc %x data %p len %u",
1360           vc, tx_data, tx_len);
1361
1362   dump_skb (">>>", vc, skb);
1363
1364   if (!dev->txer[vc].tx_present) {
1365     PRINTK (KERN_ERR, "attempt to send on RX-only VC %x", vc);
1366     return -EBADFD;
1367   }
1368
1369   // this is a driver private field so we have to set it ourselves,
1370   // despite the fact that we are _required_ to use it to check for a
1371   // pop function
1372   ATM_SKB(skb)->vcc = atm_vcc;
1373
1374   if (skb->len > (size_t) atm_vcc->qos.txtp.max_sdu) {
1375     PRINTK (KERN_ERR, "sk_buff length greater than agreed max_sdu, dropping...");
1376     return -EIO;
1377   }
1378
1379   if (check_area (skb->data, skb->len)) {
1380     atomic_inc(&atm_vcc->stats->tx_err);
1381     return -ENOMEM; // ?
1382   }
1383
1384   // allocate memory for fragments
1385   tx_descr = kmalloc (sizeof(tx_simple), GFP_KERNEL);
1386   if (!tx_descr) {
1387     PRINTK (KERN_ERR, "could not allocate TX descriptor");
1388     return -ENOMEM;
1389   }
1390   if (check_area (tx_descr, sizeof(tx_simple))) {
1391     kfree (tx_descr);
1392     return -ENOMEM;
1393   }
1394   PRINTD (DBG_TX, "fragment list allocated at %p", tx_descr);
1395
1396   tx_descr->skb = skb;
1397
1398   tx_descr->tx_frag.bytes = cpu_to_be32 (tx_len);
1399   tx_descr->tx_frag.address = cpu_to_be32 (virt_to_bus (tx_data));
1400
1401   tx_descr->tx_frag_end.handle = virt_to_bus (tx_descr);
1402   tx_descr->tx_frag_end.vc = 0;
1403   tx_descr->tx_frag_end.next_descriptor_length = 0;
1404   tx_descr->tx_frag_end.next_descriptor = 0;
1405 #ifdef AMB_NEW_MICROCODE
1406   tx_descr->tx_frag_end.cpcs_uu = 0;
1407   tx_descr->tx_frag_end.cpi = 0;
1408   tx_descr->tx_frag_end.pad = 0;
1409 #endif
1410
1411   tx.vc = cpu_to_be16 (vcc->tx_frame_bits | vc);
1412   tx.tx_descr_length = cpu_to_be16 (sizeof(tx_frag)+sizeof(tx_frag_end));
1413   tx.tx_descr_addr = cpu_to_be32 (virt_to_bus (&tx_descr->tx_frag));
1414
1415   while (tx_give (dev, &tx))
1416     schedule();
1417   return 0;
1418 }
1419
1420 /********** Change QoS on a VC **********/
1421
1422 // int amb_change_qos (struct atm_vcc * atm_vcc, struct atm_qos * qos, int flags);
1423
1424 /********** Free RX Socket Buffer **********/
1425
#if 0
/* Compiled out. Tries to hand a received skb straight back to the RX
   pool recorded in the VC's amb_vcc: it builds an rx_in entry from the
   skb, resets the skb's data/tail/len, and calls rx_give. If rx_give
   does not accept the buffer the skb is freed with dev_kfree_skb_any. */
static void amb_free_rx_skb (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
  amb_dev * dev = AMB_DEV (atm_vcc->dev);
  amb_vcc * vcc = AMB_VCC (atm_vcc);
  unsigned char pool = vcc->rx_info.pool;
  rx_in rx;

  // This may be unsafe for various reasons that I cannot really guess
  // at. However, I note that the ATM layer calls kfree_skb rather
  // than dev_kfree_skb at this point so we are least covered as far
  // as buffer locking goes. There may be bugs if pcap clones RX skbs.

  PRINTD (DBG_FLOW|DBG_SKB, "amb_rx_free skb %p (atm_vcc %p, vcc %p)",
          skb, atm_vcc, vcc);

  // NOTE(review): host_address is taken from skb->data before the skb is
  // reset to skb->head just below - confirm that is the intended address
  rx.handle = virt_to_bus (skb);
  rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));

  skb->data = skb->head;
  skb->tail = skb->head;
  skb->len = 0;

  if (!rx_give (dev, &rx, pool)) {
    // success
    PRINTD (DBG_SKB|DBG_POOL, "recycled skb for pool %hu", pool);
    return;
  }

  // just do what the ATM layer would have done
  dev_kfree_skb_any (skb);

  return;
}
#endif
1460
1461 /********** Proc File Output **********/
1462
1463 static int amb_proc_read (struct atm_dev * atm_dev, loff_t * pos, char * page) {
1464   amb_dev * dev = AMB_DEV (atm_dev);
1465   int left = *pos;
1466   unsigned char pool;
1467
1468   PRINTD (DBG_FLOW, "amb_proc_read");
1469
1470   /* more diagnostics here? */
1471
1472   if (!left--) {
1473     amb_stats * s = &dev->stats;
1474     return sprintf (page,
1475                     "frames: TX OK %lu, RX OK %lu, RX bad %lu "
1476                     "(CRC %lu, long %lu, aborted %lu, unused %lu).\n",
1477                     s->tx_ok, s->rx.ok, s->rx.error,
1478                     s->rx.badcrc, s->rx.toolong,
1479                     s->rx.aborted, s->rx.unused);
1480   }
1481
1482   if (!left--) {
1483     amb_cq * c = &dev->cq;
1484     return sprintf (page, "cmd queue [cur/hi/max]: %u/%u/%u. ",
1485                     c->pending, c->high, c->maximum);
1486   }
1487
1488   if (!left--) {
1489     amb_txq * t = &dev->txq;
1490     return sprintf (page, "TX queue [cur/max high full]: %u/%u %u %u.\n",
1491                     t->pending, t->maximum, t->high, t->filled);
1492   }
1493
1494   if (!left--) {
1495     unsigned int count = sprintf (page, "RX queues [cur/max/req low empty]:");
1496     for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1497       amb_rxq * r = &dev->rxq[pool];
1498       count += sprintf (page+count, " %u/%u/%u %u %u",
1499                         r->pending, r->maximum, r->buffers_wanted, r->low, r->emptied);
1500     }
1501     count += sprintf (page+count, ".\n");
1502     return count;
1503   }
1504
1505   if (!left--) {
1506     unsigned int count = sprintf (page, "RX buffer sizes:");
1507     for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1508       amb_rxq * r = &dev->rxq[pool];
1509       count += sprintf (page+count, " %u", r->buffer_size);
1510     }
1511     count += sprintf (page+count, ".\n");
1512     return count;
1513   }
1514
1515 #if 0
1516   if (!left--) {
1517     // suni block etc?
1518   }
1519 #endif
1520
1521   return 0;
1522 }
1523
1524 /********** Operation Structure **********/
1525
1526 static const struct atmdev_ops amb_ops = {
1527   .open         = amb_open,
1528   .close        = amb_close,
1529   .send         = amb_send,
1530   .proc_read    = amb_proc_read,
1531   .owner        = THIS_MODULE,
1532 };
1533
1534 /********** housekeeping **********/
1535 static void do_housekeeping (unsigned long arg) {
1536   amb_dev * dev = (amb_dev *) arg;
1537
1538   // could collect device-specific (not driver/atm-linux) stats here
1539
1540   // last resort refill once every ten seconds
1541   fill_rx_pools (dev);
1542   mod_timer(&dev->housekeeping, jiffies + 10*HZ);
1543
1544   return;
1545 }
1546
1547 /********** creation of communication queues **********/
1548
1549 static int __init create_queues (amb_dev * dev, unsigned int cmds,
1550                                  unsigned int txs, unsigned int * rxs,
1551                                  unsigned int * rx_buffer_sizes) {
1552   unsigned char pool;
1553   size_t total = 0;
1554   void * memory;
1555   void * limit;
1556
1557   PRINTD (DBG_FLOW, "create_queues %p", dev);
1558
1559   total += cmds * sizeof(command);
1560
1561   total += txs * (sizeof(tx_in) + sizeof(tx_out));
1562
1563   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1564     total += rxs[pool] * (sizeof(rx_in) + sizeof(rx_out));
1565
1566   memory = kmalloc (total, GFP_KERNEL);
1567   if (!memory) {
1568     PRINTK (KERN_ERR, "could not allocate queues");
1569     return -ENOMEM;
1570   }
1571   if (check_area (memory, total)) {
1572     PRINTK (KERN_ERR, "queues allocated in nasty area");
1573     kfree (memory);
1574     return -ENOMEM;
1575   }
1576
1577   limit = memory + total;
1578   PRINTD (DBG_INIT, "queues from %p to %p", memory, limit);
1579
1580   PRINTD (DBG_CMD, "command queue at %p", memory);
1581
1582   {
1583     command * cmd = memory;
1584     amb_cq * cq = &dev->cq;
1585
1586     cq->pending = 0;
1587     cq->high = 0;
1588     cq->maximum = cmds - 1;
1589
1590     cq->ptrs.start = cmd;
1591     cq->ptrs.in = cmd;
1592     cq->ptrs.out = cmd;
1593     cq->ptrs.limit = cmd + cmds;
1594
1595     memory = cq->ptrs.limit;
1596   }
1597
1598   PRINTD (DBG_TX, "TX queue pair at %p", memory);
1599
1600   {
1601     tx_in * in = memory;
1602     tx_out * out;
1603     amb_txq * txq = &dev->txq;
1604
1605     txq->pending = 0;
1606     txq->high = 0;
1607     txq->filled = 0;
1608     txq->maximum = txs - 1;
1609
1610     txq->in.start = in;
1611     txq->in.ptr = in;
1612     txq->in.limit = in + txs;
1613
1614     memory = txq->in.limit;
1615     out = memory;
1616
1617     txq->out.start = out;
1618     txq->out.ptr = out;
1619     txq->out.limit = out + txs;
1620
1621     memory = txq->out.limit;
1622   }
1623
1624   PRINTD (DBG_RX, "RX queue pairs at %p", memory);
1625
1626   for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1627     rx_in * in = memory;
1628     rx_out * out;
1629     amb_rxq * rxq = &dev->rxq[pool];
1630
1631     rxq->buffer_size = rx_buffer_sizes[pool];
1632     rxq->buffers_wanted = 0;
1633
1634     rxq->pending = 0;
1635     rxq->low = rxs[pool] - 1;
1636     rxq->emptied = 0;
1637     rxq->maximum = rxs[pool] - 1;
1638
1639     rxq->in.start = in;
1640     rxq->in.ptr = in;
1641     rxq->in.limit = in + rxs[pool];
1642
1643     memory = rxq->in.limit;
1644     out = memory;
1645
1646     rxq->out.start = out;
1647     rxq->out.ptr = out;
1648     rxq->out.limit = out + rxs[pool];
1649
1650     memory = rxq->out.limit;
1651   }
1652
1653   if (memory == limit) {
1654     return 0;
1655   } else {
1656     PRINTK (KERN_ERR, "bad queue alloc %p != %p (tell maintainer)", memory, limit);
1657     kfree (limit - total);
1658     return -ENOMEM;
1659   }
1660
1661 }
1662
1663 /********** destruction of communication queues **********/
1664
1665 static void destroy_queues (amb_dev * dev) {
1666   // all queues assumed empty
1667   void * memory = dev->cq.ptrs.start;
1668   // includes txq.in, txq.out, rxq[].in and rxq[].out
1669
1670   PRINTD (DBG_FLOW, "destroy_queues %p", dev);
1671
1672   PRINTD (DBG_INIT, "freeing queues at %p", memory);
1673   kfree (memory);
1674
1675   return;
1676 }
1677
1678 /********** basic loader commands and error handling **********/
1679 // centisecond timeouts - guessing away here
1680 static unsigned int command_timeouts [] = {
1681         [host_memory_test]     = 15,
1682         [read_adapter_memory]  = 2,
1683         [write_adapter_memory] = 2,
1684         [adapter_start]        = 50,
1685         [get_version_number]   = 10,
1686         [interrupt_host]       = 1,
1687         [flash_erase_sector]   = 1,
1688         [adap_download_block]  = 1,
1689         [adap_erase_flash]     = 1,
1690         [adap_run_in_iram]     = 1,
1691         [adap_end_download]    = 1
1692 };
1693
1694
1695 unsigned int command_successes [] = {
1696         [host_memory_test]     = COMMAND_PASSED_TEST,
1697         [read_adapter_memory]  = COMMAND_READ_DATA_OK,
1698         [write_adapter_memory] = COMMAND_WRITE_DATA_OK,
1699         [adapter_start]        = COMMAND_COMPLETE,
1700         [get_version_number]   = COMMAND_COMPLETE,
1701         [interrupt_host]       = COMMAND_COMPLETE,
1702         [flash_erase_sector]   = COMMAND_COMPLETE,
1703         [adap_download_block]  = COMMAND_COMPLETE,
1704         [adap_erase_flash]     = COMMAND_COMPLETE,
1705         [adap_run_in_iram]     = COMMAND_COMPLETE,
1706         [adap_end_download]    = COMMAND_COMPLETE
1707 };
1708
1709 static  int decode_loader_result (loader_command cmd, u32 result)
1710 {
1711         int res;
1712         const char *msg;
1713
1714         if (result == command_successes[cmd])
1715                 return 0;
1716
1717         switch (result) {
1718                 case BAD_COMMAND:
1719                         res = -EINVAL;
1720                         msg = "bad command";
1721                         break;
1722                 case COMMAND_IN_PROGRESS:
1723                         res = -ETIMEDOUT;
1724                         msg = "command in progress";
1725                         break;
1726                 case COMMAND_PASSED_TEST:
1727                         res = 0;
1728                         msg = "command passed test";
1729                         break;
1730                 case COMMAND_FAILED_TEST:
1731                         res = -EIO;
1732                         msg = "command failed test";
1733                         break;
1734                 case COMMAND_READ_DATA_OK:
1735                         res = 0;
1736                         msg = "command read data ok";
1737                         break;
1738                 case COMMAND_READ_BAD_ADDRESS:
1739                         res = -EINVAL;
1740                         msg = "command read bad address";
1741                         break;
1742                 case COMMAND_WRITE_DATA_OK:
1743                         res = 0;
1744                         msg = "command write data ok";
1745                         break;
1746                 case COMMAND_WRITE_BAD_ADDRESS:
1747                         res = -EINVAL;
1748                         msg = "command write bad address";
1749                         break;
1750                 case COMMAND_WRITE_FLASH_FAILURE:
1751                         res = -EIO;
1752                         msg = "command write flash failure";
1753                         break;
1754                 case COMMAND_COMPLETE:
1755                         res = 0;
1756                         msg = "command complete";
1757                         break;
1758                 case COMMAND_FLASH_ERASE_FAILURE:
1759                         res = -EIO;
1760                         msg = "command flash erase failure";
1761                         break;
1762                 case COMMAND_WRITE_BAD_DATA:
1763                         res = -EINVAL;
1764                         msg = "command write bad data";
1765                         break;
1766                 default:
1767                         res = -EINVAL;
1768                         msg = "unknown error";
1769                         PRINTD (DBG_LOAD|DBG_ERR,
1770                                 "decode_loader_result got %d=%x !",
1771                                 result, result);
1772                         break;
1773         }
1774
1775         PRINTK (KERN_ERR, "%s", msg);
1776         return res;
1777 }
1778
1779 static int __init do_loader_command (volatile loader_block * lb,
1780                                      const amb_dev * dev, loader_command cmd) {
1781
1782   unsigned long timeout;
1783
1784   PRINTD (DBG_FLOW|DBG_LOAD, "do_loader_command");
1785
1786   /* do a command
1787
1788      Set the return value to zero, set the command type and set the
1789      valid entry to the right magic value. The payload is already
1790      correctly byte-ordered so we leave it alone. Hit the doorbell
1791      with the bus address of this structure.
1792
1793   */
1794
1795   lb->result = 0;
1796   lb->command = cpu_to_be32 (cmd);
1797   lb->valid = cpu_to_be32 (DMA_VALID);
1798   // dump_registers (dev);
1799   // dump_loader_block (lb);
1800   wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (lb) & ~onegigmask);
1801
1802   timeout = command_timeouts[cmd] * HZ/100;
1803
1804   while (!lb->result || lb->result == cpu_to_be32 (COMMAND_IN_PROGRESS))
1805     if (timeout) {
1806       set_current_state(TASK_UNINTERRUPTIBLE);
1807       timeout = schedule_timeout (timeout);
1808     } else {
1809       PRINTD (DBG_LOAD|DBG_ERR, "command %d timed out", cmd);
1810       dump_registers (dev);
1811       dump_loader_block (lb);
1812       return -ETIMEDOUT;
1813     }
1814
1815   if (cmd == adapter_start) {
1816     // wait for start command to acknowledge...
1817     timeout = HZ/10;
1818     while (rd_plain (dev, offsetof(amb_mem, doorbell)))
1819       if (timeout) {
1820         timeout = schedule_timeout (timeout);
1821       } else {
1822         PRINTD (DBG_LOAD|DBG_ERR, "start command did not clear doorbell, res=%08x",
1823                 be32_to_cpu (lb->result));
1824         dump_registers (dev);
1825         return -ETIMEDOUT;
1826       }
1827     return 0;
1828   } else {
1829     return decode_loader_result (cmd, be32_to_cpu (lb->result));
1830   }
1831
1832 }
1833
1834 /* loader: determine loader version */
1835
1836 static int __init get_loader_version (loader_block * lb,
1837                                       const amb_dev * dev, u32 * version) {
1838   int res;
1839
1840   PRINTD (DBG_FLOW|DBG_LOAD, "get_loader_version");
1841
1842   res = do_loader_command (lb, dev, get_version_number);
1843   if (res)
1844     return res;
1845   if (version)
1846     *version = be32_to_cpu (lb->payload.version);
1847   return 0;
1848 }
1849
1850 /* loader: write memory data blocks */
1851
1852 static int __init loader_write (loader_block * lb,
1853                                 const amb_dev * dev, const u32 * data,
1854                                 u32 address, unsigned int count) {
1855   unsigned int i;
1856   transfer_block * tb = &lb->payload.transfer;
1857
1858   PRINTD (DBG_FLOW|DBG_LOAD, "loader_write");
1859
1860   if (count > MAX_TRANSFER_DATA)
1861     return -EINVAL;
1862   tb->address = cpu_to_be32 (address);
1863   tb->count = cpu_to_be32 (count);
1864   for (i = 0; i < count; ++i)
1865     tb->data[i] = cpu_to_be32 (data[i]);
1866   return do_loader_command (lb, dev, write_adapter_memory);
1867 }
1868
1869 /* loader: verify memory data blocks */
1870
1871 static int __init loader_verify (loader_block * lb,
1872                                  const amb_dev * dev, const u32 * data,
1873                                  u32 address, unsigned int count) {
1874   unsigned int i;
1875   transfer_block * tb = &lb->payload.transfer;
1876   int res;
1877
1878   PRINTD (DBG_FLOW|DBG_LOAD, "loader_verify");
1879
1880   if (count > MAX_TRANSFER_DATA)
1881     return -EINVAL;
1882   tb->address = cpu_to_be32 (address);
1883   tb->count = cpu_to_be32 (count);
1884   res = do_loader_command (lb, dev, read_adapter_memory);
1885   if (!res)
1886     for (i = 0; i < count; ++i)
1887       if (tb->data[i] != cpu_to_be32 (data[i])) {
1888         res = -EINVAL;
1889         break;
1890       }
1891   return res;
1892 }
1893
1894 /* loader: start microcode */
1895
1896 static int __init loader_start (loader_block * lb,
1897                                 const amb_dev * dev, u32 address) {
1898   PRINTD (DBG_FLOW|DBG_LOAD, "loader_start");
1899
1900   lb->payload.start = cpu_to_be32 (address);
1901   return do_loader_command (lb, dev, adapter_start);
1902 }
1903
1904 /********** reset card **********/
1905
1906 static inline void sf (const char * msg)
1907 {
1908         PRINTK (KERN_ERR, "self-test failed: %s", msg);
1909 }
1910
1911 static int amb_reset (amb_dev * dev, int diags) {
1912   u32 word;
1913
1914   PRINTD (DBG_FLOW|DBG_LOAD, "amb_reset");
1915
1916   word = rd_plain (dev, offsetof(amb_mem, reset_control));
1917   // put card into reset state
1918   wr_plain (dev, offsetof(amb_mem, reset_control), word | AMB_RESET_BITS);
1919   // wait a short while
1920   udelay (10);
1921 #if 1
1922   // put card into known good state
1923   wr_plain (dev, offsetof(amb_mem, interrupt_control), AMB_DOORBELL_BITS);
1924   // clear all interrupts just in case
1925   wr_plain (dev, offsetof(amb_mem, interrupt), -1);
1926 #endif
1927   // clear self-test done flag
1928   wr_plain (dev, offsetof(amb_mem, mb.loader.ready), 0);
1929   // take card out of reset state
1930   wr_plain (dev, offsetof(amb_mem, reset_control), word &~ AMB_RESET_BITS);
1931
1932   if (diags) {
1933     unsigned long timeout;
1934     // 4.2 second wait
1935     timeout = HZ*42/10;
1936     while (timeout) {
1937       set_current_state(TASK_UNINTERRUPTIBLE);
1938       timeout = schedule_timeout (timeout);
1939     }
1940     // half second time-out
1941     timeout = HZ/2;
1942     while (!rd_plain (dev, offsetof(amb_mem, mb.loader.ready)))
1943       if (timeout) {
1944         set_current_state(TASK_UNINTERRUPTIBLE);
1945         timeout = schedule_timeout (timeout);
1946       } else {
1947         PRINTD (DBG_LOAD|DBG_ERR, "reset timed out");
1948         return -ETIMEDOUT;
1949       }
1950
1951     // get results of self-test
1952     // XXX double check byte-order
1953     word = rd_mem (dev, offsetof(amb_mem, mb.loader.result));
1954     if (word & SELF_TEST_FAILURE) {
1955       if (word & GPINT_TST_FAILURE)
1956         sf ("interrupt");
1957       if (word & SUNI_DATA_PATTERN_FAILURE)
1958         sf ("SUNI data pattern");
1959       if (word & SUNI_DATA_BITS_FAILURE)
1960         sf ("SUNI data bits");
1961       if (word & SUNI_UTOPIA_FAILURE)
1962         sf ("SUNI UTOPIA interface");
1963       if (word & SUNI_FIFO_FAILURE)
1964         sf ("SUNI cell buffer FIFO");
1965       if (word & SRAM_FAILURE)
1966         sf ("bad SRAM");
1967       // better return value?
1968       return -EIO;
1969     }
1970
1971   }
1972   return 0;
1973 }
1974
1975 /********** transfer and start the microcode **********/
1976
1977 static int __init ucode_init (loader_block * lb, amb_dev * dev) {
1978   unsigned int i = 0;
1979   unsigned int total = 0;
1980   const u32 * pointer = ucode_data;
1981   u32 address;
1982   unsigned int count;
1983   int res;
1984
1985   PRINTD (DBG_FLOW|DBG_LOAD, "ucode_init");
1986
1987   while (address = ucode_regions[i].start,
1988          count = ucode_regions[i].count) {
1989     PRINTD (DBG_LOAD, "starting region (%x, %u)", address, count);
1990     while (count) {
1991       unsigned int words;
1992       if (count <= MAX_TRANSFER_DATA)
1993         words = count;
1994       else
1995         words = MAX_TRANSFER_DATA;
1996       total += words;
1997       res = loader_write (lb, dev, pointer, address, words);
1998       if (res)
1999         return res;
2000       res = loader_verify (lb, dev, pointer, address, words);
2001       if (res)
2002         return res;
2003       count -= words;
2004       address += sizeof(u32) * words;
2005       pointer += words;
2006     }
2007     i += 1;
2008   }
2009   if (*pointer == 0xdeadbeef) {
2010     return loader_start (lb, dev, ucode_start);
2011   } else {
2012     // cast needed as there is no %? for pointer differnces
2013     PRINTD (DBG_LOAD|DBG_ERR,
2014             "offset=%li, *pointer=%x, address=%x, total=%u",
2015             (long) (pointer - ucode_data), *pointer, address, total);
2016     PRINTK (KERN_ERR, "incorrect microcode data");
2017     return -ENOMEM;
2018   }
2019 }
2020
2021 /********** give adapter parameters **********/
2022
2023 static inline u32 bus_addr(void * addr) {
2024     return cpu_to_be32 (virt_to_bus (addr));
2025 }
2026
2027 static int __init amb_talk (amb_dev * dev) {
2028   adap_talk_block a;
2029   unsigned char pool;
2030   unsigned long timeout;
2031
2032   PRINTD (DBG_FLOW, "amb_talk %p", dev);
2033
2034   a.command_start = bus_addr (dev->cq.ptrs.start);
2035   a.command_end   = bus_addr (dev->cq.ptrs.limit);
2036   a.tx_start      = bus_addr (dev->txq.in.start);
2037   a.tx_end        = bus_addr (dev->txq.in.limit);
2038   a.txcom_start   = bus_addr (dev->txq.out.start);
2039   a.txcom_end     = bus_addr (dev->txq.out.limit);
2040
2041   for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
2042     // the other "a" items are set up by the adapter
2043     a.rec_struct[pool].buffer_start = bus_addr (dev->rxq[pool].in.start);
2044     a.rec_struct[pool].buffer_end   = bus_addr (dev->rxq[pool].in.limit);
2045     a.rec_struct[pool].rx_start     = bus_addr (dev->rxq[pool].out.start);
2046     a.rec_struct[pool].rx_end       = bus_addr (dev->rxq[pool].out.limit);
2047     a.rec_struct[pool].buffer_size = cpu_to_be32 (dev->rxq[pool].buffer_size);
2048   }
2049
2050 #ifdef AMB_NEW_MICROCODE
2051   // disable fast PLX prefetching
2052   a.init_flags = 0;
2053 #endif
2054
2055   // pass the structure
2056   wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (&a));
2057
2058   // 2.2 second wait (must not touch doorbell during 2 second DMA test)
2059   timeout = HZ*22/10;
2060   while (timeout)
2061     timeout = schedule_timeout (timeout);
2062   // give the adapter another half second?
2063   timeout = HZ/2;
2064   while (rd_plain (dev, offsetof(amb_mem, doorbell)))
2065     if (timeout) {
2066       timeout = schedule_timeout (timeout);
2067     } else {
2068       PRINTD (DBG_INIT|DBG_ERR, "adapter init timed out");
2069       return -ETIMEDOUT;
2070     }
2071
2072   return 0;
2073 }
2074
2075 // get microcode version
2076 static void __init amb_ucode_version (amb_dev * dev) {
2077   u32 major;
2078   u32 minor;
2079   command cmd;
2080   cmd.request = cpu_to_be32 (SRB_GET_VERSION);
2081   while (command_do (dev, &cmd)) {
2082     set_current_state(TASK_UNINTERRUPTIBLE);
2083     schedule();
2084   }
2085   major = be32_to_cpu (cmd.args.version.major);
2086   minor = be32_to_cpu (cmd.args.version.minor);
2087   PRINTK (KERN_INFO, "microcode version is %u.%u", major, minor);
2088 }
2089
2090 // swap bits within byte to get Ethernet ordering
2091 u8 bit_swap (u8 byte)
2092 {
2093     const u8 swap[] = {
2094       0x0, 0x8, 0x4, 0xc,
2095       0x2, 0xa, 0x6, 0xe,
2096       0x1, 0x9, 0x5, 0xd,
2097       0x3, 0xb, 0x7, 0xf
2098     };
2099     return ((swap[byte & 0xf]<<4) | swap[byte>>4]);
2100 }
2101
2102 // get end station address
2103 static void __init amb_esi (amb_dev * dev, u8 * esi) {
2104   u32 lower4;
2105   u16 upper2;
2106   command cmd;
2107
2108   cmd.request = cpu_to_be32 (SRB_GET_BIA);
2109   while (command_do (dev, &cmd)) {
2110     set_current_state(TASK_UNINTERRUPTIBLE);
2111     schedule();
2112   }
2113   lower4 = be32_to_cpu (cmd.args.bia.lower4);
2114   upper2 = be32_to_cpu (cmd.args.bia.upper2);
2115   PRINTD (DBG_LOAD, "BIA: lower4: %08x, upper2 %04x", lower4, upper2);
2116
2117   if (esi) {
2118     unsigned int i;
2119
2120     PRINTDB (DBG_INIT, "ESI:");
2121     for (i = 0; i < ESI_LEN; ++i) {
2122       if (i < 4)
2123           esi[i] = bit_swap (lower4>>(8*i));
2124       else
2125           esi[i] = bit_swap (upper2>>(8*(i-4)));
2126       PRINTDM (DBG_INIT, " %02x", esi[i]);
2127     }
2128
2129     PRINTDE (DBG_INIT, "");
2130   }
2131
2132   return;
2133 }
2134
2135 static void fixup_plx_window (amb_dev *dev, loader_block *lb)
2136 {
2137         // fix up the PLX-mapped window base address to match the block
2138         unsigned long blb;
2139         u32 mapreg;
2140         blb = virt_to_bus(lb);
2141         // the kernel stack had better not ever cross a 1Gb boundary!
2142         mapreg = rd_plain (dev, offsetof(amb_mem, stuff[10]));
2143         mapreg &= ~onegigmask;
2144         mapreg |= blb & onegigmask;
2145         wr_plain (dev, offsetof(amb_mem, stuff[10]), mapreg);
2146         return;
2147 }
2148
2149 static int __init amb_init (amb_dev * dev)
2150 {
2151   loader_block lb;
2152
2153   u32 version;
2154
2155   if (amb_reset (dev, 1)) {
2156     PRINTK (KERN_ERR, "card reset failed!");
2157   } else {
2158     fixup_plx_window (dev, &lb);
2159
2160     if (get_loader_version (&lb, dev, &version)) {
2161       PRINTK (KERN_INFO, "failed to get loader version");
2162     } else {
2163       PRINTK (KERN_INFO, "loader version is %08x", version);
2164
2165       if (ucode_init (&lb, dev)) {
2166         PRINTK (KERN_ERR, "microcode failure");
2167       } else if (create_queues (dev, cmds, txs, rxs, rxs_bs)) {
2168         PRINTK (KERN_ERR, "failed to get memory for queues");
2169       } else {
2170
2171         if (amb_talk (dev)) {
2172           PRINTK (KERN_ERR, "adapter did not accept queues");
2173         } else {
2174
2175           amb_ucode_version (dev);
2176           return 0;
2177
2178         } /* amb_talk */
2179
2180         destroy_queues (dev);
2181       } /* create_queues, ucode_init */
2182
2183       amb_reset (dev, 0);
2184     } /* get_loader_version */
2185
2186   } /* amb_reset */
2187
2188   return -EINVAL;
2189 }
2190
2191 static void setup_dev(amb_dev *dev, struct pci_dev *pci_dev)
2192 {
2193       unsigned char pool;
2194       memset (dev, 0, sizeof(amb_dev));
2195
2196       // set up known dev items straight away
2197       dev->pci_dev = pci_dev;
2198       pci_set_drvdata(pci_dev, dev);
2199
2200       dev->iobase = pci_resource_start (pci_dev, 1);
2201       dev->irq = pci_dev->irq;
2202       dev->membase = bus_to_virt(pci_resource_start(pci_dev, 0));
2203
2204       // flags (currently only dead)
2205       dev->flags = 0;
2206
2207       // Allocate cell rates (fibre)
2208       // ATM_OC3_PCR = 1555200000/8/270*260/53 - 29/53
2209       // to be really pedantic, this should be ATM_OC3c_PCR
2210       dev->tx_avail = ATM_OC3_PCR;
2211       dev->rx_avail = ATM_OC3_PCR;
2212
2213 #ifdef FILL_RX_POOLS_IN_BH
2214       // initialise bottom half
2215       INIT_WORK(&dev->bh, (void (*)(void *)) fill_rx_pools, dev);
2216 #endif
2217
2218       // semaphore for txer/rxer modifications - we cannot use a
2219       // spinlock as the critical region needs to switch processes
2220       init_MUTEX (&dev->vcc_sf);
2221       // queue manipulation spinlocks; we want atomic reads and
2222       // writes to the queue descriptors (handles IRQ and SMP)
2223       // consider replacing "int pending" -> "atomic_t available"
2224       // => problem related to who gets to move queue pointers
2225       spin_lock_init (&dev->cq.lock);
2226       spin_lock_init (&dev->txq.lock);
2227       for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2228         spin_lock_init (&dev->rxq[pool].lock);
2229 }
2230
2231 static int setup_pci_dev(struct pci_dev *pci_dev)
2232 {
2233         unsigned char lat;
2234         int ret;
2235
2236         // enable bus master accesses
2237         pci_set_master(pci_dev);
2238
2239         ret = pci_enable_device(pci_dev);
2240         if (ret < 0)
2241                 goto out;
2242
2243         // frobnicate latency (upwards, usually)
2244         pci_read_config_byte (pci_dev, PCI_LATENCY_TIMER, &lat);
2245
2246         if (!pci_lat)
2247                 pci_lat = (lat < MIN_PCI_LATENCY) ? MIN_PCI_LATENCY : lat;
2248
2249         if (lat != pci_lat) {
2250                 PRINTK (KERN_INFO, "Changing PCI latency timer from %hu to %hu",
2251                         lat, pci_lat);
2252                 pci_write_config_byte(pci_dev, PCI_LATENCY_TIMER, pci_lat);
2253         }
2254 out:
2255         return ret;
2256 }
2257
2258 static int __devinit amb_probe(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent)
2259 {
2260         amb_dev * dev;
2261         int err;
2262
2263         // read resources from PCI configuration space
2264         unsigned int irq = pci_dev->irq;
2265
2266         if (pci_dev->device == PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD) {
2267                 PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
2268                 err = -EINVAL;
2269                 goto out;
2270         }
2271
2272         PRINTD (DBG_INFO, "found Madge ATM adapter (amb) at"
2273                 " IO %lx, IRQ %u, MEM %p", pci_resource_start(pci_dev, 1),
2274                 irq, bus_to_virt(pci_resource_start(pci_dev, 0)));
2275
2276         // check IO region
2277         err = pci_request_region(pci_dev, 1, DEV_LABEL);
2278         if (err < 0) {
2279                 PRINTK (KERN_ERR, "IO range already in use!");
2280                 goto out;
2281         }
2282
2283         dev = kmalloc (sizeof(amb_dev), GFP_KERNEL);
2284         if (!dev) {
2285                 PRINTK (KERN_ERR, "out of memory!");
2286                 err = -ENOMEM;
2287                 goto out_release;
2288         }
2289
2290         setup_dev(dev, pci_dev);
2291
2292         err = amb_init(dev);
2293         if (err < 0) {
2294                 PRINTK (KERN_ERR, "adapter initialisation failure");
2295                 goto out_free;
2296         }
2297
2298         err = setup_pci_dev(pci_dev);
2299         if (err < 0)
2300                 goto out_reset;
2301
2302         // grab (but share) IRQ and install handler
2303         err = request_irq(irq, interrupt_handler, SA_SHIRQ, DEV_LABEL, dev);
2304         if (err < 0) {
2305                 PRINTK (KERN_ERR, "request IRQ failed!");
2306                 goto out_disable;
2307         }
2308
2309         dev->atm_dev = atm_dev_register (DEV_LABEL, &amb_ops, -1, NULL);
2310         if (!dev->atm_dev) {
2311                 PRINTD (DBG_ERR, "failed to register Madge ATM adapter");
2312                 err = -EINVAL;
2313                 goto out_free_irq;
2314         }
2315
2316         PRINTD (DBG_INFO, "registered Madge ATM adapter (no. %d) (%p) at %p",
2317                 dev->atm_dev->number, dev, dev->atm_dev);
2318                 dev->atm_dev->dev_data = (void *) dev;
2319
2320         // register our address
2321         amb_esi (dev, dev->atm_dev->esi);
2322
2323         // 0 bits for vpi, 10 bits for vci
2324         dev->atm_dev->ci_range.vpi_bits = NUM_VPI_BITS;
2325         dev->atm_dev->ci_range.vci_bits = NUM_VCI_BITS;
2326
2327         init_timer(&dev->housekeeping);
2328         dev->housekeeping.function = do_housekeeping;
2329         dev->housekeeping.data = (unsigned long) dev;
2330         mod_timer(&dev->housekeeping, jiffies);
2331
2332         // enable host interrupts
2333         interrupts_on (dev);
2334
2335 out:
2336         return err;
2337
2338 out_free_irq:
2339         free_irq(irq, dev);
2340 out_disable:
2341         pci_disable_device(pci_dev);
2342 out_reset:
2343         amb_reset(dev, 0);
2344 out_free:
2345         kfree(dev);
2346 out_release:
2347         pci_release_region(pci_dev, 1);
2348         goto out;
2349 }
2350
2351
2352 static void __devexit amb_remove_one(struct pci_dev *pci_dev)
2353 {
2354         struct amb_dev *dev;
2355
2356         dev = pci_get_drvdata(pci_dev);
2357
2358         PRINTD(DBG_INFO|DBG_INIT, "closing %p (atm_dev = %p)", dev, dev->atm_dev);
2359         del_timer_sync(&dev->housekeeping);
2360         // the drain should not be necessary
2361         drain_rx_pools(dev);
2362         interrupts_off(dev);
2363         amb_reset(dev, 0);
2364         free_irq(dev->irq, dev);
2365         pci_disable_device(pci_dev);
2366         destroy_queues(dev);
2367         atm_dev_deregister(dev->atm_dev);
2368         kfree(dev);
2369         pci_release_region(pci_dev, 1);
2370 }
2371
2372 static void __init amb_check_args (void) {
2373   unsigned char pool;
2374   unsigned int max_rx_size;
2375
2376 #ifdef DEBUG_AMBASSADOR
2377   PRINTK (KERN_NOTICE, "debug bitmap is %hx", debug &= DBG_MASK);
2378 #else
2379   if (debug)
2380     PRINTK (KERN_NOTICE, "no debugging support");
2381 #endif
2382
2383   if (cmds < MIN_QUEUE_SIZE)
2384     PRINTK (KERN_NOTICE, "cmds has been raised to %u",
2385             cmds = MIN_QUEUE_SIZE);
2386
2387   if (txs < MIN_QUEUE_SIZE)
2388     PRINTK (KERN_NOTICE, "txs has been raised to %u",
2389             txs = MIN_QUEUE_SIZE);
2390
2391   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2392     if (rxs[pool] < MIN_QUEUE_SIZE)
2393       PRINTK (KERN_NOTICE, "rxs[%hu] has been raised to %u",
2394               pool, rxs[pool] = MIN_QUEUE_SIZE);
2395
2396   // buffers sizes should be greater than zero and strictly increasing
2397   max_rx_size = 0;
2398   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2399     if (rxs_bs[pool] <= max_rx_size)
2400       PRINTK (KERN_NOTICE, "useless pool (rxs_bs[%hu] = %u)",
2401               pool, rxs_bs[pool]);
2402     else
2403       max_rx_size = rxs_bs[pool];
2404
2405   if (rx_lats < MIN_RX_BUFFERS)
2406     PRINTK (KERN_NOTICE, "rx_lats has been raised to %u",
2407             rx_lats = MIN_RX_BUFFERS);
2408
2409   return;
2410 }
2411
2412 /********** module stuff **********/
2413
2414 MODULE_AUTHOR(maintainer_string);
2415 MODULE_DESCRIPTION(description_string);
2416 MODULE_LICENSE("GPL");
2417 module_param(debug,   ushort, 0644);
2418 module_param(cmds,    uint, 0);
2419 module_param(txs,     uint, 0);
2420 module_param_array(rxs,     uint, NULL, 0);
2421 module_param_array(rxs_bs,  uint, NULL, 0);
2422 module_param(rx_lats, uint, 0);
2423 module_param(pci_lat, byte, 0);
2424 MODULE_PARM_DESC(debug,   "debug bitmap, see .h file");
2425 MODULE_PARM_DESC(cmds,    "number of command queue entries");
2426 MODULE_PARM_DESC(txs,     "number of TX queue entries");
2427 MODULE_PARM_DESC(rxs,     "number of RX queue entries [" __MODULE_STRING(NUM_RX_POOLS) "]");
2428 MODULE_PARM_DESC(rxs_bs,  "size of RX buffers [" __MODULE_STRING(NUM_RX_POOLS) "]");
2429 MODULE_PARM_DESC(rx_lats, "number of extra buffers to cope with RX latencies");
2430 MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
2431
2432 /********** module entry **********/
2433
2434 static struct pci_device_id amb_pci_tbl[] = {
2435         { PCI_VENDOR_ID_MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR, PCI_ANY_ID, PCI_ANY_ID,
2436           0, 0, 0 },
2437         { PCI_VENDOR_ID_MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD, PCI_ANY_ID, PCI_ANY_ID,
2438           0, 0, 0 },
2439         { 0, }
2440 };
2441
2442 MODULE_DEVICE_TABLE(pci, amb_pci_tbl);
2443
2444 static struct pci_driver amb_driver = {
2445         .name =         "amb",
2446         .probe =        amb_probe,
2447         .remove =       __devexit_p(amb_remove_one),
2448         .id_table =     amb_pci_tbl,
2449 };
2450
2451 static int __init amb_module_init (void)
2452 {
2453   PRINTD (DBG_FLOW|DBG_INIT, "init_module");
2454
2455   // sanity check - cast needed as printk does not support %Zu
2456   if (sizeof(amb_mem) != 4*16 + 4*12) {
2457     PRINTK (KERN_ERR, "Fix amb_mem (is %lu words).",
2458             (unsigned long) sizeof(amb_mem));
2459     return -ENOMEM;
2460   }
2461
2462   show_version();
2463
2464   amb_check_args();
2465
2466   // get the juice
2467   return pci_module_init(&amb_driver);
2468 }
2469
2470 /********** module exit **********/
2471
2472 static void __exit amb_module_exit (void)
2473 {
2474   PRINTD (DBG_FLOW|DBG_INIT, "cleanup_module");
2475
2476   return pci_unregister_driver(&amb_driver);
2477 }
2478
2479 module_init(amb_module_init);
2480 module_exit(amb_module_exit);