vserver 1.9.3
[linux-2.6.git] / drivers / char / random.c
index e39179f..295e3f7 100644 (file)
@@ -401,6 +401,7 @@ static struct poolinfo {
  */
 static struct entropy_store *random_state; /* The default global store */
 static struct entropy_store *sec_random_state; /* secondary store */
+static struct entropy_store *urandom_state; /* For urandom */
 static DECLARE_WAIT_QUEUE_HEAD(random_read_wait);
 static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
 
@@ -493,6 +494,7 @@ struct entropy_store {
        /* mostly-read data: */
        struct poolinfo poolinfo;
        __u32           *pool;
+       const char      *name;
 
        /* read-write data: */
        spinlock_t lock ____cacheline_aligned_in_smp;
@@ -507,7 +509,8 @@ struct entropy_store {
  *
  * Returns an negative error if there is a problem.
  */
-static int create_entropy_store(int size, struct entropy_store **ret_bucket)
+static int create_entropy_store(int size, const char *name,
+                               struct entropy_store **ret_bucket)
 {
        struct  entropy_store   *r;
        struct  poolinfo        *p;
@@ -538,6 +541,7 @@ static int create_entropy_store(int size, struct entropy_store **ret_bucket)
        }
        memset(r->pool, 0, POOLBYTES);
        r->lock = SPIN_LOCK_UNLOCKED;
+       r->name = name;
        *ret_bucket = r;
        return 0;
 }
@@ -643,12 +647,8 @@ static void credit_entropy_store(struct entropy_store *r, int nbits)
        } else {
                r->entropy_count += nbits;
                if (nbits)
-                       DEBUG_ENT("%04d %04d : added %d bits to %s\n",
-                                 random_state->entropy_count,
-                                 sec_random_state->entropy_count,
-                                 nbits,
-                                 r == sec_random_state ? "secondary" :
-                                 r == random_state ? "primary" : "unknown");
+                       DEBUG_ENT("Added %d entropy credits to %s, now %d\n",
+                                 nbits, r->name, r->entropy_count);
        }
 
        spin_unlock_irqrestore(&r->lock, flags);
@@ -781,9 +781,9 @@ static void batch_entropy_process(void *private_)
 
 /* There is one of these per entropy source */
 struct timer_rand_state {
-       __u32           last_time;
-       __s32           last_delta,last_delta2;
-       int             dont_count_entropy:1;
+       cycles_t        last_time;
+       long            last_delta,last_delta2;
+       unsigned        dont_count_entropy:1;
 };
 
 static struct timer_rand_state keyboard_timer_state;
@@ -799,37 +799,31 @@ static struct timer_rand_state *irq_timer_state[NR_IRQS];
  * The number "num" is also added to the pool - it should somehow describe
  * the type of event which just happened.  This is currently 0-255 for
  * keyboard scan codes, and 256 upwards for interrupts.
- * On the i386, this is assumed to be at most 16 bits, and the high bits
- * are used for a high-resolution timer.
  *
  */
 static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
 {
-       __u32           time;
-       __s32           delta, delta2, delta3;
+       cycles_t        time;
+       long            delta, delta2, delta3;
        int             entropy = 0;
 
+       preempt_disable();
        /* if over the trickle threshold, use only 1 in 4096 samples */
        if ( random_state->entropy_count > trickle_thresh &&
             (__get_cpu_var(trickle_count)++ & 0xfff))
-               return;
+               goto out;
 
-#if defined (__i386__) || defined (__x86_64__)
-       if (cpu_has_tsc) {
-               __u32 high;
-               rdtsc(time, high);
-               num ^= high;
+       /*
+        * Use get_cycles() if implemented, otherwise fall back to
+        * jiffies.
+        */
+       time = get_cycles();
+       if (time != 0) {
+               if (sizeof(time) > 4)
+                       num ^= (u32)(time >> 32);
        } else {
                time = jiffies;
        }
-#elif defined (__sparc_v9__)
-       unsigned long tick = tick_ops->get_tick();
-
-       time = (unsigned int) tick;
-       num ^= (tick >> 32UL);
-#else
-       time = jiffies;
-#endif
 
        /*
         * Calculate number of bits of randomness we probably added.
@@ -868,6 +862,8 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
                entropy = int_ln_12bits(delta);
        }
        batch_entropy_store(num, time, entropy);
+out:
+       preempt_enable();
 }
 
 void add_keyboard_randomness(unsigned char scancode)
@@ -1328,8 +1324,7 @@ static inline void xfer_secondary_pool(struct entropy_store *r,
                          "(%d of %d requested)\n",
                          random_state->entropy_count,
                          sec_random_state->entropy_count,
-                         r == sec_random_state ? "secondary" : "unknown",
-                         bytes * 8, nbytes * 8, r->entropy_count);
+                         r->name, bytes * 8, nbytes * 8, r->entropy_count);
 
                bytes=extract_entropy(random_state, tmp, bytes,
                                      EXTRACT_ENTROPY_LIMIT);
@@ -1373,9 +1368,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void * buf,
        DEBUG_ENT("%04d %04d : trying to extract %d bits from %s\n",
                  random_state->entropy_count,
                  sec_random_state->entropy_count,
-                 nbytes * 8,
-                 r == sec_random_state ? "secondary" :
-                 r == random_state ? "primary" : "unknown");
+                 nbytes * 8, r->name);
 
        if (flags & EXTRACT_ENTROPY_LIMIT && nbytes >= r->entropy_count / 8)
                nbytes = r->entropy_count / 8;
@@ -1388,12 +1381,8 @@ static ssize_t extract_entropy(struct entropy_store *r, void * buf,
        if (r->entropy_count < random_write_wakeup_thresh)
                wake_up_interruptible(&random_write_wait);
 
-       DEBUG_ENT("%04d %04d : debiting %d bits from %s%s\n",
-                 random_state->entropy_count,
-                 sec_random_state->entropy_count,
-                 nbytes * 8,
-                 r == sec_random_state ? "secondary" :
-                 r == random_state ? "primary" : "unknown",
+       DEBUG_ENT("Debiting %d entropy credits from %s%s\n",
+                 nbytes * 8, r->name,
                  flags & EXTRACT_ENTROPY_LIMIT ? "" : " (unlimited)");
 
        spin_unlock_irqrestore(&r->lock, cpuflags);
@@ -1482,14 +1471,21 @@ static ssize_t extract_entropy(struct entropy_store *r, void * buf,
  */
 void get_random_bytes(void *buf, int nbytes)
 {
-       if (sec_random_state)  
-               extract_entropy(sec_random_state, (char *) buf, nbytes, 
-                               EXTRACT_ENTROPY_SECONDARY);
-       else if (random_state)
-               extract_entropy(random_state, (char *) buf, nbytes, 0);
-       else
+       struct entropy_store *r = urandom_state;
+       int flags = EXTRACT_ENTROPY_SECONDARY;
+
+       if (!r)
+               r = sec_random_state;
+       if (!r) {
+               r = random_state;
+               flags = 0;
+       }
+       if (!r) {
                printk(KERN_NOTICE "get_random_bytes called before "
                                   "random driver initialization\n");
+               return;
+       }
+       extract_entropy(r, (char *) buf, nbytes, flags);
 }
 
 EXPORT_SYMBOL(get_random_bytes);
@@ -1533,15 +1529,22 @@ static int __init rand_initialize(void)
 {
        int i;
 
-       if (create_entropy_store(DEFAULT_POOL_SIZE, &random_state))
+       if (create_entropy_store(DEFAULT_POOL_SIZE, "primary", &random_state))
                goto err;
        if (batch_entropy_init(BATCH_ENTROPY_SIZE, random_state))
                goto err;
-       if (create_entropy_store(SECONDARY_POOL_SIZE, &sec_random_state))
+       if (create_entropy_store(SECONDARY_POOL_SIZE, "secondary",
+                                &sec_random_state))
+               goto err;
+       if (create_entropy_store(SECONDARY_POOL_SIZE, "urandom",
+                                &urandom_state))
                goto err;
        clear_entropy_store(random_state);
        clear_entropy_store(sec_random_state);
+       clear_entropy_store(urandom_state);
        init_std_data(random_state);
+       init_std_data(sec_random_state);
+       init_std_data(urandom_state);
 #ifdef CONFIG_SYSCTL
        sysctl_init_random(random_state);
 #endif
@@ -1674,9 +1677,15 @@ static ssize_t
 urandom_read(struct file * file, char __user * buf,
                      size_t nbytes, loff_t *ppos)
 {
-       return extract_entropy(sec_random_state, buf, nbytes,
-                              EXTRACT_ENTROPY_USER |
-                              EXTRACT_ENTROPY_SECONDARY);
+       int flags = EXTRACT_ENTROPY_USER;
+       unsigned long cpuflags;
+
+       spin_lock_irqsave(&random_state->lock, cpuflags);
+       if (random_state->entropy_count > random_state->poolinfo.POOLBITS)
+               flags |= EXTRACT_ENTROPY_SECONDARY;
+       spin_unlock_irqrestore(&random_state->lock, cpuflags);
+
+       return extract_entropy(urandom_state, buf, nbytes, flags);
 }
 
 static unsigned int
@@ -1730,10 +1739,9 @@ static int
 random_ioctl(struct inode * inode, struct file * file,
             unsigned int cmd, unsigned long arg)
 {
-       int *tmp, size, ent_count;
+       int size, ent_count;
        int __user *p = (int __user *)arg;
        int retval;
-       unsigned long flags;
        
        switch (cmd) {
        case RNDGETENTCNT:
@@ -1754,40 +1762,6 @@ random_ioctl(struct inode * inode, struct file * file,
                if (random_state->entropy_count >= random_read_wakeup_thresh)
                        wake_up_interruptible(&random_read_wait);
                return 0;
-       case RNDGETPOOL:
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
-               if (get_user(size, p) ||
-                   put_user(random_state->poolinfo.poolwords, p++))
-                       return -EFAULT;
-               if (size < 0)
-                       return -EFAULT;
-               if (size > random_state->poolinfo.poolwords)
-                       size = random_state->poolinfo.poolwords;
-
-               /* prepare to atomically snapshot pool */
-
-               tmp = kmalloc(size * sizeof(__u32), GFP_KERNEL);
-
-               if (!tmp)
-                       return -ENOMEM;
-
-               spin_lock_irqsave(&random_state->lock, flags);
-               ent_count = random_state->entropy_count;
-               memcpy(tmp, random_state->pool, size * sizeof(__u32));
-               spin_unlock_irqrestore(&random_state->lock, flags);
-
-               if (!copy_to_user(p, tmp, size * sizeof(__u32))) {
-                       kfree(tmp);
-                       return -EFAULT;
-               }
-
-               kfree(tmp);
-
-               if(put_user(ent_count, p++))
-                       return -EFAULT;
-
-               return 0;
        case RNDADDENTROPY:
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
@@ -1884,7 +1858,8 @@ static int change_poolsize(int poolsize)
        struct entropy_store    *new_store, *old_store;
        int                     ret;
        
-       if ((ret = create_entropy_store(poolsize, &new_store)))
+       if ((ret = create_entropy_store(poolsize, random_state->name,
+                                       &new_store)))
                return ret;
 
        add_entropy_words(new_store, random_state->pool,
@@ -2212,7 +2187,7 @@ static __u32 twothirdsMD4Transform (__u32 const buf[4], __u32 const in[12])
 #undef K3
 
 /* This should not be decreased so low that ISNs wrap too fast. */
-#define REKEY_INTERVAL 300
+#define REKEY_INTERVAL (300*HZ)
 /*
  * Bit layout of the tcp sequence numbers (before adding current time):
  * bit 24-31: increased after every key exchange
@@ -2238,43 +2213,55 @@ static __u32 twothirdsMD4Transform (__u32 const buf[4], __u32 const in[12])
 #define HASH_MASK      ( (1<<HASH_BITS)-1 )
 
 static struct keydata {
-       time_t rekey_time;
        __u32   count;          // already shifted to the final position
        __u32   secret[12];
 } ____cacheline_aligned ip_keydata[2];
 
-static spinlock_t ip_lock = SPIN_LOCK_UNLOCKED;
 static unsigned int ip_cnt;
 
-static struct keydata *__check_and_rekey(time_t time)
+static void rekey_seq_generator(void *private_);
+
+static DECLARE_WORK(rekey_work, rekey_seq_generator, NULL);
+
+/*
+ * Lock avoidance:
+ * The ISN generation runs lockless - it's just a hash over random data.
+ * State changes happen every 5 minutes when the random key is replaced.
+ * Synchronization is performed by having two copies of the hash function
+ * state and rekey_seq_generator always updates the inactive copy.
+ * The copy is then activated by updating ip_cnt.
+ * The implementation breaks down if someone blocks the thread
+ * that processes SYN requests for more than 5 minutes. Should never
+ * happen, and even if that happens only a not perfectly compliant
+ * ISN is generated, nothing fatal.
+ */
+static void rekey_seq_generator(void *private_)
 {
-       struct keydata *keyptr;
-       spin_lock_bh(&ip_lock);
-       keyptr = &ip_keydata[ip_cnt&1];
-       if (!keyptr->rekey_time || (time - keyptr->rekey_time) > REKEY_INTERVAL) {
-               keyptr = &ip_keydata[1^(ip_cnt&1)];
-               keyptr->rekey_time = time;
-               get_random_bytes(keyptr->secret, sizeof(keyptr->secret));
-               keyptr->count = (ip_cnt&COUNT_MASK)<<HASH_BITS;
-               mb();
-               ip_cnt++;
-       }
-       spin_unlock_bh(&ip_lock);
-       return keyptr;
+       struct keydata *keyptr = &ip_keydata[1^(ip_cnt&1)];
+
+       get_random_bytes(keyptr->secret, sizeof(keyptr->secret));
+       keyptr->count = (ip_cnt&COUNT_MASK)<<HASH_BITS;
+       smp_wmb();
+       ip_cnt++;
+       schedule_delayed_work(&rekey_work, REKEY_INTERVAL);
 }
 
-static inline struct keydata *check_and_rekey(time_t time)
+static inline struct keydata *get_keyptr(void)
 {
        struct keydata *keyptr = &ip_keydata[ip_cnt&1];
 
-       rmb();
-       if (!keyptr->rekey_time || (time - keyptr->rekey_time) > REKEY_INTERVAL) {
-               keyptr = __check_and_rekey(time);
-       }
+       smp_rmb();
 
        return keyptr;
 }
 
+static __init int seqgen_init(void)
+{
+       rekey_seq_generator(NULL);
+       return 0;
+}
+late_initcall(seqgen_init);
+
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr,
                                   __u16 sport, __u16 dport)
@@ -2282,14 +2269,12 @@ __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr,
        struct timeval  tv;
        __u32           seq;
        __u32           hash[12];
-       struct keydata *keyptr;
+       struct keydata *keyptr = get_keyptr();
 
        /* The procedure is the same as for IPv4, but addresses are longer.
         * Thus we must use twothirdsMD4Transform.
         */
 
-       do_gettimeofday(&tv);   /* We need the usecs below... */
-       keyptr = check_and_rekey(tv.tv_sec);
 
        memcpy(hash, saddr, 16);
        hash[4]=(sport << 16) + dport;
@@ -2297,38 +2282,22 @@ __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr,
 
        seq = twothirdsMD4Transform(daddr, hash) & HASH_MASK;
        seq += keyptr->count;
+
+       do_gettimeofday(&tv);
        seq += tv.tv_usec + tv.tv_sec*1000000;
 
        return seq;
 }
 EXPORT_SYMBOL(secure_tcpv6_sequence_number);
-
-__u32 secure_ipv6_id(__u32 *daddr)
-{
-       struct keydata *keyptr;
-
-       keyptr = check_and_rekey(get_seconds());
-
-       return halfMD4Transform(daddr, keyptr->secret);
-}
-
-EXPORT_SYMBOL(secure_ipv6_id);
 #endif
 
-
 __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr,
                                 __u16 sport, __u16 dport)
 {
        struct timeval  tv;
        __u32           seq;
        __u32   hash[4];
-       struct keydata *keyptr;
-
-       /*
-        * Pick a random secret every REKEY_INTERVAL seconds.
-        */
-       do_gettimeofday(&tv);   /* We need the usecs below... */
-       keyptr = check_and_rekey(tv.tv_sec);
+       struct keydata *keyptr = get_keyptr();
 
        /*
         *  Pick a unique starting offset for each TCP connection endpoints
@@ -2351,6 +2320,7 @@ __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr,
         *      That's funny, Linux has one built in!  Use it!
         *      (Networks are faster now - should this be increased?)
         */
+       do_gettimeofday(&tv);
        seq += tv.tv_usec + tv.tv_sec*1000000;
 #if 0
        printk("init_seq(%lx, %lx, %d, %d) = %d\n",
@@ -2369,7 +2339,7 @@ __u32 secure_ip_id(__u32 daddr)
        struct keydata *keyptr;
        __u32 hash[4];
 
-       keyptr = check_and_rekey(get_seconds());
+       keyptr = get_keyptr();
 
        /*
         *  Pick a unique starting offset for each IP destination.