2 * Copyright (C) 2003 Christophe Saout <christophe@saout.de>
4 * This file is released under the GPL.
7 #include <linux/module.h>
8 #include <linux/init.h>
9 #include <linux/kernel.h>
10 #include <linux/bio.h>
11 #include <linux/blkdev.h>
12 #include <linux/mempool.h>
13 #include <linux/slab.h>
14 #include <linux/crypto.h>
15 #include <linux/workqueue.h>
16 #include <asm/atomic.h>
17 #include <asm/scatterlist.h>
24 * per bio private data
/*
 * NOTE(review): fields of struct crypt_io — the struct's opening line and
 * remaining members (pending refcount, bio, error) are not visible in this
 * excerpt. first_clone owns the shared bio_vec array; work queues the I/O
 * onto the kcryptd workqueue for process-context decryption.
 */
27 struct dm_target *target;
29 struct bio *first_clone;
30 struct work_struct work;
36 * context holding the current state of a multi-part conversion
38 struct convert_context {
/*
 * Byte offsets into the current input/output bio_vec pages; advanced one
 * sector at a time by crypt_convert(). Other members used elsewhere in the
 * file (bio_in, bio_out, idx_in, idx_out, sector, write) are missing from
 * this excerpt.
 */
41 unsigned int offset_in;
42 unsigned int offset_out;
50 * Crypt: maps a linear range of a block device
51 * and encrypts / decrypts at the same time.
58 * pool for per bio private data and
59 * for encryption buffer pages
/*
 * NOTE(review): visible members of struct crypt_config; the struct header
 * and other fields (dev, start, iv_offset, iv_size, key_size, key[],
 * io_pool, page_pool) appear only via their uses later in the file.
 */
67 struct crypto_tfm *tfm;
/* Per-sector IV generator; NULL selects the non-IV (ECB) cipher path. */
69 int (*iv_generator)(struct crypt_config *cc, u8 *iv, sector_t sector);
/* Minimum pages kept reserved in the page mempool. */
76 #define MIN_POOL_PAGES 32
/* Pages allocated with blocking GFP before falling back to non-waiting
 * allocation in crypt_alloc_buffer(). */
77 #define MIN_BIO_PAGES 8
/* Slab cache backing the per-bio crypt_io mempool. */
79 static kmem_cache_t *_crypt_io_pool;
82 * Mempool alloc and free functions for the page
/* mempool allocation callback: hand out one fresh page; data is unused. */
84 static void *mempool_alloc_page(int gfp_mask, void *data)
86 return alloc_page(gfp_mask);
/* mempool free callback (body not visible in this excerpt; presumably
 * releases the page via __free_page — TODO confirm). */
89 static void mempool_free_page(void *page, void *data)
96 * Different IV generation algorithms
/*
 * "plain" IV: the low 32 bits of the sector number, stored little-endian
 * at the start of the IV buffer, with the remainder zero-filled up to
 * cc->iv_size.
 */
98 static int crypt_iv_plain(struct crypt_config *cc, u8 *iv, sector_t sector)
100 *(u32 *)iv = cpu_to_le32(sector & 0xffffffff);
/* Zero-pad when the cipher's IV is wider than 32 bits. */
101 if (cc->iv_size > sizeof(u32) / sizeof(u8))
102 memset(iv + (sizeof(u32) / sizeof(u8)), 0,
103 cc->iv_size - (sizeof(u32) / sizeof(u8)));
/*
 * Encrypt (write) or decrypt (read) `length` bytes between two
 * scatterlists. When an IV generator is configured it derives the IV from
 * `sector` and uses the *_iv cipher entry points; otherwise the plain
 * (IV-less) entry points are used.
 * NOTE(review): local declarations, the write/read branch structure and
 * the return statement are missing from this excerpt.
 */
109 crypt_convert_scatterlist(struct crypt_config *cc, struct scatterlist *out,
110 struct scatterlist *in, unsigned int length,
111 int write, sector_t sector)
116 if (cc->iv_generator) {
117 r = cc->iv_generator(cc, iv, sector);
/* IV path: encrypt for writes, decrypt for reads. */
122 r = crypto_cipher_encrypt_iv(cc->tfm, out, in, length, iv);
124 r = crypto_cipher_decrypt_iv(cc->tfm, out, in, length, iv);
/* No-IV path (e.g. ECB mode). */
127 r = crypto_cipher_encrypt(cc->tfm, out, in, length);
129 r = crypto_cipher_decrypt(cc->tfm, out, in, length);
/*
 * Prepare a convert_context for a bio-to-bio conversion starting at
 * `sector`. Either bio may be NULL (crypt_map() passes a NULL bio_out for
 * writes and fills it in per clone), in which case the corresponding index
 * starts at 0.
 */
136 crypt_convert_init(struct crypt_config *cc, struct convert_context *ctx,
137 struct bio *bio_out, struct bio *bio_in,
138 sector_t sector, int write)
140 ctx->bio_in = bio_in;
141 ctx->bio_out = bio_out;
144 ctx->idx_in = bio_in ? bio_in->bi_idx : 0;
145 ctx->idx_out = bio_out ? bio_out->bi_idx : 0;
/* iv_offset shifts the IV sector space relative to the mapped sector. */
146 ctx->sector = sector + cc->iv_offset;
151 * Encrypt / decrypt data from one bio to another one (can be the same one)
153 static int crypt_convert(struct crypt_config *cc,
154 struct convert_context *ctx)
158 while(ctx->idx_in < ctx->bio_in->bi_vcnt &&
159 ctx->idx_out < ctx->bio_out->bi_vcnt) {
160 struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in);
161 struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out);
162 struct scatterlist sg_in = {
163 .page = bv_in->bv_page,
164 .offset = bv_in->bv_offset + ctx->offset_in,
165 .length = 1 << SECTOR_SHIFT
167 struct scatterlist sg_out = {
168 .page = bv_out->bv_page,
169 .offset = bv_out->bv_offset + ctx->offset_out,
170 .length = 1 << SECTOR_SHIFT
173 ctx->offset_in += sg_in.length;
174 if (ctx->offset_in >= bv_in->bv_len) {
179 ctx->offset_out += sg_out.length;
180 if (ctx->offset_out >= bv_out->bv_len) {
185 r = crypt_convert_scatterlist(cc, &sg_out, &sg_in, sg_in.length,
186 ctx->write, ctx->sector);
197 * Generate a new unfragmented bio with the given size
198 * This should never violate the device limitations
199 * May return a smaller bio when running out of pages
/*
 * Allocates a write-side bio whose pages come from cc->page_pool. If
 * base_bio is set the new bio is cloned from it so a partially-converted
 * request can be continued from *bio_vec_idx. Returns a bio that may
 * cover less than `size` bytes; the caller loops until done.
 */
202 crypt_alloc_buffer(struct crypt_config *cc, unsigned int size,
203 struct bio *base_bio, int *bio_vec_idx)
206 int nr_iovecs = dm_div_up(size, PAGE_SIZE);
207 int gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
/* Remember PF_MEMALLOC so it can be restored on every exit path. */
208 int flags = current->flags;
212 * Tell VM to act less aggressively and fail earlier.
213 * This is not necessary but increases throughput.
214 * FIXME: Is this really intelligent?
216 current->flags &= ~PF_MEMALLOC;
219 bio = bio_clone(base_bio, GFP_NOIO);
221 bio = bio_alloc(GFP_NOIO, nr_iovecs);
223 if (flags & PF_MEMALLOC)
224 current->flags |= PF_MEMALLOC;
228 /* if the last bio was not complete, continue where that one ended */
229 bio->bi_idx = *bio_vec_idx;
230 bio->bi_vcnt = *bio_vec_idx;
/* Segment geometry changed; force the block layer to recount. */
232 bio->bi_flags &= ~(1 << BIO_SEG_VALID);
234 /* bio->bi_idx pages have already been allocated */
235 size -= bio->bi_idx * PAGE_SIZE;
237 for(i = bio->bi_idx; i < nr_iovecs; i++) {
238 struct bio_vec *bv = bio_iovec_idx(bio, i);
240 bv->bv_page = mempool_alloc(cc->page_pool, gfp_mask);
245 * if additional pages cannot be allocated without waiting,
246 * return a partially allocated bio, the caller will then try
247 * to allocate additional bios while submitting this partial bio
249 if ((i - bio->bi_idx) == (MIN_BIO_PAGES - 1))
250 gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
253 if (size > PAGE_SIZE)
254 bv->bv_len = PAGE_SIZE;
258 bio->bi_size += bv->bv_len;
263 if (flags & PF_MEMALLOC)
264 current->flags |= PF_MEMALLOC;
272 * Remember the last bio_vec allocated to be able
273 * to correctly continue after the splitting.
275 *bio_vec_idx = bio->bi_vcnt;
/*
 * Return the first `bytes` worth of pages in `bio` to cc->page_pool.
 * The page range is derived from bi_vcnt/bi_size rather than bi_idx (see
 * the in-code rationale below). NOTE(review): the computation of `start`
 * is only partially visible in this excerpt.
 */
280 static void crypt_free_buffer_pages(struct crypt_config *cc,
281 struct bio *bio, unsigned int bytes)
283 unsigned int start, end;
288 * This is ugly, but Jens Axboe thinks that using bi_idx in the
289 * endio function is too dangerous at the moment, so I calculate the
290 * correct position using bi_vcnt and bi_size.
291 * The bv_offset and bv_len fields might already be modified but we
292 * know that we always allocated whole pages.
293 * A fix to the bi_idx issue in the kernel is in the works, so
294 * we will hopefully be able to revert to the cleaner solution soon.
296 i = bio->bi_vcnt - 1;
297 bv = bio_iovec_idx(bio, i);
298 end = (i << PAGE_SHIFT) + (bv->bv_offset + bv->bv_len) - bio->bi_size;
301 start >>= PAGE_SHIFT;
/* Give every page in [start, end) back to the mempool. */
307 for(i = start; i < end; i++) {
308 bv = bio_iovec_idx(bio, i);
309 BUG_ON(!bv->bv_page);
310 mempool_free(bv->bv_page, cc->page_pool);
316 * One of the bios was finished. Check for completion of
317 * the whole request and correctly clean up the buffer.
/*
 * Drop one reference on the crypt_io; the last dropper releases
 * first_clone, completes the original bio and frees the io structure.
 */
319 static void dec_pending(struct crypt_io *io, int error)
321 struct crypt_config *cc = (struct crypt_config *) io->target->private;
/* Not the last reference yet — other clones are still in flight. */
326 if (!atomic_dec_and_test(&io->pending))
330 bio_put(io->first_clone);
332 bio_endio(io->bio, io->bio->bi_size, io->error);
334 mempool_free(io, cc->io_pool);
340 * Needed because it would be very unwise to do decryption in an
341 * interrupt context, so bios returning from read requests get
/* Single workqueue on which all deferred (read-side) decryption runs. */
344 static struct workqueue_struct *_kcryptd_workqueue;
/*
 * Workqueue handler: decrypt a completed read bio in place (bio is both
 * source and destination), in process context.
 */
346 static void kcryptd_do_work(void *data)
348 struct crypt_io *io = (struct crypt_io *) data;
349 struct crypt_config *cc = (struct crypt_config *) io->target->private;
350 struct convert_context ctx;
/* write=0: decrypt; sector is relative to the start of the target. */
353 crypt_convert_init(cc, &ctx, io->bio, io->bio,
354 io->bio->bi_sector - io->target->begin, 0);
355 r = crypt_convert(cc, &ctx);
/* Defer processing of `io` to the kcryptd workqueue (process context). */
360 static void kcryptd_queue_io(struct crypt_io *io)
362 INIT_WORK(&io->work, kcryptd_do_work, io);
363 queue_work(_kcryptd_workqueue, &io->work);
367 * Decode key from its hex representation
/*
 * Parse `size` bytes out of the hex string `hex` into `key`, two hex
 * digits per byte. NOTE(review): the copy of each digit pair into
 * `buffer` and the return statements are missing from this excerpt;
 * endp != &buffer[2] rejects pairs that are not exactly two hex digits.
 */
369 static int crypt_decode_key(u8 *key, char *hex, int size)
377 for(i = 0; i < size; i++) {
381 key[i] = (u8)simple_strtoul(buffer, &endp, 16);
383 if (endp != &buffer[2])
394 * Encode key into its hex representation
/*
 * Write `size` key bytes as lowercase hex into `hex`, two characters per
 * byte. NOTE(review): the pointer advances (hex += 2; key++) are missing
 * from this excerpt — TODO confirm against the full source.
 */
396 static void crypt_encode_key(char *hex, u8 *key, int size)
400 for(i = 0; i < size; i++) {
401 sprintf(hex, "%02x", *key);
408 * Construct an encryption mapping:
409 * <cipher> <key> <iv_offset> <dev_path> <start>
/*
 * Device-mapper constructor. Parses "cipher[-mode]" from argv[0], decodes
 * the hex key (argv[1], "-" for an empty key), reads iv_offset (argv[2]),
 * the backing device path (argv[3]) and its start sector (argv[4]).
 * On failure ti->error is set and resources are unwound via the labels
 * at the bottom. NOTE(review): several lines (argument count check,
 * error-path gotos, final assignments) are missing from this excerpt.
 */
411 static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
413 struct crypt_config *cc;
414 struct crypto_tfm *tfm;
422 ti->error = PFX "Not enough arguments";
/* Split argv[0] as "cipher-mode"; anything after a second '-' is noise. */
427 cipher = strsep(&tmp, "-");
428 mode = strsep(&tmp, "-");
431 DMWARN(PFX "Unexpected additional cipher options");
/* Two hex digits per key byte. */
433 key_size = strlen(argv[1]) >> 1;
/* crypt_config has a trailing variable-length key array. */
435 cc = kmalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL);
438 PFX "Cannot allocate transparent encryption context";
/* Default chaining mode is "plain" (sector-derived IV). */
442 if (!mode || strcmp(mode, "plain") == 0)
443 cc->iv_generator = crypt_iv_plain;
444 else if (strcmp(mode, "ecb") == 0)
445 cc->iv_generator = NULL;
447 ti->error = PFX "Invalid chaining mode";
/* An IV generator implies CBC, otherwise ECB. */
451 if (cc->iv_generator)
452 crypto_flags = CRYPTO_TFM_MODE_CBC;
454 crypto_flags = CRYPTO_TFM_MODE_ECB;
456 tfm = crypto_alloc_tfm(cipher, crypto_flags);
458 ti->error = PFX "Error allocating crypto tfm";
461 if (crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER) {
462 ti->error = PFX "Expected cipher algorithm";
466 if (tfm->crt_cipher.cit_decrypt_iv && tfm->crt_cipher.cit_encrypt_iv)
467 /* at least a 32 bit sector number should fit in our buffer */
468 cc->iv_size = max(crypto_tfm_alg_ivsize(tfm),
469 (unsigned int)(sizeof(u32) / sizeof(u8)));
/* Cipher has no IV entry points: silently fall back to the no-IV path. */
472 if (cc->iv_generator) {
473 DMWARN(PFX "Selected cipher does not support IVs");
474 cc->iv_generator = NULL;
478 cc->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
479 mempool_free_slab, _crypt_io_pool);
481 ti->error = PFX "Cannot allocate crypt io mempool";
485 cc->page_pool = mempool_create(MIN_POOL_PAGES, mempool_alloc_page,
486 mempool_free_page, NULL);
487 if (!cc->page_pool) {
488 ti->error = PFX "Cannot allocate page mempool";
493 cc->key_size = key_size;
/* "-" means an explicitly empty key; anything else must decode cleanly. */
494 if ((key_size == 0 && strcmp(argv[1], "-") != 0)
495 || crypt_decode_key(cc->key, argv[1], key_size) < 0) {
496 ti->error = PFX "Error decoding key";
500 if (tfm->crt_cipher.cit_setkey(tfm, cc->key, key_size) < 0) {
501 ti->error = PFX "Error setting key";
505 if (sscanf(argv[2], SECTOR_FORMAT, &cc->iv_offset) != 1) {
506 ti->error = PFX "Invalid iv_offset sector";
510 if (sscanf(argv[4], SECTOR_FORMAT, &cc->start) != 1) {
511 ti->error = PFX "Invalid device sector";
515 if (dm_get_device(ti, argv[3], cc->start, ti->len,
516 dm_table_get_mode(ti->table), &cc->dev)) {
517 ti->error = PFX "Device lookup failed";
/* Error unwind: release in reverse order of acquisition. */
525 mempool_destroy(cc->page_pool);
527 mempool_destroy(cc->io_pool);
529 crypto_free_tfm(tfm);
/*
 * Device-mapper destructor: tear down everything crypt_ctr() set up —
 * mempools, the crypto transform and the backing device reference.
 * NOTE(review): kfree(cc) is presumably performed on a line missing from
 * this excerpt — TODO confirm.
 */
535 static void crypt_dtr(struct dm_target *ti)
537 struct crypt_config *cc = (struct crypt_config *) ti->private;
539 mempool_destroy(cc->page_pool);
540 mempool_destroy(cc->io_pool);
542 crypto_free_tfm(cc->tfm);
543 dm_put_device(ti, cc->dev);
/*
 * Per-clone bio completion handler. Writes give their encrypted pages
 * back to the pool immediately; successful reads are handed to kcryptd
 * for decryption in process context; everything else drops a reference
 * via dec_pending().
 */
547 static int crypt_endio(struct bio *bio, unsigned int done, int error)
549 struct crypt_io *io = (struct crypt_io *) bio->bi_private;
550 struct crypt_config *cc = (struct crypt_config *) io->target->private;
552 if (bio_data_dir(bio) == WRITE) {
554 * free the processed pages, even if
555 * it's only a partially completed write
557 crypt_free_buffer_pages(cc, bio, done)
566 * successful reads are decrypted by the worker thread
568 if ((bio_data_dir(bio) == READ)
569 && bio_flagged(bio, BIO_UPTODATE)) {
570 kcryptd_queue_io(io);
574 dec_pending(io, error);
/*
 * Build the clone bio that is actually submitted to the underlying
 * device. Writes: allocate fresh pool pages and encrypt into them
 * (freeing the pages again if conversion fails). Reads: copy the bvec
 * array so the original is preserved for in-place decryption on
 * completion. Finally wire up completion handler, target bdev and the
 * remapped sector.
 */
578 static inline struct bio *
579 crypt_clone(struct crypt_config *cc, struct crypt_io *io, struct bio *bio,
580 sector_t sector, int *bvec_idx, struct convert_context *ctx)
584 if (bio_data_dir(bio) == WRITE) {
585 clone = crypt_alloc_buffer(cc, bio->bi_size,
586 io->first_clone, bvec_idx);
/* Encrypt the caller's data into the freshly allocated clone pages. */
588 ctx->bio_out = clone;
589 if (crypt_convert(cc, ctx) < 0) {
590 crypt_free_buffer_pages(cc, clone,
598 * The block layer might modify the bvec array, so always
599 * copy the required bvecs because we need the original
600 * one in order to decrypt the whole bio data *afterwards*.
602 clone = bio_alloc(GFP_NOIO, bio_segments(bio));
605 clone->bi_vcnt = bio_segments(bio);
606 clone->bi_size = bio->bi_size;
607 memcpy(clone->bi_io_vec, bio_iovec(bio),
608 sizeof(struct bio_vec) * clone->bi_vcnt);
/* Route the clone to the backing device at the remapped sector. */
615 clone->bi_private = io;
616 clone->bi_end_io = crypt_endio;
617 clone->bi_bdev = cc->dev->bdev;
618 clone->bi_sector = cc->start + sector;
619 clone->bi_rw = bio->bi_rw;
/*
 * Device-mapper map function: split the incoming bio into one or more
 * clones (each possibly smaller than the remainder, see
 * crypt_alloc_buffer) and submit them. io->pending counts the original
 * reference plus one per clone in flight.
 */
624 static int crypt_map(struct dm_target *ti, struct bio *bio,
625 union map_info *map_context)
627 struct crypt_config *cc = (struct crypt_config *) ti->private;
628 struct crypt_io *io = mempool_alloc(cc->io_pool, GFP_NOIO);
629 struct convert_context ctx;
631 unsigned int remaining = bio->bi_size;
/* Sector relative to the start of this target. */
632 sector_t sector = bio->bi_sector - ti->begin;
637 io->first_clone = NULL;
639 atomic_set(&io->pending, 1); /* hold a reference */
/* Writes are converted up front as each clone is built. */
641 if (bio_data_dir(bio) == WRITE)
642 crypt_convert_init(cc, &ctx, NULL, bio, sector, 1);
645 * The allocated buffers can be smaller than the whole bio,
646 * so repeat the whole process until all the data can be handled.
649 clone = crypt_clone(cc, io, bio, sector, &bvec_idx, &ctx);
653 if (!io->first_clone) {
655 * hold a reference to the first clone, because it
656 * holds the bio_vec array and that can't be freed
657 * before all other clones are released
660 io->first_clone = clone;
662 atomic_inc(&io->pending);
664 remaining -= clone->bi_size;
665 sector += bio_sectors(clone);
667 generic_make_request(clone);
669 /* out of memory -> run queues */
671 blk_congestion_wait(bio_data_dir(clone), HZ/100);
674 /* drop reference, clones could have returned before we reach this */
/* Error path: if at least one clone went out, report via dec_pending. */
679 if (io->first_clone) {
680 dec_pending(io, -ENOMEM);
684 /* if no bio has been dispatched yet, we can directly return the error */
685 mempool_free(io, cc->io_pool);
/*
 * Report target status. STATUSTYPE_TABLE emits the constructor line back:
 * "cipher-mode key iv_offset dev start" (key as hex, or "-" when empty).
 * NOTE(review): the mode-string assignments, overflow returns and the
 * final format_dev_t argument are on lines missing from this excerpt.
 */
689 static int crypt_status(struct dm_target *ti, status_type_t type,
690 char *result, unsigned int maxlen)
692 struct crypt_config *cc = (struct crypt_config *) ti->private;
695 const char *mode = NULL;
699 case STATUSTYPE_INFO:
703 case STATUSTYPE_TABLE:
704 cipher = crypto_tfm_alg_name(cc->tfm);
/* Map the tfm's configured mode back to its table keyword. */
706 switch(cc->tfm->crt_cipher.cit_mode) {
707 case CRYPTO_TFM_MODE_CBC:
710 case CRYPTO_TFM_MODE_ECB:
717 snprintf(result, maxlen, "%s-%s ", cipher, mode);
718 offset = strlen(result);
/* Append the key in hex, guarding against buffer overrun. */
720 if (cc->key_size > 0) {
721 if ((maxlen - offset) < ((cc->key_size << 1) + 1))
724 crypt_encode_key(result + offset, cc->key, cc->key_size);
725 offset += cc->key_size << 1;
727 if (offset >= maxlen)
/* Empty key is reported as "-", matching the constructor syntax. */
729 result[offset++] = '-';
732 format_dev_t(buffer, cc->dev->bdev->bd_dev);
733 snprintf(result + offset, maxlen - offset, " " SECTOR_FORMAT
734 " %s " SECTOR_FORMAT, cc->iv_offset,
/*
 * Device-mapper target registration record. NOTE(review): the .name,
 * .version, .ctr, .dtr and .map initialisers are on lines missing from
 * this excerpt.
 */
741 static struct target_type crypt_target = {
744 .module = THIS_MODULE,
748 .status = crypt_status,
/*
 * Module init: create the crypt_io slab cache and the kcryptd workqueue,
 * then register the "crypt" target. Later failures unwind the earlier
 * steps (unwind labels partially visible below).
 */
751 static int __init dm_crypt_init(void)
755 _crypt_io_pool = kmem_cache_create("dm-crypt_io",
756 sizeof(struct crypt_io),
761 _kcryptd_workqueue = create_workqueue("kcryptd");
762 if (!_kcryptd_workqueue) {
764 DMERR(PFX "couldn't create kcryptd");
768 r = dm_register_target(&crypt_target);
770 DMERR(PFX "register failed %d", r);
/* Error unwind in reverse order of creation. */
777 destroy_workqueue(_kcryptd_workqueue);
779 kmem_cache_destroy(_crypt_io_pool);
/*
 * Module exit: unregister the target (logging — but not propagating —
 * any failure), then release the workqueue and slab cache.
 */
783 static void __exit dm_crypt_exit(void)
785 int r = dm_unregister_target(&crypt_target);
788 DMERR(PFX "unregister failed %d", r);
790 destroy_workqueue(_kcryptd_workqueue);
791 kmem_cache_destroy(_crypt_io_pool);
794 module_init(dm_crypt_init);
795 module_exit(dm_crypt_exit);
797 MODULE_AUTHOR("Christophe Saout <christophe@saout.de>");
798 MODULE_DESCRIPTION(DM_NAME " target for transparent encryption / decryption");
799 MODULE_LICENSE("GPL");