This commit was manufactured by cvs2svn to create branch 'vserver'.
[linux-2.6.git] / arch / s390 / hypfs / hypfs_diag.c
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
new file mode 100644 (file)
index 0000000..1785bce
--- /dev/null
@@ -0,0 +1,696 @@
+/*
+ *  fs/hypfs/hypfs_diag.c
+ *    Hypervisor filesystem for Linux on s390. Diag 204 and 224
+ *    implementation.
+ *
+ *    Copyright (C) IBM Corp. 2006
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <asm/ebcdic.h>
+#include "hypfs.h"
+
+/*
+ * Fixed sizes used throughout this file. The name lengths exclude the
+ * terminating 0 byte; callers that want a C string provide one extra byte.
+ */
+#define LPAR_NAME_LEN 8                /* lpar name len in diag 204 data */
+#define CPU_NAME_LEN 16                /* type name len of cpus in diag224 name table */
+#define TMP_SIZE 64            /* size of temporary buffers */
+
+/* diag 204 subcodes */
+/*
+ * In this file SUBC_STIB4 is issued together with INFO_SIMPLE, SUBC_RSI is
+ * used to query how many pages the extended data needs, and SUBC_STIB6 /
+ * SUBC_STIB7 are issued together with INFO_EXT.
+ */
+enum diag204_sc {
+       SUBC_STIB4 = 4,
+       SUBC_RSI = 5,
+       SUBC_STIB6 = 6,
+       SUBC_STIB7 = 7
+};
+
+/* The two available diag 204 data formats */
+/*
+ * The format value is OR-ed into the subcode that is passed to diag204().
+ * The accessor functions below treat every value other than INFO_SIMPLE
+ * as INFO_EXT.
+ */
+enum diag204_format {
+       INFO_SIMPLE = 0,
+       INFO_EXT = 0x00010000
+};
+
+/* bit is set in flags, when physical cpu info is included in diag 204 data */
+#define LPAR_PHYS_FLG  0x80
+
+/*
+ * Module state. diag204_store_sc and diag204_info_type are chosen once by
+ * diag204_probe() and then used by diag204_store().
+ */
+static char *diag224_cpu_names;                        /* diag 224 name table */
+static enum diag204_sc diag204_store_sc;       /* used subcode for store */
+static enum diag204_format diag204_info_type;  /* used diag 204 data format */
+
+/*
+ * Cached data buffer. diag204_buf is NULL while no buffer is allocated;
+ * diag204_buf_vmalloc is only non-NULL when the buffer came from vmalloc()
+ * and then holds the pointer that must be passed to vfree().
+ */
+static void *diag204_buf;              /* 4K aligned buffer for diag204 data */
+static void *diag204_buf_vmalloc;      /* vmalloc pointer for diag204 data */
+static int diag204_buf_pages;          /* number of pages for diag204 data */
+
+/*
+ * DIAG 204 data structures and member access functions.
+ *
+ * Since we have two different diag 204 data formats for old and new s390
+ * machines, we do not access the structs directly, but use getter functions for
+ * each struct member instead. This should make the code more readable.
+ */
+
+/* Time information block */
+
+/*
+ * Time information block header, simple (INFO_SIMPLE) layout.
+ * Packed: the field offsets mirror the data stored by diag 204.
+ */
+struct info_blk_hdr {
+       __u8  npar;             /* number of partition blocks that follow */
+       __u8  flags;            /* tested against LPAR_PHYS_FLG */
+       __u16 tslice;           /* no accessor in this file */
+       __u16 phys_cpus;        /* read via info_blk_hdr__pcpus() */
+       __u16 this_part;        /* no accessor in this file */
+       __u64 curtod;           /* no accessor in this file */
+} __attribute__ ((packed));
+
+/*
+ * Time information block header, extended (INFO_EXT) layout.
+ * The leading fields match struct info_blk_hdr; the extended form carries
+ * two TOD fields and a reserved area.
+ */
+struct x_info_blk_hdr {
+       __u8  npar;             /* number of partition blocks that follow */
+       __u8  flags;            /* tested against LPAR_PHYS_FLG */
+       __u16 tslice;           /* no accessor in this file */
+       __u16 phys_cpus;        /* read via info_blk_hdr__pcpus() */
+       __u16 this_part;        /* no accessor in this file */
+       __u64 curtod1;          /* no accessor in this file */
+       __u64 curtod2;          /* no accessor in this file */
+       char reserved[40];
+} __attribute__ ((packed));
+
+/*
+ * Size in bytes of the time information block header for format @type.
+ * Any value other than INFO_SIMPLE is handled as INFO_EXT.
+ */
+static inline int info_blk_hdr__size(enum diag204_format type)
+{
+       return (type == INFO_SIMPLE) ? sizeof(struct info_blk_hdr)
+                                    : sizeof(struct x_info_blk_hdr);
+}
+
+/*
+ * Return the npar field of the time information block header at @hdr,
+ * interpreting @hdr according to @type.
+ */
+static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr)
+{
+       struct x_info_blk_hdr *ext = hdr;
+
+       if (type == INFO_SIMPLE) {
+               struct info_blk_hdr *simple = hdr;
+
+               return simple->npar;
+       }
+       /* INFO_EXT */
+       return ext->npar;
+}
+
+/*
+ * Return the flags byte of the time information block header at @hdr,
+ * interpreting @hdr according to @type.
+ */
+static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr)
+{
+       struct x_info_blk_hdr *ext = hdr;
+
+       if (type == INFO_SIMPLE) {
+               struct info_blk_hdr *simple = hdr;
+
+               return simple->flags;
+       }
+       /* INFO_EXT */
+       return ext->flags;
+}
+
+/*
+ * Return the phys_cpus field of the time information block header at @hdr,
+ * interpreting @hdr according to @type.
+ */
+static inline __u16 info_blk_hdr__pcpus(enum diag204_format type, void *hdr)
+{
+       struct x_info_blk_hdr *ext = hdr;
+
+       if (type == INFO_SIMPLE) {
+               struct info_blk_hdr *simple = hdr;
+
+               return simple->phys_cpus;
+       }
+       /* INFO_EXT */
+       return ext->phys_cpus;
+}
+
+/* Partition header */
+
+/*
+ * Partition header, simple (INFO_SIMPLE) layout.
+ * Packed: the field offsets mirror the data stored by diag 204.
+ */
+struct part_hdr {
+       __u8 pn;                /* no accessor in this file */
+       __u8 cpus;              /* returned by part_hdr__rcpus() for INFO_SIMPLE */
+       char reserved[6];
+       char part_name[LPAR_NAME_LEN];  /* fixed width, not 0-terminated */
+} __attribute__ ((packed));
+
+/*
+ * Partition header, extended (INFO_EXT) layout.
+ * Only rcpus and part_name are read through accessors in this file; the
+ * remaining fields are present to keep the layout.
+ */
+struct x_part_hdr {
+       __u8  pn;
+       __u8  cpus;
+       __u8  rcpus;            /* returned by part_hdr__rcpus() for INFO_EXT */
+       __u8  pflag;
+       __u32 mlu;
+       char  part_name[LPAR_NAME_LEN]; /* fixed width, not 0-terminated */
+       char  lpc_name[8];
+       char  os_name[8];
+       __u64 online_cs;
+       __u64 online_es;
+       __u8  upid;
+       char  reserved1[3];
+       __u32 group_mlu;
+       char  group_name[8];
+       char  reserved2[32];
+} __attribute__ ((packed));
+
+/*
+ * Size in bytes of a partition header for format @type.
+ * Any value other than INFO_SIMPLE is handled as INFO_EXT.
+ */
+static inline int part_hdr__size(enum diag204_format type)
+{
+       return (type == INFO_SIMPLE) ? sizeof(struct part_hdr)
+                                    : sizeof(struct x_part_hdr);
+}
+
+/*
+ * Return the cpu count of the partition header at @hdr. The simple layout
+ * only has a "cpus" field, so that one is returned for INFO_SIMPLE; the
+ * extended layout returns its separate "rcpus" field.
+ */
+static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr)
+{
+       struct x_part_hdr *ext = hdr;
+
+       if (type == INFO_SIMPLE) {
+               struct part_hdr *simple = hdr;
+
+               return simple->cpus;
+       }
+       /* INFO_EXT */
+       return ext->rcpus;
+}
+
+/*
+ * Copy the partition name of the partition header at @hdr into @name.
+ *
+ * @name must provide at least LPAR_NAME_LEN + 1 bytes: LPAR_NAME_LEN bytes
+ * are copied, passed through EBCASC() and then 0-terminated. The return
+ * value of strstrip() is not used, so only its in-place effect on @name
+ * applies.
+ */
+static inline void part_hdr__part_name(enum diag204_format type, void *hdr,
+                                      char *name)
+{
+       const char *raw_name;
+
+       if (type == INFO_SIMPLE)
+               raw_name = ((struct part_hdr *)hdr)->part_name;
+       else /* INFO_EXT */
+               raw_name = ((struct x_part_hdr *)hdr)->part_name;
+
+       memcpy(name, raw_name, LPAR_NAME_LEN);
+       EBCASC(name, LPAR_NAME_LEN);
+       name[LPAR_NAME_LEN] = 0;
+       strstrip(name);
+}
+
+/*
+ * Per-cpu info block of a partition, simple (INFO_SIMPLE) layout.
+ * Packed: the field offsets mirror the data stored by diag 204.
+ */
+struct cpu_info {
+       __u16 cpu_addr;         /* used as the cpu directory name */
+       char  reserved1[2];
+       __u8  ctidx;            /* index passed to diag224_idx2name() */
+       __u8  cflag;            /* no accessor in this file */
+       __u16 weight;           /* no accessor in this file */
+       __u64 acc_time;         /* "mgmtime" is acc_time - lp_time */
+       __u64 lp_time;          /* exported as "cputime" */
+} __attribute__ ((packed));
+
+/*
+ * Per-cpu info block of a partition, extended (INFO_EXT) layout.
+ * The leading fields match struct cpu_info; of the additional fields only
+ * online_time is read in this file.
+ */
+struct x_cpu_info {
+       __u16 cpu_addr;         /* used as the cpu directory name */
+       char  reserved1[2];
+       __u8  ctidx;            /* index passed to diag224_idx2name() */
+       __u8  cflag;
+       __u16 weight;
+       __u64 acc_time;         /* "mgmtime" is acc_time - lp_time */
+       __u64 lp_time;          /* exported as "cputime" */
+       __u16 min_weight;
+       __u16 cur_weight;
+       __u16 max_weight;
+       char  reseved2[2];
+       __u64 online_time;      /* exported as "onlinetime" */
+       __u64 wait_time;
+       __u32 pma_weight;
+       __u32 polar_weight;
+       char  reserved3[40];
+} __attribute__ ((packed));
+
+/* CPU info block */
+
+/*
+ * Size in bytes of one partition cpu info block for format @type.
+ * Any value other than INFO_SIMPLE is handled as INFO_EXT.
+ */
+static inline int cpu_info__size(enum diag204_format type)
+{
+       return (type == INFO_SIMPLE) ? sizeof(struct cpu_info)
+                                    : sizeof(struct x_cpu_info);
+}
+
+/*
+ * Return the ctidx field of the cpu info block at @hdr, interpreting
+ * @hdr according to @type.
+ */
+static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr)
+{
+       struct x_cpu_info *ext = hdr;
+
+       if (type == INFO_SIMPLE) {
+               struct cpu_info *simple = hdr;
+
+               return simple->ctidx;
+       }
+       /* INFO_EXT */
+       return ext->ctidx;
+}
+
+/*
+ * Return the cpu_addr field of the cpu info block at @hdr, interpreting
+ * @hdr according to @type.
+ */
+static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr)
+{
+       struct x_cpu_info *ext = hdr;
+
+       if (type == INFO_SIMPLE) {
+               struct cpu_info *simple = hdr;
+
+               return simple->cpu_addr;
+       }
+       /* INFO_EXT */
+       return ext->cpu_addr;
+}
+
+/*
+ * Return the acc_time field of the cpu info block at @hdr, interpreting
+ * @hdr according to @type.
+ */
+static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr)
+{
+       struct x_cpu_info *ext = hdr;
+
+       if (type == INFO_SIMPLE) {
+               struct cpu_info *simple = hdr;
+
+               return simple->acc_time;
+       }
+       /* INFO_EXT */
+       return ext->acc_time;
+}
+
+/*
+ * Return the lp_time field of the cpu info block at @hdr, interpreting
+ * @hdr according to @type.
+ */
+static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr)
+{
+       struct x_cpu_info *ext = hdr;
+
+       if (type == INFO_SIMPLE) {
+               struct cpu_info *simple = hdr;
+
+               return simple->lp_time;
+       }
+       /* INFO_EXT */
+       return ext->lp_time;
+}
+
+/*
+ * Return the online_time field of the cpu info block at @hdr. The simple
+ * layout has no such field, so 0 is returned for INFO_SIMPLE and @hdr is
+ * not touched in that case.
+ */
+static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr)
+{
+       struct x_cpu_info *ext = hdr;
+
+       /* online_time not available in simple info */
+       if (type == INFO_SIMPLE)
+               return 0;
+       /* INFO_EXT */
+       return ext->online_time;
+}
+
+/* Physical header */
+
+/*
+ * Physical header, simple (INFO_SIMPLE) layout.
+ * Packed: the field offsets mirror the data stored by diag 204.
+ */
+struct phys_hdr {
+       char reserved1[1];
+       __u8 cpus;              /* number of physical cpu blocks that follow */
+       char reserved2[6];
+       char mgm_name[8];       /* no accessor in this file */
+} __attribute__ ((packed));
+
+/*
+ * Physical header, extended (INFO_EXT) layout.
+ * Same leading fields as struct phys_hdr, followed by a reserved area.
+ */
+struct x_phys_hdr {
+       char reserved1[1];
+       __u8 cpus;              /* number of physical cpu blocks that follow */
+       char reserved2[6];
+       char mgm_name[8];       /* no accessor in this file */
+       char reserved3[80];
+} __attribute__ ((packed));
+
+/*
+ * Size in bytes of the physical header for format @type.
+ * Any value other than INFO_SIMPLE is handled as INFO_EXT.
+ */
+static inline int phys_hdr__size(enum diag204_format type)
+{
+       return (type == INFO_SIMPLE) ? sizeof(struct phys_hdr)
+                                    : sizeof(struct x_phys_hdr);
+}
+
+/*
+ * Return the cpus field of the physical header at @hdr, interpreting
+ * @hdr according to @type.
+ */
+static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr)
+{
+       struct x_phys_hdr *ext = hdr;
+
+       if (type == INFO_SIMPLE) {
+               struct phys_hdr *simple = hdr;
+
+               return simple->cpus;
+       }
+       /* INFO_EXT */
+       return ext->cpus;
+}
+
+/* Physical CPU info block */
+
+/*
+ * Physical cpu info block, simple (INFO_SIMPLE) layout.
+ * Packed: the field offsets mirror the data stored by diag 204.
+ */
+struct phys_cpu {
+       __u16 cpu_addr;         /* used as the cpu directory name */
+       char  reserved1[2];
+       __u8  ctidx;            /* index passed to diag224_idx2name() */
+       char  reserved2[3];
+       __u64 mgm_time;         /* exported as "mgmtime" */
+       char  reserved3[8];
+} __attribute__ ((packed));
+
+/*
+ * Physical cpu info block, extended (INFO_EXT) layout.
+ * Same leading fields as struct phys_cpu, with a larger reserved tail.
+ */
+struct x_phys_cpu {
+       __u16 cpu_addr;         /* used as the cpu directory name */
+       char  reserved1[2];
+       __u8  ctidx;            /* index passed to diag224_idx2name() */
+       char  reserved2[3];
+       __u64 mgm_time;         /* exported as "mgmtime" */
+       char  reserved3[80];
+} __attribute__ ((packed));
+
+/*
+ * Size in bytes of one physical cpu info block for format @type.
+ * Any value other than INFO_SIMPLE is handled as INFO_EXT.
+ */
+static inline int phys_cpu__size(enum diag204_format type)
+{
+       return (type == INFO_SIMPLE) ? sizeof(struct phys_cpu)
+                                    : sizeof(struct x_phys_cpu);
+}
+
+/*
+ * Return the cpu_addr field of the physical cpu block at @hdr,
+ * interpreting @hdr according to @type.
+ */
+static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr)
+{
+       struct x_phys_cpu *ext = hdr;
+
+       if (type == INFO_SIMPLE) {
+               struct phys_cpu *simple = hdr;
+
+               return simple->cpu_addr;
+       }
+       /* INFO_EXT */
+       return ext->cpu_addr;
+}
+
+/*
+ * Return the mgm_time field of the physical cpu block at @hdr,
+ * interpreting @hdr according to @type.
+ */
+static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr)
+{
+       struct x_phys_cpu *ext = hdr;
+
+       if (type == INFO_SIMPLE) {
+               struct phys_cpu *simple = hdr;
+
+               return simple->mgm_time;
+       }
+       /* INFO_EXT */
+       return ext->mgm_time;
+}
+
+/*
+ * Return the ctidx field of the physical cpu block at @hdr, interpreting
+ * @hdr according to @type. The field is a __u8; it is widened to the
+ * __u64 return type of this accessor.
+ */
+static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
+{
+       struct x_phys_cpu *ext = hdr;
+
+       if (type == INFO_SIMPLE) {
+               struct phys_cpu *simple = hdr;
+
+               return simple->ctidx;
+       }
+       /* INFO_EXT */
+       return ext->ctidx;
+}
+
+/* Diagnose 204 functions */
+
+/*
+ * Issue diagnose 0x204.
+ *
+ * @subcode: subcode, OR-ed with the data format by the callers in this file
+ * @size:    size argument; callers in this file pass a page count
+ * @addr:    buffer address handed to the instruction (NULL for SUBC_RSI)
+ *
+ * @subcode is bound to register 0 and @size to register 1; both are
+ * in/out operands of the instruction.
+ *
+ * Returns -1 if register 0 is non-zero after the instruction, otherwise
+ * the value left in register 1 (truncated to int).
+ */
+static int diag204(unsigned long subcode, unsigned long size, void *addr)
+{
+       register unsigned long _subcode asm("0") = subcode;
+       register unsigned long _size asm("1") = size;
+
+       /*
+        * The __ex_table entry names label 0 (the location right after the
+        * diag) as both fault and fixup address - presumably so that a
+        * program check raised by the diag resumes there instead of
+        * oopsing; TODO confirm against the s390 exception table handling.
+        * In that case register 0 still holds the non-zero subcode and -1
+        * is returned below.
+        */
+       asm volatile ("   diag    %2,%0,0x204\n"
+                     "0: \n" ".section __ex_table,\"a\"\n"
+#ifndef __s390x__
+                     "    .align 4\n"
+                     "    .long  0b,0b\n"
+#else
+                     "    .align 8\n"
+                     "    .quad  0b,0b\n"
+#endif
+                     ".previous":"+d" (_subcode), "+d"(_size)
+                     :"d"(addr)
+                     :"memory");
+       if (_subcode)
+               return -1;
+       else
+               return _size;
+}
+
+/*
+ * For the old diag subcode 4 with simple data format we have to use real
+ * memory. If we use subcode 6 or 7 with extended data format, we can (and
+ * should) use vmalloc, since we need a lot of memory in that case. Currently
+ * up to 93 pages!
+ */
+
+/*
+ * Release the cached diag 204 buffer, if there is one, and reset the
+ * bookkeeping variables. A vmalloc-backed buffer is released through the
+ * original vmalloc pointer, a page-allocator buffer through free_pages().
+ */
+static void diag204_free_buffer(void)
+{
+       if (diag204_buf == NULL)
+               return;
+
+       if (diag204_buf_vmalloc == NULL) {
+               free_pages((unsigned long) diag204_buf, 0);
+       } else {
+               vfree(diag204_buf_vmalloc);
+               diag204_buf_vmalloc = NULL;
+       }
+       diag204_buf = NULL;
+       diag204_buf_pages = 0;
+}
+
+/*
+ * Allocate a vmalloc-backed buffer of @pages pages for diag 204 data.
+ *
+ * One page more than requested is allocated and the usable start is moved
+ * to the next 4K boundary above the vmalloc address (the start is always
+ * advanced, even if the vmalloc address is already aligned).
+ *
+ * Returns the aligned buffer, or ERR_PTR(-ENOMEM) if vmalloc() fails.
+ */
+static void *diag204_alloc_vbuf(int pages)
+{
+       unsigned long raw_addr;
+
+       /* The buffer has to be page aligned! */
+       diag204_buf_vmalloc = vmalloc(PAGE_SIZE * (pages + 1));
+       if (diag204_buf_vmalloc == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       raw_addr = (unsigned long)diag204_buf_vmalloc;
+       diag204_buf = (void *)((raw_addr & ~0xfffUL) + 0x1000);
+       diag204_buf_pages = pages;
+       return diag204_buf;
+}
+
+/*
+ * Allocate a single page from the page allocator for diag 204 data
+ * (used for the simple data format).
+ *
+ * Returns the buffer and records it in diag204_buf / diag204_buf_pages,
+ * or ERR_PTR(-ENOMEM) if no page could be allocated.
+ */
+static void *diag204_alloc_rbuf(void)
+{
+       diag204_buf = (void*)__get_free_pages(GFP_KERNEL,0);
+       /* __get_free_pages() yields 0 on failure, so NULL means no memory */
+       if (!diag204_buf)
+               return ERR_PTR(-ENOMEM);
+       diag204_buf_pages = 1;
+       return diag204_buf;
+}
+
+/*
+ * Return a buffer suitable for diag 204 data of format @fmt and store its
+ * size in pages in *@pages.
+ *
+ * An already allocated buffer is reused as is (regardless of @fmt).
+ * Otherwise INFO_SIMPLE gets one page from the page allocator, and for
+ * INFO_EXT the required page count is queried with subcode SUBC_RSI and a
+ * vmalloc buffer of that size is allocated.
+ *
+ * Returns the buffer or an ERR_PTR() value: -ENOSYS if the size query
+ * does not yield a positive page count, or the allocator's error.
+ */
+static void *diag204_get_buffer(enum diag204_format fmt, int *pages)
+{
+       if (diag204_buf) {
+               *pages = diag204_buf_pages;
+               return diag204_buf;
+       }
+
+       if (fmt == INFO_SIMPLE) {
+               *pages = 1;
+               return diag204_alloc_rbuf();
+       }
+
+       /* INFO_EXT */
+       *pages = diag204(SUBC_RSI | INFO_EXT, 0, NULL);
+       if (*pages > 0)
+               return diag204_alloc_vbuf(*pages);
+       return ERR_PTR(-ENOSYS);
+}
+
+/*
+ * diag204_probe() has to find out, which type of diagnose 204 implementation
+ * we have on our machine. Currently there are three possible scanarios:
+ *   - subcode 4   + simple data format (only one page)
+ *   - subcode 4-6 + extended data format
+ *   - subcode 4-7 + extended data format
+ *
+ * Subcode 5 is used to retrieve the size of the data, provided by subcodes
+ * 6 and 7. Subcode 7 basically has the same function as subcode 6. In addition
+ * to subcode 6 it provides also information about secondary cpus.
+ * In order to get as much information as possible, we first try
+ * subcode 7, then 6 and if both fail, we use subcode 4.
+ */
+
+/*
+ * Determine which diag 204 subcode and data format work on this machine
+ * and record the result in diag204_store_sc / diag204_info_type.
+ *
+ * Subcode 7 is tried first, then subcode 6 (both with the extended
+ * format), and finally subcode 4 with the simple format. The buffer used
+ * for probing is released again before returning, on success as well.
+ *
+ * Returns 0 on success, the allocation error if the simple-format buffer
+ * cannot be obtained, or -ENOSYS if no subcode works.
+ */
+static int diag204_probe(void)
+{
+       void *buf;
+       int pages, rc;
+
+       buf = diag204_get_buffer(INFO_EXT, &pages);
+       if (!IS_ERR(buf)) {
+               if (diag204(SUBC_STIB7 | INFO_EXT, pages, buf) >= 0) {
+                       diag204_store_sc = SUBC_STIB7;
+                       diag204_info_type = INFO_EXT;
+                       goto out;
+               }
+               if (diag204(SUBC_STIB6 | INFO_EXT, pages, buf) >= 0) {
+                       /*
+                        * Remember the subcode that actually worked;
+                        * subcode 7 has just failed on this machine.
+                        */
+                       diag204_store_sc = SUBC_STIB6;
+                       diag204_info_type = INFO_EXT;
+                       goto out;
+               }
+               /* drop the vmalloc buffer so that a real page is used below */
+               diag204_free_buffer();
+       }
+
+       /* subcodes 6 and 7 failed, now try subcode 4 */
+
+       buf = diag204_get_buffer(INFO_SIMPLE, &pages);
+       if (IS_ERR(buf)) {
+               rc = PTR_ERR(buf);
+               goto fail_alloc;
+       }
+       if (diag204(SUBC_STIB4 | INFO_SIMPLE, pages, buf) >= 0) {
+               diag204_store_sc = SUBC_STIB4;
+               diag204_info_type = INFO_SIMPLE;
+               goto out;
+       } else {
+               rc = -ENOSYS;
+               goto fail_store;
+       }
+out:
+       rc = 0;
+       /* fall through: the probe buffer is freed on success, too */
+fail_store:
+       diag204_free_buffer();
+fail_alloc:
+       return rc;
+}
+
+/*
+ * Fetch the current diag 204 data using the subcode and format selected
+ * by diag204_probe().
+ *
+ * Returns the buffer holding the data, the ERR_PTR() value from
+ * diag204_get_buffer() if no buffer is available, or ERR_PTR(-ENOSYS) if
+ * the diagnose call fails. The buffer stays allocated in either of the
+ * last two cases; it is released by diag204_free_buffer().
+ */
+static void *diag204_store(void)
+{
+       int pages;
+       void *buf = diag204_get_buffer(diag204_info_type, &pages);
+
+       if (IS_ERR(buf))
+               return buf;
+       if (diag204(diag204_store_sc | diag204_info_type, pages, buf) < 0)
+               return ERR_PTR(-ENOSYS);
+       return buf;
+}
+
+/* Diagnose 224 functions */
+
+/*
+ * Issue diagnose 0x224 with the constant 0 as first and @ptr as second
+ * register operand. The caller in this file passes a page-sized buffer
+ * that afterwards holds the cpu type name table.
+ * NOTE(review): unlike diag204() there is no exception table entry here,
+ * so the instruction is presumably expected to be available - confirm.
+ */
+static void diag224(void *ptr)
+{
+       asm volatile("   diag    %0,%1,0x224\n"
+                    : :"d" (0), "d"(ptr) : "memory");
+}
+
+/*
+ * Allocate the diag 224 name table and fill it in.
+ *
+ * The first byte of the table is used as an entry count: (count + 1)
+ * entries of 16 bytes each, starting at offset 16, are passed through
+ * EBCASC() so that they can later be used as ASCII strings.
+ *
+ * Returns 0 on success or -ENOMEM if the table cannot be allocated.
+ */
+static int diag224_get_name_table(void)
+{
+       int entries;
+
+       /* memory must be below 2GB */
+       diag224_cpu_names = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA);
+       if (diag224_cpu_names == NULL)
+               return -ENOMEM;
+
+       diag224(diag224_cpu_names);
+       entries = *diag224_cpu_names + 1;
+       EBCASC(diag224_cpu_names + CPU_NAME_LEN, entries * CPU_NAME_LEN);
+       return 0;
+}
+
+/*
+ * Free the diag 224 name table. Safe to call when no table is allocated,
+ * since kfree(NULL) is a no-op.
+ */
+static void diag224_delete_name_table(void)
+{
+       kfree(diag224_cpu_names);
+       /* do not leave a dangling pointer behind in the file-scope variable */
+       diag224_cpu_names = NULL;
+}
+
+/*
+ * Copy the cpu type name with index @index from the diag 224 name table
+ * into @name and 0-terminate it.
+ *
+ * @name must provide at least CPU_NAME_LEN + 1 bytes. @index is not range
+ * checked. The return value of strstrip() is not used, so only its
+ * in-place effect on @name applies.
+ *
+ * Always returns 0.
+ */
+static int diag224_idx2name(int index, char *name)
+{
+       /* entry 0 of the table is skipped, hence index + 1 */
+       const char *entry = diag224_cpu_names + ((index + 1) * CPU_NAME_LEN);
+
+       memcpy(name, entry, CPU_NAME_LEN);
+       name[CPU_NAME_LEN] = 0;
+       strstrip(name);
+       return 0;
+}
+
+/*
+ * Initialize the diag 204/224 backend: probe for a working diag 204
+ * variant and fetch the diag 224 cpu type name table.
+ *
+ * Returns 0 on success, -ENODATA if no diag 204 variant works, or the
+ * error from diag224_get_name_table().
+ */
+__init int hypfs_diag_init(void)
+{
+       int rc;
+
+       if (diag204_probe()) {
+               /* terminate the message so it is not merged with the next one */
+               printk(KERN_ERR "hypfs: diag 204 not working.\n");
+               return -ENODATA;
+       }
+       rc = diag224_get_name_table();
+       if (rc) {
+               diag224_delete_name_table();
+               printk(KERN_ERR "hypfs: could not get name table.\n");
+       }
+       return rc;
+}
+
+/*
+ * Tear down the diag backend: release the diag 224 name table and the
+ * cached diag 204 data buffer.
+ */
+__exit void hypfs_diag_exit(void)
+{
+       /* name table first, then the data buffer */
+       diag224_delete_name_table();
+       diag204_free_buffer();
+}
+
+/*
+ * Functions to create the directory structure
+ * *******************************************
+ */
+
+/*
+ * Create the directory for one logical cpu below @cpus_dir and populate
+ * it from the cpu info block at @cpu_info.
+ *
+ * The directory is named after the cpu address and receives the files
+ * "mgmtime" (acc_time - lp_time), "cputime" (lp_time), "onlinetime"
+ * (extended data format only) and "type" (name from the diag 224 table).
+ *
+ * Returns 0 on success or the negative error code of the first hypfs
+ * call that fails.
+ */
+static int hypfs_create_cpu_files(struct super_block *sb,
+                                 struct dentry *cpus_dir, void *cpu_info)
+{
+       struct dentry *cpu_dir;
+       char buffer[TMP_SIZE];
+       void *rc;
+
+       snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type,
+                                                           cpu_info));
+       cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer);
+       /* do not hand an error pointer to the file creation calls below */
+       if (IS_ERR(cpu_dir))
+               return PTR_ERR(cpu_dir);
+       rc = hypfs_create_u64(sb, cpu_dir, "mgmtime",
+                             cpu_info__acc_time(diag204_info_type, cpu_info) -
+                             cpu_info__lp_time(diag204_info_type, cpu_info));
+       if (IS_ERR(rc))
+               return PTR_ERR(rc);
+       rc = hypfs_create_u64(sb, cpu_dir, "cputime",
+                             cpu_info__lp_time(diag204_info_type, cpu_info));
+       if (IS_ERR(rc))
+               return PTR_ERR(rc);
+       if (diag204_info_type == INFO_EXT) {
+               rc = hypfs_create_u64(sb, cpu_dir, "onlinetime",
+                                     cpu_info__online_time(diag204_info_type,
+                                                           cpu_info));
+               if (IS_ERR(rc))
+                       return PTR_ERR(rc);
+       }
+       /* buffer is reused: it now receives the cpu type name */
+       diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer);
+       rc = hypfs_create_str(sb, cpu_dir, "type", buffer);
+       if (IS_ERR(rc))
+               return PTR_ERR(rc);
+       return 0;
+}
+
+/*
+ * Create the directory tree for one partition below @systems_dir.
+ *
+ * @part_hdr points to the partition header; the cpu info blocks of the
+ * partition follow it directly. A directory named after the partition is
+ * created, with a "cpus" subdirectory holding one entry per cpu.
+ *
+ * Returns a pointer just behind the last cpu info block of this partition
+ * (i.e. where the next block in the buffer starts), or an ERR_PTR() value
+ * on failure.
+ */
+static void *hypfs_create_lpar_files(struct super_block *sb,
+                                    struct dentry *systems_dir, void *part_hdr)
+{
+       char lpar_name[LPAR_NAME_LEN + 1];
+       struct dentry *lpar_dir, *cpus_dir;
+       void *cpu_info;
+       int cpu, rc;
+
+       part_hdr__part_name(diag204_info_type, part_hdr, lpar_name);
+       lpar_name[LPAR_NAME_LEN] = 0;
+
+       lpar_dir = hypfs_mkdir(sb, systems_dir, lpar_name);
+       if (IS_ERR(lpar_dir))
+               return lpar_dir;
+
+       cpus_dir = hypfs_mkdir(sb, lpar_dir, "cpus");
+       if (IS_ERR(cpus_dir))
+               return cpus_dir;
+
+       /* walk the cpu info blocks that follow the partition header */
+       cpu_info = part_hdr + part_hdr__size(diag204_info_type);
+       for (cpu = 0; cpu < part_hdr__rcpus(diag204_info_type, part_hdr);
+            cpu++) {
+               rc = hypfs_create_cpu_files(sb, cpus_dir, cpu_info);
+               if (rc)
+                       return ERR_PTR(rc);
+               cpu_info += cpu_info__size(diag204_info_type);
+       }
+       return cpu_info;
+}
+
+/*
+ * Create the directory for one physical cpu below @cpus_dir and populate
+ * it from the physical cpu block at @cpu_info.
+ *
+ * The directory is named after the cpu address and receives the files
+ * "mgmtime" and "type" (name from the diag 224 table).
+ *
+ * Returns 0 on success or the negative error code of the first hypfs
+ * call that fails.
+ */
+static int hypfs_create_phys_cpu_files(struct super_block *sb,
+                                      struct dentry *cpus_dir, void *cpu_info)
+{
+       char text[TMP_SIZE];
+       struct dentry *cpu_dir;
+       void *entry;
+
+       snprintf(text, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type,
+                                                         cpu_info));
+       cpu_dir = hypfs_mkdir(sb, cpus_dir, text);
+       if (IS_ERR(cpu_dir))
+               return PTR_ERR(cpu_dir);
+
+       entry = hypfs_create_u64(sb, cpu_dir, "mgmtime",
+                                phys_cpu__mgm_time(diag204_info_type,
+                                                   cpu_info));
+       if (IS_ERR(entry))
+               return PTR_ERR(entry);
+
+       /* text is reused: it now receives the cpu type name */
+       diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), text);
+       entry = hypfs_create_str(sb, cpu_dir, "type", text);
+       if (IS_ERR(entry))
+               return PTR_ERR(entry);
+       return 0;
+}
+
+/*
+ * Create the "cpus" directory for the physical cpus below @parent_dir.
+ *
+ * @phys_hdr points to the physical header; the physical cpu blocks follow
+ * it directly and each of them gets its own subdirectory.
+ *
+ * Returns a pointer just behind the last physical cpu block, or an
+ * ERR_PTR() value on failure.
+ */
+static void *hypfs_create_phys_files(struct super_block *sb,
+                                    struct dentry *parent_dir, void *phys_hdr)
+{
+       struct dentry *cpus_dir;
+       void *cpu_info;
+       int cpu, rc;
+
+       cpus_dir = hypfs_mkdir(sb, parent_dir, "cpus");
+       if (IS_ERR(cpus_dir))
+               return cpus_dir;
+
+       /* walk the physical cpu blocks that follow the physical header */
+       cpu_info = phys_hdr + phys_hdr__size(diag204_info_type);
+       for (cpu = 0; cpu < phys_hdr__cpus(diag204_info_type, phys_hdr);
+            cpu++) {
+               rc = hypfs_create_phys_cpu_files(sb, cpus_dir, cpu_info);
+               if (rc)
+                       return ERR_PTR(rc);
+               cpu_info += phys_cpu__size(diag204_info_type);
+       }
+       return cpu_info;
+}
+
+/*
+ * Populate the hypfs tree below @root from freshly stored diag 204 data.
+ *
+ * Creates "systems" with one subdirectory per partition, the physical cpu
+ * directory directly below @root when the LPAR_PHYS_FLG flag is set in
+ * the time information block, and "hyp" containing the "type" file.
+ *
+ * Returns 0 on success or the negative error code of the first step that
+ * fails. Entries created before the failure are not removed here.
+ */
+int hypfs_diag_create_files(struct super_block *sb, struct dentry *root)
+{
+       struct dentry *systems_dir, *hyp_dir;
+       void *buffer, *time_hdr, *part_hdr, *ptr;
+       int lpar;
+
+       buffer = diag204_store();
+       if (IS_ERR(buffer))
+               return PTR_ERR(buffer);
+
+       systems_dir = hypfs_mkdir(sb, root, "systems");
+       if (IS_ERR(systems_dir))
+               return PTR_ERR(systems_dir);
+
+       /* the data starts with the time header, partition blocks follow */
+       time_hdr = buffer;
+       part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type);
+       for (lpar = 0; lpar < info_blk_hdr__npar(diag204_info_type, time_hdr);
+            lpar++) {
+               /* each call returns the start of the next block */
+               part_hdr = hypfs_create_lpar_files(sb, systems_dir, part_hdr);
+               if (IS_ERR(part_hdr))
+                       return PTR_ERR(part_hdr);
+       }
+
+       /* behind the last partition: the optional physical cpu data */
+       if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) {
+               ptr = hypfs_create_phys_files(sb, root, part_hdr);
+               if (IS_ERR(ptr))
+                       return PTR_ERR(ptr);
+       }
+
+       hyp_dir = hypfs_mkdir(sb, root, "hyp");
+       if (IS_ERR(hyp_dir))
+               return PTR_ERR(hyp_dir);
+
+       ptr = hypfs_create_str(sb, hyp_dir, "type", "LPAR Hypervisor");
+       if (IS_ERR(ptr))
+               return PTR_ERR(ptr);
+
+       return 0;
+}