2 * TUX - Integrated Application Protocols Layer and Object Cache
4 * Copyright (C) 2000, 2001, Ingo Molnar <mingo@redhat.com>
6 * Cleaned up logger output for Alpha.
7 * -- Phil Ezolt (Phillip.Ezolt@compaq.com) & Bill Carr (wcarr92@yahoo.com)
9 * logger.c: log requests finished by TUX.
12 #define __KERNEL_SYSCALLS__
15 /****************************************************************
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2, or (at your option)
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30 ****************************************************************/
32 static spinlock_t log_lock = SPIN_LOCK_UNLOCKED;
33 static unsigned int log_head, log_tail;
34 static char * log_buffer = NULL;
35 static DECLARE_WAIT_QUEUE_HEAD(log_wait);
36 static DECLARE_WAIT_QUEUE_HEAD(log_full);
37 static int logger_pid = 0;
40 * High-speed TUX logging architecture:
42 * All fast threads share a common log-ringbuffer. (default size 1MB)
43 * Log entries are binary and are padded to be cacheline aligned, this
44 * ensures that there is no cache-pingpong between fast threads.
46 * The logger thread writes out pending log entries within 1 second
47 * (buffer-cache writes data out within 5 seconds). The logger thread
48 * gets activated once we have more than 25% of the log ringbuffer
49 * filled - or the 1 second log timeout expires. Fast threads block
50 * if if more than 95% of the ringbuffer is filled and unblock only
51 * if used logbuffer space drops below 90%.
53 * This architecture guarantees that 1) logging is reliable (no
54 * log entry is ever lost), 2) timely (touches disk within 6 seconds),
55 * 3) in the log-contention case the saturation behavior is still
56 * write-clustered, but 4) if the logger thread can keep up then
57 * the coupling is completely asynchron and parallel.
59 * The binary log format gives us about 50% saved IO/memory bandwith
60 * and 50% less on-disk used log space than the traditional W3C ASCII
63 * (We might switch to raw IO though to write the logfile.)
66 #define SOFT_LIMIT (LOG_LEN*25/100)
67 #define HARD_LIMIT (LOG_LEN*95/100)
68 #define HARD_RELAX_LIMIT (LOG_LEN*90/100)
70 unsigned int tux_logentry_align_order = 5;
72 #if SMP_CACHE_BYTES == 8
73 # define TUX_LOGENTRY_ALIGN 3
75 #if SMP_CACHE_BYTES == 16
76 # define TUX_LOGENTRY_ALIGN 4
78 #if SMP_CACHE_BYTES == 32
79 # define TUX_LOGENTRY_ALIGN 5
81 #if SMP_CACHE_BYTES == 64
82 # define TUX_LOGENTRY_ALIGN 6
84 #if SMP_CACHE_BYTES == 128
85 # define TUX_LOGENTRY_ALIGN 7
87 #if SMP_CACHE_BYTES == 256
88 # define TUX_LOGENTRY_ALIGN 8
98 #define ROUND_UP(x) (((((x)-1) >> TUX_LOGENTRY_ALIGN) + 1) \
99 << TUX_LOGENTRY_ALIGN)
101 static void __throttle_logging (void)
103 DECLARE_WAITQUEUE(wait, current);
106 add_wait_queue(&log_full, &wait);
108 static unsigned long last_warning = 0;
110 if (jiffies - last_warning > 10*HZ) {
111 last_warning = jiffies;
112 printk(KERN_NOTICE "TUX: log buffer overflow, have to throttle TUX thread!\n");
115 current->state = TASK_INTERRUPTIBLE;
117 spin_lock(&log_lock);
118 pending = log_head-log_tail;
119 spin_unlock(&log_lock);
121 if ((pending % LOG_LEN) < HARD_LIMIT)
126 current->state = TASK_RUNNING;
127 remove_wait_queue(&log_full, &wait);
131 #define CHECK_LOGPTR(ptr) \
133 if ((ptr < log_buffer) || (ptr > log_buffer + LOG_LEN)) { \
134 printk(KERN_ERR "TUX: ouch: log ptr %p > %p + %ld!\n", \
135 ptr, log_buffer, LOG_LEN); \
140 #define CHECK_LOGPTR(ptr) do { } while (0)
143 void __log_request (tux_req_t *req)
147 unsigned int inc, len, uri_len, pending, next_head, def_vhost_len = 0;
150 if (req->proto->pre_log)
151 req->proto->pre_log(req);
153 * Log the reply status (success, or type of failure)
155 if (!tux_log_incomplete && (!req->status || (req->bytes_sent == -1))) {
157 Dprintk("not logging req %p: {%s} [%d/%d]\n", req, req->uri_str, req->status, req->bytes_sent);
160 Dprintk("uri: {%s} [%d]\n", req->uri_str, req->uri_len);
162 #define NO_URI "<none>"
164 uri_len = req->uri_len;
165 uri_str = req->uri_str;
168 uri_len = sizeof(NO_URI)-1;
174 len += req->host_len;
176 def_vhost_len = strlen(tux_default_vhost);
177 len += def_vhost_len;
181 Dprintk("method_str: {%s} [%d]\n", req->method_str, req->method_len);
182 len += req->method_len + 1;
184 Dprintk("version_str: {%s} [%d]\n", req->version_str, req->version_len);
185 len += req->version_len + 1;
187 #if CONFIG_TUX_EXTENDED_LOG
188 Dprintk("user_agent_str: {%s} [%d]\n", req->user_agent_str, req->user_agent_len);
189 len += req->user_agent_len + 1;
191 if (tux_referer_logging) {
192 Dprintk("referer_str: {%s} [%d]\n", req->referer_str, req->referer_len);
193 len += req->referer_len;
197 inc = 5*sizeof(u32) + len;
198 #if CONFIG_TUX_EXTENDED_LOG
199 inc += 7*sizeof(u32);
202 spin_lock_irqsave(&log_lock, flags);
204 next_head = ROUND_UP(log_head + inc);
206 if (next_head < LOG_LEN) {
207 str = log_buffer + log_head;
208 if (str > log_buffer + LOG_LEN)
210 log_head = next_head;
212 if (log_head < LOG_LEN)
213 memset(log_buffer+log_head, 0, LOG_LEN-log_head);
215 log_head = ROUND_UP(inc);
218 if (str < log_buffer || str+inc >= log_buffer+LOG_LEN)
222 * Log record signature - this makes finding the next entry
223 * easier (since record length is variable), and makes the
224 * binary logfile more robust against potential data corruption
225 * and other damage. The signature also servers as a log format
226 * version identifier.
228 #if CONFIG_TUX_EXTENDED_LOG
229 *(u32 *)str = 0x2223beef;
231 *(u32 *)str = 0x1112beef;
238 * Log the client IP address:
241 *(u32 *)str = req->client_addr;
245 #if CONFIG_TUX_EXTENDED_LOG
247 * Log the client port number:
251 *(u32 *)str = req->client_port;
257 * Log the request timestamp, in units of 'seconds since 1970'.
259 *(u32 *)str = CURRENT_TIME.tv_sec;
263 #if CONFIG_TUX_EXTENDED_LOG
264 *(u32 *)str = req->accept_timestamp; str += sizeof(u32);
265 *(u32 *)str = req->parse_timestamp; str += sizeof(u32);
266 *(u32 *)str = req->output_timestamp; str += sizeof(u32);
267 *(u32 *)str = req->flush_timestamp; str += sizeof(u32);
268 *(u32 *)str = req->had_cachemiss; str += sizeof(u32);
269 *(u32 *)str = req->keep_alive; str += sizeof(u32);
272 * Log the requested file size (in fact, log actual bytes sent.)
274 *(u32 *)str = req->bytes_sent;
278 *(u32 *)str = req->status;
283 * Zero-terminated method, (base) URI, query and version string.
285 if (req->method_len) {
286 memcpy(str, req->method_str, req->method_len);
287 str += req->method_len;
294 memcpy(str, req->host, req->host_len);
295 str += req->host_len;
297 memcpy(str, tux_default_vhost, def_vhost_len);
298 str += def_vhost_len;
303 memcpy(str, uri_str, uri_len);
309 if (req->version_len) {
310 memcpy(str, req->version_str, req->version_len);
311 str += req->version_len;
315 #if CONFIG_TUX_EXTENDED_LOG
316 if (req->user_agent_len) {
317 memcpy(str, req->user_agent_str, req->user_agent_len);
318 str += req->user_agent_len;
325 if (tux_referer_logging && req->referer_len) {
326 memcpy(str, req->referer_str, req->referer_len);
327 str += req->referer_len;
333 * pad with spaces to next cacheline, with an ending newline.
334 * (not needed for the user-space log utility, but results in
335 * a more readable binary log file, and reduces the amount
336 * of cache pingpong.)
338 next = (char *)ROUND_UP((unsigned long)str);
342 memset(str, ' ', len);
344 pending = (log_head-log_tail) % LOG_LEN;
345 spin_unlock_irqrestore(&log_lock, flags);
347 if (pending >= SOFT_LIMIT)
350 if (pending >= HARD_LIMIT)
351 __throttle_logging();
354 void tux_push_pending (struct sock *sk)
356 struct tcp_opt *tp = tcp_sk(sk);
358 Dprintk("pushing pending frames on sock %p.\n", sk);
360 if ((sk->sk_state == TCP_ESTABLISHED) && !sk->sk_err) {
361 tp->ack.pingpong = tux_ack_pingpong;
363 __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 0), TCP_NAGLE_OFF);
368 inline void tux_push_req (tux_req_t *req)
371 tux_push_pending(req->sock->sk);
373 tux_push_pending(req->data_sock->sk);
376 void __put_data_sock (tux_req_t *req)
378 unlink_tux_data_socket(req);
379 if (req->data_sock->file)
380 fput(req->data_sock->file);
382 sock_release(req->data_sock);
383 req->data_sock = NULL;
386 void flush_request (tux_req_t *req, int cachemiss)
394 __set_task_state(current, TASK_RUNNING);
396 if (req->magic != TUX_MAGIC)
398 if (req->ti->thread != current)
401 if (req->bytes_expected && (req->bytes_sent != req->bytes_expected)) {
402 printk("hm, bytes_expected: %d != bytes_sent: %d!\n",
403 req->bytes_expected, req->bytes_sent);
407 SET_TIMESTAMP(req->flush_timestamp);
414 Dprintk("FLUSHING req %p <%p> (sock %p, sk %p) (keepalive: %d, status: %d)\n", req, __builtin_return_address(0), sock, sk, req->keep_alive, req->status);
415 if (req->in_file->f_pos)
417 release_req_dentry(req);
420 if (req->docroot_dentry) {
421 dput(req->docroot_dentry);
422 req->docroot_dentry = NULL;
423 if (!req->docroot_mnt)
426 if (req->docroot_mnt) {
427 mntput(req->docroot_mnt);
428 req->docroot_mnt = NULL;
431 req->offset_start = 0;
434 req->total_file_len = 0;
439 req->ftp_command = 0;
443 if (test_bit(0, &req->idle_input))
445 if (test_bit(0, &req->wait_output_space))
453 req->usermodule_idx = 0;
455 if (req->module_dentry) {
456 dput(req->module_dentry);
457 req->module_dentry = NULL;
462 req->headers_len = 0;
464 req->method = METHOD_NONE;
466 req->method_str = NULL;
468 req->version_str = NULL;
469 req->version_len = 0;
474 req->objectname[0] = 0;
475 req->objectname_len = 0;
477 req->query_str = NULL;
480 req->cookies_str = NULL;
481 req->cookies_len = 0;
482 req->parse_cookies = 0;
484 req->contentlen_str = NULL;
485 req->contentlen_len = 0;
486 req->content_len = 0;
488 req->user_agent_str = NULL;
489 req->user_agent_len = 0;
491 req->may_send_gzip = 0;
492 req->content_gzipped = 0;
494 req->content_type_str = NULL;
495 req->content_type_len = 0;
497 req->accept_str = NULL;
500 req->accept_charset_str = NULL;
501 req->accept_charset_len = 0;
503 req->accept_encoding_str = NULL;
504 req->accept_encoding_len = 0;
506 req->accept_language_str = NULL;
507 req->accept_language_len = 0;
509 req->cache_control_str = NULL;
510 req->cache_control_len = 0;
512 req->if_modified_since_str = NULL;
513 req->if_modified_since_len = 0;
515 req->if_none_match_str = NULL;
516 req->if_none_match_len = 0;
518 req->if_range_str = NULL;
519 req->if_range_len = 0;
521 req->negotiate_str = NULL;
522 req->negotiate_len = 0;
524 req->pragma_str = NULL;
527 req->referer_str = NULL;
528 req->referer_len = 0;
530 req->post_data_str = NULL;
531 req->post_data_len = 0;
533 SET_TIMESTAMP(req->accept_timestamp);
534 #if CONFIG_TUX_EXTENDED_LOG
535 req->parse_timestamp = 0;
536 req->output_timestamp = 0;
537 req->flush_timestamp = 0;
541 req->total_bytes += req->bytes_sent;
544 req->bytes_expected = 0;
547 keep_alive = req->keep_alive;
548 clear_keepalive(req);
549 req->had_cachemiss = 0;
550 // first_timestamp and total_bytes is kept!
559 __free_page(req->abuf.page);
560 memset(&req->abuf, 0, sizeof(req->abuf));
562 if (sk && keep_alive) {
563 add_tux_atom(req, parse_request);
564 if (skb_queue_empty(&sk->sk_receive_queue)) {
565 spin_lock_irq(&req->ti->work_lock);
566 add_keepalive_timer(req);
567 if (test_and_set_bit(0, &req->idle_input))
570 * Avoid the race with the event callback:
572 if (skb_queue_empty(&sk->sk_receive_queue) ||
573 !test_and_clear_bit(0, &req->idle_input)) {
574 INC_STAT(nr_idle_input_pending);
575 spin_unlock_irq(&req->ti->work_lock);
579 del_keepalive_timer(req);
580 spin_unlock_irq(&req->ti->work_lock);
582 Dprintk("KEEPALIVE PENDING req %p <%p> (sock %p, sk %p) (keepalive: %d, status: %d)\n", req, __builtin_return_address(0), req->sock, req->sock->sk, req->keep_alive, req->status);
583 add_req_to_workqueue(req);
584 INC_STAT(nr_keepalive_optimized);
588 del_timer_sync(&req->keepalive_timer);
589 del_timer_sync(&req->output_timer);
591 if (timer_pending(&req->keepalive_timer))
593 if (timer_pending(&req->output_timer))
595 if (!list_empty(&req->lru))
597 req->nr_keepalives = 0;
598 req->client_addr = 0;
599 req->client_port = 0;
601 req->ftp_offset_start = 0;
606 if (req->cwd_dentry) {
607 dput(req->cwd_dentry);
608 req->cwd_dentry = NULL;
613 mntput(req->cwd_mnt);
626 unlink_tux_socket(req);
629 * Close potential user-space file descriptors.
632 int fd = req->fd, ret;
635 Dprintk("closing req->fd: %d\n", fd);
649 static int warn_once = 1;
651 static loff_t log_filp_last_index;
653 static unsigned int writeout_log (void)
655 unsigned int len, pending, next_log_tail;
656 mm_segment_t oldmm = get_fs();
657 struct file *log_filp;
661 struct address_space *mapping;
664 Dprintk("TUX logger: opening log file {%s}.\n", tux_logfile);
665 log_filp = tux_open_file(tux_logfile, O_CREAT|O_APPEND|O_WRONLY|O_LARGEFILE);
668 printk(KERN_ERR "TUX: could not open log file {%s}!\n",
672 __set_current_state(TASK_INTERRUPTIBLE);
673 schedule_timeout(HZ);
676 spin_lock(&log_lock);
677 str = log_buffer + log_tail;
678 if (log_head < log_tail) {
679 len = LOG_LEN-log_tail;
682 len = log_head-log_tail;
683 next_log_tail = log_head;
687 spin_unlock(&log_lock);
690 ret = log_filp->f_op->write(log_filp, str, len, &log_filp->f_pos);
694 if (ret == -ENOSPC) {
695 printk(KERN_ERR "TUX: trying to write TUX logfile %s, but filesystem is full! Lost %d bytes of log data.\n", tux_logfile, len);
697 printk(KERN_ERR "TUX: log write %d != %d.\n", ret, len);
698 printk(KERN_ERR "TUX: log_filp: %p, str: %p, len: %d str[len-1]: %d.\n", log_filp, str, len, str[len-1]);
704 * Sync log data to disk:
706 inode = log_filp->f_dentry->d_inode;
707 mapping = inode->i_mapping;
708 if (mapping->nrpages > 256) { /* batch stuff up */
710 filemap_fdatawrite(inode->i_mapping);
713 * Now nuke old pagecache up to the place where we just
714 * started the I/O. There's no point in trying to invalidate
715 * pages after that, because they're currently in-flight.
717 invalidate_mapping_pages(mapping, 0, log_filp_last_index);
718 log_filp_last_index = log_filp->f_pos >> PAGE_CACHE_SHIFT;
723 spin_lock(&log_lock);
725 log_tail = next_log_tail;
726 pending = (log_head-log_tail) % LOG_LEN;
727 spin_unlock(&log_lock);
729 if (pending < HARD_LIMIT)
736 static DECLARE_WAIT_QUEUE_HEAD(stop_logger_wait);
737 static int stop_logger = 0;
739 static int logger_thread (void *data)
741 DECLARE_WAITQUEUE(wait, current);
744 daemonize("TUX logger");
748 printk(KERN_NOTICE "TUX: logger thread started.\n");
753 cpus_and(map, cpu_online_map, tux_log_cpu_mask);
754 if (!(cpus_empty(map)))
755 set_cpus_allowed(current, map);
761 spin_lock_irq(¤t->sighand->siglock);
762 siginitsetinv(¤t->blocked, 0);
764 spin_unlock_irq(¤t->sighand->siglock);
768 log_buffer = vmalloc(LOG_LEN);
769 memset(log_buffer, 0, LOG_LEN);
770 log_head = log_tail = 0;
772 current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
774 add_wait_queue(&log_wait, &wait);
777 Dprintk("logger does writeout - stop:%d.\n", stop_logger);
779 while (writeout_log() >= SOFT_LIMIT) {
788 Dprintk("logger does sleep - stop:%d.\n", stop_logger);
789 __set_current_state(TASK_INTERRUPTIBLE);
790 if (log_head != log_tail) {
791 __set_current_state(TASK_RUNNING);
794 schedule_timeout(HZ);
796 Dprintk("logger back from sleep - stop:%d.\n", stop_logger);
797 if (signal_pending(current))
800 remove_wait_queue(&log_wait, &wait);
805 wake_up(&stop_logger_wait);
812 void start_log_thread (void)
816 logger_pid = kernel_thread(logger_thread, NULL, 0);
821 void stop_log_thread (void)
823 DECLARE_WAITQUEUE(wait, current);
825 Dprintk("stopping logger thread %d ...\n", logger_pid);
827 __set_current_state(TASK_UNINTERRUPTIBLE);
828 add_wait_queue(&stop_logger_wait, &wait);
832 __set_current_state(TASK_RUNNING);
833 remove_wait_queue(&stop_logger_wait, &wait);
835 Dprintk("logger thread stopped!\n");