2 * TUX - Integrated Application Protocols Layer and Object Cache
4 * Copyright (C) 2000, 2001, Ingo Molnar <mingo@redhat.com>
6 * Cleaned up logger output for Alpha.
7 * -- Phil Ezolt (Phillip.Ezolt@compaq.com) & Bill Carr (wcarr92@yahoo.com)
9 * logger.c: log requests finished by TUX.
12 #define __KERNEL_SYSCALLS__
15 /****************************************************************
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2, or (at your option)
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30 ****************************************************************/
32 static DEFINE_SPINLOCK(log_lock);
33 static unsigned int log_head, log_tail;
34 static char * log_buffer = NULL;
35 static DECLARE_WAIT_QUEUE_HEAD(log_wait);
36 static DECLARE_WAIT_QUEUE_HEAD(log_full);
37 static int logger_pid = 0;
40 * High-speed TUX logging architecture:
42 * All fast threads share a common log-ringbuffer. (default size 1MB)
43 * Log entries are binary and are padded to be cacheline aligned, this
44 * ensures that there is no cache-pingpong between fast threads.
46 * The logger thread writes out pending log entries within 1 second
47 * (buffer-cache writes data out within 5 seconds). The logger thread
48 * gets activated once we have more than 25% of the log ringbuffer
49 * filled - or the 1 second log timeout expires. Fast threads block
50 * if if more than 95% of the ringbuffer is filled and unblock only
51 * if used logbuffer space drops below 90%.
53 * This architecture guarantees that 1) logging is reliable (no
54 * log entry is ever lost), 2) timely (touches disk within 6 seconds),
55 * 3) in the log-contention case the saturation behavior is still
56 * write-clustered, but 4) if the logger thread can keep up then
57 * the coupling is completely asynchron and parallel.
59 * The binary log format gives us about 50% saved IO/memory bandwith
60 * and 50% less on-disk used log space than the traditional W3C ASCII
63 * (We might switch to raw IO though to write the logfile.)
66 #define SOFT_LIMIT (LOG_LEN*25/100)
67 #define HARD_LIMIT (LOG_LEN*95/100)
68 #define HARD_RELAX_LIMIT (LOG_LEN*90/100)
70 unsigned int tux_logentry_align_order = 5;
72 #if SMP_CACHE_BYTES == 8
73 # define TUX_LOGENTRY_ALIGN 3
75 #if SMP_CACHE_BYTES == 16
76 # define TUX_LOGENTRY_ALIGN 4
78 #if SMP_CACHE_BYTES == 32
79 # define TUX_LOGENTRY_ALIGN 5
81 #if SMP_CACHE_BYTES == 64
82 # define TUX_LOGENTRY_ALIGN 6
84 #if SMP_CACHE_BYTES == 128
85 # define TUX_LOGENTRY_ALIGN 7
87 #if SMP_CACHE_BYTES == 256
88 # define TUX_LOGENTRY_ALIGN 8
98 #define ROUND_UP(x) (((((x)-1) >> TUX_LOGENTRY_ALIGN) + 1) \
99 << TUX_LOGENTRY_ALIGN)
101 static void __throttle_logging (void)
103 DECLARE_WAITQUEUE(wait, current);
106 add_wait_queue(&log_full, &wait);
108 static unsigned long last_warning = 0;
110 if (jiffies - last_warning > 10*HZ) {
111 last_warning = jiffies;
112 printk(KERN_NOTICE "TUX: log buffer overflow, have to throttle TUX thread!\n");
115 current->state = TASK_INTERRUPTIBLE;
117 spin_lock(&log_lock);
118 pending = log_head-log_tail;
119 spin_unlock(&log_lock);
121 if ((pending % LOG_LEN) < HARD_LIMIT)
126 current->state = TASK_RUNNING;
127 remove_wait_queue(&log_full, &wait);
130 #ifdef CONFIG_TUX_DEBUG
131 #define CHECK_LOGPTR(ptr) \
133 if ((ptr < log_buffer) || (ptr > log_buffer + LOG_LEN)) { \
134 printk(KERN_ERR "TUX: ouch: log ptr %p > %p + %ld!\n", \
135 ptr, log_buffer, LOG_LEN); \
140 #define CHECK_LOGPTR(ptr) do { } while (0)
143 void __log_request (tux_req_t *req)
147 unsigned int inc, len, uri_len, pending, next_head, def_vhost_len = 0;
150 if (req->proto->pre_log)
151 req->proto->pre_log(req);
153 * Log the reply status (success, or type of failure)
155 if (!tux_log_incomplete && (!req->status || (req->bytes_sent == -1))) {
157 Dprintk("not logging req %p: {%s} [%d/%d]\n", req, req->uri_str, req->status, req->bytes_sent);
160 Dprintk("uri: {%s} [%d]\n", req->uri_str, req->uri_len);
162 #define NO_URI "<none>"
164 uri_len = req->uri_len;
165 uri_str = req->uri_str;
168 uri_len = sizeof(NO_URI)-1;
174 len += req->host_len;
176 def_vhost_len = strlen(tux_default_vhost);
177 len += def_vhost_len;
181 Dprintk("method_str: {%s} [%d]\n", req->method_str, req->method_len);
182 len += req->method_len + 1;
184 Dprintk("version_str: {%s} [%d]\n", req->version_str, req->version_len);
185 len += req->version_len + 1;
187 #ifdef CONFIG_TUX_EXTENDED_LOG
188 Dprintk("user_agent_str: {%s} [%d]\n", req->user_agent_str, req->user_agent_len);
189 len += req->user_agent_len + 1;
191 if (tux_referer_logging) {
192 Dprintk("referer_str: {%s} [%d]\n", req->referer_str, req->referer_len);
193 len += req->referer_len;
197 inc = 5*sizeof(u32) + len;
198 #ifdef CONFIG_TUX_EXTENDED_LOG
199 inc += 7*sizeof(u32);
202 spin_lock_irqsave(&log_lock, flags);
204 next_head = ROUND_UP(log_head + inc);
206 if (next_head < LOG_LEN) {
207 str = log_buffer + log_head;
208 if (str > log_buffer + LOG_LEN)
210 log_head = next_head;
212 if (log_head < LOG_LEN)
213 memset(log_buffer+log_head, 0, LOG_LEN-log_head);
215 log_head = ROUND_UP(inc);
218 if (str < log_buffer || str+inc >= log_buffer+LOG_LEN)
222 * Log record signature - this makes finding the next entry
223 * easier (since record length is variable), and makes the
224 * binary logfile more robust against potential data corruption
225 * and other damage. The signature also servers as a log format
226 * version identifier.
228 #ifdef CONFIG_TUX_EXTENDED_LOG
229 *(u32 *)str = 0x2223beef;
231 *(u32 *)str = 0x1112beef;
238 * Log the client IP address:
241 *(u32 *)str = req->client_addr;
245 #ifdef CONFIG_TUX_EXTENDED_LOG
247 * Log the client port number:
251 *(u32 *)str = req->client_port;
257 * Log the request timestamp, in units of 'seconds since 1970'.
259 *(u32 *)str = CURRENT_TIME.tv_sec;
263 #ifdef CONFIG_TUX_EXTENDED_LOG
264 *(u32 *)str = req->accept_timestamp; str += sizeof(u32);
265 *(u32 *)str = req->parse_timestamp; str += sizeof(u32);
266 *(u32 *)str = req->output_timestamp; str += sizeof(u32);
267 *(u32 *)str = req->flush_timestamp; str += sizeof(u32);
268 *(u32 *)str = req->had_cachemiss; str += sizeof(u32);
269 *(u32 *)str = req->keep_alive; str += sizeof(u32);
272 * Log the requested file size (in fact, log actual bytes sent.)
274 *(u32 *)str = req->bytes_sent;
278 *(u32 *)str = req->status;
283 * Zero-terminated method, (base) URI, query and version string.
285 if (req->method_len) {
286 memcpy(str, req->method_str, req->method_len);
287 str += req->method_len;
294 memcpy(str, req->host, req->host_len);
295 str += req->host_len;
297 memcpy(str, tux_default_vhost, def_vhost_len);
298 str += def_vhost_len;
303 memcpy(str, uri_str, uri_len);
309 if (req->version_len) {
310 memcpy(str, req->version_str, req->version_len);
311 str += req->version_len;
315 #ifdef CONFIG_TUX_EXTENDED_LOG
316 if (req->user_agent_len) {
317 memcpy(str, req->user_agent_str, req->user_agent_len);
318 str += req->user_agent_len;
325 if (tux_referer_logging && req->referer_len) {
326 memcpy(str, req->referer_str, req->referer_len);
327 str += req->referer_len;
333 * pad with spaces to next cacheline, with an ending newline.
334 * (not needed for the user-space log utility, but results in
335 * a more readable binary log file, and reduces the amount
336 * of cache pingpong.)
338 next = (char *)ROUND_UP((unsigned long)str);
342 memset(str, ' ', len);
344 pending = (log_head-log_tail) % LOG_LEN;
345 spin_unlock_irqrestore(&log_lock, flags);
347 if (pending >= SOFT_LIMIT)
350 if (pending >= HARD_LIMIT)
351 __throttle_logging();
354 void tux_push_pending (struct sock *sk)
356 struct tcp_sock *tp = tcp_sk(sk);
357 struct inet_connection_sock *icsk = inet_csk(sk);
359 Dprintk("pushing pending frames on sock %p.\n", sk);
361 if ((sk->sk_state == TCP_ESTABLISHED) && !sk->sk_err) {
362 icsk->icsk_ack.pingpong = tux_ack_pingpong;
364 __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 0), TCP_NAGLE_OFF);
369 inline void tux_push_req (tux_req_t *req)
372 tux_push_pending(req->sock->sk);
374 tux_push_pending(req->data_sock->sk);
377 void __put_data_sock (tux_req_t *req)
379 unlink_tux_data_socket(req);
380 if (req->data_sock->file)
381 fput(req->data_sock->file);
383 sock_release(req->data_sock);
384 req->data_sock = NULL;
387 void flush_request (tux_req_t *req, int cachemiss)
395 __set_task_state(current, TASK_RUNNING);
397 if (req->magic != TUX_MAGIC)
399 if (req->ti->thread != current)
401 #ifdef CONFIG_TUX_DEBUG
402 if (req->bytes_expected && (req->bytes_sent != req->bytes_expected)) {
403 printk("hm, bytes_expected: %d != bytes_sent: %d!\n",
404 req->bytes_expected, req->bytes_sent);
408 SET_TIMESTAMP(req->flush_timestamp);
415 Dprintk("FLUSHING req %p <%p> (sock %p, sk %p) (keepalive: %d, status: %d)\n", req, __builtin_return_address(0), sock, sk, req->keep_alive, req->status);
416 if (req->in_file->f_pos)
418 release_req_dentry(req);
421 if (req->docroot_dentry) {
422 dput(req->docroot_dentry);
423 req->docroot_dentry = NULL;
424 if (!req->docroot_mnt)
427 if (req->docroot_mnt) {
428 mntput(req->docroot_mnt);
429 req->docroot_mnt = NULL;
432 req->offset_start = 0;
435 req->total_file_len = 0;
440 req->ftp_command = 0;
444 if (test_bit(0, &req->idle_input))
446 if (test_bit(0, &req->wait_output_space))
454 req->usermodule_idx = 0;
456 if (req->module_dentry) {
457 dput(req->module_dentry);
458 req->module_dentry = NULL;
463 req->headers_len = 0;
465 req->method = METHOD_NONE;
467 req->method_str = NULL;
469 req->version_str = NULL;
470 req->version_len = 0;
475 req->objectname[0] = 0;
476 req->objectname_len = 0;
478 req->query_str = NULL;
481 req->cookies_str = NULL;
482 req->cookies_len = 0;
483 req->parse_cookies = 0;
485 req->contentlen_str = NULL;
486 req->contentlen_len = 0;
487 req->content_len = 0;
489 req->user_agent_str = NULL;
490 req->user_agent_len = 0;
492 req->may_send_gzip = 0;
493 req->content_gzipped = 0;
495 req->content_type_str = NULL;
496 req->content_type_len = 0;
498 req->accept_str = NULL;
501 req->accept_charset_str = NULL;
502 req->accept_charset_len = 0;
504 req->accept_encoding_str = NULL;
505 req->accept_encoding_len = 0;
507 req->accept_language_str = NULL;
508 req->accept_language_len = 0;
510 req->cache_control_str = NULL;
511 req->cache_control_len = 0;
513 req->if_modified_since_str = NULL;
514 req->if_modified_since_len = 0;
516 req->if_none_match_str = NULL;
517 req->if_none_match_len = 0;
519 req->if_range_str = NULL;
520 req->if_range_len = 0;
522 req->negotiate_str = NULL;
523 req->negotiate_len = 0;
525 req->pragma_str = NULL;
528 req->referer_str = NULL;
529 req->referer_len = 0;
531 req->post_data_str = NULL;
532 req->post_data_len = 0;
534 SET_TIMESTAMP(req->accept_timestamp);
535 #ifdef CONFIG_TUX_EXTENDED_LOG
536 req->parse_timestamp = 0;
537 req->output_timestamp = 0;
538 req->flush_timestamp = 0;
542 req->total_bytes += req->bytes_sent;
544 #ifdef CONFIG_TUX_DEBUG
545 req->bytes_expected = 0;
548 keep_alive = req->keep_alive;
549 clear_keepalive(req);
550 req->had_cachemiss = 0;
551 // first_timestamp and total_bytes is kept!
560 __free_page(req->abuf.page);
561 memset(&req->abuf, 0, sizeof(req->abuf));
563 if (sk && keep_alive) {
564 add_tux_atom(req, parse_request);
565 if (skb_queue_empty(&sk->sk_receive_queue)) {
566 spin_lock_irq(&req->ti->work_lock);
567 add_keepalive_timer(req);
568 if (test_and_set_bit(0, &req->idle_input))
571 * Avoid the race with the event callback:
573 if (skb_queue_empty(&sk->sk_receive_queue) ||
574 !test_and_clear_bit(0, &req->idle_input)) {
575 INC_STAT(nr_idle_input_pending);
576 spin_unlock_irq(&req->ti->work_lock);
580 del_keepalive_timer(req);
581 spin_unlock_irq(&req->ti->work_lock);
583 Dprintk("KEEPALIVE PENDING req %p <%p> (sock %p, sk %p) (keepalive: %d, status: %d)\n", req, __builtin_return_address(0), req->sock, req->sock->sk, req->keep_alive, req->status);
584 add_req_to_workqueue(req);
585 INC_STAT(nr_keepalive_optimized);
589 del_timer_sync(&req->keepalive_timer);
590 del_timer_sync(&req->output_timer);
592 if (timer_pending(&req->keepalive_timer))
594 if (timer_pending(&req->output_timer))
596 if (!list_empty(&req->lru))
598 req->nr_keepalives = 0;
599 req->client_addr = 0;
600 req->client_port = 0;
602 req->ftp_offset_start = 0;
607 if (req->cwd_dentry) {
608 dput(req->cwd_dentry);
609 req->cwd_dentry = NULL;
614 mntput(req->cwd_mnt);
627 unlink_tux_socket(req);
630 * Close potential user-space file descriptors.
633 int fd = req->fd, ret;
636 Dprintk("closing req->fd: %d\n", fd);
650 static int warn_once = 1;
652 static loff_t log_filp_last_index;
654 static unsigned int writeout_log (void)
656 unsigned int len, pending, next_log_tail;
657 mm_segment_t oldmm = get_fs();
658 struct file *log_filp;
662 struct address_space *mapping;
665 Dprintk("TUX logger: opening log file {%s}.\n", tux_logfile);
666 log_filp = tux_open_file(tux_logfile, O_CREAT|O_APPEND|O_WRONLY|O_LARGEFILE);
669 printk(KERN_ERR "TUX: could not open log file {%s}!\n",
673 __set_current_state(TASK_INTERRUPTIBLE);
674 schedule_timeout(HZ);
677 spin_lock(&log_lock);
678 str = log_buffer + log_tail;
679 if (log_head < log_tail) {
680 len = LOG_LEN-log_tail;
683 len = log_head-log_tail;
684 next_log_tail = log_head;
688 spin_unlock(&log_lock);
691 ret = log_filp->f_op->write(log_filp, str, len, &log_filp->f_pos);
695 if (ret == -ENOSPC) {
696 printk(KERN_ERR "TUX: trying to write TUX logfile %s, but filesystem is full! Lost %d bytes of log data.\n", tux_logfile, len);
698 printk(KERN_ERR "TUX: log write %d != %d.\n", ret, len);
699 printk(KERN_ERR "TUX: log_filp: %p, str: %p, len: %d str[len-1]: %d.\n", log_filp, str, len, str[len-1]);
705 * Sync log data to disk:
707 inode = log_filp->f_dentry->d_inode;
708 mapping = inode->i_mapping;
709 if (mapping->nrpages > 256) { /* batch stuff up */
710 mutex_lock(&inode->i_mutex);
711 filemap_fdatawrite(inode->i_mapping);
714 * Now nuke old pagecache up to the place where we just
715 * started the I/O. There's no point in trying to invalidate
716 * pages after that, because they're currently in-flight.
718 invalidate_mapping_pages(mapping, 0, log_filp_last_index);
719 log_filp_last_index = log_filp->f_pos >> PAGE_CACHE_SHIFT;
720 mutex_unlock(&inode->i_mutex);
724 spin_lock(&log_lock);
726 log_tail = next_log_tail;
727 pending = (log_head-log_tail) % LOG_LEN;
728 spin_unlock(&log_lock);
730 if (pending < HARD_LIMIT)
737 static DECLARE_WAIT_QUEUE_HEAD(stop_logger_wait);
738 static int stop_logger = 0;
740 static int logger_thread (void *data)
742 DECLARE_WAITQUEUE(wait, current);
745 daemonize("TUX logger");
749 printk(KERN_NOTICE "TUX: logger thread started.\n");
754 cpus_and(map, cpu_online_map, tux_log_cpu_mask);
755 if (!(cpus_empty(map)))
756 set_cpus_allowed(current, map);
762 spin_lock_irq(¤t->sighand->siglock);
763 siginitsetinv(¤t->blocked, 0);
765 spin_unlock_irq(¤t->sighand->siglock);
769 log_buffer = vmalloc(LOG_LEN);
774 memset(log_buffer, 0, LOG_LEN);
775 log_head = log_tail = 0;
777 current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
779 add_wait_queue(&log_wait, &wait);
782 Dprintk("logger does writeout - stop:%d.\n", stop_logger);
784 while (writeout_log() >= SOFT_LIMIT) {
793 Dprintk("logger does sleep - stop:%d.\n", stop_logger);
794 __set_current_state(TASK_INTERRUPTIBLE);
795 if (log_head != log_tail) {
796 __set_current_state(TASK_RUNNING);
799 schedule_timeout(HZ);
801 Dprintk("logger back from sleep - stop:%d.\n", stop_logger);
802 if (signal_pending(current))
805 remove_wait_queue(&log_wait, &wait);
810 wake_up(&stop_logger_wait);
817 void start_log_thread (void)
821 logger_pid = kernel_thread(logger_thread, NULL, 0);
826 void stop_log_thread (void)
828 DECLARE_WAITQUEUE(wait, current);
830 Dprintk("stopping logger thread %d ...\n", logger_pid);
832 __set_current_state(TASK_UNINTERRUPTIBLE);
833 add_wait_queue(&stop_logger_wait, &wait);
837 __set_current_state(TASK_RUNNING);
838 remove_wait_queue(&stop_logger_wait, &wait);
840 Dprintk("logger thread stopped!\n");