2 * TUX - Integrated Application Protocols Layer and Object Cache
4 * Copyright (C) 2000, 2001, Ingo Molnar <mingo@redhat.com>
6 * Cleaned up logger output for Alpha.
7 * -- Phil Ezolt (Phillip.Ezolt@compaq.com) & Bill Carr (wcarr92@yahoo.com)
9 * logger.c: log requests finished by TUX.
12 #define __KERNEL_SYSCALLS__
15 /****************************************************************
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2, or (at your option)
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30 ****************************************************************/
/*
 * Shared logging state: one ring buffer (log_buffer) indexed by
 * log_head (producer side, fast threads) and log_tail (consumer
 * side, logger thread), protected by log_lock.
 */
32 static spinlock_t log_lock = SPIN_LOCK_UNLOCKED;
/* Ring-buffer offsets; fill level is (log_head - log_tail) % LOG_LEN. */
33 static unsigned int log_head, log_tail;
/* Backing store for the ring buffer; vmalloc()ed by logger_thread(). */
34 static char * log_buffer = NULL;
/* Woken to kick the logger thread into writing out pending entries. */
35 static DECLARE_WAIT_QUEUE_HEAD(log_wait);
/* Fast threads sleep here when the buffer passes HARD_LIMIT
 * (see __throttle_logging()). */
36 static DECLARE_WAIT_QUEUE_HEAD(log_full);
/* PID of the logger kernel thread; 0 when it is not running. */
37 static int logger_pid = 0;
40 * High-speed TUX logging architecture:
42 * All fast threads share a common log-ringbuffer. (default size 1MB)
43 * Log entries are binary and are padded to be cacheline aligned, this
44 * ensures that there is no cache-pingpong between fast threads.
46 * The logger thread writes out pending log entries within 1 second
47 * (buffer-cache writes data out within 5 seconds). The logger thread
48 * gets activated once we have more than 25% of the log ringbuffer
49 * filled - or the 1 second log timeout expires. Fast threads block
50 * if more than 95% of the ringbuffer is filled and unblock only
51 * if used logbuffer space drops below 90%.
53 * This architecture guarantees that 1) logging is reliable (no
54 * log entry is ever lost), 2) timely (touches disk within 6 seconds),
55 * 3) in the log-contention case the saturation behavior is still
56 * write-clustered, but 4) if the logger thread can keep up then
57 * the coupling is completely asynchronous and parallel.
59 * The binary log format gives us about 50% saved IO/memory bandwidth
60 * and 50% less on-disk used log space than the traditional W3C ASCII
63 * (We might switch to raw IO though to write the logfile.)
/*
 * Ring-buffer fill thresholds, as fractions of LOG_LEN:
 *  - SOFT_LIMIT (25%): wake the logger thread to start writing out,
 *  - HARD_LIMIT (95%): fast threads block in __throttle_logging(),
 *  - HARD_RELAX_LIMIT (90%): blocked fast threads are released.
 */
66 #define SOFT_LIMIT (LOG_LEN*25/100)
67 #define HARD_LIMIT (LOG_LEN*95/100)
68 #define HARD_RELAX_LIMIT (LOG_LEN*90/100)

/* log2 of the log-entry alignment (5 => 32-byte entries by default). */
70 unsigned int tux_logentry_align_order = 5;

/*
 * Pick TUX_LOGENTRY_ALIGN = log2(SMP_CACHE_BYTES) so each log entry
 * is cacheline aligned and fast threads do not cache-pingpong.
 * NOTE(review): the matching #endif lines are missing from this
 * extract — presumably each #if below was a separate #if/#endif pair.
 */
72 #if SMP_CACHE_BYTES == 8
73 # define TUX_LOGENTRY_ALIGN 3
75 #if SMP_CACHE_BYTES == 16
76 # define TUX_LOGENTRY_ALIGN 4
78 #if SMP_CACHE_BYTES == 32
79 # define TUX_LOGENTRY_ALIGN 5
81 #if SMP_CACHE_BYTES == 64
82 # define TUX_LOGENTRY_ALIGN 6
84 #if SMP_CACHE_BYTES == 128
85 # define TUX_LOGENTRY_ALIGN 7
87 #if SMP_CACHE_BYTES == 256
88 # define TUX_LOGENTRY_ALIGN 8

/* Round x up to the next TUX_LOGENTRY_ALIGN boundary. */
98 #define ROUND_UP(x) (((((x)-1) >> TUX_LOGENTRY_ALIGN) + 1) \
99 << TUX_LOGENTRY_ALIGN)
/*
 * Block the calling fast thread until the log ring buffer drains
 * below HARD_LIMIT.  Called from __log_request() when the buffer
 * is over 95% full; rate-limits its own warning to one per 10s.
 */
101 static void __throttle_logging (void)
103 DECLARE_WAITQUEUE(wait, current);
106 add_wait_queue(&log_full, &wait);
/* Rate-limit state for the overflow warning below. */
108 static unsigned long last_warning = 0;
110 if (jiffies - last_warning > 10*HZ) {
111 last_warning = jiffies;
112 printk(KERN_NOTICE "TUX: log buffer overflow, have to throttle TUX thread!\n");
/* Sleep interruptibly until the logger thread frees up space. */
115 current->state = TASK_INTERRUPTIBLE;
/* Snapshot the fill level under the lock. */
117 spin_lock(&log_lock);
118 pending = log_head-log_tail;
119 spin_unlock(&log_lock);
/* Drained below the hard limit — stop waiting. */
121 if ((pending % LOG_LEN) < HARD_LIMIT)
126 current->state = TASK_RUNNING;
127 remove_wait_queue(&log_full, &wait);
/*
 * Sanity check that a log pointer lies within the ring buffer.
 * NOTE(review): the two definitions suggest a debug/non-debug
 * #if/#else removed by this extract — the second is the no-op
 * production variant.
 */
131 #define CHECK_LOGPTR(ptr) \
133 if ((ptr < log_buffer) || (ptr > log_buffer + LOG_LEN)) { \
134 printk(KERN_ERR "TUX: ouch: log ptr %p > %p + %ld!\n", \
135 ptr, log_buffer, LOG_LEN); \
140 #define CHECK_LOGPTR(ptr) do { } while (0)
/*
 * Append one binary, cacheline-aligned log record for a finished
 * request to the shared log ring buffer, then wake the logger
 * thread (over SOFT_LIMIT) or throttle ourselves (over HARD_LIMIT).
 * Record layout (visible below): signature word, client address
 * [+port], timestamp(s), bytes_sent, status, then NUL-terminated
 * method/host/URI/version [+user-agent/referrer] strings, padded
 * with spaces to the next cacheline boundary.
 */
143 void __log_request (tux_req_t *req)
147 unsigned int inc, len, uri_len, pending, next_head, def_vhost_len = 0;
/* Give the protocol layer a chance to fix up fields before logging. */
150 if (req->proto->pre_log)
151 req->proto->pre_log(req);
153 * Log the reply status (success, or type of failure)
/* Skip incomplete requests unless tux_log_incomplete is set. */
155 if (!tux_log_incomplete && (!req->status || (req->bytes_sent == -1))) {
157 Dprintk("not logging req %p: {%s} [%d/%d]\n", req, req->uri_str, req->status, req->bytes_sent);
160 Dprintk("uri: {%s} [%d]\n", req->uri_str, req->uri_len);
/* Placeholder URI for requests that never parsed one. */
162 #define NO_URI "<none>"
164 uri_len = req->uri_len;
165 uri_str = req->uri_str;
168 uri_len = sizeof(NO_URI)-1;
/* Account the virtual-host name (request host, or the default vhost). */
174 len += req->host_len;
176 def_vhost_len = strlen(tux_default_vhost);
177 len += def_vhost_len;
181 Dprintk("method_str: {%s} [%d]\n", req->method_str, req->method_len);
182 len += req->method_len + 1;
184 Dprintk("version_str: {%s} [%d]\n", req->version_str, req->version_len);
185 len += req->version_len + 1;
187 #if CONFIG_TUX_EXTENDED_LOG
188 Dprintk("user_agent_str: {%s} [%d]\n", req->user_agent_str, req->user_agent_len);
189 len += req->user_agent_len + 1;
191 if (tux_referer_logging) {
192 Dprintk("referer_str: {%s} [%d]\n", req->referer_str, req->referer_len);
193 len += req->referer_len;
/* Total record size: fixed u32 fields plus the string payload. */
197 inc = 5*sizeof(u32) + len;
198 #if CONFIG_TUX_EXTENDED_LOG
199 inc += 7*sizeof(u32);
/* Reserve space in the ring buffer under the log lock. */
202 spin_lock_irqsave(&log_lock, flags);
204 next_head = ROUND_UP(log_head + inc);
206 if (next_head < LOG_LEN) {
207 str = log_buffer + log_head;
208 if (str > log_buffer + LOG_LEN)
210 log_head = next_head;
/* Record would wrap: zero the tail of the buffer and restart at 0. */
212 if (log_head < LOG_LEN)
213 memset(log_buffer+log_head, 0, LOG_LEN-log_head);
215 log_head = ROUND_UP(inc);
218 if (str < log_buffer || str+inc >= log_buffer+LOG_LEN)
222 * Log record signature - this makes finding the next entry
223 * easier (since record length is variable), and makes the
224 * binary logfile more robust against potential data corruption
225 * and other damage. The signature also serves as a log format
226 * version identifier.
228 #if CONFIG_TUX_EXTENDED_LOG
229 *(u32 *)str = 0x2223beef;
231 *(u32 *)str = 0x1112beef;
238 * Log the client IP address:
241 *(u32 *)str = req->client_addr;
245 #if CONFIG_TUX_EXTENDED_LOG
247 * Log the client port number:
251 *(u32 *)str = req->client_port;
257 * Log the request timestamp, in units of 'seconds since 1970'.
259 *(u32 *)str = CURRENT_TIME.tv_sec;
263 #if CONFIG_TUX_EXTENDED_LOG
/* Extended log: per-phase timestamps plus cachemiss/keepalive flags. */
264 *(u32 *)str = req->accept_timestamp; str += sizeof(u32);
265 *(u32 *)str = req->parse_timestamp; str += sizeof(u32);
266 *(u32 *)str = req->output_timestamp; str += sizeof(u32);
267 *(u32 *)str = req->flush_timestamp; str += sizeof(u32);
268 *(u32 *)str = req->had_cachemiss; str += sizeof(u32);
269 *(u32 *)str = req->keep_alive; str += sizeof(u32);
272 * Log the requested file size (in fact, log actual bytes sent.)
274 *(u32 *)str = req->bytes_sent;
278 *(u32 *)str = req->status;
283 * Zero-terminated method, (base) URI, query and version string.
285 if (req->method_len) {
286 memcpy(str, req->method_str, req->method_len);
287 str += req->method_len;
/* Virtual host: the request's Host header, or the default vhost. */
294 memcpy(str, req->host, req->host_len);
295 str += req->host_len;
297 memcpy(str, tux_default_vhost, def_vhost_len);
298 str += def_vhost_len;
303 memcpy(str, uri_str, uri_len);
309 if (req->version_len) {
310 memcpy(str, req->version_str, req->version_len);
311 str += req->version_len;
315 #if CONFIG_TUX_EXTENDED_LOG
316 if (req->user_agent_len) {
317 memcpy(str, req->user_agent_str, req->user_agent_len);
318 str += req->user_agent_len;
325 if (tux_referer_logging && req->referer_len) {
326 memcpy(str, req->referer_str, req->referer_len);
327 str += req->referer_len;
333 * pad with spaces to next cacheline, with an ending newline.
334 * (not needed for the user-space log utility, but results in
335 * a more readable binary log file, and reduces the amount
336 * of cache pingpong.)
338 next = (char *)ROUND_UP((unsigned long)str);
342 memset(str, ' ', len);
/* Snapshot fill level, then act on it outside the lock. */
344 pending = (log_head-log_tail) % LOG_LEN;
345 spin_unlock_irqrestore(&log_lock, flags);
/* Over 25% full: wake the logger thread.  Over 95%: block here. */
347 if (pending >= SOFT_LIMIT)
350 if (pending >= HARD_LIMIT)
351 __throttle_logging();
/*
 * Push any queued TCP frames out on an established, error-free
 * socket, after setting the ack-pingpong mode from the tux tunable.
 */
354 void tux_push_pending (struct sock *sk)
356 struct tcp_opt *tp = tcp_sk(sk);
358 Dprintk("pushing pending frames on sock %p.\n", sk);
/* Only touch healthy, fully established connections. */
360 if ((sk->sk_state == TCP_ESTABLISHED) && !sk->sk_err) {
361 tp->ack.pingpong = tux_ack_pingpong;
/* Flush with Nagle disabled so the reply leaves immediately. */
363 __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 0), TCP_NAGLE_OFF);
/*
 * Push pending TCP frames on both sockets a request may own:
 * the main socket and (for FTP) the separate data socket.
 */
368 inline void tux_push_req (tux_req_t *req)
371 tux_push_pending(req->sock->sk);
373 tux_push_pending(req->data_sock->sk);
/*
 * Drop the request's (FTP) data socket: unlink it from tux's
 * bookkeeping, release the file reference if one exists, and
 * release the socket itself.
 */
376 void __put_data_sock (tux_req_t *req)
378 unlink_tux_data_socket(req);
379 if (req->data_sock->file)
380 fput(req->data_sock->file);
382 sock_release(req->data_sock);
383 req->data_sock = NULL;
/* open-coded sys_close */
/*
 * Close file descriptor fd of the current process without going
 * through the syscall path: remove it from the fd table under
 * file_lock, clear its bitmap bits, then filp_close() outside
 * the lock.
 */
388 long tux_close(unsigned int fd)
391 struct files_struct *files = current->files;
393 spin_lock(&files->file_lock);
/* Out-of-range fd: fall through to the error path below. */
394 if (fd >= files->max_fds)
396 filp = files->fd[fd];
399 files->fd[fd] = NULL;
400 FD_CLR(fd, files->close_on_exec);
401 /* __put_unused_fd(files, fd); */
402 __FD_CLR(fd, files->open_fds);
/* Keep next_fd pointing at the lowest free descriptor. */
403 if (fd < files->next_fd)
405 spin_unlock(&files->file_lock);
/* filp_close() may sleep, so it runs after the lock is dropped. */
406 return filp_close(filp, files);
409 spin_unlock(&files->file_lock);
/*
 * Finish a request after its reply has been sent: stamp and (via the
 * logging path) record it, release dentry/vfsmount/page references,
 * reset all per-request parsing state, and either recycle the
 * connection for keepalive or tear it down completely.
 */
413 void flush_request (tux_req_t *req, int cachemiss)
421 __set_task_state(current, TASK_RUNNING);
/* Sanity checks: valid request magic, owned by this thread. */
423 if (req->magic != TUX_MAGIC)
425 if (req->ti->thread != current)
428 if (req->bytes_expected && (req->bytes_sent != req->bytes_expected)) {
429 printk("hm, bytes_expected: %d != bytes_sent: %d!\n",
430 req->bytes_expected, req->bytes_sent);
434 SET_TIMESTAMP(req->flush_timestamp);
441 Dprintk("FLUSHING req %p <%p> (sock %p, sk %p) (keepalive: %d, status: %d)\n", req, __builtin_return_address(0), sock, sk, req->keep_alive, req->status);
442 if (req->in_file.f_pos)
444 release_req_dentry(req);
/* Drop the per-request docroot dentry/vfsmount references. */
447 if (req->docroot_dentry) {
448 dput(req->docroot_dentry);
449 req->docroot_dentry = NULL;
450 if (!req->docroot_mnt)
453 if (req->docroot_mnt) {
454 mntput(req->docroot_mnt);
455 req->docroot_mnt = NULL;
458 req->offset_start = 0;
461 req->total_file_len = 0;
466 req->ftp_command = 0;
/* These bits must be clear by now — see the keepalive path below. */
470 if (test_bit(0, &req->idle_input))
472 if (test_bit(0, &req->wait_output_space))
480 req->usermodule_idx = 0;
482 if (req->module_dentry) {
483 dput(req->module_dentry);
484 req->module_dentry = NULL;
/*
 * Reset every field populated by header parsing, so the request
 * structure can be reused for the next request on this connection.
 */
489 req->headers_len = 0;
491 req->method = METHOD_NONE;
493 req->method_str = NULL;
495 req->version_str = NULL;
496 req->version_len = 0;
501 req->objectname[0] = 0;
502 req->objectname_len = 0;
504 req->query_str = NULL;
507 req->cookies_str = NULL;
508 req->cookies_len = 0;
509 req->parse_cookies = 0;
511 req->contentlen_str = NULL;
512 req->contentlen_len = 0;
513 req->content_len = 0;
515 req->user_agent_str = NULL;
516 req->user_agent_len = 0;
518 req->may_send_gzip = 0;
519 req->content_gzipped = 0;
521 req->content_type_str = NULL;
522 req->content_type_len = 0;
524 req->accept_str = NULL;
527 req->accept_charset_str = NULL;
528 req->accept_charset_len = 0;
530 req->accept_encoding_str = NULL;
531 req->accept_encoding_len = 0;
533 req->accept_language_str = NULL;
534 req->accept_language_len = 0;
536 req->cache_control_str = NULL;
537 req->cache_control_len = 0;
539 req->if_modified_since_str = NULL;
540 req->if_modified_since_len = 0;
542 req->if_none_match_str = NULL;
543 req->if_none_match_len = 0;
545 req->if_range_str = NULL;
546 req->if_range_len = 0;
548 req->negotiate_str = NULL;
549 req->negotiate_len = 0;
551 req->pragma_str = NULL;
554 req->referer_str = NULL;
555 req->referer_len = 0;
557 req->post_data_str = NULL;
558 req->post_data_len = 0;
/* Re-arm timestamps for the next request on this connection. */
560 SET_TIMESTAMP(req->accept_timestamp);
561 #if CONFIG_TUX_EXTENDED_LOG
562 req->parse_timestamp = 0;
563 req->output_timestamp = 0;
564 req->flush_timestamp = 0;
568 req->total_bytes += req->bytes_sent;
571 req->bytes_expected = 0;
/* Remember the keepalive decision before clearing it on the req. */
574 keep_alive = req->keep_alive;
575 clear_keepalive(req);
576 req->had_cachemiss = 0;
577 // first_timestamp and total_bytes is kept!
/* Release the abuf page used for building the reply header. */
586 __free_page(req->abuf.page);
587 memset(&req->abuf, 0, sizeof(req->abuf));
/*
 * Keepalive: queue parse_request for the next request.  If no
 * input is waiting, park the connection on the keepalive timer;
 * the idle_input bit handshake below closes the race against the
 * socket data-ready callback firing concurrently.
 */
589 if (sk && keep_alive) {
590 add_tux_atom(req, parse_request);
591 if (skb_queue_empty(&sk->sk_receive_queue)) {
592 spin_lock_irq(&req->ti->work_lock);
593 add_keepalive_timer(req);
594 if (test_and_set_bit(0, &req->idle_input))
597 * Avoid the race with the event callback:
599 if (skb_queue_empty(&sk->sk_receive_queue) ||
600 !test_and_clear_bit(0, &req->idle_input)) {
601 INC_STAT(nr_idle_input_pending);
602 spin_unlock_irq(&req->ti->work_lock);
/* Data arrived after all: cancel the idle parking and process it. */
606 del_keepalive_timer(req);
607 spin_unlock_irq(&req->ti->work_lock);
609 Dprintk("KEEPALIVE PENDING req %p <%p> (sock %p, sk %p) (keepalive: %d, status: %d)\n", req, __builtin_return_address(0), req->sock, req->sock->sk, req->keep_alive, req->status);
610 add_req_to_workqueue(req);
611 INC_STAT(nr_keepalive_optimized);
/*
 * Non-keepalive teardown: kill timers, reset connection identity
 * and drop the remaining per-connection references.
 */
615 del_timer_sync(&req->keepalive_timer);
616 del_timer_sync(&req->output_timer);
618 if (timer_pending(&req->keepalive_timer))
620 if (timer_pending(&req->output_timer))
622 if (!list_empty(&req->lru))
624 req->nr_keepalives = 0;
625 req->client_addr = 0;
626 req->client_port = 0;
628 req->ftp_offset_start = 0;
633 if (req->cwd_dentry) {
634 dput(req->cwd_dentry);
635 req->cwd_dentry = NULL;
640 mntput(req->cwd_mnt);
653 unlink_tux_socket(req);
656 * Close potential user-space file descriptors.
659 int fd = req->fd, ret;
/* One-shot flag for the log-open warning (used by the logger thread). */
675 static int warn_once = 1;

/*
 * Write one contiguous chunk of pending log data from the ring buffer
 * to the log file, fsync it, and advance log_tail.  Returns the number
 * of bytes still pending (the caller loops while this is large).
 */
677 static unsigned int writeout_log (void)
679 unsigned int len, pending, next_log_tail;
680 mm_segment_t oldmm = get_fs();
681 struct file *log_filp;
686 Dprintk("TUX logger: opening log file {%s}.\n", tux_logfile);
687 log_filp = tux_open_file(tux_logfile, O_CREAT|O_APPEND|O_WRONLY|O_LARGEFILE);
690 printk(KERN_ERR "TUX: could not open log file {%s}!\n",
/* Could not open the file: back off for a second and retry later. */
694 __set_current_state(TASK_INTERRUPTIBLE);
695 schedule_timeout(HZ);
/*
 * Compute one contiguous span under the lock: if the data wraps,
 * only write up to the end of the buffer this round.
 */
698 spin_lock(&log_lock);
699 str = log_buffer + log_tail;
700 if (log_head < log_tail) {
701 len = LOG_LEN-log_tail;
704 len = log_head-log_tail;
705 next_log_tail = log_head;
709 spin_unlock(&log_lock);
/* The actual file write happens outside the spinlock (it sleeps). */
712 ret = log_filp->f_op->write(log_filp, str, len, &log_filp->f_pos);
716 if (ret == -ENOSPC) {
717 printk(KERN_ERR "TUX: trying to write TUX logfile %s, but filesystem is full! Lost %d bytes of log data.\n", tux_logfile, len);
719 printk(KERN_ERR "TUX: log write %d != %d.\n", ret, len);
720 printk(KERN_ERR "TUX: log_filp: %p, str: %p, len: %d str[len-1]: %d.\n", log_filp, str, len, str[len-1]);
726 * Sync log data to disk:
728 if (log_filp->f_op && log_filp->f_op->fsync) {
729 down(&log_filp->f_dentry->d_inode->i_sem);
730 log_filp->f_op->fsync(log_filp, log_filp->f_dentry, 1);
731 up(&log_filp->f_dentry->d_inode->i_sem);
735 * Reduce the cache footprint of the logger file - it's
736 * typically write-once.
738 invalidate_inode_pages(log_filp->f_dentry->d_inode->i_mapping);
/* Consume the written span and recompute the fill level. */
741 spin_lock(&log_lock);
743 log_tail = next_log_tail;
744 pending = (log_head-log_tail) % LOG_LEN;
745 spin_unlock(&log_lock);
/* Dropped below the hard limit: release any throttled fast threads. */
747 if (pending < HARD_LIMIT)
/* Shutdown handshake: stop_log_thread() sets stop_logger and sleeps
 * on stop_logger_wait until the logger thread acknowledges and exits. */
754 static DECLARE_WAIT_QUEUE_HEAD(stop_logger_wait);
755 static int stop_logger = 0;
/*
 * The TUX logger kernel thread: allocates the log ring buffer, binds
 * itself to the configured CPUs, then loops writing out pending log
 * entries (via writeout_log()) roughly once per second or whenever
 * woken through log_wait, until stop_logger is set.
 *
 * NOTE(review): the original text had "&current" corrupted to the
 * "currency" HTML entity in the three sighand/blocked lines below;
 * restored to the standard kernel idiom.
 */
757 static int logger_thread (void *data)
759 DECLARE_WAITQUEUE(wait, current);
762 daemonize("TUX logger");
766 printk(KERN_NOTICE "TUX: logger thread started.\n");
/* Restrict the logger to the CPUs named in log_cpu_mask, if any
 * of them are online. */
769 cpumask_t log_mask, map;
771 mask_to_cpumask(log_cpu_mask, &log_mask);
772 cpus_and(map, cpu_online_map, log_mask);
773 if (!cpus_empty(map))
774 set_cpus_allowed(current, map);
/* Unblock all signals so the thread can be signalled to stop. */
780 spin_lock_irq(&current->sighand->siglock);
781 siginitsetinv(&current->blocked, 0);
783 spin_unlock_irq(&current->sighand->siglock);
/* NOTE(review): vmalloc() result is not checked here — the memset
 * would oops on allocation failure.  TODO: confirm against the full
 * source whether a check exists in the elided lines. */
787 log_buffer = vmalloc(LOG_LEN);
788 memset(log_buffer, 0, LOG_LEN);
789 log_head = log_tail = 0;
/* The logfile may grow without bound — lift the file-size rlimit. */
791 current->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
793 add_wait_queue(&log_wait, &wait);
796 Dprintk("logger does writeout - stop:%d.\n", stop_logger);
/* Keep writing while at least SOFT_LIMIT bytes remain pending. */
798 while (writeout_log() >= SOFT_LIMIT) {
807 Dprintk("logger does sleep - stop:%d.\n", stop_logger);
808 __set_current_state(TASK_INTERRUPTIBLE);
/* New entries arrived while preparing to sleep — stay runnable. */
809 if (log_head != log_tail) {
810 __set_current_state(TASK_RUNNING);
813 schedule_timeout(HZ);
815 Dprintk("logger back from sleep - stop:%d.\n", stop_logger);
816 if (signal_pending(current))
819 remove_wait_queue(&log_wait, &wait);
/* Acknowledge shutdown to stop_log_thread(). */
824 wake_up(&stop_logger_wait);
/*
 * Spawn the logger kernel thread; its PID is recorded in logger_pid.
 */
831 void start_log_thread (void)
835 logger_pid = kernel_thread(logger_thread, NULL, 0);
/*
 * Ask the logger thread to exit and wait (uninterruptibly, on
 * stop_logger_wait) until it wakes us to confirm it has stopped.
 */
840 void stop_log_thread (void)
842 DECLARE_WAITQUEUE(wait, current);
844 Dprintk("stopping logger thread %d ...\n", logger_pid);
/* Queue ourselves before signalling, so the wakeup cannot be lost. */
846 __set_current_state(TASK_UNINTERRUPTIBLE);
847 add_wait_queue(&stop_logger_wait, &wait);
851 __set_current_state(TASK_RUNNING);
852 remove_wait_queue(&stop_logger_wait, &wait);
854 Dprintk("logger thread stopped!\n");