2 #include <sys/socket.h>
5 #include <netinet/in.h>
16 #include "codemuxlib.h"
/* Path of the configuration file; it is passed to stat() and fopen() by the
   configuration reader further down. */
18 #define CONF_FILE "/etc/codemux/codemux.conf"
/* Synthetic configuration line (host, slice, port) that is duplicated with
   strdup() on the first pass of the configuration reader as a fake entry
   for apache. */
20 #define REAL_WEBSERVER_CONFLINE "* root 1080"
/* Number of descriptor slots: sizes sockInfo[], whichSocksToClose[] and the
   OurFDSet bitmap. */
21 #define TARG_SETSIZE 4096
23 /* set aside some small number of fds for us, allow the rest for
25 #define MAX_CONNS ((TARG_SETSIZE-20)/2)
27 /* no single service can take more than half the connections */
28 #define SERVICE_MAX (MAX_CONNS/2)
/* NOTE: FAIRNESS_CUTOFF below expands to a floating-point expression
   (MAX_CONNS * 0.85), so integer counters compared against it are converted
   to double. */
30 /* how many total connections before we get concerned about fairness
32 #define FAIRNESS_CUTOFF (MAX_CONNS * 0.85)
35 typedef struct FlowBuf {
36 int fb_refs; /* num refs (incremented in StartConnect when the peer starts draining this buffer) */
37 char *fb_buf; /* actual buffer; allocated with FB_ALLOCSIZE bytes on first use in SocketReadyToRead */
38 int fb_used; /* bytes used in buffer */
40 #define FB_SIZE 3800 /* max usable size: reads are capped at FB_SIZE - fb_used bytes */
41 #define FB_ALLOCSIZE 4000 /* extra to include IP address (the X-CoDemux-Client header inserted by FindService) */
/* Per-descriptor connection state.  sockInfo[] holds one entry per possible
   descriptor and is indexed directly by the socket fd. */
43 typedef struct SockInfo {
44 int si_peerFd; /* fd of peer; negative (-1 after accept) when there is no peer */
45 struct in_addr si_cliAddr; /* address of client (copied from the accept address) */
46 int si_blocked; /* are we blocked? (TRUE while connecting or after a short or EAGAIN write) */
47 int si_needsHeaderSince; /* since when are we waiting for a header; 0 when not waiting */
48 int si_whichService; /* index of service; -1 when none is assigned */
49 FlowBuf *si_readBuf; /* read data into this buffer */
50 FlowBuf *si_writeBuf; /* drain this buffer for writing */
53 static SockInfo sockInfo[TARG_SETSIZE]; /* per-fd state, indexed by socket fd */
/* One configured service.  Only some fields are visible here; other code in
   this file also reads ss_slice, ss_port and ss_prefix. */
55 typedef struct ServiceSig {
56 char *ss_host; /* suffix in host */
59 int ss_slicePos; /* position in slices array */
/* Active service table with numServices entries; the ss_host and ss_slice
   strings of each entry are freed by the configuration reader. */
62 static ServiceSig *serviceSig;
63 static int numServices;
/* st_mtime of the configuration file recorded by the configuration reader;
   it is compared with the current st_mtime before the file is opened. */
64 static int confFileReadTime;
/* Per-slice bookkeeping.  Other code in this file also reads si_sliceName,
   si_xid and si_numConns. */
67 typedef struct SliceInfo {
69 int si_inUse; /* do any services refer to this? */
/* Slice table; grown with realloc() in WhichSlicePos and indexed by
   ServiceSig.ss_slicePos. */
74 static SliceInfo *slices;
76 static int numActiveSlices; /* divisor of the per-slice fair share in SocketReadyToRead */
77 static int numTotalSliceConns; /* incremented in SliceConnsInc, decremented in SliceConnsDec */
78 static int anySliceXidsNeeded; /* TRUE while a slice xid may still have to be looked up */
/* Descriptor bitmap sized from TARG_SETSIZE.  It is cast to an fd_set
   pointer for select() and handed to FD_ISSET, so its layout has to stay
   compatible with the system fd_set. */
80 typedef struct OurFDSet {
81 long __fds_bits[TARG_SETSIZE/32];
83 static OurFDSet masterReadSet, masterWriteSet;
84 static int highestSetFd; /* raised by SetFd, trimmed in MainLoop before select() */
85 static int numNeedingHeaders; /* how many conns waiting on headers? */
/* Socket option name used with setsockopt() in SocketReadyToRead to pass a
   slice xid; defined here as an alias of SO_PEERCRED. */
92 #define SO_SETXID SO_PEERCRED
94 /*-----------------------------------------------------------------*/
/* Map a service index to its SliceInfo through
   serviceSig[whichService].ss_slicePos.  whichService is used as an array
   index without any check, so it has to be a valid serviceSig[] position
   (not the -1 value that marks "no service"). */
96 ServiceToSlice(int whichService)
100 return(&slices[serviceSig[whichService].ss_slicePos]);
102 /*-----------------------------------------------------------------*/
/* Status report formatting: the global counters first, then one line per
   slice and one line per service.  After each sprintf() the start pointer
   is advanced by strlen(start) so that the next piece is appended.
   NOTE(review): the beginning of this function is not visible here, and
   sprintf() is unbounded - confirm that the destination buffer is large
   enough for every slice and service line. */
112 "numForks %d, numActiveSlices %d, numTotalSliceConns %d\n"
113 "numNeedingHeaders %d, anySliceXidsNeeded %d\n",
114 numForks, numActiveSlices, numTotalSliceConns,
115 numNeedingHeaders, anySliceXidsNeeded);
116 start += strlen(start);
118 for (i = 0; i < numSlices; i++) {
119 SliceInfo *si = &slices[i];
120 sprintf(start, "Slice %d: %s xid %d, %d conns, inUse %d\n",
121 i, si->si_sliceName, si->si_xid, si->si_numConns,
123 start += strlen(start);
126 for (i = 0; i < numServices; i++) {
127 ServiceSig *ss = &serviceSig[i];
128 sprintf(start, "Service %d: %s %s port %d, slice# %d\n", i, ss->ss_host,
129 ss->ss_slice, (int) ss->ss_port, ss->ss_slicePos);
130 start += strlen(start);
136 /*-----------------------------------------------------------------*/
/* Fill in missing slice xids from /etc/passwd.  Each line is split at its
   first two colon separators: the text before the first colon is the slice
   name and the number after the second colon is taken as the xid (values
   below 1 are skipped).  The xid is stored in every slice that still has
   si_xid == 0 and whose name matches case-insensitively.  At the end
   anySliceXidsNeeded is recomputed from the slices that are in use.
   NOTE(review): the function header is not visible here; the name comes
   from the call in MainLoop. */
140 /* walks through /etc/passwd, and gets the uid for every slice we
146 if (!anySliceXidsNeeded)
149 for (i = 0; i < numSlices; i++) {
150 SliceInfo *si = &slices[i];
153 for (i = 0; i < numServices; i++) {
154 SliceInfo *si = ServiceToSlice(i);
159 if ((f = fopen("/etc/passwd", "r")) == NULL)
162 while ((line = GetNextLine(f)) != NULL) {
166 if ((temp = strchr(line, ':')) == NULL)
167 continue; /* weird line */
168 *temp = '\0'; /* terminate slice name */
/* skip the second field; what follows the second colon is parsed as the xid */
170 if ((temp = strchr(temp+1, ':')) == NULL)
171 continue; /* weird line */
172 if ((xid = atoi(temp+1)) < 1)
173 continue; /* weird xid */
175 /* we've got a slice name and xid, let's try to match */
176 for (i = 0; i < numSlices; i++) {
177 if (slices[i].si_xid == 0 &&
178 strcasecmp(slices[i].si_sliceName, line) == 0) {
179 slices[i].si_xid = xid;
185 /* assume service 0 is the root service, and don't check it since
186 it'll have xid zero */
/* NOTE: the loop below iterates over slices (not services) and starts at
   index 1, so slices[0] is the entry that is never checked. */
187 anySliceXidsNeeded = FALSE;
188 for (i = 1; i < numSlices; i++) {
189 if (slices[i].si_xid == 0 && slices[i].si_inUse > 0) {
190 anySliceXidsNeeded = TRUE;
197 /*-----------------------------------------------------------------*/
/* Account for a new connection on the slice behind whichService: the global
   total is incremented and the slice connection count is compared with 1,
   i.e. the case where this is the first connection of that slice. */
199 SliceConnsInc(int whichService)
201 SliceInfo *si = ServiceToSlice(whichService);
205 numTotalSliceConns++;
207 if (si->si_numConns == 1)
210 /*-----------------------------------------------------------------*/
/* Account for a closed connection on the slice behind whichService: the
   global total is decremented and the slice connection count is compared
   with 0, i.e. the case where the slice has no connections left. */
212 SliceConnsDec(int whichService)
214 SliceInfo *si = ServiceToSlice(whichService);
218 numTotalSliceConns--;
220 if (si->si_numConns == 0)
223 /*-----------------------------------------------------------------*/
/* Look up a slice by name (case-insensitive) in slices[].  When the table is
   full its capacity is grown to the larger of 8 and twice the previous
   capacity, and a zeroed entry is set up at index numSlices whose
   si_sliceName is a strdup() copy of the argument.
   NOTE(review): the results of realloc() and strdup() are not checked. */
225 WhichSlicePos(char *slice)
227 /* adds the new slice if necessary, returns the index into slice
228 array. Never change the ordering of existing slices */
230 static int numSlicesAlloc;
232 for (i = 0; i < numSlices; i++) {
233 if (strcasecmp(slice, slices[i].si_sliceName) == 0)
237 if (numSlices >= numSlicesAlloc) {
238 numSlicesAlloc = MAX(8, numSlicesAlloc * 2);
239 slices = realloc(slices, numSlicesAlloc * sizeof(SliceInfo));
242 memset(&slices[numSlices], 0, sizeof(SliceInfo));
243 slices[numSlices].si_sliceName = strdup(slice);
247 /*-----------------------------------------------------------------*/
/* Configuration loading.  Visible steps: stat() CONF_FILE and compare its
   st_mtime with confFileReadTime, open the file, then parse lines made of
   three words (host, slice, port).  A line is reported as bad unless it has
   exactly three words and a port in 1..65535 that differs from DEMUX_PORT.
   Entries are collected in servs (capacity starts at 8 and doubles), their
   slice index is resolved with WhichSlicePos, the ss_host and ss_slice
   strings of the serviceSig entries are freed, and confFileReadTime is set
   to the st_mtime that was read.
   NOTE(review): the function header is not visible here, and the realloc()
   result is not checked. */
253 ServiceSig *servs = NULL;
259 if (stat(CONF_FILE, &statBuf) != 0) {
260 fprintf(stderr, "failed stat on codemux.conf\n");
265 if (statBuf.st_mtime == confFileReadTime)
268 if ((f = fopen(CONF_FILE, "r")) == NULL) {
269 fprintf(stderr, "failed reading codemux.conf\n");
275 /* conf file entries look like
276 coblitz.codeen.org princeton_coblitz 3125
285 /* on the first pass, put in a fake entry for apache */
287 line = strdup(REAL_WEBSERVER_CONFLINE);
289 if ((line = GetNextLine(f)) == NULL)
293 memset(&serv, 0, sizeof(serv));
294 if (WordCount(line) != 3) {
295 fprintf(stderr, "bad line: %s\n", line);
/* third word is the port; DEMUX_PORT itself is rejected as a target */
298 serv.ss_port = port = atoi(GetField(line, 2));
299 if (port < 1 || port > 65535 || port == DEMUX_PORT) {
300 fprintf(stderr, "bad port: %s\n", line);
304 serv.ss_host = GetWord(line, 0);
305 serv.ss_slice = GetWord(line, 1);
306 if (num >= numAlloc) {
307 numAlloc = MAX(numAlloc * 2, 8);
308 servs = realloc(servs, numAlloc * sizeof(ServiceSig));
310 serv.ss_slicePos = WhichSlicePos(serv.ss_slice);
311 if (slices[serv.ss_slicePos].si_inUse == 0 &&
312 slices[serv.ss_slicePos].si_xid < 1)
313 anySliceXidsNeeded = TRUE; /* if new/inactive, we need xid */
321 if (numServices == 0) {
322 fprintf(stderr, "nothing found in codemux.conf\n");
/* release the host and slice strings owned by the serviceSig entries */
328 for (i = 0; i < numServices; i++) {
329 free(serviceSig[i].ss_host);
330 free(serviceSig[i].ss_slice);
335 confFileReadTime = statBuf.st_mtime;
337 /*-----------------------------------------------------------------*/
/* Canned HTTP response written to the client by SocketReadyToRead when a
   connection that is still waiting for its header has no buffer space left. */
338 static char *err400BadRequest =
339 "HTTP/1.0 400 Bad Request\r\n"
340 "Content-Type: text/html\r\n"
342 "You are trying to access a PlanetLab node, and your\n"
343 "request header exceeded the allowable size. Please\n"
344 "try again if you believe this error is temporary.\n";
345 /*-----------------------------------------------------------------*/
/* Canned HTTP response written to the client by SocketReadyToRead when
   StartConnect does not return SUCCESS for the selected service. */
346 static char *err503Unavailable =
347 "HTTP/1.0 503 Service Unavailable\r\n"
348 "Content-Type: text/html\r\n"
350 "You are trying to access a PlanetLab node, but the service\n"
351 "seems to be unavailable at the moment. Please try again.\n";
352 /*-----------------------------------------------------------------*/
/* Canned HTTP response written to the client by SocketReadyToRead when the
   target slice exceeds SERVICE_MAX connections or its fair share. */
353 static char *err503TooBusy =
354 "HTTP/1.0 503 Service Unavailable\r\n"
355 "Content-Type: text/html\r\n"
357 "You are trying to access a PlanetLab node, but the service\n"
358 "seems to be overloaded at the moment. Please try again.\n";
359 /*-----------------------------------------------------------------*/
/* Descriptor-set helper used on the master sets and on socksToCloseVec.
   Visible part: highestSetFd is compared with fd so that it can track the
   largest descriptor handed to this function. */
361 SetFd(int fd, OurFDSet *set)
363 if (highestSetFd < fd)
367 /*-----------------------------------------------------------------*/
/* Counterpart of SetFd; callers pass the master read and write sets or a
   temporary copy of them (body not visible here). */
369 ClearFd(int fd, OurFDSet *set)
373 /*-----------------------------------------------------------------*/
/* Remove one header line from two parallel copies of a request: lower (the
   copy that is searched) and real (the bytes that are forwarded).  The
   header is located in lower as a newline followed by the header argument;
   callers in this file pass lower-case names such as connection: for it.
   The removed length runs up to and including the next newline, or to the
   end of the string when there is none.  Both buffers are then compacted
   with memmove() using totalSize as the number of bytes in use.
   NOTE(review): sprintf() into h2 is unbounded; its size is not visible. */
375 RemoveHeader(char *lower, char *real, int totalSize, char *header)
377 /* returns number of characters removed */
382 sprintf(h2, "\n%s", header);
384 if ((conn = strstr(lower, h2)) == NULL)
388 /* determine how many characters to remove */
389 if ((temp = strchr(conn, '\n')) != NULL)
390 len = (temp - conn) + 1;
392 len = strlen(conn) + 1;
393 start = conn - lower;
/* shift the tail down identically in both copies so offsets stay aligned */
395 memmove(&real[start], &real[end], totalSize - end);
396 memmove(&lower[start], &lower[end], totalSize - end);
400 /*-----------------------------------------------------------------*/
/* Insert the header text followed by CR LF into buf.  The insertion point is
   derived from the first newline in buf, or from the terminating NUL when
   there is no newline.  totalSize is the number of bytes of buf that must
   be preserved; callers pass fb_used + 1 so the terminator moves as well.
   NOTE(review): no bound is checked here - buf must have room for the
   inserted bytes, and sprintf() into h2 is unbounded. */
402 InsertHeader(char *buf, int totalSize, char *header)
404 /* returns number of bytes inserted */
410 sprintf(h2, "%s\r\n", header);
413 /* if we don't encounter a \n, it means that we have only a single
414 line, and we'd converted the \n to a \0 */
415 if ((temp = strchr(buf, '\n')) == NULL)
416 temp = strchr(buf, '\0');
/* open a gap of len bytes at temp, then copy the new header line into it */
419 memmove(temp + len, temp, totalSize - (temp - buf));
420 memcpy(temp, h2, len);
424 /*-----------------------------------------------------------------*/
/* Choose the service for the request held in fb and rewrite its header.
   Visible steps:
     - require a blank line (end of header) in the buffer;
     - insert an X-CoDemux-Client header carrying the client address;
     - work on a lower-cased copy (lowerBuf) for all searches;
     - drop keep-alive: and connection: headers and add Connection: close;
     - match the Host value against each ss_host (services 1 and up);
     - otherwise match the URL against each ss_prefix and strip the prefix
       from the real buffer;
     - otherwise fall back to the first service.
   fb->fb_used is kept in step with every insertion and removal.
   NOTE(review): sprintf() into orig is unbounded; its size is not visible. */
426 FindService(FlowBuf *fb, int *whichService, struct in_addr addr)
431 char *buf = fb->fb_buf;
439 if (strstr(buf, "\n\r\n") == NULL && strstr(buf, "\n\n") == NULL)
442 /* insert client info after first line */
443 sprintf(orig, "X-CoDemux-Client: %s", inet_ntoa(addr));
444 fb->fb_used += InsertHeader(buf, fb->fb_used + 1, orig);
446 /* get just the header, so we can work on it */
447 LOCAL_STR_DUP_LOWER(lowerBuf, buf);
448 if ((end = strstr(lowerBuf, "\n\r\n")) == NULL)
449 end = strstr(lowerBuf, "\n\n");
452 /* remove any existing connection, keep-alive headers, add ours */
453 fb->fb_used -= RemoveHeader(lowerBuf, buf, fb->fb_used + 1, "keep-alive:");
454 fb->fb_used -= RemoveHeader(lowerBuf, buf, fb->fb_used + 1, "connection:");
455 fb->fb_used += InsertHeader(buf, fb->fb_used + 1, "Connection: close");
/* NOTE(review): fb_used already includes the header added on the previous
   line, so the size passed for lowerBuf is larger than its content, and
   lowerBuf must have spare room for the insert - confirm how
   LOCAL_STR_DUP_LOWER sizes the copy. */
456 InsertHeader(lowerBuf, fb->fb_used + 1, "connection: close");
458 /* isolate host, see if it matches */
459 if ((hostVal = strstr(lowerBuf, "\nhost:")) != NULL) {
461 hostVal += strlen("\nhost:");
462 if ((end = strchr(hostVal, '\n')) != NULL)
464 if ((end = strchr(hostVal, ':')) != NULL)
/* NOTE(review): isspace() receives a plain char here; bytes above 0x7f
   would need a cast to unsigned char. */
466 while (isspace(*hostVal))
468 if (strlen(hostVal) > 0) {
469 hostVal = GetWord(hostVal, 0);
/* index 0 is skipped: it is the default service used when nothing matches */
470 for (i = 1; i < numServices; i++) {
471 if (serviceSig[i].ss_host != NULL &&
472 DoesDotlessSuffixMatch(hostVal, 0, serviceSig[i].ss_host)) {
475 /* printf("%s", buf); */
484 /* see if URL prefix matches */
485 if ((end = strchr(lowerBuf, '\n')) != NULL)
487 if ((url = GetField(lowerBuf, 1)) == NULL ||
489 /* bad request - let apache handle it ? */
493 url++; /* skip the leading slash */
494 for (i = 1; i < numServices; i++) {
495 if (serviceSig[i].ss_prefix != NULL &&
496 (len = strlen(serviceSig[i].ss_prefix)) > 0 &&
497 strncmp(url, serviceSig[i].ss_prefix, len) == 0 &&
498 (url[len] == ' ' || url[len] == '/')) {
/* offsets found in lowerBuf are applied to buf; a slash that follows the
   prefix is removed together with it */
499 int startPos = url - lowerBuf;
500 int stripLen = len + ((url[len] == '/') ? 1 : 0);
501 /* strip out prefix */
502 fb->fb_used -= stripLen;
503 memmove(&buf[startPos], &buf[startPos+stripLen],
504 fb->fb_used + 1 - startPos);
505 /* printf("%s", buf); */
512 /* default to first service */
516 /*-----------------------------------------------------------------*/
/* Begin a non-blocking TCP connection to the loopback address on the port of
   serviceSig[whichService] and pair the new socket with origFD.
   Visible steps: create the socket, set O_NONBLOCK, call connect() and
   treat anything other than -1 with errno EINPROGRESS as a failure, watch
   the socket for writability, link both SockInfo entries through si_peerFd,
   share the read buffer of origFD as the write buffer of the new socket
   (taking one more reference), and count the connection against the slice.
   NOTE(review): whichService indexes serviceSig[] before the >= 0 test
   below, and sock is used as a sockInfo[] index without a range check. */
518 StartConnect(int origFD, int whichService)
521 struct sockaddr_in dest;
525 if ((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) {
529 /* make socket non-blocking */
530 if (fcntl(sock, F_SETFL, O_NONBLOCK) < 0) {
535 /* set addr structure */
536 memset(&dest, 0, sizeof(dest));
537 dest.sin_family = AF_INET;
538 dest.sin_port = htons(serviceSig[whichService].ss_port);
539 dest.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
541 /* start connection process - we should be told that it's in
543 if (connect(sock, (struct sockaddr *) &dest, sizeof(dest)) != -1 ||
544 errno != EINPROGRESS) {
549 SetFd(sock, &masterWriteSet); /* determine when connect finishes */
550 sockInfo[origFD].si_peerFd = sock;
551 si = &sockInfo[sock];
552 memset(si, 0, sizeof(SockInfo));
553 si->si_peerFd = origFD;
554 si->si_blocked = TRUE; /* still connecting */
555 si->si_whichService = whichService;
/* the request already read from origFD becomes the data to send on sock */
556 si->si_writeBuf = sockInfo[origFD].si_readBuf;
557 sockInfo[origFD].si_readBuf->fb_refs++;
558 if (whichService >= 0)
559 SliceConnsInc(whichService);
563 /*-----------------------------------------------------------------*/
/* Try to drain the write buffer of fd with a single write() call.
   Nothing is attempted when the buffer is empty or the socket is marked
   blocked.  After a partial write the unsent bytes are moved to the front
   of the buffer, the socket is marked blocked and fd is added to
   masterWriteSet; the same marking is applied when write() fails with
   EAGAIN.  Callers compare the result with SUCCESS.
   NOTE(review): si_writeBuf is dereferenced without a NULL test. */
565 WriteAvailData(int fd)
567 SockInfo *si = &sockInfo[fd];
568 FlowBuf *fb = si->si_writeBuf;
571 /* printf("trying to write fd %d\n", fd); */
572 if (fb->fb_used < 1 || si->si_blocked)
575 /* printf("trying to write %d bytes\n", fb->fb_used); */
576 /* write(STDOUT_FILENO, fb->fb_buf, fb->fb_used); */
577 if ((res = write(fd, fb->fb_buf, fb->fb_used)) > 0) {
579 if (fb->fb_used > 0) {
580 /* couldn't write all - assume blocked */
581 memmove(fb->fb_buf, &fb->fb_buf[res], fb->fb_used);
582 si->si_blocked = TRUE;
583 SetFd(fd, &masterWriteSet);
585 /* printf("wrote %d\n", res); */
589 /* we might have been full but didn't realize it */
590 if (res == -1 && errno == EAGAIN) {
591 si->si_blocked = TRUE;
592 SetFd(fd, &masterWriteSet);
596 /* otherwise, assume the worst */
599 /*-----------------------------------------------------------------*/
/* Deferred-close bookkeeping: a bitmap of descriptors already queued, the
   number of queued descriptors, and the queue itself.  The queue is walked
   and the bitmap cleared in ReallyCloseSocks. */
600 static OurFDSet socksToCloseVec;
601 static int numSocksToClose;
602 static int whichSocksToClose[TARG_SETSIZE];
603 /*-----------------------------------------------------------------*/
/* Queue fd for a deferred close: socksToCloseVec is consulted first, then
   fd is flagged in it and stored in whichSocksToClose[] at position
   numSocksToClose.
   NOTE(review): the function header is not visible here; callers in this
   file invoke the deferred close as CloseSock(fd) - confirm. */
607 if (FD_ISSET(fd, &socksToCloseVec))
609 SetFd(fd, &socksToCloseVec);
610 whichSocksToClose[numSocksToClose] = fd;
613 /*-----------------------------------------------------------------*/
/* Branch taken once the buffer has no references left.
   NOTE(review): only this line of the helper is visible; ReallyCloseSocks
   calls DecBuf() on the read and write buffers of a socket - confirm that
   this is its body. */
620 if (buf->fb_refs == 0) {
625 /*-----------------------------------------------------------------*/
/* Process the deferred-close queue.  The queued-descriptor bitmap is cleared
   first; then, for every queued fd, both flow buffers are passed to
   DecBuf(), the fd is removed from the master read and write sets, its
   header-wait marker is reset, and a connection that was assigned to a
   service is subtracted from that slice and marked as unassigned (-1). */
627 ReallyCloseSocks(void)
631 memset(&socksToCloseVec, 0, sizeof(socksToCloseVec));
633 for (i = 0; i < numSocksToClose; i++) {
634 int fd = whichSocksToClose[i];
636 DecBuf(sockInfo[fd].si_readBuf);
637 DecBuf(sockInfo[fd].si_writeBuf);
638 ClearFd(fd, &masterReadSet);
639 ClearFd(fd, &masterWriteSet);
640 if (sockInfo[fd].si_needsHeaderSince) {
641 sockInfo[fd].si_needsHeaderSince = 0;
/* -1 afterwards keeps the slice from being decremented twice for this fd */
644 if (sockInfo[fd].si_whichService >= 0) {
645 SliceConnsDec(sockInfo[fd].si_whichService);
646 sockInfo[fd].si_whichService = -1;
651 /*-----------------------------------------------------------------*/
/* Handle a readable descriptor.
   Visible steps:
     - a socket without a peer that is not waiting for a header is handled
       as "peer closed";
     - the read FlowBuf and its FB_ALLOCSIZE byte buffer are created on
       first use and shared with the peer as its write buffer;
     - when no buffer space is left, a connection still waiting for its
       header gets the 400 response, any other one is removed from
       masterReadSet so that reads pause;
     - otherwise up to FB_SIZE - fb_used bytes are read and the buffer is
       NUL-terminated;
     - while the header is pending: the status URL is recognised, the
       service is chosen with FindService, per-slice limits are enforced
       (503 when exceeded), the slice xid is set on the socket and the
       connection to the service is started;
     - finally buffered data is pushed to the peer with WriteAvailData. */
653 SocketReadyToRead(int fd)
655 SockInfo *si = &sockInfo[fd];
660 /* if peer is closed, close ourselves */
661 if (si->si_peerFd < 0 && (!si->si_needsHeaderSince)) {
666 if ((fb = si->si_readBuf) == NULL) {
667 fb = si->si_readBuf = calloc(1, sizeof(FlowBuf));
669 if (si->si_peerFd >= 0) {
670 sockInfo[si->si_peerFd].si_writeBuf = fb;
675 if (fb->fb_buf == NULL)
676 fb->fb_buf = malloc(FB_ALLOCSIZE);
678 /* determine read buffer size - if 0, then block reads and return */
/* The test must include 0: with exactly no space left, read() would be
   called with a zero length, return 0, and be mistaken for end-of-file
   below instead of pausing reads or answering with the 400 response. */
679 if ((spaceLeft = FB_SIZE - fb->fb_used) <= 0) {
680 if (si->si_needsHeaderSince) {
681 write(fd, err400BadRequest, strlen(err400BadRequest));
686 ClearFd(fd, &masterReadSet);
691 /* read as much as allowed, and is available */
692 if ((res = read(fd, &fb->fb_buf[fb->fb_used], spaceLeft)) == 0) {
/* nothing is buffered for the peer, so it can be closed as well */
694 if (fb->fb_used == 0 && si->si_peerFd >= 0) {
695 CloseSock(si->si_peerFd);
704 if (fb->fb_used == 0 && si->si_peerFd >= 0) {
705 CloseSock(si->si_peerFd);
711 fb->fb_buf[fb->fb_used] = 0; /* terminate it for convenience */
712 printf("sock %d, read %d, total %d\n", fd, res, fb->fb_used);
714 /* if we need header, check if we've gotten it. if so, do
715 modifications and continue. if not, check if we've read the
716 maximum, and if so, fail */
717 if (si->si_needsHeaderSince) {
721 #define STATUS_REQ "GET /codemux/status.txt"
/* sizeof - 1 compares the literal without its terminating NUL */
722 if (strncasecmp(fb->fb_buf, STATUS_REQ, sizeof(STATUS_REQ)-1) == 0) {
728 printf("trying to find service\n");
729 if (FindService(fb, &whichService, si->si_cliAddr) != SUCCESS)
731 printf("found service %d\n", whichService);
732 slice = ServiceToSlice(whichService);
734 /* no service can have more than some absolute max number of
735 connections. Also, when we're too busy, start enforcing
736 fairness across the servers */
737 if (slice->si_numConns > SERVICE_MAX ||
738 (numTotalSliceConns > FAIRNESS_CUTOFF &&
739 slice->si_numConns > MAX_CONNS/numActiveSlices)) {
740 write(fd, err503TooBusy, strlen(err503TooBusy));
/* pass the slice xid on the client socket; a slice without a known xid
   (si_xid of 0) is left untouched */
745 if (slice->si_xid > 0) {
746 setsockopt(fd, SOL_SOCKET, SO_SETXID,
747 &slice->si_xid, sizeof(slice->si_xid));
748 fprintf(stderr, "setsockopt() with XID = %d name = %s\n",
749 slice->si_xid, slice->si_sliceName);
752 si->si_needsHeaderSince = 0;
754 if (StartConnect(fd, whichService) != SUCCESS) {
755 write(fd, err503Unavailable, strlen(err503Unavailable));
762 /* write anything possible */
763 if (WriteAvailData(si->si_peerFd) != SUCCESS) {
764 /* assume the worst and close */
766 CloseSock(si->si_peerFd);
770 /*-----------------------------------------------------------------*/
/* Handle a writable descriptor: clear its blocked flag, stop watching it for
   writability, enable reading on it and on its peer, then try to drain its
   write buffer.  When the write attempt does not return SUCCESS the peer is
   queued for closing.  A socket whose peer is gone and whose write buffer
   is empty is handled last.
   NOTE(review): si_writeBuf is dereferenced without a NULL test. */
772 SocketReadyToWrite(int fd)
774 SockInfo *si = &sockInfo[fd];
776 /* unblock it and read what it has */
777 si->si_blocked = FALSE;
778 ClearFd(fd, &masterWriteSet);
779 SetFd(fd, &masterReadSet);
781 /* enable reading on peer just in case it was off */
782 if (si->si_peerFd >= 0)
783 SetFd(si->si_peerFd, &masterReadSet);
785 /* if we have data, write it */
786 if (WriteAvailData(fd) != SUCCESS) {
787 /* assume the worst and close */
789 if (si->si_peerFd >= 0) {
790 CloseSock(si->si_peerFd);
796 /* if peer is closed and we're done writing, we should close */
797 if (si->si_peerFd < 0 && si->si_writeBuf->fb_used == 0)
800 /*-----------------------------------------------------------------*/
/* Sweep connections that have been waiting too long for a request header.
   lastSweep is compared with now before any work is done.  One of three
   load tiers is selected from the number of slice connections plus
   header-less connections (against MAX_CONNS and FAIRNESS_CUTOFF) and from
   the number of header-less connections alone (against TARG_SETSIZE
   divided by 20, 40 and 80); below the lowest tier no sweep is made.
   Every descriptor up to highestSetFd whose wait exceeds maxAge is then
   selected. */
802 CloseReqlessConns(void)
804 static int lastSweep;
808 if (lastSweep == now)
812 if (numTotalSliceConns + numNeedingHeaders > MAX_CONNS ||
813 numNeedingHeaders > TARG_SETSIZE/20) {
814 /* second condition is probably an attack - close aggressively */
817 else if (numTotalSliceConns + numNeedingHeaders > FAIRNESS_CUTOFF ||
818 numNeedingHeaders > TARG_SETSIZE/40) {
819 /* sweep a little aggressively */
822 else if (numNeedingHeaders > TARG_SETSIZE/80) {
823 /* just sweep to close strays */
827 /* too little gained - not worth sweeping */
831 /* if it's too old, close it */
832 for (i = 0; i < highestSetFd+1; i++) {
833 if (sockInfo[i].si_needsHeaderSince &&
834 (now - sockInfo[i].si_needsHeaderSince) > maxAge)
838 /*-----------------------------------------------------------------*/
/* Event loop of the demultiplexer for the listening socket lisSock.
   Visible steps per iteration:
     - when more than 300 units of now have passed since lastConfCheck,
       run the periodic configuration work, which includes GetSliceXids();
     - copy the master sets, lower highestSetFd past unused descriptors and
       call select();
     - serve all writable descriptors, then all readable ones (the listen
       socket is cleared from the temporary read set first);
     - accept new connections until accept() stops returning a descriptor,
       initialising each SockInfo as waiting for a header with no peer and
       no service.
   SIGPIPE is ignored for the whole process.
   NOTE(review): newSock indexes sockInfo[] without a visible range check. */
840 MainLoop(int lisSock)
843 OurFDSet tempReadSet, tempWriteSet;
845 int lastConfCheck = 0;
847 signal(SIGPIPE, SIG_IGN);
852 struct timeval timeout;
856 if (now - lastConfCheck > 300) {
858 GetSliceXids(); /* always call - in case new slices created */
862 /* see if there's any activity */
863 tempReadSet = masterReadSet;
864 tempWriteSet = masterWriteSet;
866 /* trim it down if needed */
867 while (highestSetFd > 1 &&
868 (!FD_ISSET(highestSetFd, &tempReadSet)) &&
869 (!FD_ISSET(highestSetFd, &tempWriteSet)))
873 res = select(highestSetFd+1, (fd_set *) &tempReadSet,
874 (fd_set *) &tempWriteSet, NULL, &timeout);
875 if (res < 0 && errno != EINTR) {
882 /* clear the bit for listen socket to avoid confusion */
883 ClearFd(lisSock, &tempReadSet);
885 ceiling = highestSetFd+1; /* copy it, since it changes during loop */
886 /* pass data back and forth as needed */
887 for (i = 0; i < ceiling; i++) {
888 if (FD_ISSET(i, &tempWriteSet))
889 SocketReadyToWrite(i);
891 for (i = 0; i < ceiling; i++) {
892 if (FD_ISSET(i, &tempReadSet))
893 SocketReadyToRead(i);
896 /* see if we need to close conns w/o requests */
902 /* try accepting new connections */
904 struct sockaddr_in addr;
905 socklen_t lenAddr = sizeof(addr);
906 if ((newSock = accept(lisSock, (struct sockaddr *) &addr,
/* fresh connection: header pending since now, no peer, no service yet */
908 memset(&sockInfo[newSock], 0, sizeof(SockInfo));
909 sockInfo[newSock].si_needsHeaderSince = now;
911 sockInfo[newSock].si_peerFd = -1;
912 sockInfo[newSock].si_cliAddr = addr.sin_addr;
913 sockInfo[newSock].si_whichService = -1;
914 SetFd(newSock, &masterReadSet);
916 } while (newSock >= 0);
919 /*-----------------------------------------------------------------*/
/* Program entry point.  Visible steps: create the accept socket on
   DEMUX_PORT (reporting a failure on stderr), register it in masterReadSet,
   and, in the parent process, loop on wait3() until it returns a value of
   at least 1. */
921 main(int argc, char *argv[])
925 if ((lisSock = CreatePrivateAcceptSocket(DEMUX_PORT, TRUE)) < 0) {
926 fprintf(stderr, "failed creating accept socket\n");
929 SetFd(lisSock, &masterReadSet);
934 /* this is the parent - just wait */
935 while (wait3(NULL, 0, NULL) < 1)
936 ; /* just keep waiting for a real pid */
945 /*-----------------------------------------------------------------*/