which included commits to RCS files with non-trunk default branches.
--- /dev/null
+CC = gcc
+CFLAGS = -Wall -O -g
+
+TARGS = codemux
+
+all: ${TARGS}
+
+clean:
+ rm -f ${TARGS} *.o *~
+
+SHARED_OBJ = applib.o gettimeofdayex.o
+
+CODEMUX_OBJ = codemux.o ${SHARED_OBJ}
+
+codemux: ${CODEMUX_OBJ}
+
--- /dev/null
+#ifndef _APPDEF_H_
+#define _APPDEF_H_
+
+/* useful definitions */
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef SUCCESS
+#define SUCCESS 0
+#endif
+
+#ifndef FAILURE
+#define FAILURE (-1)
+#endif
+
+#ifndef INT_MAX
+#ifdef MAX_INT
+#define INT_MAX MAXINT
+#endif
+#endif
+
+#ifndef MAX
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#endif
+
+#ifndef MIN
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#endif
+
+#ifndef NELEMS
+#define NELEMS(x) (sizeof(x)/sizeof(x[0]))
+#endif
+
+#ifndef SKIP_CHARS
+#define SKIP_CHARS(x) while(!isspace((int)*x)) (x)++
+#endif
+
+#ifndef SKIP_SPACES
+#define SKIP_SPACES(x) while (isspace((int)*x)) (x)++
+#endif
+
+#ifndef SKIP_WORD
+#define SKIP_WORD(x) do { SKIP_SPACES(x); \
+ SKIP_CHARS(x); \
+ SKIP_SPACES(x);} while(0)
+#endif
+
+#endif //_APPDEF_H_
--- /dev/null
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <ctype.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <errno.h>
+#include "applib.h"
+#include "debug.h"
+#include "codns.h"
+
+
+
+int defaultTraceSync = TRUE;
+
+/*-----------------------------------------------------------------*/
+float
+DiffTimeVal(const struct timeval *start, const struct timeval *end)
+{
+ struct timeval temp;
+ if (end == NULL) {
+ end = &temp;
+ gettimeofday(&temp, NULL);
+ }
+ return(end->tv_sec - start->tv_sec +
+ 1e-6*(end->tv_usec - start->tv_usec));
+}
+/*-----------------------------------------------------------------*/
+int
+CreatePrivateAcceptSocketEx(int portNum, int nonBlocking, int loopbackOnly)
+{
+ int doReuse = 1;
+ struct linger doLinger;
+ int sock;
+ struct sockaddr_in sa;
+
+ /* Create socket. */
+ if ((sock = socket(PF_INET, SOCK_STREAM, 0)) == -1)
+ return(-1);
+
+ /* don't linger on close */
+ doLinger.l_onoff = doLinger.l_linger = 0;
+ if (setsockopt(sock, SOL_SOCKET, SO_LINGER,
+ &doLinger, sizeof(doLinger)) == -1) {
+ close(sock);
+ return(-1);
+ }
+
+ /* reuse addresses */
+ if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+ &doReuse, sizeof(doReuse)) == -1) {
+ close(sock);
+ return(-1);
+ }
+
+ if (nonBlocking) {
+ /* make listen socket nonblocking */
+ if (fcntl(sock, F_SETFL, O_NDELAY) == -1) {
+ close(sock);
+ return(-1);
+ }
+ }
+
+ /* set up info for binding listen */
+ memset(&sa, 0, sizeof(sa));
+ sa.sin_family = AF_INET;
+ sa.sin_addr.s_addr = (loopbackOnly) ? htonl(INADDR_LOOPBACK)
+ : htonl(INADDR_ANY);
+ sa.sin_port = htons(portNum);
+
+ /* bind the sock */
+ if (bind(sock, (struct sockaddr *) &sa, sizeof(sa)) == -1) {
+ close(sock);
+ return(-1);
+ }
+
+ /* start listening */
+ if (listen(sock, 32) == -1) {
+ close(sock);
+ return(-1);
+ }
+
+ return(sock);
+}
+/*-----------------------------------------------------------------*/
+int
+CreatePrivateAcceptSocket(int portNum, int nonBlocking)
+{
+ return CreatePrivateAcceptSocketEx(portNum, nonBlocking, FALSE);
+}
+/*-----------------------------------------------------------------*/
+int
+CreatePublicUDPSocket(int portNum)
+{
+ struct sockaddr_in hb_sin;
+ int sock;
+
+ if ((sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0)
+ return(-1);
+
+ memset(&hb_sin, 0, sizeof(hb_sin));
+ hb_sin.sin_family = AF_INET;
+ hb_sin.sin_addr.s_addr = INADDR_ANY;
+ hb_sin.sin_port = htons(portNum);
+
+ if (bind(sock, (struct sockaddr *) &hb_sin, sizeof(hb_sin)) < 0) {
+ close(sock);
+ return(-1);
+ }
+ return(sock);
+}
+/*-----------------------------------------------------------------*/
+int
+MakeConnection(char *name, in_addr_t netAddr, int portNum, int nonBlocking)
+{
+ struct sockaddr_in saddr;
+ int fd;
+
+ if (name != NULL) {
+ struct hostent *ent;
+ if ((ent = gethostbyname(name)) == NULL) {
+ if (hdebugLog)
+ TRACE("failed in name lookup - %s\n", name);
+ return(-1);
+ }
+ memcpy(&netAddr, ent->h_addr, sizeof(netAddr));
+ }
+
+ if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) {
+ if (hdebugLog)
+ TRACE("failed creating socket - %d\n", errno);
+ return(-1);
+ }
+
+ if (nonBlocking) {
+ if (fcntl(fd, F_SETFL, O_NDELAY) < 0) {
+ if (hdebugLog)
+ TRACE("failed fcntl'ing socket - %d\n", errno);
+ close(fd);
+ return(-1);
+ }
+ }
+
+ saddr.sin_family = AF_INET;
+ saddr.sin_addr.s_addr = netAddr;
+ saddr.sin_port = htons(portNum);
+
+ if (connect(fd, (struct sockaddr *) &saddr,
+ sizeof(struct sockaddr_in)) < 0) {
+ if (errno == EINPROGRESS)
+ return(fd);
+ if (hdebugLog)
+ TRACE("failed connecting socket - %d\n", errno);
+ close(fd);
+ return(-1);
+ }
+
+ return(fd);
+}
+/*-----------------------------------------------------------------*/
+int
+MakeLoopbackConnection(int portNum, int nonBlocking)
+{
+ return(MakeConnection(NULL, htonl(INADDR_LOOPBACK),
+ portNum, nonBlocking));
+}
+/*-----------------------------------------------------------------*/
+char *
+GetField(const unsigned char *start, int whichField)
+{
+ int currField;
+
+ /* move to first non-blank char */
+ while (isspace(*start))
+ start++;
+
+ if (*start == '\0')
+ return(NULL);
+
+ for (currField = 0; currField < whichField; currField++) {
+ /* move over this field */
+ while (*start != '\0' && (!isspace(*start)))
+ start++;
+ /* move over blanks before next field */
+ while (isspace(*start))
+ start++;
+ if (*start == '\0')
+ return(NULL);
+ }
+ return((char *) start);
+}
+/* ---------------------------------------------------------------- */
+char *
+GetWord(const unsigned char *start, int whichWord)
+{
+ /* returns a newly allocated string containing the desired word,
+ or NULL if there was a problem */
+ unsigned char *temp;
+ int len = 0;
+ char *res;
+
+ temp = (unsigned char *) GetField(start, whichWord);
+ if (!temp)
+ return(NULL);
+ while (!(temp[len] == '\0' || isspace(temp[len])))
+ len++;
+ if (!len)
+ NiceExit(-1, "internal error");
+ res = (char *)xcalloc(1, len+1);
+ if (!res)
+ NiceExit(-1, "out of memory");
+ memcpy(res, temp, len);
+ return(res);
+}
+/* ---------------------------------------------------------------- */
+char *
+GetWordEx(const unsigned char *start, int whichWord,
+ char* dest, int max)
+{
+ /* returns a newly allocated string containing the desired word,
+ or NULL if there was a problem */
+ unsigned char *temp;
+ int len = 0;
+
+ memset(dest, 0, max);
+ temp = (unsigned char *) GetField(start, whichWord);
+ if (!temp)
+ return(NULL);
+ while (!(temp[len] == '\0' || isspace(temp[len])))
+ len++;
+ if (!len)
+ NiceExit(-1, "internal error");
+ if (len >= max-1)
+ len = max-1;
+ memcpy(dest, temp, len);
+ return(dest);
+}
+/*-----------------------------------------------------------------*/
+int
+Base36Digit(int a)
+{
+ if (a < 0)
+ return('0');
+ if (a > 35)
+ return('z');
+ if (a < 10)
+ return('0' + a);
+ return('a' + a-10);
+}
+/*-----------------------------------------------------------------*/
+int
+Base36To10(int a)
+{
+ if (a >= '0' && a <= '9')
+ return(a - '0');
+ if (a >= 'a' && a <= 'z')
+ return(10 + a - 'a');
+ if (a >= 'A' && a <= 'Z')
+ return(10 + a - 'A');
+ return(0);
+}
+/*-----------------------------------------------------------------*/
+int
+PopCount(int val)
+{
+ int i;
+ int count = 0;
+
+ for (i = 0; i < (int)sizeof(int) * 8; i++) {
+ if (val & (1<<i))
+ count++;
+ }
+ return(count);
+}
+/*-----------------------------------------------------------------*/
+int
+PopCountChar(int val)
+{
+ int i;
+ int count = 0;
+
+ for (i = 0; i < (int)sizeof(int) * 8; i++) {
+ if (val & (1<<i))
+ count++;
+ }
+ return(Base36Digit(count));
+}
+/*-----------------------------------------------------------------*/
+int
+LogValChar(int val)
+{
+ int i;
+
+ for (i = 0; i < 32; i++) {
+ if (val <= (1<<i))
+ return(Base36Digit(i));
+ }
+ return(Base36Digit(32));
+}
+/*-----------------------------------------------------------------*/
+const char *
+StringOrNull(const char *s)
+{
+ if (s)
+ return(s);
+ return("(null)");
+}
+/*-----------------------------------------------------------------*/
+char *
+strchars(const char *string, const char *list)
+{
+ /* acts like strchr, but works with multiple characters */
+ int numChars = strlen(list);
+ int i;
+ const char *walk;
+
+ if (numChars < 1)
+ return(NULL);
+
+ for (walk = string; *walk; walk++) {
+ for (i = 0; i < numChars; i++) {
+ if (*walk == list[i])
+ return (char *)(walk);
+ }
+ }
+ return(NULL);
+}
+/*-----------------------------------------------------------------*/
+char *
+strnchars(const char *string, int length, const char *list)
+{
+ /* acts like strchr, but works with multiple characters, and
+ reads exactly length characters from string. */
+ int searchingfor[256] = {0};
+ const char *walk;
+
+ if ('\0' == *list)
+ return(NULL);
+
+ for (; *list; list++) {
+ /*
+ * Be careful with this cast.
+ * If *list == (char) -98 (extended ascii), then
+ * (unsigned)*list == (unsigned int)*list == 4294967198
+ * (int)(unsigned char)*list == (unsigned char)*list == 158
+ * The compiler automatically casts the character to an int before
+ * doing the array index.
+ */
+ searchingfor[(int)(unsigned char)*list] = 1;
+ }
+
+ for (walk = string; walk - string < length; walk++) {
+ /* likewise */
+ if (searchingfor[(int)(unsigned char)*walk]) {
+ return (char *)(walk);
+ }
+ }
+ return(NULL);
+}
+/*-----------------------------------------------------------------*/
+int
+strncspn(const char* string, int length, const char* reject)
+ /* like strcspn but reads to length characters */
+{
+ int count = 0;
+ int searchingfor[256] = {0};
+ const char* walk;
+
+ for (; *reject; reject++) {
+ /*
+ * Be careful with this cast.
+ * If *list == (char) -98 (extended ascii), then
+ * (unsigned)*list == (unsigned int)*list == 4294967198
+ * (int)(unsigned char)*list == (unsigned char)*list == 158
+ * The compiler automatically casts the character to an int before
+ * doing the array index.
+ */
+ searchingfor[(int)(unsigned char)*reject] = 1;
+ }
+
+ for(walk = string; walk - string < length; walk++) {
+ /* likewise */
+ if (searchingfor[(int)(unsigned char)*walk]) {
+ break;
+ } else {
+ count++;
+ }
+ }
+ return count;
+}
+/*-----------------------------------------------------------------*/
+char *
+strnchr(const char *string, int length, const char needle)
+{
+ /* acts like strchr, but
+ reads exactly length characters from string. */
+ const char *walk;
+
+ for (walk = string; walk - string < length; walk++) {
+ if (needle == *walk) {
+ return (char *)(walk);
+ }
+ }
+ return(NULL);
+}
+
+/*-----------------------------------------------------------------*/
+#ifndef HAS_STRNSTR
+/* same as strstr, except that si doesn't have to end at '\0' */
+char * strnstr(const char * s1, int s1_len, const char * s2)
+{
+ int l1, l2;
+
+ l2 = strlen(s2);
+ if (!l2)
+ return (char *) s1;
+
+ l1 = s1_len;
+ while (l1 >= l2) {
+ l1--;
+ if (!memcmp(s1,s2,l2))
+ return (char *) s1;
+ s1++;
+ }
+ return NULL;
+}
+#endif
+/*-----------------------------------------------------------------*/
+/* same as strnstr, except case insensitive */
+char * strncasestr(const char * s1, int s1_len, const char * s2)
+{
+ int l1, l2;
+
+ l2 = strlen(s2);
+ if (!l2)
+ return (char *) s1;
+
+ l1 = s1_len;
+ while (l1 >= l2) {
+ l1--;
+ if (!strncasecmp(s1,s2,l2))
+ return (char *) s1;
+ s1++;
+ }
+ return NULL;
+}
+
+/*-----------------------------------------------------------------*/
+char *
+StrdupLower(const char *orig)
+{
+ char *temp;
+ int i;
+
+ if ((temp = xstrdup(orig)) == NULL)
+ NiceExit(-1, "no memory in strduplower");
+ for (i = 0; temp[i]; i++) {
+ if (isupper((int) temp[i]))
+ temp[i] = tolower(temp[i]);
+ }
+ return(temp);
+}
+
+/*-----------------------------------------------------------------*/
+void
+StrcpyLower(char *dest, const char *src)
+{
+ /* copy 'src' to 'dest' in lower cases.
+ 'dest' should have enough free space to hold src */
+ int i;
+
+ for (i = 0; src[i]; i++) {
+ dest[i] = (isupper((int) src[i])) ? tolower(src[i]) : src[i];
+ }
+
+ /* mark it as NULL */
+ dest[i] = 0;
+}
+/*-----------------------------------------------------------------*/
+void
+StrcpyLowerExcept(char *dest, int dest_max, const char* src, const char* except)
+{
+ /* copy 'src' to 'dest' in lower cases, skipping the chars in except.
+ 'dest' should have enough free space to hold src */
+ int i, j;
+
+ if (src == NULL)
+ return;
+
+ for (i = 0, j= 0; src[i]; i++) {
+ if (strchr(except, src[i]))
+ continue;
+
+ if (j == dest_max - 1)
+ break;
+ dest[j++] = (isupper((int) src[i])) ? tolower(src[i]) : src[i];
+ }
+
+ /* mark it as NULL */
+ dest[j] = 0;
+}
+
+/*-----------------------------------------------------------------*/
+static char *
+GetNextLineBack(FILE *file, int lower, int stripComments)
+{
+ /* reads the next non-blank line of the file. strips off any leading
+ space, any comments, and any trailing space. returns a lowercase
+ version of the line that has been malloc'd */
+ char line[1024];
+
+ while (fgets(line, sizeof(line), file) != NULL) {
+ char *temp;
+ int len;
+
+ /* strip off any comments, leading and trailing whitespace */
+ if (stripComments) {
+ if ((temp = strchr(line, '#')) != NULL)
+ *temp = 0;
+ }
+ len = strlen(line);
+ while (len > 0 && isspace((int) line[len-1])) {
+ len--;
+ line[len] = 0;
+ }
+ temp = line;
+ while (isspace((int) *temp))
+ temp++;
+ if (temp[0] == 0)
+ continue; /* blank line, move on */
+
+ if (lower)
+ return(StrdupLower(temp));
+ return(xstrdup(temp));
+ }
+
+ return(NULL);
+}
+/*-----------------------------------------------------------------*/
+char *
+GetLowerNextLine(FILE *file)
+{
+ return(GetNextLineBack(file, TRUE, TRUE));
+}
+/*-----------------------------------------------------------------*/
+char *
+GetNextLine(FILE *file)
+{
+ return(GetNextLineBack(file, FALSE, TRUE));
+}
+/*-----------------------------------------------------------------*/
+char *
+GetNextLineNoCommStrip(FILE *file)
+{
+ return(GetNextLineBack(file, FALSE, FALSE));
+}
+/*-----------------------------------------------------------------*/
+int
+DoesSuffixMatch(char *start, int len, char *suffix)
+{
+ int sufLen = strlen(suffix);
+
+ if (len < 1)
+ len = strlen(start);
+ if (len < sufLen)
+ return(FALSE);
+ if (strncasecmp(start+len-sufLen, suffix, sufLen))
+ return(FALSE);
+ return(TRUE);
+}
+/*-----------------------------------------------------------------*/
+int
+DoesDotlessSuffixMatch(char *start, int len, char *suffix)
+{
+ /* ignores any dots on end of start, suffix */
+ int sufLen = strlen(suffix);
+
+ if (len < 1)
+ len = strlen(start);
+
+ while (len > 1 && start[len-1] == '.')
+ len--;
+ while (sufLen > 1 && suffix[sufLen-1] == '.')
+ sufLen--;
+
+ if (len < sufLen)
+ return(FALSE);
+ if (strncasecmp(start+len-sufLen, suffix, sufLen))
+ return(FALSE);
+ return(TRUE);
+}
+
+/*-----------------------------------------------------------------*/
+/* */
+/* Bit Vector Implementation */
+/* */
+/*-----------------------------------------------------------------*/
+#define BIT_INDEX (0x0000001F)
+
+void
+SetBits(int* bits, int idx, int maxNum)
+{
+ if (idx > (maxNum << 5)) {
+ TRACE("Invalid index: %d", idx);
+ return;
+ }
+ bits[(idx >> 5)] |= (1 << (idx & BIT_INDEX));
+}
+/*-----------------------------------------------------------------*/
+int
+GetBits(int* bits, int idx, int maxNum)
+{
+ if (idx > (maxNum << 5)) {
+ TRACE("Invalid index: %d", idx);
+ return FALSE;
+ }
+ return (bits[(idx >> 5)] & (1 << (idx & BIT_INDEX)));
+}
+
+/*-----------------------------------------------------------------*/
+static inline int
+GetNumBits_I(int bitvec)
+{
+ int i, count;
+
+ for (i = 0, count = 0; i < 32; i++)
+ if (bitvec & (1 << i)) count++;
+ return count;
+}
+
+/*-----------------------------------------------------------------*/
+int
+GetNumBits(int* bitvecs, int maxNum)
+{
+ int i, count;
+
+ /* get the number of bits that have been set to 1 */
+ for (i = 0, count = 0; i < maxNum; i++)
+ count += GetNumBits_I(bitvecs[i]);
+ return count;
+}
+
+/*-----------------------------------------------------------------*/
+
+/* Logging & Trace support */
+
+/* buffer threshold : when the size hits this value, it flushes its content
+ to the file */
+#define LOG_BYTES_THRESHOLD (32*1024)
+
+/* this flag indicates that it preserves the base file name for the current
+ one, and changes its name when it actually closes it off */
+#define CHANGE_FILE_NAME_ON_SAVE 0x01
+
+/* size of the actual log buffer */
+#define LOG_BYTES_MAX (2*LOG_BYTES_THRESHOLD)
+
+/* log/trace book keeping information */
+typedef struct ExtendedLog {
+ char buffer[LOG_BYTES_MAX]; /* 64 KB */
+ int bytes; /* number of bytes written */
+ int filesize; /* number of bytes written into this file so far */
+ int fd; /* file descriptor */
+ char* sig; /* base file name */
+ int flags; /* flags */
+ time_t nextday;
+} ExtendedLog, *PExtendedLog;
+
+/* maximum single file size */
+int maxSingleLogSize = 100 * (1024*1024);
+
+static time_t
+GetNextLogFileName(char* file, int size, const char* sig)
+{
+#define COMPRESS_EXT1 ".bz2"
+#define COMPRESS_EXT2 ".gz"
+
+ struct tm cur_tm;
+ time_t cur_t;
+ int idx = 0;
+
+ cur_t = timeex(NULL);
+ cur_tm = *gmtime(&cur_t);
+
+ for (;;) {
+ /* check if .bz2 exists */
+ snprintf(file, size, "%s.%04d%02d%02d_%03d%s",
+ sig, cur_tm.tm_year+1900, cur_tm.tm_mon+1, cur_tm.tm_mday,
+ idx, COMPRESS_EXT1);
+
+ if (access(file, F_OK) == 0) {
+ idx++;
+ continue;
+ }
+
+ /* check if .gz exists */
+ snprintf(file, size, "%s.%04d%02d%02d_%03d%s",
+ sig, cur_tm.tm_year+1900, cur_tm.tm_mon+1, cur_tm.tm_mday,
+ idx++, COMPRESS_EXT2);
+
+ if (access(file, F_OK) == 0)
+ continue;
+
+ /* strip the extension and see if the (uncompressed) file exists */
+ file[strlen(file) - sizeof(COMPRESS_EXT2) + 1] = 0;
+ if (access(file, F_OK) != 0)
+ break;
+ }
+
+ /* calculate & return the next day */
+ cur_t -= (3600*cur_tm.tm_hour + 60*cur_tm.tm_min + cur_tm.tm_sec);
+ return cur_t + 60*60*24;
+
+#undef COMPRESS_EXT1
+#undef COMPRESS_EXT2
+}
+
+/*-----------------------------------------------------------------*/
+static void
+FlushBuffer(HANDLE file)
+{
+ /* write data into the file */
+ ExtendedLog* pel = (ExtendedLog *)file;
+ int written;
+
+ if (pel == NULL || pel->fd < 0)
+ return;
+
+ if ((written = write(pel->fd, pel->buffer, pel->bytes)) > 0) {
+ pel->bytes -= written;
+
+ /* if it hasn't written all data, then we need to move memory */
+ if (pel->bytes > 0)
+ memmove(pel->buffer, pel->buffer + written, pel->bytes);
+ pel->buffer[pel->bytes] = 0;
+ pel->filesize += written;
+ }
+
+ /* if the filesize is bigger than maxSignleLogSize, then close it off */
+ if (pel->filesize >= maxSingleLogSize)
+ OpenLogF(file);
+}
+
+/*-----------------------------------------------------------------*/
+HANDLE
+CreateLogFHandle(const char* signature, int change_file_name_on_save)
+{
+ ExtendedLog* pel;
+
+ if ((pel = (ExtendedLog *)xcalloc(1, sizeof(ExtendedLog))) == NULL)
+ NiceExit(-1, "failed");
+
+ pel->fd = -1;
+ pel->sig = xstrdup(signature);
+ if (pel->sig == NULL)
+ NiceExit(-1, "signature copying failed");
+ if (change_file_name_on_save)
+ pel->flags |= CHANGE_FILE_NAME_ON_SAVE;
+
+ return pel;
+}
+
+
+/*-----------------------------------------------------------------*/
+int
+OpenLogF(HANDLE file)
+{
+ char filename[1024];
+ ExtendedLog* pel = (ExtendedLog *)file;
+
+ if (pel == NULL)
+ return -1;
+
+ if (pel->fd != -1)
+ close(pel->fd);
+
+ pel->nextday = GetNextLogFileName(filename, sizeof(filename), pel->sig);
+
+ /* change the file name only at saving time
+ use pel->sig as current file name */
+ if (pel->flags & CHANGE_FILE_NAME_ON_SAVE) {
+ if (access(pel->sig, F_OK) == 0)
+ rename(pel->sig, filename);
+ strcpy(filename, pel->sig);
+ }
+
+ /* file opening */
+ if ((pel->fd = open(filename, O_RDWR | O_CREAT | O_APPEND,
+ S_IRUSR | S_IWUSR)) == -1) {
+ char errMessage[2048];
+ sprintf(errMessage, "couldn't open the extended log file %s\n", filename);
+ NiceExit(-1, errMessage);
+ }
+
+ /* reset the file size */
+ pel->filesize = 0;
+ return 0;
+}
+
+/*-----------------------------------------------------------------*/
+int
+WriteLog(HANDLE file, const char* data, int size, int forceFlush)
+{
+ ExtendedLog* pel = (ExtendedLog *)file;
+
+ /* if an error might occur, then stop here */
+ if (pel == NULL || pel->fd < 0 || size > LOG_BYTES_MAX)
+ return -1;
+
+ if (data != NULL) {
+ /* flush the previous data, if this data would overfill the buffer */
+ if (pel->bytes + size >= LOG_BYTES_MAX)
+ FlushBuffer(file);
+
+ /* write into the buffer */
+ memcpy(pel->buffer + pel->bytes, data, size);
+ pel->bytes += size;
+ }
+
+ /* need to flush ? */
+ if ((forceFlush && (pel->bytes > 0)) || (pel->bytes >= LOG_BYTES_THRESHOLD))
+ FlushBuffer(file);
+
+ return 0;
+}
+
+/*-----------------------------------------------------------------*/
+void
+DailyReopenLogF(HANDLE file)
+{
+ /* check if current file is a day long,
+ opens another for today's file */
+ ExtendedLog* pel = (ExtendedLog *)file;
+
+ if (pel && (timeex(NULL) >= pel->nextday)) {
+ FlushLogF(file); /* flush */
+ OpenLogF(file); /* close previous one & reopen today's */
+ }
+}
+/*-----------------------------------------------------------------*/
+int
+HandleToFileNo(HANDLE file)
+{
+ ExtendedLog* pel = (ExtendedLog *)file;
+
+ return (pel != NULL) ? pel->fd : -1;
+}
+/*-----------------------------------------------------------------*/
+#define TO_HOST_L(x) x = ntohl(x);
+#define TO_NETWORK_L(x) x = htonl(x);
+void
+HtoN_LocalQueryInfo(LocalQueryInfo *p)
+{
+ TO_NETWORK_L(p->lqi_size);
+ TO_NETWORK_L(p->lqi_id);
+ TO_NETWORK_L(p->lqi_cache);
+}
+/*----------------------------------------------------------------*/
+void
+NtoH_LocalQueryResult(LocalQueryResult* p)
+{
+ TO_HOST_L(p->lq_id);
+ TO_HOST_L(p->lq_ttl);
+}
+/*----------------------------------------------------------------*/
+int
+CoDNSGetHostByNameSync(const char *name, struct in_addr *res)
+{
+ static int fd = -1;
+ LocalQuery query;
+ int size;
+ LocalQueryResult result;
+
+ /* create a connection to CoDNS */
+ if (fd == -1 && (fd = MakeLoopbackConnection(CODNS_PORT, 0)) < 0) {
+ TRACE("CoDNS connection try has failed!\n");
+
+ /* try to resolve names using gethostbyname() */
+ {
+ struct hostent* hp;
+ if ((hp = gethostbyname(name)) == NULL)
+ NiceExit(-1, "gethostbyname also failed!");
+ if (res)
+ *res = *(struct in_addr *)hp->h_addr;
+ }
+ return 0;
+ }
+
+ memset(&query, 0, sizeof(query));
+ size = strlen(name) + 1;
+ query.lq_info.lqi_size = size; /* length of name */
+ query.lq_info.lqi_cache = TRUE;
+ strcpy(query.lq_name, name);
+ size += sizeof(query.lq_info) + sizeof(query.lq_zero);
+
+ /* send a query */
+ HtoN_LocalQueryInfo(&query.lq_info);
+ if (write(fd, &query, size) != size) {
+ close(fd);
+ fd = -1;
+ return(-1);
+ }
+
+ /* get answer */
+ do {
+ size = read(fd, &result, sizeof(result));
+ } while (size == -1 && errno == EINTR);
+ /* close(fd); */
+ NtoH_LocalQueryResult(&result);
+
+ if (size != sizeof(result))
+ return(-1);
+
+ *res = result.lq_address[0];
+ return 0;
+}
+/*-----------------------------------------------------------------*/
+void
+FeedbackDelay(struct timeval *start, float minSec, float maxSec)
+{
+ float diff = DiffTimeVal(start, NULL);
+ if (diff < minSec)
+ diff = minSec;
+ if (diff > maxSec)
+ diff = maxSec;
+ usleep((unsigned int)(diff * 1e6));
+}
+/*-----------------------------------------------------------------*/
+static int niceExitLogFD = -1;
+int
+NiceExitOpenLog(char *logName)
+{
+ /* log file to record exit reasons */
+ if (niceExitLogFD >= 0)
+ close(niceExitLogFD);
+
+ niceExitLogFD = open(logName, O_WRONLY | O_APPEND | O_CREAT,
+ S_IRUSR | S_IWUSR);
+ return((niceExitLogFD >= 0) ? SUCCESS : FAILURE);
+}
+/*-----------------------------------------------------------------*/
+void
+NiceExitBack(int val, char *reason, char *file, int line)
+{
+ char buf[1024];
+ time_t currT = time(NULL);
+
+ sprintf(buf, "[%s, %d] %.24s %s\n", file, line, ctime(&currT), reason);
+ if (hdebugLog) {
+ TRACE("%s", buf);
+ FlushLogF(hdebugLog);
+ }
+ else
+ fprintf(stderr, "%s", buf);
+
+ if (niceExitLogFD >= 0)
+ write(niceExitLogFD, buf, strlen(buf));
+ exit(val);
+}
+/*-----------------------------------------------------------------*/
+int
+WordCount(char *buf)
+{
+ int count = 0;
+ int wasSpace = TRUE;
+
+ while (*buf != '\0') {
+ int isSpace = isspace(*buf);
+ if (wasSpace && (!isSpace))
+ count++;
+ wasSpace = isSpace;
+ buf++;
+ }
+ return(count);
+}
+/*-----------------------------------------------------------------*/
+char *
+ReadFile(const char *filename)
+{
+ int dummySize;
+ return(ReadFileEx(filename, &dummySize));
+}
+/*-----------------------------------------------------------------*/
+char *
+ReadFileEx(const char *filename, int *size)
+{
+ /* allocate a buffer, read the file into it and
+ return the buffer */
+ char *content = NULL;
+ struct stat buf;
+ int fd;
+
+ *size = -1;
+ if (access(filename, R_OK) < 0
+ || stat(filename, &buf) < 0
+ || (fd = open(filename, O_RDONLY)) < 0) {
+ TRACE("opening captcha file %s failed\n", filename);
+ exit(-1);
+ }
+
+ if ((content = (char *)xmalloc(buf.st_size + 1)) == NULL) {
+ TRACE("memory alloc failed\n");
+ exit(-1);
+ }
+
+ if (read(fd, content, buf.st_size) != buf.st_size) {
+ TRACE("opening captcha test file failed\n");
+ exit(-1);
+ }
+ close(fd);
+ content[buf.st_size] = 0;
+ *size = buf.st_size;
+ return content;
+}
+/*-----------------------------------------------------------------*/
+char *
+MmapFile(const char *filename, int *size)
+{
+ /* allocate a buffer, read the file into it and
+ return the buffer */
+ char *content = NULL;
+ struct stat buf;
+ int fd;
+
+ *size = -1;
+ if (access(filename, R_OK) < 0
+ || stat(filename, &buf) < 0
+ || (fd = open(filename, O_RDONLY)) < 0) {
+ TRACE("opening captcha file %s failed\n", filename);
+ exit(-1);
+ }
+
+ content = (char *)mmap(NULL, buf.st_size, PROT_READ, MAP_SHARED, fd, 0);
+ close(fd);
+ if (content != MAP_FAILED) {
+ *size = buf.st_size;
+ return content;
+ }
+ return(NULL);
+}
+/*-----------------------------------------------------------------*/
+unsigned int
+HashString(const char *name, unsigned int hash, int endOnQuery,
+ int skipLastIfDot)
+{
+ /* if endOnQuery, we stop the hashing when we hit the question mark.
+ if skipLastIfDot, we check the last component of the path to see
+ if it includes a dot, and it so, we skip it. if both are specified,
+ we first try the query, and if that exists, we don't trim the path */
+
+ int i;
+ int len;
+ char *temp;
+
+ if (name == NULL)
+ return 0;
+
+ len = strlen(name);
+ if (endOnQuery && (temp = strchr(name, '?')) != NULL)
+ len = temp - name;
+ else if (skipLastIfDot) {
+ /* first, find last component by searching backward */
+ if ((temp = strrchr(name, '/')) != NULL) {
+ /* now search forward for the dot */
+ if (strchr(temp, '.') != NULL)
+ len = temp - name;
+ }
+ }
+
+ for (i = 0; i < len; i ++)
+ hash += (_rotl(hash, 19) + name[i]);
+
+ return hash;
+}
+/*-------------------------------------------------------------*/
+unsigned int
+CalcAgentHash(const char* agent)
+{
+ char p[strlen(agent)+1];
+ int i;
+
+ if (agent == NULL)
+ return 0;
+
+ /* we remove all spaces */
+ for (i = 0; *agent; agent++) {
+ if (isspace(*agent))
+ continue;
+ p[i++] = *agent;
+ }
+ p[i] = 0;
+
+ return HashString(p, 0, FALSE, FALSE);
+}
+/*-----------------------------------------------------------------*/
+char *
+ZapSpacesAndZeros(char *src)
+{
+ /* get rid of excess spaces between words, and remove any trailing
+ (post-decimal) zeros from floating point numbers */
+ static char smallLine[4096];
+ char *dst = smallLine;
+ char *word;
+ int addSpace = FALSE;
+
+ while (src != NULL &&
+ (word = GetWord((const unsigned char*)src, 0)) != NULL) {
+ char *temp;
+ int isDotNumber = TRUE;
+
+ src = GetField((const unsigned char*)src, 1); /* advance to next */
+
+ /* check to make sure it has exactly one decimal point */
+ if ((temp = strchr(word, '.')) != NULL &&
+ (temp = strchr(temp+1, '.')) == NULL) {
+ /* make sure it's all digits or the dot */
+ for (temp = word; *temp != '\0'; temp++) {
+ if (!(isdigit(*temp) || *temp == '.')) {
+ isDotNumber = FALSE;
+ break;
+ }
+ }
+ }
+ else
+ isDotNumber = FALSE;
+
+ if (isDotNumber) {
+ /* strip off any trailing zeros and possibly the decimal point */
+ int len = strlen(word) - 1;
+
+ while (word[len] == '0') {
+ word[len] = '\0';
+ len--;
+ }
+ if (word[len] == '.')
+ word[len] = '\0';
+ }
+
+ if (addSpace)
+ sprintf(dst, " %s", word);
+ else
+ sprintf(dst, "%s", word);
+ dst += strlen(dst);
+ addSpace = TRUE;
+ xfree(word);
+ }
+
+ *dst = 0;
+ return(smallLine);
+}
+/*-----------------------------------------------------------------*/
--- /dev/null
+#ifndef _APPLIB_H_
+#define _APPLIB_H_
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/mman.h>
+#include <netdb.h>
+#include <limits.h>
+#include "appdef.h"
+
+float DiffTimeVal(const struct timeval *start, const struct timeval *end);
+
+int CreatePrivateAcceptSocketEx(int portNum,
+ int nonBlocking, int loopbackOnly);
+int CreatePrivateAcceptSocket(int portNum, int nonBlocking);
+int CreatePublicUDPSocket(int portNum);
+int MakeLoopbackConnection(int portNum, int nonBlocking);
+int MakeConnection(char *name, in_addr_t netAddr,
+ int portNum, int nonBlocking);
+
+char *GetField(const unsigned char *start, int whichField);
+char *GetWord(const unsigned char *start, int whichWord);
+char *GetWordEx(const unsigned char *start, int whichWord,
+ char* dest, int max);
+int WordCount(char *buf);
+char *ZapSpacesAndZeros(char *src);
+
+int Base36Digit(int a);
+int Base36To10(int a);
+int PopCount(int val);
+int PopCountChar(int val);
+int LogValChar(int val);
+
+const char *StringOrNull(const char *s);
+char *strchars(const char *string, const char *list);
+char *strnchars(const char *string, int length, const char *list);
+
+#if defined(OS_FREEBSD) || defined(OS_DARWIN)
+#define HAS_STRNSTR
+#endif
+
+#ifndef HAS_STRNSTR
+char *strnstr(const char * s1, int s1_len, const char * s2);
+#endif
+
+char * strncasestr(const char * s1, int s1_len, const char * s2);
+int strncspn(const char* string, int length, const char* reject);
+char *strnchr(const char *string, int length, const char needle);
+char *StrdupLower(const char *orig);
+void StrcpyLower(char *dest, const char *src);
+void StrcpyLowerExcept(char *dest, int dest_max, const char* src, const char* except);
+char *GetLowerNextLine(FILE *file);
+char *GetNextLine(FILE *file);
+char *GetNextLineNoCommStrip(FILE *file);
+int DoesSuffixMatch(char *start, int len, char *suffix);
+int DoesDotlessSuffixMatch(char *start, int len, char *suffix);
+
+/* resolves a name using CoDNS */
+#include "codns.h"
+void HtoN_LocalQueryInfo(LocalQueryInfo *p);
+void NtoH_LocalQueryResult(LocalQueryResult* p);
+int CoDNSGetHostByNameSync(const char *name, struct in_addr *res);
+
+#ifdef OS_LINUX
+#include <alloca.h>
+#endif
+
+#include <string.h>
+
+/* allocate stack memory to copy "src" to "dest" in lower cases */
+#define LOCAL_STR_DUP_LOWER(dest, src) \
+ { dest = alloca(strlen(src) + 1); \
+ StrcpyLower(dest, src); \
+ }
+
+/* allocate stack memory to copy "src" to "dest" */
+#define LOCAL_STR_DUP(dest, src) \
+ { dest = alloca(strlen(src) + 1); \
+ strcpy(dest, src); \
+ }
+
+/* release memory pointer after checking NULL */
+#define FREE_PTR(x) if ((x) != NULL) { xfree(x);}
+
+/* Bit vector implementation */
+void SetBits(int* bits, int idx, int maxNum);
+int GetBits(int* bits, int idx, int maxNum);
+int GetNumBits(int* bitvecs, int maxNum);
+
+/* extended logging */
+typedef void* HANDLE;
+
+HANDLE CreateLogFHandle(const char* signature, int change_file_name_on_save);
+int OpenLogF(HANDLE file);
+int WriteLog(HANDLE file, const char* data, int size, int forceFlush);
+void DailyReopenLogF(HANDLE file);
+int HandleToFileNo(HANDLE file);
+
+/* flush the buffer */
+#define FlushLogF(h) WriteLog(h, NULL, 0, TRUE)
+
+/* maximum single log file size, defined in applib.c */
+extern int maxSingleLogSize;
+
+#include "gettimeofdayex.h"
+
+void FeedbackDelay(struct timeval *start, float minSec, float maxSec);
+
+int NiceExitOpenLog(char *logName);
+void NiceExitBack(int val, char *reason, char *file, int line) __attribute__ ((noreturn));
+#define NiceExit(val, reason) NiceExitBack(val, reason, __FILE__, __LINE__)
+
+char *ReadFile(const char *filename);
+char *ReadFileEx(const char *filename, int *size);
+char *MmapFile(const char *filename, int *size);
+
+/* use 32-bit unsigned integer, higher bits are thrown away */
+#define WORD_BITS 32
+#define MASK 0xFFFFFFFF
+/* bitwise left rotate operator */
+#define _rotl(Val, Bits) ((((Val)<<(Bits)) | (((Val) & MASK)>>(32 - (Bits)))) & MASK)
+
+unsigned int HashString(const char *name, unsigned int hash,
+ int endOnQuery, int skipLastIfDot);
+unsigned int CalcAgentHash(const char* agent);
+#endif
+
--- /dev/null
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#include "applib.h"
+
+#define CONF_FILE "/etc/codemux/codemux.conf"
+#define DEMUX_PORT 80
+#define REAL_WEBSERVER_CONFLINE "* root 1080"
+#define TARG_SETSIZE 4096
+
+/* set aside some small number of fds for us, allow the rest for
+ connections */
+#define MAX_CONNS ((TARG_SETSIZE-20)/2)
+
+/* no single service can take more than half the connections */
+#define SERVICE_MAX (MAX_CONNS/2)
+
+/* how many total connections before we get concerned about fairness
+ among them */
+#define FAIRNESS_CUTOFF (MAX_CONNS * 0.85)
+
+
+typedef struct FlowBuf {
+ int fb_refs; /* num refs */
+ char *fb_buf; /* actual buffer */
+ int fb_used; /* bytes used in buffer */
+} FlowBuf;
+#define FB_SIZE 3800 /* max usable size */
+#define FB_ALLOCSIZE 4000 /* extra to include IP address */
+
+typedef struct SockInfo {
+ int si_peerFd; /* fd of peer */
+ struct in_addr si_cliAddr; /* address of client */
+ int si_blocked; /* are we blocked? */
+ int si_needsHeaderSince; /* since when are we waiting for a header */
+ int si_whichService; /* index of service */
+ FlowBuf *si_readBuf; /* read data into this buffer */
+ FlowBuf *si_writeBuf; /* drain this buffer for writing */
+} SockInfo;
+
+static SockInfo sockInfo[TARG_SETSIZE]; /* fd number of peer socket */
+
+typedef struct ServiceSig {
+ char *ss_host; /* suffix in host */
+ char *ss_slice;
+ short ss_port;
+ int ss_slicePos; /* position in slices array */
+} ServiceSig;
+
+static ServiceSig *serviceSig;
+static int numServices;
+static int confFileReadTime;
+static int now;
+
+typedef struct SliceInfo {
+ char *si_sliceName;
+ int si_inUse; /* do any services refer to this? */
+ int si_numConns;
+ int si_xid;
+} SliceInfo;
+
+static SliceInfo *slices;
+static int numSlices;
+static int numActiveSlices;
+static int numTotalSliceConns;
+static int anySliceXidsNeeded;
+
+typedef struct OurFDSet {
+ long __fds_bits[TARG_SETSIZE/32];
+} OurFDSet;
+static OurFDSet masterReadSet, masterWriteSet;
+static int highestSetFd;
+static int numNeedingHeaders; /* how many conns waiting on headers? */
+
+static int numForks;
+
+HANDLE hdebugLog;
+
+#ifndef SO_SETXID
+#define SO_SETXID SO_PEERCRED
+#endif
+/*-----------------------------------------------------------------*/
+static SliceInfo *
+ServiceToSlice(int whichService)
+{
+ if (whichService < 0)
+ return(NULL);
+ return(&slices[serviceSig[whichService].ss_slicePos]);
+}
+/*-----------------------------------------------------------------*/
+static void
+DumpStatus(int fd)
+{
+ char buf[65535];
+ char *start = buf;
+ int i;
+ int len;
+
+ sprintf(start,
+ "numForks %d, numActiveSlices %d, numTotalSliceConns %d\n"
+ "numNeedingHeaders %d, anySliceXidsNeeded %d\n",
+ numForks, numActiveSlices, numTotalSliceConns,
+ numNeedingHeaders, anySliceXidsNeeded);
+ start += strlen(start);
+
+ for (i = 0; i < numSlices; i++) {
+ SliceInfo *si = &slices[i];
+ sprintf(start, "Slice %d: %s xid %d, %d conns, inUse %d\n",
+ i, si->si_sliceName, si->si_xid, si->si_numConns,
+ si->si_inUse);
+ start += strlen(start);
+ }
+
+ for (i = 0; i < numServices; i++) {
+ ServiceSig *ss = &serviceSig[i];
+ sprintf(start, "Service %d: %s %s port %d, slice# %d\n", i, ss->ss_host,
+ ss->ss_slice, (int) ss->ss_port, ss->ss_slicePos);
+ start += strlen(start);
+ }
+
+ len = start - buf;
+ write(fd, buf, len);
+}
+/*-----------------------------------------------------------------*/
+static void
+GetSliceXids(void)
+{
+ /* walks through /etc/passwd, and gets the uid for every slice we
+ have */
+ FILE *f;
+ char *line;
+ int i;
+
+ if (!anySliceXidsNeeded)
+ return;
+
+ for (i = 0; i < numSlices; i++) {
+ SliceInfo *si = &slices[i];
+ si->si_inUse = 0;
+ }
+ for (i = 0; i < numServices; i++) {
+ SliceInfo *si = ServiceToSlice(i);
+ if (si != NULL)
+ si->si_inUse++;
+ }
+
+ if ((f = fopen("/etc/passwd", "r")) == NULL)
+ return;
+
+ while ((line = GetNextLine(f)) != NULL) {
+ char *temp;
+ int xid;
+
+ if ((temp = strchr(line, ':')) == NULL)
+ continue; /* weird line */
+ *temp = '\0'; /* terminate slice name */
+ temp++;
+ if ((temp = strchr(temp+1, ':')) == NULL)
+ continue; /* weird line */
+ if ((xid = atoi(temp+1)) < 1)
+ continue; /* weird xid */
+
+ /* we've got a slice name and xid, let's try to match */
+ for (i = 0; i < numSlices; i++) {
+ if (slices[i].si_xid == 0 &&
+ strcasecmp(slices[i].si_sliceName, line) == 0) {
+ slices[i].si_xid = xid;
+ break;
+ }
+ }
+ }
+
+ /* assume service 0 is the root service, and don't check it since
+ it'll have xid zero */
+ anySliceXidsNeeded = FALSE;
+ for (i = 1; i < numSlices; i++) {
+ if (slices[i].si_xid == 0 && slices[i].si_inUse > 0) {
+ anySliceXidsNeeded = TRUE;
+ break;
+ }
+ }
+
+ fclose(f);
+}
+/*-----------------------------------------------------------------*/
+static void
+SliceConnsInc(int whichService)
+{
+ SliceInfo *si = ServiceToSlice(whichService);
+
+ if (si == NULL)
+ return;
+ numTotalSliceConns++;
+ si->si_numConns++;
+ if (si->si_numConns == 1)
+ numActiveSlices++;
+}
+/*-----------------------------------------------------------------*/
+static void
+SliceConnsDec(int whichService)
+{
+ SliceInfo *si = ServiceToSlice(whichService);
+
+ if (si == NULL)
+ return;
+ numTotalSliceConns--;
+ si->si_numConns--;
+ if (si->si_numConns == 0)
+ numActiveSlices--;
+}
+/*-----------------------------------------------------------------*/
+static int
+WhichSlicePos(char *slice)
+{
+ /* adds the new slice if necessary, returns the index into slice
+ array. Never change the ordering of existing slices */
+ int i;
+ static int numSlicesAlloc;
+
+ for (i = 0; i < numSlices; i++) {
+ if (strcasecmp(slice, slices[i].si_sliceName) == 0)
+ return(i);
+ }
+
+ if (numSlices >= numSlicesAlloc) {
+ numSlicesAlloc = MAX(8, numSlicesAlloc * 2);
+ slices = realloc(slices, numSlicesAlloc * sizeof(SliceInfo));
+ }
+
+ memset(&slices[numSlices], 0, sizeof(SliceInfo));
+ slices[numSlices].si_sliceName = strdup(slice);
+ numSlices++;
+ return(numSlices-1);
+}
+/*-----------------------------------------------------------------*/
+static void
+ReadConfFile(void)
+{
+ int numAlloc = 0;
+ int num = 0;
+ ServiceSig *servs = NULL;
+ FILE *f;
+ char *line = NULL;
+ struct stat statBuf;
+ int i;
+
+ if (stat(CONF_FILE, &statBuf) != 0) {
+ fprintf(stderr, "failed stat on codemux.conf\n");
+ if (numServices)
+ return;
+ exit(-1);
+ }
+ if (statBuf.st_mtime == confFileReadTime)
+ return;
+
+ if ((f = fopen(CONF_FILE, "r")) == NULL) {
+ fprintf(stderr, "failed reading codemux.conf\n");
+ if (numServices)
+ return;
+ exit(-1);
+ }
+
+ /* conf file entries look like
+ coblitz.codeen.org princeton_coblitz 3125
+ */
+
+ while (1) {
+ ServiceSig serv;
+ int port;
+ if (line != NULL)
+ free(line);
+
+ /* on the first pass, put in a fake entry for apache */
+ if (num == 0)
+ line = strdup(REAL_WEBSERVER_CONFLINE);
+ else {
+ if ((line = GetNextLine(f)) == NULL)
+ break;
+ }
+
+ memset(&serv, 0, sizeof(serv));
+ if (WordCount(line) != 3) {
+ fprintf(stderr, "bad line: %s\n", line);
+ continue;
+ }
+ serv.ss_port = port = atoi(GetField(line, 2));
+ if (port < 1 || port > 65535 || port == DEMUX_PORT) {
+ fprintf(stderr, "bad port: %s\n", line);
+ continue;
+ }
+
+ serv.ss_host = GetWord(line, 0);
+ serv.ss_slice = GetWord(line, 1);
+ if (num >= numAlloc) {
+ numAlloc = MAX(numAlloc * 2, 8);
+ servs = realloc(servs, numAlloc * sizeof(ServiceSig));
+ }
+ serv.ss_slicePos = WhichSlicePos(serv.ss_slice);
+ if (slices[serv.ss_slicePos].si_inUse == 0 &&
+ slices[serv.ss_slicePos].si_xid < 1)
+ anySliceXidsNeeded = TRUE; /* if new/inactive, we need xid */
+ servs[num] = serv;
+ num++;
+ }
+
+ fclose(f);
+
+ if (num == 1) {
+ if (numServices == 0) {
+ fprintf(stderr, "nothing found in codemux.conf\n");
+ exit(-1);
+ }
+ return;
+ }
+
+ for (i = 0; i < numServices; i++) {
+ free(serviceSig[i].ss_host);
+ free(serviceSig[i].ss_slice);
+ }
+ free(serviceSig);
+ serviceSig = servs;
+ numServices = num;
+ confFileReadTime = statBuf.st_mtime;
+}
+/*-----------------------------------------------------------------*/
+static char *err400BadRequest =
+"HTTP/1.0 400 Bad Request\r\n"
+"Content-Type: text/html\r\n"
+"\r\n"
+"You are trying to access a PlanetLab node, and your\n"
+"request header exceeded the allowable size. Please\n"
+"try again if you believe this error is temporary.\n";
+/*-----------------------------------------------------------------*/
+static char *err503Unavailable =
+"HTTP/1.0 503 Service Unavailable\r\n"
+"Content-Type: text/html\r\n"
+"\r\n"
+"You are trying to access a PlanetLab node, but the service\n"
+"seems to be unavailable at the moment. Please try again.\n";
+/*-----------------------------------------------------------------*/
+static char *err503TooBusy =
+"HTTP/1.0 503 Service Unavailable\r\n"
+"Content-Type: text/html\r\n"
+"\r\n"
+"You are trying to access a PlanetLab node, but the service\n"
+"seems to be overloaded at the moment. Please try again.\n";
+/*-----------------------------------------------------------------*/
+static void
+SetFd(int fd, OurFDSet *set)
+{
+ if (highestSetFd < fd)
+ highestSetFd = fd;
+ FD_SET(fd, set);
+}
+/*-----------------------------------------------------------------*/
+static void
+ClearFd(int fd, OurFDSet *set)
+{
+ FD_CLR(fd, set);
+}
+/*-----------------------------------------------------------------*/
+static int
+RemoveHeader(char *lower, char *real, int totalSize, char *header)
+{
+ /* returns number of characters removed */
+ char h2[256];
+ int start, end, len;
+ char *temp, *conn;
+
+ sprintf(h2, "\n%s", header);
+
+ if ((conn = strstr(lower, h2)) == NULL)
+ return(0);
+
+ conn++;
+ /* determine how many characters to remove */
+ if ((temp = strchr(conn, '\n')) != NULL)
+ len = (temp - conn) + 1;
+ else
+ len = strlen(conn) + 1;
+ start = conn - lower;
+ end = start + len;
+ memmove(&real[start], &real[end], totalSize - end);
+ memmove(&lower[start], &lower[end], totalSize - end);
+
+ return(len);
+}
+/*-----------------------------------------------------------------*/
+static int
+InsertHeader(char *buf, int totalSize, char *header)
+{
+ /* returns number of bytes inserted */
+
+ char h2[256];
+ char *temp;
+ int len;
+
+ sprintf(h2, "%s\r\n", header);
+ len = strlen(h2);
+
+ /* if we don't encounter a \n, it means that we have only a single
+ line, and we'd converted the \n to a \0 */
+ if ((temp = strchr(buf, '\n')) == NULL)
+ temp = strchr(buf, '\0');
+ temp++;
+
+ memmove(temp + len, temp, totalSize - (temp - buf));
+ memcpy(temp, h2, len);
+
+ return(len);
+}
+/*-----------------------------------------------------------------*/
+static int
+FindService(FlowBuf *fb, int *whichService, struct in_addr addr)
+{
+ char *end;
+ char *lowerBuf;
+ char *hostVal;
+ char *buf = fb->fb_buf;
+ char orig[256];
+#if 0
+ char *url;
+ int i;
+ int len;
+#endif
+
+ if (strstr(buf, "\n\r\n") == NULL && strstr(buf, "\n\n") == NULL)
+ return(FAILURE);
+
+ /* insert client info after first line */
+ sprintf(orig, "X-CoDemux-Client: %s", inet_ntoa(addr));
+ fb->fb_used += InsertHeader(buf, fb->fb_used + 1, orig);
+
+ /* get just the header, so we can work on it */
+ LOCAL_STR_DUP_LOWER(lowerBuf, buf);
+ if ((end = strstr(lowerBuf, "\n\r\n")) == NULL)
+ end = strstr(lowerBuf, "\n\n");
+ *end = '\0';
+
+ /* remove any existing connection, keep-alive headers, add ours */
+ fb->fb_used -= RemoveHeader(lowerBuf, buf, fb->fb_used + 1, "keep-alive:");
+ fb->fb_used -= RemoveHeader(lowerBuf, buf, fb->fb_used + 1, "connection:");
+ fb->fb_used += InsertHeader(buf, fb->fb_used + 1, "Connection: close");
+ InsertHeader(lowerBuf, fb->fb_used + 1, "connection: close");
+
+ /* isolate host, see if it matches */
+ if ((hostVal = strstr(lowerBuf, "\nhost:")) != NULL) {
+ int i;
+ hostVal += strlen("\nhost:");
+ if ((end = strchr(hostVal, '\n')) != NULL)
+ *end = '\0';
+ if ((end = strchr(hostVal, ':')) != NULL)
+ *end = '\0';
+ while (isspace(*hostVal))
+ hostVal++;
+ if (strlen(hostVal) > 0) {
+ hostVal = GetWord(hostVal, 0);
+ for (i = 1; i < numServices; i++) {
+ if (serviceSig[i].ss_host != NULL &&
+ DoesDotlessSuffixMatch(hostVal, 0, serviceSig[i].ss_host)) {
+ *whichService = i;
+ free(hostVal);
+ /* printf("%s", buf); */
+ return(SUCCESS);
+ }
+ }
+ free(hostVal);
+ }
+ }
+
+#if 0
+ /* see if URL prefix matches */
+ if ((end = strchr(lowerBuf, '\n')) != NULL)
+ *end = 0;
+ if ((url = GetField(lowerBuf, 1)) == NULL ||
+ url[0] != '/') {
+ /* bad request - let apache handle it ? */
+ *whichService = 0;
+ return(SUCCESS);
+ }
+ url++; /* skip the leading slash */
+ for (i = 1; i < numServices; i++) {
+ if (serviceSig[i].ss_prefix != NULL &&
+ (len = strlen(serviceSig[i].ss_prefix)) > 0 &&
+ strncmp(url, serviceSig[i].ss_prefix, len) == 0 &&
+ (url[len] == ' ' || url[len] == '/')) {
+ int startPos = url - lowerBuf;
+ int stripLen = len + ((url[len] == '/') ? 1 : 0);
+ /* strip out prefix */
+ fb->fb_used -= stripLen;
+ memmove(&buf[startPos], &buf[startPos+stripLen],
+ fb->fb_used + 1 - startPos);
+ /* printf("%s", buf); */
+ *whichService = i;
+ return(SUCCESS);
+ }
+ }
+#endif
+
+ /* default to first service */
+ *whichService = 0;
+ return(SUCCESS);
+}
+/*-----------------------------------------------------------------*/
+static int
+StartConnect(int origFD, int whichService)
+{
+ int sock;
+ struct sockaddr_in dest;
+ SockInfo *si;
+
+ /* create socket */
+ if ((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) {
+ return(FAILURE);
+ }
+
+ /* make socket non-blocking */
+ if (fcntl(sock, F_SETFL, O_NONBLOCK) < 0) {
+ close(sock);
+ return(FAILURE);
+ }
+
+ /* set addr structure */
+ memset(&dest, 0, sizeof(dest));
+ dest.sin_family = AF_INET;
+ dest.sin_port = htons(serviceSig[whichService].ss_port);
+ dest.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+ /* start connection process - we should be told that it's in
+ progress */
+ if (connect(sock, (struct sockaddr *) &dest, sizeof(dest)) != -1 ||
+ errno != EINPROGRESS) {
+ close(sock);
+ return(FAILURE);
+ }
+
+ SetFd(sock, &masterWriteSet); /* determine when connect finishes */
+ sockInfo[origFD].si_peerFd = sock;
+ si = &sockInfo[sock];
+ memset(si, 0, sizeof(SockInfo));
+ si->si_peerFd = origFD;
+ si->si_blocked = TRUE; /* still connecting */
+ si->si_whichService = whichService;
+ si->si_writeBuf = sockInfo[origFD].si_readBuf;
+ sockInfo[origFD].si_readBuf->fb_refs++;
+ if (whichService >= 0)
+ SliceConnsInc(whichService);
+
+ return(SUCCESS);
+}
+/*-----------------------------------------------------------------*/
+static int
+WriteAvailData(int fd)
+{
+ SockInfo *si = &sockInfo[fd];
+ FlowBuf *fb = si->si_writeBuf;
+ int res;
+
+ /* printf("trying to write fd %d\n", fd); */
+ if (fb->fb_used < 1 || si->si_blocked)
+ return(SUCCESS);
+
+ /* printf("trying to write %d bytes\n", fb->fb_used); */
+ /* write(STDOUT_FILENO, fb->fb_buf, fb->fb_used); */
+ if ((res = write(fd, fb->fb_buf, fb->fb_used)) > 0) {
+ fb->fb_used -= res;
+ if (fb->fb_used > 0) {
+ /* couldn't write all - assume blocked */
+ memmove(fb->fb_buf, &fb->fb_buf[res], fb->fb_used);
+ si->si_blocked = TRUE;
+ SetFd(fd, &masterWriteSet);
+ }
+ /* printf("wrote %d\n", res); */
+ return(SUCCESS);
+ }
+
+ /* we might have been full but didn't realize it */
+ if (res == -1 && errno == EAGAIN) {
+ si->si_blocked = TRUE;
+ SetFd(fd, &masterWriteSet);
+ return(SUCCESS);
+ }
+
+ /* otherwise, assume the worst */
+ return(FAILURE);
+}
+/*-----------------------------------------------------------------*/
+static OurFDSet socksToCloseVec;
+static int numSocksToClose;
+static int whichSocksToClose[TARG_SETSIZE];
+/*-----------------------------------------------------------------*/
+static void
+CloseSock(int fd)
+{
+ if (FD_ISSET(fd, &socksToCloseVec))
+ return;
+ SetFd(fd, &socksToCloseVec);
+ whichSocksToClose[numSocksToClose] = fd;
+ numSocksToClose++;
+}
+/*-----------------------------------------------------------------*/
+static void
+DecBuf(FlowBuf *buf)
+{
+ if (buf == NULL)
+ return;
+ buf->fb_refs--;
+ if (buf->fb_refs == 0) {
+ free(buf->fb_buf);
+ free(buf);
+ }
+}
+/*-----------------------------------------------------------------*/
+static void
+ReallyCloseSocks(void)
+{
+ int i;
+
+ memset(&socksToCloseVec, 0, sizeof(socksToCloseVec));
+
+ for (i = 0; i < numSocksToClose; i++) {
+ int fd = whichSocksToClose[i];
+ close(fd);
+ DecBuf(sockInfo[fd].si_readBuf);
+ DecBuf(sockInfo[fd].si_writeBuf);
+ ClearFd(fd, &masterReadSet);
+ ClearFd(fd, &masterWriteSet);
+ if (sockInfo[fd].si_needsHeaderSince) {
+ sockInfo[fd].si_needsHeaderSince = 0;
+ numNeedingHeaders--;
+ }
+ if (sockInfo[fd].si_whichService >= 0) {
+ SliceConnsDec(sockInfo[fd].si_whichService);
+ sockInfo[fd].si_whichService = -1;
+ }
+ }
+ numSocksToClose = 0;
+}
+/*-----------------------------------------------------------------*/
+static void
+SocketReadyToRead(int fd)
+{
+ SockInfo *si = &sockInfo[fd];
+ int spaceLeft;
+ FlowBuf *fb;
+ int res;
+
+ /* if peer is closed, close ourselves */
+ if (si->si_peerFd < 0 && (!si->si_needsHeaderSince)) {
+ CloseSock(fd);
+ return;
+ }
+
+ if ((fb = si->si_readBuf) == NULL) {
+ fb = si->si_readBuf = calloc(1, sizeof(FlowBuf));
+ fb->fb_refs = 1;
+ if (si->si_peerFd >= 0) {
+ sockInfo[si->si_peerFd].si_writeBuf = fb;
+ fb->fb_refs = 2;
+ }
+ }
+
+ if (fb->fb_buf == NULL)
+ fb->fb_buf = malloc(FB_ALLOCSIZE);
+
+ /* determine read buffer size - if 0, then block reads and return */
+ if ((spaceLeft = FB_SIZE - fb->fb_used) < 0) {
+ if (si->si_needsHeaderSince) {
+ write(fd, err400BadRequest, strlen(err400BadRequest));
+ CloseSock(fd);
+ return;
+ }
+ else {
+ ClearFd(fd, &masterReadSet);
+ return;
+ }
+ }
+
+ /* read as much as allowed, and is available */
+ if ((res = read(fd, &fb->fb_buf[fb->fb_used], spaceLeft)) == 0) {
+ CloseSock(fd);
+ if (fb->fb_used == 0 && si->si_peerFd >= 0) {
+ CloseSock(si->si_peerFd);
+ si->si_peerFd = -1;
+ }
+ return;
+ }
+ if (res == -1) {
+ if (errno == EAGAIN)
+ return;
+ CloseSock(fd);
+ if (fb->fb_used == 0 && si->si_peerFd >= 0) {
+ CloseSock(si->si_peerFd);
+ si->si_peerFd = -1;
+ }
+ return;
+ }
+ fb->fb_used += res;
+ fb->fb_buf[fb->fb_used] = 0; /* terminate it for convenience */
+ printf("sock %d, read %d, total %d\n", fd, res, fb->fb_used);
+
+ /* if we need header, check if we've gotten it. if so, do
+ modifications and continue. if not, check if we've read the
+ maximum, and if so, fail */
+ if (si->si_needsHeaderSince) {
+ int whichService;
+ SliceInfo *slice;
+
+#define STATUS_REQ "GET /codemux/status.txt"
+ if (strncasecmp(fb->fb_buf, STATUS_REQ, sizeof(STATUS_REQ)-1) == 0) {
+ DumpStatus(fd);
+ CloseSock(fd);
+ return;
+ }
+
+ printf("trying to find service\n");
+ if (FindService(fb, &whichService, si->si_cliAddr) != SUCCESS)
+ return;
+ printf("found service %d\n", whichService);
+ slice = ServiceToSlice(whichService);
+
+ /* no service can have more than some absolute max number of
+ connections. Also, when we're too busy, start enforcing
+ fairness across the servers */
+ if (slice->si_numConns > SERVICE_MAX ||
+ (numTotalSliceConns > FAIRNESS_CUTOFF &&
+ slice->si_numConns > MAX_CONNS/numActiveSlices)) {
+ write(fd, err503TooBusy, strlen(err503TooBusy));
+ CloseSock(fd);
+ return;
+ }
+
+ if (slice->si_xid > 0) {
+ setsockopt(fd, SOL_SOCKET, SO_SETXID,
+ &slice->si_xid, sizeof(slice->si_xid));
+ fprintf(stderr, "setsockopt() with XID = %d name = %s\n",
+ slice->si_xid, slice->si_sliceName);
+ }
+
+ si->si_needsHeaderSince = 0;
+ numNeedingHeaders--;
+ if (StartConnect(fd, whichService) != SUCCESS) {
+ write(fd, err503Unavailable, strlen(err503Unavailable));
+ CloseSock(fd);
+ return;
+ }
+ return;
+ }
+
+ /* write anything possible */
+ if (WriteAvailData(si->si_peerFd) != SUCCESS) {
+ /* assume the worst and close */
+ CloseSock(fd);
+ CloseSock(si->si_peerFd);
+ si->si_peerFd = -1;
+ }
+}
+/*-----------------------------------------------------------------*/
+static void
+SocketReadyToWrite(int fd)
+{
+ SockInfo *si = &sockInfo[fd];
+
+ /* unblock it and read what it has */
+ si->si_blocked = FALSE;
+ ClearFd(fd, &masterWriteSet);
+ SetFd(fd, &masterReadSet);
+
+ /* enable reading on peer just in case it was off */
+ if (si->si_peerFd >= 0)
+ SetFd(si->si_peerFd, &masterReadSet);
+
+ /* if we have data, write it */
+ if (WriteAvailData(fd) != SUCCESS) {
+ /* assume the worst and close */
+ CloseSock(fd);
+ if (si->si_peerFd >= 0) {
+ CloseSock(si->si_peerFd);
+ si->si_peerFd = -1;
+ }
+ return;
+ }
+
+ /* if peer is closed and we're done writing, we should close */
+ if (si->si_peerFd < 0 && si->si_writeBuf->fb_used == 0)
+ CloseSock(fd);
+}
+/*-----------------------------------------------------------------*/
+static void
+CloseReqlessConns(void)
+{
+ static int lastSweep;
+ int maxAge;
+ int i;
+
+ if (lastSweep == now)
+ return;
+ lastSweep = now;
+
+ if (numTotalSliceConns + numNeedingHeaders > MAX_CONNS ||
+ numNeedingHeaders > TARG_SETSIZE/20) {
+ /* second condition is probably an attack - close aggressively */
+ maxAge = 5;
+ }
+ else if (numTotalSliceConns + numNeedingHeaders > FAIRNESS_CUTOFF ||
+ numNeedingHeaders > TARG_SETSIZE/40) {
+ /* sweep a little aggressively */
+ maxAge = 10;
+ }
+ else if (numNeedingHeaders > TARG_SETSIZE/80) {
+ /* just sweep to close strays */
+ maxAge = 30;
+ }
+ else {
+ /* too little gained - not worth sweeping */
+ return;
+ }
+
+ /* if it's too old, close it */
+ for (i = 0; i < highestSetFd+1; i++) {
+ if (sockInfo[i].si_needsHeaderSince &&
+ (now - sockInfo[i].si_needsHeaderSince) > maxAge)
+ CloseSock(i);
+ }
+}
+/*-----------------------------------------------------------------*/
+static void
+MainLoop(int lisSock)
+{
+ int i;
+ OurFDSet tempReadSet, tempWriteSet;
+ int res;
+ int lastConfCheck = 0;
+
+ signal(SIGPIPE, SIG_IGN);
+
+ while (1) {
+ int newSock;
+ int ceiling;
+ struct timeval timeout;
+
+ now = time(NULL);
+
+ if (now - lastConfCheck > 300) {
+ ReadConfFile();
+ GetSliceXids(); /* always call - in case new slices created */
+ lastConfCheck = now;
+ }
+
+ /* see if there's any activity */
+ tempReadSet = masterReadSet;
+ tempWriteSet = masterWriteSet;
+
+ /* trim it down if needed */
+ while (highestSetFd > 1 &&
+ (!FD_ISSET(highestSetFd, &tempReadSet)) &&
+ (!FD_ISSET(highestSetFd, &tempWriteSet)))
+ highestSetFd--;
+ timeout.tv_sec = 1;
+ timeout.tv_usec = 0;
+ res = select(highestSetFd+1, (fd_set *) &tempReadSet,
+ (fd_set *) &tempWriteSet, NULL, &timeout);
+ if (res < 0 && errno != EINTR) {
+ perror("select");
+ exit(-1);
+ }
+
+ now = time(NULL);
+
+ /* clear the bit for listen socket to avoid confusion */
+ ClearFd(lisSock, &tempReadSet);
+
+ ceiling = highestSetFd+1; /* copy it, since it changes during loop */
+ /* pass data back and forth as needed */
+ for (i = 0; i < ceiling; i++) {
+ if (FD_ISSET(i, &tempWriteSet))
+ SocketReadyToWrite(i);
+ }
+ for (i = 0; i < ceiling; i++) {
+ if (FD_ISSET(i, &tempReadSet))
+ SocketReadyToRead(i);
+ }
+
+ /* see if we need to close conns w/o requests */
+ CloseReqlessConns();
+
+ /* do all closes */
+ ReallyCloseSocks();
+
+ /* try accepting new connections */
+ do {
+ struct sockaddr_in addr;
+ socklen_t lenAddr = sizeof(addr);
+ if ((newSock = accept(lisSock, (struct sockaddr *) &addr,
+ &lenAddr)) >= 0) {
+ memset(&sockInfo[newSock], 0, sizeof(SockInfo));
+ sockInfo[newSock].si_needsHeaderSince = now;
+ numNeedingHeaders++;
+ sockInfo[newSock].si_peerFd = -1;
+ sockInfo[newSock].si_cliAddr = addr.sin_addr;
+ sockInfo[newSock].si_whichService = -1;
+ SetFd(newSock, &masterReadSet);
+ }
+ } while (newSock >= 0);
+ }
+}
+/*-----------------------------------------------------------------*/
+int
+main(int argc, char *argv[])
+{
+ int lisSock;
+
+ if ((lisSock = CreatePrivateAcceptSocket(DEMUX_PORT, TRUE)) < 0) {
+ fprintf(stderr, "failed creating accept socket\n");
+ exit(-1);
+ }
+ SetFd(lisSock, &masterReadSet);
+
+ while (1) {
+ numForks++;
+ if (fork()) {
+ /* this is the parent - just wait */
+ while (wait3(NULL, 0, NULL) < 1)
+ ; /* just keep waiting for a real pid */
+ }
+ else {
+ /* child process */
+ MainLoop(lisSock);
+ exit(-1);
+ }
+ }
+}
+/*-----------------------------------------------------------------*/
--- /dev/null
+coblitz.codeen.org princeton_coblitz 3125
+cdn.rd.tp.pl princeton_coblitz 3125
+nyud.net nyu_d 8080
+nyucd.net nyu_d 8080
+nyuld.net nyu_oasis 8096
+cob-web.org cornell_cobweb 8888
--- /dev/null
+#!/bin/sh
+#
+# chkconfig: 345 85 02
+# description: codemux startup script
+#
+
+PROC=codemux
+
+. /etc/rc.d/init.d/functions
+
+RETVAL=0
+
+pidfile=/var/run/$PROC.pid
+
+check_status() {
+ pid=`cat $pidfile 2>/dev/null`
+ #
+ # this eliminates a race condition between checking existence of pidfile
+ # and reading its value
+ #
+ [ -n "$pid" -a -d /proc/$pid ]
+}
+
+case "$1" in
+ start)
+ echo -n "starting $PROC:"
+ pid=`cat $pidfile 2>/dev/null`
+ if [ -n "$pid" ]; then
+ # check whether process really exists
+ # yes - don't try to start
+ [ -d /proc/$pid ] && action "already running" /bin/true && exit 1
+
+ # no - PID file is stale
+ rm -f $pidfile
+ fi
+
+ initlog -c /usr/local/planetlab/sbin/codemux
+
+ cmd=success
+ check_status && touch /var/lock/subsys/$PROC || cmd=failure
+ $cmd "$PROC startup"
+ echo
+ ;;
+
+ stop)
+ echo -n "shutting down $PROC: "
+ killproc $PROC
+ RETVAL=$?
+ echo
+ [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/$PROC
+ ;;
+
+ restart|reload)
+ $0 stop
+ $0 start
+ RETVAL=$?
+ ;;
+
+ status)
+ check_status && echo 'running' && exit 0 || \
+ echo 'not running' && exit 1
+ ;;
+
+ *)
+ echo "Usage: $0 {start|stop|restart|status}"
+ RETVAL=1
+esac
+
+exit $RETVAL
--- /dev/null
+Summary: CoDemux - HTTP port demultiplexer
+Name: codemux
+Version: 0.5
+Release: 1
+License: Private
+Group: System Environment/Base
+URL: http://codeen.cs.princeton.edu/
+Source0: %{name}-%{version}.tar.gz
+BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
+
+%description
+
+%prep
+%setup -q
+
+make clean
+
+%build
+make
+
+%install
+rm -rf $RPM_BUILD_ROOT
+
+make INSTALL_ROOT=$RPM_BUILD_ROOT install-all
+
+# Build and install in %post so that it gets put in the right site-packages directory
+#rm -rf $RPM_BUILD_ROOT/usr/lib/python*
+#install -D -m 755 python/Proper.py $RPM_BUILD_ROOT/%{_datadir}/%{name}/Proper.py
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%files
+%defattr(-,root,root,-)
+%attr(0755,root,root) %{_initrddir}/codemux
+%config /etc/codemux/codemux.conf
+%attr(0755,root,root) /usr/local/planetlab/sbin/codemux
+
+%post
+chkconfig codemux reset
+
+if [ -z "$PL_BOOTCD" ]; then
+ /sbin/ldconfig
+ /etc/init.d/codemux restart
+fi
+
+%preun
+if [ "$1" = 0 ]; then
+ # erase, not upgrade
+ chkconfig --del codemux
+
+ # stop daemon if its currently running
+ if [ "`/etc/init.d/codemux status`" = "running" ]; then
+ /etc/init.d/codemux stop
+ fi
+fi
+
+%doc
+
+%changelog
+* Sun Apr 22 2007 KYOUNGSOO PARK <kyoungso@park.cs.princeton.edu> -
+- Initial build.
+
--- /dev/null
+#ifndef _CODNS_H_
+#define _CODNS_H_
+#include "ports.h"
+
+/* query info - fixed part */
+typedef struct LocalQueryInfo {
+ int lqi_size; /* length of the name string */
+ int lqi_id; /* query id */
+ int lqi_cache; /* not being used now */
+} LocalQueryInfo;
+
+/* query info + name
+ query structure expected from a client */
+#define MAX_QUERY_NAME 256
+#define SIG_SPLIT_TRANSACTION 0 /* signature for split-transaction */
+typedef struct LocalQuery {
+ int lq_zero; /* always set to SIG_SPLIT_TRANSACTION(=0) */
+ LocalQueryInfo lq_info; /* query info */
+ char lq_name[MAX_QUERY_NAME]; /* name */
+} LocalQuery;
+
+/* query result from CoDNS
+ we set MAX_ANSWERS for easy implementation.
+ if lq.address[i].s_addr == 0, that means it returned i-1 valid anwers. */
+#define MAX_ANSWERS 8
+typedef struct LocalQueryResult {
+ int lq_id; /* query id */
+ int lq_ttl; /* TTL of the record */
+ struct in_addr lq_address[MAX_ANSWERS]; /* IP addresses for the query */
+} LocalQueryResult;
+
+/*----------------------------------------------------------------------*/
+
+/* temporary section : from here to the end
+ used for defining variables or constants for testing */
+
+/* for testing in HBTWGET */
+#define HBTWGET_CODNS_ID (-3)
+
+#endif // _CODNS_H_
+
+
--- /dev/null
+#ifndef _DEBUG_H_
+#define _DEBUG_H_
+#include <stdio.h>
+#include "applib.h"
+#undef max
+
+
+/*
+ TRACE : print with function name
+ TRACE0 : print without function name
+ TRACE1 : print "buf" whose size is "size"
+*/
+
+#define DEBUG
+
+extern HANDLE hdebugLog;
+extern int defaultTraceSync;
+#define TRACE0(fmt, msg...) { \
+ char __buf[2048]; \
+ if (hdebugLog) { \
+ snprintf(__buf, sizeof(__buf), fmt, ##msg); \
+ WriteLog(hdebugLog, __buf, strlen(__buf), defaultTraceSync); \
+ } \
+}
+#define TRACE1(buf, size) { \
+ WriteLog(hdebugLog, buf, size, defaultTraceSync); \
+}
+#define TRACE(fmt, msg...) { \
+ char __buf[2048]; \
+ if (hdebugLog) { \
+ snprintf(__buf, sizeof(__buf), "[%s] " fmt, __FUNCTION__, ##msg); \
+ WriteLog(hdebugLog, __buf, strlen(__buf), defaultTraceSync); \
+ } \
+}
+#define TRACEX(fmt) { \
+ char __buf[2048]; \
+ if (hdebugLog) { \
+ snprintf(__buf, sizeof(__buf), "[%s] " fmt, __FUNCTION__); \
+ WriteLog(hdebugLog, __buf, strlen(__buf), defaultTraceSync); \
+ } \
+}
+
+#ifndef HERE
+#define HERE TRACE("file %s, line %d, func %s\n", __FILE__, __LINE__, __FUNCTION__)
+#endif
+
+#ifdef DEBUG
+#define ASSERT(exp) { \
+ if (!(exp)) { \
+ TRACE("ASSERTION (%s) FAILED in %s (%s:%d)\n", \
+ (#exp), __FUNCTION__, __FILE__, __LINE__); \
+ } \
+}
+#else
+#define ASSERT(exp) 1 ? (void)0 : (exp)
+#endif // DEBUG
+
+/*--------------------------------------------------------------
+ macros used for debugging memory leaks
+ if DEBUG_MEMORY_LEAK is enabled, we track down all the memory
+ allocation/freeing to count the number of allocations
+ -------------------------------------------------------------*/
+//#define DEBUG_MEMORY_LEAK
+
+#ifndef DEBUG_MEMORY_LEAK
+
+#define xcalloc(nmemb, size) calloc(nmemb, size)
+#define xmalloc(size) malloc(size)
+#define xrealloc(ptr, size) realloc(ptr, size)
+#define xstrdup(s) strdup(s)
+#define xfree(ptr) free(ptr)
+
+#else
+
+#define xcalloc(nmemb, size) dbgcalloc(nmemb, size, __FUNCTION__, \
+ __FILE__, __LINE__)
+#define xmalloc(size) dbgmalloc(size, __FUNCTION__,\
+ __FILE__, __LINE__)
+#define xrealloc(ptr, size) dbgrealloc(ptr, size, __FUNCTION__,\
+ __FILE__, __LINE__)
+#define xstrdup(s) dbgstrdup(s, __FUNCTION__, __FILE__, __LINE__)
+#define xfree(ptr) dbgfree(ptr, __FUNCTION__, __FILE__, __LINE__)
+
+void *dbgcalloc(size_t nmemb, size_t size,
+ const char* func, const char* file, const int line);
+void *dbgmalloc(size_t size,
+ const char* func, const char* file, const int line);
+void *dbgrealloc(void *ptr, size_t size,
+ const char* func, const char* file, const int line);
+char *dbgstrdup(const char *s,
+ const char* func, const char* file, const int line);
+void dbgfree(void *ptr, const char* func, const char* file, const int line);
+void dbg_print_memtrace(void);
+
+#endif /* DEBUG_MEMOTY_LEAK */
+
+#endif /* _DEBUG_H_ */
--- /dev/null
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gettimeofdayex.h"
+
+#if defined(__linux) && !defined(ALPHA)
+
+/* Macros */
+#define rdtsc(low, high) \
+ asm volatile("rdtsc":"=a" (low), "=d" (high))
+
+/* get cycle counts */
+#define GET_CC(cc) \
+do \
+{ \
+ cc = 0; \
+ unsigned long __cc_low__,__cc_high__; \
+ rdtsc(__cc_low__,__cc_high__); \
+ cc = __cc_high__; \
+ cc = (cc << 32) + __cc_low__; \
+} \
+while(0)
+
+/*-------------------------------------------------------------------------*/
+static int
+get_cpu_speed(double* cpu_speed)
+{
+ FILE* fp = NULL;
+ char *line = NULL;
+ size_t len = 0;
+ ssize_t num;
+ char* ptr = 0;
+
+ if (cpu_speed == NULL)
+ return -1;
+
+ if ((fp = fopen("/proc/cpuinfo", "r")) == NULL)
+ return -1;
+
+ while ((num = getline(&line, &len, fp)) != -1) {
+ ptr = strtok(line, ":\n");
+ if (ptr && strstr(ptr, "cpu MHz")) {
+ ptr = strtok(0," \r\t\n");
+ *cpu_speed = strtod(ptr, 0);
+ break;
+ }
+ }
+ if (line)
+ free(line);
+
+ fclose(fp);
+ return (*cpu_speed == 0) ? -1 : 0;
+}
+
+/*--------------------------------------------------------------------------*/
+int
+gettimeofdayex(struct timeval *tv, struct timezone *tz)
+{
+#define MAX_64_BIT_NUM ((unsigned long long)(-1))
+
+ /* TO DO: timezone adjustment */
+ static int first = 1, impossible = 0;
+ static double cpu_speed;
+ static struct timeval start;
+ static unsigned long long cc_start;
+ unsigned long long cc_now, cc_diff, usec;
+
+ /* initialize : get the current time
+ and fix it as a starting point */
+ if (first) {
+ if (get_cpu_speed(&cpu_speed) < 0)
+ impossible = 1;
+
+ GET_CC(cc_start);
+ gettimeofday(&start, 0);
+ if (tv)
+ *tv = start;
+ first = 0;
+ return 0;
+ }
+
+ /* if it's impossible to get cycle counts,
+ then use original gettimeofday() */
+ if (impossible)
+ return gettimeofday(tv, tz);
+
+ if (tv) {
+ GET_CC(cc_now);
+
+ /* when overflow happens, we need to take care of the carry bit,
+ otherwise we get wrong result since we are using unsigned variables.
+ assuming cycle counter starts from zero at time zero,
+ this would happen after 136 years on 4GHz CPU.
+ however, lets just play on the safer side.
+ */
+
+ cc_diff = (cc_now < cc_start) ? cc_now + (MAX_64_BIT_NUM - cc_start)
+ : cc_now - cc_start;
+ usec = (unsigned long long)(cc_diff / cpu_speed);
+
+ *tv = start;
+ tv->tv_sec += (usec / 1000000);
+ tv->tv_usec += (usec % 1000000);
+
+ if (tv->tv_usec >= 1000000) {
+ tv->tv_sec++;
+ tv->tv_usec -= 1000000;
+ }
+ }
+
+ return 0;
+}
+
+/*--------------------------------------------------------------------------*/
+time_t
+timeex(time_t *t)
+{
+ /* simple version of time() */
+ struct timeval s;
+
+ gettimeofdayex(&s, NULL);
+ if (t)
+ *t = (time_t)s.tv_sec;
+
+ return (time_t)s.tv_sec;
+}
+
+#else
+#include <time.h>
+/*--------------------------------------------------------------------------*/
+int
+gettimeofdayex(struct timeval *tv, struct timezone *tz)
+{
+ return(gettimeofday(tv, tz));
+}
+/*--------------------------------------------------------------------------*/
+time_t
+timeex(time_t *t)
+{
+ return(time(t));
+}
+/*--------------------------------------------------------------------------*/
+#endif
--- /dev/null
+#ifndef _GETTIMEOFDAYEX_H_
+#define _GETTIMEOFDAYEX_H_
+#include <sys/time.h>
+
+int gettimeofdayex(struct timeval *tv, struct timezone *tz);
+time_t timeex(time_t *t);
+
+#endif // _GETTIMEOFDAYEX_H_
--- /dev/null
+#ifndef _PORTS_H_
+#define _PORTS_H_
+
+#define PORTS_COBLITZ
+
+#ifdef PORTS_CODEEN
+
+#define HOMEDIR "/home/princeton_codeen"
+/* options in prox.conf or defined implicitly
+ 1161 snmpPort
+ 31415 APIDummyServPort
+ 3130 ICPPort */
+/* different ports for CoDNS and CoDNS2 */
+#ifdef CODNS2
+#define CODNS_PORT 24119 /* CODNS2 PORT */
+#define CODNS_STAT_PORT 24118 /* CODNS2 STATISTICS PORT */
+#define CODNS_HB_PORT 24121
+#else
+#define CODNS_PORT 4119 /* CODNS PORT */
+#define CODNS_STAT_PORT 4118 /* CODNS STATISTICS PORT */
+#define CODNS_HB_PORT 4121
+#endif
+
+#define HB_PORT 23127 /* CoDeeN heartbeat port */
+#define CODEMON_PORT 2123
+/* 3124 proxy listen port */
+#define CODEPLOY_PORT 2125
+#define FAKE_WEBSERVER_PORT 3126
+#define FAKE_WS_PORT_STR "3126"
+/* 3127 proxy listen port */
+/* 3128 proxy listen port */
+#define MAIN_PROXYPORT_STR "3128"
+#define CODEEN_TM_PORT 3129
+#define PROXY_PORT 3128
+#define PROXY_PORT2 3127
+#define PROXY_PORT3 3124
+
+/* used by redir_helper.c */
+#define QUERY_PORT 3122
+#define RTTSNAP_PORT 3110
+
+#elif defined(PORTS_COBLITZ)
+
+#define HOMEDIR "/home/princeton_coblitz"
+/* options in prox.conf
+ 2111 snmpPort
+ 2112 APIDummyServPort
+ 2113 ICPPort */
+#define CODNS_PORT 2119 /* CODNS PORT */
+#define CODNS_STAT_PORT 2120 /* CODNS STATISTICS PORT */
+#define CODNS_HB_PORT 2121
+#define HB_PORT 2122 /* CoDeeN heartbeat port */
+#define CODEMON_PORT 23126
+/* 2124 proxy listen port */
+#define CODEPLOY_PORT 3125
+#define FAKE_WEBSERVER_PORT 2126
+#define FAKE_WS_PORT_STR "2126"
+/* 2127 proxy listen port */
+/* 2128 proxy listen port */
+#define MAIN_PROXYPORT_STR "2128"
+#define CODEEN_TM_PORT 2129
+#define PROXY_PORT 2128
+#define PROXY_PORT2 2127
+#define PROXY_PORT3 2124
+
+/* used by redir_helper.c */
+#define QUERY_PORT 2122
+#define RTTSNAP_PORT 2110
+
+#else
+#error "need to define either PORTS_CODEEN or PORTS_COBLITZ"
+#endif
+
+
+#endif
+
+
+/*
+
+other changed files: prox_monitor prox.conf
+
+ 4119 tcp 415 princeton_codeen codns
+ 4119 udp 415 princeton_codeen codns
+ 4118 tcp 412 princeton_codeen codns
+ 4121 udp 411 princeton_codeen codns
+
+23126 tcp 412 princeton_codeen codemon
+23126 udp 412 princeton_codeen codemon
+23127 tcp 399 princeton_codeen codeen
+23127 udp 399 princeton_codeen codeen
+ 3126 tcp 399 princeton_codeen codeen
+ 1161 udp 401 princeton_codeen specified via snmpPort
+31415 tcp 401 princeton_codeen can be specified via APIDummyServPort
+ 3124 tcp 399 princeton_codeen specified in prox.conf
+ 3127 tcp 399 princeton_codeen specified in prox.conf
+ 3128 tcp 398 princeton_codeen specified in prox.conf
+ 3129 tcp 399 princeton_codeen codeen traffic mon
+ 3130 udp 401 princeton_codeen can be specified from ICPPort
+ 3125 tcp 410 princeton_codeen codeploy
+ 3135 tcp 370 princeton_codeen
+
+*/
--- /dev/null
+/*
+ * VServer IP isolation.
+ *
+ * This file implements netfilter hooks and AF_INET socket function
+ * overrides.
+ *
+ * Mark Huang <mlhuang@cs.princeton.edu>
+ * Copyright (C) 2004 The Trustees of Princeton University
+ *
+ * $Id: vnet_main.c,v 1.40 2007/03/08 15:46:07 mef Exp $
+ */
+
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/pkt_sched.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/slab.h>
+#include <net/sock.h>
+#include <net/route.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+#include "vnet_config.h"
+#include "vnet.h"
+#include "vnet_dbg.h"
+#include "vnet_compat.h"
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+
+#define HAVE_FUNCTIONALITY_REQUIRED_BY_DEMUX
+
+#include <net/inet_hashtables.h>
+
+static inline void
+vnet_timewait_put(struct sock* sk)
+{
+ inet_twsk_put((struct inet_timewait_sock *)sk);
+}
+
+static inline struct sock*
+vnet_tcp_lookup(u32 src_ip, u16 src_port,
+ u32 ip, u16 port, int dif)
+{
+ return inet_lookup(&tcp_hashinfo, src_ip, src_port, ip, port, dif);
+}
+
+static inline int vnet_iif(const struct sk_buff *skb)
+{
+ return inet_iif(skb);
+}
+#endif
+
+#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,12)
+
+#define HAVE_FUNCTIONALITY_REQUIRED_BY_DEMUX
+
+static inline void
+vnet_timewait_put(struct sock* sk)
+{
+ /* net/tcp.h */
+ tcp_tw_put((struct tcp_tw_bucket*)sk);
+}
+
+static inline struct sock*
+vnet_tcp_lookup(u32 saddr, u16 sport, u32 daddr,u16 dport, int dif)
+{
+ extern struct sock *tcp_v4_lookup(u32, u16, u32, u16, int);
+ return tcp_v4_lookup(saddr, sport, daddr, dport, dif);
+}
+
+/* same as tcp_v4_iff() in net/ipv4/tcp_ipv4. */
+static inline int vnet_iif(const struct sk_buff *skb)
+{
+ return ((struct rtable *)skb->dst)->rt_iif;
+}
+#endif
+
+#ifndef HAVE_FUNCTIONALITY_REQUIRED_BY_DEMUX
+#warning DEMUX FUNCTIONALITY NOT SUPPORTED
+#endif
+
+int vnet_verbose = 1;
+
+/* We subdivide the 1: major class into 15 minor subclasses 1:1, 1:2,
+ * etc. so that we can represent multiple bandwidth limits. The 1:1
+ * subclass has children named 1:1000, 1:1001, etc., one for each
+ * context (up to 4096). Similarly, the 1:2 subclass has children
+ * named 1:2000, 1:2001, etc. By default, the 1:1 subclass represents
+ * the node bandwidth cap and 1:1000 represents the root context's
+ * share of it. */
+int vnet_root_class = TC_H_MAKE(1 << 16, 0x1000);
+
+#define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | \
+ (1 << NF_IP_LOCAL_OUT) | \
+ (1 << NF_IP_POST_ROUTING))
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
+
+/* Standard entry. */
+struct ipt_standard
+{
+ struct ipt_entry entry;
+ struct ipt_standard_target target;
+};
+
+struct ipt_error_target
+{
+ struct ipt_entry_target target;
+ char errorname[IPT_FUNCTION_MAXNAMELEN];
+};
+
+struct ipt_error
+{
+ struct ipt_entry entry;
+ struct ipt_error_target target;
+};
+
+#endif
+
+static struct
+{
+ struct ipt_replace repl;
+ struct ipt_standard entries[3];
+ struct ipt_error term;
+} initial_table __initdata =
+{
+ .repl =
+ {
+ .name = "vnet",
+ .valid_hooks = FILTER_VALID_HOOKS,
+ .num_entries = 4,
+ .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
+ .hook_entry = { [NF_IP_LOCAL_IN] = 0,
+ [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard),
+ [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard) * 2, },
+ .underflow = { [NF_IP_LOCAL_IN] = 0,
+ [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard),
+ [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard) * 2, },
+ },
+
+ .entries =
+ {
+ /* LOCAL_IN: currently unused */
+ { .entry = { .target_offset = sizeof(struct ipt_entry),
+ .next_offset = sizeof(struct ipt_standard), },
+ .target = { .target = { .u = { .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)), }, },
+ .verdict = -NF_ACCEPT - 1, },
+ },
+
+ /* LOCAL_OUT: used for logging */
+ { .entry = { .target_offset = sizeof(struct ipt_entry),
+ .next_offset = sizeof(struct ipt_standard), },
+ .target = { .target = { .u = { .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)), }, },
+ .verdict = -NF_ACCEPT - 1, },
+ },
+
+ /* POST_ROUTING: used for priority classification */
+ { .entry = { .target_offset = sizeof(struct ipt_entry),
+ .next_offset = sizeof(struct ipt_standard), },
+ .target = { .target = { .u = { .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)), }, },
+ .verdict = -NF_ACCEPT - 1, },
+ },
+ },
+
+ /* ERROR */
+ .term =
+ {
+ .entry = { .target_offset = sizeof(struct ipt_entry),
+ .next_offset = sizeof(struct ipt_error), },
+ .target = { .target = { .u = { .user = { .target_size = IPT_ALIGN(sizeof(struct ipt_error_target)),
+ .name = IPT_ERROR_TARGET, }, }, },
+ .errorname = "ERROR", },
+ },
+};
+
+static struct ipt_table vnet_table = {
+ .name = "vnet",
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
+ .table = &initial_table.repl,
+#endif
+ .valid_hooks = FILTER_VALID_HOOKS,
+ .lock = RW_LOCK_UNLOCKED,
+ .me = THIS_MODULE,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+ .af = AF_INET,
+#endif
+};
+
+static inline u_int16_t
+get_dst_port(struct ip_conntrack_tuple *tuple)
+{
+ switch (tuple->dst.protonum) {
+ case IPPROTO_GRE:
+ /* XXX Truncate 32-bit GRE key to 16 bits */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
+ return tuple->dst.u.gre.key;
+#else
+ return htons(ntohl(tuple->dst.u.gre.key));
+#endif
+ case IPPROTO_ICMP:
+ /* Bind on ICMP echo ID */
+ return tuple->src.u.icmp.id;
+ case IPPROTO_TCP:
+ return tuple->dst.u.tcp.port;
+ case IPPROTO_UDP:
+ return tuple->dst.u.udp.port;
+ default:
+ return tuple->dst.u.all;
+ }
+}
+
+static inline u_int16_t
+get_src_port(struct ip_conntrack_tuple *tuple)
+{
+ switch (tuple->dst.protonum) {
+ case IPPROTO_GRE:
+ /* XXX Truncate 32-bit GRE key to 16 bits */
+ return htons(ntohl(tuple->src.u.gre.key));
+ case IPPROTO_ICMP:
+ /* Bind on ICMP echo ID */
+ return tuple->src.u.icmp.id;
+ case IPPROTO_TCP:
+ return tuple->src.u.tcp.port;
+ case IPPROTO_UDP:
+ return tuple->src.u.udp.port;
+ default:
+ return tuple->src.u.all;
+ }
+}
+
+
+
+static unsigned int
+vnet_hook(unsigned int hook,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+ enum ip_conntrack_dir dir;
+ u_int8_t protocol;
+ u_int32_t ip;
+ u_int16_t port;
+ struct bind_key *key;
+ xid_t xid;
+ unsigned int verdict;
+ int priority;
+ struct sock *sk;
+ int need_to_free_sk = 0;
+
+ ct = ip_conntrack_get(*pskb, &ctinfo);
+ dir = CTINFO2DIR(ctinfo);
+
+ /* Default to marking packet with root context ID */
+ xid = 0;
+
+ switch (hook) {
+
+ case NF_IP_LOCAL_IN:
+ /* Multicast to 224.0.0.1 is one example */
+ if (!ct)
+ break;
+
+ /* Determine if the packet is destined for a bound port */
+ protocol = ct->tuplehash[dir].tuple.dst.protonum;
+ assert(ctinfo == IP_CT_RELATED ||
+ ctinfo == (IP_CT_IS_REPLY + IP_CT_RELATED) ||
+ protocol == (*pskb)->nh.iph->protocol);
+ ip = ct->tuplehash[dir].tuple.dst.ip;
+ port = get_dst_port(&ct->tuplehash[dir].tuple);
+
+ /* Check if the port is bound */
+ key = bind_get(protocol, ip, port, NULL);
+
+ if (key && key->sk != NULL) {
+
+ /* A new or established connection to a bound port */
+ sk = key->sk;
+
+#ifdef HAVE_FUNCTIONALITY_REQUIRED_BY_DEMUX
+ /* If the bound socket is a real TCP socket, then the context that
+ * bound the port could have re-assigned an established connection
+ * socket to another context. See if this is the case.
+ */
+ if (protocol == IPPROTO_TCP && sk->sk_type == SOCK_STREAM) {
+ struct sock *tcp_sk;
+ u_int32_t src_ip = ct->tuplehash[dir].tuple.src.ip;
+ u_int16_t src_port = get_src_port(&ct->tuplehash[dir].tuple);
+
+ tcp_sk = vnet_tcp_lookup(src_ip, src_port, ip, port, vnet_iif(*pskb));
+ if (tcp_sk) {
+ if (tcp_sk->sk_state == TCP_TIME_WAIT) {
+ sock_put(tcp_sk);
+ } else {
+ dbg("vnet_in:%d: established TCP socket %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u\n",
+ get_sk_xid(tcp_sk), NIPQUAD(src_ip), ntohs(src_port), NIPQUAD(ip), ntohs(port));
+ sk = tcp_sk;
+ need_to_free_sk = 1;
+ }
+ /* Remember to sock_put()! */
+ }
+ }
+#endif
+
+ /* Indicate to the stack that the packet was "expected", so that it does
+ * not generate a TCP RST or ICMP Unreachable message. This requires a
+ * kernel patch.
+ */
+ if (sk->sk_type == SOCK_RAW)
+ (*pskb)->sk = sk;
+
+ assert(sk);
+ xid = get_sk_xid(sk);
+
+ /* Steal the reply end of the connection */
+ if (get_ct_xid(ct, !dir) != xid) {
+ dbg("vnet_in:%d: stealing %sbound %s connection %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u from context %d\n", xid,
+ key ? "" : "un", print_protocol(protocol),
+ NIPQUAD(ip), ntohs(port),
+ NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip), ntohs(ct->tuplehash[!dir].tuple.dst.u.all),
+ get_ct_xid(ct, !dir));
+ set_ct_xid(ct, !dir, xid);
+ }
+
+ /* Store the owner (if any) of the other side of the connection (if
+ * localhost) in the peercred struct.
+ */
+ sk->sk_peercred.uid = sk->sk_peercred.gid = (__u32) get_ct_xid(ct, dir);
+
+ if (ctinfo == IP_CT_NEW) {
+ dbg("vnet_in: %s port %u.%u.%u.%u:%u bound by context %d\n",
+ print_protocol(protocol), NIPQUAD(ip), ntohs(port), xid);
+ }
+
+#ifdef HAVE_FUNCTIONALITY_REQUIRED_BY_DEMUX
+ if (need_to_free_sk) {
+ /*
+ if (sk->sk_state == TCP_TIME_WAIT)
+ vnet_timewait_put(sk);
+ else*/
+ sock_put(sk);
+ need_to_free_sk=0;
+ }
+#endif
+ bind_put(key);
+
+ } else if ((int) get_ct_xid(ct, !dir) == -1) {
+ /* A new connection to an unbound port */
+ if (ctinfo == IP_CT_NEW) {
+ dbg("vnet_in: %s port %u.%u.%u.%u:%u not bound\n",
+ print_protocol(protocol), NIPQUAD(ip), ntohs(port));
+ }
+ } else {
+ /* A new or established connection to an unbound port that could be
+ * associated with an active socket ("could be" because the socket
+ * could be closed and the connection in a WAIT state). In any case,
+ * give it to the last owner of the connection.
+ */
+ xid = get_ct_xid(ct, !dir);
+ }
+
+ break;
+
+ case NF_IP_LOCAL_OUT:
+ /* Get the context ID of the sender */
+ assert((*pskb)->sk);
+ xid = get_sk_xid((*pskb)->sk);
+
+ /* Default class */
+ priority = vnet_root_class;
+
+ if (ct) {
+ protocol = ct->tuplehash[dir].tuple.dst.protonum;
+ assert(ctinfo == IP_CT_RELATED ||
+ ctinfo == (IP_CT_IS_REPLY + IP_CT_RELATED) ||
+ protocol == (*pskb)->nh.iph->protocol);
+ ip = ct->tuplehash[dir].tuple.src.ip;
+ assert(ctinfo == IP_CT_RELATED ||
+ ctinfo == (IP_CT_IS_REPLY + IP_CT_RELATED) ||
+ ip == __constant_htonl(INADDR_ANY) || ip == (*pskb)->nh.iph->saddr);
+ port = get_src_port(&ct->tuplehash[dir].tuple);
+ } else {
+ protocol = port = 0;
+ }
+
+ if (xid) {
+ /* Multicast to 224.0.0.1 is one example */
+ if (!ct) {
+ dbg("vnet_out:%d: dropping untrackable IP packet\n", xid);
+ return NF_DROP;
+ }
+
+ /* XXX Is this guaranteed? */
+ if ((*pskb)->len < sizeof(struct iphdr)) {
+ dbg("vnet_out:%d: dropping runt IP packet\n", xid);
+ return NF_DROP;
+ }
+
+ /* Check source IP address */
+ if (inet_addr_type(ip) != RTN_LOCAL) {
+ dbg("vnet_out:%d: non-local source IP address %u.%u.%u.%u not allowed\n", xid,
+ NIPQUAD(ip));
+ return NF_DROP;
+ }
+
+ /* Sending of ICMP error messages not allowed */
+ if (protocol == IPPROTO_ICMP) {
+ struct icmphdr *icmph = (struct icmphdr *)((*pskb)->nh.raw + ((*pskb)->nh.iph->ihl * 4));
+
+ if ((unsigned char *) &icmph[1] > (*pskb)->tail) {
+ dbg("vnet_out:%d: dropping runt ICMP packet\n", xid);
+ return NF_DROP;
+ }
+
+ switch (icmph->type) {
+ case ICMP_ECHOREPLY:
+ case ICMP_ECHO:
+ case ICMP_TIMESTAMP:
+ case ICMP_TIMESTAMPREPLY:
+ case ICMP_INFO_REQUEST:
+ case ICMP_INFO_REPLY:
+ case ICMP_ADDRESS:
+ case ICMP_ADDRESSREPLY:
+ /* Guaranteed by icmp_pkt_to_tuple() */
+ assert(port == icmph->un.echo.id);
+ break;
+ default:
+ dbg("vnet_out:%d: sending of ICMP error messages not allowed\n", xid);
+ return NF_DROP;
+ }
+ }
+ } else {
+ /* Let root send anything it wants */
+ }
+
+ if (ct) {
+ /* Check if the port is bound by someone else */
+ key = bind_get(protocol, ip, port, NULL);
+ } else {
+ assert(xid == 0);
+ key = NULL;
+ }
+
+ if (key && key->sk != NULL) {
+ /* A new or established connection from a bound port */
+ assert(ct);
+
+ sk = key->sk;
+
+#ifdef HAVE_FUNCTIONALITY_REQUIRED_BY_DEMUX
+ /* If the bound socket is a real TCP socket, then the context that
+ * bound the port could have re-assigned an established connection
+ * socket to the sender's context. See if this is the case.
+ */
+ if (protocol == IPPROTO_TCP && sk->sk_type == SOCK_STREAM && get_sk_xid(sk) != xid) {
+ struct sock *tcp_sk;
+ u_int32_t dst_ip = ct->tuplehash[dir].tuple.dst.ip;
+ u_int16_t dst_port = get_dst_port(&ct->tuplehash[dir].tuple);
+
+ tcp_sk = vnet_tcp_lookup(dst_ip, dst_port, ip, port, vnet_iif(*pskb));
+ if (tcp_sk) {
+ if (tcp_sk->sk_state == TCP_TIME_WAIT) {
+ sock_put(tcp_sk);
+ //vnet_timewait_put(tcp_sk);
+ } else {
+ need_to_free_sk = 1;
+ sk = tcp_sk;
+ /* Remember to sock_put()! */
+ }
+ }
+ }
+#endif
+
+ verdict = NF_ACCEPT;
+
+ /* Stealing connections from established sockets is not allowed */
+ assert(sk);
+ if (get_sk_xid(sk) != xid) {
+ if (xid) {
+ dbg("vnet_out:%d: %s port %u.%u.%u.%u:%u already bound by context %d\n", xid,
+ print_protocol(protocol), NIPQUAD(ip), ntohs(port), get_sk_xid(sk));
+ verdict = NF_DROP;
+ } else {
+ /* Let root send whatever it wants but do not steal the packet or
+ * connection. Kernel sockets owned by root may send packets on
+ * behalf of bound sockets (for instance, TCP ACK in SYN_RECV or
+ * TIME_WAIT).
+ */
+ xid = get_sk_xid(sk);
+ }
+ }
+
+#ifdef HAVE_FUNCTIONALITY_REQUIRED_BY_DEMUX
+ if (need_to_free_sk) {
+ /*
+ if (sk->sk_state == TCP_TIME_WAIT)
+ vnet_timewait_put(sk);
+ else */
+ sock_put(sk);
+ need_to_free_sk = 0;
+ }
+#endif
+ bind_put(key);
+
+ if (verdict == NF_DROP)
+ goto done;
+ } else {
+ /* A new or established or untrackable connection from an unbound port */
+
+ /* Reserved ports must be bound. Usually only root is capable of
+ * CAP_NET_BIND_SERVICE.
+ */
+ if (xid &&
+ (protocol == IPPROTO_TCP || protocol == IPPROTO_UDP) &&
+ ntohs(port) < PROT_SOCK) {
+ assert(ct);
+ dbg("vnet_out:%d: %s port %u is reserved\n", xid,
+ print_protocol(protocol), ntohs(port));
+ return NF_DROP;
+ }
+ }
+
+ if (ct) {
+ /* Steal the connection */
+ if (get_ct_xid(ct, dir) != xid) {
+ dbg("vnet_out:%d: stealing %sbound %s connection %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u from context %d\n", xid,
+ key ? "" : "un", print_protocol(protocol),
+ NIPQUAD(ip), ntohs(port),
+ NIPQUAD(ct->tuplehash[dir].tuple.dst.ip), ntohs(ct->tuplehash[dir].tuple.dst.u.all),
+ get_ct_xid(ct, dir));
+ set_ct_xid(ct, dir, xid);
+ }
+
+ /* Classify traffic once per connection */
+ if (ct->priority == (u_int32_t) -1) {
+ /* The POSTROUTING chain should classify packets into a minor subclass
+ * (1:1000, 1:2000, etc.) with -j CLASSIFY --set-class. Set the packet
+ * MARK early so that rules can take xid into account. */
+ set_skb_xid(*pskb, xid);
+ (*pskb)->priority = priority;
+ (void) ipt_do_table(pskb, NF_IP_POST_ROUTING, in, out, &vnet_table, NULL);
+ priority = (*pskb)->priority | xid;
+ dbg("vnet_out:%d: %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u class %x:%x\n", xid,
+ NIPQUAD(ip), ntohs(port),
+ NIPQUAD(ct->tuplehash[dir].tuple.dst.ip), ntohs(ct->tuplehash[dir].tuple.dst.u.all),
+ TC_H_MAJ(priority) >> 16, TC_H_MIN(priority));
+ ct->priority = priority;
+ } else
+ priority = ct->priority;
+ } else {
+ assert(xid == 0);
+ }
+
+ /* Set class */
+ (*pskb)->priority = priority;
+
+ break;
+
+ default:
+ /* Huh? */
+ assert(hook == NF_IP_LOCAL_IN || hook == NF_IP_LOCAL_OUT);
+ break;
+ }
+
+ /* Mark packet */
+ set_skb_xid(*pskb, xid);
+
+#ifdef VNET_DEBUG
+ if (vnet_verbose >= 3) {
+ if (ct)
+ print_conntrack(ct, ctinfo, hook);
+ if (vnet_verbose >= 4)
+ print_packet(*pskb);
+ }
+#endif
+
+ get_verdict:
+ verdict = ipt_do_table(pskb, hook, in, out, &vnet_table, NULL);
+
+ /* Pass to network taps */
+ if (verdict == NF_ACCEPT)
+ verdict = packet_hook(*pskb, hook);
+
+ done:
+ return verdict;
+}
+
+static struct nf_hook_ops vnet_ops[] = {
+ {
+ .hook = vnet_hook,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ .owner = THIS_MODULE,
+#endif
+ .pf = PF_INET,
+ .hooknum = NF_IP_LOCAL_IN,
+ .priority = NF_IP_PRI_LAST,
+ },
+ {
+ .hook = vnet_hook,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ .owner = THIS_MODULE,
+#endif
+ .pf = PF_INET,
+ .hooknum = NF_IP_LOCAL_OUT,
+ .priority = NF_IP_PRI_LAST,
+ },
+};
+
+/* Exported by net/ipv4/af_inet.c */
+extern struct net_proto_family inet_family_ops;
+extern struct proto_ops inet_stream_ops;
+extern struct proto_ops inet_dgram_ops;
+extern struct proto_ops inet_sockraw_ops;
+extern int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
+extern int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+ int addr_len, int flags);
+extern int inet_listen(struct socket *sock, int backlog);
+extern int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
+ int addr_len, int flags);
+extern int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+ size_t size);
+extern int inet_release(struct socket *sock);
+
+/* Exported by net/ipv4/tcp_ipv4.c */
+extern struct proto tcp_prot;
+extern int tcp_port_rover;
+extern int sysctl_local_port_range[2];
+
+/* Exported by net/ipv4/udp.c */
+extern struct proto udp_prot;
+extern int udp_port_rover;
+
+/* Functions that are not exported */
+static int (*inet_create)(struct socket *sock, int protocol);
+static ssize_t (*inet_sendpage)(struct socket *sock, struct page *page, int offset, size_t size, int flags);
+static void (*tcp_v4_hash)(struct sock *sk);
+static void (*tcp_v4_unhash)(struct sock *sk);
+static void (*udp_v4_hash)(struct sock *sk);
+static void (*udp_v4_unhash)(struct sock *sk);
+
+static int
+vnet_inet_create(struct socket *sock, int protocol)
+{
+ int ret;
+
+ if (sock->type == SOCK_RAW) {
+ /* Temporarily give CAP_NET_RAW to root VServer accounts */
+ if (current->euid)
+ return -EPERM;
+ cap_raise(current->cap_effective, CAP_NET_RAW);
+ }
+ ret = inet_create(sock, protocol);
+ if (sock->type == SOCK_RAW)
+ cap_lower(current->cap_effective, CAP_NET_RAW);
+ if (ret)
+ return ret;
+
+ if (sock->type == SOCK_RAW) {
+ struct sock *sk = sock->sk;
+ struct inet_opt *inet = inet_sk(sk);
+ /* Usually redundant and unused */
+ assert(inet->sport == htons(inet->num));
+ /* So we can track double raw binds */
+ inet->sport = 0;
+ }
+
+ return ret;
+}
+
+/* Make sure our bind table gets updated whenever the stack decides to
+ * unhash or rehash a socket.
+ */
+static void
+vnet_inet_unhash(struct sock *sk)
+{
+ struct inet_opt *inet = inet_sk(sk);
+ struct bind_key *key;
+
+ key = bind_get(sk->sk_protocol, inet->saddr, inet->sport, sk);
+ if (key) {
+ dbg("vnet_inet_unhash:%d: released %s port %u.%u.%u.%u:%u\n", get_sk_xid(sk),
+ print_protocol(sk->sk_protocol), NIPQUAD(inet->saddr), ntohs(inet->sport));
+ bind_del(key);
+ bind_put(key);
+ }
+
+ if (sk->sk_protocol == IPPROTO_TCP)
+ tcp_v4_unhash(sk);
+ else if (sk->sk_protocol == IPPROTO_UDP)
+ udp_v4_unhash(sk);
+}
+
+static void
+vnet_inet_hash(struct sock *sk)
+{
+ struct inet_opt *inet = inet_sk(sk);
+
+ if (bind_add(sk->sk_protocol, inet->saddr, inet->sport, sk) == 0) {
+ dbg("vnet_inet_hash:%d: bound %s port %u.%u.%u.%u:%u\n", get_sk_xid(sk),
+ print_protocol(sk->sk_protocol), NIPQUAD(inet->saddr), ntohs(inet->sport));
+ }
+
+ if (sk->sk_protocol == IPPROTO_TCP)
+ tcp_v4_hash(sk);
+ else if (sk->sk_protocol == IPPROTO_UDP)
+ udp_v4_hash(sk);
+}
+
+/* Port reservation */
+static int
+vnet_inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+ struct sock *sk = sock->sk;
+ struct inet_opt *inet = inet_sk(sk);
+ struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
+ struct bind_key *key;
+ int ret;
+
+ /* Bind socket */
+ if ((ret = inet_bind(sock, uaddr, addr_len)))
+ return ret;
+
+ lock_sock(sk);
+
+ /* Backward compatibility with safe raw sockets */
+ if (sock->type == SOCK_RAW) {
+ /* Runt sockaddr */
+ if (addr_len < sizeof(struct sockaddr_in))
+ ret = -EINVAL;
+ /* Non-local bind */
+ else if (sin->sin_addr.s_addr != __constant_htonl(INADDR_ANY) && inet_addr_type(sin->sin_addr.s_addr) != RTN_LOCAL)
+ ret = -EINVAL;
+ /* Unspecified port */
+ else if (!sin->sin_port)
+ ret = -EINVAL;
+ /* Reserved port */
+ else if ((sk->sk_protocol == IPPROTO_TCP || sk->sk_protocol == IPPROTO_UDP) &&
+ ntohs(sin->sin_port) < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+ ret = -EACCES;
+ /* Double bind */
+ else if (inet->sport)
+ ret = -EINVAL;
+ if (ret)
+ goto done;
+ inet->saddr = sin->sin_addr.s_addr;
+ inet->sport = sin->sin_port;
+ }
+
+ key = bind_get(sk->sk_protocol, inet->saddr, inet->sport, NULL);
+ if (key) {
+ /*
+ * If we are root or own the already bound socket, and
+ * SO_REUSEADDR has been set on both.
+ */
+ if ((get_sk_xid(sk) == 0 || get_sk_xid(sk) == get_sk_xid(key->sk)) &&
+ key->sk->sk_reuse && sk->sk_reuse) {
+ if (key->ip == __constant_htonl(INADDR_ANY)) {
+ /* Keep the current bind key */
+ bind_put(key);
+ goto done;
+ } else if (inet->saddr == __constant_htonl(INADDR_ANY)) {
+ /* Consider the port to be bound to this socket now */
+ bind_del(key);
+ }
+ }
+ bind_put(key);
+ }
+
+ if ((ret = bind_add(sk->sk_protocol, inet->saddr, inet->sport, sk)) == 0) {
+ dbg("vnet_inet_bind:%d: bound %s port %u.%u.%u.%u:%u\n", get_sk_xid(sk),
+ print_protocol(sk->sk_protocol), NIPQUAD(inet->saddr), ntohs(inet->sport));
+ }
+
+ done:
+ release_sock(sk);
+ return ret;
+}
+
+/* Override TCP and UDP port rovers since they do not know about raw
+ * socket binds.
+ */
+static int
+vnet_autobind(struct sock *sk)
+{
+ int (*get_port)(struct sock *, unsigned short);
+ int low = sysctl_local_port_range[0];
+ int high = sysctl_local_port_range[1];
+ int remaining = (high - low) + 1;
+ int port;
+ struct inet_opt *inet = inet_sk(sk);
+ struct bind_key *key;
+
+ /* Must be locked */
+ assert(sock_owned_by_user(sk));
+
+ /* Already bound to a port */
+ if (inet->num)
+ return 0;
+
+ if (sk->sk_protocol == IPPROTO_TCP) {
+ get_port = tcp_prot.get_port;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
+ /* Approximate the tcp_v4_get_port() strategy */
+ port = tcp_port_rover + 1;
+#else
+ /* Approximate the inet_csk_get_port() strategy */
+ port = net_random() % (high - low) + low;
+#endif
+ } else if (sk->sk_protocol == IPPROTO_UDP) {
+ get_port = udp_prot.get_port;
+ port = udp_port_rover;
+ } else if (sk->sk_prot->get_port) {
+ err("vnet_get_port:%d: %s unhandled\n", get_sk_xid(sk),
+ print_protocol(sk->sk_protocol));
+ if (sk->sk_prot->get_port(sk, 0))
+ return -EAGAIN;
+ inet->sport = htons(inet->num);
+ return 0;
+ } else {
+ return 0;
+ }
+
+ dbg("vnet_autobind:%d: roving %s port range %u.%u.%u.%u:%u-%u\n", get_sk_xid(sk),
+ print_protocol(sk->sk_protocol), NIPQUAD(inet->saddr), low, high);
+
+ /* Find a free port by linear search. Note that the standard
+ * udp_v4_get_port() function attempts to pick a port that
+ * keeps its hash tables balanced. If the UDP hash table keeps
+ * getting bombed, we should try implementing this strategy
+ * here.
+ */
+ do {
+ if (port < low || port > high)
+ port = low;
+
+ /* XXX We could probably try something more clever
+ * like checking to see if the bound socket is a
+ * regular TCP socket owned by the same context (or we
+ * are root) and, if so, letting tcp_v4_get_port()
+ * apply its fast reuse logic to determine if the port
+ * can be reused.
+ */
+ if (bind_add(sk->sk_protocol, inet->saddr, htons(port), sk)) {
+ dbg("vnet_get_port:%d: %s port %u.%u.%u.%u:%u already bound\n", get_sk_xid(sk),
+ print_protocol(sk->sk_protocol), NIPQUAD(inet->saddr), port);
+ goto next;
+ }
+
+ if (get_port(sk, port)) {
+ /* Can happen if we are unloaded when there are active sockets */
+ dbg("vnet_get_port:%d: failed to hash unbound %s port %u.%u.%u.%u:%u\n", get_sk_xid(sk),
+ print_protocol(sk->sk_protocol), NIPQUAD(inet->saddr), port);
+ key = bind_get(sk->sk_protocol, inet->saddr, htons(port), sk);
+ assert(key);
+ bind_del(key);
+ bind_put(key);
+ } else {
+ assert(port == inet->num);
+ inet->sport = htons(inet->num);
+ break;
+ }
+ next:
+ port++;
+ } while (--remaining > 0);
+
+ if (sk->sk_protocol == IPPROTO_UDP)
+ udp_port_rover = port;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
+ else if (sk->sk_protocol == IPPROTO_TCP)
+ tcp_port_rover = port;
+#endif
+
+ if (remaining <= 0) {
+ err("vnet_get_port:%d: exhausted local %s port range %u.%u.%u.%u:%u-%u\n", get_sk_xid(sk),
+ print_protocol(sk->sk_protocol), NIPQUAD(inet->saddr), low, high);
+ return -EAGAIN;
+ } else {
+ dbg("vnet_get_port:%d: autobound %s port %u.%u.%u.%u:%u\n", get_sk_xid(sk),
+ print_protocol(sk->sk_protocol), NIPQUAD(inet->saddr), port);
+ return 0;
+ }
+}
+
+static int
+vnet_inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+ int addr_len, int flags)
+{
+ struct sock *sk = sock->sk;
+
+ lock_sock(sk);
+
+ /* Duplicates checks in inet_stream_connect() */
+ if (uaddr->sa_family != AF_UNSPEC &&
+ sock->state == SS_UNCONNECTED &&
+ sk->sk_state == TCP_CLOSE) {
+ /* We may need to bind the socket. */
+ if (!inet_sk(sk)->num && vnet_autobind(sk)) {
+ release_sock(sk);
+ return -EAGAIN;
+ }
+ }
+
+ release_sock(sk);
+
+ return inet_stream_connect(sock, uaddr, addr_len, flags);
+}
+
+static int
+vnet_inet_listen(struct socket *sock, int backlog)
+{
+ struct sock *sk = sock->sk;
+
+ lock_sock(sk);
+
+ /* Duplicates checks in inet_listen() */
+ if (sock->type == SOCK_STREAM &&
+ sock->state == SS_UNCONNECTED &&
+ sk->sk_state == TCP_CLOSE) {
+ /* We may need to bind the socket. */
+ if (!inet_sk(sk)->num && vnet_autobind(sk)) {
+ release_sock(sk);
+ return -EAGAIN;
+ }
+ }
+
+ release_sock(sk);
+
+ return inet_listen(sock, backlog);
+}
+
+static int
+vnet_inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
+ int addr_len, int flags)
+{
+ struct sock *sk = sock->sk;
+
+ lock_sock(sk);
+
+ /* Duplicates checks in inet_dgram_connect() */
+ if (uaddr->sa_family != AF_UNSPEC) {
+ /* We may need to bind the socket. */
+ if (!inet_sk(sk)->num && vnet_autobind(sk)) {
+ release_sock(sk);
+ return -EAGAIN;
+ }
+ }
+
+ release_sock(sk);
+
+ return inet_dgram_connect(sock, uaddr, addr_len, flags);
+}
+
+static int
+vnet_inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+ size_t size)
+{
+ struct sock *sk = sock->sk;
+
+ lock_sock(sk);
+
+ /* We may need to bind the socket. */
+ if (!inet_sk(sk)->num && vnet_autobind(sk)) {
+ release_sock(sk);
+ return -EAGAIN;
+ }
+
+ release_sock(sk);
+
+ return inet_sendmsg(iocb, sock, msg, size);
+}
+
+static ssize_t
+vnet_inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
+{
+ struct sock *sk = sock->sk;
+
+ lock_sock(sk);
+
+ /* We may need to bind the socket. */
+ if (!inet_sk(sk)->num && vnet_autobind(sk)) {
+ release_sock(sk);
+ return -EAGAIN;
+ }
+
+ release_sock(sk);
+
+ return inet_sendpage(sock, page, offset, size, flags);
+}
+
+static int
+vnet_inet_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ struct inet_opt *inet = inet_sk(sk);
+ struct bind_key *key;
+
+ /* Partial socket created by accept() */
+ if (!sk)
+ goto done;
+
+ lock_sock(sk);
+
+ key = bind_get(sk->sk_protocol, inet->saddr, inet->sport, sk);
+ if (key) {
+ dbg("vnet_inet_release:%d: released %s port %u.%u.%u.%u:%u\n", get_sk_xid(sk),
+ print_protocol(sk->sk_protocol), NIPQUAD(inet->saddr), ntohs(inet->sport));
+ bind_del(key);
+ bind_put(key);
+ }
+
+ release_sock(sk);
+
+ done:
+ return inet_release(sock);
+}
+
+/* Sanity check */
+#define override_op(op, from, to) do { assert((op) == (from)); (op) = (to); } while (0)
+
+static int __init
+vnet_init(void)
+{
+ int ret;
+
+ /* Initialize bind table */
+ ret = bind_init();
+ if (ret < 0)
+ return ret;
+
+ /* Register /proc entries */
+ ret = proc_init();
+ if (ret < 0)
+ goto cleanup_bind;
+
+ /* Register dummy netdevice */
+ ret = packet_init();
+ if (ret < 0)
+ goto cleanup_proc;
+
+ /* Register tap netdevice */
+ ret = tun_init();
+ if (ret < 0)
+ goto cleanup_packet;
+
+ /* Get pointers to unexported functions */
+ inet_create = inet_family_ops.create;
+ inet_sendpage = inet_dgram_ops.sendpage;
+ tcp_v4_hash = tcp_prot.hash;
+ tcp_v4_unhash = tcp_prot.unhash;
+ udp_v4_hash = udp_prot.hash;
+ udp_v4_unhash = udp_prot.unhash;
+
+ /* Override PF_INET socket operations */
+ override_op(inet_family_ops.create, inet_create, vnet_inet_create);
+ override_op(inet_stream_ops.bind, inet_bind, vnet_inet_bind);
+ override_op(inet_stream_ops.connect, inet_stream_connect, vnet_inet_stream_connect);
+ override_op(inet_stream_ops.listen, inet_listen, vnet_inet_listen);
+ override_op(inet_stream_ops.sendmsg, inet_sendmsg, vnet_inet_sendmsg);
+ override_op(inet_stream_ops.release, inet_release, vnet_inet_release);
+ override_op(inet_dgram_ops.bind, inet_bind, vnet_inet_bind);
+ override_op(inet_dgram_ops.connect, inet_dgram_connect, vnet_inet_dgram_connect);
+ override_op(inet_dgram_ops.sendmsg, inet_sendmsg, vnet_inet_sendmsg);
+ override_op(inet_dgram_ops.sendpage, inet_sendpage, vnet_inet_sendpage);
+ override_op(inet_dgram_ops.release, inet_release, vnet_inet_release);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
+ override_op(inet_sockraw_ops.bind, inet_bind, vnet_inet_bind);
+ override_op(inet_sockraw_ops.connect, inet_dgram_connect, vnet_inet_dgram_connect);
+ override_op(inet_sockraw_ops.sendmsg, inet_sendmsg, vnet_inet_sendmsg);
+ override_op(inet_sockraw_ops.sendpage, inet_sendpage, vnet_inet_sendpage);
+ override_op(inet_sockraw_ops.release, inet_release, vnet_inet_release);
+#endif
+ override_op(tcp_prot.hash, tcp_v4_hash, vnet_inet_hash);
+ override_op(tcp_prot.unhash, tcp_v4_unhash, vnet_inet_unhash);
+ override_op(udp_prot.hash, udp_v4_hash, vnet_inet_hash);
+ override_op(udp_prot.unhash, udp_v4_unhash, vnet_inet_unhash);
+
+ /* Register table */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
+ ret = ipt_register_table(&vnet_table, &initial_table.repl);
+#else
+ ret = ipt_register_table(&vnet_table);
+#endif
+ if (ret < 0)
+ goto cleanup_override;
+
+ /* Register hooks */
+ ret = nf_register_hook(&vnet_ops[0]);
+ if (ret < 0)
+ goto cleanup_table;
+
+ ret = nf_register_hook(&vnet_ops[1]);
+ if (ret < 0)
+ goto cleanup_hook0;
+
+ /* Enables any runtime kernel support for VNET */
+ vnet_active = 1;
+
+ /* Print banner */
+ printk("VNET: version " VNET_VERSION " compiled on " __DATE__ " at " __TIME__ "\n");
+
+ return ret;
+
+ cleanup_hook0:
+ nf_unregister_hook(&vnet_ops[0]);
+ cleanup_table:
+ ipt_unregister_table(&vnet_table);
+ cleanup_override:
+ inet_family_ops.create = inet_create;
+ inet_stream_ops.bind = inet_bind;
+ inet_stream_ops.connect = inet_stream_connect;
+ inet_stream_ops.listen = inet_listen;
+ inet_stream_ops.sendmsg = inet_sendmsg;
+ inet_stream_ops.release = inet_release;
+ inet_dgram_ops.bind = inet_bind;
+ inet_dgram_ops.connect = inet_dgram_connect;
+ inet_dgram_ops.sendmsg = inet_sendmsg;
+ inet_dgram_ops.sendpage = inet_sendpage;
+ inet_dgram_ops.release = inet_release;
+ tun_cleanup();
+ cleanup_packet:
+ packet_cleanup();
+ cleanup_proc:
+ proc_cleanup();
+ cleanup_bind:
+ bind_cleanup();
+
+ return ret;
+}
+
+static void __exit
+vnet_exit(void)
+{
+ unsigned int i;
+
+ /* Print banner */
+ printk("VNET: exiting\n");
+
+ /* Disables any runtime kernel support for VNET */
+ vnet_active = 0;
+
+ /* Stop handling packets first */
+ for (i = 0; i < sizeof(vnet_ops)/sizeof(struct nf_hook_ops); i++)
+ nf_unregister_hook(&vnet_ops[i]);
+
+ ipt_unregister_table(&vnet_table);
+
+ /* Stop handling PF_INET socket operations */
+ override_op(inet_family_ops.create, vnet_inet_create, inet_create);
+ override_op(inet_stream_ops.bind, vnet_inet_bind, inet_bind);
+ override_op(inet_stream_ops.connect, vnet_inet_stream_connect, inet_stream_connect);
+ override_op(inet_stream_ops.listen, vnet_inet_listen, inet_listen);
+ override_op(inet_stream_ops.sendmsg, vnet_inet_sendmsg, inet_sendmsg);
+ override_op(inet_stream_ops.release, vnet_inet_release, inet_release);
+ override_op(inet_dgram_ops.bind, vnet_inet_bind, inet_bind);
+ override_op(inet_dgram_ops.connect, vnet_inet_dgram_connect, inet_dgram_connect);
+ override_op(inet_dgram_ops.sendmsg, vnet_inet_sendmsg, inet_sendmsg);
+ override_op(inet_dgram_ops.sendpage, vnet_inet_sendpage, inet_sendpage);
+ override_op(inet_dgram_ops.release, vnet_inet_release, inet_release);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
+ override_op(inet_sockraw_ops.bind, vnet_inet_bind, inet_bind);
+ override_op(inet_sockraw_ops.connect, vnet_inet_dgram_connect, inet_dgram_connect);
+ override_op(inet_sockraw_ops.sendmsg, vnet_inet_sendmsg, inet_sendmsg);
+ override_op(inet_sockraw_ops.sendpage, vnet_inet_sendpage, inet_sendpage);
+ override_op(inet_sockraw_ops.release, vnet_inet_release, inet_release);
+#endif
+ override_op(tcp_prot.hash, vnet_inet_hash, tcp_v4_hash);
+ override_op(tcp_prot.unhash, vnet_inet_unhash, tcp_v4_unhash);
+ override_op(udp_prot.hash, vnet_inet_hash, udp_v4_hash);
+ override_op(udp_prot.unhash, vnet_inet_unhash, udp_v4_unhash);
+
+ /* Disable tap netdevice */
+ tun_cleanup();
+
+ /* Disable vnet netdevice and stop handling PF_PACKET sockets */
+ packet_cleanup();
+
+ /* Unregister /proc handlers */
+ proc_cleanup();
+
+ /* Cleanup bind table (must be after nf_unregister_hook()) */
+ bind_cleanup();
+}
+
+module_init(vnet_init);
+module_exit(vnet_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mark Huang <mlhuang@cs.princeton.edu>");
+MODULE_DESCRIPTION("VServer IP isolation");