2 * Copyright (C) 2010 Luigi Rizzo, Francesco Magno, Universita` di Pisa
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * kernel variables and functions that are not available in Windows.
30 #include <net/pfil.h> /* provides PFIL_IN and PFIL_OUT */
31 #include <arpa/inet.h>
32 #include <netinet/in.h> /* in_addr */
37 /* credentials check */
39 cred_check(void *_insn, int proto, struct ifnet *oif,
40 struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip,
41 u_int16_t src_port, struct bsd_ucred *u, int *ugid_lookupp,
48 * as good as anywhere, place here the missing calls
54 void *_ret = ExAllocatePoolWithTag(NonPagedPool, size, 'wfpi');
56 memset(_ret, 0, size);
61 panic(const char *fmt, ...)
74 for (i = 1; ; i++, bits >>= 1) {
// do_gettimeofday: fill *tv with the current time split into seconds and
// microseconds.
// The base value comes from KeQuerySystemTime() and is kept in 100-nsec
// units. While that value has not advanced since the last recorded sample,
// the performance-counter ticks elapsed since that sample are converted to
// 100-nsec units and added on top, giving finer resolution than the system
// time alone.
// NOTE(review): prevtime/prevcount/freq are function-local statics that are
// read and written with no locking in sight - confirm callers serialize.
// NOTE(review): the WDK documents KeQuerySystemTime as counting from
// 1601-01-01, so tv_sec is presumably not a Unix-epoch value; confirm that
// callers only compare timestamps with each other.
82 do_gettimeofday(struct timeval *tv)
84 static LARGE_INTEGER prevtime; //system time in 100-nsec resolution
85 static LARGE_INTEGER prevcount; //RTC counter value
86 static LARGE_INTEGER freq; //frequency
88 LARGE_INTEGER currtime;
89 LARGE_INTEGER currcount;
// prevtime == 0 doubles as the "never sampled" marker.
90 if (prevtime.QuadPart == 0) { //first time we ask for system time
91 KeQuerySystemTime(&prevtime);
92 prevcount = KeQueryPerformanceCounter(&freq);
93 currtime.QuadPart = prevtime.QuadPart;
95 KeQuerySystemTime(&currtime);
96 currcount = KeQueryPerformanceCounter(&freq);
97 if (currtime.QuadPart == prevtime.QuadPart) {
98 //time has NOT changed, calculate time using ticks and DO NOT update
99 LONGLONG difftime = 0; //difference in 100-nsec
100 LONGLONG diffcount = 0; //clock count difference
101 //printf("time has NOT changed\n");
102 diffcount = currcount.QuadPart - prevcount.QuadPart;
// ticks * 10^7 / (ticks per second) = elapsed time in 100-nsec units
103 diffcount *= 10000000;
104 difftime = diffcount / freq.QuadPart;
105 currtime.QuadPart += difftime;
107 //time has changed, update and return SystemTime
108 //printf("time has changed\n");
109 prevtime.QuadPart = currtime.QuadPart;
110 prevcount.QuadPart = currcount.QuadPart;
113 currtime.QuadPart /= 10; //convert in usec
// split the microsecond count into whole seconds and the remainder
114 tv->tv_sec = currtime.QuadPart / (LONGLONG)1000000;
115 tv->tv_usec = currtime.QuadPart % (LONGLONG)1000000;
116 //printf("sec %d usec %d\n",tv->tv_sec, tv->tv_usec);
// time_uptime_w32: derive an int time value from KeQuerySystemTime().
// NOTE(review): do_gettimeofday() in this file treats the same
// KeQuerySystemTime() value as 100-nsec units (it divides by 10 to get
// usec). Dividing by 1000000 here therefore yields tenths of a second, not
// seconds; 10000000 would give seconds - confirm the unit callers expect.
// NOTE(review): this reads the system (wall-clock) time, not time since
// boot, and the quotient is narrowed with an (int) cast - confirm callers
// only rely on differences and tolerate the truncation.
119 int time_uptime_w32()
123 KeQuerySystemTime(&tm);
124 ret = (int)(tm.QuadPart / (LONGLONG)1000000);
130 * Windows version of firewall hook. We receive a partial copy of
131 * the packet which points to the original buffers. In output,
132 * the refcount has been already incremented.
133 * The function reconstructs
134 * the whole packet in a contiguous memory area, builds a fake mbuf,
135 * calls the firewall, does the eventual cleaning and returns
136 * to MiniportSend or ProtocolReceive, which will silently return
137 * (dropping packet) or continue its execution (allowing packet).
138 * The memory area contains:
139 * - the fake mbuf, filled with data needed by ipfw, and information
143 void hexdump(PUCHAR,int, const char *);
144 static char _if_in[] = "incoming";
145 static char _if_out[] = "outgoing";
// ipfw2_qhandler_w32: firewall hook for a packet delivered as an NDIS_PACKET.
// Steps performed by the code below:
//  - query the packet's buffer chain and total length;
//  - allocate one area holding a struct mbuf followed by the whole frame and
//    copy every NDIS buffer of the chain into it;
//  - read the EtherType and treat anything other than 0x0800 (IPv4) as
//    "not for the firewall";
//  - replace the packet's buffer chain with a single descriptor over the
//    copy, remembering the old head/tail so they can be restored;
//  - call ipfw_check_hook() with PFIL_IN or PFIL_OUT according to 'direction'.
// The comments further down name three outcomes of the hook: accepted,
// kept by dummynet for later reinjection, and dropped.
148 ipfw2_qhandler_w32(PNDIS_PACKET pNdisPacket, int direction,
151 unsigned int BufferCount = 0;
152 unsigned TotalPacketLength = 0;
153 PNDIS_BUFFER pCurrentBuffer = NULL;
154 PNDIS_BUFFER pNextBuffer = NULL;
156 unsigned char* payload = NULL;
158 unsigned short EtherType = 0;
161 PNDIS_BUFFER pNdisBuffer, old_head, old_tail;
162 NDIS_HANDLE PacketPool;
166 /* In NDIS, packets are a chain of NDIS_BUFFER. We query
167 * the packet to get a pointer of chain's head, the length
168 * of the chain, and the length of the packet itself.
169 * Then allocate a buffer for the mbuf and the payload.
171 NdisQueryPacket(pNdisPacket, NULL, &BufferCount,
172 &pCurrentBuffer, &TotalPacketLength);
// single allocation: the mbuf header with the frame bytes right behind it
173 m = malloc(sizeof(struct mbuf) + TotalPacketLength, 0, 0 );
174 if (m == NULL) //resource shortage, drop the packet
177 /* set mbuf fields to point past the MAC header.
178 * Also set additional W32 info
180 payload = (unsigned char*)(m + 1);
// NOTE(review): TotalPacketLength is unsigned and no check that it is at
// least 14 is visible; a shorter frame would make this subtraction wrap.
181 m->m_len = m->m_pkthdr.len = TotalPacketLength-14;
182 m->m_pkthdr.rcvif = (void *)((direction==INCOMING) ? _if_in : NULL);
183 m->m_data = payload + 14; /* past the MAC header */
// bookkeeping that netisr_dispatch() reads back when reinjecting
184 m->direction = direction;
185 m->context = Context;
186 m->pkt = pNdisPacket;
189 * Now copy the data from the Windows buffers to the mbuf.
191 for (i=0, ofs = 0; i < BufferCount; i++) {
// NOTE(review): confirm src is checked for NULL before the bcopy();
// NdisQueryBufferSafe can fail to map the buffer.
193 NdisQueryBufferSafe(pCurrentBuffer, &src, &l,
195 bcopy(src, payload + ofs, l);
197 NdisGetNextBuffer(pCurrentBuffer, &pNextBuffer);
198 pCurrentBuffer = pNextBuffer;
201 * Identify EtherType. If the packet is not IP, simply allow
202 * and don't bother the firewall. XXX should be done before.
// EtherType is read from bytes 12-13 of the frame and byte-swapped before
// the comparison; this assumes at least 14 bytes were copied above.
204 EtherType = *(unsigned short*)(payload + 12);
205 EtherType = RtlUshortByteSwap(EtherType);
206 if (EtherType != 0x0800) {
207 //DbgPrint("ethertype = %X, skipping ipfw\n",EtherType);
213 * Now build a buffer descriptor to replace the original chain.
// the descriptor comes from the send or receive pool depending on direction
216 PacketPool = direction == OUTGOING ?
217 pAdapt->SendPacketPoolHandle : pAdapt->RecvPacketPoolHandle;
// the new descriptor spans the whole frame (payload length plus the 14-byte
// MAC header)
218 NdisAllocateBuffer(&Status, &pNdisBuffer,
219 PacketPool, payload, m->m_pkthdr.len+14);
220 if (Status != NDIS_STATUS_SUCCESS)
223 * Save the old buffer pointers, and put the new one
226 pNdisBuffer->Next = NULL;
227 old_head = NDIS_PACKET_FIRST_NDIS_BUFFER(pNdisPacket);
228 old_tail = NDIS_PACKET_LAST_NDIS_BUFFER(pNdisPacket);
229 NdisReinitializePacket(pNdisPacket);
230 NdisChainBufferAtFront(pNdisPacket, pNdisBuffer);
// NOTE(review): the debug prints read fixed offsets 9, 12, 16, 20 and 22
// past the MAC header without looking at the protocol or header length -
// presumably acceptable for debug output only; confirm.
232 if (direction == INCOMING) {
233 DBGPRINT(("incoming: proto %u (%s), src %08X, dst %08X, sport %u, dport %u, len %u\n", *(payload+14+9), texify_proto(*(payload+14+9)), *(unsigned int*)(payload+14+12), *(unsigned int*)(payload+14+16), ntohs((*((unsigned short int*)(payload+14+20)))), ntohs((*((unsigned short int*)(payload+14+22)))), TotalPacketLength));
235 DBGPRINT(("outgoing: proto %u (%s), src %08X, dst %08X, sport %u, dport %u, len %u\n", *(payload+14+9), texify_proto(*(payload+14+9)), *(unsigned int*)(payload+14+12), *(unsigned int*)(payload+14+16), ntohs((*((unsigned short int*)(payload+14+20)))), ntohs((*((unsigned short int*)(payload+14+22)))), TotalPacketLength));
// incoming packets are checked with no interface argument, outgoing ones
// with the _if_out placeholder cast to struct ifnet *
238 if (direction == INCOMING)
239 ret = ipfw_check_hook(NULL, &m, NULL, PFIL_IN, NULL);
241 ret = ipfw_check_hook(NULL, &m, (struct ifnet*)_if_out, PFIL_OUT, NULL);
244 /* Accept. Restore the old buffer chain, free
245 * the mbuf and return PASS.
247 //DBGPRINT(("accepted\n"));
248 NdisReinitializePacket(pNdisPacket);
249 NDIS_PACKET_FIRST_NDIS_BUFFER(pNdisPacket) = old_head;
250 NDIS_PACKET_LAST_NDIS_BUFFER(pNdisPacket) = old_tail;
251 NdisFreeBuffer(pNdisBuffer);
254 } else if (ret == 0) {
255 /* dummynet has kept the packet, will reinject later. */
256 //DBGPRINT(("kept by dummynet\n"));
260 * Packet dropped by ipfw or dummynet. Nothing to do as
261 * FREE_PKT already freed the fake mbuf
263 //DBGPRINT(("dropped by dummynet, ret = %i\n", ret));
267 /* for some reason we cannot proceed. Free any resources
268 * including those received from above, and return
269 * faking success. XXX this must be fixed later.
271 NdisFreePacket(pNdisPacket);
276 * Windows reinjection function.
277 * The packet is already available as m->pkt, so we only
278 * need to send it to the right place.
279 * Normally an NDIS intermediate driver allocates
280 * a fresh descriptor, while the actual data's ownership is
281 * retained by the protocol, or the miniport below.
282 * Since an intermediate driver behaves as a miniport driver
283 * at the upper edge (towards the protocol), and as a protocol
284 * driver at the lower edge (towards the NIC), when we handle a
285 * packet we have a reserved area in both directions (we can use
286 * only one for each direction at our own discretion).
287 * Normally this area is used to save a pointer to the original
288 * packet, so when the driver is done with it, the original descriptor
289 * can be retrieved, and the resources freed (packet descriptor,
290 * buffer descriptor(s) and the actual data). In our driver this
291 * area is used to mark the reinjected packets as 'orphan', because
292 * the original descriptor is gone long ago. This way we can handle
293 * correctly the resource freeing when the callback function
// netisr_dispatch: reinject a packet described by the fake mbuf 'm'.
// It reads m->context (the adapter), m->pkt (the NDIS packet) and
// m->direction. OUTGOING packets are passed to NdisSend() on the adapter's
// binding; otherwise the frame stored right after the mbuf is indicated
// upwards with NdisMEthIndicateReceive().
// NOTE(review): 'num' does not appear to be used - confirm.
298 netisr_dispatch(int num, struct mbuf *m)
300 unsigned char* payload = (unsigned char*)(m+1);
301 PADAPT pAdapt = m->context;
303 PNDIS_PACKET pPacket = m->pkt;
304 PNDIS_BUFFER pNdisBuffer;
305 NDIS_HANDLE PacketPool;
312 DbgPrint("reinject %s\n", m->direction == OUTGOING ?
313 "outgoing" : "incoming");
// the device-state checks below are made with pAdapt->Lock held
315 NdisAcquireSpinLock(&pAdapt->Lock);
316 if (m->direction == OUTGOING) {
317 //we must first check if the adapter is going down,
318 // in this case abort the reinjection
319 if (pAdapt->PTDeviceState > NdisDeviceStateD0) {
320 pAdapt->OutstandingSends--;
321 // XXX should we notify up ?
322 NdisReleaseSpinLock(&pAdapt->Lock);
326 /* if the upper miniport edge is not initialized or
327 * the miniport edge is in low power state, abort
328 * XXX we should notify the error.
330 if (!pAdapt->MiniportHandle ||
331 pAdapt->MPDeviceState > NdisDeviceStateD0) {
332 NdisReleaseSpinLock(&pAdapt->Lock);
336 NdisReleaseSpinLock(&pAdapt->Lock);
338 if (m->direction == OUTGOING) {
340 /* use the 8-bytes protocol reserved area, the first
341 * field is used to mark the packet as 'orphan', the
342 * second stores the pointer to the mbuf, so in the
343 * SendComplete handler we know that this is a
344 * reinjected packet and can free correctly.
346 SendRsvd = (PSEND_RSVD)(pPacket->ProtocolReserved);
347 SendRsvd->OriginalPkt = NULL;
// when NdisSend does not report PENDING, the completion handler is invoked
// directly with the returned status
350 NdisSend(&Status, pAdapt->BindingHandle, pPacket);
351 if (Status != NDIS_STATUS_PENDING) {
352 /* done, call the callback now */
353 PtSendComplete(m->context, m->pkt, Status);
355 return; /* unconditional return here. */
357 /* There's no need to check the 8-bytes miniport
358 * reserved area since the path going up will be always
359 * synchronous, and all the cleanup will be done inline.
360 * If the reinjected packet comes from a PtReceivePacket,
361 * there will be no callback.
362 * Otherwise PtReceiveComplete will be called but will just
363 * return since all the cleaning is already done */
364 // do the actual receive.
// the per-processor flag is set only around the two indication calls
365 ULONG Proc = KeGetCurrentProcessorNumber();
366 pAdapt->ReceivedIndicationFlags[Proc] = TRUE;
// first 14 bytes are passed as the header, the rest as lookahead; m->m_len
// is given as both the lookahead size and the packet size
367 NdisMEthIndicateReceive(pAdapt->MiniportHandle, NULL, payload, 14, payload+14, m->m_len, m->m_len);
368 NdisMEthIndicateReceiveComplete(pAdapt->MiniportHandle);
369 pAdapt->ReceivedIndicationFlags[Proc] = FALSE;
372 /* NDIS_PACKET exists and must be freed only if
373 * the packet comes from a PtReceivePacket, otherwise
374 * m->pkt will be null.
378 NdisUnchainBufferAtFront(m->pkt, &pNdisBuffer);
379 NdisFreeBuffer(pNdisBuffer);
380 NdisFreePacket(m->pkt);
385 void win_freem(void *); /* wrapper for m_freem() for protocol.c */
394 * not implemented in linux.
395 * taken from /usr/src/lib/libc/string/strlcpy.c
// strlcpy: bounded copy of the NUL-terminated string src into dst, where
// siz is the size of dst.
// The return value is computed as s - src - 1, with s being the read cursor
// into src. The existing comments say the rest of src is traversed when dst
// is too small, so this is presumably strlen(src) - confirm.
398 strlcpy(char *dst, const char *src, size_t siz)
404 /* Copy as many bytes as will fit */
405 if (n != 0 && --n != 0) {
// the assignment copies one byte and the comparison detects the terminator
407 if ((*d++ = *s++) == 0)
412 /* Not enough room in dst, add NUL and traverse rest of src */
415 *d = '\0'; /* NUL-terminate dst */
420 return(s - src - 1); /* count does not include NUL */
// CleanupReinjected: release the NDIS resources of a reinjected packet.
// It detaches the buffer descriptor at the front of Packet, frees that
// descriptor, frees the packet itself, and decrements the adapter's
// pending-sends count through ADAPT_DECR_PENDING_SENDS.
423 void CleanupReinjected(PNDIS_PACKET Packet, struct mbuf* m, PADAPT pAdapt)
425 PNDIS_BUFFER pNdisBuffer;
// NOTE(review): the pointer obtained here is overwritten by the unchain call
// on the next line, so this query looks redundant - confirm.
427 NdisQueryPacket(Packet, NULL, NULL, &pNdisBuffer, NULL);
428 NdisUnchainBufferAtFront(Packet, &pNdisBuffer);
429 NdisFreeBuffer(pNdisBuffer);
431 NdisFreePacket(Packet);
432 ADAPT_DECR_PENDING_SENDS(pAdapt);
// ipfw2_qhandler_w32_oldstyle: firewall hook for a receive indicated as a
// MAC header buffer plus a lookahead buffer, with no NDIS_PACKET.
// It allocates a struct mbuf followed by HeaderBufferSize +
// LookAheadBufferSize bytes, copies both buffers in, treats any EtherType
// other than 0x0800 (IPv4) as "not for the firewall", and calls
// ipfw_check_hook() with PFIL_IN.
// NOTE(review): PacketSize does not appear to be used; only the lookahead
// bytes are copied - confirm the lookahead always holds the whole packet.
436 ipfw2_qhandler_w32_oldstyle(int direction,
437 NDIS_HANDLE ProtocolBindingContext,
438 unsigned char* HeaderBuffer,
439 unsigned int HeaderBufferSize,
440 unsigned char* LookAheadBuffer,
441 unsigned int LookAheadBufferSize,
442 unsigned int PacketSize)
445 unsigned char* payload = NULL;
446 unsigned short EtherType = 0;
449 /* We are in a special case when NIC signals an incoming
450 * packet using old style calls. This is done passing
451 * a pointer to the MAC header and a pointer to the
452 * rest of the packet.
453 * We simply allocate space for the mbuf and the
454 * subsequent payload section.
456 m = malloc(sizeof(struct mbuf) + HeaderBufferSize + LookAheadBufferSize, 0, 0 );
457 if (m == NULL) //resource shortage, drop the packet
460 /* set mbuf fields to point past the MAC header.
461 * Also set additional W32 info.
462 * m->pkt here is set to null because the notification
463 * from the NIC has come with a header+lookahead buffer,
464 * no NDIS_PACKET has been provided.
466 payload = (unsigned char*)(m + 1);
// NOTE(review): the sizes are unsigned and no check that their sum is at
// least 14 is visible; a smaller sum would make this subtraction wrap.
467 m->m_len = m->m_pkthdr.len = HeaderBufferSize+LookAheadBufferSize-14;
468 m->m_data = payload + 14; /* past the MAC header */
469 m->direction = direction;
470 m->context = ProtocolBindingContext;
474 * Now copy the data from the Windows buffers to the mbuf.
// header first, lookahead immediately after it
476 bcopy(HeaderBuffer, payload, HeaderBufferSize);
477 bcopy(LookAheadBuffer, payload+HeaderBufferSize, LookAheadBufferSize);
478 //hexdump(payload,HeaderBufferSize+LookAheadBufferSize,"qhandler");
480 * Identify EtherType. If the packet is not IP, simply allow
481 * and don't bother the firewall. XXX should be done before.
// EtherType is read from bytes 12-13 of the copied frame and byte-swapped
// before the comparison
483 EtherType = *(unsigned short*)(payload + 12);
484 EtherType = RtlUshortByteSwap(EtherType);
485 if (EtherType != 0x0800) {
486 //DbgPrint("ethertype = %X, skipping ipfw\n",EtherType);
491 //DbgPrint("incoming_raw: proto %u (%s), src %08X, dst %08X, sport %u, dport %u, len %u\n", *(payload+14+9), texify_proto(*(payload+14+9)), *(unsigned int*)(payload+14+12), *(unsigned int*)(payload+14+16), ntohs((*((unsigned short int*)(payload+14+20)))), ntohs((*((unsigned short int*)(payload+14+22)))), HeaderBufferSize+LookAheadBufferSize);
493 /* Query the firewall */
494 ret = ipfw_check_hook(NULL, &m, NULL, PFIL_IN, NULL);
497 /* Accept. Free the mbuf and return PASS. */
498 //DbgPrint("accepted\n");
501 } else if (ret == 0) {
502 /* dummynet has kept the packet, will reinject later. */
503 //DbgPrint("kept by dummynet\n");
507 * Packet dropped by ipfw or dummynet. Nothing to do as
508 * FREE_PKT already freed the fake mbuf
510 //DbgPrint("dropped by dummynet, ret = %i\n", ret);
515 /* forward declaration because those functions are used only here,
516 * no point to make them visible in passthru/protocol/miniport */
517 int do_ipfw_set_ctl(struct sock __unused *sk, int cmd,
518 void __user *user, unsigned int len);
519 int do_ipfw_get_ctl(struct sock __unused *sk, int cmd,
520 void __user *user, int *len);
524 IN PDEVICE_OBJECT pDeviceObject,
531 This is the dispatch routine for handling device ioctl requests.
535 pDeviceObject - Pointer to the device object.
537 pIrp - Pointer to the request packet.
// Body of the device ioctl dispatch routine.
// The shared system buffer is interpreted as a struct sockopt followed by
// its data area. IP_FW_SETSOCKOPT goes to do_ipfw_set_ctl() and
// IP_FW_GETSOCKOPT to do_ipfw_get_ctl(). The IRP is then completed with the
// number of bytes to copy back in IoStatus.Information.
545 PIO_STACK_LOCATION pIrpSp;
546 NTSTATUS NtStatus = STATUS_SUCCESS;
547 unsigned long BytesReturned = 0;
548 unsigned long FunctionCode;
550 struct sockopt *sopt;
553 UNREFERENCED_PARAMETER(pDeviceObject);
555 pIrpSp = IoGetCurrentIrpStackLocation(pIrp);
558 * Using METHOD_BUFFERED as communication method, the userland
559 * side calls DeviceIoControl passing an input buffer and an output
560 * and their respective length (ipfw uses the same length for both).
561 * The system creates a single I/O buffer, with len=max(inlen,outlen).
562 * In the kernel we can read information from this buffer (which is
563 * directly accessible), overwrite it with our results, and set
564 * IoStatus.Information with the number of bytes that the system must
565 * copy back to userland.
566 * In our sockopt emulation, the initial part of the buffer contains
567 * a struct sockopt, followed by the data area.
// the request must be large enough to hold the struct sockopt header
570 len = pIrpSp->Parameters.DeviceIoControl.InputBufferLength;
571 if (len < sizeof(struct sockopt))
// NOTE(review): confirm the IRP is completed on this early-return path.
573 return STATUS_NOT_SUPPORTED; // XXX find better value
575 sopt = pIrp->AssociatedIrp.SystemBuffer;
577 FunctionCode = pIrpSp->Parameters.DeviceIoControl.IoControlCode;
// NOTE(review): sopt_valsize is read from the caller-supplied buffer; no
// check that sizeof(struct sockopt) + len fits within InputBufferLength is
// visible before it is passed on as the data length - confirm the handlers
// cannot read or write past the system buffer.
579 len = sopt->sopt_valsize;
581 switch (FunctionCode)
583 case IP_FW_SETSOCKOPT:
// the data area starts right after the struct sockopt header (sopt+1)
584 ret = do_ipfw_set_ctl(NULL, sopt->sopt_name, sopt+1, len);
587 case IP_FW_GETSOCKOPT:
588 ret = do_ipfw_get_ctl(NULL, sopt->sopt_name, sopt+1, &len);
589 sopt->sopt_valsize = len;
590 //sanity check on len
// bytes copied back to userland are capped at the input buffer length
591 if (len + sizeof(struct sockopt) <= pIrpSp->Parameters.DeviceIoControl.InputBufferLength)
592 BytesReturned = len + sizeof(struct sockopt);
594 BytesReturned = pIrpSp->Parameters.DeviceIoControl.InputBufferLength;
598 NtStatus = STATUS_NOT_SUPPORTED;
// NOTE(review): 'ret' from the handlers is not reflected in NtStatus in the
// visible code - confirm how errors reach userland.
602 pIrp->IoStatus.Information = BytesReturned;
603 pIrp->IoStatus.Status = NtStatus;
604 IoCompleteRequest(pIrp, IO_NO_INCREMENT);
609 void dummynet(void * __unused unused);
610 void ipfw_tick(void * vnetx);
613 __in struct _KDPC *Dpc,
614 __in_opt PVOID DeferredContext,
615 __in_opt PVOID SystemArgument1,
616 __in_opt PVOID SystemArgument2
623 __in struct _KDPC *Dpc,
624 __in_opt PVOID DeferredContext,
625 __in_opt PVOID SystemArgument1,
626 __in_opt PVOID SystemArgument2
629 ipfw_tick(DeferredContext);