2 * Copyright (C) 2010 Luigi Rizzo, Francesco Magno, Universita` di Pisa
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * kernel variables and functions that are not available in Windows.
30 #include <net/pfil.h> /* provides PFIL_IN and PFIL_OUT */
31 #include <arpa/inet.h>
32 #include <netinet/in.h> /* in_addr */
37 /* credentials check */
39 cred_check(void *_insn, int proto, struct ifnet *oif,
40 struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip,
41 u_int16_t src_port, struct bsd_ucred *u, int *ugid_lookupp,
48 * as good as anywhere, place here the missing calls
54 void *_ret = ExAllocatePoolWithTag(NonPagedPool, size, 'wfpi');
56 memset(_ret, 0, size);
61 panic(const char *fmt, ...)
74 for (i = 1; ; i++, bits >>= 1) {
// Fills *tv with the current time. The base value is the system time from
// KeQuerySystemTime (100-nsec units); while that reading has not advanced
// since the last saved sample, the elapsed performance-counter ticks are
// added on top to obtain finer resolution.
// NOTE(review): prevtime/prevcount/freq are function-level statics updated
// with no locking visible here -- confirm callers serialize access.
// NOTE(review): KeQuerySystemTime counts from January 1, 1601, so tv_sec is
// presumably not Unix-epoch based -- confirm no caller expects time_t values.
82 do_gettimeofday(struct timeval *tv)
84 static LARGE_INTEGER prevtime; //system time in 100-nsec resolution
85 static LARGE_INTEGER prevcount; //performance counter value at the last sample
86 static LARGE_INTEGER freq; //performance counter frequency (ticks per second)
88 LARGE_INTEGER currtime;
89 LARGE_INTEGER currcount;
90 if (prevtime.QuadPart == 0) { //first time we ask for system time
91 KeQuerySystemTime(&prevtime);
92 prevcount = KeQueryPerformanceCounter(&freq);
93 currtime.QuadPart = prevtime.QuadPart;
95 KeQuerySystemTime(&currtime);
96 currcount = KeQueryPerformanceCounter(&freq);
97 if (currtime.QuadPart == prevtime.QuadPart) {
98 //time has NOT changed, calculate time using ticks and DO NOT update
99 LONGLONG difftime = 0; //difference in 100-nsec
100 LONGLONG diffcount = 0; //clock count difference
101 //printf("time has NOT changed\n");
102 diffcount = currcount.QuadPart - prevcount.QuadPart;
// scale the tick delta to 100-nsec units (10000000 per second) before
// dividing by the counter frequency
103 diffcount *= 10000000;
104 difftime = diffcount / freq.QuadPart;
105 currtime.QuadPart += difftime;
107 //time has changed, update and return SystemTime
108 //printf("time has changed\n");
109 prevtime.QuadPart = currtime.QuadPart;
110 prevcount.QuadPart = currcount.QuadPart;
113 currtime.QuadPart /= 10; //convert from 100-nsec units to usec
// split the microsecond count into whole seconds and the remainder
114 tv->tv_sec = currtime.QuadPart / (LONGLONG)1000000;
115 tv->tv_usec = currtime.QuadPart % (LONGLONG)1000000;
116 //printf("sec %d usec %d\n",tv->tv_sec, tv->tv_usec);
// Derives a coarse integer time value from KeQuerySystemTime.
// NOTE(review): the system time is expressed in 100-nsec units (see the
// prevtime comment in do_gettimeofday), so dividing by 1000000 yields tenths
// of a second; whole seconds would need 10000000 -- confirm the unit callers
// expect.
// NOTE(review): this reads the wall-clock system time rather than time since
// boot, and the quotient is truncated to int -- confirm both are acceptable.
119 int time_uptime_w32()
123 KeQuerySystemTime(&tm);
124 ret = (int)(tm.QuadPart / (LONGLONG)1000000);
130 * Windows version of firewall hook. We receive a partial copy of
131 * the packet which points to the original buffers. In output,
132 * the refcount has been already incremented.
133 * The function reconstructs
134 * the whole packet in a contiguous memory area, builds a fake mbuf,
135 * calls the firewall, performs any cleanup required, and returns
136 * to MiniportSend or ProtocolReceive, which will silently return
137 * (dropping packet) or continue its execution (allowing packet).
138 * The memory area contains:
139 * - the fake mbuf, filled with data needed by ipfw, and information
143 void hexdump(PUCHAR,int, const char *);
144 static char _if_in[] = "incoming";
145 static char _if_out[] = "outgoing";
// Firewall hook for packets delivered as an NDIS_PACKET: copies the packet's
// buffer chain into one contiguous area placed right after a fake mbuf,
// installs that area as the packet's only buffer, and hands the mbuf to
// ipfw_check_hook with PFIL_IN or PFIL_OUT depending on direction.
148 ipfw2_qhandler_w32(PNDIS_PACKET pNdisPacket, int direction,
151 unsigned int BufferCount = 0;
152 unsigned TotalPacketLength = 0;
153 PNDIS_BUFFER pCurrentBuffer = NULL;
154 PNDIS_BUFFER pNextBuffer = NULL;
156 unsigned char* payload = NULL;
158 unsigned short EtherType = 0;
161 PNDIS_BUFFER pNdisBuffer, old_head, old_tail;
162 NDIS_HANDLE PacketPool;
166 /* In NDIS, packets are a chain of NDIS_BUFFER. We query
167 * the packet to get a pointer of chain's head, the length
168 * of the chain, and the length of the packet itself.
169 * Then allocate a buffer for the mbuf and the payload.
171 NdisQueryPacket(pNdisPacket, NULL, &BufferCount,
172 &pCurrentBuffer, &TotalPacketLength);
// one allocation holds the mbuf header followed by the full frame
173 m = malloc(sizeof(struct mbuf) + TotalPacketLength, 0, 0 );
174 if (m == NULL) //resource shortage, drop the packet
177 /* set mbuf fields to point past the MAC header.
178 * Also set additional W32 info
// the frame bytes live immediately after the mbuf structure
180 payload = (unsigned char*)(m + 1);
// NOTE(review): TotalPacketLength is unsigned and no minimum-length check is
// visible before 14 is subtracted -- a frame shorter than the MAC header
// would wrap around; confirm such frames cannot reach this point.
181 m->m_len = m->m_pkthdr.len = TotalPacketLength-14;
182 m->m_pkthdr.rcvif = (void *)((direction==INCOMING) ? _if_in : NULL);
183 m->m_data = payload + 14; /* past the MAC header */
184 m->direction = direction;
185 m->context = Context;
186 m->pkt = pNdisPacket;
188 /* m_skb != NULL is used in the ip_output routine to check
189 * for packets that come from the stack and differentiate
190 * from those internally generated by ipfw.
191 * The pointer is not used, just needs to be non-null.
193 m->m_skb = (void *)pNdisPacket;
195 * Now copy the data from the Windows buffers to the mbuf.
197 for (i=0, ofs = 0; i < BufferCount; i++) {
// NOTE(review): src is consumed by bcopy right after the query;
// NdisQueryBufferSafe can presumably return a NULL address under low
// resources -- confirm a NULL check exists before the copy.
199 NdisQueryBufferSafe(pCurrentBuffer, &src, &l,
201 bcopy(src, payload + ofs, l);
203 NdisGetNextBuffer(pCurrentBuffer, &pNextBuffer);
204 pCurrentBuffer = pNextBuffer;
207 * Identify EtherType. If the packet is not IP, simply allow
208 * and don't bother the firewall. XXX should be done before.
// the EtherType field is read at offset 12 of the frame and byte-swapped
// before being compared with 0x0800
210 EtherType = *(unsigned short*)(payload + 12);
211 EtherType = RtlUshortByteSwap(EtherType);
212 if (EtherType != 0x0800) {
213 //DbgPrint("ethertype = %X, skipping ipfw\n",EtherType);
219 * Now build a buffer descriptor to replace the original chain.
// the descriptor pool is chosen by direction: send pool for outgoing,
// receive pool otherwise
222 PacketPool = direction == OUTGOING ?
223 pAdapt->SendPacketPoolHandle : pAdapt->RecvPacketPoolHandle;
// the new descriptor covers the whole copied frame (payload length + 14)
224 NdisAllocateBuffer(&Status, &pNdisBuffer,
225 PacketPool, payload, m->m_pkthdr.len+14);
226 if (Status != NDIS_STATUS_SUCCESS)
229 * Save the old buffer pointers, and put the new one
232 pNdisBuffer->Next = NULL;
233 old_head = NDIS_PACKET_FIRST_NDIS_BUFFER(pNdisPacket);
234 old_tail = NDIS_PACKET_LAST_NDIS_BUFFER(pNdisPacket);
235 NdisReinitializePacket(pNdisPacket);
236 NdisChainBufferAtFront(pNdisPacket, pNdisBuffer);
238 if (direction == INCOMING) {
239 DBGPRINT(("incoming: proto %u (%s), src %08X, dst %08X, sport %u, dport %u, len %u\n", *(payload+14+9), texify_proto(*(payload+14+9)), *(unsigned int*)(payload+14+12), *(unsigned int*)(payload+14+16), ntohs((*((unsigned short int*)(payload+14+20)))), ntohs((*((unsigned short int*)(payload+14+22)))), TotalPacketLength));
241 DBGPRINT(("outgoing: proto %u (%s), src %08X, dst %08X, sport %u, dport %u, len %u\n", *(payload+14+9), texify_proto(*(payload+14+9)), *(unsigned int*)(payload+14+12), *(unsigned int*)(payload+14+16), ntohs((*((unsigned short int*)(payload+14+20)))), ntohs((*((unsigned short int*)(payload+14+22)))), TotalPacketLength));
// query the firewall; outgoing packets carry _if_out as the interface argument
244 if (direction == INCOMING)
245 ret = ipfw_check_hook(NULL, &m, NULL, PFIL_IN, NULL);
247 ret = ipfw_check_hook(NULL, &m, (struct ifnet*)_if_out, PFIL_OUT, NULL);
250 /* Accept. Restore the old buffer chain, free
251 * the mbuf and return PASS.
253 //DBGPRINT(("accepted\n"));
254 NdisReinitializePacket(pNdisPacket);
255 NDIS_PACKET_FIRST_NDIS_BUFFER(pNdisPacket) = old_head;
256 NDIS_PACKET_LAST_NDIS_BUFFER(pNdisPacket) = old_tail;
257 NdisFreeBuffer(pNdisBuffer);
260 } else if (ret == 0) {
261 /* dummynet has kept the packet, will reinject later. */
262 //DBGPRINT(("kept by dummynet\n"));
266 * Packet dropped by ipfw or dummynet. Nothing to do as
267 * FREE_PKT already freed the fake mbuf
269 //DBGPRINT(("dropped by dummynet, ret = %i\n", ret));
273 /* for some reason we cannot proceed. Free any resources
274 * including those received from above, and return
275 * faking success. XXX this must be fixed later.
277 NdisFreePacket(pNdisPacket);
282 * Windows reinjection function.
283 * The packet is already available as m->pkt, so we only
284 * need to send it to the right place.
285 * Normally an NDIS intermediate driver allocates
286 * a fresh descriptor, while the actual data's ownership is
287 * retained by the protocol, or the miniport below.
288 * Since an intermediate driver behaves as a miniport driver
289 * at the upper edge (towards the protocol), and as a protocol
290 * driver at the lower edge (towards the NIC), when we handle a
291 * packet we have a reserved area in both directions (we can use
292 * only one for each direction at our own discretion).
293 * Normally this area is used to save a pointer to the original
294 * packet, so when the driver is done with it, the original descriptor
295 * can be retrieved, and the resources freed (packet descriptor,
296 * buffer descriptor(s) and the actual data). In our driver this
297 * area is used to mark the reinjected packets as 'orphan', because
298 * the original descriptor is gone long ago. This way we can handle
299 * correctly the resource freeing when the callback function
// Reinjects a packet previously held by the firewall. After checking the
// adapter state under pAdapt->Lock, outgoing packets are sent down with
// NdisSend and incoming packets are indicated up with
// NdisMEthIndicateReceive.
304 netisr_dispatch(int num, struct mbuf *m)
// the frame bytes are stored immediately after the mbuf structure
306 unsigned char* payload = (unsigned char*)(m+1);
307 PADAPT pAdapt = m->context;
309 PNDIS_PACKET pPacket = m->pkt;
310 PNDIS_BUFFER pNdisBuffer;
311 NDIS_HANDLE PacketPool;
318 DbgPrint("reinject %s\n", m->direction == OUTGOING ?
319 "outgoing" : "incoming");
321 NdisAcquireSpinLock(&pAdapt->Lock);
322 if (m->direction == OUTGOING) {
323 //we must first check if the adapter is going down,
324 // in this case abort the reinjection
325 if (pAdapt->PTDeviceState > NdisDeviceStateD0) {
326 pAdapt->OutstandingSends--;
327 // XXX should we notify up ?
328 NdisReleaseSpinLock(&pAdapt->Lock);
332 /* if the upper miniport edge is not initialized or
333 * the miniport edge is in low power state, abort
334 * XXX we should notify the error.
336 if (!pAdapt->MiniportHandle ||
337 pAdapt->MPDeviceState > NdisDeviceStateD0) {
338 NdisReleaseSpinLock(&pAdapt->Lock);
342 NdisReleaseSpinLock(&pAdapt->Lock);
344 if (m->direction == OUTGOING) {
346 /* use the 8-bytes protocol reserved area, the first
347 * field is used to mark the packet as 'orphan', the
348 * second stores the pointer to the mbuf, so in
349 * the SendComplete handler we know that this is a
350 * reinjected packet and can free correctly.
352 SendRsvd = (PSEND_RSVD)(pPacket->ProtocolReserved);
353 SendRsvd->OriginalPkt = NULL;
356 NdisSend(&Status, pAdapt->BindingHandle, pPacket);
357 if (Status != NDIS_STATUS_PENDING) {
358 /* done, call the callback now */
359 PtSendComplete(m->context, m->pkt, Status);
361 return; /* unconditional return here. */
363 /* There's no need to check the 8-bytes miniport
364 * reserved area since the path going up will be always
365 * synchronous, and all the cleanup will be done inline.
366 * If the reinjected packet comes from a PtReceivePacket,
367 * there will be no callback.
368 * Otherwise PtReceiveComplete will be called but will just
369 * return since all the cleaning is already done */
370 // do the actual receive.
371 ULONG Proc = KeGetCurrentProcessorNumber();
// the per-processor flag is raised only for the duration of the indication
372 pAdapt->ReceivedIndicationFlags[Proc] = TRUE;
// the first 14 bytes of payload are passed as the header; the remaining
// m->m_len bytes are passed both as lookahead size and as packet size
373 NdisMEthIndicateReceive(pAdapt->MiniportHandle, NULL, payload, 14, payload+14, m->m_len, m->m_len);
374 NdisMEthIndicateReceiveComplete(pAdapt->MiniportHandle);
375 pAdapt->ReceivedIndicationFlags[Proc] = FALSE;
378 /* NDIS_PACKET exists and must be freed only if
379 * the packet come from a PtReceivePacket, oherwise
380 * m->pkt will ne null.
384 NdisUnchainBufferAtFront(m->pkt, &pNdisBuffer);
385 NdisFreeBuffer(pNdisBuffer);
386 NdisFreePacket(m->pkt);
391 void win_freem(void *); /* wrapper for m_freem() for protocol.c */
400 * not implemented in linux.
401 * taken from /usr/src/lib/libc/string/strlcpy.c
// Size-bounded string copy: copies src into dst, whose capacity is siz
// bytes, and returns the length of src (the count excludes the NUL).
404 strlcpy(char *dst, const char *src, size_t siz)
410 /* Copy as many bytes as will fit */
411 if (n != 0 && --n != 0) {
413 if ((*d++ = *s++) == 0)
418 /* Not enough room in dst, add NUL and traverse rest of src */
421 *d = '\0'; /* NUL-terminate dst */
426 return(s - src - 1); /* count does not include NUL */
// Releases the NDIS resources of a reinjected send: unchains and frees the
// buffer descriptor at the front of Packet, frees the packet descriptor, and
// decrements the adapter's pending-send count.
// NOTE(review): m is not referenced in the statements visible here --
// confirm the mbuf is released as part of this cleanup.
429 void CleanupReinjected(PNDIS_PACKET Packet, struct mbuf* m, PADAPT pAdapt)
431 PNDIS_BUFFER pNdisBuffer;
433 NdisQueryPacket(Packet, NULL, NULL, &pNdisBuffer, NULL);
434 NdisUnchainBufferAtFront(Packet, &pNdisBuffer);
435 NdisFreeBuffer(pNdisBuffer);
437 NdisFreePacket(Packet);
438 ADAPT_DECR_PENDING_SENDS(pAdapt);
// Firewall hook for receives indicated as separate header and lookahead
// buffers (no NDIS_PACKET): copies both into one contiguous area placed
// right after a fake mbuf and passes it to ipfw_check_hook with PFIL_IN.
// NOTE(review): PacketSize is not referenced in the statements visible here;
// only LookAheadBufferSize bytes are copied -- confirm the lookahead buffer
// always covers the whole packet.
442 ipfw2_qhandler_w32_oldstyle(int direction,
443 NDIS_HANDLE ProtocolBindingContext,
444 unsigned char* HeaderBuffer,
445 unsigned int HeaderBufferSize,
446 unsigned char* LookAheadBuffer,
447 unsigned int LookAheadBufferSize,
448 unsigned int PacketSize)
451 unsigned char* payload = NULL;
452 unsigned short EtherType = 0;
455 /* We are in a special case when NIC signals an incoming
456 * packet using old style calls. This is done passing
457 * a pointer to the MAC header and a pointer to the
458 * rest of the packet.
459 * We simply allocate space for the mbuf and the
460 * subsequent payload section.
462 m = malloc(sizeof(struct mbuf) + HeaderBufferSize + LookAheadBufferSize, 0, 0 );
463 if (m == NULL) //resource shortage, drop the packet
466 /* set mbuf fields to point past the MAC header.
467 * Also set additional W32 info.
468 * m->pkt here is set to null because the notification
469 * from the NIC has come with a header+lookahead buffer,
470 * no NDIS_PACKET has been provided.
472 payload = (unsigned char*)(m + 1);
// NOTE(review): the sizes are unsigned and no minimum-size check is visible
// before 14 is subtracted and before offset 12 is read below -- confirm
// HeaderBufferSize is always at least the 14-byte MAC header.
473 m->m_len = m->m_pkthdr.len = HeaderBufferSize+LookAheadBufferSize-14;
474 m->m_data = payload + 14; /* past the MAC header */
475 m->direction = direction;
476 m->context = ProtocolBindingContext;
480 * Now copy the data from the Windows buffers to the mbuf.
// header first, lookahead data directly behind it
482 bcopy(HeaderBuffer, payload, HeaderBufferSize);
483 bcopy(LookAheadBuffer, payload+HeaderBufferSize, LookAheadBufferSize);
484 //hexdump(payload,HeaderBufferSize+LookAheadBufferSize,"qhandler");
486 * Identify EtherType. If the packet is not IP, simply allow
487 * and don't bother the firewall. XXX should be done before.
// the EtherType field is read at offset 12 of the frame and byte-swapped
// before being compared with 0x0800
489 EtherType = *(unsigned short*)(payload + 12);
490 EtherType = RtlUshortByteSwap(EtherType);
491 if (EtherType != 0x0800) {
492 //DbgPrint("ethertype = %X, skipping ipfw\n",EtherType);
497 //DbgPrint("incoming_raw: proto %u (%s), src %08X, dst %08X, sport %u, dport %u, len %u\n", *(payload+14+9), texify_proto(*(payload+14+9)), *(unsigned int*)(payload+14+12), *(unsigned int*)(payload+14+16), ntohs((*((unsigned short int*)(payload+14+20)))), ntohs((*((unsigned short int*)(payload+14+22)))), HeaderBufferSize+LookAheadBufferSize);
499 /* Query the firewall */
500 ret = ipfw_check_hook(NULL, &m, NULL, PFIL_IN, NULL);
503 /* Accept. Free the mbuf and return PASS. */
504 //DbgPrint("accepted\n");
507 } else if (ret == 0) {
508 /* dummynet has kept the packet, will reinject later. */
509 //DbgPrint("kept by dummynet\n");
513 * Packet dropped by ipfw or dummynet. Nothing to do as
514 * FREE_PKT already freed the fake mbuf
516 //DbgPrint("dropped by dummynet, ret = %i\n", ret);
521 /* forward declaration because those functions are used only here,
522 * no point to make them visible in passthru/protocol/miniport */
523 int do_ipfw_set_ctl(struct sock *sk, int cmd,
524 void __user *user, unsigned int len);
525 int do_ipfw_get_ctl(struct sock *sk, int cmd,
526 void __user *user, int *len);
530 IN PDEVICE_OBJECT pDeviceObject,
537 This is the dispatch routine for handling device ioctl requests.
541 pDeviceObject - Pointer to the device object.
543 pIrp - Pointer to the request packet.
551 PIO_STACK_LOCATION pIrpSp;
552 NTSTATUS NtStatus = STATUS_SUCCESS;
553 unsigned long BytesReturned = 0;
554 unsigned long FunctionCode;
556 struct sockopt *sopt;
559 UNREFERENCED_PARAMETER(pDeviceObject);
561 pIrpSp = IoGetCurrentIrpStackLocation(pIrp);
564 * Using METHOD_BUFFERED as communication method, the userland
565 * side calls DeviceIoControl passing an input buffer and an output
566 * and their respective length (ipfw uses the same length for both).
567 * The system creates a single I/O buffer, with len=max(inlen,outlen).
568 * In the kernel we can read information from this buffer (which is
569 * directly accessible), overwrite it with our results, and set
570 * IoStatus.Information with the number of bytes that the system must
571 * copy back to userland.
572 * In our sockopt emulation, the initial part of the buffer contains
573 * a struct sockopt, followed by the data area.
// the request must at least hold the fixed struct sockopt header
576 len = pIrpSp->Parameters.DeviceIoControl.InputBufferLength;
577 if (len < sizeof(struct sockopt))
579 return STATUS_NOT_SUPPORTED; // XXX find better value
581 sopt = pIrp->AssociatedIrp.SystemBuffer;
583 FunctionCode = pIrpSp->Parameters.DeviceIoControl.IoControlCode;
// NOTE(review): sopt_valsize is read from the caller-supplied buffer and is
// not compared with InputBufferLength before being handed to
// do_ipfw_set_ctl/do_ipfw_get_ctl -- confirm those routines bound their
// accesses to the data area that follows the header.
585 len = sopt->sopt_valsize;
587 switch (FunctionCode)
589 case IP_FW_SETSOCKOPT:
// the option data starts right after the struct sockopt header (sopt+1)
590 ret = do_ipfw_set_ctl(NULL, sopt->sopt_name, sopt+1, len);
593 case IP_FW_GETSOCKOPT:
594 ret = do_ipfw_get_ctl(NULL, sopt->sopt_name, sopt+1, &len);
// report the resulting length back to the caller inside the header
595 sopt->sopt_valsize = len;
596 //sanity check on len
// clamp the number of bytes copied back to the size of the supplied buffer
// NOTE(review): the clamp uses InputBufferLength; this relies on input and
// output lengths being equal, as the comment on METHOD_BUFFERED states.
597 if (len + sizeof(struct sockopt) <= pIrpSp->Parameters.DeviceIoControl.InputBufferLength)
598 BytesReturned = len + sizeof(struct sockopt);
600 BytesReturned = pIrpSp->Parameters.DeviceIoControl.InputBufferLength;
604 NtStatus = STATUS_NOT_SUPPORTED;
// publish the byte count and status, then complete the request
608 pIrp->IoStatus.Information = BytesReturned;
609 pIrp->IoStatus.Status = NtStatus;
610 IoCompleteRequest(pIrp, IO_NO_INCREMENT);
615 void dummynet(void * unused);
616 void ipfw_tick(void * vnetx);
619 __in struct _KDPC *Dpc,
620 __in_opt PVOID DeferredContext,
621 __in_opt PVOID SystemArgument1,
622 __in_opt PVOID SystemArgument2
629 __in struct _KDPC *Dpc,
630 __in_opt PVOID DeferredContext,
631 __in_opt PVOID SystemArgument1,
632 __in_opt PVOID SystemArgument2
635 ipfw_tick(DeferredContext);