X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fdccp%2Fipv4.c;h=90c74b4adb7396443a36d8124633e2d22abd5551;hb=refs%2Fheads%2Fvserver;hp=dc0487b5bacec375dda26bbdc4515b5a5cedc621;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index dc0487b5b..90c74b4ad 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -10,7 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#include #include #include #include @@ -18,8 +17,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -28,14 +29,14 @@ #include "ackvec.h" #include "ccid.h" #include "dccp.h" +#include "feat.h" -struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { - .lhash_lock = RW_LOCK_UNLOCKED, - .lhash_users = ATOMIC_INIT(0), - .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), -}; - -EXPORT_SYMBOL_GPL(dccp_hashinfo); +/* + * This is the global socket data structure used for responding to + * the Out-of-the-blue (OOTB) packets. A control sock will be created + * for this socket at the initialization time. + */ +static struct socket *dccp_v4_ctl_socket; static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) { @@ -43,33 +44,18 @@ static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) inet_csk_bind_conflict); } -static void dccp_v4_hash(struct sock *sk) -{ - inet_hash(&dccp_hashinfo, sk); -} - -void dccp_unhash(struct sock *sk) -{ - inet_unhash(&dccp_hashinfo, sk); -} - -EXPORT_SYMBOL_GPL(dccp_unhash); - int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct rtable *rt; - u32 daddr, nexthop; + __be32 daddr, nexthop; int tmp; int err; dp->dccps_role = DCCP_ROLE_CLIENT; - if (dccp_service_not_initialized(sk)) - return -EPROTO; - if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; @@ -127,13 +113,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* OK, now commit destination to socket. */ sk_setup_caps(sk, &rt->u.dst); - dp->dccps_gar = - dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, - inet->daddr, - inet->sport, - usin->sin_port); - dccp_update_gss(sk, dp->dccps_iss); - + dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, inet->daddr, + inet->sport, inet->dport); inet->id = dp->dccps_iss ^ jiffies; err = dccp_connect(sk); @@ -176,7 +157,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, /* We don't check in the destentry if pmtu discovery is forbidden * on this route. We just assume that no packet_to_big packets * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the + * There is a small race when the user changes this flag in the * route, but I think that's acceptable. */ if ((dst = __sk_dst_check(sk, 0)) == NULL) @@ -197,7 +178,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, dccp_sync_mss(sk, mtu); /* - * From: draft-ietf-dccp-spec-11.txt + * From RFC 4340, sec. 14.1: * * DCCP-Sync packets are the best choice for upward * probing, since DCCP-Sync probes do not risk application @@ -207,88 +188,6 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, } /* else let the usual retransmit timer handle it */ } -static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb) -{ - int err; - struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; - const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_ack_bits); - struct sk_buff *skb; - - if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) - return; - - skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC); - if (skb == NULL) - return; - - /* Reserve space for headers. */ - skb_reserve(skb, MAX_DCCP_HEADER); - - skb->dst = dst_clone(rxskb->dst); - - skb->h.raw = skb_push(skb, dccp_hdr_ack_len); - dh = dccp_hdr(skb); - memset(dh, 0, dccp_hdr_ack_len); - - /* Build DCCP header and checksum it. */ - dh->dccph_type = DCCP_PKT_ACK; - dh->dccph_sport = rxdh->dccph_dport; - dh->dccph_dport = rxdh->dccph_sport; - dh->dccph_doff = dccp_hdr_ack_len / 4; - dh->dccph_x = 1; - - dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), - DCCP_SKB_CB(rxskb)->dccpd_seq); - - bh_lock_sock(dccp_ctl_socket->sk); - err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, - rxskb->nh.iph->daddr, - rxskb->nh.iph->saddr, NULL); - bh_unlock_sock(dccp_ctl_socket->sk); - - if (err == NET_XMIT_CN || err == 0) { - DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); - DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); - } -} - -static void dccp_v4_reqsk_send_ack(struct sk_buff *skb, - struct request_sock *req) -{ - dccp_v4_ctl_send_ack(skb); -} - -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, - struct dst_entry *dst) -{ - int err = -1; - struct sk_buff *skb; - - /* First, grab a route. */ - - if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) - goto out; - - skb = dccp_make_response(sk, dst, req); - if (skb != NULL) { - const struct inet_request_sock *ireq = inet_rsk(req); - - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, - ireq->rmt_addr, - ireq->opt); - if (err == NET_XMIT_CN) - err = 0; - } - -out: - dst_release(dst); - return err; -} - /* * This routine is called by the ICMP module when it gets some sort of error * condition. If err < 0 then the socket should be closed and the error @@ -301,7 +200,7 @@ out: * check at all. A more general error queue to queue errors for later handling * is probably better. */ -void dccp_v4_err(struct sk_buff *skb, u32 info) +static void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + @@ -327,7 +226,7 @@ void dccp_v4_err(struct sk_buff *skb, u32 info) } if (sk->sk_state == DCCP_TIME_WAIT) { - inet_twsk_put((struct inet_timewait_sock *)sk); + inet_twsk_put(inet_twsk(sk)); return; } @@ -345,7 +244,7 @@ void dccp_v4_err(struct sk_buff *skb, u32 info) seq = dccp_hdr_seq(skb); if (sk->sk_state != DCCP_LISTEN && !between48(seq, dp->dccps_swl, dp->dccps_swh)) { - NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS); + NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -445,45 +344,24 @@ out: sock_put(sk); } -/* This routine computes an IPv4 DCCP checksum. */ -void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) +static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb, + __be32 src, __be32 dst) +{ + return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum); +} + +void dccp_v4_send_check(struct sock *sk, int unused, struct sk_buff *skb) { const struct inet_sock *inet = inet_sk(sk); struct dccp_hdr *dh = dccp_hdr(skb); - dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr); + dccp_csum_outgoing(skb); + dh->dccph_checksum = dccp_v4_csum_finish(skb, inet->saddr, inet->daddr); } EXPORT_SYMBOL_GPL(dccp_v4_send_check); -int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) -{ - struct sk_buff *skb; - /* - * FIXME: what if rebuild_header fails? - * Should we be doing a rebuild_header here? - */ - int err = inet_sk_rebuild_header(sk); - - if (err != 0) - return err; - - skb = dccp_make_reset(sk, sk->sk_dst_cache, code); - if (skb != NULL) { - const struct inet_sock *inet = inet_sk(sk); - - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = ip_build_and_send_pkt(skb, sk, - inet->saddr, inet->daddr, NULL); - if (err == NET_XMIT_CN) - err = 0; - } - - return err; -} - -static inline u64 dccp_v4_init_sequence(const struct sock *sk, - const struct sk_buff *skb) +static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb) { return secure_dccp_sequence_number(skb->nh.iph->daddr, skb->nh.iph->saddr, @@ -491,90 +369,6 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk, dccp_hdr(skb)->dccph_sport); } -int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) -{ - struct inet_request_sock *ireq; - struct dccp_sock dp; - struct request_sock *req; - struct dccp_request_sock *dreq; - const __u32 saddr = skb->nh.iph->saddr; - const __u32 daddr = skb->nh.iph->daddr; - const __u32 service = dccp_hdr_request(skb)->dccph_req_service; - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; - - /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ - if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST | RTCF_MULTICAST)) { - reset_code = DCCP_RESET_CODE_NO_CONNECTION; - goto drop; - } - - if (dccp_bad_service_code(sk, service)) { - reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; - goto drop; - } - /* - * TW buckets are converted to open requests without - * limitations, they conserve resources and peer is - * evidently real one. - */ - if (inet_csk_reqsk_queue_is_full(sk)) - goto drop; - - /* - * Accept backlog is full. If we have already queued enough - * of warm entries in syn queue, drop request. It is better than - * clogging syn queue with openreqs with exponentially increasing - * timeout. - */ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) - goto drop; - - req = reqsk_alloc(sk->sk_prot->rsk_prot); - if (req == NULL) - goto drop; - - /* FIXME: process options */ - - dccp_openreq_init(req, &dp, skb); - - ireq = inet_rsk(req); - ireq->loc_addr = daddr; - ireq->rmt_addr = saddr; - req->rcv_wnd = 100; /* Fake, option parsing will get the - right value */ - ireq->opt = NULL; - - /* - * Step 3: Process LISTEN state - * - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie - * - * In fact we defer setting S.GSR, S.SWL, S.SWH to - * dccp_create_openreq_child. - */ - dreq = dccp_rsk(req); - dreq->dreq_isr = dcb->dccpd_seq; - dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); - dreq->dreq_service = service; - - if (dccp_v4_send_response(sk, req, NULL)) - goto drop_and_free; - - inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); - return 0; - -drop_and_free: - reqsk_free(req); -drop: - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); - dcb->dccpd_reset_code = reset_code; - return -1; -} - -EXPORT_SYMBOL_GPL(dccp_v4_conn_request); - /* * The three way handshake has completed - we got a valid ACK or DATAACK - * now create the new socket. @@ -644,61 +438,22 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req != NULL) return dccp_check_req(sk, skb, req, prev); - nsk = __inet_lookup_established(&dccp_hashinfo, - iph->saddr, dh->dccph_sport, - iph->daddr, ntohs(dh->dccph_dport), - inet_iif(skb)); + nsk = inet_lookup_established(&dccp_hashinfo, + iph->saddr, dh->dccph_sport, + iph->daddr, dh->dccph_dport, + inet_iif(skb)); if (nsk != NULL) { if (nsk->sk_state != DCCP_TIME_WAIT) { bh_lock_sock(nsk); return nsk; } - inet_twsk_put((struct inet_timewait_sock *)nsk); + inet_twsk_put(inet_twsk(nsk)); return NULL; } return sk; } -int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr, - const u32 daddr) -{ - const struct dccp_hdr* dh = dccp_hdr(skb); - int checksum_len; - u32 tmp; - - if (dh->dccph_cscov == 0) - checksum_len = skb->len; - else { - checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); - checksum_len = checksum_len < skb->len ? checksum_len : - skb->len; - } - - tmp = csum_partial((unsigned char *)dh, checksum_len, 0); - return csum_tcpudp_magic(saddr, daddr, checksum_len, - IPPROTO_DCCP, tmp); -} - -static int dccp_v4_verify_checksum(struct sk_buff *skb, - const u32 saddr, const u32 daddr) -{ - struct dccp_hdr *dh = dccp_hdr(skb); - int checksum_len; - u32 tmp; - - if (dh->dccph_cscov == 0) - checksum_len = skb->len; - else { - checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); - checksum_len = checksum_len < skb->len ? checksum_len : - skb->len; - } - tmp = csum_partial((unsigned char *)dh, checksum_len, 0); - return csum_tcpudp_magic(saddr, daddr, checksum_len, - IPPROTO_DCCP, tmp) == 0 ? 0 : -1; -} - static struct dst_entry* dccp_v4_route_skb(struct sock *sk, struct sk_buff *skb) { @@ -712,9 +467,10 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, .uli_u = { .ports = { .sport = dccp_hdr(skb)->dccph_dport, .dport = dccp_hdr(skb)->dccph_sport } - } + } }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; @@ -723,7 +479,37 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, return &rt->u.dst; } -static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, + struct dst_entry *dst) +{ + int err = -1; + struct sk_buff *skb; + + /* First, grab a route. */ + + if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) + goto out; + + skb = dccp_make_response(sk, dst, req); + if (skb != NULL) { + const struct inet_request_sock *ireq = inet_rsk(req); + struct dccp_hdr *dh = dccp_hdr(skb); + + dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr, + ireq->rmt_addr); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, + ireq->rmt_addr, + ireq->opt); + err = net_xmit_eval(err); + } + +out: + dst_release(dst); + return err; +} + +static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { int err; struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; @@ -732,7 +518,7 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) sizeof(struct dccp_hdr_reset); struct sk_buff *skb; struct dst_entry *dst; - u64 seqno; + u64 seqno = 0; /* Never send a reset in response to a reset. */ if (rxdh->dccph_type == DCCP_PKT_RESET) @@ -741,21 +527,20 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) return; - dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb); + dst = dccp_v4_route_skb(dccp_v4_ctl_socket->sk, rxskb); if (dst == NULL) return; - skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC); + skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header, + GFP_ATOMIC); if (skb == NULL) goto out; /* Reserve space for headers. */ - skb_reserve(skb, MAX_DCCP_HEADER); + skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header); skb->dst = dst_clone(dst); - skb->h.raw = skb_push(skb, dccp_hdr_reset_len); - dh = dccp_hdr(skb); - memset(dh, 0, dccp_hdr_reset_len); + dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len); /* Build DCCP header and checksum it. */ dh->dccph_type = DCCP_PKT_RESET; @@ -766,25 +551,24 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) dccp_hdr_reset(skb)->dccph_reset_code = DCCP_SKB_CB(rxskb)->dccpd_reset_code; - /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */ - seqno = 0; + /* See "8.3.1. Abnormal Termination" in RFC 4340 */ if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); dccp_hdr_set_seq(dh, seqno); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), - DCCP_SKB_CB(rxskb)->dccpd_seq); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); - dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr, - rxskb->nh.iph->daddr); + dccp_csum_outgoing(skb); + dh->dccph_checksum = dccp_v4_csum_finish(skb, rxskb->nh.iph->saddr, + rxskb->nh.iph->daddr); - bh_lock_sock(dccp_ctl_socket->sk); - err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, + bh_lock_sock(dccp_v4_ctl_socket->sk); + err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk, rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL); - bh_unlock_sock(dccp_ctl_socket->sk); + bh_unlock_sock(dccp_v4_ctl_socket->sk); - if (err == NET_XMIT_CN || err == 0) { + if (net_xmit_eval(err) == 0) { DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); } @@ -792,6 +576,103 @@ out: dst_release(dst); } +static void dccp_v4_reqsk_destructor(struct request_sock *req) +{ + kfree(inet_rsk(req)->opt); +} + +static struct request_sock_ops dccp_request_sock_ops __read_mostly = { + .family = PF_INET, + .obj_size = sizeof(struct dccp_request_sock), + .rtx_syn_ack = dccp_v4_send_response, + .send_ack = dccp_reqsk_send_ack, + .destructor = dccp_v4_reqsk_destructor, + .send_reset = dccp_v4_ctl_send_reset, +}; + +int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) +{ + struct inet_request_sock *ireq; + struct request_sock *req; + struct dccp_request_sock *dreq; + const __be32 service = dccp_hdr_request(skb)->dccph_req_service; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; + + /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ + if (((struct rtable *)skb->dst)->rt_flags & + (RTCF_BROADCAST | RTCF_MULTICAST)) { + reset_code = DCCP_RESET_CODE_NO_CONNECTION; + goto drop; + } + + if (dccp_bad_service_code(sk, service)) { + reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + goto drop; + } + /* + * TW buckets are converted to open requests without + * limitations, they conserve resources and peer is + * evidently real one. + */ + if (inet_csk_reqsk_queue_is_full(sk)) + goto drop; + + /* + * Accept backlog is full. If we have already queued enough + * of warm entries in syn queue, drop request. It is better than + * clogging syn queue with openreqs with exponentially increasing + * timeout. + */ + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + goto drop; + + req = reqsk_alloc(&dccp_request_sock_ops); + if (req == NULL) + goto drop; + + if (dccp_parse_options(sk, skb)) + goto drop_and_free; + + dccp_reqsk_init(req, skb); + + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + + ireq = inet_rsk(req); + ireq->loc_addr = skb->nh.iph->daddr; + ireq->rmt_addr = skb->nh.iph->saddr; + ireq->opt = NULL; + + /* + * Step 3: Process LISTEN state + * + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * + * In fact we defer setting S.GSR, S.SWL, S.SWH to + * dccp_create_openreq_child. + */ + dreq = dccp_rsk(req); + dreq->dreq_isr = dcb->dccpd_seq; + dreq->dreq_iss = dccp_v4_init_sequence(skb); + dreq->dreq_service = service; + + if (dccp_v4_send_response(sk, req, NULL)) + goto drop_and_free; + + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + return 0; + +drop_and_free: + reqsk_free(req); +drop: + DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + dcb->dccpd_reset_code = reset_code; + return -1; +} + +EXPORT_SYMBOL_GPL(dccp_v4_conn_request); + int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) { struct dccp_hdr *dh = dccp_hdr(skb); @@ -804,24 +685,23 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) /* * Step 3: Process LISTEN state - * If S.state == LISTEN, - * If P.type == Request or P contains a valid Init Cookie - * option, - * * Must scan the packet's options to check for an Init - * Cookie. Only the Init Cookie is processed here, - * however; other options are processed in Step 8. This - * scan need only be performed if the endpoint uses Init - * Cookies * - * * Generate a new socket and switch to that socket * - * Set S := new socket for this port pair - * S.state = RESPOND - * Choose S.ISS (initial seqno) or set from Init Cookie - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie - * Continue with S.state == RESPOND - * * A Response packet will be generated in Step 11 * - * Otherwise, - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return + * If P.type == Request or P contains a valid Init Cookie option, + * (* Must scan the packet's options to check for Init + * Cookies. Only Init Cookies are processed here, + * however; other options are processed in Step 8. This + * scan need only be performed if the endpoint uses Init + * Cookies *) + * (* Generate a new socket and switch to that socket *) + * Set S := new socket for this port pair + * S.state = RESPOND + * Choose S.ISS (initial seqno) or set from Init Cookies + * Initialize S.GAR := S.ISS + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies + * Continue with S.state == RESPOND + * (* A Response packet will be generated in Step 11 *) + * Otherwise, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return * * NOTE: the check for the packet types is done in * dccp_rcv_state_process @@ -844,7 +724,7 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - dccp_v4_ctl_send_reset(skb); + dccp_v4_ctl_send_reset(sk, skb); discard: kfree_skb(skb); return 0; @@ -852,81 +732,94 @@ discard: EXPORT_SYMBOL_GPL(dccp_v4_do_rcv); +/** + * dccp_invalid_packet - check for malformed packets + * Implements RFC 4340, 8.5: Step 1: Check header basics + * Packets that fail these checks are ignored and do not receive Resets. + */ int dccp_invalid_packet(struct sk_buff *skb) { const struct dccp_hdr *dh; + unsigned int cscov; if (skb->pkt_type != PACKET_HOST) return 1; + /* If the packet is shorter than 12 bytes, drop packet and return */ if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n"); + DCCP_WARN("pskb_may_pull failed\n"); return 1; } dh = dccp_hdr(skb); - /* If the packet type is not understood, drop packet and return */ + /* If P.type is not understood, drop packet and return */ if (dh->dccph_type >= DCCP_PKT_INVALID) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n"); + DCCP_WARN("invalid packet type\n"); return 1; } /* - * If P.Data Offset is too small for packet type, or too large for - * packet, drop packet and return + * If P.Data Offset is too small for packet type, drop packet and return */ if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " - "too small 1\n", - dh->dccph_doff); + DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff); return 1; } - + /* + * If P.Data Offset is too too large for packet, drop packet and return + */ if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " - "too small 2\n", - dh->dccph_doff); + DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff); return 1; } - dh = dccp_hdr(skb); - /* * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet * has short sequence numbers), drop packet and return */ - if (dh->dccph_x == 0 && - dh->dccph_type != DCCP_PKT_DATA && - dh->dccph_type != DCCP_PKT_ACK && - dh->dccph_type != DCCP_PKT_DATAACK) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack " - "nor DataAck and P.X == 0\n", - dccp_packet_name(dh->dccph_type)); + if (dh->dccph_type >= DCCP_PKT_DATA && + dh->dccph_type <= DCCP_PKT_DATAACK && dh->dccph_x == 0) { + DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n", + dccp_packet_name(dh->dccph_type)); return 1; } + /* + * If P.CsCov is too large for the packet size, drop packet and return. + * This must come _before_ checksumming (not as RFC 4340 suggests). + */ + cscov = dccp_csum_coverage(skb); + if (cscov > skb->len) { + DCCP_WARN("P.CsCov %u exceeds packet length %d\n", + dh->dccph_cscov, skb->len); + return 1; + } + + /* If header checksum is incorrect, drop packet and return. + * (This step is completed in the AF-dependent functions.) */ + skb->csum = skb_checksum(skb, 0, cscov, 0); + return 0; } EXPORT_SYMBOL_GPL(dccp_invalid_packet); /* this is called when real data arrives */ -int dccp_v4_rcv(struct sk_buff *skb) +static int dccp_v4_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; struct sock *sk; + int min_cov; - /* Step 1: Check header basics: */ + /* Step 1: Check header basics */ if (dccp_invalid_packet(skb)) goto discard_it; - /* If the header checksum is incorrect, drop packet and return */ - if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, - skb->nh.iph->daddr) < 0) { - LIMIT_NETDEBUG(KERN_WARNING "%s: incorrect header checksum\n", - __FUNCTION__); + /* Step 1: If header checksum is incorrect, drop packet and return */ + if (dccp_v4_csum_finish(skb, skb->nh.iph->saddr, skb->nh.iph->daddr)) { + DCCP_WARN("dropped packet with invalid checksum\n"); goto discard_it; } @@ -948,23 +841,20 @@ int dccp_v4_rcv(struct sk_buff *skb) dccp_pr_debug_cat("\n"); } else { DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); - dccp_pr_debug_cat(", ack=%llu\n", - (unsigned long long) + dccp_pr_debug_cat(", ack=%llu\n", (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); } /* Step 2: - * Look up flow ID in table and get corresponding socket */ + * Look up flow ID in table and get corresponding socket */ sk = __inet_lookup(&dccp_hashinfo, skb->nh.iph->saddr, dh->dccph_sport, - skb->nh.iph->daddr, ntohs(dh->dccph_dport), + skb->nh.iph->daddr, dh->dccph_dport, inet_iif(skb)); - /* + /* * Step 2: - * If no socket ... - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return + * If no socket ... */ if (sk == NULL) { dccp_pr_debug("failed to look up flow ID in table and " @@ -972,177 +862,99 @@ int dccp_v4_rcv(struct sk_buff *skb) goto no_dccp_socket; } - /* + /* * Step 2: - * ... or S.state == TIMEWAIT, + * ... or S.state == TIMEWAIT, * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ - if (sk->sk_state == DCCP_TIME_WAIT) { - dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: " - "do_time_wait\n"); - goto do_time_wait; + dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n"); + inet_twsk_put(inet_twsk(sk)); + goto no_dccp_socket; + } + + /* + * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage + * o if MinCsCov = 0, only packets with CsCov = 0 are accepted + * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov + */ + min_cov = dccp_sk(sk)->dccps_pcrlen; + if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) { + dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n", + dh->dccph_cscov, min_cov); + /* FIXME: "Such packets SHOULD be reported using Data Dropped + * options (Section 11.7) with Drop Code 0, Protocol + * Constraints." */ + goto discard_and_relse; } if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; nf_reset(skb); - return sk_receive_skb(sk, skb); + return sk_receive_skb(sk, skb, 1); no_dccp_socket: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; /* * Step 2: + * If no socket ... * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ if (dh->dccph_type != DCCP_PKT_RESET) { DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - dccp_v4_ctl_send_reset(skb); + dccp_v4_ctl_send_reset(sk, skb); } discard_it: - /* Discard frame. */ kfree_skb(skb); return 0; discard_and_relse: sock_put(sk); goto discard_it; - -do_time_wait: - inet_twsk_put((struct inet_timewait_sock *)sk); - goto no_dccp_socket; } -struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { - .queue_xmit = ip_queue_xmit, - .send_check = dccp_v4_send_check, - .rebuild_header = inet_sk_rebuild_header, - .conn_request = dccp_v4_conn_request, - .syn_recv_sock = dccp_v4_request_recv_sock, - .net_header_len = sizeof(struct iphdr), - .setsockopt = ip_setsockopt, - .getsockopt = ip_getsockopt, - .addr2sockaddr = inet_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in), +static struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { + .queue_xmit = ip_queue_xmit, + .send_check = dccp_v4_send_check, + .rebuild_header = inet_sk_rebuild_header, + .conn_request = dccp_v4_conn_request, + .syn_recv_sock = dccp_v4_request_recv_sock, + .net_header_len = sizeof(struct iphdr), + .setsockopt = ip_setsockopt, + .getsockopt = ip_getsockopt, + .addr2sockaddr = inet_csk_addr2sockaddr, + .sockaddr_len = sizeof(struct sockaddr_in), +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_ip_setsockopt, + .compat_getsockopt = compat_ip_getsockopt, +#endif }; -int dccp_v4_init_sock(struct sock *sk) +static int dccp_v4_init_sock(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - struct inet_connection_sock *icsk = inet_csk(sk); - static int dccp_ctl_socket_init = 1; - - dccp_options_init(&dp->dccps_options); - do_gettimeofday(&dp->dccps_epoch); - - if (dp->dccps_options.dccpo_send_ack_vector) { - dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN, - GFP_KERNEL); - if (dp->dccps_hc_rx_ackvec == NULL) - return -ENOMEM; - } - - /* - * FIXME: We're hardcoding the CCID, and doing this at this point makes - * the listening (master) sock get CCID control blocks, which is not - * necessary, but for now, to not mess with the test userspace apps, - * lets leave it here, later the real solution is to do this in a - * setsockopt(CCIDs-I-want/accept). -acme - */ - if (likely(!dccp_ctl_socket_init)) { - dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_rx_ccid, - sk); - dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_tx_ccid, - sk); - if (dp->dccps_hc_rx_ccid == NULL || - dp->dccps_hc_tx_ccid == NULL) { - ccid_exit(dp->dccps_hc_rx_ccid, sk); - ccid_exit(dp->dccps_hc_tx_ccid, sk); - if (dp->dccps_options.dccpo_send_ack_vector) { - dccp_ackvec_free(dp->dccps_hc_rx_ackvec); - dp->dccps_hc_rx_ackvec = NULL; - } - dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; - return -ENOMEM; - } - } else - dccp_ctl_socket_init = 0; - - dccp_init_xmit_timers(sk); - icsk->icsk_rto = DCCP_TIMEOUT_INIT; - sk->sk_state = DCCP_CLOSED; - sk->sk_write_space = dccp_write_space; - icsk->icsk_af_ops = &dccp_ipv4_af_ops; - icsk->icsk_sync_mss = dccp_sync_mss; - dp->dccps_mss_cache = 536; - dp->dccps_role = DCCP_ROLE_UNDEFINED; - dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; - - return 0; -} - -EXPORT_SYMBOL_GPL(dccp_v4_init_sock); - -int dccp_v4_destroy_sock(struct sock *sk) -{ - struct dccp_sock *dp = dccp_sk(sk); + static __u8 dccp_v4_ctl_sock_initialized; + int err = dccp_init_sock(sk, dccp_v4_ctl_sock_initialized); - /* - * DCCP doesn't use sk_write_queue, just sk_send_head - * for retransmissions - */ - if (sk->sk_send_head != NULL) { - kfree_skb(sk->sk_send_head); - sk->sk_send_head = NULL; - } - - /* Clean up a referenced DCCP bind bucket. */ - if (inet_csk(sk)->icsk_bind_hash != NULL) - inet_put_port(&dccp_hashinfo, sk); - - kfree(dp->dccps_service_list); - dp->dccps_service_list = NULL; - - ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); - ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); - if (dp->dccps_options.dccpo_send_ack_vector) { - dccp_ackvec_free(dp->dccps_hc_rx_ackvec); - dp->dccps_hc_rx_ackvec = NULL; + if (err == 0) { + if (unlikely(!dccp_v4_ctl_sock_initialized)) + dccp_v4_ctl_sock_initialized = 1; + inet_csk(sk)->icsk_af_ops = &dccp_ipv4_af_ops; } - ccid_exit(dp->dccps_hc_rx_ccid, sk); - ccid_exit(dp->dccps_hc_tx_ccid, sk); - dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; - return 0; -} - -EXPORT_SYMBOL_GPL(dccp_v4_destroy_sock); - -static void dccp_v4_reqsk_destructor(struct request_sock *req) -{ - kfree(inet_rsk(req)->opt); + return err; } -static struct request_sock_ops dccp_request_sock_ops = { - .family = PF_INET, - .obj_size = sizeof(struct dccp_request_sock), - .rtx_syn_ack = dccp_v4_send_response, - .send_ack = dccp_v4_reqsk_send_ack, - .destructor = dccp_v4_reqsk_destructor, - .send_reset = dccp_v4_ctl_send_reset, -}; - static struct timewait_sock_ops dccp_timewait_sock_ops = { .twsk_obj_size = sizeof(struct inet_timewait_sock), }; -struct proto dccp_prot = { +static struct proto dccp_v4_prot = { .name = "DCCP", .owner = THIS_MODULE, .close = dccp_close, @@ -1155,17 +967,110 @@ struct proto dccp_prot = { .sendmsg = dccp_sendmsg, .recvmsg = dccp_recvmsg, .backlog_rcv = dccp_v4_do_rcv, - .hash = dccp_v4_hash, + .hash = dccp_hash, .unhash = dccp_unhash, .accept = inet_csk_accept, .get_port = dccp_v4_get_port, .shutdown = dccp_shutdown, - .destroy = dccp_v4_destroy_sock, + .destroy = dccp_destroy_sock, .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), .rsk_prot = &dccp_request_sock_ops, .twsk_prot = &dccp_timewait_sock_ops, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_dccp_setsockopt, + .compat_getsockopt = compat_dccp_getsockopt, +#endif +}; + +static struct net_protocol dccp_v4_protocol = { + .handler = dccp_v4_rcv, + .err_handler = dccp_v4_err, + .no_policy = 1, }; -EXPORT_SYMBOL_GPL(dccp_prot); +static const struct proto_ops inet_dccp_ops = { + .family = PF_INET, + .owner = THIS_MODULE, + .release = inet_release, + .bind = inet_bind, + .connect = inet_stream_connect, + .socketpair = sock_no_socketpair, + .accept = inet_accept, + .getname = inet_getname, + /* FIXME: work on tcp_poll to rename it to inet_csk_poll */ + .poll = dccp_poll, + .ioctl = inet_ioctl, + /* FIXME: work on inet_listen to rename it to sock_common_listen */ + .listen = inet_dccp_listen, + .shutdown = inet_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = inet_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif +}; + +static struct inet_protosw dccp_v4_protosw = { + .type = SOCK_DCCP, + .protocol = IPPROTO_DCCP, + .prot = &dccp_v4_prot, + .ops = &inet_dccp_ops, + .capability = -1, + .no_check = 0, + .flags = INET_PROTOSW_ICSK, +}; + +static int __init dccp_v4_init(void) +{ + int err = proto_register(&dccp_v4_prot, 1); + + if (err != 0) + goto out; + + err = inet_add_protocol(&dccp_v4_protocol, IPPROTO_DCCP); + if (err != 0) + goto out_proto_unregister; + + inet_register_protosw(&dccp_v4_protosw); + + err = inet_csk_ctl_sock_create(&dccp_v4_ctl_socket, PF_INET, + SOCK_DCCP, IPPROTO_DCCP); + if (err) + goto out_unregister_protosw; +out: + return err; +out_unregister_protosw: + inet_unregister_protosw(&dccp_v4_protosw); + inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP); +out_proto_unregister: + proto_unregister(&dccp_v4_prot); + goto out; +} + +static void __exit dccp_v4_exit(void) +{ + inet_unregister_protosw(&dccp_v4_protosw); + inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP); + proto_unregister(&dccp_v4_prot); +} + +module_init(dccp_v4_init); +module_exit(dccp_v4_exit); + +/* + * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33) + * values directly, Also cover the case where the protocol is not specified, + * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP + */ +MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6"); +MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arnaldo Carvalho de Melo "); +MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");