/* * Copyright (c) 2000-2018 University of Utah and the Flux Group. * * {{{EMULAB-LICENSE * * This file is part of the Emulab network testbed software. * * This file is free software: you can redistribute it and/or modify it * under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or (at * your option) any later version. * * This file is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public * License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this file. If not, see . * * }}} */ /* * Network routines. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "decls.h" #include "utils.h" #ifdef NO_SOCKET_TIMO #include #endif #ifdef STATS unsigned long nonetbufs; #define DOSTAT(x) (x) #else #define DOSTAT(x) #endif /* Max number of times to attempt bind to port before failing. */ #define MAXBINDATTEMPTS 1 #define IS_MCAST_ADDR(sa) ((ntohl((sa).s_addr) >> 28) == 14) /* Max number of hops multicast hops. */ #define MCAST_TTL 5 static int sock = -1; #ifdef USE_REUSEADDR_COMPAT static int selfsock = -1; #endif struct in_addr myipaddr; static int nobufdelay = -1; int broadcast = 0; static int isclient = 0; static int sndportnum; /* kept in network order */ /* * Convert a string to an IPv4 address. We first try to interpret it as * an IPv4 address. If that fails, we attempt to resolve it as a host name. * Return non-zero on success. */ int GetIP(char *str, struct in_addr *in) { struct hostent *he; if (inet_aton(str, in) == 0) { if ((he = gethostbyname(str)) == NULL) return 0; memcpy(in, he->h_addr, sizeof(*in)); } return 1; } /* * Return the maximum size of a socket buffer. 
* Computes it dynamically on the first call.
 *
 * XXX assumes send/recv max sizes are the same.
 *
 * Starting from the configured 'sockbufsize', we try successively smaller
 * SO_SNDBUF values (in 16K steps) until the kernel accepts one. If none
 * is accepted we fall back on whatever size the kernel gave the socket by
 * default (or 'sockbufsize' itself if even that cannot be read).
 * The result is cached, so the probing is normally done only once.
 */
int
GetSockbufSize(void)
{
	static int sbsize = 0;

	if (sbsize == 0) {
		int		s;	/* scratch socket used for probing */
		int		cursize = 0;
		socklen_t	optlen = sizeof(cursize);

		if ((s = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0)
			FrisPfatal("Could not allocate a socket");

		for (sbsize = sockbufsize; sbsize > 0; sbsize -= (16*1024)) {
			int want = sbsize;

			if (setsockopt(s, SOL_SOCKET, SO_SNDBUF,
				       &want, sizeof(want)) >= 0)
				break;
		}

		/*
		 * The loop ends with sbsize == 0 when sockbufsize is an
		 * exact multiple of 16K and with sbsize < 0 otherwise, so
		 * "nothing worked" must be tested with <=, not <.
		 */
		if (sbsize <= 0) {
			if (getsockopt(s, SOL_SOCKET, SO_SNDBUF,
				       &cursize, &optlen) < 0)
				cursize = sockbufsize;
			sbsize = cursize;
		} else {
			if (getsockopt(s, SOL_SOCKET, SO_SNDBUF,
				       &cursize, &optlen) < 0)
				FrisPfatal("Could not read sockbuf size");
#ifdef linux
			/* In Linux, getsockopt returns 2 * actual value */
			if (cursize == 2 * sbsize)
				cursize = sbsize;
#endif
			if (cursize != sbsize) {
				FrisWarning("Actual socket buffer size is %d"
					    " (instead of %d)",
					    cursize, sbsize);
				sbsize = cursize;
			}
		}
		close(s);
		FrisLog("Maximum socket buffer size of %d bytes", sbsize);
	}
	return sbsize;
}

/*
 * Find the subnet broadcast address associated with the given interface
 * address. We use this to limit broadcasts to a single interface.
 * Returns zero if we successfully produced an address (in *bcaddr),
 * non-zero otherwise.
*/ static int GetBcastAddr(struct in_addr *ifaddr, struct in_addr *bcaddr) { struct ifaddrs *ifa, *nifa; struct sockaddr_in *sin; if (getifaddrs(&ifa) != 0) { FrisPwarning("Could not get interface list"); return 1; } for (nifa = ifa; nifa != NULL; nifa = nifa->ifa_next) { if (nifa->ifa_addr->sa_family != AF_INET) continue; if ((nifa->ifa_flags & (IFF_UP|IFF_BROADCAST)) != (IFF_UP|IFF_BROADCAST)) continue; sin = (struct sockaddr_in *)nifa->ifa_addr; if (ifaddr->s_addr == sin->sin_addr.s_addr) { *bcaddr = ((struct sockaddr_in *)nifa->ifa_broadaddr)->sin_addr; freeifaddrs(ifa); return 0; } } freeifaddrs(ifa); FrisWarning("Could not find interface %s", inet_ntoa(*ifaddr)); return 1; } /* * Bind one port from the given range. * If lo == hi == 0, then we let the kernel choose. * If lo == hi != 0, then we must get that port. * Otherwise we loop over the range til we get one. * Returns the port bound or 0 if unsuccessful. */ static in_port_t BindPort(in_addr_t addr, in_port_t portlo, in_port_t porthi) { struct sockaddr_in name; socklen_t sl = sizeof(name); name.sin_family = AF_INET; name.sin_addr.s_addr = htonl(addr); name.sin_port = htons(portlo); /* * Let the kernel choose. */ if (portlo == 0) { if (bind(sock, (struct sockaddr *)&name, sl) != 0) { FrisPwarning("Bind to %s:%d failed", inet_ntoa(name.sin_addr), portlo); return 0; } if (getsockname(sock, (struct sockaddr *)&name, &sl) < 0) FrisPfatal("could not determine bound port"); return(ntohs(name.sin_port)); } /* * Specific port. Try a few times to get it. */ if (portlo == porthi) { int i = MAXBINDATTEMPTS; while (i) { if (bind(sock, (struct sockaddr *)&name, sl) == 0) return portlo; if (--i) { FrisPwarning("Bind to %s:%d failed. " "Will try %d more times!", inet_ntoa(name.sin_addr), portlo, i); sleep(5); } } FrisPwarning("Bind to %s:%d failed", inet_ntoa(name.sin_addr), portlo); return 0; } /* * Port range, gotta loop through trying to grab one. 
*/ while (portlo <= porthi) { name.sin_port = htons(portlo); if (bind(sock, (struct sockaddr *)&name, sl) == 0) return portlo; portlo++; } return 0; } static void CommonInit(int portlo, int porthi, int dobind) { int i; char buf[BUFSIZ]; struct hostent *he; sockbufsize = GetSockbufSize(); if ((sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0) FrisPfatal("Could not allocate a socket"); i = sockbufsize; if (setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &i, sizeof(i)) < 0) FrisPwarning("Could not set send socket buffer size to %d", i); i = sockbufsize; if (setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &i, sizeof(i)) < 0) FrisPwarning("Could not set receive socket buffer size to %d", i); /* * At present, we use a multicast address in both directions. */ if (IS_MCAST_ADDR(mcastaddr)) { unsigned int loop = 0, ttl = MCAST_TTL; struct ip_mreq mreq; FrisLog("Using Multicast %s", inet_ntoa(mcastaddr)); mreq.imr_multiaddr.s_addr = mcastaddr.s_addr; if (mcastif.s_addr) mreq.imr_interface.s_addr = mcastif.s_addr; else mreq.imr_interface.s_addr = htonl(INADDR_ANY); if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0) FrisPfatal("setsockopt(IPPROTO_IP, IP_ADD_MEMBERSHIP)"); if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)) < 0) FrisPfatal("setsockopt(IPPROTO_IP, IP_MULTICAST_TTL)"); /* Disable local echo */ if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)) < 0) FrisPfatal("setsockopt(IPPROTO_IP, IP_MULTICAST_LOOP)"); if (mcastif.s_addr && setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF, &mcastif, sizeof(mcastif)) < 0) { FrisPfatal("setsockopt(IPPROTO_IP, IP_MULTICAST_IF)"); } #ifdef USE_REUSEADDR /* * Allow use of the desired port in the presense of other * non-MC use. Also allows for multiple clients of the same * stream. 
*/ if (isclient) { i = 1; if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &i, sizeof(i))) FrisWarning("Could not set SO_REUSEADDR"); } #endif #ifdef WITH_IGMP IGMPInit(&mcastif, &mcastaddr); #endif } else if (broadcast) { FrisLog("Setting broadcast mode"); /* * If they are using the local broadcast address and they * have specified an interface, attempt to limit broadcasts * to that interface by using the subnet broadcast address. * Otherwise we issue a dire warning about the consequences * of broadcasting to all interfaces. */ if (ntohl(mcastaddr.s_addr) == INADDR_BROADCAST) { struct in_addr bcaddr; if (mcastif.s_addr && GetBcastAddr(&mcastif, &bcaddr) == 0) { FrisLog("Limiting broadcasts using %s", inet_ntoa(bcaddr)); mcastaddr = bcaddr; } else FrisWarning("WARNING: will broadcast " "to ALL configured interfaces!"); } i = 1; if (setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &i, sizeof(i)) < 0) FrisPfatal("setsockopt(SOL_SOCKET, SO_BROADCAST)"); } if (dobind) { in_addr_t addr = INADDR_ANY; #ifdef USE_REUSEADDR /* * For REUSEADDR to work in the face of unrelated apps that * bind INADDR_ANY:port, we must NOT also bind INADDR_ANY. */ if (isclient && IS_MCAST_ADDR(mcastaddr)) addr = ntohl(mcastaddr.s_addr); #endif portnum = BindPort(addr, portlo, porthi); /* * Could not get a port. * Note that we exit with a magic value. This is for server * wrapper-scripts so that they can differentiate this case * and try again with a different port. * * Note also that if portlo == 0, it cannot be a port * conflict so we do not retry. */ if (portnum == 0) { FrisError("Could not bind %s:%d!\n", inet_ntoa(mcastaddr), portnum); exit(portlo ? EADDRINUSE : -1); } FrisLog("Bound to %s:%d", inet_ntoa(mcastaddr), portnum); } else { portnum = portlo; FrisLog("NOT binding to %s:%d", inet_ntoa(mcastaddr), portnum); } sndportnum = htons(portnum); #ifndef NO_SOCKET_TIMO /* * We use a socket level timeout instead of polling for data. 
*/ { struct timeval timeout; timeout.tv_sec = 0; timeout.tv_usec = PKTRCV_TIMEOUT; if (setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) < 0) FrisPfatal("setsockopt(SOL_SOCKET, SO_RCVTIMEO)"); } #endif /* * If a specific interface IP is specified, use that to * tag our outgoing packets. Otherwise we use the IP address * associated with our hostname. */ if (mcastif.s_addr) myipaddr.s_addr = mcastif.s_addr; else { if (gethostname(buf, sizeof(buf)) < 0) FrisPfatal("gethostname failed"); if ((he = gethostbyname(buf)) == 0) FrisFatal("gethostbyname: %s", hstrerror(h_errno)); memcpy((char *)&myipaddr, he->h_addr, sizeof(myipaddr)); } /* * Compute the out of buffer space delay. */ if (nobufdelay < 0) nobufdelay = sleeptime(100, NULL, 1); } int ClientNetInit(int port) { isclient = 1; #ifdef SAME_HOST_HACK CommonInit(port, port, 0); #else CommonInit(port, port, 1); #endif #ifdef USE_REUSEADDR_COMPAT /* * Bind a unicast socket for our interface address and the port. * * XXX this is for backward compatibility with an older server and * is used for two purposes. * * One, is so that we can receive a unicast JOIN reply from an old * server (the new server always multicasts JOIN replies). The old * client would see this reply because it would just bind the port * using INADDR_ANY in CommonInit, insuring that it would get the * JOIN reply. But with SO_REUSEADDR, we do not bind to INADDR_ANY, * we bind explicitly to the MC address and port and thus would not * see the reply without also binding the unicast (interface) address * and port. * * Two, we send all our multicast packets (joins and block requests) * out this interface so that the packets are stamped with the IP of * the interface and not the MC IP. The old server required that the * packet source IP match the source IP in the frisbee packet header * (the new server allows packets that come from the MC address). 
* We could just set the frisbee packet header srcip to be the MC * address instead, but we use that address for logging and stats * and want to keep it correct. */ if (myipaddr.s_addr && IS_MCAST_ADDR(mcastaddr)) { struct sockaddr_in name; int i; if ((selfsock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0) FrisPfatal("Could not allocate a socket"); i = MCAST_TTL; if (setsockopt(selfsock, IPPROTO_IP, IP_MULTICAST_TTL, &i, sizeof(i)) < 0) FrisWarning("Could not set MC TTL"); /* Disable local echo */ i = 0; if (setsockopt(selfsock, IPPROTO_IP, IP_MULTICAST_LOOP, &i, sizeof(i)) < 0) FrisWarning("Could not clear local echo"); /* Make sure we use the correct interface */ if (mcastif.s_addr && setsockopt(selfsock, IPPROTO_IP, IP_MULTICAST_IF, &mcastif, sizeof(mcastif)) < 0) { FrisWarning("Could not set MCAST_IF"); } /* Set REUSEADDR */ i = 1; if (setsockopt(selfsock, SOL_SOCKET, SO_REUSEADDR, &i, sizeof(i))) FrisWarning("Could not set SO_REUSEADDR"); name.sin_family = AF_INET; name.sin_port = htons(portnum); name.sin_addr.s_addr = myipaddr.s_addr; if (bind(selfsock, (struct sockaddr *)&name, sizeof(name)) < 0) FrisPfatal("Could not bind to %s:%d", inet_ntoa(name.sin_addr), portnum); #ifndef NO_SOCKET_TIMO /* * We use a socket level timeout instead of polling for data. */ { struct timeval timeout; timeout.tv_sec = 0; timeout.tv_usec = PKTRCV_TIMEOUT; if (setsockopt(selfsock, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) < 0) FrisPfatal("setsockopt(SOL_SOCKET, SO_RCVTIMEO)"); } #endif } #endif return 1; } unsigned long ClientNetID(void) { return ntohl(myipaddr.s_addr); } int ServerNetInit(int portlo, int porthi) { isclient = 0; CommonInit(portlo, porthi, 1); #ifdef linux /* * Enabled extended error reporting so that we get back ENOBUFS * when we overrun the sent socket or NIC send buffers. * For now we just do this on the client. 
*/ { int i = 1; if (setsockopt(sock, SOL_IP, IP_RECVERR, &i, sizeof(i)) < 0) FrisPwarning("Could not enable extended errors"); } #endif return 1; } /* * XXX hack. * * Cisco switches without a multicast router defined have an unfortunate * habit of losing our IGMP membership. This function allows us to send * a report message to remind the switch we are still around. * * We need a better way to do this! */ int NetMCKeepAlive(void) { struct ip_mreq mreq; if (broadcast || (ntohl(mcastaddr.s_addr) >> 28) != 14) return 0; if (sock == -1) return 1; #ifdef WITH_IGMP /* Send a direct V2 report packet if possible */ if (IGMPSendReport() == 0) return 0; #endif mreq.imr_multiaddr.s_addr = mcastaddr.s_addr; if (mcastif.s_addr) mreq.imr_interface.s_addr = mcastif.s_addr; else mreq.imr_interface.s_addr = htonl(INADDR_ANY); if (setsockopt(sock, IPPROTO_IP, IP_DROP_MEMBERSHIP, &mreq, sizeof(mreq)) < 0 || setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0) return 1; return 0; } /* * Look for a packet on the socket. Propogate the errors back to the caller * exactly as the system call does. Remember that we set up a socket timeout * above, so we will get EWOULDBLOCK errors when no data is available. * * The amount of data received is determined from the datalen of the hdr. * All packets are actually the same size/structure. * * Returns 0 for a good packet, 1 for a bad packet, -1 on timeout. 
*/ int PacketReceive(Packet_t *p) { struct sockaddr_in from; int mlen; unsigned int alen; #ifdef NO_SOCKET_TIMO fd_set ready; struct timeval tv; int rv; tv.tv_sec = 0; tv.tv_usec = PKTRCV_TIMEOUT; FD_ZERO(&ready); FD_SET(sock, &ready); rv = select(sock+1, &ready, NULL, NULL, &tv); if (rv < 0) { if (errno == EINTR) return -1; FrisPfatal("PacketReceive(select)"); } if (rv == 0) return -1; #endif alen = sizeof(from); bzero(&from, alen); if ((mlen = recvfrom(sock, p, sizeof(*p), 0, (struct sockaddr *)&from, &alen)) < 0) { if (errno == EWOULDBLOCK || errno == EINTR) return -1; FrisPfatal("PacketReceive(recvfrom)"); } /* * Basic integrity checks */ if ((uint32_t)mlen < sizeof(p->hdr) + p->hdr.datalen) { FrisLog("Bad message length (%d != %d)", mlen, p->hdr.datalen); return 1; } #ifdef SAME_HOST_HACK /* * If using a host alias for the client, a message may get * the wrong IP, so rig the IP check to make it always work. */ if (p->hdr.srcip != from.sin_addr.s_addr) from.sin_addr.s_addr = p->hdr.srcip; /* * Also, we aren't binding to a port on the client side, so the * first message to the server will contain the actual port we * will use from now on. */ if (!isclient && sndportnum == htons(portnum) && sndportnum != from.sin_port) sndportnum = from.sin_port; #endif /* * XXX accept packets from the MC address. This will be the case with * newer clients that bind to the MC address instead of INADDR_ANY. * * Note that on a client, certain packets should only come from the * server. These include: BLOCK replies and PROGRESS requests. * Don't rewrite the address in these cases so that the following * check will catch them (or a later caller check on hdr.srcip). 
*/ if (from.sin_addr.s_addr == mcastaddr.s_addr) { if (isclient && (p->hdr.subtype == PKTSUBTYPE_BLOCK || (p->hdr.subtype == PKTSUBTYPE_PROGRESS && p->hdr.type == PKTTYPE_REQUEST))) ; else from.sin_addr.s_addr = p->hdr.srcip; } if (p->hdr.srcip != from.sin_addr.s_addr) { FrisLog("Bad message source (%x != %x)", ntohl(from.sin_addr.s_addr), ntohl(p->hdr.srcip)); return 1; } if (sndportnum != from.sin_port) { FrisLog("Bad message port (%d != %d)", ntohs(from.sin_port), ntohs(sndportnum)); return 1; } return 0; } #ifdef USE_REUSEADDR_COMPAT /* * Same as PacketReceive but read from unicast (self) socket. * * Returns 0 for a good packet, 1 for a bad packet, -1 on timeout. */ int PacketRequest(Packet_t *p) { struct sockaddr_in from; int mlen; unsigned int alen; if (selfsock < 0) return -1; #ifdef NO_SOCKET_TIMO { fd_set ready; struct timeval tv; int rv, maxfd; tv.tv_sec = 0; tv.tv_usec = PKTRCV_TIMEOUT; FD_ZERO(&ready); FD_SET(selfsock, &ready); rv = select(selfsock+1, &ready, NULL, NULL, &tv); if (rv < 0) { if (errno == EINTR) return -1; FrisPfatal("PacketRequest(select)"); } if (rv == 0) return -1; } #endif alen = sizeof(from); bzero(&from, alen); if ((mlen = recvfrom(selfsock, p, sizeof(*p), 0, (struct sockaddr *)&from, &alen)) < 0) { if (errno == EWOULDBLOCK || errno == EINTR) return -1; FrisPfatal("PacketRequest(recvfrom)"); } /* * Basic integrity checks */ if (mlen < sizeof(p->hdr) + p->hdr.datalen) { FrisLog("Bad message length (%d != %d)", mlen, p->hdr.datalen); return 1; } #ifdef SAME_HOST_HACK /* * If using a host alias for the client, a message may get * the wrong IP, so rig the IP check to make it always work. */ if (p->hdr.srcip != from.sin_addr.s_addr) from.sin_addr.s_addr = p->hdr.srcip; /* * Also, we aren't binding to a port on the client side, so the * first message to the server will contain the actual port we * will use from now on. 
*/ if (!isclient && sndportnum == htons(portnum) && sndportnum != from.sin_port) sndportnum = from.sin_port; #endif if (p->hdr.srcip != from.sin_addr.s_addr) { FrisLog("Bad message source (%x != %x)", ntohl(from.sin_addr.s_addr), ntohl(p->hdr.srcip)); return 1; } if (sndportnum != from.sin_port) { FrisLog("Bad message port (%d != %d)", ntohs(from.sin_port), ntohs(sndportnum)); return 1; } return 0; } #endif #ifndef MSG_DONTWAIT #define MSG_DONTWAIT 0 #endif /* * We use blocking sends since there is no point in giving up. All packets * go to the same place, whether client or server. * * The amount of data sent is determined from the datalen of the packet hdr. * All packets are actually the same size/structure. */ void PacketSend(Packet_t *p, int *resends) { struct sockaddr_in to; int len, delays, rc; int fd = sock; len = sizeof(p->hdr) + p->hdr.datalen; p->hdr.srcip = myipaddr.s_addr; to.sin_family = AF_INET; to.sin_port = sndportnum; to.sin_addr.s_addr = mcastaddr.s_addr; delays = 0; #ifdef USE_REUSEADDR_COMPAT /* send out selfsock so the source IP is ours and not the MC addr */ if (selfsock >= 0) fd = selfsock; #endif while ((rc = sendto(fd, (void *)p, len, MSG_DONTWAIT, (struct sockaddr *)&to, sizeof(to))) <= 0) { if (rc < 0 && !(errno == ENOBUFS || errno == EAGAIN)) FrisPfatal("PacketSend(sendto)"); /* * ENOBUFS (BSD) or EAGAIN (Linux, because we set DONTWAIT) * means there was not enough socket space for the packet. * Okay to sleep a bit to let things drain. * * Note that on BSD, ENOBUFS is also returned when the NIC * send buffers are full, so we should never lose a packet * on the send path. * * On Linux, we get this behavior as well by turning on * the extended error message passing (IP_RECVERR). */ delays++; fsleep(nobufdelay); } DOSTAT(nonetbufs += delays); if (resends != 0) *resends = delays; } /* * Basically the same as above, but instead of sending to the multicast * group, send to the (unicast) IP in the packet header. 
This simplifies * the logic in a number of places, by avoiding having to deal with * multicast packets that are not destined for us, but for someone else. */ void PacketReply(Packet_t *p, int firenforget) { struct sockaddr_in to; int len; int fd = sock; len = sizeof(p->hdr) + p->hdr.datalen; to.sin_family = AF_INET; to.sin_port = sndportnum; to.sin_addr.s_addr = p->hdr.srcip; p->hdr.srcip = myipaddr.s_addr; #ifdef USE_REUSEADDR_COMPAT /* send out selfsock so the source IP is ours and not the MC addr */ if (selfsock >= 0) fd = selfsock; #endif while (sendto(fd, (void *)p, len, 0, (struct sockaddr *)&to, sizeof(to)) < 0) { if (errno != ENOBUFS && errno != EAGAIN) FrisPfatal("PacketReply(sendto)"); if (firenforget) break; /* * ENOBUFS means we ran out of mbufs. Okay to sleep a bit * to let things drain. */ DOSTAT(nonetbufs++); fsleep(nobufdelay); } } int PacketValid(Packet_t *p, int nchunks) { switch (p->hdr.type) { case PKTTYPE_REQUEST: case PKTTYPE_REPLY: break; default: return 0; } switch (p->hdr.subtype) { case PKTSUBTYPE_BLOCK: if (p->hdr.datalen < sizeof(p->msg.block)) return 0; if (p->msg.block.chunk < 0 || p->msg.block.chunk >= nchunks || p->msg.block.block < 0 || p->msg.block.block >= MAXCHUNKSIZE) return 0; break; case PKTSUBTYPE_REQUEST: if (p->hdr.datalen < sizeof(p->msg.request)) return 0; if (p->msg.request.chunk < 0 || p->msg.request.chunk >= nchunks || p->msg.request.block < 0 || p->msg.request.block >= MAXCHUNKSIZE || p->msg.request.count < 0 || p->msg.request.block+p->msg.request.count > MAXCHUNKSIZE) return 0; break; case PKTSUBTYPE_PREQUEST: if (p->hdr.datalen < sizeof(p->msg.prequest)) return 0; if (p->msg.prequest.chunk < 0 || p->msg.prequest.chunk >= nchunks) return 0; break; case PKTSUBTYPE_JOIN: if (p->hdr.datalen < sizeof(p->msg.join)) return 0; break; case PKTSUBTYPE_JOIN2: if (p->hdr.datalen < sizeof(p->msg.join2)) return 0; break; case PKTSUBTYPE_LEAVE: if (p->hdr.datalen < sizeof(p->msg.leave)) return 0; break; case PKTSUBTYPE_LEAVE2: if 
(p->hdr.datalen < sizeof(p->msg.leave2)) return 0; break; case PKTSUBTYPE_PROGRESS: if (p->hdr.datalen < sizeof(p->msg.progress.hdr)) return 0; break; default: return 0; } return 1; } /* * Functions for communicating with the master server. * * TODO: protocol for negotiating the protocol version: * On the client, send a request with our current version and: * - get a version error back: server must be V01, so redo with V01 * - otherwise header reply contains version * if not our version, must be a lower version, so redo with that version * On the server: * - version less than our current version, use that version * - version greater than ours, reply with our version */ #ifdef MASTER_SERVER int MsgSend(int msock, MasterMsg_t *msg, size_t size, int timo) { void *buf = msg; int cc; struct timeval tv, now, then; fd_set wfds; if (timo) { tv.tv_sec = timo; tv.tv_usec = 0; gettimeofday(&then, NULL); timeradd(&then, &tv, &then); } while (size > 0) { if (timo) { gettimeofday(&now, NULL); if (timercmp(&now, &then, >=)) { cc = 0; } else { timersub(&then, &now, &tv); FD_ZERO(&wfds); FD_SET(msock, &wfds); cc = select(msock+1, NULL, &wfds, NULL, &tv); } if (cc <= 0) { if (cc == 0) { errno = ETIMEDOUT; cc = -1; } break; } } cc = write(msock, buf, size); if (cc <= 0) break; size -= cc; buf += cc; } if (size != 0) { char *estr = "master server message send"; if (cc == 0) fprintf(stderr, "%s: Unexpected EOF\n", estr); else perror(estr); return 0; } return 1; } int MsgReceive(int msock, MasterMsg_t *msg, size_t size, int timo) { void *buf = msg; int cc; struct timeval tv, now, then; fd_set rfds; if (timo) { tv.tv_sec = timo; tv.tv_usec = 0; gettimeofday(&then, NULL); timeradd(&then, &tv, &then); } while (size > 0) { if (timo) { gettimeofday(&now, NULL); if (timercmp(&now, &then, >=)) { cc = 0; } else { timersub(&then, &now, &tv); FD_ZERO(&rfds); FD_SET(msock, &rfds); cc = select(msock+1, &rfds, NULL, NULL, &tv); } if (cc <= 0) { if (cc == 0) { errno = ETIMEDOUT; cc = -1; } break; } } 
cc = read(msock, buf, size); if (cc <= 0) break; size -= cc; buf += cc; } if (size != 0) { char *estr = "master server message receive"; if (cc == 0) fprintf(stderr, "%s: Unexpected EOF\n", estr); else perror(estr); return 0; } return 1; } /* * Contact the master server to discover download information for imageid. * 'sip' and 'sport' are the addr/port of the master server, 'method' * specifies the desired download method, 'askonly' is set to just ask * for information about the image (without starting a server), 'timeout' * is how long to wait for a response. * * If 'hostip' is not zero, then we are requesting information on behalf of * that node. The calling node (us) must have "proxy" permission on the * server for this to work. * * On success, return non-zero with 'reply' filled in with the server's * response IN HOST ORDER. On failure returns zero. */ int ClientNetFindServer(in_addr_t sip, in_port_t sport, in_addr_t hostip, char *imageid, int method, int askonly, int timeout, GetReply *reply, struct in_addr *myip) { struct sockaddr_in name; MasterMsg_t msg; int msock, len; if ((msock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) { perror("Could not allocate socket for master server"); return 0; } if (sport == 0) sport = MS_PORTNUM; /* XXX need connection timeout! */ name.sin_family = AF_INET; name.sin_addr.s_addr = htonl(sip); name.sin_port = htons(sport); if (connect(msock, (struct sockaddr *)&name, sizeof(name)) < 0) { fprintf(stderr, "Connecting to master server %s:%d failed: %s", inet_ntoa(name.sin_addr), sport, strerror(errno)); close(msock); return 0; } /* * XXX recover the IP address of the interface used to talk to * the server. 
*/ if (myip) { struct sockaddr_in me; socklen_t len = sizeof me; if (getsockname(msock, (struct sockaddr *)&me, &len) < 0) { perror("getsockname"); close(msock); return 0; } *myip = me.sin_addr; } memset(&msg, 0, sizeof msg); strncpy((char *)msg.hdr.version, MS_MSGVERS_1, sizeof(msg.hdr.version)); msg.hdr.type = htonl(MS_MSGTYPE_GETREQUEST); msg.body.getrequest.hostip = htonl(hostip); if (askonly) { msg.body.getrequest.status = 1; msg.body.getrequest.methods = MS_METHOD_ANY; } else { msg.body.getrequest.methods = method; } len = strlen(imageid); if (len > MS_MAXIDLEN) len = MS_MAXIDLEN; msg.body.getrequest.idlen = htons(len); strncpy((char *)msg.body.getrequest.imageid, imageid, MS_MAXIDLEN); len = sizeof msg.hdr + sizeof msg.body.getrequest; if (!MsgSend(msock, &msg, len, timeout)) { close(msock); return 0; } memset(&msg, 0, sizeof msg); len = sizeof msg.hdr + sizeof msg.body.getreply; if (!MsgReceive(msock, &msg, len, timeout)) { close(msock); return 0; } close(msock); if (strncmp((char *)msg.hdr.version, MS_MSGVERS_1, sizeof(msg.hdr.version))) { fprintf(stderr, "Got incorrect version from master server %s:%d\n", inet_ntoa(name.sin_addr), sport); return 0; } if (ntohl(msg.hdr.type) != MS_MSGTYPE_GETREPLY) { fprintf(stderr, "Got incorrect reply from master server %s:%d\n", inet_ntoa(name.sin_addr), sport); return 0; } /* * Convert the reply info to host order */ *reply = msg.body.getreply; reply->error = ntohs(reply->error); reply->servaddr = ntohl(reply->servaddr); reply->addr = ntohl(reply->addr); reply->port = ntohs(reply->port); reply->sigtype = ntohs(reply->sigtype); if (reply->sigtype == MS_SIGTYPE_MTIME) *(uint32_t *)reply->signature = ntohl(*(uint32_t *)reply->signature); reply->hisize = ntohl(reply->hisize); reply->losize = ntohl(reply->losize); return 1; } /* * Contact the master server to negotiate an upload for a 'file' to store * under the given 'imageid'. 
* * 'sip' and 'sport' are the addr/port of the master server, 'askonly' is * set to just see if the upload is allowed and to get characteristics of * any existing copy of the image, 'timeout' is how long to wait for a * response. * * If 'hostip' is not zero, then we are requesting information on behalf of * that node. The calling node (us) must have "proxy" permission on the * server for this to work. * * On success, return non-zero with 'reply' filled in with the server's * response IN HOST ORDER. On failure returns zero. */ int ClientNetPutRequest(in_addr_t sip, in_port_t sport, in_addr_t hostip, char *imageid, uint64_t isize, uint32_t mtime, int timeout, int askonly, int reqtimo, PutReply *reply) { struct sockaddr_in name; MasterMsg_t msg; int msock, len; if ((msock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) { perror("Could not allocate socket for master server"); return 0; } if (sport == 0) sport = MS_PORTNUM; /* XXX need connection timeout! */ name.sin_family = AF_INET; name.sin_addr.s_addr = htonl(sip); name.sin_port = htons(sport); if (connect(msock, (struct sockaddr *)&name, sizeof(name)) < 0) { fprintf(stderr, "Connecting to master server %s:%d failed: %s\n", inet_ntoa(name.sin_addr), sport, strerror(errno)); close(msock); return 0; } memset(&msg, 0, sizeof msg); strncpy((char *)msg.hdr.version, MS_MSGVERS_1, sizeof(msg.hdr.version)); msg.hdr.type = htonl(MS_MSGTYPE_PUTREQUEST); msg.body.putrequest.hostip = htonl(hostip); if (askonly) msg.body.putrequest.status = 1; len = strlen(imageid); if (len > MS_MAXIDLEN) len = MS_MAXIDLEN; msg.body.putrequest.idlen = htons(len); strncpy((char *)msg.body.putrequest.imageid, imageid, MS_MAXIDLEN); if (isize > 0) { msg.body.putrequest.hisize = htonl(isize >> 32); msg.body.putrequest.losize = htonl(isize); } if (mtime) msg.body.putrequest.mtime = htonl(mtime); /* XXX have the server wait longer than us so we timeout first */ if (timeout) msg.body.putrequest.timeout = htonl(timeout+2); len = sizeof msg.hdr + 
sizeof msg.body.putrequest; if (!MsgSend(msock, &msg, len, reqtimo)) { close(msock); return 0; } memset(&msg, 0, sizeof msg); len = sizeof msg.hdr + sizeof msg.body.putreply; if (!MsgReceive(msock, &msg, len, reqtimo)) { close(msock); return 0; } close(msock); if (strncmp((char *)msg.hdr.version, MS_MSGVERS_1, sizeof(msg.hdr.version))) { fprintf(stderr, "Got incorrect version from master server %s:%d\n", inet_ntoa(name.sin_addr), sport); return 0; } if (ntohl(msg.hdr.type) != MS_MSGTYPE_PUTREPLY) { fprintf(stderr, "Got incorrect reply from master server %s:%d\n", inet_ntoa(name.sin_addr), sport); return 0; } /* * Convert the reply info to host order */ *reply = msg.body.putreply; reply->error = ntohs(reply->error); reply->addr = ntohl(reply->addr); reply->port = ntohs(reply->port); reply->sigtype = ntohs(reply->sigtype); if (reply->sigtype == MS_SIGTYPE_MTIME) *(uint32_t *)reply->signature = ntohl(*(uint32_t *)reply->signature); reply->hisize = ntohl(reply->hisize); reply->losize = ntohl(reply->losize); reply->himaxsize = ntohl(reply->himaxsize); reply->lomaxsize = ntohl(reply->lomaxsize); return 1; } #endif /* * Functions for dealing with IGMP */ #ifdef WITH_IGMP #include /* for older *BSD that needs n_long */ #include #include #ifdef IGMP_MEMBERSHIP_QUERY #define IGMP_QUERY IGMP_MEMBERSHIP_QUERY #else #define IGMP_QUERY IGMP_HOST_MEMBERSHIP_QUERY #endif #ifdef IGMP_V2_MEMBERSHIP_REPORT #define IGMP_REPORT IGMP_V2_MEMBERSHIP_REPORT #else #define IGMP_REPORT IGMP_v2_HOST_MEMBERSHIP_REPORT #endif static struct igmp qpacket, rpacket; static struct in_addr mciface; static struct sockaddr_in allhosts, mcgroup; static uint16_t igmp_csum(struct igmp *pkt) { char *addr = (char *)pkt; int cc = sizeof(*pkt); uint32_t csum = 0; while (cc >= sizeof(uint16_t)) { csum += *(uint16_t *)addr; addr += sizeof(uint16_t); cc -= sizeof(uint16_t); } if (cc > 0) csum = csum + *(uint8_t *)addr; while ((csum >> 16) != 0) csum = (csum >> 16) + (csum & 0xFFFF); return(~csum); } static int 
igmp_opensocket(void) { char ra[4]; int ttl = 1; int sock; sock = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP); if (sock < 0) { perror("IGMP socket"); return -1; } /* set TTL */ if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)) < 0) { perror("setsockopt(MULTICAST_TTL)"); close(sock); return -1; } /* fix interface */ if (mciface.s_addr != 0 && setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF, &mciface, sizeof(mciface)) < 0) { perror("setsockopt(MULTICAST_IF)"); close(sock); return -1; } /* set router alert option */ ra[0] = IPOPT_RA; ra[1] = 4; ra[2] = ra[3] = '\0'; if (setsockopt(sock, IPPROTO_IP, IP_OPTIONS, &ra, sizeof(ra)) < 0) { perror("setsockopt(RA)"); close(sock); return -1; } return sock; } void IGMPInit(struct in_addr *iface, struct in_addr *mcaddr) { /* build a prototype query packet */ qpacket.igmp_type = IGMP_QUERY; qpacket.igmp_code = 0x64; memset(&qpacket.igmp_group, 0, sizeof(qpacket.igmp_group)); qpacket.igmp_cksum = 0; qpacket.igmp_cksum = igmp_csum(&qpacket); /* sockaddr for queries */ allhosts.sin_family = AF_INET; allhosts.sin_port = htons(0); allhosts.sin_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP); if (mcaddr != NULL) { /* build a prototype report packet */ rpacket.igmp_type = IGMP_REPORT; rpacket.igmp_code = 0; rpacket.igmp_group = *mcaddr; rpacket.igmp_cksum = 0; rpacket.igmp_cksum = igmp_csum(&rpacket); /* sockaddr for reports */ mcgroup.sin_family = AF_INET; mcgroup.sin_port = htons(0); mcgroup.sin_addr = *mcaddr; } /* remember the interface */ if (iface != NULL) mciface = *iface; else mciface.s_addr = 0; } int IGMPSendQuery(void) { int rv, sock; if ((sock = igmp_opensocket()) < 0) return -1; rv = sendto(sock, &qpacket, sizeof(qpacket), 0, (struct sockaddr *)&allhosts, sizeof(allhosts)); if (rv < 0) perror("query sendto"); close(sock); return (rv != sizeof(qpacket)); } int IGMPSendReport(void) { int rv, sock; if (mcgroup.sin_addr.s_addr == 0) return 0; if ((sock = igmp_opensocket()) < 0) return -1; rv = sendto(sock, &rpacket, 
sizeof(rpacket), 0, (struct sockaddr *)&mcgroup, sizeof(mcgroup)); if (rv < 0) perror("report sendto"); close(sock); return (rv != sizeof(rpacket)); } #endif