From cd704497a506c665e6ab845a5a8178d73d7cb377 Mon Sep 17 00:00:00 2001
From: Mike Hibler <hibler@cs.utah.edu>
Date: Thu, 13 Mar 2025 08:28:37 -0600
Subject: [PATCH] Be more rigorous in filtering incoming packets.

In one situation, we were getting our own multicast (MC) packets echoed back
to us, and we were accepting our own JOIN reply messages as JOIN request
messages and sending replies to those. The situation deteriorated rapidly.
---
 clientside/os/frisbee.redux/client.c  | 17 ++++++++--
 clientside/os/frisbee.redux/network.c | 16 +++++++++-
 clientside/os/frisbee.redux/server.c  | 45 +++++++++++++++++++++++++--
 3 files changed, 72 insertions(+), 6 deletions(-)

diff --git a/clientside/os/frisbee.redux/client.c b/clientside/os/frisbee.redux/client.c
index 6cac96f993..e391553c9d 100644
--- a/clientside/os/frisbee.redux/client.c
+++ b/clientside/os/frisbee.redux/client.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2021 University of Utah and the Flux Group.
+ * Copyright (c) 2000-2025 University of Utah and the Flux Group.
  * 
  * {{{EMULAB-LICENSE
  * 
@@ -709,6 +709,7 @@ ClientRecvThread(void *arg)
 	STCounter = servertimo * TIMEOUT_HZ;
 
 	while (1) {
+		int rv;
 #ifdef TRACE_EVENTS
 		static int needstamp = 1;
 		struct timeval pstamp;
@@ -729,9 +730,21 @@ ClientRecvThread(void *arg)
 		 * see that block for longer than our timeout period,
 		 * leading us to issue another request, etc.
 		 */
-		if (PacketReceive(p) != 0) {
+		rv = PacketReceive(p);
+		if (rv != 0) {
 			pthread_testcancel();
 
+			/*
+			 * Bad packet, just continue.
+			 *
+			 * XXX if we keep getting bad packets as opposed to no
+			 * packets, the timeouts below won't trigger. The
+			 * callers of PacketReceive should not be counting on
+			 * it for ticking off their timers.
+			 */
+			if (rv > 0)
+				continue;
+
 			/*
 			 * First see if we should exit
 			 */
diff --git a/clientside/os/frisbee.redux/network.c b/clientside/os/frisbee.redux/network.c
index 947c360d5c..b88cd7ce50 100644
--- a/clientside/os/frisbee.redux/network.c
+++ b/clientside/os/frisbee.redux/network.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2020 University of Utah and the Flux Group.
+ * Copyright (c) 2000-2025 University of Utah and the Flux Group.
  * 
  * {{{EMULAB-LICENSE
  * 
@@ -670,6 +670,20 @@ PacketReceive(Packet_t *p)
 			from.sin_addr.s_addr = p->hdr.srcip;
 	}
 
+	/*
+	 * Got a copy of our own MC message. Probably using an SR-IOV interface.
+	 */
+	if (!isclient && mcastif.s_addr && p->hdr.srcip == myipaddr.s_addr) {
+		static int mcbouncewarned = 0;
+
+		if (!mcbouncewarned) {
+			FrisLog("Received our own message! "
+				"Bad MC implementation--"
+				"expect bad high bad message count!");
+			mcbouncewarned = 1;
+		}
+		return 1;
+	}
 	if (p->hdr.srcip != from.sin_addr.s_addr) {
 		FrisLog("Bad message source (%x != %x)",
 			ntohl(from.sin_addr.s_addr), ntohl(p->hdr.srcip));
diff --git a/clientside/os/frisbee.redux/server.c b/clientside/os/frisbee.redux/server.c
index 028c8a908c..501e19c2dc 100644
--- a/clientside/os/frisbee.redux/server.c
+++ b/clientside/os/frisbee.redux/server.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2021 University of Utah and the Flux Group.
+ * Copyright (c) 2000-2025 University of Utah and the Flux Group.
  * 
  * {{{EMULAB-LICENSE
  * 
@@ -804,9 +804,14 @@ ServerRecvThread(void *arg)
 		keepalive = (int)(((unsigned long long)keepalive * 1000000) /
 				  PKTRCV_TIMEOUT);
 	while (1) {
+		int rv;
 		pthread_testcancel();
-		if (PacketReceive(p) != 0) {
-			if (keepalive && ++idles > keepalive) {
+		rv = PacketReceive(p);
+		if (rv != 0) {
+			if (rv > 0) {
+				DOSTAT(badpackets++);
+			}
+			else if (keepalive && ++idles > keepalive) {
 				if (NetMCKeepAlive()) {
 					FrisWarning("Multicast keepalive failed");
 					if (++kafails > 5) {
@@ -839,6 +844,39 @@ ServerRecvThread(void *arg)
 					FileInfo.chunks, p->msg.request.block);
 			continue;
 		}
+		/*
+		 * Server should only see certain TYPEs (request/reply)
+		 * for certain SUBTYPEs.
+		 */
+		if (p->hdr.type == PKTTYPE_REQUEST) {
+			switch (p->hdr.subtype) {
+			case PKTSUBTYPE_BLOCK:
+			case PKTSUBTYPE_PROGRESS:
+			{
+				struct in_addr ipaddr = { p->hdr.srcip };
+				DOSTAT(badpackets++);
+				FrisLog("REQUEST packet %d from %s, ignored",
+					p->hdr.subtype, inet_ntoa(ipaddr));
+				continue;
+			}
+			default:
+				break;
+			}
+		} else {
+			switch (p->hdr.subtype) {
+			case PKTSUBTYPE_PROGRESS:
+				break;
+			default:
+			{
+				struct in_addr ipaddr = { p->hdr.srcip };
+				DOSTAT(badpackets++);
+				FrisLog("REPLY packet %d from %s, ignored",
+					p->hdr.subtype, inet_ntoa(ipaddr));
+				continue;
+			}
+			}
+		}
+		
 		gettimeofday(&LastReq, 0);
 		if (!gotone) {
 			FirstReq = LastReq;
@@ -1803,6 +1841,7 @@ dumpstats(void)
 		ru.ru_stime.tv_sec, ru.ru_stime.tv_usec/1000);
 	FrisLog("  max/total clients: %d/%d",
 		maxclientnum, totalclients);
+	FrisLog("  bad msgs dropped:  %d", Stats.badpackets);
 	FrisLog("  msgs in/out:       %d/%d",
 		Stats.msgin, Stats.joinrep + Stats.blockssent);
 	FrisLog("  joins/leaves:      %d/%d", Stats.joins, Stats.leaves);
-- 
GitLab