From d69151f992b5d6dd306cd42487fa325a35574a18 Mon Sep 17 00:00:00 2001 From: David Johnson <johnsond@flux.utah.edu> Date: Fri, 28 Sep 2007 06:21:38 +0000 Subject: [PATCH] This is the port of Kirk's (and others) linux endnodeshaping patches from 2.4 to 2.6. PLR and delays (the two schedulers we need) seem to be working nicely, but need more testing. Note that the iptables patch isn't tested yet, but it's the right one to apply... There is still a locking bug in the PLR sched, but it only affects stats dumping so we don't care at the moment. The two imq patches are from the linux imq project, like the old ones. --- .../iptables_mods/iptables-1.3.6-imq.diff | 221 +++ ...2.6.20-1.2944.fc6.emulab-1.linkdelay.patch | 1254 +++++++++++++++++ .../iproute-2.6.20-070313-emulab.patch | 324 +++++ 3 files changed, 1799 insertions(+) create mode 100644 delay/linux/iptables_mods/iptables-1.3.6-imq.diff create mode 100644 delay/linux/kmods/linux-2.6.20-1.2944.fc6.emulab-1.linkdelay.patch create mode 100644 delay/linux/tc_mods/iproute-2.6.20-070313-emulab.patch diff --git a/delay/linux/iptables_mods/iptables-1.3.6-imq.diff b/delay/linux/iptables_mods/iptables-1.3.6-imq.diff new file mode 100644 index 0000000000..262fef1a1e --- /dev/null +++ b/delay/linux/iptables_mods/iptables-1.3.6-imq.diff @@ -0,0 +1,221 @@ +--- iptables-1.3.6.orig/extensions.orig/.IMQ-test6 Thu Jan 1 01:00:00 1970 ++++ iptables-1.3.6/extensions/.IMQ-test6 Mon Jun 16 10:12:47 2003 +@@ -0,0 +1,3 @@ ++#!/bin/sh ++# True if IMQ target patch is applied. ++[ -f $KERNEL_DIR/net/ipv6/netfilter/ip6t_IMQ.c ] && echo IMQ +--- iptables-1.3.6.orig/extensions.orig/libip6t_IMQ.c Thu Jan 1 01:00:00 1970 ++++ iptables-1.3.6/extensions/libip6t_IMQ.c Mon Jun 16 10:12:47 2003 +@@ -0,0 +1,101 @@ ++/* Shared library add-on to iptables to add IMQ target support. */ ++#include <stdio.h> ++#include <string.h> ++#include <stdlib.h> ++#include <getopt.h> ++ ++#include <ip6tables.h> ++#include <linux/netfilter_ipv6/ip6_tables.h> ++#include <linux/netfilter_ipv6/ip6t_IMQ.h> ++ ++/* Function which prints out usage message. */ ++static void ++help(void) ++{ ++ printf( ++"IMQ target v%s options:\n" ++" --todev <N> enqueue to imq<N>, defaults to 0\n", ++IPTABLES_VERSION); ++} ++ ++static struct option opts[] = { ++ { "todev", 1, 0, '1' }, ++ { 0 } ++}; ++ ++/* Initialize the target. */ ++static void ++init(struct ip6t_entry_target *t, unsigned int *nfcache) ++{ ++ struct ip6t_imq_info *mr = (struct ip6t_imq_info*)t->data; ++ ++ mr->todev = 0; ++ *nfcache |= NFC_UNKNOWN; ++} ++ ++/* Function which parses command options; returns true if it ++ ate an option */ ++static int ++parse(int c, char **argv, int invert, unsigned int *flags, ++ const struct ip6t_entry *entry, ++ struct ip6t_entry_target **target) ++{ ++ struct ip6t_imq_info *mr = (struct ip6t_imq_info*)(*target)->data; ++ ++ switch(c) { ++ case '1': ++ if (check_inverse(optarg, &invert, NULL, 0)) ++ exit_error(PARAMETER_PROBLEM, ++ "Unexpected `!' after --todev"); ++ mr->todev=atoi(optarg); ++ break; ++ default: ++ return 0; ++ } ++ return 1; ++} ++ ++static void ++final_check(unsigned int flags) ++{ ++} ++ ++/* Prints out the targinfo. */ ++static void ++print(const struct ip6t_ip6 *ip, ++ const struct ip6t_entry_target *target, ++ int numeric) ++{ ++ struct ip6t_imq_info *mr = (struct ip6t_imq_info*)target->data; ++ ++ printf("IMQ: todev %u ", mr->todev); ++} ++ ++/* Saves the union ipt_targinfo in parsable form to stdout. */ ++static void ++save(const struct ip6t_ip6 *ip, const struct ip6t_entry_target *target) ++{ ++ struct ip6t_imq_info *mr = (struct ip6t_imq_info*)target->data; ++ ++ printf("--todev %u", mr->todev); ++} ++ ++static struct ip6tables_target imq = { ++ .next = NULL, ++ .name = "IMQ", ++ .version = IPTABLES_VERSION, ++ .size = IP6T_ALIGN(sizeof(struct ip6t_imq_info)), ++ .userspacesize = IP6T_ALIGN(sizeof(struct ip6t_imq_info)), ++ .help = &help, ++ .init = &init, ++ .parse = &parse, ++ .final_check = &final_check, ++ .print = &print, ++ .save = &save, ++ .extra_opts = opts ++}; ++ ++static __attribute__((constructor)) void _init(void) ++{ ++ register_target6(&imq); ++} +--- iptables-1.3.6.orig/extensions.orig/.IMQ-test Thu Jan 1 01:00:00 1970 ++++ iptables-1.3.6/extensions/.IMQ-test Mon Jun 16 10:12:47 2003 +@@ -0,0 +1,3 @@ ++#!/bin/sh ++# True if IMQ target patch is applied. ++[ -f $KERNEL_DIR/net/ipv4/netfilter/ipt_IMQ.c ] && echo IMQ +--- iptables-1.3.6.orig/extensions.orig/libipt_IMQ.c Thu Jan 1 01:00:00 1970 ++++ iptables-1.3.6/extensions/libipt_IMQ.c Mon Jun 16 10:12:47 2003 +@@ -0,0 +1,101 @@ ++/* Shared library add-on to iptables to add IMQ target support. */ ++#include <stdio.h> ++#include <string.h> ++#include <stdlib.h> ++#include <getopt.h> ++ ++#include <iptables.h> ++#include <linux/netfilter_ipv4/ip_tables.h> ++#include <linux/netfilter_ipv4/ipt_IMQ.h> ++ ++/* Function which prints out usage message. */ ++static void ++help(void) ++{ ++ printf( ++"IMQ target v%s options:\n" ++" --todev <N> enqueue to imq<N>, defaults to 0\n", ++IPTABLES_VERSION); ++} ++ ++static struct option opts[] = { ++ { "todev", 1, 0, '1' }, ++ { 0 } ++}; ++ ++/* Initialize the target. */ ++static void ++init(struct ipt_entry_target *t, unsigned int *nfcache) ++{ ++ struct ipt_imq_info *mr = (struct ipt_imq_info*)t->data; ++ ++ mr->todev = 0; ++ *nfcache |= NFC_UNKNOWN; ++} ++ ++/* Function which parses command options; returns true if it ++ ate an option */ ++static int ++parse(int c, char **argv, int invert, unsigned int *flags, ++ const struct ipt_entry *entry, ++ struct ipt_entry_target **target) ++{ ++ struct ipt_imq_info *mr = (struct ipt_imq_info*)(*target)->data; ++ ++ switch(c) { ++ case '1': ++ if (check_inverse(optarg, &invert, NULL, 0)) ++ exit_error(PARAMETER_PROBLEM, ++ "Unexpected `!' after --todev"); ++ mr->todev=atoi(optarg); ++ break; ++ default: ++ return 0; ++ } ++ return 1; ++} ++ ++static void ++final_check(unsigned int flags) ++{ ++} ++ ++/* Prints out the targinfo. */ ++static void ++print(const struct ipt_ip *ip, ++ const struct ipt_entry_target *target, ++ int numeric) ++{ ++ struct ipt_imq_info *mr = (struct ipt_imq_info*)target->data; ++ ++ printf("IMQ: todev %u ", mr->todev); ++} ++ ++/* Saves the union ipt_targinfo in parsable form to stdout. */ ++static void ++save(const struct ipt_ip *ip, const struct ipt_entry_target *target) ++{ ++ struct ipt_imq_info *mr = (struct ipt_imq_info*)target->data; ++ ++ printf("--todev %u", mr->todev); ++} ++ ++static struct iptables_target imq = { ++ .next = NULL, ++ .name = "IMQ", ++ .version = IPTABLES_VERSION, ++ .size = IPT_ALIGN(sizeof(struct ipt_imq_info)), ++ .userspacesize = IPT_ALIGN(sizeof(struct ipt_imq_info)), ++ .help = &help, ++ .init = &init, ++ .parse = &parse, ++ .final_check = &final_check, ++ .print = &print, ++ .save = &save, ++ .extra_opts = opts ++}; ++ ++static __attribute__((constructor)) void _init(void) ++{ ++ register_target(&imq); ++} + diff --git a/delay/linux/kmods/linux-2.6.20-1.2944.fc6.emulab-1.linkdelay.patch b/delay/linux/kmods/linux-2.6.20-1.2944.fc6.emulab-1.linkdelay.patch new file mode 100644 index 0000000000..9e79126ecd --- /dev/null +++ b/delay/linux/kmods/linux-2.6.20-1.2944.fc6.emulab-1.linkdelay.patch @@ -0,0 +1,1254 @@ +diff -urN linux-2.6.20-1.2944.fc6.emulab-1/include/linux/pkt_sched.h linux-2.6.20-1.2944.fc6.emulab-1-linkdelay/include/linux/pkt_sched.h +--- linux-2.6.20-1.2944.fc6.emulab-1/include/linux/pkt_sched.h 2007-04-17 12:04:26.000000000 -0600 ++++ linux-2.6.20-1.2944.fc6.emulab-1-linkdelay/include/linux/pkt_sched.h 2007-09-27 23:03:33.000000000 -0600 +@@ -90,6 +90,41 @@ + __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */ + }; + ++/* PLR section */ ++ ++struct tc_plr_qopt ++{ ++ __u32 plr; /* % drop rate (0-100) */ ++}; ++ ++enum ++{ ++ TCA_PLR_UNSPEC, ++ TCA_PLR_PARMS, ++ TCA_PLR_INIT, ++ __TCA_PLR_MAX, ++}; ++ ++#define TCA_PLR_MAX (__TCA_PLR_MAX - 1) ++ ++/* DELAY section */ ++ ++struct tc_delay_qopt ++{ ++ __u32 delay_usec; /* # of usecs to delay */ ++ __u8 reset_time; /* flag: reset time on dequeue, or not */ ++}; ++ ++enum ++{ ++ TCA_DELAY_UNSPEC, ++ TCA_DELAY_PARMS, ++ TCA_DELAY_INIT, ++ __TCA_DELAY_MAX, ++}; ++ ++#define TCA_DELAY_MAX (__TCA_DELAY_MAX - 1) ++ + /* PRIO section */ + + #define TCQ_PRIO_BANDS 16 +diff -urN linux-2.6.20-1.2944.fc6.emulab-1/net/sched/Kconfig linux-2.6.20-1.2944.fc6.emulab-1-linkdelay/net/sched/Kconfig +--- linux-2.6.20-1.2944.fc6.emulab-1/net/sched/Kconfig 2007-04-17 12:04:29.000000000 -0600 ++++ linux-2.6.20-1.2944.fc6.emulab-1-linkdelay/net/sched/Kconfig 2007-08-09 11:14:07.000000000 -0600 +@@ -104,6 +104,34 @@ + + comment "Queueing/Scheduling" + ++config NET_SCH_PLR ++ tristate "PLR packet scheduler" ++ ---help--- ++ Provides the facility to drop packets on a given interface ++ at a specified packet loss rate (percentage 0-100). In general, ++ there's no need for this option unless you want to perform ++ network testing with artificially produced packet loss. ++ ++ If you say Y or M now, your networking will not be affected ++ unless you enable the packet scheduler via the iproute2+tc ++ package. ++ ++ If unsure, say N now. ++ ++config NET_SCH_DELAY ++ tristate "Delay packet scheduler" ++ ---help--- ++ Provides the facility to delay the delivery of IP packets on ++ a given interface for a specified number of usecs. In general, ++ there's no need for this option unless you want to perform ++ network testing with artificially produced latencies. ++ ++ If you say Y or M now, your networking will not be affected ++ unless you enable the packet scheduler via the iproute2tc ++ package. ++ ++ If unsure, say N now. ++ + config NET_SCH_CBQ + tristate "Class Based Queueing (CBQ)" + ---help--- +diff -urN linux-2.6.20-1.2944.fc6.emulab-1/net/sched/Makefile linux-2.6.20-1.2944.fc6.emulab-1-linkdelay/net/sched/Makefile +--- linux-2.6.20-1.2944.fc6.emulab-1/net/sched/Makefile 2007-04-17 12:04:29.000000000 -0600 ++++ linux-2.6.20-1.2944.fc6.emulab-1-linkdelay/net/sched/Makefile 2007-08-09 11:14:07.000000000 -0600 +@@ -14,7 +14,9 @@ + obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o + obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o + obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o +-obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o ++obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o ++obj-$(CONFIG_NET_SCH_DELAY) += sch_delay.o ++obj-$(CONFIG_NET_SCH_PLR) += sch_plr.o + obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o + obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o + obj-$(CONFIG_NET_SCH_HPFQ) += sch_hpfq.o +diff -urN linux-2.6.20-1.2944.fc6.emulab-1/net/sched/sch_delay.c linux-2.6.20-1.2944.fc6.emulab-1-linkdelay/net/sched/sch_delay.c +--- linux-2.6.20-1.2944.fc6.emulab-1/net/sched/sch_delay.c 1969-12-31 17:00:00.000000000 -0700 ++++ linux-2.6.20-1.2944.fc6.emulab-1-linkdelay/net/sched/sch_delay.c 2007-09-27 23:36:59.000000000 -0600 +@@ -0,0 +1,732 @@ ++/* ++ * net/sched/sch_delay.c Add a delay to anything going out... ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: ++ * David T McWherter, <dtm@vramp.net> ++ * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> ++ */ ++ ++#include <linux/module.h> ++#include <asm/uaccess.h> ++#include <asm/system.h> ++#include <asm/bitops.h> ++#include <linux/types.h> ++#include <linux/kernel.h> ++#include <linux/sched.h> ++#include <linux/string.h> ++#include <linux/mm.h> ++#include <linux/socket.h> ++#include <linux/sockios.h> ++#include <linux/in.h> ++#include <linux/errno.h> ++#include <linux/interrupt.h> ++#include <linux/if_ether.h> ++#include <linux/inet.h> ++#include <linux/netdevice.h> ++#include <linux/etherdevice.h> ++#include <linux/notifier.h> ++#include <net/ip.h> ++#include <net/route.h> ++#include <linux/skbuff.h> ++#include <linux/rtnetlink.h> ++#include <net/sock.h> ++#include <net/pkt_sched.h> ++ ++/* Delay Scheduler ++ ======================================= ++ Provides the facility to delay the delivery of IP packets on ++ a given interface for a specified number of usecs. In general, ++ there's no need for this option unless you want to perform ++ network testing with artificially produced latencies. ++ */ ++ ++#define DQDISC (1<<16) ++#define DCLASS (1<<17) ++#define DSTATS (1<<18) ++ ++/* debugging level & bitmask: upper 16 bits are debug mask, lower 16 ++ are level */ ++/* bits for mask are defined above, and level is 0 - 1000 */ ++#define DBG DSTATS | 1000 ++ ++#if 0 ++#define DMSK(lvl) ((lvl) & 0xffff0000) ++#define DLVL(lvl) ((lvl) & 0x0000ffff) ++#define DPRINTK(lvl, fmt, args...) if ((DMSK(lvl) & DMSK(DBG)) && \ ++ (DLVL(lvl) < DLVL(DBG))) \ ++ { printk(KERN_DEBUG fmt, ##args); } ++#else ++#define DMSK(lvl) ++#define DLVL(lvl) ++#define DPRINTK(lvl, fmt, args...) ++#endif ++ ++#define EPRINTK(fmt, args...) printk(KERN_ERR fmt, ##args); ++ ++#define TN_MAGIC 94343939L ++ ++/* delay packet queue lock */ ++static rwlock_t delay_mod_lock = RW_LOCK_UNLOCKED; ++ ++/* XXX: temp for debugging */ ++static __u32 oldbklog; ++ ++struct time_node { ++ struct list_head link; ++ long magic; ++ psched_time_t time; ++}; ++ ++#define LIST_ENTRY(x) list_entry((x), struct time_node, link) ++ ++struct delay_sched_data ++{ ++ struct Qdisc *qd; /* Child qdisc pointer */ ++ struct tcf_proto *filter_list; /* tc filter chain - unused. */ ++ struct time_node times_queue; /* The queue of socket input times */ ++ struct timer_list wd_timer; /* watchdog timer */ ++ psched_tdiff_t delaytime; /* The amount of time to delay sending packets */ ++ __u8 reset_time; /* Should sk_buff time get reset?: flag */ ++}; ++ ++ ++/** ++ * time_node utility functions. ++ */ ++static struct time_node * ++time_node_alloc (void) ++{ ++ struct time_node * out; ++ DPRINTK(DQDISC | 50, "{ time_node_alloc\n"); ++ out = (struct time_node*)kmalloc( sizeof(struct time_node), GFP_ATOMIC ); ++ if ( !out ) ++ { ++ EPRINTK("!!! - NULL time_node_alloc\n"); ++ } else { ++ out->magic = TN_MAGIC; ++ } ++ DPRINTK(DQDISC | 50, "} time_node_alloc\n"); ++ return out; ++} ++ ++ ++static void ++time_node_free( struct time_node * nd ) ++{ ++ if ( nd ) ++ { ++ if ( nd->magic != TN_MAGIC ) ++ { ++ EPRINTK("!!! - time_node_free of %p with bad magic! (%ld)\n", ++ nd, (long)nd->magic); ++ } else { ++ kfree(nd); ++ } ++ } ++} ++ ++ ++static struct time_node * ++time_node_stamp ( struct time_node * nd ) ++{ ++ if ( nd ) ++ PSCHED_GET_TIME( nd->time ); ++ return nd; ++} ++ ++ ++static struct time_node * ++time_node_queue_enqueue_head ( struct time_node * head, struct time_node * nd ) ++{ ++ if ( head && nd ) ++ { ++ list_add(&nd->link, &head->link); ++ } ++ return nd; ++} ++ ++ ++static struct time_node * ++time_node_queue_enqueue_tail ( struct time_node * head, struct time_node * nd ) ++{ ++ DPRINTK(DQDISC | 50, "{ time_node_queue_enqueue_tail\n"); ++ if ( head && nd ) ++ { ++ list_add_tail(&nd->link, &head->link); ++ } ++ DPRINTK(DQDISC | 50, "} time_node_queue_enqueue_tail\n"); ++ return nd; ++} ++ ++ ++static struct time_node * ++time_node_queue_dequeue_head ( struct time_node * head ) ++{ ++ struct time_node * first = NULL; ++ DPRINTK(DQDISC | 50, "{ time_node_queue_dequeue_head\n"); ++ if ( head ) ++ { ++ if ( !list_empty(&head->link) ) ++ { ++ first = LIST_ENTRY(head->link.next); ++ list_del(head->link.next); ++ } ++ } ++ DPRINTK(DQDISC | 50, "} time_node_queue_dequeue_head\n"); ++ return first; ++} ++ ++ ++static struct time_node * ++time_node_queue_peek_head ( struct time_node * head ) ++{ ++ struct time_node * first = NULL; ++ DPRINTK(DQDISC | 50, "{ time_node_queue_peek_head\n"); ++ if ( head ) ++ { ++ if ( !list_empty(&head->link) ) ++ { ++ first = LIST_ENTRY(head->link.next); ++ } ++ } ++ DPRINTK(DQDISC | 50, "} time_node_queue_peek_head\n"); ++ return first; ++} ++ ++ ++static struct time_node * ++time_node_queue_purge ( struct time_node * head ) ++{ ++ DPRINTK(DQDISC | 50, "{ time_node_queue_purge(%p)\n", head); ++ if ( head ) ++ { ++ for ( ; !list_empty(&head->link) ; ) ++ { ++ struct time_node * nd; ++ nd = time_node_queue_dequeue_head ( head ); ++ time_node_free( nd ); ++ } ++ } ++ DPRINTK(DQDISC | 50, "} time_node_queue_purge\n"); ++ return head; ++} ++ ++/** ++ * Watchdog timer ++ */ ++static void delay_watchdog ( unsigned long arg ) ++{ ++ struct Qdisc * sch = (struct Qdisc*)arg; ++ psched_time_t foo; ++ ++ PSCHED_GET_TIME(foo); ++ DPRINTK(DQDISC | 200, "Bang! WD fired at: %llu\n", foo); ++ netif_schedule( sch->dev ); ++} ++ ++/* ------------------------------------------------------------------------- */ ++/* Class functions --------------------------------------------------------- */ ++/* ------------------------------------------------------------------------- */ ++ ++static int ++delay_graft(struct Qdisc *sch,unsigned long arg, ++ struct Qdisc *new1,struct Qdisc **old) ++{ ++ struct delay_sched_data *p = qdisc_priv(sch); ++ ++ DPRINTK(DCLASS | 100, "delay_graft(sch %p,[qdisc %p],new %p,old %p)\n", ++ sch,p,new1,old); ++ if (!new1) ++ new1 = &noop_qdisc; ++ sch_tree_lock(sch); ++ *old = xchg(&p->qd,new1); ++ /* XXX: decrease qlen? */ ++ qdisc_reset(*old); ++ sch_tree_unlock(sch); /* @@@ move up ? */ ++ return 0; ++} ++ ++ ++static struct Qdisc* ++delay_leaf(struct Qdisc *sch, unsigned long arg) ++{ ++ struct delay_sched_data *p = qdisc_priv(sch); ++ ++ return p->qd; ++} ++ ++ ++static unsigned long ++delay_get(struct Qdisc *sch,u32 classid) ++{ ++ struct delay_sched_data *p __attribute__((unused)) = qdisc_priv(sch); ++ ++ DPRINTK(DCLASS | 100, "delay_get(sch %p,[qdisc %p],classid %x)\n", ++ sch,p,classid); ++ return 1; /* we currently only support one class so.. */ ++} ++ ++static int ++delay_class_change(struct Qdisc *sch, u32 classid, u32 parent, ++ struct rtattr **tca, unsigned long *arg) ++{ ++ return 0; ++} ++ ++static unsigned long ++delay_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) ++{ ++ return delay_get(sch,classid); ++} ++ ++ ++static void ++delay_put(struct Qdisc *sch, unsigned long cl) ++{ ++} ++ ++static int ++delay_delete(struct Qdisc *sch,unsigned long arg) ++{ ++ struct delay_sched_data *p __attribute__((unused)) = qdisc_priv(sch); ++ ++ if (!arg) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++ ++static void ++delay_walk(struct Qdisc *sch,struct qdisc_walker *walker) ++{ ++ struct delay_sched_data *p = qdisc_priv(sch); ++ ++ DPRINTK(DCLASS | 100, "delay_walk(sch %p,[qdisc %p],walker %p)\n", ++ sch,p,walker); ++ ++ if (!walker->stop) { ++ if (walker->count >= walker->skip) { ++ if (walker->fn(sch, 1, walker) < 0) { ++ walker->stop = 1; ++ return; ++ } ++ } ++ walker->count++; ++ } ++} ++ ++ ++static struct tcf_proto** ++delay_find_tcf(struct Qdisc *sch,unsigned long cl) ++{ ++ struct delay_sched_data *p = qdisc_priv(sch); ++ ++ return &p->filter_list; ++} ++ ++ ++static int ++delay_dump_class(struct Qdisc *sch, unsigned long cl, ++ struct sk_buff *skb, struct tcmsg *tcm) ++{ ++ struct delay_sched_data *p = qdisc_priv(sch); ++ unsigned char *b = skb->tail; ++ ++ DPRINTK(DCLASS | 100, "delay_dump_class(sch %p,[qdisc %p],class %ld\n", ++ sch,p,cl); ++ tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle),1); ++ RTA_PUT(skb,TCA_OPTIONS,0,NULL); ++ ++ return skb->len; ++ ++ rtattr_failure: ++ skb_trim(skb,b-skb->data); ++ return -1; ++} ++ ++ ++/* ------------------------------------------------------------------------- */ ++/* Qdisc functions --------------------------------------------------------- */ ++/* ------------------------------------------------------------------------- */ ++ ++static int ++delay_enqueue(struct sk_buff *skb, struct Qdisc* sch) ++{ ++ struct delay_sched_data *q = qdisc_priv(sch); ++ int out = NET_XMIT_DROP; ++ ++ DPRINTK(DQDISC, "{ delay_enqueue\n"); ++ ++ { ++ struct time_node * nd; ++ psched_time_t currtime; ++ ++ PSCHED_GET_TIME(currtime); ++ DPRINTK(DQDISC | 200, "Enqueueing at: %llu\n", currtime); ++ nd = time_node_stamp(time_node_alloc()); ++ if ( nd ) ++ { ++ write_lock(&delay_mod_lock); ++ (void) time_node_queue_enqueue_tail( &q->times_queue, nd ); ++ __skb_queue_tail(&sch->q, skb); ++ write_unlock(&delay_mod_lock); ++ sch->qstats.backlog += skb->len; ++ if (sch->qstats.backlog > oldbklog) { ++ oldbklog = sch->qstats.backlog; ++ DPRINTK(DSTATS | 100, "Backlog increased to: %u\n", oldbklog); ++ DPRINTK(DSTATS | 100, "Queue size is: %u\n", sch->q.qlen); ++ } ++ sch->bstats.bytes += skb->len; ++ sch->bstats.packets++; ++ sch->q.qlen++; ++ out = 0; /* We don't drop packets - XXX: REVISE!*/ ++ } ++ } ++ ++ DPRINTK(DQDISC, "} delay_enqueue\n"); ++ ++ return out; ++} ++ ++static int ++delay_requeue(struct sk_buff *skb, struct Qdisc* sch) ++{ ++ /* XXX: ret 0 ? */ ++ int out = NET_XMIT_SUCCESS; ++ struct delay_sched_data *p = qdisc_priv(sch); ++ ++ DPRINTK(DQDISC, "{ delay_requeue\n"); ++ ++ /* We don't delay it a second time, just pass to inner queue. */ ++ if ((out = p->qd->ops->requeue(skb, p->qd)) == 0) { ++ sch->q.qlen++; ++ sch->qstats.requeues++; ++ } ++ else { ++ sch->qstats.drops++; ++ /* XXX: ret 0? */ ++ out = NET_XMIT_DROP; ++ } ++ ++ DPRINTK(DQDISC, "} delay_requeue\n"); ++ return out; ++} ++ ++static struct sk_buff * ++delay_dequeue(struct Qdisc* sch) ++{ ++ struct delay_sched_data *p = qdisc_priv(sch); ++ struct sk_buff *skb = 0; ++ struct time_node * nd; ++ psched_time_t currtime; ++ psched_time_t tmp; ++ ++ DPRINTK(DQDISC, "{ delay_dequeue\n"); ++ ++ PSCHED_GET_TIME(currtime); ++ ++ write_lock(&delay_mod_lock); ++ nd = time_node_queue_dequeue_head ( &p->times_queue ); ++ if ( nd ) ++ { ++ PSCHED_TADD2(nd->time, p->delaytime, tmp); ++ ++ if (PSCHED_TLESS(tmp, currtime)) ++ { ++ // dequeue the skb ++ DPRINTK(DQDISC | 200, "time ((%llu+%ld)=%llu) < %llu, dequeuing\n", ++ nd->time,p->delaytime,tmp,currtime); ++ skb = __skb_dequeue( &sch->q ); ++ /* XXX: should always be an sk_buff to accompany the time_node */ ++ if ( skb ) ++ { ++ if (p->reset_time) { ++ skb_set_timestamp(skb,&currtime); ++ } ++ sch->qstats.backlog -= skb->len; ++ time_node_free(nd); ++ } ++ if (p->qd->ops->enqueue(skb, p->qd) != 0) ++ { ++ DPRINTK(DQDISC | DCLASS, "Child qdisc dropped delayed packet!\n"); ++ sch->qstats.drops++; ++ sch->q.qlen--; ++ } ++ } else { ++ /* psched_time_t diff; */ ++ ++ DPRINTK(DQDISC | 200, "time ((%llu+%ld)=%llu) >= %llu, " ++ "NOT dequeueing\n", ++ nd->time,p->delaytime,tmp,currtime); ++ /* requeue the time */ ++ time_node_queue_enqueue_head ( &p->times_queue, nd ); ++ } ++ ++ /* update timer if any packets still exist in the queue */ ++ nd = time_node_queue_peek_head( &p->times_queue ); ++ if (nd && !netif_queue_stopped(sch->dev)) ++ { ++ psched_tdiff_t diff; ++ ++ /* Calculate the wait time before we should be awakened */ ++ PSCHED_TADD2(nd->time, p->delaytime, tmp); ++ diff = PSCHED_TDIFF(tmp, currtime); ++ ++ if (diff < 1) ++ diff = 1; ++ ++ DPRINTK(DQDISC | 500, "Setting timer to expire at %lu jiffies. " ++ "Diff is %lu Jiffies is %lu HZ is %u\n", ++ jiffies + PSCHED_US2JIFFIE(diff), ++ PSCHED_US2JIFFIE(diff), ++ jiffies, ++ HZ); ++ ++ mod_timer( &p->wd_timer, jiffies + PSCHED_US2JIFFIE(diff)); /* removed +100 */ ++ } ++ } ++ write_unlock(&delay_mod_lock); ++ ++ skb = p->qd->ops->dequeue(p->qd); ++ if ( skb ) ++ { ++ DPRINTK(DQDISC | 100, "delay_dequeue: really dequeueing packet\n"); ++ sch->q.qlen--; ++ } ++ ++ DPRINTK(DQDISC, "} delay_dequeue\n"); ++ return skb; ++} ++ ++ ++static unsigned int ++delay_drop(struct Qdisc* sch) ++{ ++ struct delay_sched_data *p = qdisc_priv(sch); ++ struct sk_buff *skb; ++ int out = 0; ++ ++ DPRINTK(DQDISC, "{ delay_drop\n"); ++ ++ write_lock(&delay_mod_lock); ++ skb = __skb_dequeue_tail(&sch->q); ++ if ( skb ) ++ { ++ struct time_node * nd = ++ time_node_queue_dequeue_head ( &p->times_queue ); ++ time_node_free( nd ); ++ ++ sch->qstats.backlog -= skb->len; ++ sch->qstats.drops++; ++ sch->q.qlen--; ++ kfree_skb(skb); ++ out = 1; /* We succeeded */ ++ } ++ else if (p->qd->ops->drop(p->qd)) ++ { ++ sch->q.qlen--; ++ out = 1; ++ } ++ write_unlock(&delay_mod_lock); ++ ++ DPRINTK(DQDISC, "} delay_drop\n"); ++ return out; ++} ++ ++static void ++delay_reset(struct Qdisc* sch) ++{ ++ struct delay_sched_data *p = qdisc_priv(sch); ++ ++ DPRINTK(DQDISC, "{ delay_reset\n"); ++ ++ del_timer( &p->wd_timer ); ++ skb_queue_purge(&sch->q); ++ time_node_queue_purge ( &p->times_queue ); ++ sch->qstats.backlog = 0; ++ qdisc_reset(p->qd); ++ sch->q.qlen = 0; ++ ++ DPRINTK(DQDISC, "} delay_reset\n"); ++} ++ ++static int ++delay_change(struct Qdisc *sch, struct rtattr *opt) ++{ ++ int err = -EINVAL; ++ struct delay_sched_data *p = qdisc_priv(sch); ++ struct tc_delay_qopt * qopt; ++ ++ DPRINTK(DQDISC, "{ delay_change\n"); ++ ++ if (opt == NULL) ++ goto done; ++ ++ qopt = RTA_DATA(opt); ++ ++ if ( RTA_PAYLOAD(opt) < sizeof(*qopt) ) ++ { ++ EPRINTK("delay_change: opt is too small\n"); ++ goto done; ++ } ++ ++ DPRINTK(DSTATS, "About to lock tree in change\n"); ++ sch_tree_lock(sch); ++ DPRINTK(DSTATS, "Tree locked\n"); ++ p->delaytime = qopt->delay_usec; ++ p->reset_time = qopt->reset_time; ++ DPRINTK(DSTATS, "Reseting timer\n"); ++ init_timer( &p->wd_timer ); ++ p->wd_timer.function = delay_watchdog; ++ p->wd_timer.data = (unsigned long)sch; ++ DPRINTK(DSTATS, "Timer reset\n"); ++ sch_tree_unlock(sch); ++ DPRINTK(DSTATS, "Tree unlocked\n"); ++ ++ DPRINTK(DQDISC, "delay_change: usec: %u\n", qopt->delay_usec); ++ ++ err = 0; ++ done: ++ DPRINTK(DQDISC, "} delay_change\n"); ++ return err; ++} ++ ++static int ++delay_init(struct Qdisc *sch, struct rtattr *opt) ++{ ++ struct delay_sched_data *p = qdisc_priv(sch); ++ int err = -EINVAL; ++ ++ DPRINTK(DQDISC, "{ delay_init\n"); ++ ++ DPRINTK(DQDISC, ">> initializing queue\n"); ++ INIT_LIST_HEAD(&p->times_queue.link); ++ oldbklog = 0; /* XXX: temp */ ++ ++ if ( delay_change(sch, opt) != 0 ) ++ { ++ DPRINTK(DQDISC, ">> delay_change failed\n"); ++ } ++ else ++ { ++ p->filter_list = NULL; ++ if (!(p->qd = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, ++ TC_H_MAKE(sch->handle, 1)))) ++ p->qd = &noop_qdisc; ++ ++ err = 0; ++ } ++ ++ DPRINTK(DQDISC, "} delay_init\n"); ++ return err; ++} ++ ++static ++void delay_destroy(struct Qdisc *sch) ++{ ++ struct delay_sched_data *p = qdisc_priv(sch); ++ struct tcf_proto *tp; ++ ++ DPRINTK(DQDISC, "{ delay_destroy\n"); ++ ++ DPRINTK(DQDISC | 100, ">> del_timer\n"); ++ ++ del_timer( &p->wd_timer ); ++ skb_queue_purge(&sch->q); ++ time_node_queue_purge ( &p->times_queue ); ++ sch->qstats.backlog = 0; ++ ++ while (p->filter_list) { ++ tp = p->filter_list; ++ p->filter_list = tp->next; ++ tp->ops->destroy(tp); ++ } ++ ++ qdisc_destroy(p->qd); ++ p->qd = &noop_qdisc; ++ ++ sch->q.qlen = 0; ++ ++ DPRINTK(DQDISC, "} delay_destroy\n"); ++} ++ ++ ++static int ++delay_dump(struct Qdisc *sch, struct sk_buff *skb) ++{ ++ struct delay_sched_data *p = qdisc_priv(sch); ++ unsigned char *b = skb->tail; ++ struct rtattr *rta; ++ struct tc_delay_qopt opt; ++ ++ DPRINTK(DQDISC, "{ delay_dump\n"); ++ ++ rta = (struct rtattr *)b; ++ RTA_PUT(skb, TCA_OPTIONS, 0, NULL); ++ ++ /* XXX: fix locking */ ++ //sch_tree_lock(sch); ++ opt.delay_usec = p->delaytime; ++ opt.reset_time = p->reset_time; ++ //sch_tree_unlock(sch); ++ ++ RTA_PUT(skb, TCA_DELAY_PARMS, sizeof(opt), &opt); ++ ++ DPRINTK(DQDISC, "} delay_dump\n"); ++ rta->rta_len = skb->tail - b; ++ return skb->len; ++ ++ rtattr_failure: ++ skb_trim(skb, b - skb->data); ++ DPRINTK(DQDISC, "} delay_dump - failed!\n"); ++ return -1; ++} ++ ++static struct Qdisc_class_ops delay_class_ops = { ++ .graft = delay_graft, /* graft */ ++ .leaf = delay_leaf, /* leaf */ ++ .get = delay_get, /* get */ ++ .put = delay_put, /* put */ ++ .change = delay_class_change, /* change */ ++ .delete = delay_delete, /* delete */ ++ .walk = delay_walk, /* walk */ ++ .tcf_chain = delay_find_tcf, /* tcf_chain */ ++ .bind_tcf = delay_bind_filter, /* bind_tcf */ ++ .unbind_tcf = delay_put, /* unbind_tcf */ ++ .dump = delay_dump_class, /* dump */ ++}; ++ ++struct Qdisc_ops delay_qdisc_ops = { ++ .next = NULL, ++ .cl_ops = &delay_class_ops, ++ .id = "delay", ++ .priv_size = sizeof(struct delay_sched_data), ++ .enqueue = delay_enqueue, ++ .dequeue = delay_dequeue, ++ .requeue = delay_requeue, ++ .drop = delay_drop, ++ .init = delay_init, ++ .reset = delay_reset, ++ .destroy = delay_destroy, ++ .change = delay_change, ++ .dump = delay_dump, ++ .owner = THIS_MODULE, ++}; ++ ++static int __init delay_module_init(void) ++{ ++ return register_qdisc(&delay_qdisc_ops); ++} ++static void __exit delay_module_exit(void) ++{ ++ unregister_qdisc(&delay_qdisc_ops); ++} ++module_init(delay_module_init) ++module_exit(delay_module_exit) ++MODULE_LICENSE("GPL"); +diff -urN linux-2.6.20-1.2944.fc6.emulab-1/net/sched/sch_plr.c linux-2.6.20-1.2944.fc6.emulab-1-linkdelay/net/sched/sch_plr.c +--- linux-2.6.20-1.2944.fc6.emulab-1/net/sched/sch_plr.c 1969-12-31 17:00:00.000000000 -0700 ++++ linux-2.6.20-1.2944.fc6.emulab-1-linkdelay/net/sched/sch_plr.c 2007-09-27 22:58:45.000000000 -0600 +@@ -0,0 +1,417 @@ ++/* ++ * net/sched/sch_plr.c probibalistically drop packets ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: ++ * David Johnson <johnsond@cs.utah.edu> (2.4 to 2.6) ++ * Kirk Webb <kwebb@cs.utah.edu> ++ * ++ */ ++ ++#include <linux/module.h> ++#include <linux/types.h> ++#include <linux/kernel.h> ++#include <linux/errno.h> ++#include <linux/netdevice.h> ++#include <linux/etherdevice.h> ++#include <linux/skbuff.h> ++#include <linux/rtnetlink.h> ++#include <net/sock.h> ++#include <net/pkt_sched.h> ++ ++/* ++ Simple PLR "scheduler" We drop packets based on a uniform ++ probability dist, set within the range of an u32. The tc tool ++ expects a floating pt. number between 0 and 1, and subsequently ++ scales it to the appropriate closest u32 value (btw 0 and 2^32) ++ before handing it off to this module via rtnetlink. ++ */ ++ ++#define DQDISC (1<<16) ++#define DCLASS (1<<17) ++ ++/* debugging level & bitmask: upper 16 bits are debug mask, lower 16 ++ are level */ ++/* bits for mask are defined above, and level is 0 - 1000 */ ++#define DBG DQDISC | DCLASS | 1000 ++ ++#if 0 ++#define DMSK(lvl) ((lvl) & 0xffff0000) ++#define DLVL(lvl) ((lvl) & 0x0000ffff) ++#define DPRINTK(lvl, fmt, args...) if ((DMSK(lvl) & DMSK(DBG)) && \ ++ (DLVL(lvl) < DLVL(DBG))) \ ++ { printk(KERN_DEBUG fmt, ##args); } ++#else ++#define DMSK(lvl) ++#define DLVL(lvl) ++#define DPRINTK(lvl, fmt, args...) ++#endif ++ ++#define EPRINTK(fmt, args...) printk(KERN_ERR fmt, ##args); ++ ++struct plr_sched_data ++{ ++ struct Qdisc *qd; /* child qdisc - we only support 1 */ ++ u32 plr; /* packet loss rate */ ++}; ++ ++/* ------------------------------------------------------------------------- */ ++/* Class functions --------------------------------------------------------- */ ++/* ------------------------------------------------------------------------- */ ++ ++static int plr_graft(struct Qdisc *sch,unsigned long arg, ++ struct Qdisc *new1,struct Qdisc **old) ++{ ++ struct plr_sched_data *p = qdisc_priv(sch); ++ ++ DPRINTK(DCLASS | 100, "plr_graft(sch %p,[qdisc %p],new %p,old %p)\n", ++ sch,p,new1,old); ++ if (!new1) ++ new1 = &noop_qdisc; ++ sch_tree_lock(sch); ++ *old = xchg(&p->qd,new1); ++ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); ++ qdisc_reset(*old); ++ sch_tree_unlock(sch); /* @@@ move up ? */ ++ ++ return 0; ++} ++ ++ ++static struct Qdisc* plr_leaf(struct Qdisc *sch, unsigned long arg) ++{ ++ struct plr_sched_data *p = qdisc_priv(sch); ++ return p->qd; ++} ++ ++static unsigned long plr_get(struct Qdisc *sch,u32 classid) ++{ ++ return 1; /* we currently only support one class so.. */ ++} ++ ++static int plr_class_change(struct Qdisc *sch, u32 classid, u32 parent, ++ struct rtattr **tca, unsigned long *arg) ++{ ++ return -ENOSYS; ++} ++ ++static void plr_put(struct Qdisc *sch, unsigned long cl) ++{ ++} ++ ++static int plr_delete(struct Qdisc *sch,unsigned long arg) ++{ ++ return -ENOSYS; ++} ++ ++ ++static void plr_walk(struct Qdisc *sch,struct qdisc_walker *walker) ++{ ++ if (!walker->stop) { ++ if (walker->count >= walker->skip) { ++ if (walker->fn(sch, 1, walker) < 0) { ++ walker->stop = 1; ++ return; ++ } ++ } ++ walker->count++; ++ } ++} ++ ++static struct tcf_proto** plr_find_tcf(struct Qdisc *sch,unsigned long cl) ++{ ++ return NULL; ++} ++ ++static int plr_dump_class(struct Qdisc *sch, unsigned long cl, ++ struct sk_buff *skb, struct tcmsg *tcm) ++{ ++ struct plr_sched_data *p = qdisc_priv(sch); ++ unsigned char *b = skb->tail; ++ ++ if (cl != 1) ++ return -ENOENT; ++ ++ DPRINTK(DCLASS | 100, "plr_dump_class(sch %p,[qdisc %p],class %ld\n", ++ sch,p,cl); ++ ++ /* ++ * tcm->tcm_handle = TC_H_MAKE(sch->handle,1); ++ * RTA_PUT(skb,TCA_OPTIONS,0,NULL); ++ ++ * return skb->len; ++ ++ * rtattr_failure: ++ * skb_trim(skb,b-skb->data); ++ * return -1; ++ */ ++ ++ tcm->tcm_handle |= TC_H_MIN(1); ++ tcm->tcm_info = p->qd->handle; ++ ++ return 0; ++} ++ ++ ++/* ------------------------------------------------------------------------- */ ++/* Qdisc functions --------------------------------------------------------- */ ++/* ------------------------------------------------------------------------- */ ++ ++static int plr_enqueue(struct sk_buff *skb, struct Qdisc* sch) ++{ ++ struct plr_sched_data *p = qdisc_priv(sch); ++ int ret; ++ ++ DPRINTK(DQDISC, "{ plr_enqueue\n"); ++ ++ { ++ u32 rnd = net_random(); ++ psched_time_t currtime; ++ ++ PSCHED_GET_TIME(currtime); ++ DPRINTK(DQDISC | 100, "Enqueueing at: %llu\n", currtime); ++ ++ if ( rnd < p->plr) ++ { ++ /* ++ Drop, but don't tell the network device we did. ++ Keep track though in stats. ++ */ ++ DPRINTK(DQDISC | 200, "Dropped incoming packet, rand val: %u\n", rnd); ++ kfree_skb(skb); ++ sch->qstats.drops++; ++ return NET_XMIT_DROP; ++ } ++ ++ if ((ret = p->qd->ops->enqueue(skb,p->qd)) != 0) ++ { ++ DPRINTK(DQDISC | 200, "Didn't enqueue incoming packet, rand val: %u\n", rnd); ++ sch->qstats.drops++; ++ return ret; ++ } ++ ++ sch->q.qlen++; ++ sch->bstats.bytes += skb->len; ++ sch->bstats.packets++; ++ } ++ ++ DPRINTK(DQDISC, "} plr_enqueue\n"); ++ ++ return ret; ++} ++ ++static int plr_requeue(struct sk_buff *skb, struct Qdisc* sch) ++{ ++ struct plr_sched_data *p = qdisc_priv(sch); ++ int out = 0; ++ ++ DPRINTK(DQDISC, "{ plr_requeue\n"); ++ ++ if ((out = p->qd->ops->requeue(skb, p->qd)) == 0) { ++ sch->q.qlen++; ++ sch->qstats.requeues++; ++ } ++ else { ++ sch->qstats.drops++; ++ } ++ ++ DPRINTK(DQDISC, "} plr_requeue\n"); ++ ++ return out; ++} ++ ++static struct sk_buff *plr_dequeue(struct Qdisc* sch) ++{ ++ struct plr_sched_data *p = qdisc_priv(sch); ++ struct sk_buff *skb = NULL; ++ psched_time_t currtime; ++ ++ DPRINTK(DQDISC, "{ plr_dequeue\n"); ++ ++ PSCHED_GET_TIME(currtime); ++ ++ // dequeue the skb ++ DPRINTK(DQDISC | 200, "time %llu, dequeuing\n", currtime); ++ skb = p->qd->ops->dequeue(p->qd); ++ if ( skb ) ++ { ++ sch->q.qlen--; ++ } ++ ++ DPRINTK(DQDISC, "} plr_dequeue\n"); ++ return skb; ++} ++ ++ ++static unsigned int plr_drop(struct Qdisc* sch) ++{ ++ struct plr_sched_data *p = qdisc_priv(sch); ++ int out = 0; ++ ++ DPRINTK(DQDISC, "{ plr_drop\n"); ++ ++ if ((out = p->qd->ops->drop(p->qd)) == 1) { ++ sch->q.qlen--; ++ sch->qstats.drops++; /* XXX: should we report a drop too? */ /* Yes. */ ++ } ++ ++ DPRINTK(DQDISC, "} plr_drop\n"); ++ return out; ++} ++ ++static void plr_reset(struct Qdisc* sch) ++{ ++ struct plr_sched_data *p = qdisc_priv(sch); ++ ++ DPRINTK(DQDISC, "{ plr_reset\n"); ++ ++ qdisc_reset(p->qd); ++ sch->q.qlen = 0; ++ ++ DPRINTK(DQDISC, "} plr_reset\n"); ++} ++ ++static int plr_change(struct Qdisc *sch, struct rtattr *opt) ++{ ++ int err = -EINVAL; ++ struct plr_sched_data *p = qdisc_priv(sch); ++ struct tc_plr_qopt *qopt; ++ ++ DPRINTK(DQDISC, "{ plr_change\n"); ++ ++ if (opt == NULL) ++ goto done; ++ ++ qopt = RTA_DATA(opt); ++ ++ if ( RTA_PAYLOAD(opt) < sizeof(u32) ) ++ { ++ EPRINTK("plr_change: opt is too small\n"); ++ goto done; ++ } ++ ++ sch_tree_lock(sch); ++ p->plr = qopt->plr; ++ sch_tree_unlock(sch); ++ ++ DPRINTK(DQDISC | 100, "plr_change: plr: %u\n", qopt->plr ); ++ ++ err = 0; ++ done: ++ DPRINTK(DQDISC, "} plr_change\n"); ++ return err; ++} ++ ++static int plr_init(struct Qdisc *sch, struct rtattr *opt) ++{ ++ struct plr_sched_data *p = qdisc_priv(sch); ++ int err = -EINVAL; ++ ++ DPRINTK(DQDISC, "{ plr_init\n"); ++ ++ if ( plr_change(sch, opt) != 0 ) ++ { ++ DPRINTK(DQDISC, ">> plr_change failed\n"); ++ } ++ else ++ { ++ if (!(p->qd = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, ++ TC_H_MAKE(sch->handle, 1)))) ++ p->qd = &noop_qdisc; ++ ++ err = 0; ++ } ++ ++ DPRINTK(DQDISC | 100, "} plr_init\n"); ++ return err; ++} ++ ++static void plr_destroy(struct Qdisc *sch) ++{ ++ struct plr_sched_data *p = qdisc_priv(sch); ++ ++ DPRINTK(DQDISC, "{ plr_destroy\n"); ++ ++ qdisc_destroy(p->qd); ++ ++ DPRINTK(DQDISC, "} plr_destroy\n"); ++} ++ ++ ++static int plr_dump(struct Qdisc *sch, struct sk_buff *skb) ++{ ++ struct plr_sched_data *p = qdisc_priv(sch); ++ unsigned char *b = skb->tail; ++ struct rtattr *rta; ++ struct tc_plr_qopt opt; ++ ++ DPRINTK(DQDISC, "{ plr_dump\n"); ++ ++ rta = (struct rtattr *)b; ++ RTA_PUT(skb, TCA_OPTIONS, 0, NULL); ++ ++ DPRINTK(DQDISC, " plr_dump a\n"); ++ ++ /* XXX: fix locking */ ++ //sch_tree_lock(sch); ++ opt.plr = p->plr; ++ //sch_tree_unlock(sch); ++ ++ DPRINTK(DQDISC, " plr_dump b\n"); ++ ++ RTA_PUT(skb, TCA_PLR_PARMS, sizeof(opt), &opt); ++ ++ DPRINTK(DQDISC, "} plr_dump\n"); ++ rta->rta_len = skb->tail - b; ++ return skb->len; ++ ++ rtattr_failure: ++ skb_trim(skb, b - skb->data); ++ DPRINTK(DQDISC, "} plr_dump (failed!)\n"); ++ return -1; ++} ++ ++static struct Qdisc_class_ops plr_class_ops = { ++ .graft = plr_graft, /* graft */ ++ .leaf = plr_leaf, /* leaf */ ++ .get = plr_get, /* get */ ++ .put = plr_put, /* put */ ++ .change = plr_class_change, /* change */ ++ .delete = plr_delete, /* delete */ ++ .walk = plr_walk, /* walk */ ++ .tcf_chain = plr_find_tcf, /* tcf_chain */ ++ .dump = plr_dump_class, /* dump */ ++}; ++ ++static struct Qdisc_ops plr_qdisc_ops = { ++ .next = NULL, ++ .cl_ops = &plr_class_ops, ++ .id = "plr", ++ .priv_size = sizeof(struct plr_sched_data), ++ .enqueue = plr_enqueue, ++ .dequeue = plr_dequeue, ++ .requeue = plr_requeue, ++ .drop = plr_drop, ++ .init = plr_init, ++ .reset = plr_reset, ++ .destroy = plr_destroy, ++ .change = plr_change, ++ .dump = plr_dump, ++ .owner = THIS_MODULE, ++}; ++ ++static int __init plr_module_init(void) ++{ ++ return register_qdisc(&plr_qdisc_ops); ++} ++static void __exit plr_module_exit(void) ++{ ++ unregister_qdisc(&plr_qdisc_ops); ++} ++module_init(plr_module_init) ++module_exit(plr_module_exit) ++MODULE_LICENSE("GPL"); diff --git a/delay/linux/tc_mods/iproute-2.6.20-070313-emulab.patch b/delay/linux/tc_mods/iproute-2.6.20-070313-emulab.patch new file mode 100644 index 0000000000..1dc7bd5fc2 --- /dev/null +++ b/delay/linux/tc_mods/iproute-2.6.20-070313-emulab.patch @@ -0,0 +1,324 @@ +diff -urN iproute-2.6.20-070313/Config iproute-2.6.20-070313-emulab/Config +--- iproute-2.6.20-070313/Config 2007-09-24 20:43:43.000000000 -0600 ++++ iproute-2.6.20-070313-emulab/Config 2007-09-24 21:30:11.000000000 -0600 +@@ -1 +1 @@ +-# Generated config based on /usr/src/iproute-2.6.20-070313/include ++# Generated config based on /usr/src/linux-2.6.20-1.2944.fc6.emulab-1/include +diff -urN iproute-2.6.20-070313/Makefile iproute-2.6.20-070313-emulab/Makefile +--- iproute-2.6.20-070313/Makefile 2007-03-13 15:50:56.000000000 -0600 ++++ iproute-2.6.20-070313-emulab/Makefile 2007-08-09 14:55:52.000000000 -0600 +@@ -1,7 +1,8 @@ ++KERNEL_INCLUDE=/usr/src/linux-2.6.20-1.2944.fc6.emulab-1/include + DESTDIR= +-SBINDIR=/usr/sbin +-CONFDIR=/etc/iproute2 +-DOCDIR=/usr/share/doc/iproute2 ++SBINDIR=/usr/local/sbin ++CONFDIR=/usr/local/etc/iproute2 ++DOCDIR=/usr/local/share/doc/iproute2 + MANDIR=/usr/share/man + + # Path to db_185.h include +diff -urN iproute-2.6.20-070313/include/linux/pkt_sched.h iproute-2.6.20-070313-emulab/include/linux/pkt_sched.h +--- iproute-2.6.20-070313/include/linux/pkt_sched.h 2007-03-13 15:50:56.000000000 -0600 ++++ iproute-2.6.20-070313-emulab/include/linux/pkt_sched.h 2007-08-09 15:50:25.000000000 -0600 +@@ -90,6 +90,21 @@ + __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */ + }; + ++/* PLR section */ ++ ++struct tc_plr_qopt ++{ ++ __u32 plr; /* % drop rate (0-100) */ ++}; ++ ++/* DELAY section */ ++ ++struct tc_delay_qopt ++{ ++ __u32 delay_usec; /* # of usecs to delay */ ++ __u8 reset_time; /* flag: reset time on dequeue, or not */ ++}; ++ + /* PRIO section */ + + #define TCQ_PRIO_BANDS 16 +diff -urN iproute-2.6.20-070313/tc/Makefile iproute-2.6.20-070313-emulab/tc/Makefile +--- iproute-2.6.20-070313/tc/Makefile 2007-03-13 15:50:56.000000000 -0600 ++++ iproute-2.6.20-070313-emulab/tc/Makefile 2007-08-09 14:37:43.000000000 -0600 +@@ -16,6 +16,8 @@ + TCMODULES += f_route.o + TCMODULES += f_fw.o + TCMODULES += f_basic.o ++TCMODULES += q_delay.o ++TCMODULES += q_plr.o + TCMODULES += q_dsmark.o + TCMODULES += q_gred.o + TCMODULES += f_tcindex.o +diff -urN iproute-2.6.20-070313/tc/q_delay.c iproute-2.6.20-070313-emulab/tc/q_delay.c +--- iproute-2.6.20-070313/tc/q_delay.c 1969-12-31 17:00:00.000000000 -0700 ++++ iproute-2.6.20-070313-emulab/tc/q_delay.c 2007-09-24 21:27:29.000000000 -0600 +@@ -0,0 +1,131 @@ ++/* ++ * q_delay.c Delay. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: David T. McWherter, <dtm@vramp.net> ++ * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> ++ */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++#include <unistd.h> ++#include <syslog.h> ++#include <fcntl.h> ++#include <sys/socket.h> ++#include <netinet/in.h> ++#include <arpa/inet.h> ++#include <string.h> ++ ++#include "utils.h" ++#include "tc_util.h" ++ ++static void explain(void) ++{ ++ fprintf(stderr, "Usage: ... delay <microseconds> [reset_time (0|1)]\n"); ++} ++ ++static void explain1(char *arg) ++{ ++ fprintf(stderr, "Illegal \"%s\"\n", arg); ++} ++ ++ ++#define usage() return(-1) ++ ++static int ++delay_parse_opt ++ (struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n) ++{ ++ struct tc_delay_qopt opt; ++ char *end; ++ ++ memset(&opt, 0, sizeof(opt)); ++ ++ if ( argc > 4 ) { ++ fprintf(stderr, "Too many arguments (seen: %d, expected: 2 or 4)\n", ++ argc); ++ return -1; ++ } else if ( argc < 2 ) { ++ fprintf(stderr, "Too few arguments (seen: %d, expected: 2 or 4)\n", ++ argc); ++ return -1; ++ } else { ++ ++ while ( argc > 0 ) { ++ if (!strcmp(*argv, "usecs")) { ++ NEXT_ARG(); ++ opt.delay_usec = strtoul(*argv,&end,0); ++ if (*end) { ++ explain1("microseconds"); ++ return -1; ++ } ++ fprintf( stdout, "Usecs: %u\n", opt.delay_usec ); ++ } ++ else if (!strcmp(*argv, "reset_time")) { ++ NEXT_ARG(); ++ opt.reset_time = strtoul(*argv,&end,0); ++ if (*end) { ++ explain1("reset_time"); ++ return -1; ++ } ++ fprintf( stdout, "reset_time: %u\n", opt.reset_time ); ++ } ++ argc--; ++ argv++; ++ } ++ } ++ ++ addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt)); ++ ++ return 0; ++} ++ ++static int ++delay_print_opt ++ (struct qdisc_util *qu, FILE *f, struct rtattr *opt) ++{ ++ struct tc_delay_qopt *qopt; ++ ++ if (opt == NULL) ++ return 0; ++ ++ if ( RTA_PAYLOAD(opt) < sizeof(*qopt)) ++ return -1; ++ ++ qopt = RTA_DATA(opt); ++ fprintf( f, "delay { %u } reset_time { %u }", ++ qopt->delay_usec, ++ qopt->reset_time ++ ); ++ ++ return 0; ++} ++ ++ ++static int delay_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats) ++{ ++ return 0; ++} ++ ++ ++static int ++delay_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, ++ struct nlmsghdr *n) ++{ ++ return 0; ++} ++ ++ ++struct qdisc_util delay_qdisc_util = { ++ .id = "delay", ++ .parse_qopt = delay_parse_opt, ++ .print_qopt = delay_print_opt, ++ .print_xstats = delay_print_xstats, ++ ++ .parse_copt = delay_parse_class_opt, ++ .print_copt = delay_print_opt, ++}; +diff -urN iproute-2.6.20-070313/tc/q_plr.c iproute-2.6.20-070313-emulab/tc/q_plr.c +--- iproute-2.6.20-070313/tc/q_plr.c 1969-12-31 17:00:00.000000000 -0700 ++++ iproute-2.6.20-070313-emulab/tc/q_plr.c 2007-09-24 21:29:18.000000000 -0600 +@@ -0,0 +1,115 @@ ++/* ++ * q_plr.c packet loss qdisc ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * Authors: Kirk Webb, <kwebb@cs.utah.edu> ++ * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> ++ */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++#include <unistd.h> ++#include <syslog.h> ++#include <fcntl.h> ++#include <sys/socket.h> ++#include <netinet/in.h> ++#include <arpa/inet.h> ++#include <string.h> ++ ++#include "utils.h" ++#include "tc_util.h" ++ ++static void explain(void) ++{ ++ fprintf(stderr, "Usage: ... plr <rate (%% loss: 0-100)>\n"); ++} ++ ++static void explain1(char *arg) ++{ ++ fprintf(stderr, "Illegal \"%s\"\n", arg); ++} ++ ++ ++#define usage() return(-1) ++ ++static int ++plr_parse_opt ++ (struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n) ++{ ++ struct tc_plr_qopt opt; ++ double plr_rate = 0; ++ char *p; ++ ++ memset(&opt, 0, sizeof(opt)); ++ ++ if ( argc > 1 ) { ++ fprintf(stderr, "Too many arguments (seen: %d, expected: %d)\n", ++ argc, 1); ++ return -1; ++ } else if ( argc < 1 ) { ++ fprintf(stderr, "Too few arguments (seen: %d, expected: %d)\n", ++ argc, 1); ++ return -1; ++ } else if ( argc == 1 ) { ++ plr_rate = strtod(*argv, &p); ++ if (p == *argv || plr_rate < 0 || plr_rate > 1) { ++ explain1("fraction (range 0-1)"); ++ return -1; ++ } ++ } ++ ++ opt.plr = (unsigned int)(plr_rate*0xffffffffUL); ++ fprintf( stdout, "PLR: %u\n", opt.plr ); ++ ++ addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt)); ++ ++ return 0; ++} ++ ++static int ++plr_print_opt ++ (struct qdisc_util *qu, FILE *f, struct rtattr *opt) ++{ ++ struct tc_plr_qopt *qopt; ++ ++ if (opt == NULL) ++ return 0; ++ ++ if ( RTA_PAYLOAD(opt) < sizeof(*qopt)) ++ return -1; ++ ++ qopt = RTA_DATA(opt); ++ fprintf( f, "PLR: %f", ++ qopt->plr / (double) 0xffffffff ++ ); ++ ++ return 0; ++} ++ ++static int plr_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats) ++{ ++ return 0; ++} ++ ++static int ++plr_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, ++ struct nlmsghdr *n) ++{ ++ return 0; ++} ++ ++ ++ ++struct qdisc_util plr_qdisc_util = { ++ .id = "plr", ++ .parse_qopt = plr_parse_opt, ++ .print_qopt = plr_print_opt, ++ .print_xstats = plr_print_xstats, ++ ++ .parse_copt = plr_parse_class_opt, ++ .print_copt = plr_print_opt, ++}; +diff -urN iproute-2.6.20-070313/tc/tc_qdisc.c iproute-2.6.20-070313-emulab/tc/tc_qdisc.c +--- iproute-2.6.20-070313/tc/tc_qdisc.c 2007-03-13 15:50:56.000000000 -0600 ++++ iproute-2.6.20-070313-emulab/tc/tc_qdisc.c 2007-08-09 14:42:02.000000000 -0600 +@@ -36,7 +36,7 @@ + fprintf(stderr, "\n"); + fprintf(stderr, " tc qdisc show [ dev STRING ] [ingress]\n"); + fprintf(stderr, "Where:\n"); +- fprintf(stderr, "QDISC_KIND := { [p|b]fifo | tbf | prio | cbq | red | etc. }\n"); ++ fprintf(stderr, "QDISC_KIND := { [p|b]fifo | delay | plr | tbf | prio | cbq | red | etc. }\n"); + fprintf(stderr, "OPTIONS := ... try tc qdisc add <desired QDISC_KIND> help\n"); + return -1; + } -- GitLab