author	David S. Miller <davem@nuts.davemloft.net>	2004-07-09 02:26:07 -0700
committer	David S. Miller <davem@nuts.davemloft.net>	2004-07-09 02:26:07 -0700
commit	f84b29fa430db0053369f023a0034161b8f7777c (patch)
tree	be3ea3b12f604f0a701fcd948ee3d33e358cd490 /net
parent	5d42dde43391899eba74d03f4f8f20901855fef8 (diff)
download	history-f84b29fa430db0053369f023a0034161b8f7777c.tar.gz
[PKT_SCHED]: Remove CSZ scheduler.
It was an experimental hack and never finished off.

Signed-off-by: David S. Miller <davem@redhat.com>
Diffstat (limited to 'net')
-rw-r--r--	net/sched/Kconfig	15
-rw-r--r--	net/sched/Makefile	1
-rw-r--r--	net/sched/sch_csz.c	1055
3 files changed, 0 insertions, 1071 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index c4e477502e8e63..54817b8ec7f009 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -49,21 +49,6 @@ config NET_SCH_HFSC
To compile this code as a module, choose M here: the
module will be called sch_hfsc.
-config NET_SCH_CSZ
- tristate "CSZ packet scheduler"
- depends on NET_SCHED
- ---help---
- Say Y here if you want to use the Clark-Shenker-Zhang (CSZ) packet
- scheduling algorithm for some of your network devices. At the
- moment, this is the only algorithm that can guarantee service for
- real-time applications (see the top of <file:net/sched/sch_csz.c>
- for details and references about the algorithm).
-
- Note: this scheduler is currently broken.
-
- To compile this code as a module, choose M here: the
- module will be called sch_csz.
-
#tristate ' H-PFQ packet scheduler' CONFIG_NET_SCH_HPFQ
config NET_SCH_ATM
tristate "ATM pseudo-scheduler"
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 895bf4b02a9d68..daa437f8511342 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -12,7 +12,6 @@ obj-$(CONFIG_NET_ACT_POLICE) += police.o
obj-$(CONFIG_NET_CLS_POLICE) += police.o
obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
-obj-$(CONFIG_NET_SCH_CSZ) += sch_csz.o
obj-$(CONFIG_NET_SCH_HPFQ) += sch_hpfq.o
obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
obj-$(CONFIG_NET_SCH_RED) += sch_red.o
diff --git a/net/sched/sch_csz.c b/net/sched/sch_csz.c
deleted file mode 100644
index d0f8d51fedf828..00000000000000
--- a/net/sched/sch_csz.c
+++ /dev/null
@@ -1,1055 +0,0 @@
-/*
- * net/sched/sch_csz.c Clark-Shenker-Zhang scheduler.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- *
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/bitops.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/jiffies.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
-#include <net/ip.h>
-#include <net/route.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/pkt_sched.h>
-
-
-/* Clark-Shenker-Zhang algorithm.
- =======================================
-
- SOURCE.
-
- David D. Clark, Scott Shenker and Lixia Zhang
- "Supporting Real-Time Applications in an Integrated Services Packet
- Network: Architecture and Mechanism".
-
- CBQ presents a flexible universal algorithm for packet scheduling,
- but it has pretty poor delay characteristics.
- Round-robin scheduling and link-sharing goals
- apparently conflict with minimizing network delay and jitter.
- Moreover, correct handling of predictive flows seems to be
- impossible in CBQ.
-
- CSZ presents a more precise but less flexible and less efficient
- approach. As I understand it, the main idea is to create
- WFQ flows for each guaranteed service and to allocate
- the rest of the bandwidth to dummy flow-0. Flow-0 comprises
- the predictive services and the best-effort traffic;
- it is handled by a priority scheduler with the highest
- priority band allocated to predictive services and the
- remaining bands to best-effort packets.
-
- Note that in CSZ flows are NOT limited to their bandwidth. It
- is supposed that the flow passed admission control at the edge
- of the QoS network and it doesn't need further shaping. Any
- attempt to improve the flow or to shape it to a token bucket
- at intermediate hops will introduce undesired delays and raise
- jitter.
-
- At the moment CSZ is the only scheduler that provides
- true guaranteed service. Other schemes (including CBQ)
- do not provide guaranteed delay and introduce random jitter.
- There is a proof (by Sally Floyd) that delay
- can be estimated by an IntServ-compliant formula.
- This result is true formally, but it is wrong in principle.
- It takes into account only round-robin delays,
- ignoring delays introduced by link sharing, i.e. overlimiting.
- Note that temporary overlimits are inevitable because
- real links are not ideal, and the real algorithm must take this
- into account.
-
- ALGORITHM.
-
- --- Notations.
-
- $B$ is the link bandwidth (bits/sec).
-
- $I$ is the set of all flows, including flow $0$.
- Every flow $a \in I$ has an associated bandwidth slice $r_a < 1$ and
- $\sum_{a \in I} r_a = 1$.
-
- --- Flow model.
-
- Let $m_a$ be the number of backlogged bits in flow $a$.
- The flow is {\em active} if $m_a > 0$.
- This number is a discontinuous function of time;
- when a packet $i$ arrives:
- \[
- m_a(t_i+0) - m_a(t_i-0) = L^i,
- \]
- where $L^i$ is the length of the arrived packet.
- The flow queue is drained continuously until $m_a == 0$:
- \[
- {d m_a \over dt} = - { B r_a \over \sum_{b \in A} r_b}.
- \]
- I.e. flow rates are their allocated rates, proportionally
- scaled to take up all the available link bandwidth. Clearly,
- this is not the only possible policy. E.g. CBQ classes
- without borrowing would be modelled by:
- \[
- {d m_a \over dt} = - B r_a .
- \]
- More complicated hierarchical bandwidth allocation
- policies are possible, but unfortunately, the basic
- flow equations have a simple solution only for proportional
- scaling.
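
   A worked instance of this proportional scaling, with illustrative
   numbers: take $B = 10^6$ bits/sec and two active flows with
   $r_1 = r_2 = 0.25$. While both are backlogged, each drains at
   \[
   {d m_a \over dt} = - {B \cdot 0.25 \over 0.25 + 0.25} = - {B \over 2}
	= -5 \cdot 10^5 \hbox{ bits/sec},
   \]
   i.e. the two active flows split the whole link between them rather
   than only the half that their slices nominally sum to.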
-
- --- Departure times.
-
- We calculate the time until the last bit of the packet is sent:
- \[
- E_a^i(t) = { m_a(t_i) - \delta_a(t) \over r_a },
- \]
- where $\delta_a(t)$ is the number of bits drained since $t_i$.
- We have to evaluate $E_a^i$ for all queued packets,
- then find the packet with minimal $E_a^i$ and send it.
-
- This sounds good, but direct implementation of the algorithm
- is absolutely infeasible. Luckily, if flow rates
- are scaled proportionally, the equations have a simple solution.
-
- The differential equation for $E_a^i$ is
- \[
- {d E_a^i (t) \over dt } = - { d \delta_a(t) \over dt} { 1 \over r_a}
-	= - { B \over \sum_{b \in A} r_b}
- \]
- with initial condition
- \[
- E_a^i (t_i) = { m_a(t_i) \over r_a } .
- \]
-
- Let's introduce an auxiliary function $R(t)$:
-
- --- Round number.
-
- Consider the following model: we rotate over the active flows,
- sending $r_a B$ bits from every flow, so that we send
- $B \sum_{a \in A} r_a$ bits per round, which takes
- $\sum_{a \in A} r_a$ seconds.
-
- Hence, $R(t)$ (round number) is a monotonically increasing
- linear function of time when $A$ is not changed
- \[
- { d R(t) \over dt } = { 1 \over \sum_{a \in A} r_a }
- \]
- and it is continuous when $A$ changes.
-
- The central observation is that the quantity
- $F_a^i = R(t) + E_a^i(t)/B$ does not depend on time at all!
- $R(t)$ does not depend on the flow, so that $F_a^i$ can be
- calculated only once, on packet arrival, and we need not
- recalculate the $E$ numbers or re-sort the queues.
- The number $F_a^i$ is called the finish number of the packet.
- It is just the value of $R(t)$ when the last bit of the packet
- is sent out.
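
   This time-independence follows by differentiating $F_a^i$ and
   combining the two rate equations above:
   \[
   {d F_a^i \over dt} = {d R \over dt} + {1 \over B}{d E_a^i \over dt}
	= {1 \over \sum_{b \in A} r_b} - {1 \over \sum_{b \in A} r_b} = 0 .
   \]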
-
- The maximal finish number in a flow is called the finish number of
- the flow, and the minimal one is the "start number of the flow".
- Apparently, a flow is active if and only if $F_a \geq R$.
-
- When a packet of length $L_i$ bits arrives on flow $a$ at time $t_i$,
- we calculate $F_a^i$ as:
-
- If the flow was inactive ($F_a < R$):
- $F_a^i = R(t) + {L_i \over B r_a}$
- otherwise
- $F_a^i = F_a + {L_i \over B r_a}$
-
- These equations complete the algorithm specification.
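
   A minimal standalone sketch of this update rule (hypothetical helper,
   added for illustration; the in-tree logic lives in csz_enqueue() below,
   with the $L_i/(B r_a)$ term coming from the L2R() lookup):

   static inline unsigned long csz_new_finish(unsigned long F_a,        /* finish number of the flow */
					      unsigned long R,          /* current round number R(t) */
					      unsigned long len_scaled) /* L_i/(B*r_a), i.e. the L2R() value */
   {
	if ((long)(F_a - R) >= 0)	/* flow is still active */
		return F_a + len_scaled;
	/* flow was inactive: restart it from the current round number */
	return R + len_scaled;
   }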
-
- It looks pretty hairy, but there is a simple
- procedure for solving these equations.
- See the procedure csz_update(), which is a generalization of
- the algorithm from S. Keshav's thesis, Chapter 3,
- "Efficient Implementation of Fair Queuing".
-
- NOTES.
-
- * We implement only the simplest variant of CSZ,
- where flow-0 is an explicit 4-band priority FIFO.
- This is bad, but we would need a "peek" operation in addition
- to "dequeue" to implement complete CSZ.
- I do not want to do that unless it is absolutely
- necessary.
-
- * Primitive support for token bucket filtering
- is present too. It directly contradicts CSZ but,
- even though the Internet spans the globe ... :-),
- "the edges of the network" really do exist.
-
- BUGS.
-
- * Fixed point arithmetic is overcomplicated, suboptimal and even
- wrong. Check it later. */
-
-
-/* This number is arbitrary */
-
-#define CSZ_GUARANTEED 16
-#define CSZ_FLOWS (CSZ_GUARANTEED+4)
-
-struct csz_head
-{
- struct csz_head *snext;
- struct csz_head *sprev;
- struct csz_head *fnext;
- struct csz_head *fprev;
-};
-
-struct csz_flow
-{
- struct csz_head *snext;
- struct csz_head *sprev;
- struct csz_head *fnext;
- struct csz_head *fprev;
-
-/* Parameters */
- struct tc_ratespec rate;
- struct tc_ratespec slice;
- u32 *L_tab; /* Lookup table for L/(B*r_a) values */
- unsigned long limit; /* Maximal length of queue */
-#ifdef CSZ_PLUS_TBF
- struct tc_ratespec peakrate;
- __u32 buffer; /* Depth of token bucket, normalized
- as L/(B*r_a) */
- __u32 mtu;
-#endif
-
-/* Variables */
-#ifdef CSZ_PLUS_TBF
- unsigned long tokens; /* Tokens number: usecs */
- psched_time_t t_tbf;
- unsigned long R_tbf;
- int throttled;
-#endif
- unsigned peeked;
- unsigned long start; /* Finish number of the first skb */
- unsigned long finish; /* Finish number of the flow */
-
- struct sk_buff_head q; /* FIFO queue */
-};
-
-#define L2R(f,L) ((f)->L_tab[(L)>>(f)->slice.cell_log])
-
-struct csz_sched_data
-{
-/* Parameters */
- unsigned char rate_log; /* fixed point position for rate;
- * really we do not need it */
- unsigned char R_log; /* fixed point position for round number */
- unsigned char delta_log; /* 1<<delta_log is maximal timeout in usecs;
- * 21 <-> 2.1sec is MAXIMAL value */
-
-/* Variables */
- struct tcf_proto *filter_list;
- u8 prio2band[TC_PRIO_MAX+1];
-#ifdef CSZ_PLUS_TBF
- struct timer_list wd_timer;
- long wd_expires;
-#endif
- psched_time_t t_c; /* Time check-point */
- unsigned long R_c; /* R-number check-point */
- unsigned long rate; /* Current sum of rates of active flows */
- struct csz_head s; /* Flows sorted by "start" */
- struct csz_head f; /* Flows sorted by "finish" */
-
- struct sk_buff_head other[4];/* Predicted (0) and the best-effort
- classes (1,2,3) */
- struct csz_flow flow[CSZ_GUARANTEED]; /* Array of flows */
-};
-
-/* These routines (csz_insert_finish and csz_insert_start) are
- the most time-consuming part of the whole algorithm.
-
- We insert into a sorted list, so that the time is linear
- in the number of active flows in the worst case.
- Note that we do not have a very large number of guaranteed flows,
- so logarithmic algorithms (heaps etc.) are useless;
- they are slower than a linear one when the list length is <= 32.
-
- A heap would make sense if we used WFQ for best-effort
- flows, but SFQ is the better choice in this case.
- */
-
-
-/* Insert flow "this" to the list "b" before
- flow with greater finish number.
- */
-
-#if 0
-/* Scan forward */
-static inline void csz_insert_finish(struct csz_head *b,
- struct csz_flow *this)
-{
- struct csz_head *f = b->fnext;
- unsigned long finish = this->finish;
-
- while (f != b) {
- if (((struct csz_flow*)f)->finish - finish > 0)
- break;
- f = f->fnext;
- }
- this->fnext = f;
- this->fprev = f->fprev;
- this->fnext->fprev = this->fprev->fnext = (struct csz_head*)this;
-}
-#else
-/* Scan backward */
-static inline void csz_insert_finish(struct csz_head *b,
- struct csz_flow *this)
-{
- struct csz_head *f = b->fprev;
- unsigned long finish = this->finish;
-
- while (f != b) {
- if (((struct csz_flow*)f)->finish - finish <= 0)
- break;
- f = f->fprev;
- }
- this->fnext = f->fnext;
- this->fprev = f;
- this->fnext->fprev = this->fprev->fnext = (struct csz_head*)this;
-}
-#endif
-
-/* Insert flow "this" to the list "b" before
- flow with greater start number.
- */
-
-static inline void csz_insert_start(struct csz_head *b,
- struct csz_flow *this)
-{
- struct csz_head *f = b->snext;
- unsigned long start = this->start;
-
- while (f != b) {
- if (((struct csz_flow*)f)->start - start > 0)
- break;
- f = f->snext;
- }
- this->snext = f;
- this->sprev = f->sprev;
- this->snext->sprev = this->sprev->snext = (struct csz_head*)this;
-}
-
-
-/* Calculate and return the current round number.
- It is another time-consuming part, but
- it is impossible to avoid it.
-
- It costs O(N), which makes the whole algorithm useful only
- for playing with something close to the ideal fluid model.
-
- There exist less academic but more practical modifications,
- which might have even better characteristics (WF2Q+, HPFQ, HFSC).
- */
-
-static unsigned long csz_update(struct Qdisc *sch)
-{
- struct csz_sched_data *q = (struct csz_sched_data*)sch->data;
- struct csz_flow *a;
- unsigned long F;
- unsigned long tmp;
- psched_time_t now;
- unsigned long delay;
- unsigned long R_c;
-
- PSCHED_GET_TIME(now);
- delay = PSCHED_TDIFF(now, q->t_c);
- if (delay>>q->delta_log) {
- /* Delta is too large.
- It is possible if MTU/BW > 1<<q->delta_log
- (i.e. configuration error) or because of hardware
- fault. We have no choice...
- */
- qdisc_reset(sch);
- return 0;
- }
-
- q->t_c = now;
-
- for (;;) {
- a = (struct csz_flow*)q->f.fnext;
-
- /* No more active flows. Reset R and exit. */
- if (a == (struct csz_flow*)&q->f) {
-#ifdef CSZ_DEBUG
- if (q->rate) {
- printk("csz_update: rate!=0 on inactive csz\n");
- q->rate = 0;
- }
-#endif
- q->R_c = 0;
- return 0;
- }
-
- F = a->finish;
-
-#ifdef CSZ_DEBUG
- if (q->rate == 0) {
- printk("csz_update: rate=0 on active csz\n");
- goto do_reset;
- }
-#endif
-
- /*
- * tmp = (t - q->t_c)/q->rate;
- */
-
- tmp = ((delay<<(31-q->delta_log))/q->rate)>>(31-q->delta_log+q->R_log);
-
- tmp += q->R_c;
-
- /* OK, this flow (and all flows with greater
- finish numbers) is still active */
- if (F - tmp > 0)
- break;
-
- /* It is no longer active */
-
- a->fprev->fnext = a->fnext;
- a->fnext->fprev = a->fprev;
-
- /*
- * q->t_c += (F - q->R_c)*q->rate
- */
-
- tmp = ((F-q->R_c)*q->rate)<<q->R_log;
- R_c = F;
- q->rate -= a->slice.rate;
-
- if ((long)(delay - tmp) >= 0) {
- delay -= tmp;
- continue;
- }
- delay = 0;
- }
-
- q->R_c = tmp;
- return tmp;
-}
-
-unsigned csz_classify(struct sk_buff *skb, struct csz_sched_data *q)
-{
- return CSZ_GUARANTEED;
-}
-
-static int
-csz_enqueue(struct sk_buff *skb, struct Qdisc* sch)
-{
- struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
- unsigned flow_id = csz_classify(skb, q);
- unsigned long R;
- int prio = 0;
- struct csz_flow *this;
-
- if (flow_id >= CSZ_GUARANTEED) {
- prio = flow_id - CSZ_GUARANTEED;
- flow_id = 0;
- }
-
- this = &q->flow[flow_id];
- if (this->q.qlen >= this->limit || this->L_tab == NULL) {
- sch->stats.drops++;
- kfree_skb(skb);
- return NET_XMIT_DROP;
- }
-
- R = csz_update(sch);
-
- if ((long)(this->finish - R) >= 0) {
- /* It was active */
- this->finish += L2R(this,skb->len);
- } else {
- /* It is inactive; activate it */
- this->finish = R + L2R(this,skb->len);
- q->rate += this->slice.rate;
- csz_insert_finish(&q->f, this);
- }
-
- /* If this flow was empty, remember start number
- and insert it into start queue */
- if (this->q.qlen == 0) {
- this->start = this->finish;
- csz_insert_start(&q->s, this);
- }
- if (flow_id)
- skb_queue_tail(&this->q, skb);
- else
- skb_queue_tail(&q->other[prio], skb);
- sch->q.qlen++;
- sch->stats.bytes += skb->len;
- sch->stats.packets++;
- return 0;
-}
-
-static __inline__ struct sk_buff *
-skb_dequeue_best(struct csz_sched_data * q)
-{
- int i;
- struct sk_buff *skb;
-
- for (i=0; i<4; i++) {
- skb = skb_dequeue(&q->other[i]);
- if (skb) {
- q->flow[0].q.qlen--;
- return skb;
- }
- }
- return NULL;
-}
-
-static __inline__ struct sk_buff *
-skb_peek_best(struct csz_sched_data * q)
-{
- int i;
- struct sk_buff *skb;
-
- for (i=0; i<4; i++) {
- skb = skb_peek(&q->other[i]);
- if (skb)
- return skb;
- }
- return NULL;
-}
-
-#ifdef CSZ_PLUS_TBF
-
-static void csz_watchdog(unsigned long arg)
-{
- struct Qdisc *sch = (struct Qdisc*)arg;
-
- qdisc_wakeup(sch->dev);
-}
-
-static __inline__ void
-csz_move_queue(struct csz_flow *this, long delta)
-{
- this->fprev->fnext = this->fnext;
- this->fnext->fprev = this->fprev;
-
- this->start += delta;
- this->finish += delta;
-
- csz_insert_finish(this);
-}
-
-static __inline__ int csz_enough_tokens(struct csz_sched_data *q,
- struct csz_flow *this,
- struct sk_buff *skb)
-{
- long toks;
- long shift;
- psched_time_t now;
-
- PSCHED_GET_TIME(now);
-
- toks = PSCHED_TDIFF(now, t_tbf) + this->tokens - L2R(q,this,skb->len);
-
- shift = 0;
- if (this->throttled) {
- /* Remember the a posteriori delay */
-
- unsigned long R = csz_update(q);
- shift = R - this->R_tbf;
- this->R_tbf = R;
- }
-
- if (toks >= 0) {
- /* Now we have enough tokens to proceed */
-
- this->tokens = toks <= this->depth ? toks : this->depth;
- this->t_tbf = now;
-
- if (!this->throttled)
- return 1;
-
- /* Flow was throttled. Update its start & finish numbers
- with the delay calculated a posteriori.
- */
-
- this->throttled = 0;
- if (shift > 0)
- csz_move_queue(this, shift);
- return 1;
- }
-
- if (!this->throttled) {
- /* Flow has just been throttled; remember the
- current round number to calculate the a posteriori delay.
- */
- this->throttled = 1;
- this->R_tbf = csz_update(q);
- }
-
- /* Move the whole queue to the time when it will be allowed to send.
- We should translate time into a round number, but that is impossible,
- so we make the most conservative estimate, i.e. we suppose
- that only this flow is active and, hence, R = t.
- Really toks <= R <= toks/r_a.
-
- This a priori shift in R will be adjusted later to reflect
- the real delay. We cannot avoid it because:
- - a throttled flow continues to be active from the viewpoint
- of CSZ, so it would acquire the highest priority
- if the start numbers were not adjusted;
- - eventually, the finish number would become less than the round
- number and the flow would be declared inactive.
- */
-
- toks = -toks;
-
- /* Remember that we should start the watchdog */
- if (toks < q->wd_expires)
- q->wd_expires = toks;
-
- toks >>= q->R_log;
- shift += toks;
- if (shift > 0) {
- this->R_tbf += toks;
- csz_move_queue(this, shift);
- }
- csz_insert_start(this);
- return 0;
-}
-#endif
-
-
-static struct sk_buff *
-csz_dequeue(struct Qdisc* sch)
-{
- struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
- struct sk_buff *skb;
- struct csz_flow *this;
-
-#ifdef CSZ_PLUS_TBF
- q->wd_expires = 0;
-#endif
- this = (struct csz_flow*)q->s.snext;
-
- while (this != (struct csz_flow*)&q->s) {
-
- /* First of all: unlink from start list */
- this->sprev->snext = this->snext;
- this->snext->sprev = this->sprev;
-
- if (this != &q->flow[0]) { /* Guaranteed flow */
- skb = __skb_dequeue(&this->q);
- if (skb) {
-#ifdef CSZ_PLUS_TBF
- if (this->depth) {
- if (!csz_enough_tokens(q, this, skb))
- continue;
- }
-#endif
- if (this->q.qlen) {
- struct sk_buff *nskb = skb_peek(&this->q);
- this->start += L2R(this,nskb->len);
- csz_insert_start(&q->s, this);
- }
- sch->q.qlen--;
- return skb;
- }
- } else { /* Predicted or best effort flow */
- skb = skb_dequeue_best(q);
- if (skb) {
- unsigned peeked = this->peeked;
- this->peeked = 0;
-
- if (--this->q.qlen) {
- struct sk_buff *nskb;
- unsigned dequeued = L2R(this,skb->len);
-
- /* We did not get the same thing that we
- peeked at earlier; adjust the start number.
- */
- if (peeked != dequeued && peeked)
- this->start += dequeued - peeked;
-
- nskb = skb_peek_best(q);
- peeked = L2R(this,nskb->len);
- this->start += peeked;
- this->peeked = peeked;
- csz_insert_start(&q->s, this);
- }
- sch->q.qlen--;
- return skb;
- }
- }
- }
-#ifdef CSZ_PLUS_TBF
- /* We are about to return no skb.
- Schedule the watchdog timer if this happened because of shaping.
- */
- if (q->wd_expires) {
- unsigned long delay = PSCHED_US2JIFFIE(q->wd_expires);
- if (delay == 0)
- delay = 1;
- mod_timer(&q->wd_timer, jiffies + delay);
- sch->stats.overlimits++;
- }
-#endif
- return NULL;
-}
-
-static void
-csz_reset(struct Qdisc* sch)
-{
- struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
- int i;
-
- for (i=0; i<4; i++)
- skb_queue_purge(&q->other[i]);
-
- for (i=0; i<CSZ_GUARANTEED; i++) {
- struct csz_flow *this = q->flow + i;
- skb_queue_purge(&this->q);
- this->snext = this->sprev =
- this->fnext = this->fprev = (struct csz_head*)this;
- this->start = this->finish = 0;
- }
- q->s.snext = q->s.sprev = &q->s;
- q->f.fnext = q->f.fprev = &q->f;
- q->R_c = 0;
-#ifdef CSZ_PLUS_TBF
- PSCHED_GET_TIME(&q->t_tbf);
- q->tokens = q->depth;
- del_timer(&q->wd_timer);
-#endif
- sch->q.qlen = 0;
-}
-
-static void
-csz_destroy(struct Qdisc* sch)
-{
- struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
- struct tcf_proto *tp;
-
- while ((tp = q->filter_list) != NULL) {
- q->filter_list = tp->next;
- tcf_destroy(tp);
- }
-}
-
-static int csz_init(struct Qdisc *sch, struct rtattr *opt)
-{
- struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
- struct rtattr *tb[TCA_CSZ_PTAB];
- struct tc_csz_qopt *qopt;
- int i;
-
- rtattr_parse(tb, TCA_CSZ_PTAB, RTA_DATA(opt), RTA_PAYLOAD(opt));
- if (tb[TCA_CSZ_PARMS-1] == NULL ||
- RTA_PAYLOAD(tb[TCA_CSZ_PARMS-1]) < sizeof(*qopt))
- return -EINVAL;
- qopt = RTA_DATA(tb[TCA_CSZ_PARMS-1]);
-
- q->R_log = qopt->R_log;
- q->delta_log = qopt->delta_log;
- for (i=0; i<=TC_PRIO_MAX; i++) {
- if (qopt->priomap[i] >= CSZ_FLOWS)
- return -EINVAL;
- q->prio2band[i] = qopt->priomap[i];
- }
-
- for (i=0; i<4; i++)
- skb_queue_head_init(&q->other[i]);
-
- for (i=0; i<CSZ_GUARANTEED; i++) {
- struct csz_flow *this = q->flow + i;
- skb_queue_head_init(&this->q);
- this->snext = this->sprev =
- this->fnext = this->fprev = (struct csz_head*)this;
- this->start = this->finish = 0;
- }
- q->s.snext = q->s.sprev = &q->s;
- q->f.fnext = q->f.fprev = &q->f;
- q->R_c = 0;
-#ifdef CSZ_PLUS_TBF
- init_timer(&q->wd_timer);
- q->wd_timer.data = (unsigned long)sch;
- q->wd_timer.function = csz_watchdog;
-#endif
- return 0;
-}
-
-static int csz_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
- struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
- unsigned char *b = skb->tail;
- struct rtattr *rta;
- struct tc_csz_qopt opt;
-
- rta = (struct rtattr*)b;
- RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
-
- opt.flows = CSZ_FLOWS;
- memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
- RTA_PUT(skb, TCA_CSZ_PARMS, sizeof(opt), &opt);
- rta->rta_len = skb->tail - b;
-
- return skb->len;
-
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
-}
-
-static int csz_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
- struct Qdisc **old)
-{
- return -EINVAL;
-}
-
-static struct Qdisc * csz_leaf(struct Qdisc *sch, unsigned long cl)
-{
- return NULL;
-}
-
-
-static unsigned long csz_get(struct Qdisc *sch, u32 classid)
-{
- struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
- unsigned long band = TC_H_MIN(classid) - 1;
-
- if (band >= CSZ_FLOWS)
- return 0;
-
- if (band < CSZ_GUARANTEED && q->flow[band].L_tab == NULL)
- return 0;
-
- return band+1;
-}
-
-static unsigned long csz_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
-{
- return csz_get(sch, classid);
-}
-
-
-static void csz_put(struct Qdisc *sch, unsigned long cl)
-{
- return;
-}
-
-static int csz_change(struct Qdisc *sch, u32 handle, u32 parent, struct rtattr **tca, unsigned long *arg)
-{
- unsigned long cl = *arg;
- struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
- struct rtattr *opt = tca[TCA_OPTIONS-1];
- struct rtattr *tb[TCA_CSZ_PTAB];
- struct tc_csz_copt *copt;
-
- rtattr_parse(tb, TCA_CSZ_PTAB, RTA_DATA(opt), RTA_PAYLOAD(opt));
- if (tb[TCA_CSZ_PARMS-1] == NULL ||
- RTA_PAYLOAD(tb[TCA_CSZ_PARMS-1]) < sizeof(*copt))
- return -EINVAL;
- copt = RTA_DATA(tb[TCA_CSZ_PARMS-1]);
-
- if (tb[TCA_CSZ_RTAB-1] &&
- RTA_PAYLOAD(tb[TCA_CSZ_RTAB-1]) < 1024)
- return -EINVAL;
-
- if (cl) {
- struct csz_flow *a;
- cl--;
- if (cl >= CSZ_FLOWS)
- return -ENOENT;
- if (cl >= CSZ_GUARANTEED || q->flow[cl].L_tab == NULL)
- return -EINVAL;
-
- a = &q->flow[cl];
-
- spin_lock_bh(&sch->dev->queue_lock);
-#if 0
- a->rate_log = copt->rate_log;
-#endif
-#ifdef CSZ_PLUS_TBF
- a->limit = copt->limit;
- a->rate = copt->rate;
- a->buffer = copt->buffer;
- a->mtu = copt->mtu;
-#endif
-
- if (tb[TCA_CSZ_RTAB-1])
- memcpy(a->L_tab, RTA_DATA(tb[TCA_CSZ_RTAB-1]), 1024);
-
- spin_unlock_bh(&sch->dev->queue_lock);
- return 0;
- }
- /* NI */
- return 0;
-}
-
-static int csz_delete(struct Qdisc *sch, unsigned long cl)
-{
- struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
- struct csz_flow *a;
-
- cl--;
-
- if (cl >= CSZ_FLOWS)
- return -ENOENT;
- if (cl >= CSZ_GUARANTEED || q->flow[cl].L_tab == NULL)
- return -EINVAL;
-
- a = &q->flow[cl];
-
- spin_lock_bh(&sch->dev->queue_lock);
- a->fprev->fnext = a->fnext;
- a->fnext->fprev = a->fprev;
- a->sprev->snext = a->snext;
- a->snext->sprev = a->sprev;
- a->start = a->finish = 0;
- kfree(xchg(&q->flow[cl].L_tab, NULL));
- spin_unlock_bh(&sch->dev->queue_lock);
-
- return 0;
-}
-
-static int csz_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm)
-{
- struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
- unsigned char *b = skb->tail;
- struct rtattr *rta;
- struct tc_csz_copt opt;
-
- tcm->tcm_handle = sch->handle|cl;
-
- cl--;
-
- if (cl > CSZ_FLOWS)
- goto rtattr_failure;
-
- if (cl < CSZ_GUARANTEED) {
- struct csz_flow *f = &q->flow[cl];
-
- if (f->L_tab == NULL)
- goto rtattr_failure;
-
- rta = (struct rtattr*)b;
- RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
-
- opt.limit = f->limit;
- opt.rate = f->rate;
- opt.slice = f->slice;
- memset(&opt.peakrate, 0, sizeof(opt.peakrate));
-#ifdef CSZ_PLUS_TBF
- opt.buffer = f->buffer;
- opt.mtu = f->mtu;
-#else
- opt.buffer = 0;
- opt.mtu = 0;
-#endif
-
- RTA_PUT(skb, TCA_CSZ_PARMS, sizeof(opt), &opt);
- rta->rta_len = skb->tail - b;
- }
-
- return skb->len;
-
-rtattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
-}
-
-static void csz_walk(struct Qdisc *sch, struct qdisc_walker *arg)
-{
- struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
- int prio = 0;
-
- if (arg->stop)
- return;
-
- for (prio = 0; prio < CSZ_FLOWS; prio++) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (prio < CSZ_GUARANTEED && q->flow[prio].L_tab == NULL) {
- arg->count++;
- continue;
- }
- if (arg->fn(sch, prio+1, arg) < 0) {
- arg->stop = 1;
- break;
- }
- arg->count++;
- }
-}
-
-static struct tcf_proto ** csz_find_tcf(struct Qdisc *sch, unsigned long cl)
-{
- struct csz_sched_data *q = (struct csz_sched_data *)sch->data;
-
- if (cl)
- return NULL;
-
- return &q->filter_list;
-}
-
-struct Qdisc_class_ops csz_class_ops = {
- .graft = csz_graft,
- .leaf = csz_leaf,
- .get = csz_get,
- .put = csz_put,
- .change = csz_change,
- .delete = csz_delete,
- .walk = csz_walk,
- .tcf_chain = csz_find_tcf,
- .bind_tcf = csz_bind,
- .unbind_tcf = csz_put,
- .dump = csz_dump_class,
-};
-
-static struct Qdisc_ops csz_qdisc_ops = {
- .next = NULL,
- .cl_ops = &csz_class_ops,
- .id = "csz",
- .priv_size = sizeof(struct csz_sched_data),
- .enqueue = csz_enqueue,
- .dequeue = csz_dequeue,
- .requeue = NULL,
- .drop = NULL,
- .init = csz_init,
- .reset = csz_reset,
- .destroy = csz_destroy,
- .change = NULL,
- .dump = csz_dump,
- .owner = THIS_MODULE,
-};
-
-static int __init csz_module_init(void)
-{
- return register_qdisc(&csz_qdisc_ops);
-}
-static void __exit csz_module_exit(void)
-{
- unregister_qdisc(&csz_qdisc_ops);
-}
-module_init(csz_module_init)
-module_exit(csz_module_exit)
-MODULE_LICENSE("GPL");