620-sched_esfq.patch

--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -193,6 +193,33 @@ struct tc_sfq_xstats {
__s32 allot;
};
+/* ESFQ section */
+
+enum
+{
+ /* traditional */
+ TCA_SFQ_HASH_CLASSIC,
+ TCA_SFQ_HASH_DST,
+ TCA_SFQ_HASH_SRC,
+ TCA_SFQ_HASH_FWMARK,
+ /* conntrack */
+ TCA_SFQ_HASH_CTORIGDST,
+ TCA_SFQ_HASH_CTORIGSRC,
+ TCA_SFQ_HASH_CTREPLDST,
+ TCA_SFQ_HASH_CTREPLSRC,
+ TCA_SFQ_HASH_CTNATCHG,
+};
+
+struct tc_esfq_qopt
+{
+ unsigned quantum; /* Bytes per round allocated to flow */
+ int perturb_period; /* Period of hash perturbation */
+ __u32 limit; /* Maximal packets in queue */
+ unsigned divisor; /* Hash divisor */
+ unsigned flows; /* Maximal number of flows */
+ unsigned hash_kind; /* Hash function to use for flow identification */
+};
+
/* RED section */
enum {
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -148,6 +148,37 @@ config NET_SCH_SFQ
To compile this code as a module, choose M here: the
module will be called sch_sfq.
+config NET_SCH_ESFQ
+ tristate "Enhanced Stochastic Fairness Queueing (ESFQ)"
+ ---help---
+ Say Y here if you want to use the Enhanced Stochastic Fairness
+ Queueing (ESFQ) packet scheduling algorithm for some of your network
+ devices or as a leaf discipline for a classful qdisc such as HTB or
+ CBQ (see the top of <file:net/sched/sch_esfq.c> for details and
+ references to the SFQ algorithm).
+
+ This is an enhanced SFQ version which allows you to control some
+ hardcoded values in the SFQ scheduler.
+
+ ESFQ also adds control of the hash function used to identify packet
+ flows. The original SFQ discipline hashes by connection; ESFQ adds
+ several other hashing methods, such as by src IP or by dst IP, which
+ can be more fair to users in some networking situations.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_esfq.
+
+config NET_SCH_ESFQ_NFCT
+ bool "Connection Tracking Hash Types"
+ depends on NET_SCH_ESFQ && NF_CONNTRACK
+ ---help---
+ Say Y here to enable support for hashing based on netfilter connection
+ tracking information. This is useful for a router that is also using
+ NAT to connect privately-addressed hosts to the Internet. If you want
+ to provide fair distribution of upstream bandwidth, ESFQ must use
+ connection tracking information, since all outgoing packets will share
+ the same source address.
+
config NET_SCH_TEQL
tristate "True Link Equalizer (TEQL)"
---help---
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_NET_SCH_INGRESS) += sch_ing
obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o
obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
+obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o
obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
--- /dev/null
+++ b/net/sched/sch_esfq.c
@@ -0,0 +1,702 @@
+/*
+ * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <[email protected]>
+ *
+ * Changes: Alexander Atanasov, <[email protected]>
+ * Added dynamic depth,limit,divisor,hash_kind options.
+ * Added dst and src hashes.
+ *
+ * Alexander Clouter, <[email protected]>
+ * Ported ESFQ to Linux 2.6.
+ *
+ * Corey Hickey, <[email protected]>
+ * Maintenance of the Linux 2.6 port.
+ * Added fwmark hash (thanks to Robert Kurjata).
+ * Added usage of jhash.
+ * Added conntrack support.
+ * Added ctnatchg hash (thanks to Ben Pfountz).
+ */
+
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <net/netlink.h>
+#include <linux/ipv6.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <linux/jhash.h>
+#ifdef CONFIG_NET_SCH_ESFQ_NFCT
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+/* Stochastic Fairness Queuing algorithm.
+ For more comments look at sch_sfq.c.
+ The difference is that you can change limit, depth,
+ hash table size and choose alternate hash types.
+
+ classic: same as in sch_sfq.c
+ dst: destination IP address
+ src: source IP address
+ fwmark: netfilter mark value
+ ctorigdst: original destination IP address
+ ctorigsrc: original source IP address
+ ctrepldst: reply destination IP address
+ ctreplsrc: reply source IP
+
+*/
+
+#define ESFQ_HEAD 0
+#define ESFQ_TAIL 1
+
+/* This type should contain at least SFQ_DEPTH*2 values */
+typedef unsigned int esfq_index;
+
+struct esfq_head
+{
+ esfq_index next;
+ esfq_index prev;
+};
+
+struct esfq_sched_data
+{
+/* Parameters */
+ int perturb_period;
+ unsigned quantum; /* Allotment per round: MUST BE >= MTU */
+ int limit;
+ unsigned depth;
+ unsigned hash_divisor;
+ unsigned hash_kind;
+/* Variables */
+ struct timer_list perturb_timer;
+ int perturbation;
+ esfq_index tail; /* Index of current slot in round */
+ esfq_index max_depth; /* Maximal depth */
+
+ esfq_index *ht; /* Hash table */
+ esfq_index *next; /* Active slots link */
+ short *allot; /* Current allotment per slot */
+ unsigned short *hash; /* Hash value indexed by slots */
+ struct sk_buff_head *qs; /* Slot queue */
+ struct esfq_head *dep; /* Linked list of slots, indexed by depth */
+};
+
+/* This contains the info we will hash. */
+struct esfq_packet_info
+{
+ u32 proto; /* protocol or port */
+ u32 src; /* source from packet header */
+ u32 dst; /* destination from packet header */
+ u32 ctorigsrc; /* original source from conntrack */
+ u32 ctorigdst; /* original destination from conntrack */
+ u32 ctreplsrc; /* reply source from conntrack */
+ u32 ctrepldst; /* reply destination from conntrack */
+ u32 mark; /* netfilter mark (fwmark) */
+};
+
+static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a)
+{
+ return jhash_1word(a, q->perturbation) & (q->hash_divisor-1);
+}
+
+static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b)
+{
+ return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1);
+}
+
+static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c)
+{
+ return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1);
+}
+
+static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb)
+{
+ struct esfq_packet_info info;
+#ifdef CONFIG_NET_SCH_ESFQ_NFCT
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+#endif
+
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_IP):
+ {
+ struct iphdr *iph = ip_hdr(skb);
+ info.dst = iph->daddr;
+ info.src = iph->saddr;
+ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
+ (iph->protocol == IPPROTO_TCP ||
+ iph->protocol == IPPROTO_UDP ||
+ iph->protocol == IPPROTO_SCTP ||
+ iph->protocol == IPPROTO_DCCP ||
+ iph->protocol == IPPROTO_ESP))
+ info.proto = *(((u32*)iph) + iph->ihl);
+ else
+ info.proto = iph->protocol;
+ break;
+ }
+ case __constant_htons(ETH_P_IPV6):
+ {
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ /* Hash ipv6 addresses into a u32. This isn't ideal,
+ * but the code is simple. */
+ info.dst = jhash2(iph->daddr.s6_addr32, 4, q->perturbation);
+ info.src = jhash2(iph->saddr.s6_addr32, 4, q->perturbation);
+ if (iph->nexthdr == IPPROTO_TCP ||
+ iph->nexthdr == IPPROTO_UDP ||
+ iph->nexthdr == IPPROTO_SCTP ||
+ iph->nexthdr == IPPROTO_DCCP ||
+ iph->nexthdr == IPPROTO_ESP)
+ info.proto = *(u32*)&iph[1];
+ else
+ info.proto = iph->nexthdr;
+ break;
+ }
+ default:
+ info.dst = (u32)(unsigned long)skb_dst(skb);
+ info.src = (u32)(unsigned long)skb->sk;
+ info.proto = skb->protocol;
+ }
+
+ info.mark = skb->mark;
+
+#ifdef CONFIG_NET_SCH_ESFQ_NFCT
+ /* defaults if there is no conntrack info */
+ info.ctorigsrc = info.src;
+ info.ctorigdst = info.dst;
+ info.ctreplsrc = info.dst;
+ info.ctrepldst = info.src;
+ /* collect conntrack info */
+ if (ct && ct != &nf_conntrack_untracked) {
+ if (skb->protocol == __constant_htons(ETH_P_IP)) {
+ info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
+ info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip;
+ info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip;
+ info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip;
+ }
+ else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
+ /* Again, hash ipv6 addresses into a single u32. */
+ info.ctorigsrc = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6, 4, q->perturbation);
+ info.ctorigdst = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip6, 4, q->perturbation);
+ info.ctreplsrc = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6, 4, q->perturbation);
+ info.ctrepldst = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6, 4, q->perturbation);
+ }
+
+ }
+#endif
+
+ switch(q->hash_kind) {
+ case TCA_SFQ_HASH_CLASSIC:
+ return esfq_jhash_3words(q, info.dst, info.src, info.proto);
+ case TCA_SFQ_HASH_DST:
+ return esfq_jhash_1word(q, info.dst);
+ case TCA_SFQ_HASH_SRC:
+ return esfq_jhash_1word(q, info.src);
+ case TCA_SFQ_HASH_FWMARK:
+ return esfq_jhash_1word(q, info.mark);
+#ifdef CONFIG_NET_SCH_ESFQ_NFCT
+ case TCA_SFQ_HASH_CTORIGDST:
+ return esfq_jhash_1word(q, info.ctorigdst);
+ case TCA_SFQ_HASH_CTORIGSRC:
+ return esfq_jhash_1word(q, info.ctorigsrc);
+ case TCA_SFQ_HASH_CTREPLDST:
+ return esfq_jhash_1word(q, info.ctrepldst);
+ case TCA_SFQ_HASH_CTREPLSRC:
+ return esfq_jhash_1word(q, info.ctreplsrc);
+ case TCA_SFQ_HASH_CTNATCHG:
+ {
+ if (info.ctorigdst == info.ctreplsrc)
+ return esfq_jhash_1word(q, info.ctorigsrc);
+ return esfq_jhash_1word(q, info.ctreplsrc);
+ }
+#endif
+ default:
+ if (net_ratelimit())
+ printk(KERN_WARNING "ESFQ: Unknown hash method. Falling back to classic.\n");
+ }
+ return esfq_jhash_3words(q, info.dst, info.src, info.proto);
+}
+
+static inline void esfq_link(struct esfq_sched_data *q, esfq_index x)
+{
+ esfq_index p, n;
+ int d = q->qs[x].qlen + q->depth;
+
+ p = d;
+ n = q->dep[d].next;
+ q->dep[x].next = n;
+ q->dep[x].prev = p;
+ q->dep[p].next = q->dep[n].prev = x;
+}
+
+static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x)
+{
+ esfq_index p, n;
+
+ n = q->dep[x].next;
+ p = q->dep[x].prev;
+ q->dep[p].next = n;
+ q->dep[n].prev = p;
+
+ if (n == p && q->max_depth == q->qs[x].qlen + 1)
+ q->max_depth--;
+
+ esfq_link(q, x);
+}
+
+static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x)
+{
+ esfq_index p, n;
+ int d;
+
+ n = q->dep[x].next;
+ p = q->dep[x].prev;
+ q->dep[p].next = n;
+ q->dep[n].prev = p;
+ d = q->qs[x].qlen;
+ if (q->max_depth < d)
+ q->max_depth = d;
+
+ esfq_link(q, x);
+}
+
+static unsigned int esfq_drop(struct Qdisc *sch)
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ esfq_index d = q->max_depth;
+ struct sk_buff *skb;
+ unsigned int len;
+
+ /* Queue is full! Find the longest slot and
+ drop a packet from it */
+
+ if (d > 1) {
+ esfq_index x = q->dep[d+q->depth].next;
+ skb = q->qs[x].prev;
+ len = skb->len;
+ __skb_unlink(skb, &q->qs[x]);
+ kfree_skb(skb);
+ esfq_dec(q, x);
+ sch->q.qlen--;
+ sch->qstats.drops++;
+ sch->qstats.backlog -= len;
+ return len;
+ }
+
+ if (d == 1) {
+ /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
+ d = q->next[q->tail];
+ q->next[q->tail] = q->next[d];
+ q->allot[q->next[d]] += q->quantum;
+ skb = q->qs[d].prev;
+ len = skb->len;
+ __skb_unlink(skb, &q->qs[d]);
+ kfree_skb(skb);
+ esfq_dec(q, d);
+ sch->q.qlen--;
+ q->ht[q->hash[d]] = q->depth;
+ sch->qstats.drops++;
+ sch->qstats.backlog -= len;
+ return len;
+ }
+
+ return 0;
+}
+
+static void esfq_q_enqueue(struct sk_buff *skb, struct esfq_sched_data *q, unsigned int end)
+{
+ unsigned hash = esfq_hash(q, skb);
+ unsigned depth = q->depth;
+ esfq_index x;
+
+ x = q->ht[hash];
+ if (x == depth) {
+ q->ht[hash] = x = q->dep[depth].next;
+ q->hash[x] = hash;
+ }
+
+ if (end == ESFQ_TAIL)
+ __skb_queue_tail(&q->qs[x], skb);
+ else
+ __skb_queue_head(&q->qs[x], skb);
+
+ esfq_inc(q, x);
+ if (q->qs[x].qlen == 1) { /* The flow is new */
+ if (q->tail == depth) { /* It is the first flow */
+ q->tail = x;
+ q->next[x] = x;
+ q->allot[x] = q->quantum;
+ } else {
+ q->next[x] = q->next[q->tail];
+ q->next[q->tail] = x;
+ q->tail = x;
+ }
+ }
+}
+
+static int esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ esfq_q_enqueue(skb, q, ESFQ_TAIL);
+ sch->qstats.backlog += skb->len;
+ if (++sch->q.qlen < q->limit-1) {
+ sch->bstats.bytes += skb->len;
+ sch->bstats.packets++;
+ return 0;
+ }
+
+ sch->qstats.drops++;
+ esfq_drop(sch);
+ return NET_XMIT_CN;
+}
+
+static struct sk_buff *esfq_peek(struct Qdisc* sch)
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ esfq_index a;
+
+ /* No active slots */
+ if (q->tail == q->depth)
+ return NULL;
+
+ a = q->next[q->tail];
+ return skb_peek(&q->qs[a]);
+}
+
+static struct sk_buff *esfq_q_dequeue(struct esfq_sched_data *q)
+{
+ struct sk_buff *skb;
+ unsigned depth = q->depth;
+ esfq_index a, old_a;
+
+ /* No active slots */
+ if (q->tail == depth)
+ return NULL;
+
+ a = old_a = q->next[q->tail];
+
+ /* Grab packet */
+ skb = __skb_dequeue(&q->qs[a]);
+ esfq_dec(q, a);
+
+ /* Is the slot empty? */
+ if (q->qs[a].qlen == 0) {
+ q->ht[q->hash[a]] = depth;
+ a = q->next[a];
+ if (a == old_a) {
+ q->tail = depth;
+ return skb;
+ }
+ q->next[q->tail] = a;
+ q->allot[a] += q->quantum;
+ } else if ((q->allot[a] -= skb->len) <= 0) {
+ q->tail = a;
+ a = q->next[a];
+ q->allot[a] += q->quantum;
+ }
+
+ return skb;
+}
+
+static struct sk_buff *esfq_dequeue(struct Qdisc* sch)
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ struct sk_buff *skb;
+
+ skb = esfq_q_dequeue(q);
+ if (skb == NULL)
+ return NULL;
+ sch->q.qlen--;
+ sch->qstats.backlog -= skb->len;
+ return skb;
+}
+
+static void esfq_q_destroy(struct esfq_sched_data *q)
+{
+ del_timer(&q->perturb_timer);
+ if(q->ht)
+ kfree(q->ht);
+ if(q->dep)
+ kfree(q->dep);
+ if(q->next)
+ kfree(q->next);
+ if(q->allot)
+ kfree(q->allot);
+ if(q->hash)
+ kfree(q->hash);
+ if(q->qs)
+ kfree(q->qs);
+}
+
+static void esfq_destroy(struct Qdisc *sch)
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ esfq_q_destroy(q);
+}
+
+
+static void esfq_reset(struct Qdisc* sch)
+{
+ struct sk_buff *skb;
+
+ while ((skb = esfq_dequeue(sch)) != NULL)
+ kfree_skb(skb);
+}
+
+static void esfq_perturbation(unsigned long arg)
+{
+ struct Qdisc *sch = (struct Qdisc*)arg;
+ struct esfq_sched_data *q = qdisc_priv(sch);
+
+ q->perturbation = net_random()&0x1F;
+
+ if (q->perturb_period) {
+ q->perturb_timer.expires = jiffies + q->perturb_period;
+ add_timer(&q->perturb_timer);
+ }
+}
+
+static unsigned int esfq_check_hash(unsigned int kind)
+{
+ switch (kind) {
+ case TCA_SFQ_HASH_CTORIGDST:
+ case TCA_SFQ_HASH_CTORIGSRC:
+ case TCA_SFQ_HASH_CTREPLDST:
+ case TCA_SFQ_HASH_CTREPLSRC:
+ case TCA_SFQ_HASH_CTNATCHG:
+#ifndef CONFIG_NET_SCH_ESFQ_NFCT
+ {
+ if (net_ratelimit())
+ printk(KERN_WARNING "ESFQ: Conntrack hash types disabled in kernel config. Falling back to classic.\n");
+ return TCA_SFQ_HASH_CLASSIC;
+ }
+#endif
+ case TCA_SFQ_HASH_CLASSIC:
+ case TCA_SFQ_HASH_DST:
+ case TCA_SFQ_HASH_SRC:
+ case TCA_SFQ_HASH_FWMARK:
+ return kind;
+ default:
+ {
+ if (net_ratelimit())
+ printk(KERN_WARNING "ESFQ: Unknown hash type. Falling back to classic.\n");
+ return TCA_SFQ_HASH_CLASSIC;
+ }
+ }
+}
+
+static int esfq_q_init(struct esfq_sched_data *q, struct nlattr *opt)
+{
+ struct tc_esfq_qopt *ctl = nla_data(opt);
+ esfq_index p = ~0U/2;
+ int i;
+
+ if (opt && opt->nla_len < nla_attr_size(sizeof(*ctl)))
+ return -EINVAL;
+
+ q->perturbation = 0;
+ q->hash_kind = TCA_SFQ_HASH_CLASSIC;
+ q->max_depth = 0;
+ if (opt == NULL) {
+ q->perturb_period = 0;
+ q->hash_divisor = 1024;
+ q->tail = q->limit = q->depth = 128;
+
+ } else {
+ struct tc_esfq_qopt *ctl = nla_data(opt);
+ if (ctl->quantum)
+ q->quantum = ctl->quantum;
+ q->perturb_period = ctl->perturb_period*HZ;
+ q->hash_divisor = ctl->divisor ? : 1024;
+ q->tail = q->limit = q->depth = ctl->flows ? : 128;
+
+ if ( q->depth > p - 1 )
+ return -EINVAL;
+
+ if (ctl->limit)
+ q->limit = min_t(u32, ctl->limit, q->depth);
+
+ if (ctl->hash_kind) {
+ q->hash_kind = esfq_check_hash(ctl->hash_kind);
+ }
+ }
+
+ q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL);
+ if (!q->ht)
+ goto err_case;
+ q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL);
+ if (!q->dep)
+ goto err_case;
+ q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL);
+ if (!q->next)
+ goto err_case;
+ q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL);
+ if (!q->allot)
+ goto err_case;
+ q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL);
+ if (!q->hash)
+ goto err_case;
+ q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL);
+ if (!q->qs)
+ goto err_case;
+
+ for (i=0; i< q->hash_divisor; i++)
+ q->ht[i] = q->depth;
+ for (i=0; i<q->depth; i++) {
+ skb_queue_head_init(&q->qs[i]);
+ q->dep[i+q->depth].next = i+q->depth;
+ q->dep[i+q->depth].prev = i+q->depth;
+ }
+
+ for (i=0; i<q->depth; i++)
+ esfq_link(q, i);
+ return 0;
+err_case:
+ esfq_q_destroy(q);
+ return -ENOBUFS;
+}
+
+static int esfq_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ int err;
+
+ q->quantum = psched_mtu(qdisc_dev(sch)); /* default */
+ if ((err = esfq_q_init(q, opt)))
+ return err;
+
+ init_timer(&q->perturb_timer);
+ q->perturb_timer.data = (unsigned long)sch;
+ q->perturb_timer.function = esfq_perturbation;
+ if (q->perturb_period) {
+ q->perturb_timer.expires = jiffies + q->perturb_period;
+ add_timer(&q->perturb_timer);
+ }
+
+ return 0;
+}
+
+static int esfq_change(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ struct esfq_sched_data new;
+ struct sk_buff *skb;
+ int err;
+
+ /* set up new queue */
+ memset(&new, 0, sizeof(struct esfq_sched_data));
+ new.quantum = psched_mtu(qdisc_dev(sch)); /* default */
+ if ((err = esfq_q_init(&new, opt)))
+ return err;
+
+ /* copy all packets from the old queue to the new queue */
+ sch_tree_lock(sch);
+ while ((skb = esfq_q_dequeue(q)) != NULL)
+ esfq_q_enqueue(skb, &new, ESFQ_TAIL);
+
+ /* clean up the old queue */
+ esfq_q_destroy(q);
+
+ /* copy elements of the new queue into the old queue */
+ q->perturb_period = new.perturb_period;
+ q->quantum = new.quantum;
+ q->limit = new.limit;
+ q->depth = new.depth;
+ q->hash_divisor = new.hash_divisor;
+ q->hash_kind = new.hash_kind;
+ q->tail = new.tail;
+ q->max_depth = new.max_depth;
+ q->ht = new.ht;
+ q->dep = new.dep;
+ q->next = new.next;
+ q->allot = new.allot;
+ q->hash = new.hash;
+ q->qs = new.qs;
+
+ /* finish up */
+ if (q->perturb_period) {
+ q->perturb_timer.expires = jiffies + q->perturb_period;
+ add_timer(&q->perturb_timer);
+ } else {
+ q->perturbation = 0;
+ }
+ sch_tree_unlock(sch);
+ return 0;
+}
+
+static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tc_esfq_qopt opt;
+
+ opt.quantum = q->quantum;
+ opt.perturb_period = q->perturb_period/HZ;
+
+ opt.limit = q->limit;
+ opt.divisor = q->hash_divisor;
+ opt.flows = q->depth;
+ opt.hash_kind = q->hash_kind;
+
+ NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static struct Qdisc_ops esfq_qdisc_ops =
+{
+ .next = NULL,
+ .cl_ops = NULL,
+ .id = "esfq",
+ .priv_size = sizeof(struct esfq_sched_data),
+ .enqueue = esfq_enqueue,
+ .dequeue = esfq_dequeue,
+ .peek = esfq_peek,
+ .drop = esfq_drop,
+ .init = esfq_init,
+ .reset = esfq_reset,
+ .destroy = esfq_destroy,
+ .change = esfq_change,
+ .dump = esfq_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init esfq_module_init(void)
+{
+ return register_qdisc(&esfq_qdisc_ops);
+}
+static void __exit esfq_module_exit(void)
+{
+ unregister_qdisc(&esfq_qdisc_ops);
+}
+module_init(esfq_module_init)
+module_exit(esfq_module_exit)
+MODULE_LICENSE("GPL");
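
Note on the divisor parameter: the esfq_jhash_*() helpers in the patch select a bucket with "hash & (q->hash_divisor - 1)" rather than a modulo, so a divisor that is not a power of two silently leaves part of the hash table unreachable (the default of 1024 is a power of two and is unaffected). The following is a standalone userspace sketch, separate from the patch itself, that simply counts how many bucket indices survive that masking for a few candidate divisors; the non-power-of-two values are hypothetical configuration choices used only for illustration.

/* Standalone sketch (not part of the patch): count the bucket indices
 * that remain reachable when an index is reduced with "& (divisor - 1)",
 * the same masking esfq_jhash_1word/2words/3words apply to the jhash
 * result. Only power-of-two divisors keep every bucket reachable. */
#include <stdio.h>

static unsigned int reachable_buckets(unsigned int divisor)
{
        unsigned int mask = divisor - 1;        /* mask used by the qdisc */
        unsigned int count = 0, i;

        for (i = 0; i < divisor; i++)
                if ((i & mask) == i)            /* index survives the mask */
                        count++;
        return count;
}

int main(void)
{
        /* 1024 is the ESFQ default; the others are hypothetical values. */
        unsigned int divisors[] = { 1024, 1000, 512, 300 };
        unsigned int i;

        for (i = 0; i < sizeof(divisors) / sizeof(divisors[0]); i++)
                printf("divisor %4u -> %4u reachable buckets\n",
                       divisors[i], reachable_buckets(divisors[i]));
        return 0;
}

With a divisor of 1024 every bucket is reachable, while a divisor of 1000 masks with 0x3E7 and reaches only 256 buckets, so flows would collide more often than the configured table size suggests.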