700-multiple_default_gateways.patch 35 KB

  1. --- a/include/linux/netfilter_ipv4/ip_nat.h
  2. +++ b/include/linux/netfilter_ipv4/ip_nat.h
  3. @@ -121,5 +121,13 @@ extern int ip_nat_used_tuple(const struc
  4. extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv,
  5. u_int32_t newval,
  6. u_int16_t oldcheck);
  7. +
  8. +/* Call input routing for SNAT-ed traffic */
  9. +extern unsigned int ip_nat_route_input(unsigned int hooknum,
  10. + struct sk_buff **pskb,
  11. + const struct net_device *in,
  12. + const struct net_device *out,
  13. + int (*okfn)(struct sk_buff *));
  14. +
  15. #endif /*__KERNEL__*/
  16. #endif
  17. --- a/include/linux/rtnetlink.h
  18. +++ b/include/linux/rtnetlink.h
  19. @@ -234,6 +234,8 @@ struct rtnexthop
  20. #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
  21. #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
  22. #define RTNH_F_ONLINK 4 /* Gateway is forced on link */
  23. +#define RTNH_F_SUSPECT 8 /* We don't know the real state */
  24. +#define RTNH_F_BADSTATE (RTNH_F_DEAD | RTNH_F_SUSPECT)
  25. /* Macros to handle hexthops */
  26. --- a/include/net/ip_fib.h
  27. +++ b/include/net/ip_fib.h
  28. @@ -162,7 +162,8 @@ static inline int fib_lookup(const struc
  29. static inline void fib_select_default(const struct rt_key *key, struct fib_result *res)
  30. {
  31. - if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
  32. + if ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) ||
  33. + FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST)
  34. main_table->tb_select_default(main_table, key, res);
  35. }
  36. @@ -174,6 +175,7 @@ extern struct fib_table * fib_tables[RT_
  37. extern int fib_lookup(const struct rt_key *key, struct fib_result *res);
  38. extern struct fib_table *__fib_new_table(int id);
  39. extern void fib_rule_put(struct fib_rule *r);
  40. +extern int fib_result_table(struct fib_result *res);
  41. static inline struct fib_table *fib_get_table(int id)
  42. {
  43. @@ -275,5 +277,6 @@ static inline void fib_res_put(struct fi
  44. #endif
  45. }
  46. +extern rwlock_t fib_nhflags_lock;
  47. #endif /* _NET_FIB_H */
  48. --- a/include/net/route.h
  49. +++ b/include/net/route.h
  50. @@ -49,6 +49,8 @@ struct rt_key
  51. {
  52. __u32 dst;
  53. __u32 src;
  54. + __u32 lsrc;
  55. + __u32 gw;
  56. int iif;
  57. int oif;
  58. #ifdef CONFIG_IP_ROUTE_FWMARK
  59. @@ -128,6 +130,7 @@ extern void ip_rt_advice(struct rtable
  60. extern void rt_cache_flush(int how);
  61. extern int ip_route_output_key(struct rtable **, const struct rt_key *key);
  62. extern int ip_route_input(struct sk_buff*, u32 dst, u32 src, u8 tos, struct net_device *devin);
  63. +extern int ip_route_input_lookup(struct sk_buff*, u32 dst, u32 src, u8 tos, struct net_device *devin, u32 lsrc);
  64. extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu);
  65. extern void ip_rt_update_pmtu(struct dst_entry *dst, unsigned mtu);
  66. extern void ip_rt_send_redirect(struct sk_buff *skb);
  67. @@ -148,6 +151,15 @@ static inline int ip_route_output(struct
  68. }
  69. +static inline int
  70. +ip_route_output_lookup(struct rtable **rp,
  71. + u32 daddr, u32 saddr, u32 tos, int oif, u32 gw)
  72. +{
  73. + struct rt_key key = { dst:daddr, src:saddr, gw:gw, oif:oif, tos:tos };
  74. +
  75. + return ip_route_output_key(rp, &key);
  76. +}
  77. +
  78. static inline void ip_rt_put(struct rtable * rt)
  79. {
  80. if (rt)
  81. --- a/net/ipv4/fib_frontend.c
  82. +++ b/net/ipv4/fib_frontend.c
  83. @@ -54,6 +54,8 @@
  84. struct fib_table *local_table;
  85. struct fib_table *main_table;
  86. +#define FIB_RES_TABLE(r) (RT_TABLE_MAIN)
  87. +
  88. #else
  89. #define RT_TABLE_MIN 1
  90. @@ -71,6 +73,7 @@ struct fib_table *__fib_new_table(int id
  91. return tb;
  92. }
  93. +#define FIB_RES_TABLE(r) (fib_result_table(r))
  94. #endif /* CONFIG_IP_MULTIPLE_TABLES */
  95. @@ -209,6 +212,9 @@ int fib_validate_source(u32 src, u32 dst
  96. struct in_device *in_dev;
  97. struct rt_key key;
  98. struct fib_result res;
  99. + int table;
  100. + unsigned char prefixlen;
  101. + unsigned char scope;
  102. int no_addr, rpf;
  103. int ret;
  104. @@ -216,6 +222,7 @@ int fib_validate_source(u32 src, u32 dst
  105. key.src = dst;
  106. key.tos = tos;
  107. key.oif = 0;
  108. + key.gw = 0;
  109. key.iif = oif;
  110. key.scope = RT_SCOPE_UNIVERSE;
  111. @@ -237,31 +244,35 @@ int fib_validate_source(u32 src, u32 dst
  112. goto e_inval_res;
  113. *spec_dst = FIB_RES_PREFSRC(res);
  114. fib_combine_itag(itag, &res);
  115. -#ifdef CONFIG_IP_ROUTE_MULTIPATH
  116. - if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
  117. -#else
  118. if (FIB_RES_DEV(res) == dev)
  119. -#endif
  120. {
  121. ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
  122. fib_res_put(&res);
  123. return ret;
  124. }
  125. + table = FIB_RES_TABLE(&res);
  126. + prefixlen = res.prefixlen;
  127. + scope = res.scope;
  128. fib_res_put(&res);
  129. if (no_addr)
  130. goto last_resort;
  131. - if (rpf)
  132. - goto e_inval;
  133. key.oif = dev->ifindex;
  134. ret = 0;
  135. if (fib_lookup(&key, &res) == 0) {
  136. - if (res.type == RTN_UNICAST) {
  137. + if (res.type == RTN_UNICAST &&
  138. + ((table == FIB_RES_TABLE(&res) &&
  139. + res.prefixlen >= prefixlen && res.scope >= scope) ||
  140. + !rpf)) {
  141. *spec_dst = FIB_RES_PREFSRC(res);
  142. ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
  143. + fib_res_put(&res);
  144. + return ret;
  145. }
  146. fib_res_put(&res);
  147. }
  148. + if (rpf)
  149. + goto e_inval;
  150. return ret;
  151. last_resort:
  152. @@ -579,9 +590,7 @@ static int fib_inetaddr_event(struct not
  153. switch (event) {
  154. case NETDEV_UP:
  155. fib_add_ifaddr(ifa);
  156. -#ifdef CONFIG_IP_ROUTE_MULTIPATH
  157. fib_sync_up(ifa->ifa_dev->dev);
  158. -#endif
  159. rt_cache_flush(-1);
  160. break;
  161. case NETDEV_DOWN:
  162. @@ -617,9 +626,7 @@ static int fib_netdev_event(struct notif
  163. for_ifa(in_dev) {
  164. fib_add_ifaddr(ifa);
  165. } endfor_ifa(in_dev);
  166. -#ifdef CONFIG_IP_ROUTE_MULTIPATH
  167. fib_sync_up(dev);
  168. -#endif
  169. rt_cache_flush(-1);
  170. break;
  171. case NETDEV_DOWN:
  172. --- a/net/ipv4/fib_hash.c
  173. +++ b/net/ipv4/fib_hash.c
  174. @@ -71,6 +71,7 @@ struct fib_node
  175. struct fib_info *fn_info;
  176. #define FIB_INFO(f) ((f)->fn_info)
  177. fn_key_t fn_key;
  178. + int fn_last_dflt;
  179. u8 fn_tos;
  180. u8 fn_type;
  181. u8 fn_scope;
  182. @@ -336,72 +337,123 @@ out:
  183. return err;
  184. }
  185. -static int fn_hash_last_dflt=-1;
  186. -
  187. -static int fib_detect_death(struct fib_info *fi, int order,
  188. - struct fib_info **last_resort, int *last_idx)
  189. +static int fib_detect_death(struct fib_info *fi, int order, int last_dflt,
  190. + struct fib_info **last_resort, int *last_idx,
  191. + int *last_nhsel, const struct rt_key *key)
  192. {
  193. struct neighbour *n;
  194. - int state = NUD_NONE;
  195. + int nhsel;
  196. + int state;
  197. + struct fib_nh * nh;
  198. + u32 dst;
  199. + int flag, dead = 1;
  200. +
  201. + /* change_nexthops(fi) { */
  202. + for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) {
  203. + if (key->oif && key->oif != nh->nh_oif)
  204. + continue;
  205. + if (key->gw && key->gw != nh->nh_gw && nh->nh_gw &&
  206. + nh->nh_scope == RT_SCOPE_LINK)
  207. + continue;
  208. + if (nh->nh_flags & RTNH_F_DEAD)
  209. + continue;
  210. - n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
  211. - if (n) {
  212. - state = n->nud_state;
  213. - neigh_release(n);
  214. + flag = 0;
  215. + if (nh->nh_dev->flags & IFF_NOARP) {
  216. + dead = 0;
  217. + goto setfl;
  218. + }
  219. +
  220. + dst = nh->nh_gw;
  221. + if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK)
  222. + dst = key->dst;
  223. +
  224. + state = NUD_NONE;
  225. + n = neigh_lookup(&arp_tbl, &dst, nh->nh_dev);
  226. + if (n) {
  227. + state = n->nud_state;
  228. + neigh_release(n);
  229. + }
  230. + if (state==NUD_REACHABLE ||
  231. + ((state&NUD_VALID) && order != last_dflt)) {
  232. + dead = 0;
  233. + goto setfl;
  234. + }
  235. + if (!(state&NUD_VALID))
  236. + flag = 1;
  237. + if (!dead)
  238. + goto setfl;
  239. + if ((state&NUD_VALID) ||
  240. + (*last_idx<0 && order >= last_dflt)) {
  241. + *last_resort = fi;
  242. + *last_idx = order;
  243. + *last_nhsel = nhsel;
  244. + }
  245. +
  246. + setfl:
  247. +
  248. + read_lock_bh(&fib_nhflags_lock);
  249. + if (flag)
  250. + nh->nh_flags |= RTNH_F_SUSPECT;
  251. + else
  252. + nh->nh_flags &= ~RTNH_F_SUSPECT;
  253. + read_unlock_bh(&fib_nhflags_lock);
  254. }
  255. - if (state==NUD_REACHABLE)
  256. - return 0;
  257. - if ((state&NUD_VALID) && order != fn_hash_last_dflt)
  258. - return 0;
  259. - if ((state&NUD_VALID) ||
  260. - (*last_idx<0 && order > fn_hash_last_dflt)) {
  261. - *last_resort = fi;
  262. - *last_idx = order;
  263. - }
  264. - return 1;
  265. + /* } endfor_nexthops(fi) */
  266. +
  267. + return dead;
  268. }
  269. static void
  270. fn_hash_select_default(struct fib_table *tb, const struct rt_key *key, struct fib_result *res)
  271. {
  272. - int order, last_idx;
  273. - struct fib_node *f;
  274. + int order, last_idx, last_dflt, last_nhsel;
  275. + struct fib_node *f, *first_node;
  276. struct fib_info *fi = NULL;
  277. struct fib_info *last_resort;
  278. struct fn_hash *t = (struct fn_hash*)tb->tb_data;
  279. - struct fn_zone *fz = t->fn_zones[0];
  280. + struct fn_zone *fz = t->fn_zones[res->prefixlen];
  281. + fn_key_t k;
  282. if (fz == NULL)
  283. return;
  284. + k = fz_key(key->dst, fz);
  285. + last_dflt = -2;
  286. + first_node = NULL;
  287. last_idx = -1;
  288. last_resort = NULL;
  289. + last_nhsel = 0;
  290. order = -1;
  291. read_lock(&fib_hash_lock);
  292. - for (f = fz->fz_hash[0]; f; f = f->fn_next) {
  293. + for (f = fz_chain(k, fz); f; f = f->fn_next) {
  294. struct fib_info *next_fi = FIB_INFO(f);
  295. - if ((f->fn_state&FN_S_ZOMBIE) ||
  296. + if (!fn_key_eq(k, f->fn_key) ||
  297. + (f->fn_state&FN_S_ZOMBIE) ||
  298. f->fn_scope != res->scope ||
  299. +#ifdef CONFIG_IP_ROUTE_TOS
  300. + (f->fn_tos && f->fn_tos != key->tos) ||
  301. +#endif
  302. f->fn_type != RTN_UNICAST)
  303. continue;
  304. if (next_fi->fib_priority > res->fi->fib_priority)
  305. break;
  306. - if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
  307. - continue;
  308. f->fn_state |= FN_S_ACCESSED;
  309. - if (fi == NULL) {
  310. - if (next_fi != res->fi)
  311. - break;
  312. - } else if (!fib_detect_death(fi, order, &last_resort, &last_idx)) {
  313. + if (!first_node) {
  314. + last_dflt = f->fn_last_dflt;
  315. + first_node = f;
  316. + }
  317. + if (fi && !fib_detect_death(fi, order, last_dflt,
  318. + &last_resort, &last_idx, &last_nhsel, key)) {
  319. if (res->fi)
  320. fib_info_put(res->fi);
  321. res->fi = fi;
  322. atomic_inc(&fi->fib_clntref);
  323. - fn_hash_last_dflt = order;
  324. + first_node->fn_last_dflt = order;
  325. goto out;
  326. }
  327. fi = next_fi;
  328. @@ -409,16 +461,25 @@ fn_hash_select_default(struct fib_table
  329. }
  330. if (order<=0 || fi==NULL) {
  331. - fn_hash_last_dflt = -1;
  332. + if (fi && fi->fib_nhs > 1 &&
  333. + fib_detect_death(fi, order, last_dflt,
  334. + &last_resort, &last_idx, &last_nhsel, key) &&
  335. + last_resort == fi) {
  336. + read_lock_bh(&fib_nhflags_lock);
  337. + fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
  338. + read_unlock_bh(&fib_nhflags_lock);
  339. + }
  340. + if (first_node) first_node->fn_last_dflt = -1;
  341. goto out;
  342. }
  343. - if (!fib_detect_death(fi, order, &last_resort, &last_idx)) {
  344. + if (!fib_detect_death(fi, order, last_dflt, &last_resort, &last_idx,
  345. + &last_nhsel, key)) {
  346. if (res->fi)
  347. fib_info_put(res->fi);
  348. res->fi = fi;
  349. atomic_inc(&fi->fib_clntref);
  350. - fn_hash_last_dflt = order;
  351. + first_node->fn_last_dflt = order;
  352. goto out;
  353. }
  354. @@ -428,8 +489,11 @@ fn_hash_select_default(struct fib_table
  355. res->fi = last_resort;
  356. if (last_resort)
  357. atomic_inc(&last_resort->fib_clntref);
  358. + read_lock_bh(&fib_nhflags_lock);
  359. + last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
  360. + read_unlock_bh(&fib_nhflags_lock);
  361. + first_node->fn_last_dflt = last_idx;
  362. }
  363. - fn_hash_last_dflt = last_idx;
  364. out:
  365. read_unlock(&fib_hash_lock);
  366. }
  367. @@ -589,6 +653,7 @@ replace:
  368. memset(new_f, 0, sizeof(struct fib_node));
  369. + new_f->fn_last_dflt = -1;
  370. new_f->fn_key = key;
  371. #ifdef CONFIG_IP_ROUTE_TOS
  372. new_f->fn_tos = tos;
  373. --- a/net/ipv4/fib_rules.c
  374. +++ b/net/ipv4/fib_rules.c
  375. @@ -307,6 +307,11 @@ static void fib_rules_attach(struct net_
  376. }
  377. }
  378. +int fib_result_table(struct fib_result *res)
  379. +{
  380. + return res->r->r_table;
  381. +}
  382. +
  383. int fib_lookup(const struct rt_key *key, struct fib_result *res)
  384. {
  385. int err;
  386. @@ -371,8 +376,10 @@ FRprintk("FAILURE\n");
  387. void fib_select_default(const struct rt_key *key, struct fib_result *res)
  388. {
  389. - if (res->r && res->r->r_action == RTN_UNICAST &&
  390. - FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
  391. + if (res->r &&
  392. + (res->r->r_action == RTN_UNICAST || res->r->r_action == RTN_NAT) &&
  393. + ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) ||
  394. + FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST)) {
  395. struct fib_table *tb;
  396. if ((tb = fib_get_table(res->r->r_table)) != NULL)
  397. tb->tb_select_default(tb, key, res);
  398. --- a/net/ipv4/fib_semantics.c
  399. +++ b/net/ipv4/fib_semantics.c
  400. @@ -48,6 +48,7 @@
  401. static struct fib_info *fib_info_list;
  402. static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED;
  403. int fib_info_cnt;
  404. +rwlock_t fib_nhflags_lock = RW_LOCK_UNLOCKED;
  405. #define for_fib_info() { struct fib_info *fi; \
  406. for (fi = fib_info_list; fi; fi = fi->fib_next)
  407. @@ -150,7 +151,7 @@ static __inline__ int nh_comp(const stru
  408. #ifdef CONFIG_NET_CLS_ROUTE
  409. nh->nh_tclassid != onh->nh_tclassid ||
  410. #endif
  411. - ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
  412. + ((nh->nh_flags^onh->nh_flags)&~RTNH_F_BADSTATE))
  413. return -1;
  414. onh++;
  415. } endfor_nexthops(fi);
  416. @@ -166,7 +167,7 @@ static __inline__ struct fib_info * fib_
  417. nfi->fib_prefsrc == fi->fib_prefsrc &&
  418. nfi->fib_priority == fi->fib_priority &&
  419. memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
  420. - ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
  421. + ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_BADSTATE) == 0 &&
  422. (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
  423. return fi;
  424. } endfor_fib_info();
  425. @@ -365,8 +366,11 @@ static int fib_check_nh(const struct rtm
  426. return -EINVAL;
  427. if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
  428. return -ENODEV;
  429. - if (!(dev->flags&IFF_UP))
  430. - return -ENETDOWN;
  431. + if (!(dev->flags&IFF_UP)) {
  432. + if (fi->fib_protocol != RTPROT_STATIC)
  433. + return -ENETDOWN;
  434. + nh->nh_flags |= RTNH_F_DEAD;
  435. + }
  436. nh->nh_dev = dev;
  437. dev_hold(dev);
  438. nh->nh_scope = RT_SCOPE_LINK;
  439. @@ -380,23 +384,48 @@ static int fib_check_nh(const struct rtm
  440. /* It is not necessary, but requires a bit of thinking */
  441. if (key.scope < RT_SCOPE_LINK)
  442. key.scope = RT_SCOPE_LINK;
  443. - if ((err = fib_lookup(&key, &res)) != 0)
  444. - return err;
  445. - err = -EINVAL;
  446. - if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
  447. - goto out;
  448. - nh->nh_scope = res.scope;
  449. - nh->nh_oif = FIB_RES_OIF(res);
  450. - if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
  451. - goto out;
  452. - dev_hold(nh->nh_dev);
  453. - err = -ENETDOWN;
  454. - if (!(nh->nh_dev->flags & IFF_UP))
  455. - goto out;
  456. - err = 0;
  457. +
  458. + err = fib_lookup(&key, &res);
  459. + if (err) {
  460. + struct in_device *in_dev;
  461. +
  462. + if (err != -ENETUNREACH ||
  463. + fi->fib_protocol != RTPROT_STATIC)
  464. + return err;
  465. +
  466. + in_dev = inetdev_by_index(nh->nh_oif);
  467. + if (in_dev == NULL ||
  468. + in_dev->dev->flags & IFF_UP) {
  469. + if (in_dev)
  470. + in_dev_put(in_dev);
  471. + return err;
  472. + }
  473. + nh->nh_flags |= RTNH_F_DEAD;
  474. + nh->nh_scope = RT_SCOPE_LINK;
  475. + nh->nh_dev = in_dev->dev;
  476. + dev_hold(nh->nh_dev);
  477. + in_dev_put(in_dev);
  478. + } else {
  479. + err = -EINVAL;
  480. + if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
  481. + goto out;
  482. + nh->nh_scope = res.scope;
  483. + nh->nh_oif = FIB_RES_OIF(res);
  484. + if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
  485. + goto out;
  486. + dev_hold(nh->nh_dev);
  487. + if (!(nh->nh_dev->flags & IFF_UP)) {
  488. + if (fi->fib_protocol != RTPROT_STATIC) {
  489. + err = -ENETDOWN;
  490. + goto out;
  491. + }
  492. + nh->nh_flags |= RTNH_F_DEAD;
  493. + }
  494. + err = 0;
  495. out:
  496. - fib_res_put(&res);
  497. - return err;
  498. + fib_res_put(&res);
  499. + return err;
  500. + }
  501. } else {
  502. struct in_device *in_dev;
  503. @@ -407,8 +436,11 @@ out:
  504. if (in_dev == NULL)
  505. return -ENODEV;
  506. if (!(in_dev->dev->flags&IFF_UP)) {
  507. - in_dev_put(in_dev);
  508. - return -ENETDOWN;
  509. + if (fi->fib_protocol != RTPROT_STATIC) {
  510. + in_dev_put(in_dev);
  511. + return -ENETDOWN;
  512. + }
  513. + nh->nh_flags |= RTNH_F_DEAD;
  514. }
  515. nh->nh_dev = in_dev->dev;
  516. dev_hold(nh->nh_dev);
  517. @@ -606,8 +638,12 @@ fib_semantic_match(int type, struct fib_
  518. for_nexthops(fi) {
  519. if (nh->nh_flags&RTNH_F_DEAD)
  520. continue;
  521. - if (!key->oif || key->oif == nh->nh_oif)
  522. - break;
  523. + if (key->oif && key->oif != nh->nh_oif)
  524. + continue;
  525. + if (key->gw && key->gw != nh->nh_gw &&
  526. + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
  527. + continue;
  528. + break;
  529. }
  530. #ifdef CONFIG_IP_ROUTE_MULTIPATH
  531. if (nhsel < fi->fib_nhs) {
  532. @@ -873,22 +909,35 @@ int fib_sync_down(u32 local, struct net_
  533. if (local && fi->fib_prefsrc == local) {
  534. fi->fib_flags |= RTNH_F_DEAD;
  535. ret++;
  536. - } else if (dev && fi->fib_nhs) {
  537. + } else if (fi->fib_nhs) {
  538. int dead = 0;
  539. change_nexthops(fi) {
  540. - if (nh->nh_flags&RTNH_F_DEAD)
  541. - dead++;
  542. - else if (nh->nh_dev == dev &&
  543. - nh->nh_scope != scope) {
  544. - nh->nh_flags |= RTNH_F_DEAD;
  545. + if (nh->nh_flags&RTNH_F_DEAD) {
  546. + if (fi->fib_protocol!=RTPROT_STATIC ||
  547. + nh->nh_dev == NULL ||
  548. + !__in_dev_get(nh->nh_dev) ||
  549. + nh->nh_dev->flags&IFF_UP)
  550. + dead++;
  551. + } else if ((nh->nh_dev == dev && dev &&
  552. + nh->nh_scope != scope) ||
  553. + (local == nh->nh_gw && local &&
  554. + nh->nh_oif)) {
  555. + write_lock_bh(&fib_nhflags_lock);
  556. #ifdef CONFIG_IP_ROUTE_MULTIPATH
  557. - spin_lock_bh(&fib_multipath_lock);
  558. + spin_lock(&fib_multipath_lock);
  559. + nh->nh_flags |= RTNH_F_DEAD;
  560. fi->fib_power -= nh->nh_power;
  561. nh->nh_power = 0;
  562. - spin_unlock_bh(&fib_multipath_lock);
  563. + spin_unlock(&fib_multipath_lock);
  564. +#else
  565. + nh->nh_flags |= RTNH_F_DEAD;
  566. #endif
  567. - dead++;
  568. + write_unlock_bh(&fib_nhflags_lock);
  569. + if (fi->fib_protocol!=RTPROT_STATIC ||
  570. + force ||
  571. + (dev && __in_dev_get(dev) == NULL))
  572. + dead++;
  573. }
  574. #ifdef CONFIG_IP_ROUTE_MULTIPATH
  575. if (force > 1 && nh->nh_dev == dev) {
  576. @@ -906,37 +955,55 @@ int fib_sync_down(u32 local, struct net_
  577. return ret;
  578. }
  579. -#ifdef CONFIG_IP_ROUTE_MULTIPATH
  580. -
  581. /*
  582. - Dead device goes up. We wake up dead nexthops.
  583. - It takes sense only on multipath routes.
  584. + Dead device goes up or new address is added. We wake up dead nexthops.
  585. */
  586. int fib_sync_up(struct net_device *dev)
  587. {
  588. - int ret = 0;
  589. + struct rt_key key;
  590. + struct fib_result res;
  591. + int ret, rep;
  592. +repeat:
  593. if (!(dev->flags&IFF_UP))
  594. return 0;
  595. + ret = 0;
  596. + rep = 0;
  597. for_fib_info() {
  598. int alive = 0;
  599. change_nexthops(fi) {
  600. - if (!(nh->nh_flags&RTNH_F_DEAD)) {
  601. - alive++;
  602. + if (!(nh->nh_flags&RTNH_F_DEAD))
  603. continue;
  604. - }
  605. if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
  606. continue;
  607. if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
  608. continue;
  609. + if (nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) {
  610. + memset(&key, 0, sizeof(key));
  611. + key.dst = nh->nh_gw;
  612. + key.oif = nh->nh_oif;
  613. + key.scope = nh->nh_scope;
  614. + if (fib_lookup(&key, &res) != 0)
  615. + continue;
  616. + if (res.type != RTN_UNICAST &&
  617. + res.type != RTN_LOCAL) {
  618. + fib_res_put(&res);
  619. + continue;
  620. + }
  621. + nh->nh_scope = res.scope;
  622. + fib_res_put(&res);
  623. + rep = 1;
  624. + }
  625. alive++;
  626. +#ifdef CONFIG_IP_ROUTE_MULTIPATH
  627. spin_lock_bh(&fib_multipath_lock);
  628. nh->nh_power = 0;
  629. nh->nh_flags &= ~RTNH_F_DEAD;
  630. spin_unlock_bh(&fib_multipath_lock);
  631. +#endif
  632. } endfor_nexthops(fi)
  633. if (alive > 0) {
  634. @@ -944,9 +1011,13 @@ int fib_sync_up(struct net_device *dev)
  635. ret++;
  636. }
  637. } endfor_fib_info();
  638. + if (rep)
  639. + goto repeat;
  640. return ret;
  641. }
  642. +#ifdef CONFIG_IP_ROUTE_MULTIPATH
  643. +
  644. /*
  645. The algorithm is suboptimal, but it provides really
  646. fair weighted route distribution.
  647. @@ -955,24 +1026,45 @@ int fib_sync_up(struct net_device *dev)
  648. void fib_select_multipath(const struct rt_key *key, struct fib_result *res)
  649. {
  650. struct fib_info *fi = res->fi;
  651. - int w;
  652. + int w, alive;
  653. spin_lock_bh(&fib_multipath_lock);
  654. + if (key->oif) {
  655. + int sel = -1;
  656. + w = -1;
  657. + change_nexthops(fi) {
  658. + if (key->oif != nh->nh_oif)
  659. + continue;
  660. + if (key->gw && key->gw != nh->nh_gw &&
  661. + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
  662. + continue;
  663. + if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
  664. + if (nh->nh_power > w) {
  665. + w = nh->nh_power;
  666. + sel = nhsel;
  667. + }
  668. + }
  669. + } endfor_nexthops(fi);
  670. + if (sel >= 0) {
  671. + spin_unlock_bh(&fib_multipath_lock);
  672. + res->nh_sel = sel;
  673. + return;
  674. + }
  675. + goto last_resort;
  676. + }
  677. +
  678. +repeat:
  679. if (fi->fib_power <= 0) {
  680. int power = 0;
  681. change_nexthops(fi) {
  682. - if (!(nh->nh_flags&RTNH_F_DEAD)) {
  683. + if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
  684. power += nh->nh_weight;
  685. nh->nh_power = nh->nh_weight;
  686. }
  687. } endfor_nexthops(fi);
  688. fi->fib_power = power;
  689. - if (power <= 0) {
  690. - spin_unlock_bh(&fib_multipath_lock);
  691. - /* Race condition: route has just become dead. */
  692. - res->nh_sel = 0;
  693. - return;
  694. - }
  695. + if (power <= 0)
  696. + goto last_resort;
  697. }
  698. @@ -982,20 +1074,40 @@ void fib_select_multipath(const struct r
  699. w = jiffies % fi->fib_power;
  700. + alive = 0;
  701. change_nexthops(fi) {
  702. - if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
  703. + if (!(nh->nh_flags&RTNH_F_BADSTATE) && nh->nh_power) {
  704. if ((w -= nh->nh_power) <= 0) {
  705. nh->nh_power--;
  706. fi->fib_power--;
  707. - res->nh_sel = nhsel;
  708. spin_unlock_bh(&fib_multipath_lock);
  709. + res->nh_sel = nhsel;
  710. return;
  711. }
  712. + alive = 1;
  713. + }
  714. + } endfor_nexthops(fi);
  715. + if (alive) {
  716. + fi->fib_power = 0;
  717. + goto repeat;
  718. + }
  719. +
  720. +last_resort:
  721. +
  722. + for_nexthops(fi) {
  723. + if (!(nh->nh_flags&RTNH_F_DEAD)) {
  724. + if (key->oif && key->oif != nh->nh_oif)
  725. + continue;
  726. + if (key->gw && key->gw != nh->nh_gw &&
  727. + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
  728. + continue;
  729. + spin_unlock_bh(&fib_multipath_lock);
  730. + res->nh_sel = nhsel;
  731. + return;
  732. }
  733. } endfor_nexthops(fi);
  734. /* Race condition: route has just become dead. */
  735. - res->nh_sel = 0;
  736. spin_unlock_bh(&fib_multipath_lock);
  737. }
  738. #endif
  739. --- a/net/ipv4/ip_nat_dumb.c
  740. +++ b/net/ipv4/ip_nat_dumb.c
  741. @@ -124,6 +124,7 @@ ip_do_nat(struct sk_buff *skb)
  742. key.dst = ciph->saddr;
  743. key.iif = skb->dev->ifindex;
  744. key.oif = 0;
  745. + key.gw = 0;
  746. #ifdef CONFIG_IP_ROUTE_TOS
  747. key.tos = RT_TOS(ciph->tos);
  748. #endif
  749. --- a/net/ipv4/netfilter/ip_fw_compat_masq.c
  750. +++ b/net/ipv4/netfilter/ip_fw_compat_masq.c
  751. @@ -41,6 +41,10 @@ do_masquerade(struct sk_buff **pskb, con
  752. enum ip_conntrack_info ctinfo;
  753. struct ip_conntrack *ct;
  754. unsigned int ret;
  755. + struct rtable *rt, *skb_rt;
  756. + struct net_device *skb_dev;
  757. + __u32 saddr;
  758. + int new;
  759. /* Sorry, only ICMP, TCP and UDP. */
  760. if (iph->protocol != IPPROTO_ICMP
  761. @@ -64,22 +68,28 @@ do_masquerade(struct sk_buff **pskb, con
  762. }
  763. info = &ct->nat.info;
  764. + iph = (*pskb)->nh.iph;
  765. + saddr = iph->saddr;
  766. + new = 0;
  767. WRITE_LOCK(&ip_nat_lock);
  768. /* Setup the masquerade, if not already */
  769. if (!info->initialized) {
  770. u_int32_t newsrc;
  771. - struct rtable *rt;
  772. struct ip_nat_multi_range range;
  773. + skb_rt = (struct rtable *) (*pskb)->dst;
  774. + skb_dev = skb_rt->u.dst.dev;
  775. /* Pass 0 instead of saddr, since it's going to be changed
  776. anyway. */
  777. - if (ip_route_output(&rt, iph->daddr, 0, 0, 0) != 0) {
  778. + if (ip_route_output_lookup(&rt, iph->daddr, 0, RT_TOS(iph->tos),
  779. + skb_dev? skb_dev->ifindex : 0,
  780. + skb_dev? skb_rt->rt_gateway : 0) != 0) {
  781. + WRITE_UNLOCK(&ip_nat_lock);
  782. DEBUGP("ipnat_rule_masquerade: Can't reroute.\n");
  783. return NF_DROP;
  784. }
  785. - newsrc = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
  786. - RT_SCOPE_UNIVERSE);
  787. + newsrc = rt->rt_src;
  788. ip_rt_put(rt);
  789. range = ((struct ip_nat_multi_range)
  790. { 1,
  791. @@ -92,11 +102,31 @@ do_masquerade(struct sk_buff **pskb, con
  792. WRITE_UNLOCK(&ip_nat_lock);
  793. return ret;
  794. }
  795. + new = 1;
  796. } else
  797. DEBUGP("Masquerading already done on this conn.\n");
  798. WRITE_UNLOCK(&ip_nat_lock);
  799. - return do_bindings(ct, ctinfo, info, NF_IP_POST_ROUTING, pskb);
  800. + ret = do_bindings(ct, ctinfo, info, NF_IP_POST_ROUTING, pskb);
  801. + if (ret != NF_ACCEPT || saddr == (*pskb)->nh.iph->saddr || new)
  802. + return ret;
  803. +
  804. + iph = (*pskb)->nh.iph;
  805. + if (ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), 0) != 0)
  806. + return NF_DROP;
  807. +
  808. + skb_rt = (struct rtable *) (*pskb)->dst;
  809. + skb_dev = skb_rt->u.dst.dev;
  810. + if (skb_dev != rt->u.dst.dev || rt->rt_gateway != skb_rt->rt_gateway) {
  811. + if (skb_dev != rt->u.dst.dev) {
  812. + /* TODO: check the new mtu and reply FRAG_NEEDED */
  813. + }
  814. + dst_release((*pskb)->dst);
  815. + (*pskb)->dst = &rt->u.dst;
  816. + } else {
  817. + ip_rt_put(rt);
  818. + }
  819. + return NF_ACCEPT;
  820. }
  821. void
  822. --- a/net/ipv4/netfilter/ip_nat_core.c
  823. +++ b/net/ipv4/netfilter/ip_nat_core.c
  824. @@ -994,6 +994,60 @@ icmp_reply_translation(struct sk_buff *s
  825. return NF_ACCEPT;
  826. }
  827. +unsigned int
  828. +ip_nat_route_input(unsigned int hooknum,
  829. + struct sk_buff **pskb,
  830. + const struct net_device *in,
  831. + const struct net_device *out,
  832. + int (*okfn)(struct sk_buff *))
  833. +{
  834. + struct sk_buff *skb = *pskb;
  835. + struct iphdr *iph;
  836. + struct ip_conntrack *ct;
  837. + enum ip_conntrack_info ctinfo;
  838. + struct ip_nat_info *info;
  839. + enum ip_conntrack_dir dir;
  840. + __u32 saddr;
  841. + int i;
  842. +
  843. + if (!(ct = ip_conntrack_get(skb, &ctinfo)))
  844. + return NF_ACCEPT;
  845. +
  846. + info = &ct->nat.info;
  847. + if (!info->initialized)
  848. + return NF_ACCEPT;
  849. +
  850. + if (skb->dst)
  851. + return NF_ACCEPT;
  852. +
  853. + if (skb->len < sizeof(struct iphdr))
  854. + return NF_ACCEPT;
  855. +
  856. + iph = skb->nh.iph;
  857. + saddr = iph->saddr;
  858. + hooknum = NF_IP_POST_ROUTING;
  859. + dir = CTINFO2DIR(ctinfo);
  860. +
  861. + READ_LOCK(&ip_nat_lock);
  862. + for (i = 0; i < info->num_manips; i++) {
  863. + if (info->manips[i].direction == dir
  864. + && info->manips[i].hooknum == hooknum
  865. + && info->manips[i].maniptype == IP_NAT_MANIP_SRC) {
  866. + saddr = info->manips[i].manip.ip;
  867. + }
  868. + }
  869. + READ_UNLOCK(&ip_nat_lock);
  870. +
  871. + if (saddr == iph->saddr)
  872. + return NF_ACCEPT;
  873. +
  874. + if (ip_route_input_lookup(skb, iph->daddr, iph->saddr, iph->tos,
  875. + skb->dev, saddr))
  876. + return NF_DROP;
  877. +
  878. + return NF_ACCEPT;
  879. +}
  880. +
  881. int __init ip_nat_init(void)
  882. {
  883. size_t i;
  884. --- a/net/ipv4/netfilter/ip_nat_standalone.c
  885. +++ b/net/ipv4/netfilter/ip_nat_standalone.c
  886. @@ -245,6 +245,9 @@ ip_nat_local_fn(unsigned int hooknum,
  887. /* Before packet filtering, change destination */
  888. static struct nf_hook_ops ip_nat_in_ops
  889. = { { NULL, NULL }, ip_nat_in, PF_INET, NF_IP_PRE_ROUTING, NF_IP_PRI_NAT_DST };
  890. +/* Before routing: look up the input route using the source address SNAT will apply later, i.e. route before mangling */
  891. +static struct nf_hook_ops ip_nat_inr_ops
  892. += { { NULL, NULL }, ip_nat_route_input, PF_INET, NF_IP_PRE_ROUTING, NF_IP_PRI_LAST-1 };
  893. /* After packet filtering, change source */
  894. static struct nf_hook_ops ip_nat_out_ops
  895. = { { NULL, NULL }, ip_nat_out, PF_INET, NF_IP_POST_ROUTING, NF_IP_PRI_NAT_SRC};
  896. @@ -313,10 +316,15 @@ static int init_or_cleanup(int init)
  897. printk("ip_nat_init: can't register in hook.\n");
  898. goto cleanup_nat;
  899. }
  900. + ret = nf_register_hook(&ip_nat_inr_ops);
  901. + if (ret < 0) {
  902. + printk("ip_nat_init: can't register inr hook.\n");
  903. + goto cleanup_inops;
  904. + }
  905. ret = nf_register_hook(&ip_nat_out_ops);
  906. if (ret < 0) {
  907. printk("ip_nat_init: can't register out hook.\n");
  908. - goto cleanup_inops;
  909. + goto cleanup_inrops;
  910. }
  911. ret = nf_register_hook(&ip_nat_local_out_ops);
  912. if (ret < 0) {
  913. @@ -336,6 +344,8 @@ static int init_or_cleanup(int init)
  914. nf_unregister_hook(&ip_nat_local_out_ops);
  915. cleanup_outops:
  916. nf_unregister_hook(&ip_nat_out_ops);
  917. + cleanup_inrops:
  918. + nf_unregister_hook(&ip_nat_inr_ops);
  919. cleanup_inops:
  920. nf_unregister_hook(&ip_nat_in_ops);
  921. cleanup_nat:
  922. --- a/net/ipv4/netfilter/ipt_MASQUERADE.c
  923. +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
  924. @@ -87,7 +87,8 @@ masquerade_target(struct sk_buff **pskb,
  925. key.dst = (*pskb)->nh.iph->daddr;
  926. key.src = 0; /* Unknown: that's what we're trying to establish */
  927. key.tos = RT_TOS((*pskb)->nh.iph->tos)|RTO_CONN;
  928. - key.oif = 0;
  929. + key.oif = out->ifindex;
  930. + key.gw = ((struct rtable *) (*pskb)->dst)->rt_gateway;
  931. #ifdef CONFIG_IP_ROUTE_FWMARK
  932. key.fwmark = (*pskb)->nfmark;
  933. #endif
  934. @@ -98,13 +99,6 @@ masquerade_target(struct sk_buff **pskb,
  935. " No route: Rusty's brain broke!\n");
  936. return NF_DROP;
  937. }
  938. - if (rt->u.dst.dev != out) {
  939. - if (net_ratelimit())
  940. - printk("MASQUERADE:"
  941. - " Route sent us somewhere else.\n");
  942. - ip_rt_put(rt);
  943. - return NF_DROP;
  944. - }
  945. newsrc = rt->rt_src;
  946. DEBUGP("newsrc = %u.%u.%u.%u\n", NIPQUAD(newsrc));
  947. --- a/net/ipv4/route.c
  948. +++ b/net/ipv4/route.c
  949. @@ -919,6 +919,7 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
  950. /* Gateway is different ... */
  951. rt->rt_gateway = new_gw;
  952. + if (rt->key.gw) rt->key.gw = new_gw;
  953. /* Redirect received -> path was valid */
  954. dst_confirm(&rth->u.dst);
  955. @@ -1343,6 +1344,7 @@ static int ip_route_input_mc(struct sk_b
  956. rth->key.fwmark = skb->nfmark;
  957. #endif
  958. rth->key.src = saddr;
  959. + rth->key.lsrc = 0;
  960. rth->rt_src = saddr;
  961. #ifdef CONFIG_IP_ROUTE_NAT
  962. rth->rt_dst_map = daddr;
  963. @@ -1356,6 +1358,7 @@ static int ip_route_input_mc(struct sk_b
  964. rth->u.dst.dev = &loopback_dev;
  965. dev_hold(rth->u.dst.dev);
  966. rth->key.oif = 0;
  967. + rth->key.gw = 0;
  968. rth->rt_gateway = daddr;
  969. rth->rt_spec_dst= spec_dst;
  970. rth->rt_type = RTN_MULTICAST;
  971. @@ -1395,7 +1398,7 @@ e_inval:
  972. */
  973. int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
  974. - u8 tos, struct net_device *dev)
  975. + u8 tos, struct net_device *dev, u32 lsrc)
  976. {
  977. struct rt_key key;
  978. struct fib_result res;
  979. @@ -1415,16 +1418,17 @@ int ip_route_input_slow(struct sk_buff *
  980. goto out;
  981. key.dst = daddr;
  982. - key.src = saddr;
  983. + key.src = lsrc? : saddr;
  984. key.tos = tos;
  985. #ifdef CONFIG_IP_ROUTE_FWMARK
  986. key.fwmark = skb->nfmark;
  987. #endif
  988. - key.iif = dev->ifindex;
  989. + key.iif = lsrc? loopback_dev.ifindex : dev->ifindex;
  990. key.oif = 0;
  991. + key.gw = 0;
  992. key.scope = RT_SCOPE_UNIVERSE;
  993. - hash = rt_hash_code(daddr, saddr ^ (key.iif << 5), tos);
  994. + hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos);
  995. /* Check for the most weird martians, which can be not detected
  996. by fib_lookup.
  997. @@ -1445,6 +1449,12 @@ int ip_route_input_slow(struct sk_buff *
  998. if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr))
  999. goto martian_destination;
  1000. + if (lsrc) {
  1001. + if (MULTICAST(lsrc) || BADCLASS(lsrc) ||
  1002. + ZERONET(lsrc) || LOOPBACK(lsrc))
  1003. + goto e_inval;
  1004. + }
  1005. +
  1006. /*
  1007. * Now we are ready to route packet.
  1008. */
  1009. @@ -1454,6 +1464,10 @@ int ip_route_input_slow(struct sk_buff *
  1010. goto no_route;
  1011. }
  1012. free_res = 1;
  1013. + if (lsrc && res.type != RTN_UNICAST && res.type != RTN_NAT)
  1014. + goto e_inval;
  1015. + key.iif = dev->ifindex;
  1016. + key.src = saddr;
  1017. rt_cache_stat[smp_processor_id()].in_slow_tot++;
  1018. @@ -1464,7 +1478,7 @@ int ip_route_input_slow(struct sk_buff *
  1019. if (1) {
  1020. u32 src_map = saddr;
  1021. - if (res.r)
  1022. + if (res.r && !lsrc)
  1023. src_map = fib_rules_policy(saddr, &res, &flags);
  1024. if (res.type == RTN_NAT) {
  1025. @@ -1503,8 +1517,9 @@ int ip_route_input_slow(struct sk_buff *
  1026. if (res.type != RTN_UNICAST)
  1027. goto martian_destination;
  1028. + fib_select_default(&key, &res);
  1029. #ifdef CONFIG_IP_ROUTE_MULTIPATH
  1030. - if (res.fi->fib_nhs > 1 && key.oif == 0)
  1031. + if (res.fi->fib_nhs > 1)
  1032. fib_select_multipath(&key, &res);
  1033. #endif
  1034. out_dev = in_dev_get(FIB_RES_DEV(res));
  1035. @@ -1524,6 +1539,7 @@ int ip_route_input_slow(struct sk_buff *
  1036. flags |= RTCF_DIRECTSRC;
  1037. if (out_dev == in_dev && err && !(flags & (RTCF_NAT | RTCF_MASQ)) &&
  1038. + !lsrc &&
  1039. (IN_DEV_SHARED_MEDIA(out_dev) ||
  1040. inet_addr_onlink(out_dev, saddr, FIB_RES_GW(res))))
  1041. flags |= RTCF_DOREDIRECT;
  1042. @@ -1550,6 +1566,7 @@ int ip_route_input_slow(struct sk_buff *
  1043. #endif
  1044. rth->key.src = saddr;
  1045. rth->rt_src = saddr;
  1046. + rth->key.lsrc = lsrc;
  1047. rth->rt_gateway = daddr;
  1048. #ifdef CONFIG_IP_ROUTE_NAT
  1049. rth->rt_src_map = key.src;
  1050. @@ -1562,6 +1579,7 @@ int ip_route_input_slow(struct sk_buff *
  1051. rth->u.dst.dev = out_dev->dev;
  1052. dev_hold(rth->u.dst.dev);
  1053. rth->key.oif = 0;
  1054. + rth->key.gw = 0;
  1055. rth->rt_spec_dst= spec_dst;
  1056. rth->u.dst.input = ip_forward;
  1057. @@ -1572,7 +1590,8 @@ int ip_route_input_slow(struct sk_buff *
  1058. rth->rt_flags = flags;
  1059. #ifdef CONFIG_NET_FASTROUTE
  1060. - if (netdev_fastroute && !(flags&(RTCF_NAT|RTCF_MASQ|RTCF_DOREDIRECT))) {
  1061. + if (netdev_fastroute && !(flags&(RTCF_NAT|RTCF_MASQ|RTCF_DOREDIRECT)) &&
  1062. + !lsrc) {
  1063. struct net_device *odev = rth->u.dst.dev;
  1064. if (odev != dev &&
  1065. dev->accept_fastpath &&
  1066. @@ -1595,6 +1614,8 @@ out: return err;
  1067. brd_input:
  1068. if (skb->protocol != htons(ETH_P_IP))
  1069. goto e_inval;
  1070. + if (lsrc)
  1071. + goto e_inval;
  1072. if (ZERONET(saddr))
  1073. spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
  1074. @@ -1627,6 +1648,7 @@ local_input:
  1075. #endif
  1076. rth->key.src = saddr;
  1077. rth->rt_src = saddr;
  1078. + rth->key.lsrc = 0;
  1079. #ifdef CONFIG_IP_ROUTE_NAT
  1080. rth->rt_dst_map = key.dst;
  1081. rth->rt_src_map = key.src;
  1082. @@ -1639,6 +1661,7 @@ local_input:
  1083. rth->u.dst.dev = &loopback_dev;
  1084. dev_hold(rth->u.dst.dev);
  1085. rth->key.oif = 0;
  1086. + rth->key.gw = 0;
  1087. rth->rt_gateway = daddr;
  1088. rth->rt_spec_dst= spec_dst;
  1089. rth->u.dst.input= ip_local_deliver;
  1090. @@ -1704,8 +1727,9 @@ martian_source:
  1091. goto e_inval;
  1092. }
  1093. -int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
  1094. - u8 tos, struct net_device *dev)
  1095. +static inline int
  1096. +ip_route_input_cached(struct sk_buff *skb, u32 daddr, u32 saddr,
  1097. + u8 tos, struct net_device *dev, u32 lsrc)
  1098. {
  1099. struct rtable * rth;
  1100. unsigned hash;
  1101. @@ -1719,6 +1743,7 @@ int ip_route_input(struct sk_buff *skb,
  1102. if (rth->key.dst == daddr &&
  1103. rth->key.src == saddr &&
  1104. rth->key.iif == iif &&
  1105. + rth->key.lsrc == lsrc &&
  1106. rth->key.oif == 0 &&
  1107. #ifdef CONFIG_IP_ROUTE_FWMARK
  1108. rth->key.fwmark == skb->nfmark &&
  1109. @@ -1766,9 +1791,21 @@ int ip_route_input(struct sk_buff *skb,
  1110. read_unlock(&inetdev_lock);
  1111. return -EINVAL;
  1112. }
  1113. - return ip_route_input_slow(skb, daddr, saddr, tos, dev);
  1114. + return ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc);
  1115. +}
  1116. +
  1117. +int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
  1118. + u8 tos, struct net_device *dev)
  1119. +{
  1120. + return ip_route_input_cached(skb, daddr, saddr, tos, dev, 0);
  1121. }
  1122. +int ip_route_input_lookup(struct sk_buff *skb, u32 daddr, u32 saddr,
  1123. + u8 tos, struct net_device *dev, u32 lsrc)
  1124. +{
  1125. + return ip_route_input_cached(skb, daddr, saddr, tos, dev, lsrc);
  1126. +}
  1127. +
  1128. /*
  1129. * Major route resolver routine.
  1130. */
  1131. @@ -1791,6 +1828,7 @@ int ip_route_output_slow(struct rtable *
  1132. key.tos = tos & IPTOS_RT_MASK;
  1133. key.iif = loopback_dev.ifindex;
  1134. key.oif = oldkey->oif;
  1135. + key.gw = oldkey->gw;
  1136. #ifdef CONFIG_IP_ROUTE_FWMARK
  1137. key.fwmark = oldkey->fwmark;
  1138. #endif
  1139. @@ -1880,6 +1918,7 @@ int ip_route_output_slow(struct rtable *
  1140. dev_out = &loopback_dev;
  1141. dev_hold(dev_out);
  1142. key.oif = loopback_dev.ifindex;
  1143. + key.gw = 0;
  1144. res.type = RTN_LOCAL;
  1145. flags |= RTCF_LOCAL;
  1146. goto make_route;
  1147. @@ -1887,7 +1926,7 @@ int ip_route_output_slow(struct rtable *
  1148. if (fib_lookup(&key, &res)) {
  1149. res.fi = NULL;
  1150. - if (oldkey->oif) {
  1151. + if (oldkey->oif && dev_out->flags&IFF_UP) {
  1152. /* Apparently, routing tables are wrong. Assume,
  1153. that the destination is on link.
  1154. @@ -1930,6 +1969,7 @@ int ip_route_output_slow(struct rtable *
  1155. dev_out = &loopback_dev;
  1156. dev_hold(dev_out);
  1157. key.oif = dev_out->ifindex;
  1158. + key.gw = 0;
  1159. if (res.fi)
  1160. fib_info_put(res.fi);
  1161. res.fi = NULL;
  1162. @@ -1937,13 +1977,12 @@ int ip_route_output_slow(struct rtable *
  1163. goto make_route;
  1164. }
  1165. + if (res.type == RTN_UNICAST)
  1166. + fib_select_default(&key, &res);
  1167. #ifdef CONFIG_IP_ROUTE_MULTIPATH
  1168. - if (res.fi->fib_nhs > 1 && key.oif == 0)
  1169. + if (res.fi->fib_nhs > 1)
  1170. fib_select_multipath(&key, &res);
  1171. - else
  1172. #endif
  1173. - if (!res.prefixlen && res.type == RTN_UNICAST && !key.oif)
  1174. - fib_select_default(&key, &res);
  1175. if (!key.src)
  1176. key.src = FIB_RES_PREFSRC(res);
  1177. @@ -2001,7 +2040,9 @@ make_route:
  1178. rth->key.tos = tos;
  1179. rth->key.src = oldkey->src;
  1180. rth->key.iif = 0;
  1181. + rth->key.lsrc = 0;
  1182. rth->key.oif = oldkey->oif;
  1183. + rth->key.gw = oldkey->gw;
  1184. #ifdef CONFIG_IP_ROUTE_FWMARK
  1185. rth->key.fwmark = oldkey->fwmark;
  1186. #endif
  1187. @@ -2080,6 +2121,7 @@ int ip_route_output_key(struct rtable **
  1188. rth->key.src == key->src &&
  1189. rth->key.iif == 0 &&
  1190. rth->key.oif == key->oif &&
  1191. + rth->key.gw == key->gw &&
  1192. #ifdef CONFIG_IP_ROUTE_FWMARK
  1193. rth->key.fwmark == key->fwmark &&
  1194. #endif
  1195. --- a/net/netsyms.c
  1196. +++ b/net/netsyms.c
  1197. @@ -260,6 +260,7 @@ EXPORT_SYMBOL(inet_register_protosw);
  1198. EXPORT_SYMBOL(inet_unregister_protosw);
  1199. EXPORT_SYMBOL(ip_route_output_key);
  1200. EXPORT_SYMBOL(ip_route_input);
  1201. +EXPORT_SYMBOL(ip_route_input_lookup);
  1202. EXPORT_SYMBOL(icmp_send);
  1203. EXPORT_SYMBOL(icmp_statistics);
  1204. EXPORT_SYMBOL(icmp_err_convert);