/*
 * libwebsockets - small server side websockets and web server implementation
 *
 * Copyright (C) 2010 - 2021 Andy Green <andy@warmcat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * We mainly focus on the routing table / gateways because those are the
 * elements that decide if we can get on to the internet or not.
 *
 * We also need to understand the source addresses of possible outgoing routes,
 * and follow LINK down (ifconfig down) to clean up routes on the interface idx
 * going down that are not otherwise cleaned.
 */
31
32 #include <private-lib-core.h>
33
34 #include <asm/types.h>
35 #include <sys/socket.h>
36 #include <linux/netlink.h>
37 #include <linux/rtnetlink.h>
38
/*
 * work around CentOS 7 -Wconversion problem: replace whatever RTA_ALIGNTO the
 * system headers provided with an explicitly unsigned constant
 */
#undef RTA_ALIGNTO
#define RTA_ALIGNTO 4U

/*
 * Netlink trace messages in this file go through lwsl_cx_netlink(), which is
 * aliased to lwsl_cx_info() below.
 */
//#define lwsl_netlink lwsl_notice
#define lwsl_cx_netlink lwsl_cx_info
45
46 static void
lws_netlink_coldplug_done_cb(lws_sorted_usec_list_t *sul)47 lws_netlink_coldplug_done_cb(lws_sorted_usec_list_t *sul)
48 {
49 struct lws_context *ctx = lws_container_of(sul, struct lws_context,
50 sul_nl_coldplug);
51 ctx->nl_initial_done = 1;
52 #if defined(LWS_WITH_SYS_STATE)
53 /* if nothing is there to intercept anything, go all the way */
54 lws_state_transition_steps(&ctx->mgr_system, LWS_SYSTATE_OPERATIONAL);
55 #endif
56 }
57
58 static int
rops_handle_POLLIN_netlink(struct lws_context_per_thread *pt, struct lws *wsi, struct lws_pollfd *pollfd)59 rops_handle_POLLIN_netlink(struct lws_context_per_thread *pt, struct lws *wsi,
60 struct lws_pollfd *pollfd)
61 {
62 struct lws_context *cx = pt->context;
63 uint8_t s[4096]
64 #if defined(_DEBUG)
65 , route_change = 0
66 #endif
67 #if defined(LWS_WITH_SYS_SMD)
68 , gateway_change = 0
69 #endif
70 ;
71 struct sockaddr_nl nladdr;
72 lws_route_t robj, *rou, *rmat;
73 struct nlmsghdr *h;
74 struct msghdr msg;
75 struct iovec iov;
76 unsigned int n;
77 char buf[72];
78
79 if (!(pollfd->revents & LWS_POLLIN))
80 return LWS_HPI_RET_HANDLED;
81
82 memset(&msg, 0, sizeof(msg));
83
84 iov.iov_base = (void *)s;
85 iov.iov_len = sizeof(s);
86
87 msg.msg_name = (void *)&(nladdr);
88 msg.msg_namelen = sizeof(nladdr);
89
90 msg.msg_iov = &iov;
91 msg.msg_iovlen = 1;
92
93 n = (unsigned int)recvmsg(wsi->desc.sockfd, &msg, 0);
94 if ((int)n < 0) {
95 lwsl_cx_notice(cx, "recvmsg failed");
96 return LWS_HPI_RET_PLEASE_CLOSE_ME;
97 }
98
99 // lwsl_hexdump_notice(s, (size_t)n);
100
101 h = (struct nlmsghdr *)s;
102
103 /* we can get a bunch of messages coalesced in one read*/
104
105 for ( ; NLMSG_OK(h, n); h = NLMSG_NEXT(h, n)) {
106 struct ifaddrmsg *ifam;
107 struct rtattr *ra;
108 struct rtmsg *rm;
109 #if !defined(LWS_WITH_NO_LOGS) && defined(_DEBUG)
110 struct ndmsg *nd;
111 #endif
112 unsigned int ra_len;
113 uint8_t *p;
114
115 struct ifinfomsg *ifi;
116 struct rtattr *attribute;
117 unsigned int len;
118
119 lwsl_cx_netlink(cx, "RTM %d", h->nlmsg_type);
120
121 memset(&robj, 0, sizeof(robj));
122 robj.if_idx = -1;
123 robj.priority = -1;
124 rm = (struct rtmsg *)NLMSG_DATA(h);
125
126 /*
127 * We have to care about NEWLINK so we can understand when a
128 * network interface went down, and clear the related routes.
129 *
130 * We don't get individual DELROUTEs for these.
131 */
132
133 switch (h->nlmsg_type) {
134 case RTM_NEWLINK:
135
136 ifi = NLMSG_DATA(h);
137 len = (unsigned int)(h->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)));
138
139 /* loop over all attributes for the NEWLINK message */
140 for (attribute = IFLA_RTA(ifi); RTA_OK(attribute, len);
141 attribute = RTA_NEXT(attribute, len)) {
142 lwsl_cx_netlink(cx, "if attr %d",
143 (int)attribute->rta_type);
144 switch(attribute->rta_type) {
145 case IFLA_IFNAME:
146 lwsl_cx_netlink(cx, "NETLINK ifidx %d : %s",
147 ifi->ifi_index,
148 (char *)RTA_DATA(attribute));
149 break;
150 default:
151 break;
152 } /* switch */
153 } /* for loop */
154
155 lwsl_cx_netlink(cx, "NEWLINK ifi_index %d, flags 0x%x",
156 ifi->ifi_index, ifi->ifi_flags);
157
158 /*
159 * Despite "New"link this is actually telling us there
160 * is some change on the network interface IFF_ state
161 */
162
163 if (!(ifi->ifi_flags & IFF_UP)) {
164 /*
165 * Interface is down, so scrub all routes that
166 * applied to it
167 */
168 lwsl_cx_netlink(cx, "NEWLINK: ifdown %d",
169 ifi->ifi_index);
170 lws_pt_lock(pt, __func__);
171 _lws_route_table_ifdown(pt, ifi->ifi_index);
172 lws_pt_unlock(pt);
173 }
174 continue; /* ie, not break, no second half */
175
176 case RTM_NEWADDR:
177 case RTM_DELADDR:
178
179 ifam = (struct ifaddrmsg *)NLMSG_DATA(h);
180
181 robj.source_ads = 1;
182 robj.dest_len = ifam->ifa_prefixlen;
183 robj.if_idx = (int)ifam->ifa_index;
184 robj.scope = ifam->ifa_scope;
185 robj.ifa_flags = ifam->ifa_flags;
186 robj.dest.sa4.sin_family = ifam->ifa_family;
187
188 /* address attributes */
189 ra = (struct rtattr *)IFA_RTA(ifam);
190 ra_len = (unsigned int)IFA_PAYLOAD(h);
191
192 lwsl_cx_netlink(cx, "%s",
193 h->nlmsg_type == RTM_NEWADDR ?
194 "NEWADDR" : "DELADDR");
195
196 /*
197 * almost nothing interesting within IFA_* attributes:
198 * so skip it and goto to the second half
199 */
200 goto second_half;
201
202 case RTM_NEWROUTE:
203 case RTM_DELROUTE:
204
205 lwsl_cx_netlink(cx, "%s",
206 h->nlmsg_type == RTM_NEWROUTE ?
207 "NEWROUTE" : "DELROUTE");
208
209 /* route attributes */
210 ra = (struct rtattr *)RTM_RTA(rm);
211 ra_len = (unsigned int)RTM_PAYLOAD(h);
212 break;
213
214 case RTM_DELNEIGH:
215 case RTM_NEWNEIGH:
216 lwsl_cx_netlink(cx, "%s", h->nlmsg_type ==
217 RTM_NEWNEIGH ? "NEWNEIGH" :
218 "DELNEIGH");
219 #if !defined(LWS_WITH_NO_LOGS) && defined(_DEBUG)
220 nd = (struct ndmsg *)rm;
221 lwsl_cx_netlink(cx, "fam %u, ifidx %u, flags 0x%x",
222 nd->ndm_family, nd->ndm_ifindex,
223 nd->ndm_flags);
224 #endif
225 ra = (struct rtattr *)RTM_RTA(rm);
226 ra_len = (unsigned int)RTM_PAYLOAD(h);
227 for ( ; RTA_OK(ra, ra_len); ra = RTA_NEXT(ra, ra_len)) {
228 lwsl_cx_netlink(cx, "atr %d", ra->rta_type);
229 switch (ra->rta_type) {
230 case NDA_DST:
231 lwsl_cx_netlink(cx, "dst len %d",
232 ra->rta_len);
233 break;
234 }
235 }
236 lws_pt_lock(pt, __func__);
237 _lws_route_pt_close_unroutable(pt);
238 lws_pt_unlock(pt);
239 continue;
240
241 default:
242 lwsl_cx_netlink(cx, "*** Unknown RTM_%d",
243 h->nlmsg_type);
244 continue;
245 } /* switch */
246
247 robj.proto = rm->rtm_protocol;
248
249 // iterate over route attributes
250 for ( ; RTA_OK(ra, ra_len); ra = RTA_NEXT(ra, ra_len)) {
251 // lwsl_netlink("%s: atr %d\n", __func__, ra->rta_type);
252 switch (ra->rta_type) {
253 case RTA_PREFSRC: /* protocol ads: preferred src ads */
254 case RTA_SRC:
255 lws_sa46_copy_address(&robj.src, RTA_DATA(ra),
256 rm->rtm_family);
257 robj.src_len = rm->rtm_src_len;
258 lws_sa46_write_numeric_address(&robj.src, buf, sizeof(buf));
259 lwsl_cx_netlink(cx, "RTA_SRC: %s", buf);
260 break;
261 case RTA_DST:
262 /* check if is local addr -> considering it as src addr too */
263 if (rm->rtm_type == RTN_LOCAL &&
264 ((rm->rtm_family == AF_INET && rm->rtm_dst_len == 32) ||
265 (rm->rtm_family == AF_INET6 && rm->rtm_dst_len == 128))) {
266 lws_sa46_copy_address(&robj.src, RTA_DATA(ra),
267 rm->rtm_family);
268 lwsl_cx_netlink(cx, "Local addr: RTA_DST -> added to RTA_SRC");
269 }
270
271 lws_sa46_copy_address(&robj.dest, RTA_DATA(ra),
272 rm->rtm_family);
273 robj.dest_len = rm->rtm_dst_len;
274 lws_sa46_write_numeric_address(&robj.dest, buf, sizeof(buf));
275 lwsl_cx_netlink(cx, "RTA_DST: %s", buf);
276 break;
277 case RTA_GATEWAY:
278 lws_sa46_copy_address(&robj.gateway,
279 RTA_DATA(ra),
280 rm->rtm_family);
281 #if defined(LWS_WITH_SYS_SMD)
282 gateway_change = 1;
283 #endif
284 break;
285 case RTA_IIF: /* int: input interface index */
286 case RTA_OIF: /* int: output interface index */
287 robj.if_idx = *(int *)RTA_DATA(ra);
288 lwsl_cx_netlink(cx, "ifidx %d", robj.if_idx);
289 break;
290 case RTA_PRIORITY: /* int: priority of route */
291 p = RTA_DATA(ra);
292 robj.priority = p[3] << 24 | p[2] << 16 |
293 p[1] << 8 | p[0];
294 break;
295 case RTA_CACHEINFO: /* struct rta_cacheinfo */
296 break;
297 #if defined(LWS_HAVE_RTA_PREF)
298 case RTA_PREF: /* char: RFC4191 v6 router preference */
299 break;
300 #endif
301 case RTA_TABLE: /* int */
302 break;
303
304 default:
305 lwsl_cx_info(cx, "unknown attr type %d",
306 ra->rta_type);
307 break;
308 }
309 } /* for */
310
311 /*
312 * the second half, once all the attributes were collected
313 */
314 second_half:
315 switch (h->nlmsg_type) {
316
317 case RTM_DELROUTE:
318 /*
319 * This will also take down wsi marked as using it
320 */
321 lwsl_cx_netlink(cx, "DELROUTE: if_idx %d",
322 robj.if_idx);
323 lws_pt_lock(pt, __func__);
324 _lws_route_remove(pt, &robj, 0);
325 lws_pt_unlock(pt);
326 goto inform;
327
328 case RTM_NEWROUTE:
329
330 lwsl_cx_netlink(cx, "NEWROUTE rtm_type %d",
331 rm->rtm_type);
332
333 /*
334 * We don't want any routing debris like /32 or broadcast
335 * in our routing table... we will collect source addresses
336 * bound to interfaces via NEWADDR
337 */
338
339 if (rm->rtm_type != RTN_UNICAST &&
340 rm->rtm_type != RTN_LOCAL)
341 break;
342
343 if (rm->rtm_flags & RTM_F_CLONED)
344 break;
345
346 goto ana;
347
348 case RTM_DELADDR:
349 lwsl_cx_notice(cx, "DELADDR");
350 #if defined(_DEBUG)
351 _lws_routing_entry_dump(cx, &robj);
352 #endif
353 lws_pt_lock(pt, __func__);
354 _lws_route_remove(pt, &robj, LRR_MATCH_SRC | LRR_IGNORE_PRI);
355 _lws_route_pt_close_unroutable(pt);
356 lws_pt_unlock(pt);
357 break;
358
359 case RTM_NEWADDR:
360
361 lwsl_cx_netlink(cx, "NEWADDR");
362 ana:
363
364 /*
365 * Is robj a dupe in the routing table already?
366 *
367 * match on pri ignore == set pri and skip
368 * no match == add
369 */
370
371 lws_pt_lock(pt, __func__);
372
373 /* returns zero on match already in table */
374 rmat = _lws_route_remove(pt, &robj, h->nlmsg_type == RTM_NEWROUTE ?
375 LRR_MATCH_DST : LRR_MATCH_SRC | LRR_IGNORE_PRI);
376 lws_pt_unlock(pt);
377
378 if (rmat) {
379 rmat->priority = robj.priority;
380 break;
381 }
382
383 rou = lws_malloc(sizeof(*rou), __func__);
384 if (!rou) {
385 lwsl_cx_err(cx, "oom");
386 return LWS_HPI_RET_HANDLED;
387 }
388
389 *rou = robj;
390
391 lws_pt_lock(pt, __func__);
392
393 /*
394 * We lock the pt before getting the uidx, so it
395 * cannot race
396 */
397
398 rou->uidx = _lws_route_get_uidx(cx);
399 lws_dll2_add_tail(&rou->list, &cx->routing_table);
400 lwsl_cx_info(cx, "route list size %u",
401 cx->routing_table.count);
402
403 _lws_route_pt_close_unroutable(pt);
404
405 lws_pt_unlock(pt);
406
407 inform:
408 #if defined(_DEBUG)
409 route_change = 1;
410 #endif
411 #if defined(LWS_WITH_SYS_SMD)
412 /*
413 * Reflect the route add / del event using SMD.
414 * Participants interested can refer to the pt
415 * routing table
416 */
417 (void)lws_smd_msg_printf(cx, LWSSMDCL_NETWORK,
418 "{\"rt\":\"%s\"}\n",
419 (h->nlmsg_type == RTM_DELROUTE) ?
420 "del" : "add");
421 #endif
422
423 break;
424
425 default:
426 // lwsl_info("%s: unknown msg type %d\n", __func__,
427 // h->nlmsg_type);
428 break;
429 }
430 } /* message iterator */
431
432 #if defined(LWS_WITH_SYS_SMD)
433 if (gateway_change)
434 /*
435 * If a route with a gw was added or deleted, retrigger captive
436 * portal detection if we have that
437 */
438 (void)lws_smd_msg_printf(cx, LWSSMDCL_NETWORK,
439 "{\"trigger\": \"cpdcheck\", "
440 "\"src\":\"gw-change\"}");
441 #endif
442
443 #if defined(_DEBUG)
444 if (route_change) {
445 lws_context_lock(cx, __func__);
446 _lws_routing_table_dump(cx);
447 lws_context_unlock(cx);
448 }
449 #endif
450
451 if (!cx->nl_initial_done &&
452 pt == &cx->pt[0] &&
453 cx->routing_table.count) {
454 /*
455 * While netlink info still coming, keep moving the timer for
456 * calling it "done" to +100ms until after it stops coming
457 */
458 lws_context_lock(cx, __func__);
459 lws_sul_schedule(cx, 0, &cx->sul_nl_coldplug,
460 lws_netlink_coldplug_done_cb,
461 100 * LWS_US_PER_MS);
462 lws_context_unlock(cx);
463 }
464
465 return LWS_HPI_RET_HANDLED;
466 }
467
468 struct nl_req_s {
469 struct nlmsghdr hdr;
470 struct rtmsg gen;
471 };
472
473 int
rops_pt_init_destroy_netlink(struct lws_context *context, const struct lws_context_creation_info *info, struct lws_context_per_thread *pt, int destroy)474 rops_pt_init_destroy_netlink(struct lws_context *context,
475 const struct lws_context_creation_info *info,
476 struct lws_context_per_thread *pt, int destroy)
477 {
478 struct sockaddr_nl sanl;
479 struct nl_req_s req;
480 struct msghdr msg;
481 struct iovec iov;
482 struct lws *wsi;
483 int n, ret = 1;
484
485 if (destroy) {
486
487 /*
488 * pt netlink wsi closed + freed as part of pt's destroy
489 * wsi mass close, just need to take down the routing table
490 */
491 _lws_route_table_empty(pt);
492
493 return 0;
494 }
495
496 if (context->netlink)
497 return 0;
498
499 if (pt > &context->pt[0])
500 /* we can only have one netlink socket */
501 return 0;
502
503 lwsl_cx_info(context, "creating netlink skt");
504
505 /*
506 * We want a netlink socket per pt as well
507 */
508
509 lws_context_lock(context, __func__);
510 wsi = __lws_wsi_create_with_role(context, (int)(pt - &context->pt[0]),
511 &role_ops_netlink, NULL);
512 lws_context_unlock(context);
513 if (!wsi)
514 goto bail;
515
516 wsi->desc.sockfd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
517 if (wsi->desc.sockfd == LWS_SOCK_INVALID) {
518 lwsl_cx_err(context, "unable to open netlink");
519 goto bail1;
520 }
521
522 lws_plat_set_nonblocking(wsi->desc.sockfd);
523
524 __lws_lc_tag(context, &context->lcg[LWSLCG_VHOST], &wsi->lc,
525 "netlink");
526
527 memset(&sanl, 0, sizeof(sanl));
528 sanl.nl_family = AF_NETLINK;
529 sanl.nl_pid = (uint32_t)getpid();
530 sanl.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_IFADDR
531 #if defined(LWS_WITH_IPV6)
532 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFADDR
533 #endif
534 ;
535
536 if (lws_fi(&context->fic, "netlink_bind") ||
537 bind(wsi->desc.sockfd, (struct sockaddr*)&sanl, sizeof(sanl)) < 0) {
538 lwsl_cx_warn(context, "netlink bind failed");
539 ret = 0; /* some systems deny access, just ignore */
540 goto bail2;
541 }
542
543 context->netlink = wsi;
544 if (lws_wsi_inject_to_loop(pt, wsi))
545 goto bail2;
546
547 /* if (lws_change_pollfd(wsi, 0, LWS_POLLIN)) {
548 lwsl_err("%s: pollfd in fail\n", __func__);
549 goto bail2;
550 }
551 */
552 /*
553 * Since we're starting the PT, ask to be sent all the existing routes.
554 *
555 * This requires CAP_ADMIN, or root... we do this early before dropping
556 * privs
557 */
558
559 memset(&sanl, 0, sizeof(sanl));
560 memset(&msg, 0, sizeof(msg));
561 memset(&req, 0, sizeof(req));
562
563 sanl.nl_family = AF_NETLINK;
564
565 req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(req.gen));
566 req.hdr.nlmsg_type = RTM_GETROUTE;
567 req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
568 req.hdr.nlmsg_seq = 1;
569 req.hdr.nlmsg_pid = (uint32_t)getpid();
570 req.gen.rtm_family = AF_PACKET;
571 req.gen.rtm_table = RT_TABLE_DEFAULT;
572
573 iov.iov_base = &req;
574 iov.iov_len = req.hdr.nlmsg_len;
575 msg.msg_iov = &iov;
576 msg.msg_iovlen = 1;
577 msg.msg_name = &sanl;
578 msg.msg_namelen = sizeof(sanl);
579
580 n = (int)sendmsg(wsi->desc.sockfd, (struct msghdr *)&msg, 0);
581 if (n < 0) {
582 lwsl_cx_notice(context, "rt dump req failed... permissions? errno %d",
583 LWS_ERRNO);
584 }
585
586 /*
587 * Responses are going to come asynchronously, let's block moving
588 * off state IFACE_COLDPLUG until we have had them. This is important
589 * since if we don't hold there, when we do get the responses we may
590 * cull any ongoing connections as unroutable otherwise
591 */
592
593 lwsl_cx_debug(context, "starting netlink coldplug wait");
594
595 return 0;
596
597 bail2:
598 __lws_lc_untag(wsi->a.context, &wsi->lc);
599 compatible_close(wsi->desc.sockfd);
600 bail1:
601 lws_free(wsi);
602 bail:
603 return ret;
604 }
605
606 static const lws_rops_t rops_table_netlink[] = {
607 /* 1 */ { .pt_init_destroy = rops_pt_init_destroy_netlink },
608 /* 2 */ { .handle_POLLIN = rops_handle_POLLIN_netlink },
609 };
610
611 const struct lws_role_ops role_ops_netlink = {
612 /* role name */ "netlink",
613 /* alpn id */ NULL,
614
615 /* rops_table */ rops_table_netlink,
616 /* rops_idx */ {
617 /* LWS_ROPS_check_upgrades */
618 /* LWS_ROPS_pt_init_destroy */ 0x01,
619 /* LWS_ROPS_init_vhost */
620 /* LWS_ROPS_destroy_vhost */ 0x00,
621 /* LWS_ROPS_service_flag_pending */
622 /* LWS_ROPS_handle_POLLIN */ 0x02,
623 /* LWS_ROPS_handle_POLLOUT */
624 /* LWS_ROPS_perform_user_POLLOUT */ 0x00,
625 /* LWS_ROPS_callback_on_writable */
626 /* LWS_ROPS_tx_credit */ 0x00,
627 /* LWS_ROPS_write_role_protocol */
628 /* LWS_ROPS_encapsulation_parent */ 0x00,
629 /* LWS_ROPS_alpn_negotiated */
630 /* LWS_ROPS_close_via_role_protocol */ 0x00,
631 /* LWS_ROPS_close_role */
632 /* LWS_ROPS_close_kill_connection */ 0x00,
633 /* LWS_ROPS_destroy_role */
634 /* LWS_ROPS_adoption_bind */ 0x00,
635 /* LWS_ROPS_client_bind */
636 /* LWS_ROPS_issue_keepalive */ 0x00,
637 },
638
639 /* adoption_cb clnt, srv */ { 0, 0 },
640 /* rx_cb clnt, srv */ { 0, 0 },
641 /* writeable cb clnt, srv */ { 0, 0 },
642 /* close cb clnt, srv */ { 0, 0 },
643 /* protocol_bind_cb c,s */ { 0, 0 },
644 /* protocol_unbind_cb c,s */ { 0, 0 },
645 /* file_handle */ 0,
646 };
647