diff --git a/src/config.c b/src/config.c index 23ad52841d7841e9590d1211c634e1c58a67ddbf..8256dfa7eb66314df3482c74600772c0d0583f6f 100644 --- a/src/config.c +++ b/src/config.c @@ -644,7 +644,7 @@ static void parse_global_section(struct uci_section *section) // Process the parsed options if (tb[GLOBAL_BUNDLE_FILE]) { - global_bundle.download_url = sm_new(1500000); + global_bundle.download_url = sm_new(1000000); if (global_bundle.download_url == NULL) { syslog(LOG_INFO, "Could not assign sm, return"); return; diff --git a/src/dns.c b/src/dns.c index 197fb669888be6d96b311de14d200d2384a61d83..f2e7f261fb31d3237282ecf1a6b73b3a0b2d0a9d 100644 --- a/src/dns.c +++ b/src/dns.c @@ -282,7 +282,7 @@ static void cleanup_answers(struct RES_RECORD answers[], int answer_cnt) // Function parses dns response. // @param data_p payload // @return nothing -void parse_dns_response(unsigned char *data_p) +void parse_dns_response(struct profile *pkt_profile, unsigned char *data_p) { size_t offset; size_t ipv4_addr = 0, i = 0, j = 0, cname_cnt = 0; @@ -349,7 +349,7 @@ void parse_dns_response(unsigned char *data_p) // Store qname and ip address corresponding to it in dns cache if ((ipv4_addr > 0) || (ipv6_addr > 0)) - store_qname_ipaddr_in_cache(dns, answers, ipv4_addr, url_cname, cname_cnt, ipv6_addr); + store_qname_ipaddr_in_cache(pkt_profile, dns, answers, ipv4_addr, url_cname, cname_cnt, ipv6_addr); // Clean up answers after processing cleanup_answers(answers, answer_cnt); diff --git a/src/dns_cache.c b/src/dns_cache.c index 65ab1bed4e523583034e74560a840fe776df63b7..ce150c6fa766b4f73db29fbee17d4396edde91c6 100644 --- a/src/dns_cache.c +++ b/src/dns_cache.c @@ -291,6 +291,35 @@ static void add_dns_cache_entry(struct DNS_HEADER *dns, struct RES_RECORD answer dns_cache_cnt++; } +static int update_dns_cache_from_hash_table(StrMap *hash_table, struct DNS_HEADER *dns, + struct RES_RECORD answers[MAX_DNS_ANS_RECORD], size_t ipv4_addr_cnt, + char url_cname[][MAX_URL_LENGTH], size_t cname_cnt, 
size_t ipv6_addr_cnt) +{ + if (!hash_table) + return 1; + + char tmp_buf[MAX_URL_LENGTH] = {0}; + + if (sm_get(hash_table, (char *)answers[0].name, tmp_buf, sizeof(tmp_buf)) != 0) { + // check if url is already present in the global_dns_cache + for (j = 0; j < dns_cache_cnt; j++) { + if (strstr((global_dns_cache + j)->url, (char *)answers[0].name) != NULL) { + // url is present in global cache, update entry + update_dns_cache_entry(dns, answers, ipv4_addr_cnt, url_cname, + cname_cnt, ipv6_addr_cnt, j); + return 0; + } + } + + // Create a new entry in the global_dns_cache + add_dns_cache_entry(dns, answers, ipv4_addr_cnt, url_cname, cname_cnt, ipv6_addr_cnt); + return 0; + } else { + // using the else because it seems better to be explicit + return 1; + } +} + static int update_dns_cache_from_url_list(struct list_head *lh, struct DNS_HEADER *dns, struct RES_RECORD answers[MAX_DNS_ANS_RECORD], size_t ipv4_addr_cnt, char url_cname[][MAX_URL_LENGTH], size_t cname_cnt, size_t ipv6_addr_cnt) @@ -305,9 +334,17 @@ static int update_dns_cache_from_url_list(struct list_head *lh, struct DNS_HEADE list_for_each_entry(url, lh, list) { // Check if url->url is a substring of payload_pkt - // if not, then try regex - // if it is, the just add the answer to dns cache - if (strstr((char *)answers[0].name, url->url) == NULL) { + // so if incoming request is for help.xyz.com + // and *.xyz.com is in our list, then add help.xyz.com + // to the cache + // TECH NOTE: if user specifies xyz.com, *.xyz.com will also be blocked + // the regex not seems a bit unnecessary because it is only used for wildcard + // matching for now, maybe we can add a * check before strstr to avoid + // unnecessary additions to the cache + // logic: + // if url starts with a * and then strstr succeeds, then don't try regex + // else try regex, if regex does not match, move to the next entry + if (url->url[0] != '*' || strstr((char *)answers[0].name, url->url) == NULL) { wildcard_to_regex(url->url, url_regex); 
int reti = regcomp(&regex, url_regex, REG_ICASE | REG_EXTENDED);
 			if (reti) {
@@ -323,6 +360,7 @@ static int update_dns_cache_from_url_list(struct list_head *lh, struct DNS_HEADE
 			}
 		}
 
+		// if we are here it means strstr or regex matched
 		// check if url is already present in the global_dns_cache
 		for (j = 0; j < dns_cache_cnt; j++) {
 			if (strstr((global_dns_cache + j)->url, (char *)answers[0].name) != NULL) {
@@ -336,10 +374,12 @@ static int update_dns_cache_from_url_list(struct list_head *lh, struct DNS_HEADE
 		// Create a new entry in the global_dns_cache
 		add_dns_cache_entry(dns, answers, ipv4_addr_cnt, url_cname, cname_cnt, ipv6_addr_cnt);
+
+		// TODO potential improvement: update strmap library to store list of values for a given key
+		// instead of a single value, then we can use strmap for dns cache too
+		//
+		// BUG FIX(review): keep the 0-on-match / 1-on-no-match contract.
+		// The callers in store_qname_ipaddr_in_cache treat 0 as "handled,
+		// stop"; returning 0 unconditionally made them bail out after the
+		// first custom_url check and never consult download_url/filter_text.
 		return 0;
 	}
 
 	return 1;
 }
 
 // Function stores the IP addresses of blacklisted URL's in dns cache.
@@ -349,7 +389,7 @@ static int update_dns_cache_from_url_list(struct list_head *lh, struct DNS_HEADE
 // @param url_cname array with cname
 // @param cname_cnt number of cname per dns response.
 // @returns nothing
-void store_qname_ipaddr_in_cache(struct DNS_HEADER *dns,
+void store_qname_ipaddr_in_cache(struct profile *pkt_profile, struct DNS_HEADER *dns,
 				 struct RES_RECORD answers[MAX_DNS_ANS_RECORD], size_t ipv4_addr_cnt,
 				 char url_cname[][MAX_URL_LENGTH], size_t cname_cnt, size_t ipv6_addr_cnt)
 {
@@ -357,36 +397,45 @@ void store_qname_ipaddr_in_cache(struct DNS_HEADER *dns,
 	if (ntohs(dns->ans_count) <= 0)
 		return;
 
-	struct profile *p;
+	struct profile *p = pkt_profile;
 	int ret;
 
-	// Iterate over the global profile list
-	// so, we loop over all filters of all profiles
-	// to cover all possible urls
-	list_for_each_entry(p, &profiles, list) {
-		struct urlfilter *filter = NULL;
-		// TODO: should we add host check here?
+ // first check the global hash_table + ret = update_dns_cache_from_hash_table(global_bundle.download_url, + dns, answers, ipv4_addr_cnt, + url_cname, cname_cnt, ipv6_addr_cnt); - list_for_each_entry(filter, &p->filters, list) { - // Check if the filter is enabled - if (!filter->enable) { - continue; - } + if (ret == 0) + return; - if (filter->bundle) { - ret = update_dns_cache_from_url_list(&filter->bundle->custom_url, - dns, answers, ipv4_addr_cnt, - url_cname, cname_cnt, ipv6_addr_cnt); - if (ret == 0) - return; - //TODO update to include download_url - } + // we loop over all filters of this profile + struct urlfilter *filter = NULL; + + list_for_each_entry(filter, &p->filters, list) { + // Check if the filter is enabled + if (!filter->enable) { + continue; + } + + if (filter->bundle) { + ret = update_dns_cache_from_url_list(&filter->bundle->custom_url, + dns, answers, ipv4_addr_cnt, + url_cname, cname_cnt, ipv6_addr_cnt); + if (ret == 0) + return; + + ret = update_dns_cache_from_hash_table(filter->bundle->download_url, + dns, answers, ipv4_addr_cnt, + url_cname, cname_cnt, ipv6_addr_cnt); - ret = update_dns_cache_from_url_list(&filter->filter_text, - dns, answers, ipv4_addr_cnt, - url_cname, cname_cnt, ipv6_addr_cnt); if (ret == 0) return; } + + ret = update_dns_cache_from_url_list(&filter->filter_text, + dns, answers, ipv4_addr_cnt, + url_cname, cname_cnt, ipv6_addr_cnt); + if (ret == 0) + return; } } diff --git a/src/filter.c b/src/filter.c index e7e2497de5e2c6ad1fa8fb21017a44e2246b5a49..e982491a67c3711bcc38478aee0dc620a1277017 100644 --- a/src/filter.c +++ b/src/filter.c @@ -43,6 +43,24 @@ struct nfq_handle *nfq_h; struct nfq_q_handle *queue_h; struct uloop_fd uloop_fd; +static struct profile *get_profile_for_mac_address(const char *mac_addr) +{ + struct profile *p = NULL; + struct host *h = NULL; + + // Iterate over the global profile list + list_for_each_entry(p, &profiles, list) { + // Iterate over the hosts (MAC addresses) associated with this profile 
+		list_for_each_entry(h, &p->hosts, list) {
+			// BUG FIX(review): the original compared with an undeclared
+			// `len` (compile error — this helper has no such parameter).
+			// Compare the full identifiers case-insensitively instead;
+			// strcasecmp comes from the same <strings.h> family as the
+			// strncasecmp this patch already used here.
+			if (strcasecmp(mac_addr, h->host_identifier) == 0) {
+				return p;
+			}
+		}
+	}
+
+	return NULL;
+}
+
 // Checks if the packet needs to be accepted or not based on mac address
 // and url.
 // @param payload packet payload
@@ -78,6 +96,11 @@ static int handle_packet_filtering(struct nfq_data *payload)
 		return PKT_ACCEPT;
 	}
 
+	struct profile *pkt_profile = get_profile_for_mac_address(mac_addr);
+
+	if (pkt_profile == NULL)
+		return PKT_ACCEPT;
+
 	ret = nfq_get_payload(payload, &data);
 	if (ret <= 0) {
 		syslog(LOG_ERR, "%s:%u-->No payload found", __func__, __LINE__);
@@ -89,7 +112,7 @@ static int handle_packet_filtering(struct nfq_data *payload)
 		return PKT_ACCEPT;
 	}
 
 	// Get the payload offset.
 	int payload_offset;
-	payload_offset = get_ip_packet_payload(data, &proto, data_len, &is_ipv6);
+	payload_offset = get_ip_packet_payload(pkt_profile, data, &proto, data_len, &is_ipv6);
 	if (payload_offset <= 0) {
 		// Just accept the packet and return since the packet either does not match
 		// urlfilter criteria or is a dns response.
@@ -109,8 +132,8 @@ static int handle_packet_filtering(struct nfq_data *payload)
 		return PKT_ACCEPT;
 	}
 
-	// Check if the URL has been blacklisted in any profile.
+ ret = profile_based_url_filtering(pkt_profile, payload_pkt, mac_addr, sizeof(mac_addr), is_https_packet, is_ipv6); return ret; } diff --git a/src/filter_utils.c b/src/filter_utils.c index c9e820e4eee9ce6f6fdc875df68f1d26ffb3255b..cb6dc54c47263aa4f4150d9d83401d03523cd391 100644 --- a/src/filter_utils.c +++ b/src/filter_utils.c @@ -95,7 +95,7 @@ static char *check_https_packet(unsigned char *data, char **payload_pkt, bool is return NULL; } -static int handle_udp_packet(const unsigned char *data_p, size_t offset, size_t len) +static int handle_udp_packet(struct profile *pkt_profile, const unsigned char *data_p, size_t offset, size_t len) { const struct udphdr *udp_header = (const struct udphdr *)(data_p + offset); @@ -117,14 +117,14 @@ static int handle_udp_packet(const unsigned char *data_p, size_t offset, size_t syslog(LOG_ERR, "%s:%u--> payload is null", __func__, __LINE__); return -1; } - parse_dns_response(dns_payload); + parse_dns_response(pkt_profile, dns_payload); return 0; } return (int)(offset + sizeof(struct udphdr) + sizeof(struct dnshdr)); } -static int handle_tcp_packet(const unsigned char *data_p, size_t offset, size_t len) +static int handle_tcp_packet(struct profile *pkt_profile, const unsigned char *data_p, size_t offset, size_t len) { const struct tcphdr *tcp_header = (const struct tcphdr *)(data_p + offset); size_t tcp_header_length = tcp_header->doff * 4; @@ -143,7 +143,7 @@ static int handle_tcp_packet(const unsigned char *data_p, size_t offset, size_t return -1; } - parse_dns_response(dns_payload); + parse_dns_response(pkt_profile, dns_payload); return 0; } else if (ntohs(tcp_header->dest) == DNS_PORT) { return 0; @@ -152,7 +152,8 @@ static int handle_tcp_packet(const unsigned char *data_p, size_t offset, size_t return (int)(offset + tcp_header_length); } -static int process_ipv4_packet(const unsigned char *data_p, uint8_t *proto, size_t len, bool *is_ipv6) +static int process_ipv4_packet(struct profile *pkt_profile, const unsigned char 
*data_p, + uint8_t *proto, size_t len, bool *is_ipv6) { const struct iphdr *iph = (const struct iphdr *)data_p; size_t ip_header_length = iph->ihl * 4; @@ -160,16 +161,17 @@ static int process_ipv4_packet(const unsigned char *data_p, uint8_t *proto, size *is_ipv6 = false; if (*proto == IPPROTO_UDP) { - return handle_udp_packet(data_p, ip_header_length, len); + return handle_udp_packet(pkt_profile, data_p, ip_header_length, len); } else if (*proto == IPPROTO_TCP) { - return handle_tcp_packet(data_p, ip_header_length, len); + return handle_tcp_packet(pkt_profile, data_p, ip_header_length, len); } else { syslog(LOG_ERR, "Unknown protocol"); return -1; } } -static int process_ipv6_packet(const unsigned char *data_p, uint8_t *proto, size_t len, bool *is_ipv6) +static int process_ipv6_packet(struct profile *pkt_profile, const unsigned char *data_p, + uint8_t *proto, size_t len, bool *is_ipv6) { const struct ip6_hdr *ip6h = (const struct ip6_hdr *)data_p; @@ -181,24 +183,25 @@ static int process_ipv6_packet(const unsigned char *data_p, uint8_t *proto, size size_t payload_offset = ipv6_header_get_L4_header_offset(ip6h, proto); if (*proto == IPPROTO_UDP) { - return handle_udp_packet(data_p, payload_offset, len); + return handle_udp_packet(pkt_profile, data_p, payload_offset, len); } else if (*proto == IPPROTO_TCP) { - return handle_tcp_packet(data_p, payload_offset, len); + return handle_tcp_packet(pkt_profile, data_p, payload_offset, len); } else { syslog(LOG_ERR, "Unknown protocol"); return -1; } } -int get_ip_packet_payload(unsigned char *data_p, uint8_t *proto, size_t len, bool *is_ipv6) +int get_ip_packet_payload(struct profile *pkt_profile, unsigned char *data_p, + uint8_t *proto, size_t len, bool *is_ipv6) { const struct iphdr *iph = (const struct iphdr *)data_p; bool ipv4 = (iph->version == 4); if (ipv4) { - return process_ipv4_packet(data_p, proto, len, is_ipv6); + return process_ipv4_packet(pkt_profile, data_p, proto, len, is_ipv6); } else { - return 
process_ipv6_packet(data_p, proto, len, is_ipv6); + return process_ipv6_packet(pkt_profile, data_p, proto, len, is_ipv6); } } @@ -251,7 +254,23 @@ int get_payload_pkt(int proto, unsigned char *data, int payload_offset, return URL_SUCCESS; } -static int match_url_list(struct list_head *url_list, char *payload_pkt) +static int match_url_from_hash_table(StrMap *hash_table, char *payload_pkt) +{ + if (!hash_table || !payload_pkt || *payload_pkt == '\0') + return CHECK_CONTINUE; + + char result[MAX_URL_LENGTH] = {0}; + if (sm_get(hash_table, payload_pkt, result, MAX_URL_LENGTH) == 0) { + return CHECK_CONTINUE; + } else { + // todo: we can probably use result as match_cnt + // increment it and re-insert it again + // if we are very inclined to keep stats for hash_table + return PKT_DROP; + } +} + +static int match_url_from_list(struct list_head *url_list, char *payload_pkt) { struct url_list *url = NULL; regex_t regex; @@ -291,12 +310,12 @@ static int match_url_list(struct list_head *url_list, char *payload_pkt) // @param payload_pkt packet payload // @param match_profile profile store which has the requested mac address. // @return PKT_ACCEPT or PKT DROP on success else -1. 
-static int match_url_from_filter(char *payload_pkt, struct urlfilter *filter)
+static int match_url_from_filter_section(char *payload_pkt, struct urlfilter *filter)
 {
 	int result;
 
-	if (!filter) {
-		syslog(LOG_INFO, "filter or bundle is null.");
+	if (!filter || !payload_pkt || *payload_pkt == '\0') {
+		syslog(LOG_INFO, "filter/bundle or payload_pkt is null.");
 		return CHECK_CONTINUE;
 	}
 
@@ -306,18 +325,23 @@ static int match_url_from_filter(char *payload_pkt, struct urlfilter *filter)
 	}
 
 	// Check URLs in filter_text list
-	result = match_url_list(&filter->filter_text, payload_pkt);
+	result = match_url_from_list(&filter->filter_text, payload_pkt);
 	if (result != CHECK_CONTINUE) {
 		return result;
 	}
 
 	// Check URLs from the urlbundle
 	if (filter->bundle) {
-		result = match_url_list(&filter->bundle->custom_url, payload_pkt);
+		result = match_url_from_list(&filter->bundle->custom_url, payload_pkt);
+		if (result != CHECK_CONTINUE) {
+			return result;
+		}
+
+		// download_url is already a pointer, so pass it as it is
+		result = match_url_from_hash_table(filter->bundle->download_url, payload_pkt);
 		if (result != CHECK_CONTINUE) {
 			return result;
 		}
-		// TODO: add logic to handle new download_url
 	}
 
 	return CHECK_CONTINUE;
@@ -446,6 +470,10 @@ static int time_based_url_filtering(struct schedule *schedule)
 static int mac_based_url_filtering(char *payload_pkt, struct urlfilter *filter,
 				   bool is_https_packet, bool is_ipv6)
 {
+	// we don't know what's happening, this is not supposed to be null, just ACCEPT
+	if (!payload_pkt || *payload_pkt == '\0')
+		return PKT_ACCEPT;
+
 	int ret;
 	char *url_pkt = NULL;
 
@@ -455,16 +483,61 @@ static int mac_based_url_filtering(char *payload_pkt, struct urlfilter *filter,
 		url_pkt = match_ip_address_from_dns_cache(payload_pkt, is_ipv6);
 		if (url_pkt == NULL)
 			return PKT_ACCEPT;
-		ret = match_url_from_filter(url_pkt, filter);
+
+		// check global hash_table first
+		// BUG FIX(review): arguments were swapped; the signature is
+		// match_url_from_hash_table(StrMap *hash_table, char *payload_pkt)
+		ret = match_url_from_hash_table(global_bundle.download_url, url_pkt);
+
+		if (ret == CHECK_CONTINUE)
+			ret = match_url_from_filter_section(url_pkt, filter);
 	} else {
-		ret = match_url_from_filter(payload_pkt, filter);
+		// http packet, inspect the host in the request to pass verdict
+		// check global hash_table first
+		// Extract the Host header
+		char *host_header = strstr(payload_pkt, "\r\nHost: ");
+		if (host_header == NULL) {
+			syslog(LOG_INFO, "%s:%u No Host header found: %s\n", __func__, __LINE__, payload_pkt);
+			// TODO maybe we should fall back to matching ip of the packet instead, like https
+			return PKT_ACCEPT;
+		}
+
+		host_header += strlen("\r\nHost: ");
+
+		// first check for a colon ("Host: name:port" form)
+		char *host_end = strchr(host_header, ':');
+		if (host_end == NULL) {
+			// no colon, try end of line
+			host_end = strstr(host_header, "\r\n");
+			if (host_end == NULL) {
+				syslog(LOG_INFO, "%s:%u Malformed Host header: %s\n", __func__, __LINE__, host_header);
+				return PKT_ACCEPT;
+			}
+		}
+
+		*host_end = '\0';
+
+		syslog(LOG_INFO, "%s:%u Extracted HTTP Host: %s\n", __func__, __LINE__, host_header);
+
+		// BUG FIX(review): arguments were swapped here and in the two
+		// calls below — hash table first, then the key to look up
+		ret = match_url_from_hash_table(global_bundle.download_url, host_header);
 		if (ret == CHECK_CONTINUE) {
 			// Check if the url is present in cname list of dns cache.
 			// If yes, then find the url name corresponding to the cname and check
 			// if that is present in the matched profile. If yes, then drop the packet.
-			char *cname = match_cname_from_dns_cache(payload_pkt);
+			char *cname = match_cname_from_dns_cache(host_header);
 			if (cname != NULL)
-				ret = match_url_from_filter(cname, filter);
+				ret = match_url_from_hash_table(global_bundle.download_url, cname);
+		}
+
+		// nothing matched in global hash_table, check the filter_section
+		if (ret == CHECK_CONTINUE)
+			ret = match_url_from_filter_section(host_header, filter);
+
+		if (ret == CHECK_CONTINUE) {
+			// Check if the url is present in cname list of dns cache.
+			// If yes, then find the url name corresponding to the cname and check
+			// if that is present in the matched profile. If yes, then drop the packet.
+ char *cname = match_cname_from_dns_cache(host_header); + if (cname != NULL) + ret = match_url_from_filter_section(cname, filter); } } @@ -476,67 +549,53 @@ static int mac_based_url_filtering(char *payload_pkt, struct urlfilter *filter, // @param mac_addr mac address to be searched in the profile // @param len length of mac_addr // @return on success PKT_ACCEPT or PKT_DROP else -1. -int profile_based_url_filtering(char *payload_pkt, char *mac_addr, +int profile_based_url_filtering(struct profile *pkt_profile, char *payload_pkt, char *mac_addr, size_t len, bool is_https_packet, bool is_ipv6) { int ret; - struct profile *p; + struct profile *p = pkt_profile; struct urlfilter *filter; struct host *h; - bool host_found = false; - - // Iterate over the global profile list - list_for_each_entry(p, &profiles, list) { - // Iterate over the hosts (MAC addresses) associated with this profile - list_for_each_entry(h, &p->hosts, list) { - if (strncasecmp(mac_addr, h->host_identifier, len) == 0) { - host_found = true; - break; - } - } - if (host_found) { - // Iterate over the filters associated with the profile - list_for_each_entry(filter, &p->filters, list) { - // Check if the filter is enabled - if (!filter->enable) { - continue; - } - - // Check if the filter has an active schedule first - struct schedule *schedule = NULL; - bool schedule_active = false; + // Iterate over the filters associated with the profile + list_for_each_entry(filter, &p->filters, list) { + // Check if the filter is enabled + if (!filter->enable) { + continue; + } - // no schedule means filter is active at all times - if (list_empty(&filter->schedules)) { - // The list is empty, no elements + // Check if the filter has an active schedule first + struct schedule *schedule = NULL; + bool schedule_active = false; + + // no schedule means filter is active at all times + if (list_empty(&filter->schedules)) { + // The list is empty, no elements + schedule_active = true; + } else { + // The list contains 
one or more elements + list_for_each_entry(schedule, &filter->schedules, list) { + // Perform time-based filtering first + if (!time_based_url_filtering(schedule)) { schedule_active = true; - } else { - // The list contains one or more elements - list_for_each_entry(schedule, &filter->schedules, list) { - // Perform time-based filtering first - if (!time_based_url_filtering(schedule)) { - schedule_active = true; - break; - } - } + break; } + } + } - // If no schedule is active, skip URL filtering - if (!schedule_active) { - continue; - } + // If no schedule is active, skip URL filtering + if (!schedule_active) { + continue; + } - // Perform URL filtering if the schedule is active - ret = mac_based_url_filtering(payload_pkt, filter, is_https_packet, is_ipv6); + // Perform URL filtering if the schedule is active + ret = mac_based_url_filtering(payload_pkt, filter, is_https_packet, is_ipv6); - // If URL filtering results in a drop, return the result - if (ret == PKT_DROP) { - return ret; - } - // else continue on to the next filter - } + // If URL filtering results in a drop, return the result + if (ret == PKT_DROP) { + return ret; } + // else continue on to the next filter } return PKT_ACCEPT; diff --git a/src/utils.c b/src/utils.c index 8ea13d7488dd7f5113fdc53f866dabffafbd98fe..75a5f809e02215bb11362b74b80a02a122c514a3 100644 --- a/src/utils.c +++ b/src/utils.c @@ -172,8 +172,8 @@ void print_global(struct blob_buf *b) syslog(LOG_INFO, "%s:%u-->google.com not found", __func__, __LINE__); } else { syslog(LOG_INFO, "%s:%u-->google.com found", __func__, __LINE__); - } + clock_gettime(CLOCK_MONOTONIC, &end_time); elapsed_time_ns = calculate_elapsed_time(&start_time, &end_time); snprintf(buf, 256, "(%lld ms) for %u records\n", elapsed_time_ns / 1000000, map_size);