Source code for bloxone.utils

#!/usr/local/bin/python3
# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
'''
------------------------------------------------------------------------

 Description:
    Simple utility functions for data type validation, domain handling,
    and data normalisationa specifically with the aim of supporting
    queries to TIDE and Dossier.

 Requirements:
   Python3 with re, ipaddress, requests 

 Author: Chris Marrison

 Date Last Updated: 20210714

 Todo:

 Copyright (c) 2018 Chris Marrison / Infoblox

 Redistribution and use in source and binary forms,
 with or without modification, are permitted provided
 that the following conditions are met:

 1. Redistributions of source code must retain the above copyright
 notice, this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
 notice, this list of conditions and the following disclaimer in the
 documentation and/or other materials provided with the distribution.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.

------------------------------------------------------------------------
'''
__version__ = '0.0.3'
__author__ = 'Chris Marrison'
__author_email__ = 'chris@infoblox.com'

import logging
import os
import re
import ipaddress
import urllib.parse
import sqlite3

# ** Global Vars **


# ** Data Validation functions ** #

[docs]def data_type(qdata, host_regex, url_regex): ''' Validate and determine data type (host, ip or url) Parameters: qdata (str): data to determine type/validity host_regex/url_regex (re): pre-compiled regexes Returns: dtype (str): data type of qdata ("ip", "host", or "url") ''' if validate_ip(qdata): dtype = "ip" elif validate_url(qdata, url_regex): dtype = "url" elif validate_fqdn(qdata, host_regex): dtype = "host" else: dtype = "invalid" return dtype
[docs]def buildregex(): ''' Pre-compile 'standard' regexes as used by data_type and validate_XXX functions Parameters: none Returns: host_regex (re): Compiled regex for hostnames url_regex (re): Compiled regex for URLs ''' # Added _ for support of Microsoft domains host_regex = re.compile(r'(?!-)[A-Z\d\-\_]{1,63}(?<!-)$', re.IGNORECASE) url_regex = re.compile( # r'^(?:http|ftp)s?://' # http:// or https:// # http:// or https:// r'^(?:http)s?://' # domain... r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # localhost... r'localhost|' # ...or ipv4 r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|' # ...or ipv6 r'\[?[A-F0-9]*:[A-F0-9:]+\]?)' # optional port r'(?::\d+)?' r'(?:/?|[/?]\S+)$', re.IGNORECASE) return host_regex, url_regex
[docs]def validate_fqdn(hostname, regex): ''' Validate input data is a legitmate fqdn Parameters: hostname (str): fqdn as a string regex (obj): Compiled regex for hostnames Returns: bool: Return True for valid and False otherwise ''' if len(hostname) > 255 or len(hostname) < 1: result = False if hostname.endswith("."): # strip exactly one dot from the right, if present hostname = hostname[:-1] result = all(regex.match(x) for x in hostname.split(".")) return result
[docs]def validate_ip(ip): ''' Validate input data is a valid IP address Parameters: ip (str): ip address as a string Returns: bool: Return True for valid and False otherwise ''' try: ipaddress.ip_address(ip) result = True except ValueError: result = False return result
[docs]def validate_url(url, regex): ''' Validate input data is a valid URL Parameters: url (str): string to verify as URL regex (re): pre-compiled regex obj Returns: bool: Return True for valid and False otherwise ''' result = regex.match(url) return result
# ** Misc Functions **
[docs]def reverse_labels(domain): ''' Reserve order of domain labels (or any dot separated data, e.g. IP) Parameters: domain (str): domain.labels Returns: rdomain (str): labels.domain ''' rdomain = "" labels = domain.split(".") for l in reversed(labels): if rdomain: rdomain = rdomain+"."+l else: rdomain = l return rdomain
# ** Data Conversion functions ** #
[docs]def convert_url_to_host(url): ''' Break down URL and return host element Parameters: url (str): Validated URL Returns: host (str): hostname or ip ''' # Break down URL and return host element componants = url.split("/") host = componants[2] # Remove port if present if ":" in host: componants = host.split(":") host = componants[0] return host
[docs]def normalise(item, itype=None, port=False, www=False): ''' Take ip, host or url item process and return normalise data. Parameters: item (str): item to normalise itype (str): One of ["host", "url", "ip"] port (bool): stip port number e.g. :8080 www (bool): strip www. from hostname Returns: normalised (str): Normalised item or "invalid" ''' # Local varlables # Check for itype being set if not itype: # Build regexes for data_type checking host_regex, url_regex = buildregex() itype = data_type(item, host_regex, url_regex) if itype != "invalid": # Normalise item if itype == "url": # Parse URL parsed_url = urllib.parse.urlparse(item) # Strip unneeded elements (protocol, params, query, *port, *www) if port: item = parsed_url.hostname + parsed_url.path else: item = parsed_url.netloc + parsed_url.path # Remove www if required if www and item.startswith('www.'): item = item[4:] elif itype == "host": # Remove trailing dot if present if item.endswith("."): item = item[:-1] # Remove port if required if port and ":" in item: componants = item.split(":") item = componants[0] # Remove www if required if www and item.startswith('www.'): item = item[4:] # Ensure lower case item = item.lower() elif itype == "ip": # Remove port if required if port and ":" in item: componants = item.split(":") item = componants[0] normalised = item else: normalised = itype return normalised
[docs]def count_labels(fqdn): ''' Count number of labels in an FQDN Parameters: fqdn (str): Hostname as fqdn Returns: count (int): number of labels ''' labels = [] labels = fqdn.split(".") count = len(labels) return count
[docs]def strip_host(fqdn): ''' Take FQDN and strip first label or fqdn if no. of labels is 2 or less Parameters: fqdn (str): Hostname as fqdn Returns: domain (str): stripped domain down to two labels ''' labels = [] domain = "" labels = fqdn.split(".") if len(labels) <= 2: domain = fqdn else: domain = get_domain(fqdn, no_of_labels=(len(labels) - 1)) ''' Alternate code with get_domain host = labels.pop(0) for label in labels: domain += label domain += "." domain = domain.rstrip(".") ''' return domain
[docs]def get_domain(fqdn, no_of_labels=2): ''' Take FQDN and return n label domain or fqdn if no. of labels is 2 or less Parameters: fqdn (str): Hostname as fqdn no_of_labels (int): Number of labels to return default = 2 Returns: domain (str): N label domain name or fqdn ''' # Local variables labels = [] domain = "" count = 0 labels = fqdn.split(".") count = len(labels) if count <= no_of_labels: domain = fqdn else: for label in range((count - no_of_labels), (count)): domain += labels[label] domain += "." # Strip last "." domain = domain.rstrip(".") return domain
# ** Local db Functions **
[docs]def opendb(dbfile): ''' Open sqlite db and return cursor() Parameters: dbfile (str): path to file Returns: cursor (obj): Returns a db.cursor() ''' if os.path.isfile(dbfile): db = sqlite3.connect(dbfile) db.row_factory = sqlite3.Row cursor = db.cursor() else: logging.error("Database "+dbfile+" not found.") cursor = None return cursor
[docs]def get_table(cursor): ''' Determine db table and return Parameters: cursor (obj): db.cursor object Returns: table (str): Table name as string ''' # ** Determine table name ** # select = 'SELECT name FROM sqlite_master WHERE type="table"' cursor.execute(select) tables = cursor.fetchall() # ** Expecting single table ** # if len(tables) == 1: # ** Assume table is correct (even if it isn't) ** # table = tables[0][0] else: # ** Print error and exit ### logging.error("DB not of correct format - too many tables") table = None return table
[docs]def db_query(db_cursor, table, query_type, query_data, *flags): ''' Perform db query and return appropriate rows Parameters: db_cursor (obj): db.cursor object table (str): database table name query_type (str): data type for query query_data (str): search string Returns: rows (list): (All matching db rows ''' if query_type == "host": # ** Form DB Query ** select = ('SELECT * FROM ' + table + ' WHERE host="' + query_data + '" OR domain="' + query_data+'"') # Ignore class = "Policy" if flags: select = select+' AND property!="Policy_UnsolictedBulkEmail"' elif query_type == "ip": # ** Form DB Query ** select = 'SELECT * FROM '+table+' WHERE ip="'+query_data+'"' # Ignore class = "Policy" if flags: select = select+' AND property!="Policy_UnsolictedBulkEmail"' elif query_type == "url": # ** Form DB Query ** select = 'SELECT * FROM '+table+' WHERE url="'+query_data+'"' # Ignore class = "Policy" if flags: select = select+' AND property!="Policy_UnsolictedBulkEmail"' else: logging.error("Invalid Type for {} data type for {}" " - should not be here".format(query_type, query_data)) return None db_cursor.execute(select) rows = db_cursor.fetchall() return rows