#! /usr/bin/python
"""Watch nginx access logs and publish one colour per new visitor over MQTT.

Every time a watched ``access.log`` file changes, the file is re-read, the
IP addresses of the entries newer than the previous parse are collected, each
address is hashed into an RGB triple, and the triple is published as JSON on
``<BROKER_TOPIC>/visits``.
"""

from os import path
from dateutil import parser
import datetime
import json
import re
import hashlib
import paho.mqtt.publish as publish
from watchfiles import Change, watch

import config

# config stuff
LOG_DIR = config.LOG_DIR
BROKER_HOST = config.BROKER_HOST
BROKER_PORT = config.BROKER_PORT
BROKER_ACCOUNT = config.BROKER_ACCOUNT
BROKER_PASSWORD = config.BROKER_PASSWORD
BROKER_TOPIC = config.BROKER_TOPIC

# Entries timestamped before this moment are ignored; it starts at script
# launch and is moved forward after every successful parse.
last_parse = datetime.datetime.now()

# Access-log line pattern, compiled once instead of on every file change.
# It is split into adjacent raw literals (one alternative per line) purely for
# readability; the pieces concatenate into a single pattern and the literal
# spaces are significant (re.VERBOSE is NOT used).
LINE_FORMAT = re.compile(
    # client address: dotted IPv4, or one of several IPv6 notations
    r"(?P<ipaddress>"
    r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}"
    r"|(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}"
    r"|([0-9a-fA-F]{1,4}:){1,7}:"
    r"|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}"
    r"|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}"
    r"|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}"
    r"|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}"
    r"|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}"
    r"|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})"
    r"|:((:[0-9a-fA-F]{1,4}){1,7}|:)"
    r"|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}"
    r"|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])"
    r"|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))"
    r")"
    r" - - "
    # timestamp such as [10/Oct/2000:13:55:36 -0700]
    r"\[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\]"
    r" "
    # request line: method, URL, protocol version
    r'((\"(GET|POST|HEAD|PUT|DELETE) )(?P<url>.+)(http\/(1\.1|2\.0)"))'
    r" "
    r"(?P<statuscode>\d{3})"
    r" "
    r"(?P<bytessent>\d+)"
    r" "
    r'(?P<referer>-|"([^"]+)")'
    r" "
    r'(["](?P<useragent>[^"]+)["])',
    re.IGNORECASE,
)


def filter_logs(change: Change, path: str) -> bool:
    """Return True when the changed path is an ``access.log`` file.

    Used as the ``watch_filter`` callback; ``change`` is accepted because the
    callback receives it, but only ``path`` is inspected.
    """
    return path.endswith("access.log")


def parse_nginx_log(log: str) -> tuple[str, ...]:
    """Return the distinct visitor IPs logged since the previous parse.

    The file is read from the start; lines that do not match ``LINE_FORMAT``
    and entries timestamped before the module-level ``last_parse`` are
    skipped. Each address appears once, in order of first appearance.

    On success ``last_parse`` is moved to the current time. If the file
    cannot be opened, the error is printed, an empty tuple is returned and
    ``last_parse`` is left untouched so the entries are not lost.

    Args:
        log: Path of the access log to read.

    Returns:
        A tuple of IP address strings (possibly empty).
    """
    global last_parse

    try:
        logfile = open(log)
    except OSError as e:
        print(f"Error while trying to open the logfile: {e}")
        # Previously execution fell through and crashed on the unbound
        # `logfile`; there is nothing to report for an unreadable file.
        return ()

    seen = set()  # O(1) duplicate check
    visits = []  # keeps first-seen order for the returned tuple
    with logfile:  # make sure the handle is closed, even on error
        for line in logfile:
            data = LINE_FORMAT.search(line)
            if not data:
                continue

            ip = data["ipaddress"]
            # The UTC offset is dropped so the value compares against the
            # naive local time stored in last_parse.
            date = parser.parse(data["dateandtime"], fuzzy=True, ignoretz=True)

            # NOTE(review): log timestamps have one-second resolution while
            # last_parse carries microseconds, so an entry logged later in
            # the same second as the previous parse compares as older and is
            # skipped. Left as-is; tracking a file offset would be needed to
            # fix this without re-publishing entries.
            if last_parse > date:
                # visitor from the past (before the script launch or
                # already processed)
                continue

            if ip in seen:
                # IP address already collected during this parse
                continue

            seen.add(ip)
            visits.append(ip)

    # save the parsing datetime for later
    last_parse = datetime.datetime.now()

    return tuple(visits)


def to_color(ip_address: str) -> tuple[int, int, int]:
    """Convert an IP address (or any string) to an (R, G, B) triple.

    The string is hashed with SHAKE-256 and the 3-byte digest is returned as
    three integers in the range 0-255, so the same input always yields the
    same colour.
    """
    digest = hashlib.shake_256(ip_address.encode(), usedforsecurity=False).digest(3)
    # indexing a bytes object already yields ints
    return (digest[0], digest[1], digest[2])


def to_mqtt(payload) -> None:
    """Publish ``payload`` as ``{"color": payload}`` on ``<BROKER_TOPIC>/visits``.

    Publishing is best-effort: any failure is printed and swallowed so that
    the watch loop keeps running.
    """
    try:
        publish.single(
            topic=f"{BROKER_TOPIC}/visits",
            payload=json.dumps({"color": payload}),
            qos=0,
            hostname=BROKER_HOST,
            port=BROKER_PORT,
            auth={"username": BROKER_ACCOUNT, "password": BROKER_PASSWORD},
            client_id="bisitariak",
        )
    except Exception as e:
        print(f"Error while trying to publish on the broker: {e}")


### actual script:
for changes in watch(LOG_DIR, watch_filter=filter_logs):
    # each item is a (change, path) pair; only the path is needed
    for _change, logfile in changes:
        for ip in parse_nginx_log(logfile):
            color = to_color(ip)
            to_mqtt(color)
            print(f"published color '{color}' for IP '{ip}'")