카테고리 없음

AWS ALB Accesslog 파서(Python)

달사자! 2023. 1. 6. 14:50

AWS ALB의 Access log 파서 프로그램입니다. 파이썬 버전이고 간혹 사용해서 메모 용도로 저장해 둡니다.


import re

# 1) https://docs.aws.amazon.com/ko_kr/elasticloadbalancing/latest/application/load-balancer-access-logs.html
# 2) https://stackoverflow.com/questions/68875527/regex-for-python-based-logparser-for-printing-aws-elb-logs

# Field names in capture-group order: fields[i] names the (i + 1)-th capture
# group of `regex`, which has 33 groups. ParseLogFile turns a field name into
# a group position with fields.index(), so the list must cover every group;
# with only "type" and "classification_reason" listed, the latter resolved to
# index 1 (the timestamp column) instead of the last group.
# Names follow the field order of the AWS ALB access-log documentation, with
# "client:port" / "target:port" split into ip + port and the quoted request
# split into verb, url and protocol, as the pattern does.
fields = [
    "type",
    "time",
    "elb",
    "client_ip",
    "client_port",
    "target_ip",
    "target_port",
    "request_processing_time",
    "target_processing_time",
    "response_processing_time",
    "elb_status_code",
    "target_status_code",
    "received_bytes",
    "sent_bytes",
    "request_verb",
    "request_url",
    "request_proto",
    "user_agent",
    "ssl_cipher",
    "ssl_protocol",
    "target_group_arn",
    "trace_id",
    "domain_name",
    "chosen_cert_arn",
    "matched_rule_priority",
    "request_creation_time",
    "actions_executed",
    "redirect_url",
    "error_reason",
    "target_port_list",
    "target_status_code_list",
    "classification",
    "classification_reason",
]

# Name of the log field to tally, read interactively from stdin as soon as
# this module is executed. ParseLogFile resolves it with fields.index(), so it
# has to be one of the names listed in `fields`.
field = str(input("what is the field needed? "))
# Pattern for one access-log line, with 33 capture groups in total. The two
# "address:port" pairs are captured as separate address and port groups, and
# the first quoted string is captured as three space-separated groups.
# NOTE(review): presumably this follows the ALB access-log layout described in
# the AWS page linked above -- confirm against current log samples, since the
# format gains fields over time.
regex = r'([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) ([^ ]*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^\s]+?)\" \"([^\s]+)\" \"([^ ]*)\" \"([^ ]*)\"'

def ParseLogFile(file, *, field_name=None, field_names=None, pattern=None):
    """Count how often each value of one log field occurs in a log file.

    Every line of ``file`` is searched with the log pattern; the capture group
    that corresponds to the requested field is used as the key of a tally.

    Args:
        file: Path of the log file to read.
        field_name: Name of the field to tally. Defaults to the module-level
            ``field``.
        field_names: Ordered field names, one per capture group of the
            pattern. Defaults to the module-level ``fields``.
        pattern: Regular expression (string or compiled pattern) whose capture
            groups are the log fields. Defaults to the module-level ``regex``.

    Returns:
        A dict mapping each distinct value of the field to the number of
        matching lines that carried it. Lines the pattern does not match
        (blank lines, truncated or foreign entries) are skipped.

    Raises:
        ValueError: If the field name is not in the list of field names. This
            is checked before the file is opened.
        IndexError: If the field's position exceeds the number of capture
            groups in the pattern.
    """
    if field_name is None:
        field_name = field
    if field_names is None:
        field_names = fields
    if pattern is None:
        pattern = regex

    # Both lookups are the same for every line, so do them once up front.
    index = field_names.index(field_name)
    matcher = re.compile(pattern)

    resultDict = {}
    with open(file, 'r') as log:
        for line in log:
            match = matcher.search(line)
            if match is None:
                # Previously a non-matching line produced an empty group list
                # and crashed with IndexError; skip it instead.
                continue
            val = match.groups()[index]
            resultDict[val] = resultDict.get(val, 0) + 1
    return resultDict
if __name__ == '__main__':