I would love some thoughts on this class that a wrote to extract user information from a logfile line.
I'm concerned about design patterns and the complexity of the code. The extract_user method feels especially awkward.
The class should be used as a public interface ala:
parser = UserExtraction(s)
data = parser.extract_user()
Now, the majority of the strings that I pass in will simply not be authentication strings. How do I make this available as some sort of response code / error-message to an end user? Right now I'm using self.error_message = yadayada. But I have a feeling this is not very pythonic.
I didn't write any documentation yet since I'm very uncertain about the general design.
Every detail is appreciated, I'm trying to get better at this.
import re
import logging
import os
import ConfigParser
class UserExtraction():
def __init__(self, string):
self.string = string
def _string_identification(self):
identifiers = {
'Successfully logged in' : 'login',
'logout' : 'logout',
}
regex_patterns = {
#TODO Store regexpatterns in outside file
"login" : "(\w{3}.+?\d{1,2} \d{2}:\d{2}:\d{2}).+?Authentication: \[([0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}) ## (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\] (.+?) - Successfully .+? Role: (.+?),",
"logout" : "(\w{3}.+?\d{1,2} \d{2}:\d{2}:\d{2}).+?Authentication: Unable to ping (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}), going to logout user (.+)"
}
for key, value in identifiers.iteritems():
if key in self.string:
self.event_type = value
self.regex = regex_patterns[value]
return True
return False
def _matching(self):
self.matches = re.search(self.regex, self.string)
if self.matches:
return True
else:
return False
def _match_to_dict(self):
matches = self.matches
if self.event_type == 'login':
self.userinfo = {
'date_time' : matches.group(1),
'mac_addr' : matches.group(2),
'ip_addr' : matches.group(3),
'login' : matches.group(4),
'role' : matches.group(5)
}
elif self.event_type == 'logout':
self.userinfo = {
'date_time' : matches.group(1),
'ip_addr' : matches.group(2),
'login' : matches.group(3)
}
if self.userinfo:
return True
def extract_user(self):
if self._string_identification() == True:
if self._matching() == True:
if self._match_to_dict() == True:
return self.userinfo
else:
self.error_message = "COULD NOT PARSE USERINFO INTO DICTIONARY"
else:
self.error_message = "COULD NOT MATCH AUTHENTICATION STRING"
else:
self.error_message = "NOT AN AUTHENTICATION STRING"
if __name__ == "__main__":
ROOT_PATH = os.path.dirname(__file__)
config = ConfigParser.ConfigParser()
config.readfp(open(os.path.join(ROOT_PATH + '/settings.cfg')))
filename = config.get('defaults', 'file_location')
LOG_FILENAME = config.get('defaults', 'log_location')
LOG_FORMAT = '%(asctime)s - %(levelname)s %(message)s'
LOG_LEVEL = config.get('defaults', 'logging_level')
#TODO Change datefmt to be consistent with Apache logfile
logging.basicConfig(filename=LOG_FILENAME, format=LOG_FORMAT, datefmt='%m/%d %I:%M:%S', level=eval(LOG_LEVEL))
sample_strings = []
sample_strings.append('''Jan 2 15:32:49 cam-1/10.0.0.1 Authentication: [01:26:b2:F8:39:27 ## 172.16.197.23] Anonymous - Successfully logged in, Provider: anon, L2 MAC address: 00:26:B2:F2:39:87, Role: User, OS: Mac OS 10.6''')
for s in sample_strings:
Processor = UserExtraction(s)
Processor.extract_user()
if hasattr(Processor, 'userinfo'):
print(Processor.userinfo)
else:
print(Processor.error_message)