I have written a Python script that uses regular expressions to convert a poorly formatted data table in a text file into a proper data table. I am currently saving the output by redirecting it with > C:\my file location\file.csv at the command prompt. Is there a better way to run my script over an entire directory of files and programmatically write the output for each one to its own .csv file?
Here is the code:
import csv
import re
import sys
from pathlib import Path
# Column titles emitted as the first row of every generated table.
HEADER = ['DEALER CODE', 'DEALER NAME', 'DEALER PHONE', 'DATE', 'CHG DESCRIPTION', 'VIN', 'CURRENT', 'OVER 30-DAYS', 'OVER 60-DAYS', 'OVER 90-DAYS', 'OVER 120-DAYS', 'AGE']

# Input used when the script is started without arguments.  It must be a raw
# string: in a normal literal "\f" is a form-feed escape, not backslash + "f".
DEFAULT_INPUT = r'C:\file.txt'

# Compiled once instead of on every input line.
DEALER_CODE_RE = re.compile(r'\w\d{5} ')
PHONE_RE = re.compile(r'\d{3}-\d{3}-\d{4}')
VIN_RE = re.compile(r'\w{17}')


def parse_report(lines):
    """Turn the lines of one report into a list of table rows.

    Two kinds of line are recognised:

    * a dealer line: contains no "/" and matches ``DEALER_CODE_RE``.  Its
      first token is taken as the dealer code, an optional phone number is
      picked out with ``PHONE_RE``, and the text between the two is the name.
    * a charge line: contains "/" and a 17-character run matching
      ``VIN_RE``.  The first token before that run is the date, the remaining
      tokens before it form the description, and every token after it becomes
      one trailing column.

    Each charge line yields one row, prefixed with the most recently seen
    dealer's code, name and phone (empty strings if no dealer line has been
    seen yet).  All other lines are ignored.

    Args:
        lines: any iterable of strings (an open text file works).

    Returns:
        A list of rows (lists of strings); the first row is a copy of HEADER.
    """
    rows = [list(HEADER)]
    # Start empty so a charge line that precedes every dealer line produces a
    # row with blank dealer columns instead of raising NameError.
    dealer_code = dealer_name = dealer_phone = ''

    for raw in lines:
        line = raw.rstrip('\r\n')

        if '/' in line:
            vin = VIN_RE.search(line)
            if vin is None:
                continue
            # Slice around the match itself so a VIN that happens to repeat
            # later on the line cannot truncate the trailing columns.
            leading = line[:vin.start()].split()
            trailing = line[vin.end():].split()
            # Guard against a line where nothing precedes the VIN.
            date = leading[0] if leading else ''
            description = ' '.join(leading[1:])
            rows.append(
                [dealer_code, dealer_name, dealer_phone, date, description, vin.group(0)]
                + trailing
            )
        elif DEALER_CODE_RE.search(line):
            # The pattern requires a non-space character, so split() is
            # guaranteed to return at least one token here.
            dealer_code = line.split()[0]
            remainder = line.partition(dealer_code)[2]
            phone = PHONE_RE.search(line)
            if phone:
                dealer_phone = phone.group(0)
                remainder = remainder.partition(dealer_phone)[0]
            else:
                dealer_phone = ''
            dealer_name = remainder.strip()

    return rows


def convert_file(source, destination=None):
    """Parse one report file and write it out as CSV.

    Args:
        source: path of the text report to read.
        destination: path of the CSV file to create.  When None the CSV is
            written to standard output instead.
    """
    # The context manager closes the input even if parsing fails.
    with Path(source).open() as handle:
        rows = parse_report(handle)

    # csv.writer quotes fields that contain commas or quotes, which a plain
    # ','.join() would silently turn into extra columns.
    if destination is None:
        # stdout already translates "\n", so do not emit "\r\n" ourselves.
        csv.writer(sys.stdout, lineterminator='\n').writerows(rows)
        return
    # newline='' is what the csv module expects for files it writes.
    with Path(destination).open('w', newline='') as out:
        csv.writer(out).writerows(rows)


def main(argv=None):
    """Command-line entry point.

    With no arguments, DEFAULT_INPUT is converted and printed to standard
    output.  Otherwise each argument is either a report file or a directory;
    every ``*.txt`` file directly inside a directory is processed.  Each
    input produces a ``.csv`` file with the same name next to it.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        0, for use as the process exit status.
    """
    targets = sys.argv[1:] if argv is None else list(argv)
    if not targets:
        convert_file(DEFAULT_INPUT)
        return 0

    for target in map(Path, targets):
        sources = sorted(target.glob('*.txt')) if target.is_dir() else [target]
        for source in sources:
            destination = source.with_suffix('.csv')
            if destination == source:
                # Never overwrite the input when it is itself named *.csv.
                destination = source.with_name(source.name + '.csv')
            convert_file(source, destination)
    return 0


if __name__ == '__main__':
    sys.exit(main())