You don't need regular expressions for this, since your data is pretty straightforward. First, note that you can organize the data without even parsing the date, because you can use simple string comparison:
def proc_lines(lines):
    """Group consecutive log lines by their timestamp.

    Each non-blank line is split on whitespace; the first two fields are
    joined into the timestamp string and the fourth field is taken as the
    Fahrenheit reading, with its trailing one-character unit suffix removed
    (e.g. ``2013/09/30 11:23:01 Temperature 41.34F 5.19C``).

    Blank or whitespace-only lines (such as the trailing newline of a file)
    are skipped instead of raising IndexError.

    Returns a list of ``(timestamp, temperatures)`` tuples in input order,
    where ``timestamp`` is the unparsed ``"date time"`` string and
    ``temperatures`` is a list of floats.  Timestamps are compared as plain
    strings, and only adjacent lines sharing a timestamp are grouped.
    """
    cur_date = None
    cur_temps = []
    results = []
    for line in lines:
        parts = line.split()
        if not parts:
            #nothing to parse on an empty line
            continue
        date = "%s %s" % (parts[0], parts[1])
        if date != cur_date:
            if cur_temps:
                #save current data
                results.append((cur_date, cur_temps))
            #reset state
            cur_date = date
            cur_temps = []
        #add the line's temperature in fahrenheit, stripping out the 'F'
        cur_temps.append(float(parts[3][:-1]))
    #flush the final group, which no later date change has saved
    if cur_temps:
        results.append((cur_date, cur_temps))
    return results
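Now `results` will be a list of `(date, temperatures)` tuples, where `date` is the still-unparsed date string and `temperatures` is the list of Fahrenheit readings recorded at that timestamp: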
>>> lines = """2013/09/30 11:23:01 Temperature 41.34F 5.19C
2013/09/30 11:23:01 Temperature 99.84F 37.69C
2013/09/30 11:23:01 Temperature 65.86F 18.81C
2013/09/30 11:25:02 Temperature 41.67F 5.38C
2013/09/30 11:25:02 Temperature 65.64F 18.69C
2013/09/30 11:25:02 Temperature 98.83F 37.12C""".split("\n")
>>> results = proc_lines(lines)
>>> results
[('2013/09/30 11:23:01', [41.340000000000003, 99.840000000000003,
65.859999999999999]),
('2013/09/30 11:25:02', [41.670000000000002, 65.640000000000001,
98.829999999999998])]
You can use `datetime.datetime.strptime` to actually parse the date and then reformat it (subtracting 1 from the month, as you asked):
>>> import datetime
def proc_datestr(date, fmt="%Y/%m/%d %H:%M:%S"):
    """Convert a timestamp string into a ``Date(...)`` string.

    ``date`` is parsed with ``datetime.datetime.strptime`` using ``fmt``,
    which defaults to the ``YYYY/MM/DD HH:MM:SS`` layout.  Pass a different
    ``fmt`` to accept timestamps written in another layout.

    Returns ``"Date(year, month, day, hour, minute, second)"`` with the
    month reduced by one, so January is 0 and December is 11.

    ``strptime`` raises ValueError when ``date`` does not match ``fmt``.
    """
    dt = datetime.datetime.strptime(date, fmt)
    #the month is deliberately emitted zero-based
    return "Date(%d, %d, %d, %d, %d, %d)" % (
        dt.year, dt.month - 1, dt.day, dt.hour, dt.minute, dt.second)
>>> proc_datestr(results[0][0])
'Date(2013, 8, 30, 11, 23, 1)'
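Note the format string `"%Y/%m/%d %H:%M:%S"`, which tells `strptime` how to parse the date; the individual directives are detailed in the `datetime` module documentation under "strftime() and strptime() Behavior". This lovely standard-library function obviates the need for you to write your own regexp to deal with the date.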
Then you just process each result and dump it to JSON as follows:
>>> import json
def proc_result(result):
    """Serialize one ``(date, temps)`` tuple as a JSON string.

    The output is an object with a single key ``'c'`` holding a list of
    ``{'v': ...}`` cells: first the date string produced by
    ``proc_datestr``, then one cell per temperature in order.
    """
    date, temps = result
    #the date cell always comes first, followed by the readings
    cells = [{'v': proc_datestr(date)}]
    cells.extend({'v': temp} for temp in temps)
    return json.dumps({'c': cells})
>>> proc_result(results[0])
'{"c": [{"v": "Date(2013, 8, 30, 11, 23, 1)"}, {"v": 41.340000000000003}, {"v": 99.840000000000003}, {"v": 65.859999999999999}]}'
>>> proc_result(results[1])
'{"c": [{"v": "Date(2013, 8, 30, 11, 25, 2)"}, {"v": 41.670000000000002}, {"v": 65.640000000000001}, {"v": 98.829999999999998}]}'