Can a thread be created that keeps the queue populated with lines from the source file, text.txt, while the results are simultaneously written to the output file, tagfile.csv, whenever the size of tagdict reaches the tagreset value?
# Producer: stream text.txt line by line into the shared queue `q`, and dump
# the accumulated tags to tagfile.csv whenever tagdict has grown to the reset
# threshold.
tagdict = {}    # matched tag text -> start offset of the match (filled by worker())
tagreset = 10   # flush tagdict once it holds at least this many entries
with open('text.txt') as f:
    for line in f:
        # ">=" rather than "==": worker() adds one entry per regex match, so a
        # single line can push the size past tagreset without ever equalling it.
        if len(tagdict) >= tagreset:
            # NOTE(review): worker() updates tagdict while holding `mutex`;
            # this flush reads and rebinds it without that lock -- confirm
            # whether the lock should be taken here as well.
            with open("tagfile.csv", "a") as tagfile:
                for key, start in tagdict.items():
                    # Record layout is kept exactly as originally written:
                    # ':<start-1>"' closes the previous row, then a newline,
                    # then '"<key>","<start>' opens the next one. The integer
                    # parts go through str() because str + int raises TypeError.
                    tagstring = (':' + str(int(start) - 1) + '"\n"'
                                 + key + '","' + str(start))
                    tagfile.write(tagstring)
            tagdict = {}
        q.put(line)
def worker(queue):
    """Drain *queue*, recording every ``<tag>...</tag>`` match in ``tagdict``.

    Lines are taken with a non-blocking get; the worker returns as soon as the
    queue reports that it is empty. For each match, the full matched text
    (tags included) is stored as the key and the match's start offset within
    its line as the value.

    Args:
        queue: Queue of text lines. Its ``get_nowait()`` must raise
            ``Queue.Empty`` when no item is available.
    """
    tag_pattern = re.compile(r'<tag>(.*?)</tag>')
    while True:
        try:
            # Actually dequeue a line. Without a get() call nothing can raise
            # Queue.Empty, so the loop would never terminate.
            line = queue.get_nowait()
        except Queue.Empty:
            return

        # Scan outside the lock so workers can search lines concurrently.
        found = {}
        for match in tag_pattern.finditer(line):
            found[match.group(0)] = match.start()

        if found:
            # Hold the lock only for the shared-dict update, not for the
            # lifetime of the worker, so the threads do not serialize.
            with mutex:
                tagdict.update(found)
        else:
            print("no title matches found")
# Launch a fixed-size pool of worker threads, each consuming from the shared
# queue `q`.
thread_count = 5
threads = [Thread(target=worker, args=(q,)) for _ in range(thread_count)]
for t in threads:
    t.start()