As the subject suggests:
- Loop recursively through folders
- All files are XLSX
- Remove all non-ASCII characters from the cell values
- Save as a CSV
Can it be improved with regard to speed? Would using openpyxl
help?
import os
import xlrd
import csv
# Python 2 script (it relies on the ``unicode`` and ``xrange`` builtins and on
# binary-mode CSV output).  Walks a folder tree and, for every .xlsx workbook
# found, writes the first worksheet next to it as "<name>_csv.csv" with all
# non-ASCII characters removed from the cell values.

def _xlsx_to_csv(xlsx_path):
    """Write the first sheet of *xlsx_path* to ``<stem>_csv.csv``.

    Each cell value is converted to text and every character that cannot be
    encoded as ASCII is dropped.  All fields are quoted in the output.
    """
    # splitext strips only the trailing extension; str.replace would also
    # remove any ".xlsx" occurring earlier in the path (e.g. in a folder name).
    stem, _ext = os.path.splitext(xlsx_path)
    sheet = xlrd.open_workbook(xlsx_path).sheet_by_index(0)
    # 'wb' is the mode the Python 2 csv module expects.  The with-block
    # guarantees the file is closed even if converting a row raises.
    with open(stem + '_csv.csv', 'wb') as csv_file:
        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
        # One writerows call fed by a generator: rows are still produced one
        # at a time, but the per-row write loop runs inside the csv module.
        writer.writerows(
            [unicode(value).encode('ascii', 'ignore')
             for value in sheet.row_values(rownum)]
            for rownum in xrange(sheet.nrows)
        )


# Raw string so the backslashes in the Windows path are never read as escapes.
for subdir, dirs, files in os.walk(r"C:\Users\Alan\Downloads\Knowledge"):
    for filename in files:
        # Skip anything that is not a workbook -- in particular the
        # "*_csv.csv" files this script itself creates, which would otherwise
        # be handed to xlrd when the script is run a second time.
        if not filename.lower().endswith('.xlsx'):
            continue
        _xlsx_to_csv(os.path.join(subdir, filename))