I've been working on a project (link) to download a spreadsheet of known ransomware and properties and turn it into json so I can better consume the information within early detection projects.
I'm new to python - what can I be doing better? The destination json I'm converting can be found here.
update_json.py (entry point)
from excel_to_json import excel_to_json
from download_file import download_file
SOURCESHEET = 'https://docs.google.com/spreadsheets/d/1TWS238xacAto-fLKh1n5uTsdijWdCEsGIM0Y0Hvmc5g/pub?output=xlsx'
OUTPUTSHEET = '../RansomwareOverview.xlsx'
JSONFILE = '../ransomware_overview.json'
def write_json_file(json_data, filename):
output = open(filename, 'w')
output.writelines(json_data)
def generate_json(source_file, download_destination, json_file):
download_file(source_file, download_destination)
write_json_file(excel_to_json(download_destination), json_file)
generate_json(SOURCESHEET, OUTPUTSHEET, JSONFILE)
download_file.py
import urllib.request
def download_file(source, destination):
try:
urllib.request.urlretrieve(source, destination)
except IOError:
print('An error occured trying to write an updated spreadsheet. Do you already have it open?')
except urllib.error.URLError:
print('An error occured trying to download the file. Please check the source and try again.')
excel_to_json.py
import simplejson as json
import xlrd
from collections import OrderedDict
def excel_to_json(filename):
wb = xlrd.open_workbook(filename)
sh = wb.sheet_by_index(0)
mw = wb.sheet_by_index(2)
# List to hold dictionaries
c_list = []
# Iterate through each row in worksheet and fetch values into dict
for rownum in range(1, sh.nrows):
wares = OrderedDict()
row_values = sh.row_values(rownum)
if row_values[6]=="":
name = row_values[0]
gre=[name]
elif "," in row_values[6]:
e=row_values[6].split(",")
ge = [row_values[0]]
gre=e+ge
else:
gre=[row_values[0],row_values[6]]
wares['name'] = gre
wares['extensions'] = row_values[1]
wares['extensionPattern'] = row_values[2]
wares['ransomNoteFilenames'] = row_values[3]
wares['comment'] = row_values[4]
wares['encryptionAlgorithm'] = row_values[5]
wares['decryptor'] = row_values[7]
if row_values[8]=="":
wares['resources'] = [row_values[9]]
elif row_values[9]=="":
wares['resources']=[row_values[8]]
else:
wares['resources'] = [row_values[8], row_values[9]]
wares['screenshots'] = row_values[10]
for r in range(1, mw.nrows):
rowe = mw.row_values(r)
if row_values[0] == rowe[0]:
wares['microsoftDetectionName']=rowe[1]
wares['microsoftInfo'] = rowe[2]
wares['sandbox'] = rowe[3]
wares['iocs'] = rowe[4]
wares['snort'] = rowe[5]
c_list.append(wares)
# Serialize the list of dicts to JSON
return json.dumps(c_list, indent=4)