I would like to refactor a large python method I wrote to have better practices. I wrote a method that parses a design file from the FSL neuroimaging library. Design files are text files with settings for the neuroimaging software. I initially wrote the code to do all the processing in a single loop though the file. I'm looking for all manner of suggestions, but mostly tips of design and best practices. Code section to follow, but you can also view the code on github along with the larger project here. I'd love input on the project as a whole, but I know that might be out of scope for this forum. Thanks for the help! Code structure:
- Define variables based on type of design file
- Parse design file based on type of design file
- Return the now populated variables setup in 1
.
def parse_design_file(self,fsf_lines, type):
"""
Parses design file information and return information in parsed variables that can be used by the csv methods
"""
analysis_name=''
output_path=''
zvalue=''
pvalue=''
if type == self.FIRST_TYPE or self.PRE_TYPE:
in_file=''
if type == self.FIRST_TYPE:
ev_convolves=dict()
ev_paths=dict()
ev_deriv=dict()
ev_temp=dict()
if type == self.ME_TYPE or type == self.FE_TYPE:
feat_paths=dict()
count=''
if type == self.FIRST_TYPE or type == self.FE_TYPE:
ev_names=dict()
evg_lines=list()
cope_names=dict()
cope_def_lines=list()
if type == self.PRE_TYPE:
tr=''
total_volumes=''
brain_thresh=''
motion_correction=''
smoothing=''
deleted=''
if type == self.FE_TYPE:
first_example_dir=''
if type == self.ME_TYPE:
FE_example_dir=''
for line in fsf_lines:
#regex matching
#all
output_match=re.search("set fmri\(outputdir\)",line)
feat_file_match=re.search("feat_files\(\d+\)",line)
total_vols_match=re.search("fmri\(npts\)", line)
z_match=re.search("set fmri\(z_thresh\)",line)
p_match=re.search("set fmri\(prob_thresh\)",line)
if output_match:
output_path=self.get_fsf_value(line,output_match.end())
#TODO hardcoded stripping here, make flexible
if type == self.ME_TYPE:
run=re.search("ME",line)
elif type == self.FIRST_TYPE or type == self.PRE_TYPE:
run=re.search("r\d\/",line)
elif type == self.FE_TYPE:
run=re.search("FE\d*",line)
if run:
analysis_name=self.get_fsf_value(line,run.end())
if type == self.ME_TYPE:
analysis_name=self.strip_cope(analysis_name)
if total_vols_match:
value=self.get_fsf_value(line,total_vols_match.end())
if type == self.ME_TYPE or type == self.FE_TYPE:
count=value
if z_match:
value=self.get_fsf_value(line,z_match.end())
zvalue=value
if p_match:
value=self.get_fsf_value(line,p_match.end())
pvalue=value
if feat_file_match:
if type == self.FIRST_TYPE or type == self.PRE_TYPE:
value=self.get_fsf_value(line,feat_file_match.end())
in_file=self.strip_root(value)
preproc_match=re.search("preproc.*feat",value)
#TODO inconsistent methodology here
if preproc_match:
self.preproc=value[preproc_match.start():preproc_match.end()]
print self.preproc
elif type == self.ME_TYPE or type == self.FE_TYPE:
value=self.get_fsf_value(line,feat_file_match.end())
index=self.get_fsf_indice(feat_file_match.group())
stripped=self.strip_fanal(line)
feat_paths[index]=stripped
if (type == self.ME_TYPE and not FE_example_dir) or (type == self.FE_TYPE and not first_example_dir):
set_match=re.search("set feat_files\(\d+\) \"",line)
no_cope=line[set_match.end():len(line)]
no_cope=no_cope.strip('\n')
no_cope=no_cope.strip('\"')
no_cope=self.strip_cope(no_cope)
if type == self.ME_TYPE:
FE_example_dir=no_cope
else:
first_example_dir=no_cope
if type == self.PRE_TYPE:
tr_match=re.search("fmri\(tr\)", line)
mc_match=re.search("set fmri\(mc\)",line)
smooth_match=re.search("set fmri\(smooth\)",line)
total_vols_match=re.search("fmri\(npts\)", line)
removed_volumes=re.search("fmri\(ndelete\)", line)
thresh_match=re.search("set fmri\(brain_thresh\)",line)
if tr_match:
tr=self.get_fsf_value(line,tr_match.end())
if mc_match:
value=self.get_fsf_value(line,mc_match.end())
if value == "1":
value = "Y"
else:
value = "N"
motion_correction=value
if smooth_match:
smoothing=self.get_fsf_value(line,smooth_match.end())
if removed_volumes:
deleted=self.get_fsf_value(line,removed_volumes.end())
if total_vols_match:
total_volumes=self.get_fsf_value(line,total_vols_match.end())
if thresh_match:
brain_thresh=self.get_fsf_value(line,thresh_match.end())
if type == self.FIRST_TYPE:
ev_conv_match=re.search("fmri\(convolve\d+\)", line)
ev_path_match=re.search("fmri\(custom\d+\)", line)
ev_deriv_match=re.search("fmri\(deriv_yn\d+\)", line)
ev_temps_match=re.search("fmri\(tempfilt_yn\d+\)", line)
if ev_conv_match:
conv=self.get_fsf_value(line,ev_conv_match.end())
index=self.get_fsf_indice(ev_conv_match.group())
conv_text={
"0" : "none",
"1" : "Gaussian",
"2" : "Gamma",
"3" : "Double-Gamma HRF",
"4" : "Gamma basis functions",
"5" : "Sine basis functions",
"6" : "FIR basis functions",
}
ev_convolves[index]=conv_text[conv]
if ev_deriv_match:
value=self.get_fsf_value(line,ev_deriv_match.end())
index=self.get_fsf_indice(ev_deriv_match.group())
if value == "1":
value = "Y"
else:
value = "N"
ev_deriv[index]=value
if ev_temps_match:
value=self.get_fsf_value(line,ev_temps_match.end())
index=self.get_fsf_indice(ev_temps_match.group())
if value == "1":
value = "Y"
else:
value = "N"
ev_temp[index]=value
if ev_path_match:
value=self.get_fsf_value(line,ev_path_match.end())
index=self.get_fsf_indice(ev_path_match.group())
ev_paths[index]=self.strip_root(value)
if type == self.FE_TYPE:
evg_match=re.search("fmri\(evg\d+\.\d+\)", line)
if evg_match:
evg_lines.append(line)
if type == self.FE_TYPE or type == self.FIRST_TYPE:
ev_name_match=re.search("fmri\(evtitle\d+\)", line)
cope_name_match=re.search("fmri\(conname_real\.\d+\)", line)
cope_def_match=re.search("fmri\(con_real\d+\.\d+\)", line)
if cope_name_match:
name=self.get_fsf_value(line,cope_name_match.end())
index=self.get_fsf_indice(cope_name_match.group())
cope_names[index]=name
if cope_def_match:
cope_def_lines.append(line)
if ev_name_match:
name=self.get_fsf_value(line,ev_name_match.end())
index=self.get_fsf_indice(ev_name_match.group())
ev_names[index]=name
if type == self.FIRST_TYPE or type == self.FE_TYPE:
design_matrix=[['0' for col in range(len(ev_names)+2)] for row in range(len(cope_names)+1)]
if 'ev_temp' in locals():
real_copes=list()
index_cope=1
real_copes.append(str(index_cope))
for i in range(1,len(ev_temp)+1):
ind=str(i)
if ev_temp[ind] == 'Y':
index_cope += 2
else:
index_cope += 1
real_copes.append(str(index_cope))
real_copes.pop()
design_matrix=self.fill_matrix(cope_def_lines,design_matrix,type,1,real_copes)
else:
design_matrix=self.fill_matrix(cope_def_lines,design_matrix,type,1)
for i in range(1,len(cope_names)+1):
ind=str(i)
design_matrix[i][0]=ind
for i in range(1,len(cope_names)+1):
ind=str(i)
design_matrix[i][1]=cope_names[ind]
for i in range(2,len(ev_names)+2):
ind=str(i-1)
design_matrix[0][i]=ev_names[ind]
design_matrix[0][0]='Cope #'
design_matrix[0][1]='Cope Name'
if type == self.PRE_TYPE:
return analysis_name,output_path,tr,total_volumes,deleted,in_file,motion_correction,brain_thresh,smoothing
elif type == self.FIRST_TYPE:
return analysis_name,output_path,in_file,design_matrix,ev_names,ev_paths,ev_convolves,ev_deriv,ev_temp,cope_names
elif type == self.ME_TYPE:
return analysis_name,output_path,pvalue,zvalue,feat_paths,count, FE_example_dir
elif type == self.FE_TYPE:
regressor_matrix=[['0' for col in range(int(count)+1)] for row in range(len(ev_names)+1)]
self.fill_matrix(evg_lines,regressor_matrix,5, 0)
for i in range(1,len(ev_names)+1):
ind=str(i)
regressor_matrix[i][0]=ev_names[ind]
for i in range(1,int(count)+1):
ind=str(i)
regressor_matrix[0][i]=ind
return analysis_name,output_path,feat_paths,count,design_matrix,regressor_matrix,ev_names,cope_names,first_example_dir