I am very new to programming, and this is my first working script. It runs fine, but I'm sure it could use a lot of optimization. If you see any blunders, or can help me condense the script, that would be fantastic.
#!/usr/bin/python
import sys, getopt, subprocess, os, tempfile, shutil, time
# Command-line contract: sys.argv[1] is the input FASTA handed to
# fasta_formatter, sys.argv[2] is the prefix put in front of every file
# this script generates.  Fail with a usage message instead of a bare
# IndexError traceback when either one is missing.
if len(sys.argv) < 3:
    sys.stderr.write("usage: %s <input.fasta> <output_prefix>\n" % sys.argv[0])
    sys.exit(2)

file_name = sys.argv[2]
# Both `pwd` and `file_location` keep a trailing separator because the rest
# of the script builds paths by plain string concatenation onto them.
pwd = os.path.join(os.getcwd(), "")
dirname = os.path.join(pwd, "Secretome_files")
file_location = os.path.join(dirname, "")

try:
    os.makedirs(dirname)
except OSError:
    # An already-existing directory is fine; anything else (permission
    # problem, or a regular file occupying the name) must not be swallowed.
    if not os.path.isdir(dirname):
        raise
def singleline():
    """Run fasta_formatter on the input FASTA with ``-w 0``.

    Reads the file named by ``sys.argv[1]`` and stores the tool's standard
    output in ``<file_location><file_name>singleline.fasta`` (the progress
    messages describe this as making the FASTA single line).

    Raises:
        OSError: if ``fasta_formatter`` cannot be started (e.g. not on PATH).
    """
    print("\nMaking fasta single line")
    file_in = sys.argv[1]
    out_path = file_location + file_name + "singleline.fasta"

    # Pass an argument list instead of a shell string so that paths with
    # spaces or shell metacharacters are handed over verbatim, and use a
    # context manager so the output handle is always closed.
    with open(out_path, "w") as file_out:
        status = subprocess.call(
            ["fasta_formatter", "-i", file_in, "-w", "0"],
            stdout=file_out,
        )
    if status != 0:
        sys.stderr.write(
            "WARNING: fasta_formatter exited with status %d\n" % status)
    print("Fasta now single line")
def signalp():
    """Run SignalP and derive the list of sequence names it reported.

    Steps, all operating on files named ``<file_location><file_name>...``:

    1. ``singleline()`` prepares ``singleline.fasta``.
    2. ``signalp -f short -m removed_SigPep.fasta singleline.fasta`` is run
       with its standard output captured in ``signalpOUT.txt``.
    3. ``fasta_formatter -t`` turns ``removed_SigPep.fasta`` into the
       tab-separated ``removed_SigPep_tab.fasta.txt``.
    4. The first tab-separated field of every row is written to
       ``listaftercut.txt``; the part of that field before the first space
       is written to ``goodlistSigP.txt``.

    Raises:
        OSError: if one of the external programs cannot be started.
    """
    singleline()

    prefix = file_location + file_name
    mature_fasta = prefix + "removed_SigPep.fasta"
    tab_path = prefix + "removed_SigPep_tab.fasta.txt"

    print("\nRunning SignalP")
    # The original relied on shell ">" redirection; capturing stdout directly
    # gives the same file without going through a shell.
    with open(prefix + "signalpOUT.txt", "w") as report:
        status = subprocess.call(
            ["signalp", "-f", "short", "-m", mature_fasta,
             prefix + "singleline.fasta"],
            stdout=report,
        )
    if status != 0:
        sys.stderr.write("WARNING: signalp exited with status %d\n" % status)
    print("SignalP Complete")

    print("\nCreating SignalP protein list")
    with open(tab_path, "w") as table:
        status = subprocess.call(
            ["fasta_formatter", "-i", mature_fasta, "-t"],
            stdout=table,
        )
    if status != 0:
        sys.stderr.write(
            "WARNING: fasta_formatter exited with status %d\n" % status)

    # Column extraction is done here rather than by spawning `cut`.
    # listaftercut.txt is still produced so the set of output files is
    # unchanged.
    with open(tab_path, "r") as rows, \
            open(prefix + "listaftercut.txt", "w") as headers, \
            open(prefix + "goodlistSigP.txt", "w") as names:
        for row in rows:
            header = row.rstrip("\r\n").split("\t", 1)[0]
            headers.write(header + "\n")
            # Strip the line ending *before* splitting on the space;
            # otherwise a header without a space keeps its newline and the
            # list gets a spurious blank line after the name.
            name = header.partition(" ")[0]
            if name:
                names.write(name + "\n")
def sigpFasta():
    """Run faSomeRecords to build ``signalP_pass.fasta``.

    Invokes ``faSomeRecords singleline.fasta goodlistSigP.txt
    signalP_pass.fasta`` with every file name prefixed by
    ``<file_location><file_name>``.

    Raises:
        OSError: if ``faSomeRecords`` cannot be started.
    """
    prefix = file_location + file_name
    print("\nRetreving SignalP fasta")
    # Argument list instead of a concatenated shell string: no quoting
    # problems with unusual characters in the paths.
    status = subprocess.call([
        "faSomeRecords",
        prefix + "singleline.fasta",
        prefix + "goodlistSigP.txt",
        prefix + "signalP_pass.fasta",
    ])
    if status != 0:
        sys.stderr.write(
            "WARNING: faSomeRecords exited with status %d\n" % status)
def tmhmm():
    """Run tmhmm on ``removed_SigPep.fasta`` and list the ``PredHel=0`` hits.

    The tool's standard output is stored in ``tmhmmOUT.txt``.  For every
    report line containing the tab-delimited token ``PredHel=0``, the text
    before the first tab is written to ``tmhmmGoodlist.txt``.  All file names
    are prefixed by ``<file_location><file_name>``.

    Raises:
        OSError: if ``tmhmm`` cannot be started.
    """
    prefix = file_location + file_name
    report_path = prefix + "tmhmmOUT.txt"

    print("\nRunning tmhmm on mature signalp sequences only")
    with open(report_path, "w") as report:
        status = subprocess.call(
            ["tmhmm", prefix + "removed_SigPep.fasta"],
            stdout=report,
        )
    if status != 0:
        sys.stderr.write("WARNING: tmhmm exited with status %d\n" % status)
    print("tmhmm complete")

    print("\nIdentifying sequences without tm regions.")
    # Opened with "w", not "a": appending kept names from earlier runs in the
    # list, so a sequence that no longer qualifies could still pass.
    with open(report_path, "r") as report, \
            open(prefix + "tmhmmGoodlist.txt", "w") as good:
        for line in report:
            if "\tPredHel=0\t" in line:
                good.write(line.partition("\t")[0] + "\n")
def targetp():
    """Run ``targetp -N`` on ``signalP_pass.fasta`` and list the "S" rows.

    The tool's standard output is stored in ``targetpOUT.txt``.  The report
    minus its first eight and last two lines is copied to
    ``targetpOUT_parse.txt``; for every such row that has a field equal to
    ``S`` after the name, the name (first field) is written to
    ``targetpGoodlist.txt``.  All file names are prefixed by
    ``<file_location><file_name>``.

    Raises:
        OSError: if ``targetp`` cannot be started.
    """
    prefix = file_location + file_name
    report_path = prefix + "targetpOUT.txt"

    print("\nRunning TargetP on SignalP pass seqeunces only")
    with open(report_path, "w") as report:
        status = subprocess.call(
            ["targetp", "-N", prefix + "signalP_pass.fasta"],
            stdout=report,
        )
    if status != 0:
        sys.stderr.write("WARNING: targetp exited with status %d\n" % status)
    print("TargetP complete")

    print("\nIdentifying sequences that are secreated.")
    with open(report_path, "r") as report:
        lines = report.readlines()
    # Same slice as before: drop 8 leading and 2 trailing non-data lines.
    # NOTE(review): the counts depend on the installed TargetP's report
    # layout -- confirm if the tool version changes.
    data_rows = lines[8:-2]
    with open(prefix + "targetpOUT_parse.txt", "w") as parsed:
        parsed.writelines(data_rows)

    # "w" rather than "a" so names from previous runs do not linger.
    with open(prefix + "targetpGoodlist.txt", "w") as good:
        for row in data_rows:
            fields = row.split()
            # Look for "S" as a whole field after the name.  The previous
            # substring test ('"S" in line') also matched any sequence whose
            # *name* merely contained a capital S.
            if "S" in fields[1:]:
                good.write(fields[0] + "\n")
def wolfpsort():
    """Run ``runWolfPsortSummary fungi`` and list the ``extr`` sequences.

    ``singleline.fasta`` is fed to the tool on standard input; standard
    output goes to ``wolfPsortOUT.txt`` and standard error to
    ``wolfPsortErrorLog.txt``.  The output minus its first line is copied to
    ``wolfPsortOUT_parse.txt``; for every row whose second
    whitespace-separated field is ``extr``, the first field is written to
    ``wolfPsortGoodlist.txt``.  All file names are prefixed by
    ``<file_location><file_name>``.

    Raises:
        IOError/OSError: if ``singleline.fasta`` is missing or the tool
            cannot be started.
    """
    prefix = file_location + file_name
    report_path = prefix + "wolfPsortOUT.txt"
    searchValue = "extr"

    print("\nRunning WoLFPSORT")
    # stdin= replaces the shell "<" redirection of the original command.
    with open(prefix + "singleline.fasta", "r") as fasta, \
            open(report_path, "w") as report, \
            open(prefix + "wolfPsortErrorLog.txt", "w") as error_log:
        status = subprocess.call(
            ["runWolfPsortSummary", "fungi"],
            stdin=fasta,
            stdout=report,
            stderr=error_log,
        )
    if status != 0:
        sys.stderr.write(
            "WARNING: runWolfPsortSummary exited with status %d\n" % status)
    print("WoLFPSORT complete")

    with open(report_path, "r") as report:
        data_rows = report.readlines()[1:]  # first line is not a data row
    with open(prefix + "wolfPsortOUT_parse.txt", "w") as parsed:
        parsed.writelines(data_rows)

    # "w" rather than "a" so names from previous runs do not linger.
    with open(prefix + "wolfPsortGoodlist.txt", "w") as good:
        for row in data_rows:
            fields = row.split()
            # Guard the index: a blank or one-word line used to raise
            # IndexError and abort the whole pipeline.
            if len(fields) > 1 and fields[1] == searchValue:
                good.write(fields[0] + "\n")
def secretome():
    """Intersect the four good lists into ``secretome_pass.txt``.

    Reads ``goodlistSigP.txt``, ``tmhmmGoodlist.txt``,
    ``targetpGoodlist.txt`` and ``wolfPsortGoodlist.txt`` (each prefixed by
    ``<file_location><file_name>``), strips every line, and writes the
    non-empty names present in all four lists, one per line, in sorted order.
    """
    prefix = file_location + file_name

    def _read_names(path):
        # One stripped name per line; the handle is closed on return.
        with open(path, "r") as handle:
            return set(line.strip() for line in handle)

    name_sets = [
        _read_names(prefix + suffix)
        for suffix in (
            "goodlistSigP.txt",
            "tmhmmGoodlist.txt",
            "targetpGoodlist.txt",
            "wolfPsortGoodlist.txt",
        )
    ]
    passing = set.intersection(*name_sets)
    passing.discard("")  # blank lines in the inputs are not names

    # Sorted so repeated runs give byte-identical output (set order is
    # arbitrary), and closed explicitly so the file is complete on disk
    # before anything else reads it.
    with open(prefix + "secretome_pass.txt", "w") as newfile:
        for name in sorted(passing):
            newfile.write(name + "\n")
def secFasta():
    """Run faSomeRecords to build the final ``secretome_pass.fasta``.

    Invokes ``faSomeRecords singleline.fasta secretome_pass.txt <output>``.
    The two inputs are prefixed by ``<file_location><file_name>``; the output
    is ``<file_name>secretome_pass.fasta`` with no directory prefix, so it is
    resolved relative to the current working directory.

    Raises:
        OSError: if ``faSomeRecords`` cannot be started.
    """
    prefix = file_location + file_name
    print("\nRetreving Secretome fasta")
    # NOTE(review): unlike every other output, the result path below is not
    # under file_location -- presumably intentional (final result next to
    # the user's data); confirm.
    status = subprocess.call([
        "faSomeRecords",
        prefix + "singleline.fasta",
        prefix + "secretome_pass.txt",
        file_name + "secretome_pass.fasta",
    ])
    if status != 0:
        sys.stderr.write(
            "WARNING: faSomeRecords exited with status %d\n" % status)
    print("\nSecretome identification Complete")
# Run the pipeline.  The order matters because each step reads files written
# by earlier ones:
#   signalp()   -> singleline.fasta, removed_SigPep.fasta, goodlistSigP.txt
#   sigpFasta() -> signalP_pass.fasta        (needs goodlistSigP.txt)
#   tmhmm()     -> tmhmmGoodlist.txt         (needs removed_SigPep.fasta)
#   targetp()   -> targetpGoodlist.txt       (needs signalP_pass.fasta)
#   wolfpsort() -> wolfPsortGoodlist.txt     (needs singleline.fasta)
#   secretome() -> secretome_pass.txt        (needs all four good lists)
#   secFasta()  -> secretome_pass.fasta      (needs secretome_pass.txt)
signalp()
sigpFasta()
tmhmm()
targetp()
wolfpsort()
secretome()
secFasta()
# sys.exit rather than the bare exit(): the latter is a helper injected by
# the `site` module for interactive use and is not guaranteed to exist.
sys.exit(0)