I want to apply three levels of filtering on record
.
rec
in cname.rec
is string of >2 words, so I want to consider all ngram ofrec
to be checked inrecord
i. if
gram
matches intorecord
incrementfilter_company_level
and write it to file2nd Filter on record is for each value
value
inself.keyword_material_info
list.i. if
value
matches intorecord
incrementfilter_with_material_info
and write it to file3rd filter is for
item
inself.keyword_bse_list
.i. if
item
matches intorecord
incrementfilter_with_keyword_info
and write it to fileii. Now move to next record if inner most filter is present.
I have written these code, does it satisfy above conditions, or is there any bug? None of them give error but want to make sure that logic is correct.
for record in fetch_record:
total += 1
for rec in cname:
try:
c_ngram = self.get_ngrams(rec['company_name'])
for gram in c_ngram:
if gram.lower()+' ' in u'{} {}'.format(record['title'], record['description']).lower():
filter_company_level += 1
# print "Matched based on company name : ", record['article_link']
company_write.write(record['article_link']+' - '+rec['company_name']+' - '+rec['company_code']+'\n')
for value in self.keyword_material_info:
if value.lower()+' ' in u'{} {}'.format(record['title'], record['description']).lower():
filter_with_material_info += 1
materialinfo_write.write(record['article_link']+' - '+rec['company_name']+' - '+rec['company_code']+' - '+value+'\n')
for item in self.keyword_bse_list:
if item.lower()+' ' in u'{} {}'.format(record['title'], record['description']).lower():
filter_with_keyword_info += 1
keyword_write.write(record['article_link']+' - '+rec['company_name']+' - '+rec['company_code']+" - "+value+' - '+item+'\n')
print record['article_link']
print value
print item
break
break
# break
raise GetOutOfLoop
except GetOutOfLoop:
break
Or this one is correct?
for record in fetch_record:
total += 1
for rec in cname:
try:
c_ngram = self.get_ngrams(rec['company_name'])
for gram in c_ngram:
if gram.lower()+' ' in u'{} {}'.format(record['title'], record['description']).lower():
filter_company_level += 1
# print "Matched based on company name : ", record['article_link']
company_write.write(record['article_link']+' - '+rec['company_name']+' - '+rec['company_code']+'\n')
for value in self.keyword_material_info:
if value.lower()+' ' in u'{} {}'.format(record['title'], record['description']).lower():
filter_with_material_info += 1
materialinfo_write.write(record['article_link']+' - '+rec['company_name']+' - '+rec['company_code']+' - '+value+'\n')
flag_keyword = 0
for item in self.keyword_bse_list:
if item.lower()+' ' in u'{} {}'.format(record['title'], record['description']).lower():
filter_with_keyword_info += 1
keyword_write.write(record['article_link']+' - '+rec['company_name']+' - '+rec['company_code']+" - "+value+' - '+item+'\n')
print record['article_link']
print value
print item
flag_keyword = 1
break
if flag_keyword == 1:
break
# break
if flag_keyword == 1:
raise GetOutOfLoop
except GetOutOfLoop:
break