# Process-wide client handles. Both start out unset and are filled in by
# init_client(), so the expensive factory call happens once, not per lookup.
parse_client = store_client = None
def init_client():
    """Create the JD parse and store clients and publish them as module globals.

    Builds a ``ClientFactory`` from ``config`` and stores the clients it hands
    out in the module-level ``parse_client`` and ``store_client`` names, so
    later calls such as ``has_jd`` can reuse them.
    """
    global parse_client, store_client
    # ... some codes (elided in the original; presumably prepares `config`)
    factory = ClientFactory(config)
    parse_client = factory.getJdParseClient()
    store_client = factory.getJdStoreClient()
def has_jd(jd_url, source=None):
    """Look up the JD behind ``jd_url`` in the store.

    The URL is turned into a JD id with ``parse_client.parseJdId`` and that id
    is handed to ``store_client.listJd``; whatever ``listJd`` returns is
    returned unchanged.

    Args:
        jd_url: URL of the JD to check.
        source: Source the URL came from, forwarded to ``parseJdId``. When
            omitted, the module-level name ``jd_source`` is used, which is
            what the original one-argument form referenced.

    Returns:
        The result of ``store_client.listJd("", [jd_id])``.
    """
    if source is None:
        # NOTE(review): no module-level ``jd_source`` is visible in this file,
        # so this fallback raises NameError unless one is defined elsewhere.
        source = jd_source
    jd_id = parse_client.parseJdId(source, jd_url)
    return store_client.listJd("", [jd_id])


def caculate(concurrency, jd_list):
    """Run ``check_url`` over every item of ``jd_list`` in a process pool.

    Args:
        concurrency: Maximum number of worker processes.
        jd_list: Iterable of ``(jd_date, jd_source, url)`` items.

    Returns:
        A list with one ``check_url`` result per item, in input order.

    Raises:
        Exception: Whatever a worker raised. The original discarded the
            futures, so worker failures went unnoticed.
    """
    # NOTE(review): the workers rely on the module-level clients being set in
    # their own process. That holds when the start method copies the parent's
    # state (fork); under "spawn" they would still be None -- confirm, or pass
    # ``initializer=init_client`` to the executor.
    with futures.ProcessPoolExecutor(max_workers=concurrency) as executor:
        return list(executor.map(check_url, jd_list))


def check_url(jd_item):
    """Check a single JD item against the store.

    Args:
        jd_item: A ``(jd_date, jd_source, url)`` triple.

    Returns:
        ``(jd_date, jd_source, url, result)`` where ``result`` is what
        ``has_jd`` returned for the URL.
    """
    jd_date, jd_source, url = jd_item
    # Pass the item's own source explicitly; has_jd cannot see this local.
    return (jd_date, jd_source, url, has_jd(url, jd_source))
def main():
    """Script entry point: set up the clients, load the JD items, check them."""
    init_client()
    max_workers = 15
    # parse_files() yields a count and the item list; only the list is used.
    _count, items = parse_files()
    caculate(max_workers, items)
# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
I want to know whether there is a better choice than the global variables parse_client and store_client.
The problem is that if I change these two variables from global to local, I need to initialize them every time. Both are created by the ClientFactory, which requires a remote procedure call (RPC), so the cost is too high.