I am using the Python module urllib3. As I rebuild my last site crawler, it occurs to me that there may be a better way to design these classes.
class Global:
    """Shared crawler configuration: target host, proxy settings, and the
    default request options used by Conn when issuing requests."""

    host = 'http://xxx.org/'
    # Whether requests go through the local proxy by default.
    proxy = True
    proxyHost = 'http://127.0.0.1:8087/'
    # Default keyword arguments for urlopen(), shared by every request.
    # NOTE(review): AttrDict is defined elsewhere in the project — it
    # appears to be a dict with attribute access; confirm.
    opts = AttrDict(
        method='GET',
        headers={
            'Host': 'xxxx.org',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:12.0) Gecko/20100101 Firefox/12.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-us;q=0.5,en;q=0.3',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
            'Cookie': 'xxxxxxx',
            'Cache-Control': 'max-age=0',
        },
        assert_same_host=False,
    )

    def getPool(self, proxy=None):
        """Return a urllib3 connection pool.

        proxy: optional override for the class default; when None,
        self.proxy decides.

        BUG FIX: the original resolved the override into the local
        `proxy` but then tested `self.proxy`, so the argument was
        silently ignored and callers could never force a direct (or
        proxied) pool.
        """
        if proxy is None:
            proxy = self.proxy
        if proxy:
            return urllib3.proxy_from_url(self.proxyHost)
        return urllib3.connection_from_url(self.host)
class Conn:
    """Thin wrapper around a urllib3 connection pool built from Global.

    proxy: truthy to route requests through the configured proxy,
    falsy to connect to Global.host directly.
    """

    def __init__(self, proxy):
        self.proxy = proxy
        self.pool = Global().getPool(self.proxy)

    def swith(self):
        """Toggle between the proxied and the direct connection pool.

        NOTE(review): the name looks like a typo for 'switch' but is kept
        for existing callers. BUG FIX: the original rebuilt the pool with
        `not self.proxy` without updating self.proxy, so a second call did
        not toggle back and instance state no longer matched the pool.
        """
        self.proxy = not self.proxy
        self.pool = Global().getPool(self.proxy)

    def get(self, url, opts=Global.opts):
        """Fetch `url` through the current pool.

        Returns the urlopen() response, or None on timeout / retry
        exhaustion (best-effort, matching the original's swallow-and-log
        behavior).

        BUG FIXES: the original signature omitted `self`, so `self.pool`
        raised NameError on every call; the response was never returned;
        and `except X, e` is Python-2-only syntax — `except X as e` works
        on Python 2.6+ and Python 3.
        """
        try:
            return self.pool.urlopen(
                method=opts.method,
                url=url,
                headers=opts.headers,
                assert_same_host=opts.assert_same_host,
            )
        except TimeoutError:
            # Original printed a blank line here; kept as best-effort
            # handling rather than escalating to a crash.
            print()
            return None
        except MaxRetryError:
            # NOTE(review): the original handler body was truncated in
            # the source; swallowing like the timeout case — confirm the
            # intended handling with the author.
            return None
I am trying to make one class contain all the global configs and data for the others to use, just as I would in JavaScript. But are these classes too tightly coupled? Should I just merge them into one?
Is there anything in `Global` that isn't used by `Conn`? If not, it probably belongs in `Conn`. If you're worried about creating a separate copy of all of these constants in every `Conn` instance, you can make them class variables instead of instance variables. (And you may want to make `getPool` a `@classmethod`, too.) – abarnert Oct 27 '12 at 5:32