1 # A high-level interface to the pycurl extension
3 # ** mfx NOTE: the CGI class uses "black magic" using COOKIEFILE in
4 # combination with a nonexistent file name. See the libcurl docs
7 # If you want thread-safe operation, you'll have to set the NOSIGNAL option
10 # By Eric S. Raymond, April 2003.
12 import os, sys, urllib, exceptions, mimetools, pycurl
14 from cStringIO import StringIO
16 from StringIO import StringIO
20 "High-level interface to cURL functions."
# Build a session around a fresh pycurl.Curl() handle and preset its options:
# host verification, redirect following (at most 5 redirects), cookie
# handling, .netrc credentials, and the write/header callbacks.
# NOTE(review): fakeheaders=[] is a mutable default and is stored on the
# instance as-is, so every instance built without the argument shares one
# list -- prefer a None default if callers ever mutate it.
# NOTE(review): no body is visible here for payload_callback or
# header_callback, and nothing follows the "Set timeouts" comment -- confirm
# against the complete file before editing this constructor.
21 def __init__(self, base_url="", fakeheaders=[]):
22 self.handle = pycurl.Curl()
23 # These members might be set.
24 self.set_url(base_url)
26 self.fakeheaders = fakeheaders
27 # Nothing past here should be modified by the caller.
29 self.header = StringIO()
30 # Verify that we've got the right site; harmless on a non-SSL connect.
31 self.set_option(pycurl.SSL_VERIFYHOST, 2)
32 # Follow redirects in case it wants to take us to a CGI...
33 self.set_option(pycurl.FOLLOWLOCATION, 1)
34 self.set_option(pycurl.MAXREDIRS, 5)
35 # Setting this option with even a nonexistent file makes libcurl
36 # handle cookie capture and playback automatically.
37 self.set_option(pycurl.COOKIEFILE, "/dev/null")
38 # Set timeouts to avoid hanging too long
40 # Use password identification from .netrc automatically
41 self.set_option(pycurl.NETRC, 1)
42 # Set up a callback to capture the payload
43 def payload_callback(x):
45 self.set_option(pycurl.WRITEFUNCTION, payload_callback)
46 def header_callback(x):
48 self.set_option(pycurl.HEADERFUNCTION, header_callback)
def set_timeout(self, timeout):
    """Apply one timeout value to both connecting and object retrieval.

    The same *timeout* is passed, in this order, for pycurl.CONNECTTIMEOUT
    and pycurl.TIMEOUT through self.set_option().
    """
    for option in (pycurl.CONNECTTIMEOUT, pycurl.TIMEOUT):
        self.set_option(option, timeout)
def set_url(self, url):
    """Set the base URL to be retrieved.

    Stores *url* as self.base_url and passes it to the handle as the
    pycurl.URL option.
    """
    # Record the argument on the instance first: the option below (and the
    # URL join in __request) read self.base_url, so without this assignment
    # the caller's URL would be ignored and the attribute left undefined.
    self.base_url = url
    self.set_option(pycurl.URL, self.base_url)
def set_option(self, *args):
    """Set an option on the retrieval.

    All positional arguments are forwarded unchanged to
    self.handle.setopt(); nothing is returned.
    """
    # Argument unpacking replaces the long-deprecated apply() builtin, which
    # no longer exists in Python 3; the call made is otherwise the same.
    self.handle.setopt(*args)
def set_verbosity(self, level):
    """Pass *level* to the pycurl.VERBOSE option; use 1 to see transactions."""
    verbose_option = pycurl.VERBOSE
    self.set_option(verbose_option, level)
# Installs self.fakeheaders as the HTTPHEADER option, then points the handle
# at relative_url joined onto self.base_url with os.path.join.
# NOTE(review): the statements that run the transfer and produce the value
# that get() and post() return are not visible in this span -- confirm
# against the complete file before changing anything here.
# NOTE(review): relative_url defaults to None, which os.path.join does not
# accept; presumably a guard precedes the join -- confirm.
68 def __request(self, relative_url=None):
69 "Perform the pending request."
71 self.set_option(pycurl.HTTPHEADER, self.fakeheaders)
73 self.set_option(pycurl.URL,os.path.join(self.base_url,relative_url))
def get(self, url="", params=None):
    """Ship a GET request for a specified URL, capture the response.

    url    -- relative URL handed on to __request().
    params -- optional query parameters in any form urllib.urlencode()
              accepts; when given, they are appended to url after a "?".

    Returns whatever __request() returns.
    """
    # params defaults to None, which urllib.urlencode() cannot encode, so a
    # query string is only built when there is something to send (an empty
    # collection is likewise skipped rather than adding a bare "?").
    if params:
        url += "?" + urllib.urlencode(params)
    self.set_option(pycurl.HTTPGET, 1)
    return self.__request(url)
def post(self, cgi, params):
    """Send *params* to the given CGI as a POST and return the response.

    The handle is switched to POST, the url-encoded params become the
    POSTFIELDS option, and the result of __request(cgi) is returned.
    """
    self.set_option(pycurl.POST, 1)
    form_data = urllib.urlencode(params)
    self.set_option(pycurl.POSTFIELDS, form_data)
    response = self.__request(cgi)
    return response
93 "Return the body from the last response."
97 "Return an RFC822 object with info on the page."
99 url = self.handle.getinfo(pycurl.EFFECTIVE_URL)
100 if url[:5] == 'http:':
101 self.header.readline()
102 m = mimetools.Message(self.header)
104 m = mimetools.Message(StringIO())
105 m['effective-url'] = url
106 m['http-code'] = str(self.handle.getinfo(pycurl.HTTP_CODE))
107 m['total-time'] = str(self.handle.getinfo(pycurl.TOTAL_TIME))
108 m['namelookup-time'] = str(self.handle.getinfo(pycurl.NAMELOOKUP_TIME))
109 m['connect-time'] = str(self.handle.getinfo(pycurl.CONNECT_TIME))
110 m['pretransfer-time'] = str(self.handle.getinfo(pycurl.PRETRANSFER_TIME))
111 m['redirect-time'] = str(self.handle.getinfo(pycurl.REDIRECT_TIME))
112 m['redirect-count'] = str(self.handle.getinfo(pycurl.REDIRECT_COUNT))
113 m['size-upload'] = str(self.handle.getinfo(pycurl.SIZE_UPLOAD))
114 m['size-download'] = str(self.handle.getinfo(pycurl.SIZE_DOWNLOAD))
115 m['speed-upload'] = str(self.handle.getinfo(pycurl.SPEED_UPLOAD))
116 m['header-size'] = str(self.handle.getinfo(pycurl.HEADER_SIZE))
117 m['request-size'] = str(self.handle.getinfo(pycurl.REQUEST_SIZE))
118 m['content-length-download'] = str(self.handle.getinfo(pycurl.CONTENT_LENGTH_DOWNLOAD))
119 m['content-length-upload'] = str(self.handle.getinfo(pycurl.CONTENT_LENGTH_UPLOAD))
120 m['content-type'] = (self.handle.getinfo(pycurl.CONTENT_TYPE) or '').strip(';')
def answered(self, check):
    """Report whether the string *check* occurs in self.payload."""
    # find() yields -1 when the substring is absent, an index otherwise.
    position = self.payload.find(check)
    return position > -1
128 "Close a session, freeing resources."
136 if __name__ == "__main__":
137 if len(sys.argv) < 2:
138 url = 'http://curl.haxx.se'