# A high-level interface to the pycurl extension # # ** mfx NOTE: the CGI class uses "black magic" using COOKIEFILE in # combination with a non-existant file name. See the libcurl docs # for more info. # # If you want thread-safe operation, you'll have to set the NOSIGNAL option # yourself. # # By Eric S. Raymond, April 2003. import os, sys, urllib, exceptions, mimetools, pycurl try: from cStringIO import StringIO except ImportError: from StringIO import StringIO class Curl: "High-level interface to cURL functions." def __init__(self, base_url="", fakeheaders=[]): self.handle = pycurl.Curl() # These members might be set. self.set_url(base_url) self.verbosity = 0 self.fakeheaders = fakeheaders # Nothing past here should be modified by the caller. self.payload = "" self.header = StringIO() # Verify that we've got the right site; harmless on a non-SSL connect. self.set_option(pycurl.SSL_VERIFYHOST, 2) # Follow redirects in case it wants to take us to a CGI... self.set_option(pycurl.FOLLOWLOCATION, 1) self.set_option(pycurl.MAXREDIRS, 5) # Setting this option with even a nonexistent file makes libcurl # handle cookie capture and playback automatically. self.set_option(pycurl.COOKIEFILE, "/dev/null") # Set timeouts to avoid hanging too long self.set_timeout(30) # Use password identification from .netrc automatically self.set_option(pycurl.NETRC, 1) # Set up a callback to capture the payload def payload_callback(x): self.payload += x self.set_option(pycurl.WRITEFUNCTION, payload_callback) def header_callback(x): self.header.write(x) self.set_option(pycurl.HEADERFUNCTION, header_callback) def set_timeout(self, timeout): "Set timeout for connect and object retrieval (applies for both)" self.set_option(pycurl.CONNECTTIMEOUT, timeout) self.set_option(pycurl.TIMEOUT, timeout) def set_url(self, url): "Set the base URL to be retrieved." self.base_url = url self.set_option(pycurl.URL, self.base_url) def set_option(self, *args): "Set an option on the retrieval," apply(self.handle.setopt, args) def set_verbosity(self, level): "Set verbosity to 1 to see transactions." self.set_option(pycurl.VERBOSE, level) def __request(self, relative_url=None): "Perform the pending request." if self.fakeheaders: self.set_option(pycurl.HTTPHEADER, self.fakeheaders) if relative_url: self.set_option(pycurl.URL,os.path.join(self.base_url,relative_url)) self.header.seek(0,0) self.payload = "" self.handle.perform() return self.payload def get(self, url="", params=None): "Ship a GET request for a specified URL, capture the response." if params: url += "?" + urllib.urlencode(params) self.set_option(pycurl.HTTPGET, 1) return self.__request(url) def post(self, cgi, params): "Ship a POST request to a specified CGI, capture the response." self.set_option(pycurl.POST, 1) self.set_option(pycurl.POSTFIELDS, urllib.urlencode(params)) return self.__request(cgi) def body(self): "Return the body from the last response." return self.payload def info(self): "Return an RFC822 object with info on the page." self.header.seek(0,0) url = self.handle.getinfo(pycurl.EFFECTIVE_URL) if url[:5] == 'http:': self.header.readline() m = mimetools.Message(self.header) else: m = mimetools.Message(StringIO()) m['effective-url'] = url m['http-code'] = str(self.handle.getinfo(pycurl.HTTP_CODE)) m['total-time'] = str(self.handle.getinfo(pycurl.TOTAL_TIME)) m['namelookup-time'] = str(self.handle.getinfo(pycurl.NAMELOOKUP_TIME)) m['connect-time'] = str(self.handle.getinfo(pycurl.CONNECT_TIME)) m['pretransfer-time'] = str(self.handle.getinfo(pycurl.PRETRANSFER_TIME)) m['redirect-time'] = str(self.handle.getinfo(pycurl.REDIRECT_TIME)) m['redirect-count'] = str(self.handle.getinfo(pycurl.REDIRECT_COUNT)) m['size-upload'] = str(self.handle.getinfo(pycurl.SIZE_UPLOAD)) m['size-download'] = str(self.handle.getinfo(pycurl.SIZE_DOWNLOAD)) m['speed-upload'] = str(self.handle.getinfo(pycurl.SPEED_UPLOAD)) m['header-size'] = str(self.handle.getinfo(pycurl.HEADER_SIZE)) m['request-size'] = str(self.handle.getinfo(pycurl.REQUEST_SIZE)) m['content-length-download'] = str(self.handle.getinfo(pycurl.CONTENT_LENGTH_DOWNLOAD)) m['content-length-upload'] = str(self.handle.getinfo(pycurl.CONTENT_LENGTH_UPLOAD)) m['content-type'] = (self.handle.getinfo(pycurl.CONTENT_TYPE) or '').strip(';') return m def answered(self, check): "Did a given check string occur in the last payload?" return self.payload.find(check) >= 0 def close(self): "Close a session, freeing resources." self.handle.close() self.header.close() def __del__(self): self.close() if __name__ == "__main__": if len(sys.argv) < 2: url = 'http://curl.haxx.se' else: url = sys.argv[1] c = Curl() c.get(url) print c.body() print '='*74 + '\n' print c.info() c.close()