From 21b12e70742b50efb2c566dacfdea1544145dcc9 Mon Sep 17 00:00:00 2001 From: Mark Huang Date: Mon, 30 Oct 2006 16:37:49 +0000 Subject: [PATCH] pgdb returns raw UTF-8 strings. Wrap pgdb.typecast so that it casts strings that appear to contain non-ASCII characters to Python unicode objects. --- PLC/PostgreSQL.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/PLC/PostgreSQL.py b/PLC/PostgreSQL.py index 534a27a7..1934316f 100644 --- a/PLC/PostgreSQL.py +++ b/PLC/PostgreSQL.py @@ -5,18 +5,42 @@ # Mark Huang # Copyright (C) 2006 The Trustees of Princeton University # -# $Id: PostgreSQL.py,v 1.6 2006/10/20 17:53:42 mlhuang Exp $ +# $Id: PostgreSQL.py,v 1.7 2006/10/24 13:47:05 mlhuang Exp $ # import pgdb from types import StringTypes, NoneType import traceback import commands +import re from pprint import pformat from PLC.Debug import profile, log from PLC.Faults import * +is8bit = re.compile("[\x80-\xff]").search + +def unicast(typecast): + """ + pgdb returns raw UTF-8 strings. This function casts strings that + appear to contain non-ASCII characters to unicode objects. + """ + + def wrapper(*args, **kwds): + value = typecast(*args, **kwds) + + # pgdb always encodes unicode objects as UTF-8 regardless of + # the DB encoding (and gives you no option for overriding + # the encoding), so always decode 8-bit objects as UTF-8. + if isinstance(value, str) and is8bit(value): + value = unicode(value, "utf-8") + + return value + + return wrapper + +pgdb.pgdbTypeCache.typecast = unicast(pgdb.pgdbTypeCache.typecast) + class PostgreSQL: def __init__(self, api): self.api = api @@ -25,7 +49,7 @@ class PostgreSQL: self.db = pgdb.connect(user = api.config.PLC_DB_USER, password = api.config.PLC_DB_PASSWORD, host = "%s:%d" % (api.config.PLC_DB_HOST, api.config.PLC_DB_PORT), - database = "planetlab4") # XXX api.config.PLC_DB_NAME) + database = api.config.PLC_DB_NAME) self.cursor = self.db.cursor() (self.rowcount, self.description, self.lastrowid) = \ -- 2.47.0