All strings passed to Python are now Unicode objects

This commit is contained in:
John Wiegley 2009-11-10 18:44:08 -05:00
parent 0ac2dc2810
commit ac885a9075
5 changed files with 132 additions and 103 deletions

View file

@ -75,13 +75,19 @@ typedef register_python_conversion<bool, bool_to_python, bool_from_python>
bool_python_conversion; bool_python_conversion;
#if defined(STRING_VERIFY_ON)
struct string_to_python struct string_to_python
{ {
static PyObject* convert(const ledger::string& str) static PyObject* convert(const string& str)
{ {
#if 1
// Return a Unicode object
PyObject * pstr = PyString_FromString(str.c_str());
PyObject * uni = PyUnicode_FromEncodedObject(pstr, "UTF-8", NULL);
return object(handle<>(borrowed(uni))).ptr();
#else
// Return a 7-bit ASCII string
return incref(object(static_cast<const std::string&>(str)).ptr()); return incref(object(static_cast<const std::string&>(str)).ptr());
#endif
} }
}; };
@ -89,55 +95,29 @@ struct string_from_python
{ {
static void* convertible(PyObject* obj_ptr) static void* convertible(PyObject* obj_ptr)
{ {
if (!PyString_Check(obj_ptr)) return 0; if (!PyUnicode_Check(obj_ptr) &&
!PyString_Check(obj_ptr)) return 0;
return obj_ptr; return obj_ptr;
} }
static void construct(PyObject* obj_ptr, static void construct(PyObject* obj_ptr,
converter::rvalue_from_python_stage1_data* data) converter::rvalue_from_python_stage1_data* data)
{ {
if (PyString_Check(obj_ptr)) {
const char* value = PyString_AsString(obj_ptr); const char* value = PyString_AsString(obj_ptr);
if (value == 0) throw_error_already_set(); if (value == 0) throw_error_already_set();
void* storage = void* storage =
reinterpret_cast<converter::rvalue_from_python_storage<ledger::string> *> reinterpret_cast<converter::rvalue_from_python_storage<string> *>
(data)->storage.bytes; (data)->storage.bytes;
new (storage) ledger::string(value); new (storage) string(value);
data->convertible = storage; data->convertible = storage;
} } else {
}; VERIFY(PyUnicode_Check(obj_ptr));
typedef register_python_conversion<ledger::string,
string_to_python, string_from_python>
string_python_conversion;
#endif // STRING_VERIFY_ON
struct unicode_to_python
{
static PyObject* convert(const std::string& utf8str)
{
PyObject * pstr = PyString_FromString(utf8str.c_str());
PyObject * uni = PyUnicode_FromEncodedObject(pstr, "UTF-8", NULL);
return object(handle<>(borrowed(uni))).ptr();
}
};
struct unicode_from_python
{
static void* convertible(PyObject* obj_ptr)
{
if (!PyUnicode_Check(obj_ptr)) return 0;
return obj_ptr;
}
static void construct(PyObject* obj_ptr,
converter::rvalue_from_python_stage1_data* data)
{
Py_ssize_t size = PyUnicode_GET_SIZE(obj_ptr); Py_ssize_t size = PyUnicode_GET_SIZE(obj_ptr);
const Py_UNICODE* value = PyUnicode_AS_UNICODE(obj_ptr); const Py_UNICODE* value = PyUnicode_AS_UNICODE(obj_ptr);
std::string str; string str;
if (sizeof(Py_UNICODE) == 2) // UTF-16 if (sizeof(Py_UNICODE) == 2) // UTF-16
utf8::unchecked::utf16to8(value, value + size, std::back_inserter(str)); utf8::unchecked::utf16to8(value, value + size, std::back_inserter(str));
else if (sizeof(Py_UNICODE) == 4) // UTF-32 else if (sizeof(Py_UNICODE) == 4) // UTF-32
@ -147,16 +127,16 @@ struct unicode_from_python
if (value == 0) throw_error_already_set(); if (value == 0) throw_error_already_set();
void* storage = void* storage =
reinterpret_cast<converter::rvalue_from_python_storage<std::string> *> reinterpret_cast<converter::rvalue_from_python_storage<string> *>
(data)->storage.bytes; (data)->storage.bytes;
new (storage) std::string(str); new (storage) string(str);
data->convertible = storage; data->convertible = storage;
} }
}
}; };
typedef register_python_conversion<std::string, typedef register_python_conversion<string, string_to_python, string_from_python>
unicode_to_python, unicode_from_python> string_python_conversion;
unicode_python_conversion;
struct istream_to_python struct istream_to_python
@ -273,10 +253,7 @@ void export_utils()
; ;
bool_python_conversion(); bool_python_conversion();
#if defined(STRING_VERIFY_ON)
string_python_conversion(); string_python_conversion();
#endif
unicode_python_conversion();
istream_python_conversion(); istream_python_conversion();
ostream_python_conversion(); ostream_python_conversion();
} }

View file

@ -36,7 +36,7 @@ template <typename T, typename TfromPy>
struct object_from_python struct object_from_python
{ {
object_from_python() { object_from_python() {
boost::python::converter::registry::push_back boost::python::converter::registry::insert
(&TfromPy::convertible, &TfromPy::construct, (&TfromPy::convertible, &TfromPy::construct,
boost::python::type_id<T>()); boost::python::type_id<T>());
} }
@ -106,6 +106,55 @@ struct register_optional_to_python : public boost::noncopyable
} }
}; };
namespace boost { namespace python {

// NOTE(review): the three macro names below match macros that Boost.Python
// itself appears to define for its built-in converters -- presumably these
// were copied from there; confirm that re-defining them here does not clash
// with the Boost version in use.

// Use expr to create the PyObject corresponding to x.
//
// Specializes to_python_value for both `T&` and `T const&` so that a
// function returning T by value or by reference is converted by evaluating
// `expr` (which may refer to the value as `x`).  `pytype` is the Python type
// object reported by get_pytype().
# define BOOST_PYTHON_RETURN_TO_PYTHON_BY_VALUE(T, expr, pytype)\
template <> struct to_python_value<T&> \
: detail::builtin_to_python \
{ \
inline PyObject* operator()(T const& x) const \
{ \
return (expr); \
} \
inline PyTypeObject const* get_pytype() const \
{ \
return (pytype); \
} \
}; \
template <> struct to_python_value<T const&> \
: detail::builtin_to_python \
{ \
inline PyObject* operator()(T const& x) const \
{ \
return (expr); \
} \
inline PyTypeObject const* get_pytype() const \
{ \
return (pytype); \
} \
};

// Specializes converter::arg_to_python<T> so that a T passed as an argument
// to a Python callable is converted by evaluating `expr`.  The resulting
// pointer is stored in the handle<> base class.
# define BOOST_PYTHON_ARG_TO_PYTHON_BY_VALUE(T, expr) \
namespace converter \
{ \
template <> struct arg_to_python< T > \
: handle<> \
{ \
arg_to_python(T const& x) \
: python::handle<>(expr) {} \
}; \
}

// Specialize argument and return value converters for T using expr
# define BOOST_PYTHON_TO_PYTHON_BY_VALUE(T, expr, pytype) \
BOOST_PYTHON_RETURN_TO_PYTHON_BY_VALUE(T,expr, pytype) \
BOOST_PYTHON_ARG_TO_PYTHON_BY_VALUE(T,expr)

// Every ledger::string crossing into Python becomes a Unicode object decoded
// from UTF-8.  The bytes are decoded directly with PyUnicode_DecodeUTF8:
// the previous expression wrapped them in a temporary PyString that
// PyUnicode_FromEncodedObject does not take ownership of, so that temporary
// was leaked on every conversion.  Passing the explicit length also keeps
// embedded NUL bytes instead of truncating at the first one.
BOOST_PYTHON_TO_PYTHON_BY_VALUE(ledger::string, ::PyUnicode_DecodeUTF8(x.data(), static_cast<Py_ssize_t>(x.size()), NULL), &PyUnicode_Type)

} } // namespace boost::python
//boost::python::register_ptr_to_python< boost::shared_ptr<Base> >(); //boost::python::register_ptr_to_python< boost::shared_ptr<Base> >();
#endif // _PY_UTILS_H #endif // _PY_UTILS_H

View file

@ -406,8 +406,16 @@ void report_memory(std::ostream& out, bool report_all)
} }
} }
} // namespace ledger
#if defined(STRING_VERIFY_ON) #endif // VERIFY_ON
/**********************************************************************
*
* String wrapper
*/
namespace ledger {
string::string() : std::string() { string::string() : std::string() {
TRACE_CTOR(string, ""); TRACE_CTOR(string, "");
@ -445,18 +453,10 @@ string::~string() throw() {
TRACE_DTOR(string); TRACE_DTOR(string);
} }
#endif // STRING_VERIFY_ON string empty_string("");
} // namespace ledger strings_list split_arguments(const char * line)
#endif // VERIFY_ON
ledger::string empty_string("");
ledger::strings_list split_arguments(const char * line)
{ {
using namespace ledger;
strings_list args; strings_list args;
char buf[4096]; char buf[4096];
@ -506,6 +506,8 @@ ledger::strings_list split_arguments(const char * line)
return args; return args;
} }
} // namespace ledger
/********************************************************************** /**********************************************************************
* *
* Logging * Logging

View file

@ -62,10 +62,6 @@
#define TIMERS_ON 1 #define TIMERS_ON 1
#endif #endif
#if defined(VERIFY_ON)
//#define STRING_VERIFY_ON 1
#endif
/*@}*/ /*@}*/
/** /**
@ -76,11 +72,7 @@
namespace ledger { namespace ledger {
using namespace boost; using namespace boost;
#if defined(STRING_VERIFY_ON)
class string; class string;
#else
typedef std::string string;
#endif
typedef std::list<string> strings_list; typedef std::list<string> strings_list;
@ -162,12 +154,33 @@ void trace_dtor_func(void * ptr, const char * cls_name, std::size_t cls_size);
void report_memory(std::ostream& out, bool report_all = false); void report_memory(std::ostream& out, bool report_all = false);
#if defined(STRING_VERIFY_ON) } // namespace ledger
#else // ! VERIFY_ON
#define VERIFY(x)
#define DO_VERIFY() true
#define TRACE_CTOR(cls, args)
#define TRACE_DTOR(cls)
#endif // VERIFY_ON
#define IF_VERIFY() if (DO_VERIFY())
/*@}*/
/** /**
* This string type is a wrapper around std::string that allows us to * @name String wrapper
* trace constructor and destructor calls. *
* This string type is a wrapper around std::string that allows us to trace
* constructor and destructor calls. It also makes ledger's use of strings a
* unique type, that the Boost.Python code can use as the basis for
* transparent Unicode conversions.
*/ */
/*@{*/
namespace ledger {
class string : public std::string class string : public std::string
{ {
public: public:
@ -240,25 +253,12 @@ inline bool operator!=(const char* __lhs, const string& __rhs)
inline bool operator!=(const string& __lhs, const char* __rhs) inline bool operator!=(const string& __lhs, const char* __rhs)
{ return __lhs.compare(__rhs) != 0; } { return __lhs.compare(__rhs) != 0; }
#endif // STRING_VERIFY_ON extern string empty_string;
strings_list split_arguments(const char * line);
} // namespace ledger } // namespace ledger
#else // ! VERIFY_ON
#define VERIFY(x)
#define DO_VERIFY() true
#define TRACE_CTOR(cls, args)
#define TRACE_DTOR(cls)
#endif // VERIFY_ON
extern ledger::string empty_string;
ledger::strings_list split_arguments(const char * line);
#define IF_VERIFY() if (DO_VERIFY())
/*@}*/ /*@}*/
/** /**

View file

@ -150,6 +150,7 @@ for line in fd.readlines():
line = re.sub('set_session_context\(\)', line = re.sub('set_session_context\(\)',
'set_session_context()\n self.testSession = None', line) 'set_session_context()\n self.testSession = None', line)
line = re.sub('([a-z_]+?)_t\b', '\\1', line) line = re.sub('([a-z_]+?)_t\b', '\\1', line)
line = re.sub('("[^"]+")', 'u\\1', line)
line = re.sub('std::string\(([^)]+?)\)', '\\1', line) line = re.sub('std::string\(([^)]+?)\)', '\\1', line)
line = re.sub('string\(([^)]+?)\)', '\\1', line) line = re.sub('string\(([^)]+?)\)', '\\1', line)
line = re.sub('\.print\(([^)]+?)\)', '.print_(\\1)', line) line = re.sub('\.print\(([^)]+?)\)', '.print_(\\1)', line)