From 1283625b0f0b395a59d6d72b06bc7a62479b2d26 Mon Sep 17 00:00:00 2001 From: Manjunath Kudlur Date: Thu, 31 Mar 2016 09:53:00 -0700 Subject: Added an API to allow oversize protos when using C++ extension in Python --- python/google/protobuf/internal/message_test.py | 59 +++++++++++++++++++++++++ python/google/protobuf/pyext/message.cc | 46 ++++++++++++++----- 2 files changed, 94 insertions(+), 11 deletions(-) (limited to 'python') diff --git a/python/google/protobuf/internal/message_test.py b/python/google/protobuf/internal/message_test.py index d03f2d25..ee6b944a 100755 --- a/python/google/protobuf/internal/message_test.py +++ b/python/google/protobuf/internal/message_test.py @@ -57,7 +57,11 @@ try: except ImportError: import unittest from google.protobuf.internal import _parameterized +from google.protobuf import descriptor_pb2 +from google.protobuf import descriptor_pool from google.protobuf import map_unittest_pb2 +from google.protobuf import message_factory +from google.protobuf import text_format from google.protobuf import unittest_pb2 from google.protobuf import unittest_proto3_arena_pb2 from google.protobuf.internal import any_test_pb2 @@ -1776,5 +1780,60 @@ class PackedFieldTest(unittest.TestCase): b'\x70\x01') self.assertEqual(golden_data, message.SerializeToString()) + +@unittest.skipIf(api_implementation.Type() != 'cpp', + 'explicit tests of the C++ implementation') +class OversizeProtosTest(unittest.TestCase): + + def setUp(self): + self.file_desc = """ + name: "f/f.msg2" + package: "f" + message_type { + name: "msg1" + field { + name: "payload" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + } + message_type { + name: "msg2" + field { + name: "field" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: "msg1" + } + } + """ + pool = descriptor_pool.DescriptorPool() + desc = descriptor_pb2.FileDescriptorProto() + text_format.Parse(self.file_desc, desc) + pool.Add(desc) + self.proto_cls = message_factory.MessageFactory(pool).GetPrototype( + pool.FindMessageTypeByName('f.msg2')) + self.p = self.proto_cls() + self.p.field.payload = 'c' * (1024 * 1024 * 64 + 1) + self.p_serialized = self.p.SerializeToString() + + def testAssertOversizeProto(self): + from google.protobuf.pyext._message import SetAllowOversizeProtos + SetAllowOversizeProtos(False) + q = self.proto_cls() + try: + q.ParseFromString(self.p_serialized) + except message.DecodeError as e: + self.assertEqual(str(e), 'Error parsing message') + + def testSucceedOversizeProto(self): + from google.protobuf.pyext._message import SetAllowOversizeProtos + SetAllowOversizeProtos(True) + q = self.proto_cls() + q.ParseFromString(self.p_serialized) + self.assertEqual(self.p.field.payload, q.field.payload) + if __name__ == '__main__': unittest.main() diff --git a/python/google/protobuf/pyext/message.cc b/python/google/protobuf/pyext/message.cc index 60ec9c1b..7d3e8c37 100644 --- a/python/google/protobuf/pyext/message.cc +++ b/python/google/protobuf/pyext/message.cc @@ -1911,6 +1911,30 @@ static PyObject* CopyFrom(CMessage* self, PyObject* arg) { Py_RETURN_NONE; } +// Protobuf has a 64MB limit built in, this variable will override this. Please +// do not enable this unless you fully understand the implications: protobufs +// must all be kept in memory at the same time, so if they grow too big you may +// get OOM errors. The protobuf APIs do not provide any tools for processing +// protobufs in chunks. If you have protos this big you should break them up if +// it is at all convenient to do so. +static bool allow_oversize_protos = false; + +// Provide a method in the module to set allow_oversize_protos to a boolean +// value. This method returns the newly value of allow_oversize_protos. +static PyObject* SetAllowOversizeProtos(PyObject* m, PyObject* arg) { + if (!arg || !PyBool_Check(arg)) { + PyErr_SetString(PyExc_TypeError, + "Argument to SetAllowOversizeProtos must be boolean"); + return NULL; + } + allow_oversize_protos = PyObject_IsTrue(arg); + if (allow_oversize_protos) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } +} + static PyObject* MergeFromString(CMessage* self, PyObject* arg) { const void* data; Py_ssize_t data_length; @@ -1921,15 +1945,9 @@ static PyObject* MergeFromString(CMessage* self, PyObject* arg) { AssureWritable(self); io::CodedInputStream input( reinterpret_cast(data), data_length); -#if PROTOBUF_PYTHON_ALLOW_OVERSIZE_PROTOS - // Protobuf has a 64MB limit built in, this code will override this. Please do - // not enable this unless you fully understand the implications: protobufs - // must all be kept in memory at the same time, so if they grow too big you - // may get OOM errors. The protobuf APIs do not provide any tools for - // processing protobufs in chunks. If you have protos this big you should - // break them up if it is at all convenient to do so. - input.SetTotalBytesLimit(INT_MAX, INT_MAX); -#endif // PROTOBUF_PYTHON_ALLOW_OVERSIZE_PROTOS + if (allow_oversize_protos) { + input.SetTotalBytesLimit(INT_MAX, INT_MAX); + } PyDescriptorPool* pool = GetDescriptorPoolForMessage(self); input.SetExtensionRegistry(pool->pool, pool->message_factory); bool success = self->message->MergePartialFromCodedStream(&input); @@ -3046,6 +3064,11 @@ bool InitProto2MessageModule(PyObject *m) { } // namespace python } // namespace protobuf +static PyMethodDef ModuleMethods[] = { + {"SetAllowOversizeProtos", + (PyCFunction)google::protobuf::python::cmessage::SetAllowOversizeProtos, + METH_O, "Enable/disable oversize proto parsing."}, +}; #if PY_MAJOR_VERSION >= 3 static struct PyModuleDef _module = { @@ -3053,7 +3076,7 @@ static struct PyModuleDef _module = { "_message", google::protobuf::python::module_docstring, -1, - NULL, + ModuleMethods, /* m_methods */ NULL, NULL, NULL, @@ -3072,7 +3095,8 @@ extern "C" { #if PY_MAJOR_VERSION >= 3 m = PyModule_Create(&_module); #else - m = Py_InitModule3("_message", NULL, google::protobuf::python::module_docstring); + m = Py_InitModule3("_message", ModuleMethods, + google::protobuf::python::module_docstring); #endif if (m == NULL) { return INITFUNC_ERRORVAL; -- cgit v1.2.3 From cf828deb9b5537c294e4a5628ef04fdbdfdcbb28 Mon Sep 17 00:00:00 2001 From: Manjunath Kudlur Date: Fri, 25 Mar 2016 10:58:46 -0700 Subject: Linking the cpp implementation extension statically with libprotobuf --- python/README.md | 12 ++---------- python/setup.py | 35 ++++++++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 17 deletions(-) (limited to 'python') diff --git a/python/README.md b/python/README.md index 1b5b9dff..57acfd94 100644 --- a/python/README.md +++ b/python/README.md @@ -123,13 +123,5 @@ C++ Implementation The C++ implementation for Python messages is built as a Python extension to improve the overall protobuf Python performance. -To use the C++ implementation, you need to: -1) Install the C++ protobuf runtime library, please see instructions in the - parent directory. -2) Export an environment variable: - - $ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp - -You must set this variable at runtime, before running your program, otherwise -the pure-Python implementation will be used. In a future release, we will -change the default so that C++ implementation is used whenever it is available. +To use the C++ implementation, you need to install the C++ protobuf runtime +library, please see instructions in the parent directory. diff --git a/python/setup.py b/python/setup.py index 6ea3bad7..f5c00a72 100755 --- a/python/setup.py +++ b/python/setup.py @@ -157,13 +157,28 @@ class test_conformance(_build_py): status = subprocess.check_call(cmd, shell=True) +def get_option_from_sys_argv(option_str): + if option_str in sys.argv: + sys.argv.remove(option_str) + return True + return False + + if __name__ == '__main__': ext_module_list = [] - cpp_impl = '--cpp_implementation' warnings_as_errors = '--warnings_as_errors' - if cpp_impl in sys.argv: - sys.argv.remove(cpp_impl) + if get_option_from_sys_argv('--cpp_implementation'): + # Link libprotobuf.a and libprotobuf-lite.a statically with the + # extension. Note that those libraries have to be compiled with + # -fPIC for this to work. + compile_static_ext = get_option_from_sys_argv('--compile_static_extension') extra_compile_args = ['-Wno-write-strings', '-Wno-invalid-offsetof'] + libraries = ['protobuf'] + extra_objects = None + if compile_static_ext: + libraries = None + extra_objects = ['../src/.libs/libprotobuf.a', + '../src/.libs/libprotobuf-lite.a'] test_conformance.target = 'test_python_cpp' if "clang" in os.popen('$CC --version 2> /dev/null').read(): @@ -174,16 +189,22 @@ if __name__ == '__main__': sys.argv.remove(warnings_as_errors) # C++ implementation extension - ext_module_list.append( + ext_module_list.extend([ Extension( "google.protobuf.pyext._message", glob.glob('google/protobuf/pyext/*.cc'), include_dirs=[".", "../src"], - libraries=['protobuf'], + libraries=libraries, + extra_objects=extra_objects, library_dirs=['../src/.libs'], extra_compile_args=extra_compile_args, - ) - ) + ), + Extension( + "google.protobuf.internal._api_implementation", + glob.glob('google/protobuf/internal/api_implementation.cc'), + extra_compile_args=['-DPYTHON_PROTO2_CPP_IMPL_V2'], + ), + ]) os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'cpp' # Keep this list of dependencies in sync with tox.ini. -- cgit v1.2.3