mirror of
https://github.com/python/cpython.git
synced 2025-09-08 01:41:19 +00:00
SF patch #980695: efficient string concatenation
(Original patch by Armin Rigo).
This commit is contained in:
parent
d09d9664e6
commit
52a21b8e65
3 changed files with 107 additions and 3 deletions
|
@ -455,7 +455,7 @@ and \var{j} are integers:
|
||||||
\lineiii{\var{x} not in \var{s}}{\code{0} if an item of \var{s} is
|
\lineiii{\var{x} not in \var{s}}{\code{0} if an item of \var{s} is
|
||||||
equal to \var{x}, else \code{1}}{(1)}
|
equal to \var{x}, else \code{1}}{(1)}
|
||||||
\hline
|
\hline
|
||||||
\lineiii{\var{s} + \var{t}}{the concatenation of \var{s} and \var{t}}{}
|
\lineiii{\var{s} + \var{t}}{the concatenation of \var{s} and \var{t}}{(6)}
|
||||||
\lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(2)}
|
\lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(2)}
|
||||||
\hline
|
\hline
|
||||||
\lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(3)}
|
\lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(3)}
|
||||||
|
@ -536,6 +536,16 @@ In Python 2.3 and beyond, \var{x} may be a string of any length.
|
||||||
(which end depends on the sign of \var{k}). Note, \var{k} cannot
|
(which end depends on the sign of \var{k}). Note, \var{k} cannot
|
||||||
be zero.
|
be zero.
|
||||||
|
|
||||||
|
\item[(6)] If \var{s} and \var{t} are both strings, some Python
|
||||||
|
implementations such as CPython can usally perform an inplace optimization
|
||||||
|
for assignments of the form \code{\var{s}=\var{s}+\var{t}} or
|
||||||
|
\code{\var{s}+=\var{t}}. When applicable, this optimization makes
|
||||||
|
quadratic run-time much less likely. This optimization is both version
|
||||||
|
and implementation dependent. For performance sensitive code, it is
|
||||||
|
preferrable to use the \method{str.join()} method which assures consistent
|
||||||
|
linear concatenation performance across versions and implementations.
|
||||||
|
\versionchanged[Formerly, string concatenation never occurred inplace]{2.4}
|
||||||
|
|
||||||
\end{description}
|
\end{description}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,11 @@ What's New in Python 2.4 alpha 2?
|
||||||
Core and builtins
|
Core and builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Patch #980695: Implements efficient string concatenation for statements
|
||||||
|
of the form s=s+t and s+=t. This will vary across implementations.
|
||||||
|
Accordingly, the str.join() method is strongly preferred for performance
|
||||||
|
sensitive code.
|
||||||
|
|
||||||
- PEP-0318, Function Decorators have been added to the language. These are
|
- PEP-0318, Function Decorators have been added to the language. These are
|
||||||
implemented using the Java-style @decorator syntax, like so:
|
implemented using the Java-style @decorator syntax, like so:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
|
@ -85,6 +85,8 @@ static int exec_statement(PyFrameObject *,
|
||||||
static void set_exc_info(PyThreadState *, PyObject *, PyObject *, PyObject *);
|
static void set_exc_info(PyThreadState *, PyObject *, PyObject *, PyObject *);
|
||||||
static void reset_exc_info(PyThreadState *);
|
static void reset_exc_info(PyThreadState *);
|
||||||
static void format_exc_check_arg(PyObject *, char *, PyObject *);
|
static void format_exc_check_arg(PyObject *, char *, PyObject *);
|
||||||
|
static PyObject *string_concatenate(PyObject *, PyObject *,
|
||||||
|
PyFrameObject *, unsigned char *);
|
||||||
|
|
||||||
#define NAME_ERROR_MSG \
|
#define NAME_ERROR_MSG \
|
||||||
"name '%.200s' is not defined"
|
"name '%.200s' is not defined"
|
||||||
|
@ -550,6 +552,7 @@ PyEval_EvalFrame(PyFrameObject *f)
|
||||||
#define INSTR_OFFSET() (next_instr - first_instr)
|
#define INSTR_OFFSET() (next_instr - first_instr)
|
||||||
#define NEXTOP() (*next_instr++)
|
#define NEXTOP() (*next_instr++)
|
||||||
#define NEXTARG() (next_instr += 2, (next_instr[-1]<<8) + next_instr[-2])
|
#define NEXTARG() (next_instr += 2, (next_instr[-1]<<8) + next_instr[-2])
|
||||||
|
#define PEEKARG() ((next_instr[2]<<8) + next_instr[1])
|
||||||
#define JUMPTO(x) (next_instr = first_instr + (x))
|
#define JUMPTO(x) (next_instr = first_instr + (x))
|
||||||
#define JUMPBY(x) (next_instr += (x))
|
#define JUMPBY(x) (next_instr += (x))
|
||||||
|
|
||||||
|
@ -580,8 +583,7 @@ PyEval_EvalFrame(PyFrameObject *f)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define PREDICTED(op) PRED_##op: next_instr++
|
#define PREDICTED(op) PRED_##op: next_instr++
|
||||||
#define PREDICTED_WITH_ARG(op) PRED_##op: oparg = (next_instr[2]<<8) + \
|
#define PREDICTED_WITH_ARG(op) PRED_##op: oparg = PEEKARG(); next_instr += 3
|
||||||
next_instr[1]; next_instr += 3
|
|
||||||
|
|
||||||
/* Stack manipulation macros */
|
/* Stack manipulation macros */
|
||||||
|
|
||||||
|
@ -1066,11 +1068,18 @@ PyEval_EvalFrame(PyFrameObject *f)
|
||||||
goto slow_add;
|
goto slow_add;
|
||||||
x = PyInt_FromLong(i);
|
x = PyInt_FromLong(i);
|
||||||
}
|
}
|
||||||
|
else if (PyString_CheckExact(v) &&
|
||||||
|
PyString_CheckExact(w)) {
|
||||||
|
x = string_concatenate(v, w, f, next_instr);
|
||||||
|
/* string_concatenate consumed the ref to v */
|
||||||
|
goto skip_decref_vx;
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
slow_add:
|
slow_add:
|
||||||
x = PyNumber_Add(v, w);
|
x = PyNumber_Add(v, w);
|
||||||
}
|
}
|
||||||
Py_DECREF(v);
|
Py_DECREF(v);
|
||||||
|
skip_decref_vx:
|
||||||
Py_DECREF(w);
|
Py_DECREF(w);
|
||||||
SET_TOP(x);
|
SET_TOP(x);
|
||||||
if (x != NULL) continue;
|
if (x != NULL) continue;
|
||||||
|
@ -1261,11 +1270,18 @@ PyEval_EvalFrame(PyFrameObject *f)
|
||||||
goto slow_iadd;
|
goto slow_iadd;
|
||||||
x = PyInt_FromLong(i);
|
x = PyInt_FromLong(i);
|
||||||
}
|
}
|
||||||
|
else if (PyString_CheckExact(v) &&
|
||||||
|
PyString_CheckExact(w)) {
|
||||||
|
x = string_concatenate(v, w, f, next_instr);
|
||||||
|
/* string_concatenate consumed the ref to v */
|
||||||
|
goto skip_decref_v;
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
slow_iadd:
|
slow_iadd:
|
||||||
x = PyNumber_InPlaceAdd(v, w);
|
x = PyNumber_InPlaceAdd(v, w);
|
||||||
}
|
}
|
||||||
Py_DECREF(v);
|
Py_DECREF(v);
|
||||||
|
skip_decref_v:
|
||||||
Py_DECREF(w);
|
Py_DECREF(w);
|
||||||
SET_TOP(x);
|
SET_TOP(x);
|
||||||
if (x != NULL) continue;
|
if (x != NULL) continue;
|
||||||
|
@ -4191,6 +4207,79 @@ format_exc_check_arg(PyObject *exc, char *format_str, PyObject *obj)
|
||||||
PyErr_Format(exc, format_str, obj_str);
|
PyErr_Format(exc, format_str, obj_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
string_concatenate(PyObject *v, PyObject *w,
|
||||||
|
PyFrameObject *f, unsigned char *next_instr)
|
||||||
|
{
|
||||||
|
/* This function implements 'variable += expr' when both arguments
|
||||||
|
are strings. */
|
||||||
|
|
||||||
|
if (v->ob_refcnt == 2) {
|
||||||
|
/* In the common case, there are 2 references to the value
|
||||||
|
* stored in 'variable' when the += is performed: one on the
|
||||||
|
* value stack (in 'v') and one still stored in the 'variable'.
|
||||||
|
* We try to delete the variable now to reduce the refcnt to 1.
|
||||||
|
*/
|
||||||
|
switch (*next_instr) {
|
||||||
|
case STORE_FAST:
|
||||||
|
{
|
||||||
|
int oparg = PEEKARG();
|
||||||
|
PyObject **fastlocals = f->f_localsplus;
|
||||||
|
if (GETLOCAL(oparg) == v)
|
||||||
|
SETLOCAL(oparg, NULL);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case STORE_DEREF:
|
||||||
|
{
|
||||||
|
PyObject **freevars = f->f_localsplus + f->f_nlocals;
|
||||||
|
PyObject *c = freevars[PEEKARG()];
|
||||||
|
if (PyCell_GET(c) == v)
|
||||||
|
PyCell_Set(c, NULL);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case STORE_NAME:
|
||||||
|
{
|
||||||
|
PyObject *names = f->f_code->co_names;
|
||||||
|
PyObject *name = GETITEM(names, PEEKARG());
|
||||||
|
PyObject *locals = f->f_locals;
|
||||||
|
if (PyDict_CheckExact(locals) &&
|
||||||
|
PyDict_GetItem(locals, name) == v) {
|
||||||
|
if (PyDict_DelItem(locals, name) != 0) {
|
||||||
|
PyErr_Clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (v->ob_refcnt == 1) {
|
||||||
|
/* Now we own the last reference to 'v', so we can resize it
|
||||||
|
* in-place.
|
||||||
|
*/
|
||||||
|
int v_len = PyString_GET_SIZE(v);
|
||||||
|
int w_len = PyString_GET_SIZE(w);
|
||||||
|
if (_PyString_Resize(&v, v_len + w_len) != 0) {
|
||||||
|
/* XXX if _PyString_Resize() fails, 'v' has been
|
||||||
|
* deallocated so it cannot be put back into 'variable'.
|
||||||
|
* The MemoryError is raised when there is no value in
|
||||||
|
* 'variable', which might (very remotely) be a cause
|
||||||
|
* of incompatibilities.
|
||||||
|
*/
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
/* copy 'w' into the newly allocated area of 'v' */
|
||||||
|
memcpy(PyString_AS_STRING(v) + v_len,
|
||||||
|
PyString_AS_STRING(w), w_len);
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* When in-place resizing is not an option. */
|
||||||
|
PyString_Concat(&v, w);
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef DYNAMIC_EXECUTION_PROFILE
|
#ifdef DYNAMIC_EXECUTION_PROFILE
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue