/* Docstring text for str.strip.  The first line is the signature and the
   remaining lines are the description; both appear in the help(str.strip)
   output. */
PyDoc_STRVAR(unicode_strip__doc__, "strip($self, chars=None, /)\n" "--\n" "\n" "Return a copy of the string with leading and trailing whitespace removed.\n" "\n" "If chars is given and not None, remove characters in chars instead.");
其对应在解释器中通过help函数查看的结果:
1 2 3 4 5 6 7 8 9
Python 3.7.7 (default, Jun 21 2020, 15:02:27) [MSC v.1916 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> help(str.strip)
Help on method_descriptor:

strip(self, chars=None, /)
    Return a copy of the string with leading and trailing whitespace removed.

    If chars is given and not None, remove characters in chars instead.
/*[clinic input]
str.strip as unicode_strip

    chars: object = None
    /

Return a copy of the string with leading and trailing whitespace removed.

If chars is given and not None, remove characters in chars instead.
[clinic start generated code]*/
/* externally visible for str.strip(unicode) */
/*
 * Return `self` with the characters contained in `sepobj` removed from one
 * or both ends.
 *
 *   self       string to strip.
 *   striptype  leading characters are stripped unless this is RIGHTSTRIP;
 *              trailing characters are stripped unless this is LEFTSTRIP.
 *   sepobj     string whose characters form the set to remove.
 *
 * Returns NULL when PyUnicode_READY() fails for either string; otherwise
 * returns whatever PyUnicode_Substring(self, i, j) yields for the surviving
 * range [i, j).
 */
PyObject *
_PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj)
{
    void *data;
    int kind;
    Py_ssize_t i, j, len;
    BLOOM_MASK sepmask;
    Py_ssize_t seplen;

    if (PyUnicode_READY(self) == -1 || PyUnicode_READY(sepobj) == -1)
        return NULL;

    kind = PyUnicode_KIND(self);
    data = PyUnicode_DATA(self);
    len = PyUnicode_GET_LENGTH(self);
    seplen = PyUnicode_GET_LENGTH(sepobj);
    /* NOTE(review): presumably a Bloom-filter style mask over sepobj's
       characters, judging by the helper names -- confirm against
       make_bloom_mask(). */
    sepmask = make_bloom_mask(PyUnicode_KIND(sepobj),
                              PyUnicode_DATA(sepobj),
                              seplen);

    /* Advance i past every leading character that is found in sepobj. */
    i = 0;
    if (striptype != RIGHTSTRIP) {
        while (i < len) {
            Py_UCS4 ch = PyUnicode_READ(kind, data, i);
            /* A character rejected by the mask ends the scan without the
               PyUnicode_FindChar() lookup. */
            if (!BLOOM(sepmask, ch))
                break;
            /* Any negative result from the lookup stops the scan. */
            if (PyUnicode_FindChar(sepobj, ch, 0, seplen, 1) < 0)
                break;
            i++;
        }
    }

    /* Walk j backwards from the last index, never going below i, so the
       two scans cannot cross. */
    j = len;
    if (striptype != LEFTSTRIP) {
        j--;
        while (j >= i) {
            Py_UCS4 ch = PyUnicode_READ(kind, data, j);
            if (!BLOOM(sepmask, ch))
                break;
            if (PyUnicode_FindChar(sepobj, ch, 0, seplen, 1) < 0)
                break;
            j--;
        }

        /* j stopped on the last kept index; turn it back into an
           exclusive end. */
        j++;
    }

    return PyUnicode_Substring(self, i, j);
}
其中的kind是表示unicode对象里面真正的字节的存储方式:
1 2 3 4 5 6 7 8 9 10
/* Values describing how a unicode object's character data is stored.  The
   nonzero enumerators are named after a per-character width in bytes
   (1, 2 or 4) and carry that same number as their value. */
enum PyUnicode_Kind {
/* String contains only wstr byte characters.  This is only possible
   when the string was created with a legacy API and _PyUnicode_Ready()
   has not been called yet.  */
    PyUnicode_WCHAR_KIND = 0,
/* Return values of the PyUnicode_KIND() macro: */
    PyUnicode_1BYTE_KIND = 1,
    PyUnicode_2BYTE_KIND = 2,
    PyUnicode_4BYTE_KIND = 4
};
len是用来获取原始Unicode字符串的长度,seplen是作为参数的字符串长度:
1 2 3 4 5 6 7
/* Returns the length of the unicode string. The caller has to make sure that
   the string has its canonical representation set before calling
   this macro.  Call PyUnicode_(FAST_)Ready to ensure that.

   The argument `op` appears in each assert() and in the final member
   access, so it may be evaluated more than once; pass an expression
   without side effects. */
#define PyUnicode_GET_LENGTH(op)                \
    (assert(PyUnicode_Check(op)),               \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->length)
p = s; e = s + n; if (n > MEMCHR_CUT_OFF) { #if STRINGLIB_SIZEOF_CHAR == 1 p = memchr(s, ch, n); if (p != NULL) return (p - s); return-1; #else /* use memchr if we can choose a needle without two many likely false positives */ const STRINGLIB_CHAR *s1, *e1; unsignedchar needle = ch & 0xff; /* If looking for a multiple of 256, we'd have too many false positives looking for the '\0' byte in UCS2 and UCS4 representations. */ if (needle != 0) { do { void *candidate = memchr(p, needle, (e - p) * sizeof(STRINGLIB_CHAR)); if (candidate == NULL) return-1; s1 = p; p = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); if (*p == ch) return (p - s); /* False positive */ p++; if (p - s1 > MEMCHR_CUT_OFF) continue; if (e - p <= MEMCHR_CUT_OFF) break; e1 = p + MEMCHR_CUT_OFF; while (p != e1) { if (*p == ch) return (p - s); p++; } } while (e - p > MEMCHR_CUT_OFF); } #endif } while (p < e) { if (*p == ch) return (p - s); p++; } return-1; }