17db96d56Sopenharmony_ci/* csv module */ 27db96d56Sopenharmony_ci 37db96d56Sopenharmony_ci/* 47db96d56Sopenharmony_ci 57db96d56Sopenharmony_ciThis module provides the low-level underpinnings of a CSV reading/writing 67db96d56Sopenharmony_cimodule. Users should not use this module directly, but import the csv.py 77db96d56Sopenharmony_cimodule instead. 87db96d56Sopenharmony_ci 97db96d56Sopenharmony_ci*/ 107db96d56Sopenharmony_ci 117db96d56Sopenharmony_ci#define MODULE_VERSION "1.0" 127db96d56Sopenharmony_ci 137db96d56Sopenharmony_ci#include "Python.h" 147db96d56Sopenharmony_ci#include "structmember.h" // PyMemberDef 157db96d56Sopenharmony_ci#include <stdbool.h> 167db96d56Sopenharmony_ci 177db96d56Sopenharmony_ci/*[clinic input] 187db96d56Sopenharmony_cimodule _csv 197db96d56Sopenharmony_ci[clinic start generated code]*/ 207db96d56Sopenharmony_ci/*[clinic end generated code: output=da39a3ee5e6b4b0d input=385118b71aa43706]*/ 217db96d56Sopenharmony_ci 227db96d56Sopenharmony_ci#include "clinic/_csv.c.h" 237db96d56Sopenharmony_ci#define NOT_SET ((Py_UCS4)-1) 247db96d56Sopenharmony_ci#define EOL ((Py_UCS4)-2) 257db96d56Sopenharmony_ci 267db96d56Sopenharmony_ci 277db96d56Sopenharmony_citypedef struct { 287db96d56Sopenharmony_ci PyObject *error_obj; /* CSV exception */ 297db96d56Sopenharmony_ci PyObject *dialects; /* Dialect registry */ 307db96d56Sopenharmony_ci PyTypeObject *dialect_type; 317db96d56Sopenharmony_ci PyTypeObject *reader_type; 327db96d56Sopenharmony_ci PyTypeObject *writer_type; 337db96d56Sopenharmony_ci long field_limit; /* max parsed field size */ 347db96d56Sopenharmony_ci PyObject *str_write; 357db96d56Sopenharmony_ci} _csvstate; 367db96d56Sopenharmony_ci 377db96d56Sopenharmony_cistatic struct PyModuleDef _csvmodule; 387db96d56Sopenharmony_ci 397db96d56Sopenharmony_cistatic inline _csvstate* 407db96d56Sopenharmony_ciget_csv_state(PyObject *module) 417db96d56Sopenharmony_ci{ 427db96d56Sopenharmony_ci void *state = PyModule_GetState(module); 
437db96d56Sopenharmony_ci assert(state != NULL); 447db96d56Sopenharmony_ci return (_csvstate *)state; 457db96d56Sopenharmony_ci} 467db96d56Sopenharmony_ci 477db96d56Sopenharmony_cistatic int 487db96d56Sopenharmony_ci_csv_clear(PyObject *module) 497db96d56Sopenharmony_ci{ 507db96d56Sopenharmony_ci _csvstate *module_state = PyModule_GetState(module); 517db96d56Sopenharmony_ci Py_CLEAR(module_state->error_obj); 527db96d56Sopenharmony_ci Py_CLEAR(module_state->dialects); 537db96d56Sopenharmony_ci Py_CLEAR(module_state->dialect_type); 547db96d56Sopenharmony_ci Py_CLEAR(module_state->reader_type); 557db96d56Sopenharmony_ci Py_CLEAR(module_state->writer_type); 567db96d56Sopenharmony_ci Py_CLEAR(module_state->str_write); 577db96d56Sopenharmony_ci return 0; 587db96d56Sopenharmony_ci} 597db96d56Sopenharmony_ci 607db96d56Sopenharmony_cistatic int 617db96d56Sopenharmony_ci_csv_traverse(PyObject *module, visitproc visit, void *arg) 627db96d56Sopenharmony_ci{ 637db96d56Sopenharmony_ci _csvstate *module_state = PyModule_GetState(module); 647db96d56Sopenharmony_ci Py_VISIT(module_state->error_obj); 657db96d56Sopenharmony_ci Py_VISIT(module_state->dialects); 667db96d56Sopenharmony_ci Py_VISIT(module_state->dialect_type); 677db96d56Sopenharmony_ci Py_VISIT(module_state->reader_type); 687db96d56Sopenharmony_ci Py_VISIT(module_state->writer_type); 697db96d56Sopenharmony_ci return 0; 707db96d56Sopenharmony_ci} 717db96d56Sopenharmony_ci 727db96d56Sopenharmony_cistatic void 737db96d56Sopenharmony_ci_csv_free(void *module) 747db96d56Sopenharmony_ci{ 757db96d56Sopenharmony_ci _csv_clear((PyObject *)module); 767db96d56Sopenharmony_ci} 777db96d56Sopenharmony_ci 787db96d56Sopenharmony_citypedef enum { 797db96d56Sopenharmony_ci START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD, 807db96d56Sopenharmony_ci IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD, 817db96d56Sopenharmony_ci EAT_CRNL,AFTER_ESCAPED_CRNL 827db96d56Sopenharmony_ci} ParserState; 837db96d56Sopenharmony_ci 
847db96d56Sopenharmony_citypedef enum { 857db96d56Sopenharmony_ci QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE 867db96d56Sopenharmony_ci} QuoteStyle; 877db96d56Sopenharmony_ci 887db96d56Sopenharmony_citypedef struct { 897db96d56Sopenharmony_ci QuoteStyle style; 907db96d56Sopenharmony_ci const char *name; 917db96d56Sopenharmony_ci} StyleDesc; 927db96d56Sopenharmony_ci 937db96d56Sopenharmony_cistatic const StyleDesc quote_styles[] = { 947db96d56Sopenharmony_ci { QUOTE_MINIMAL, "QUOTE_MINIMAL" }, 957db96d56Sopenharmony_ci { QUOTE_ALL, "QUOTE_ALL" }, 967db96d56Sopenharmony_ci { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" }, 977db96d56Sopenharmony_ci { QUOTE_NONE, "QUOTE_NONE" }, 987db96d56Sopenharmony_ci { 0 } 997db96d56Sopenharmony_ci}; 1007db96d56Sopenharmony_ci 1017db96d56Sopenharmony_citypedef struct { 1027db96d56Sopenharmony_ci PyObject_HEAD 1037db96d56Sopenharmony_ci 1047db96d56Sopenharmony_ci char doublequote; /* is " represented by ""? */ 1057db96d56Sopenharmony_ci char skipinitialspace; /* ignore spaces following delimiter? 
*/ 1067db96d56Sopenharmony_ci char strict; /* raise exception on bad CSV */ 1077db96d56Sopenharmony_ci int quoting; /* style of quoting to write */ 1087db96d56Sopenharmony_ci Py_UCS4 delimiter; /* field separator */ 1097db96d56Sopenharmony_ci Py_UCS4 quotechar; /* quote character */ 1107db96d56Sopenharmony_ci Py_UCS4 escapechar; /* escape character */ 1117db96d56Sopenharmony_ci PyObject *lineterminator; /* string to write between records */ 1127db96d56Sopenharmony_ci 1137db96d56Sopenharmony_ci} DialectObj; 1147db96d56Sopenharmony_ci 1157db96d56Sopenharmony_citypedef struct { 1167db96d56Sopenharmony_ci PyObject_HEAD 1177db96d56Sopenharmony_ci 1187db96d56Sopenharmony_ci PyObject *input_iter; /* iterate over this for input lines */ 1197db96d56Sopenharmony_ci 1207db96d56Sopenharmony_ci DialectObj *dialect; /* parsing dialect */ 1217db96d56Sopenharmony_ci 1227db96d56Sopenharmony_ci PyObject *fields; /* field list for current record */ 1237db96d56Sopenharmony_ci ParserState state; /* current CSV parse state */ 1247db96d56Sopenharmony_ci Py_UCS4 *field; /* temporary buffer */ 1257db96d56Sopenharmony_ci Py_ssize_t field_size; /* size of allocated buffer */ 1267db96d56Sopenharmony_ci Py_ssize_t field_len; /* length of current field */ 1277db96d56Sopenharmony_ci int numeric_field; /* treat field as numeric */ 1287db96d56Sopenharmony_ci unsigned long line_num; /* Source-file line number */ 1297db96d56Sopenharmony_ci} ReaderObj; 1307db96d56Sopenharmony_ci 1317db96d56Sopenharmony_citypedef struct { 1327db96d56Sopenharmony_ci PyObject_HEAD 1337db96d56Sopenharmony_ci 1347db96d56Sopenharmony_ci PyObject *write; /* write output lines to this file */ 1357db96d56Sopenharmony_ci 1367db96d56Sopenharmony_ci DialectObj *dialect; /* parsing dialect */ 1377db96d56Sopenharmony_ci 1387db96d56Sopenharmony_ci Py_UCS4 *rec; /* buffer for parser.join */ 1397db96d56Sopenharmony_ci Py_ssize_t rec_size; /* size of allocated record */ 1407db96d56Sopenharmony_ci Py_ssize_t rec_len; /* length of 
record */ 1417db96d56Sopenharmony_ci int num_fields; /* number of fields in record */ 1427db96d56Sopenharmony_ci 1437db96d56Sopenharmony_ci PyObject *error_obj; /* cached error object */ 1447db96d56Sopenharmony_ci} WriterObj; 1457db96d56Sopenharmony_ci 1467db96d56Sopenharmony_ci/* 1477db96d56Sopenharmony_ci * DIALECT class 1487db96d56Sopenharmony_ci */ 1497db96d56Sopenharmony_ci 1507db96d56Sopenharmony_cistatic PyObject * 1517db96d56Sopenharmony_ciget_dialect_from_registry(PyObject *name_obj, _csvstate *module_state) 1527db96d56Sopenharmony_ci{ 1537db96d56Sopenharmony_ci PyObject *dialect_obj; 1547db96d56Sopenharmony_ci 1557db96d56Sopenharmony_ci dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj); 1567db96d56Sopenharmony_ci if (dialect_obj == NULL) { 1577db96d56Sopenharmony_ci if (!PyErr_Occurred()) 1587db96d56Sopenharmony_ci PyErr_Format(module_state->error_obj, "unknown dialect"); 1597db96d56Sopenharmony_ci } 1607db96d56Sopenharmony_ci else 1617db96d56Sopenharmony_ci Py_INCREF(dialect_obj); 1627db96d56Sopenharmony_ci 1637db96d56Sopenharmony_ci return dialect_obj; 1647db96d56Sopenharmony_ci} 1657db96d56Sopenharmony_ci 1667db96d56Sopenharmony_cistatic PyObject * 1677db96d56Sopenharmony_ciget_char_or_None(Py_UCS4 c) 1687db96d56Sopenharmony_ci{ 1697db96d56Sopenharmony_ci if (c == NOT_SET) { 1707db96d56Sopenharmony_ci Py_RETURN_NONE; 1717db96d56Sopenharmony_ci } 1727db96d56Sopenharmony_ci else 1737db96d56Sopenharmony_ci return PyUnicode_FromOrdinal(c); 1747db96d56Sopenharmony_ci} 1757db96d56Sopenharmony_ci 1767db96d56Sopenharmony_cistatic PyObject * 1777db96d56Sopenharmony_ciDialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored)) 1787db96d56Sopenharmony_ci{ 1797db96d56Sopenharmony_ci Py_XINCREF(self->lineterminator); 1807db96d56Sopenharmony_ci return self->lineterminator; 1817db96d56Sopenharmony_ci} 1827db96d56Sopenharmony_ci 1837db96d56Sopenharmony_cistatic PyObject * 1847db96d56Sopenharmony_ciDialect_get_delimiter(DialectObj 
*self, void *Py_UNUSED(ignored)) 1857db96d56Sopenharmony_ci{ 1867db96d56Sopenharmony_ci return get_char_or_None(self->delimiter); 1877db96d56Sopenharmony_ci} 1887db96d56Sopenharmony_ci 1897db96d56Sopenharmony_cistatic PyObject * 1907db96d56Sopenharmony_ciDialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored)) 1917db96d56Sopenharmony_ci{ 1927db96d56Sopenharmony_ci return get_char_or_None(self->escapechar); 1937db96d56Sopenharmony_ci} 1947db96d56Sopenharmony_ci 1957db96d56Sopenharmony_cistatic PyObject * 1967db96d56Sopenharmony_ciDialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored)) 1977db96d56Sopenharmony_ci{ 1987db96d56Sopenharmony_ci return get_char_or_None(self->quotechar); 1997db96d56Sopenharmony_ci} 2007db96d56Sopenharmony_ci 2017db96d56Sopenharmony_cistatic PyObject * 2027db96d56Sopenharmony_ciDialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored)) 2037db96d56Sopenharmony_ci{ 2047db96d56Sopenharmony_ci return PyLong_FromLong(self->quoting); 2057db96d56Sopenharmony_ci} 2067db96d56Sopenharmony_ci 2077db96d56Sopenharmony_cistatic int 2087db96d56Sopenharmony_ci_set_bool(const char *name, char *target, PyObject *src, bool dflt) 2097db96d56Sopenharmony_ci{ 2107db96d56Sopenharmony_ci if (src == NULL) 2117db96d56Sopenharmony_ci *target = dflt; 2127db96d56Sopenharmony_ci else { 2137db96d56Sopenharmony_ci int b = PyObject_IsTrue(src); 2147db96d56Sopenharmony_ci if (b < 0) 2157db96d56Sopenharmony_ci return -1; 2167db96d56Sopenharmony_ci *target = (char)b; 2177db96d56Sopenharmony_ci } 2187db96d56Sopenharmony_ci return 0; 2197db96d56Sopenharmony_ci} 2207db96d56Sopenharmony_ci 2217db96d56Sopenharmony_cistatic int 2227db96d56Sopenharmony_ci_set_int(const char *name, int *target, PyObject *src, int dflt) 2237db96d56Sopenharmony_ci{ 2247db96d56Sopenharmony_ci if (src == NULL) 2257db96d56Sopenharmony_ci *target = dflt; 2267db96d56Sopenharmony_ci else { 2277db96d56Sopenharmony_ci int value; 2287db96d56Sopenharmony_ci if (!PyLong_CheckExact(src)) { 
2297db96d56Sopenharmony_ci PyErr_Format(PyExc_TypeError, 2307db96d56Sopenharmony_ci "\"%s\" must be an integer", name); 2317db96d56Sopenharmony_ci return -1; 2327db96d56Sopenharmony_ci } 2337db96d56Sopenharmony_ci value = _PyLong_AsInt(src); 2347db96d56Sopenharmony_ci if (value == -1 && PyErr_Occurred()) { 2357db96d56Sopenharmony_ci return -1; 2367db96d56Sopenharmony_ci } 2377db96d56Sopenharmony_ci *target = value; 2387db96d56Sopenharmony_ci } 2397db96d56Sopenharmony_ci return 0; 2407db96d56Sopenharmony_ci} 2417db96d56Sopenharmony_ci 2427db96d56Sopenharmony_cistatic int 2437db96d56Sopenharmony_ci_set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) 2447db96d56Sopenharmony_ci{ 2457db96d56Sopenharmony_ci if (src == NULL) { 2467db96d56Sopenharmony_ci *target = dflt; 2477db96d56Sopenharmony_ci } 2487db96d56Sopenharmony_ci else { 2497db96d56Sopenharmony_ci *target = NOT_SET; 2507db96d56Sopenharmony_ci if (src != Py_None) { 2517db96d56Sopenharmony_ci if (!PyUnicode_Check(src)) { 2527db96d56Sopenharmony_ci PyErr_Format(PyExc_TypeError, 2537db96d56Sopenharmony_ci "\"%s\" must be string or None, not %.200s", name, 2547db96d56Sopenharmony_ci Py_TYPE(src)->tp_name); 2557db96d56Sopenharmony_ci return -1; 2567db96d56Sopenharmony_ci } 2577db96d56Sopenharmony_ci Py_ssize_t len = PyUnicode_GetLength(src); 2587db96d56Sopenharmony_ci if (len < 0) { 2597db96d56Sopenharmony_ci return -1; 2607db96d56Sopenharmony_ci } 2617db96d56Sopenharmony_ci if (len != 1) { 2627db96d56Sopenharmony_ci PyErr_Format(PyExc_TypeError, 2637db96d56Sopenharmony_ci "\"%s\" must be a 1-character string", 2647db96d56Sopenharmony_ci name); 2657db96d56Sopenharmony_ci return -1; 2667db96d56Sopenharmony_ci } 2677db96d56Sopenharmony_ci /* PyUnicode_READY() is called in PyUnicode_GetLength() */ 2687db96d56Sopenharmony_ci *target = PyUnicode_READ_CHAR(src, 0); 2697db96d56Sopenharmony_ci } 2707db96d56Sopenharmony_ci } 2717db96d56Sopenharmony_ci return 0; 2727db96d56Sopenharmony_ci} 
2737db96d56Sopenharmony_ci 2747db96d56Sopenharmony_cistatic int 2757db96d56Sopenharmony_ci_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) 2767db96d56Sopenharmony_ci{ 2777db96d56Sopenharmony_ci if (src == NULL) { 2787db96d56Sopenharmony_ci *target = dflt; 2797db96d56Sopenharmony_ci } 2807db96d56Sopenharmony_ci else { 2817db96d56Sopenharmony_ci if (!PyUnicode_Check(src)) { 2827db96d56Sopenharmony_ci PyErr_Format(PyExc_TypeError, 2837db96d56Sopenharmony_ci "\"%s\" must be string, not %.200s", name, 2847db96d56Sopenharmony_ci Py_TYPE(src)->tp_name); 2857db96d56Sopenharmony_ci return -1; 2867db96d56Sopenharmony_ci } 2877db96d56Sopenharmony_ci Py_ssize_t len = PyUnicode_GetLength(src); 2887db96d56Sopenharmony_ci if (len < 0) { 2897db96d56Sopenharmony_ci return -1; 2907db96d56Sopenharmony_ci } 2917db96d56Sopenharmony_ci if (len != 1) { 2927db96d56Sopenharmony_ci PyErr_Format(PyExc_TypeError, 2937db96d56Sopenharmony_ci "\"%s\" must be a 1-character string", 2947db96d56Sopenharmony_ci name); 2957db96d56Sopenharmony_ci return -1; 2967db96d56Sopenharmony_ci } 2977db96d56Sopenharmony_ci /* PyUnicode_READY() is called in PyUnicode_GetLength() */ 2987db96d56Sopenharmony_ci *target = PyUnicode_READ_CHAR(src, 0); 2997db96d56Sopenharmony_ci } 3007db96d56Sopenharmony_ci return 0; 3017db96d56Sopenharmony_ci} 3027db96d56Sopenharmony_ci 3037db96d56Sopenharmony_cistatic int 3047db96d56Sopenharmony_ci_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) 3057db96d56Sopenharmony_ci{ 3067db96d56Sopenharmony_ci if (src == NULL) 3077db96d56Sopenharmony_ci *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL); 3087db96d56Sopenharmony_ci else { 3097db96d56Sopenharmony_ci if (src == Py_None) 3107db96d56Sopenharmony_ci *target = NULL; 3117db96d56Sopenharmony_ci else if (!PyUnicode_Check(src)) { 3127db96d56Sopenharmony_ci PyErr_Format(PyExc_TypeError, 3137db96d56Sopenharmony_ci "\"%s\" must be a string", name); 3147db96d56Sopenharmony_ci 
return -1; 3157db96d56Sopenharmony_ci } 3167db96d56Sopenharmony_ci else { 3177db96d56Sopenharmony_ci if (PyUnicode_READY(src) == -1) 3187db96d56Sopenharmony_ci return -1; 3197db96d56Sopenharmony_ci Py_INCREF(src); 3207db96d56Sopenharmony_ci Py_XSETREF(*target, src); 3217db96d56Sopenharmony_ci } 3227db96d56Sopenharmony_ci } 3237db96d56Sopenharmony_ci return 0; 3247db96d56Sopenharmony_ci} 3257db96d56Sopenharmony_ci 3267db96d56Sopenharmony_cistatic int 3277db96d56Sopenharmony_cidialect_check_quoting(int quoting) 3287db96d56Sopenharmony_ci{ 3297db96d56Sopenharmony_ci const StyleDesc *qs; 3307db96d56Sopenharmony_ci 3317db96d56Sopenharmony_ci for (qs = quote_styles; qs->name; qs++) { 3327db96d56Sopenharmony_ci if ((int)qs->style == quoting) 3337db96d56Sopenharmony_ci return 0; 3347db96d56Sopenharmony_ci } 3357db96d56Sopenharmony_ci PyErr_Format(PyExc_TypeError, "bad \"quoting\" value"); 3367db96d56Sopenharmony_ci return -1; 3377db96d56Sopenharmony_ci} 3387db96d56Sopenharmony_ci 3397db96d56Sopenharmony_ci#define D_OFF(x) offsetof(DialectObj, x) 3407db96d56Sopenharmony_ci 3417db96d56Sopenharmony_cistatic struct PyMemberDef Dialect_memberlist[] = { 3427db96d56Sopenharmony_ci { "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY }, 3437db96d56Sopenharmony_ci { "doublequote", T_BOOL, D_OFF(doublequote), READONLY }, 3447db96d56Sopenharmony_ci { "strict", T_BOOL, D_OFF(strict), READONLY }, 3457db96d56Sopenharmony_ci { NULL } 3467db96d56Sopenharmony_ci}; 3477db96d56Sopenharmony_ci 3487db96d56Sopenharmony_cistatic PyGetSetDef Dialect_getsetlist[] = { 3497db96d56Sopenharmony_ci { "delimiter", (getter)Dialect_get_delimiter}, 3507db96d56Sopenharmony_ci { "escapechar", (getter)Dialect_get_escapechar}, 3517db96d56Sopenharmony_ci { "lineterminator", (getter)Dialect_get_lineterminator}, 3527db96d56Sopenharmony_ci { "quotechar", (getter)Dialect_get_quotechar}, 3537db96d56Sopenharmony_ci { "quoting", (getter)Dialect_get_quoting}, 3547db96d56Sopenharmony_ci {NULL}, 
3557db96d56Sopenharmony_ci}; 3567db96d56Sopenharmony_ci 3577db96d56Sopenharmony_cistatic void 3587db96d56Sopenharmony_ciDialect_dealloc(DialectObj *self) 3597db96d56Sopenharmony_ci{ 3607db96d56Sopenharmony_ci PyTypeObject *tp = Py_TYPE(self); 3617db96d56Sopenharmony_ci PyObject_GC_UnTrack(self); 3627db96d56Sopenharmony_ci tp->tp_clear((PyObject *)self); 3637db96d56Sopenharmony_ci PyObject_GC_Del(self); 3647db96d56Sopenharmony_ci Py_DECREF(tp); 3657db96d56Sopenharmony_ci} 3667db96d56Sopenharmony_ci 3677db96d56Sopenharmony_cistatic char *dialect_kws[] = { 3687db96d56Sopenharmony_ci "dialect", 3697db96d56Sopenharmony_ci "delimiter", 3707db96d56Sopenharmony_ci "doublequote", 3717db96d56Sopenharmony_ci "escapechar", 3727db96d56Sopenharmony_ci "lineterminator", 3737db96d56Sopenharmony_ci "quotechar", 3747db96d56Sopenharmony_ci "quoting", 3757db96d56Sopenharmony_ci "skipinitialspace", 3767db96d56Sopenharmony_ci "strict", 3777db96d56Sopenharmony_ci NULL 3787db96d56Sopenharmony_ci}; 3797db96d56Sopenharmony_ci 3807db96d56Sopenharmony_cistatic _csvstate * 3817db96d56Sopenharmony_ci_csv_state_from_type(PyTypeObject *type, const char *name) 3827db96d56Sopenharmony_ci{ 3837db96d56Sopenharmony_ci PyObject *module = PyType_GetModuleByDef(type, &_csvmodule); 3847db96d56Sopenharmony_ci if (module == NULL) { 3857db96d56Sopenharmony_ci return NULL; 3867db96d56Sopenharmony_ci } 3877db96d56Sopenharmony_ci _csvstate *module_state = PyModule_GetState(module); 3887db96d56Sopenharmony_ci if (module_state == NULL) { 3897db96d56Sopenharmony_ci PyErr_Format(PyExc_SystemError, 3907db96d56Sopenharmony_ci "%s: No _csv module state found", name); 3917db96d56Sopenharmony_ci return NULL; 3927db96d56Sopenharmony_ci } 3937db96d56Sopenharmony_ci return module_state; 3947db96d56Sopenharmony_ci} 3957db96d56Sopenharmony_ci 3967db96d56Sopenharmony_cistatic PyObject * 3977db96d56Sopenharmony_cidialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) 3987db96d56Sopenharmony_ci{ 
3997db96d56Sopenharmony_ci DialectObj *self; 4007db96d56Sopenharmony_ci PyObject *ret = NULL; 4017db96d56Sopenharmony_ci PyObject *dialect = NULL; 4027db96d56Sopenharmony_ci PyObject *delimiter = NULL; 4037db96d56Sopenharmony_ci PyObject *doublequote = NULL; 4047db96d56Sopenharmony_ci PyObject *escapechar = NULL; 4057db96d56Sopenharmony_ci PyObject *lineterminator = NULL; 4067db96d56Sopenharmony_ci PyObject *quotechar = NULL; 4077db96d56Sopenharmony_ci PyObject *quoting = NULL; 4087db96d56Sopenharmony_ci PyObject *skipinitialspace = NULL; 4097db96d56Sopenharmony_ci PyObject *strict = NULL; 4107db96d56Sopenharmony_ci 4117db96d56Sopenharmony_ci if (!PyArg_ParseTupleAndKeywords(args, kwargs, 4127db96d56Sopenharmony_ci "|OOOOOOOOO", dialect_kws, 4137db96d56Sopenharmony_ci &dialect, 4147db96d56Sopenharmony_ci &delimiter, 4157db96d56Sopenharmony_ci &doublequote, 4167db96d56Sopenharmony_ci &escapechar, 4177db96d56Sopenharmony_ci &lineterminator, 4187db96d56Sopenharmony_ci "echar, 4197db96d56Sopenharmony_ci "ing, 4207db96d56Sopenharmony_ci &skipinitialspace, 4217db96d56Sopenharmony_ci &strict)) 4227db96d56Sopenharmony_ci return NULL; 4237db96d56Sopenharmony_ci 4247db96d56Sopenharmony_ci _csvstate *module_state = _csv_state_from_type(type, "dialect_new"); 4257db96d56Sopenharmony_ci if (module_state == NULL) { 4267db96d56Sopenharmony_ci return NULL; 4277db96d56Sopenharmony_ci } 4287db96d56Sopenharmony_ci 4297db96d56Sopenharmony_ci if (dialect != NULL) { 4307db96d56Sopenharmony_ci if (PyUnicode_Check(dialect)) { 4317db96d56Sopenharmony_ci dialect = get_dialect_from_registry(dialect, module_state); 4327db96d56Sopenharmony_ci if (dialect == NULL) 4337db96d56Sopenharmony_ci return NULL; 4347db96d56Sopenharmony_ci } 4357db96d56Sopenharmony_ci else 4367db96d56Sopenharmony_ci Py_INCREF(dialect); 4377db96d56Sopenharmony_ci /* Can we reuse this instance? 
*/ 4387db96d56Sopenharmony_ci if (PyObject_TypeCheck(dialect, module_state->dialect_type) && 4397db96d56Sopenharmony_ci delimiter == NULL && 4407db96d56Sopenharmony_ci doublequote == NULL && 4417db96d56Sopenharmony_ci escapechar == NULL && 4427db96d56Sopenharmony_ci lineterminator == NULL && 4437db96d56Sopenharmony_ci quotechar == NULL && 4447db96d56Sopenharmony_ci quoting == NULL && 4457db96d56Sopenharmony_ci skipinitialspace == NULL && 4467db96d56Sopenharmony_ci strict == NULL) 4477db96d56Sopenharmony_ci return dialect; 4487db96d56Sopenharmony_ci } 4497db96d56Sopenharmony_ci 4507db96d56Sopenharmony_ci self = (DialectObj *)type->tp_alloc(type, 0); 4517db96d56Sopenharmony_ci if (self == NULL) { 4527db96d56Sopenharmony_ci Py_CLEAR(dialect); 4537db96d56Sopenharmony_ci return NULL; 4547db96d56Sopenharmony_ci } 4557db96d56Sopenharmony_ci self->lineterminator = NULL; 4567db96d56Sopenharmony_ci 4577db96d56Sopenharmony_ci Py_XINCREF(delimiter); 4587db96d56Sopenharmony_ci Py_XINCREF(doublequote); 4597db96d56Sopenharmony_ci Py_XINCREF(escapechar); 4607db96d56Sopenharmony_ci Py_XINCREF(lineterminator); 4617db96d56Sopenharmony_ci Py_XINCREF(quotechar); 4627db96d56Sopenharmony_ci Py_XINCREF(quoting); 4637db96d56Sopenharmony_ci Py_XINCREF(skipinitialspace); 4647db96d56Sopenharmony_ci Py_XINCREF(strict); 4657db96d56Sopenharmony_ci if (dialect != NULL) { 4667db96d56Sopenharmony_ci#define DIALECT_GETATTR(v, n) \ 4677db96d56Sopenharmony_ci do { \ 4687db96d56Sopenharmony_ci if (v == NULL) { \ 4697db96d56Sopenharmony_ci v = PyObject_GetAttrString(dialect, n); \ 4707db96d56Sopenharmony_ci if (v == NULL) \ 4717db96d56Sopenharmony_ci PyErr_Clear(); \ 4727db96d56Sopenharmony_ci } \ 4737db96d56Sopenharmony_ci } while (0) 4747db96d56Sopenharmony_ci DIALECT_GETATTR(delimiter, "delimiter"); 4757db96d56Sopenharmony_ci DIALECT_GETATTR(doublequote, "doublequote"); 4767db96d56Sopenharmony_ci DIALECT_GETATTR(escapechar, "escapechar"); 4777db96d56Sopenharmony_ci DIALECT_GETATTR(lineterminator, 
"lineterminator"); 4787db96d56Sopenharmony_ci DIALECT_GETATTR(quotechar, "quotechar"); 4797db96d56Sopenharmony_ci DIALECT_GETATTR(quoting, "quoting"); 4807db96d56Sopenharmony_ci DIALECT_GETATTR(skipinitialspace, "skipinitialspace"); 4817db96d56Sopenharmony_ci DIALECT_GETATTR(strict, "strict"); 4827db96d56Sopenharmony_ci } 4837db96d56Sopenharmony_ci 4847db96d56Sopenharmony_ci /* check types and convert to C values */ 4857db96d56Sopenharmony_ci#define DIASET(meth, name, target, src, dflt) \ 4867db96d56Sopenharmony_ci if (meth(name, target, src, dflt)) \ 4877db96d56Sopenharmony_ci goto err 4887db96d56Sopenharmony_ci DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ','); 4897db96d56Sopenharmony_ci DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true); 4907db96d56Sopenharmony_ci DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, NOT_SET); 4917db96d56Sopenharmony_ci DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n"); 4927db96d56Sopenharmony_ci DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"'); 4937db96d56Sopenharmony_ci DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL); 4947db96d56Sopenharmony_ci DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false); 4957db96d56Sopenharmony_ci DIASET(_set_bool, "strict", &self->strict, strict, false); 4967db96d56Sopenharmony_ci 4977db96d56Sopenharmony_ci /* validate options */ 4987db96d56Sopenharmony_ci if (dialect_check_quoting(self->quoting)) 4997db96d56Sopenharmony_ci goto err; 5007db96d56Sopenharmony_ci if (self->delimiter == NOT_SET) { 5017db96d56Sopenharmony_ci PyErr_SetString(PyExc_TypeError, 5027db96d56Sopenharmony_ci "\"delimiter\" must be a 1-character string"); 5037db96d56Sopenharmony_ci goto err; 5047db96d56Sopenharmony_ci } 5057db96d56Sopenharmony_ci if (quotechar == Py_None && quoting == NULL) 5067db96d56Sopenharmony_ci self->quoting = QUOTE_NONE; 
5077db96d56Sopenharmony_ci if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) { 5087db96d56Sopenharmony_ci PyErr_SetString(PyExc_TypeError, 5097db96d56Sopenharmony_ci "quotechar must be set if quoting enabled"); 5107db96d56Sopenharmony_ci goto err; 5117db96d56Sopenharmony_ci } 5127db96d56Sopenharmony_ci if (self->lineterminator == NULL) { 5137db96d56Sopenharmony_ci PyErr_SetString(PyExc_TypeError, "lineterminator must be set"); 5147db96d56Sopenharmony_ci goto err; 5157db96d56Sopenharmony_ci } 5167db96d56Sopenharmony_ci 5177db96d56Sopenharmony_ci ret = (PyObject *)self; 5187db96d56Sopenharmony_ci Py_INCREF(self); 5197db96d56Sopenharmony_cierr: 5207db96d56Sopenharmony_ci Py_CLEAR(self); 5217db96d56Sopenharmony_ci Py_CLEAR(dialect); 5227db96d56Sopenharmony_ci Py_CLEAR(delimiter); 5237db96d56Sopenharmony_ci Py_CLEAR(doublequote); 5247db96d56Sopenharmony_ci Py_CLEAR(escapechar); 5257db96d56Sopenharmony_ci Py_CLEAR(lineterminator); 5267db96d56Sopenharmony_ci Py_CLEAR(quotechar); 5277db96d56Sopenharmony_ci Py_CLEAR(quoting); 5287db96d56Sopenharmony_ci Py_CLEAR(skipinitialspace); 5297db96d56Sopenharmony_ci Py_CLEAR(strict); 5307db96d56Sopenharmony_ci return ret; 5317db96d56Sopenharmony_ci} 5327db96d56Sopenharmony_ci 5337db96d56Sopenharmony_ci/* Since dialect is now a heap type, it inherits pickling method for 5347db96d56Sopenharmony_ci * protocol 0 and 1 from object, therefore it needs to be overridden */ 5357db96d56Sopenharmony_ci 5367db96d56Sopenharmony_ciPyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling"); 5377db96d56Sopenharmony_ci 5387db96d56Sopenharmony_cistatic PyObject * 5397db96d56Sopenharmony_ciDialect_reduce(PyObject *self, PyObject *args) { 5407db96d56Sopenharmony_ci PyErr_Format(PyExc_TypeError, 5417db96d56Sopenharmony_ci "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self))); 5427db96d56Sopenharmony_ci return NULL; 5437db96d56Sopenharmony_ci} 5447db96d56Sopenharmony_ci 5457db96d56Sopenharmony_cistatic struct 
PyMethodDef dialect_methods[] = { 5467db96d56Sopenharmony_ci {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc}, 5477db96d56Sopenharmony_ci {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc}, 5487db96d56Sopenharmony_ci {NULL, NULL} 5497db96d56Sopenharmony_ci}; 5507db96d56Sopenharmony_ci 5517db96d56Sopenharmony_ciPyDoc_STRVAR(Dialect_Type_doc, 5527db96d56Sopenharmony_ci"CSV dialect\n" 5537db96d56Sopenharmony_ci"\n" 5547db96d56Sopenharmony_ci"The Dialect type records CSV parsing and generation options.\n"); 5557db96d56Sopenharmony_ci 5567db96d56Sopenharmony_cistatic int 5577db96d56Sopenharmony_ciDialect_clear(DialectObj *self) 5587db96d56Sopenharmony_ci{ 5597db96d56Sopenharmony_ci Py_CLEAR(self->lineterminator); 5607db96d56Sopenharmony_ci return 0; 5617db96d56Sopenharmony_ci} 5627db96d56Sopenharmony_ci 5637db96d56Sopenharmony_cistatic int 5647db96d56Sopenharmony_ciDialect_traverse(DialectObj *self, visitproc visit, void *arg) 5657db96d56Sopenharmony_ci{ 5667db96d56Sopenharmony_ci Py_VISIT(self->lineterminator); 5677db96d56Sopenharmony_ci Py_VISIT(Py_TYPE(self)); 5687db96d56Sopenharmony_ci return 0; 5697db96d56Sopenharmony_ci} 5707db96d56Sopenharmony_ci 5717db96d56Sopenharmony_cistatic PyType_Slot Dialect_Type_slots[] = { 5727db96d56Sopenharmony_ci {Py_tp_doc, (char*)Dialect_Type_doc}, 5737db96d56Sopenharmony_ci {Py_tp_members, Dialect_memberlist}, 5747db96d56Sopenharmony_ci {Py_tp_getset, Dialect_getsetlist}, 5757db96d56Sopenharmony_ci {Py_tp_new, dialect_new}, 5767db96d56Sopenharmony_ci {Py_tp_methods, dialect_methods}, 5777db96d56Sopenharmony_ci {Py_tp_dealloc, Dialect_dealloc}, 5787db96d56Sopenharmony_ci {Py_tp_clear, Dialect_clear}, 5797db96d56Sopenharmony_ci {Py_tp_traverse, Dialect_traverse}, 5807db96d56Sopenharmony_ci {0, NULL} 5817db96d56Sopenharmony_ci}; 5827db96d56Sopenharmony_ci 5837db96d56Sopenharmony_ciPyType_Spec Dialect_Type_spec = { 5847db96d56Sopenharmony_ci .name = "_csv.Dialect", 5857db96d56Sopenharmony_ci .basicsize 
= sizeof(DialectObj), 5867db96d56Sopenharmony_ci .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC | 5877db96d56Sopenharmony_ci Py_TPFLAGS_IMMUTABLETYPE), 5887db96d56Sopenharmony_ci .slots = Dialect_Type_slots, 5897db96d56Sopenharmony_ci}; 5907db96d56Sopenharmony_ci 5917db96d56Sopenharmony_ci 5927db96d56Sopenharmony_ci/* 5937db96d56Sopenharmony_ci * Return an instance of the dialect type, given a Python instance or kwarg 5947db96d56Sopenharmony_ci * description of the dialect 5957db96d56Sopenharmony_ci */ 5967db96d56Sopenharmony_cistatic PyObject * 5977db96d56Sopenharmony_ci_call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs) 5987db96d56Sopenharmony_ci{ 5997db96d56Sopenharmony_ci PyObject *type = (PyObject *)module_state->dialect_type; 6007db96d56Sopenharmony_ci if (dialect_inst) { 6017db96d56Sopenharmony_ci return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs); 6027db96d56Sopenharmony_ci } 6037db96d56Sopenharmony_ci else { 6047db96d56Sopenharmony_ci return PyObject_VectorcallDict(type, NULL, 0, kwargs); 6057db96d56Sopenharmony_ci } 6067db96d56Sopenharmony_ci} 6077db96d56Sopenharmony_ci 6087db96d56Sopenharmony_ci/* 6097db96d56Sopenharmony_ci * READER 6107db96d56Sopenharmony_ci */ 6117db96d56Sopenharmony_cistatic int 6127db96d56Sopenharmony_ciparse_save_field(ReaderObj *self) 6137db96d56Sopenharmony_ci{ 6147db96d56Sopenharmony_ci PyObject *field; 6157db96d56Sopenharmony_ci 6167db96d56Sopenharmony_ci field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, 6177db96d56Sopenharmony_ci (void *) self->field, self->field_len); 6187db96d56Sopenharmony_ci if (field == NULL) 6197db96d56Sopenharmony_ci return -1; 6207db96d56Sopenharmony_ci self->field_len = 0; 6217db96d56Sopenharmony_ci if (self->numeric_field) { 6227db96d56Sopenharmony_ci PyObject *tmp; 6237db96d56Sopenharmony_ci 6247db96d56Sopenharmony_ci self->numeric_field = 0; 6257db96d56Sopenharmony_ci tmp = PyNumber_Float(field); 6267db96d56Sopenharmony_ci 
Py_DECREF(field); 6277db96d56Sopenharmony_ci if (tmp == NULL) 6287db96d56Sopenharmony_ci return -1; 6297db96d56Sopenharmony_ci field = tmp; 6307db96d56Sopenharmony_ci } 6317db96d56Sopenharmony_ci if (PyList_Append(self->fields, field) < 0) { 6327db96d56Sopenharmony_ci Py_DECREF(field); 6337db96d56Sopenharmony_ci return -1; 6347db96d56Sopenharmony_ci } 6357db96d56Sopenharmony_ci Py_DECREF(field); 6367db96d56Sopenharmony_ci return 0; 6377db96d56Sopenharmony_ci} 6387db96d56Sopenharmony_ci 6397db96d56Sopenharmony_cistatic int 6407db96d56Sopenharmony_ciparse_grow_buff(ReaderObj *self) 6417db96d56Sopenharmony_ci{ 6427db96d56Sopenharmony_ci assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4)); 6437db96d56Sopenharmony_ci 6447db96d56Sopenharmony_ci Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096; 6457db96d56Sopenharmony_ci Py_UCS4 *field_new = self->field; 6467db96d56Sopenharmony_ci PyMem_Resize(field_new, Py_UCS4, field_size_new); 6477db96d56Sopenharmony_ci if (field_new == NULL) { 6487db96d56Sopenharmony_ci PyErr_NoMemory(); 6497db96d56Sopenharmony_ci return 0; 6507db96d56Sopenharmony_ci } 6517db96d56Sopenharmony_ci self->field = field_new; 6527db96d56Sopenharmony_ci self->field_size = field_size_new; 6537db96d56Sopenharmony_ci return 1; 6547db96d56Sopenharmony_ci} 6557db96d56Sopenharmony_ci 6567db96d56Sopenharmony_cistatic int 6577db96d56Sopenharmony_ciparse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c) 6587db96d56Sopenharmony_ci{ 6597db96d56Sopenharmony_ci if (self->field_len >= module_state->field_limit) { 6607db96d56Sopenharmony_ci PyErr_Format(module_state->error_obj, 6617db96d56Sopenharmony_ci "field larger than field limit (%ld)", 6627db96d56Sopenharmony_ci module_state->field_limit); 6637db96d56Sopenharmony_ci return -1; 6647db96d56Sopenharmony_ci } 6657db96d56Sopenharmony_ci if (self->field_len == self->field_size && !parse_grow_buff(self)) 6667db96d56Sopenharmony_ci return -1; 6677db96d56Sopenharmony_ci 
self->field[self->field_len++] = c; 6687db96d56Sopenharmony_ci return 0; 6697db96d56Sopenharmony_ci} 6707db96d56Sopenharmony_ci 6717db96d56Sopenharmony_cistatic int 6727db96d56Sopenharmony_ciparse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c) 6737db96d56Sopenharmony_ci{ 6747db96d56Sopenharmony_ci DialectObj *dialect = self->dialect; 6757db96d56Sopenharmony_ci 6767db96d56Sopenharmony_ci switch (self->state) { 6777db96d56Sopenharmony_ci case START_RECORD: 6787db96d56Sopenharmony_ci /* start of record */ 6797db96d56Sopenharmony_ci if (c == EOL) 6807db96d56Sopenharmony_ci /* empty line - return [] */ 6817db96d56Sopenharmony_ci break; 6827db96d56Sopenharmony_ci else if (c == '\n' || c == '\r') { 6837db96d56Sopenharmony_ci self->state = EAT_CRNL; 6847db96d56Sopenharmony_ci break; 6857db96d56Sopenharmony_ci } 6867db96d56Sopenharmony_ci /* normal character - handle as START_FIELD */ 6877db96d56Sopenharmony_ci self->state = START_FIELD; 6887db96d56Sopenharmony_ci /* fallthru */ 6897db96d56Sopenharmony_ci case START_FIELD: 6907db96d56Sopenharmony_ci /* expecting field */ 6917db96d56Sopenharmony_ci if (c == '\n' || c == '\r' || c == EOL) { 6927db96d56Sopenharmony_ci /* save empty field - return [fields] */ 6937db96d56Sopenharmony_ci if (parse_save_field(self) < 0) 6947db96d56Sopenharmony_ci return -1; 6957db96d56Sopenharmony_ci self->state = (c == EOL ? 
START_RECORD : EAT_CRNL); 6967db96d56Sopenharmony_ci } 6977db96d56Sopenharmony_ci else if (c == dialect->quotechar && 6987db96d56Sopenharmony_ci dialect->quoting != QUOTE_NONE) { 6997db96d56Sopenharmony_ci /* start quoted field */ 7007db96d56Sopenharmony_ci self->state = IN_QUOTED_FIELD; 7017db96d56Sopenharmony_ci } 7027db96d56Sopenharmony_ci else if (c == dialect->escapechar) { 7037db96d56Sopenharmony_ci /* possible escaped character */ 7047db96d56Sopenharmony_ci self->state = ESCAPED_CHAR; 7057db96d56Sopenharmony_ci } 7067db96d56Sopenharmony_ci else if (c == ' ' && dialect->skipinitialspace) 7077db96d56Sopenharmony_ci /* ignore spaces at start of field */ 7087db96d56Sopenharmony_ci ; 7097db96d56Sopenharmony_ci else if (c == dialect->delimiter) { 7107db96d56Sopenharmony_ci /* save empty field */ 7117db96d56Sopenharmony_ci if (parse_save_field(self) < 0) 7127db96d56Sopenharmony_ci return -1; 7137db96d56Sopenharmony_ci } 7147db96d56Sopenharmony_ci else { 7157db96d56Sopenharmony_ci /* begin new unquoted field */ 7167db96d56Sopenharmony_ci if (dialect->quoting == QUOTE_NONNUMERIC) 7177db96d56Sopenharmony_ci self->numeric_field = 1; 7187db96d56Sopenharmony_ci if (parse_add_char(self, module_state, c) < 0) 7197db96d56Sopenharmony_ci return -1; 7207db96d56Sopenharmony_ci self->state = IN_FIELD; 7217db96d56Sopenharmony_ci } 7227db96d56Sopenharmony_ci break; 7237db96d56Sopenharmony_ci 7247db96d56Sopenharmony_ci case ESCAPED_CHAR: 7257db96d56Sopenharmony_ci if (c == '\n' || c=='\r') { 7267db96d56Sopenharmony_ci if (parse_add_char(self, module_state, c) < 0) 7277db96d56Sopenharmony_ci return -1; 7287db96d56Sopenharmony_ci self->state = AFTER_ESCAPED_CRNL; 7297db96d56Sopenharmony_ci break; 7307db96d56Sopenharmony_ci } 7317db96d56Sopenharmony_ci if (c == EOL) 7327db96d56Sopenharmony_ci c = '\n'; 7337db96d56Sopenharmony_ci if (parse_add_char(self, module_state, c) < 0) 7347db96d56Sopenharmony_ci return -1; 7357db96d56Sopenharmony_ci self->state = IN_FIELD; 
7367db96d56Sopenharmony_ci break; 7377db96d56Sopenharmony_ci 7387db96d56Sopenharmony_ci case AFTER_ESCAPED_CRNL: 7397db96d56Sopenharmony_ci if (c == EOL) 7407db96d56Sopenharmony_ci break; 7417db96d56Sopenharmony_ci /*fallthru*/ 7427db96d56Sopenharmony_ci 7437db96d56Sopenharmony_ci case IN_FIELD: 7447db96d56Sopenharmony_ci /* in unquoted field */ 7457db96d56Sopenharmony_ci if (c == '\n' || c == '\r' || c == EOL) { 7467db96d56Sopenharmony_ci /* end of line - return [fields] */ 7477db96d56Sopenharmony_ci if (parse_save_field(self) < 0) 7487db96d56Sopenharmony_ci return -1; 7497db96d56Sopenharmony_ci self->state = (c == EOL ? START_RECORD : EAT_CRNL); 7507db96d56Sopenharmony_ci } 7517db96d56Sopenharmony_ci else if (c == dialect->escapechar) { 7527db96d56Sopenharmony_ci /* possible escaped character */ 7537db96d56Sopenharmony_ci self->state = ESCAPED_CHAR; 7547db96d56Sopenharmony_ci } 7557db96d56Sopenharmony_ci else if (c == dialect->delimiter) { 7567db96d56Sopenharmony_ci /* save field - wait for new field */ 7577db96d56Sopenharmony_ci if (parse_save_field(self) < 0) 7587db96d56Sopenharmony_ci return -1; 7597db96d56Sopenharmony_ci self->state = START_FIELD; 7607db96d56Sopenharmony_ci } 7617db96d56Sopenharmony_ci else { 7627db96d56Sopenharmony_ci /* normal character - save in field */ 7637db96d56Sopenharmony_ci if (parse_add_char(self, module_state, c) < 0) 7647db96d56Sopenharmony_ci return -1; 7657db96d56Sopenharmony_ci } 7667db96d56Sopenharmony_ci break; 7677db96d56Sopenharmony_ci 7687db96d56Sopenharmony_ci case IN_QUOTED_FIELD: 7697db96d56Sopenharmony_ci /* in quoted field */ 7707db96d56Sopenharmony_ci if (c == EOL) 7717db96d56Sopenharmony_ci ; 7727db96d56Sopenharmony_ci else if (c == dialect->escapechar) { 7737db96d56Sopenharmony_ci /* Possible escape character */ 7747db96d56Sopenharmony_ci self->state = ESCAPE_IN_QUOTED_FIELD; 7757db96d56Sopenharmony_ci } 7767db96d56Sopenharmony_ci else if (c == dialect->quotechar && 7777db96d56Sopenharmony_ci dialect->quoting != 
QUOTE_NONE) { 7787db96d56Sopenharmony_ci if (dialect->doublequote) { 7797db96d56Sopenharmony_ci /* doublequote; " represented by "" */ 7807db96d56Sopenharmony_ci self->state = QUOTE_IN_QUOTED_FIELD; 7817db96d56Sopenharmony_ci } 7827db96d56Sopenharmony_ci else { 7837db96d56Sopenharmony_ci /* end of quote part of field */ 7847db96d56Sopenharmony_ci self->state = IN_FIELD; 7857db96d56Sopenharmony_ci } 7867db96d56Sopenharmony_ci } 7877db96d56Sopenharmony_ci else { 7887db96d56Sopenharmony_ci /* normal character - save in field */ 7897db96d56Sopenharmony_ci if (parse_add_char(self, module_state, c) < 0) 7907db96d56Sopenharmony_ci return -1; 7917db96d56Sopenharmony_ci } 7927db96d56Sopenharmony_ci break; 7937db96d56Sopenharmony_ci 7947db96d56Sopenharmony_ci case ESCAPE_IN_QUOTED_FIELD: 7957db96d56Sopenharmony_ci if (c == EOL) 7967db96d56Sopenharmony_ci c = '\n'; 7977db96d56Sopenharmony_ci if (parse_add_char(self, module_state, c) < 0) 7987db96d56Sopenharmony_ci return -1; 7997db96d56Sopenharmony_ci self->state = IN_QUOTED_FIELD; 8007db96d56Sopenharmony_ci break; 8017db96d56Sopenharmony_ci 8027db96d56Sopenharmony_ci case QUOTE_IN_QUOTED_FIELD: 8037db96d56Sopenharmony_ci /* doublequote - seen a quote in a quoted field */ 8047db96d56Sopenharmony_ci if (dialect->quoting != QUOTE_NONE && 8057db96d56Sopenharmony_ci c == dialect->quotechar) { 8067db96d56Sopenharmony_ci /* save "" as " */ 8077db96d56Sopenharmony_ci if (parse_add_char(self, module_state, c) < 0) 8087db96d56Sopenharmony_ci return -1; 8097db96d56Sopenharmony_ci self->state = IN_QUOTED_FIELD; 8107db96d56Sopenharmony_ci } 8117db96d56Sopenharmony_ci else if (c == dialect->delimiter) { 8127db96d56Sopenharmony_ci /* save field - wait for new field */ 8137db96d56Sopenharmony_ci if (parse_save_field(self) < 0) 8147db96d56Sopenharmony_ci return -1; 8157db96d56Sopenharmony_ci self->state = START_FIELD; 8167db96d56Sopenharmony_ci } 8177db96d56Sopenharmony_ci else if (c == '\n' || c == '\r' || c == EOL) { 
8187db96d56Sopenharmony_ci /* end of line - return [fields] */ 8197db96d56Sopenharmony_ci if (parse_save_field(self) < 0) 8207db96d56Sopenharmony_ci return -1; 8217db96d56Sopenharmony_ci self->state = (c == EOL ? START_RECORD : EAT_CRNL); 8227db96d56Sopenharmony_ci } 8237db96d56Sopenharmony_ci else if (!dialect->strict) { 8247db96d56Sopenharmony_ci if (parse_add_char(self, module_state, c) < 0) 8257db96d56Sopenharmony_ci return -1; 8267db96d56Sopenharmony_ci self->state = IN_FIELD; 8277db96d56Sopenharmony_ci } 8287db96d56Sopenharmony_ci else { 8297db96d56Sopenharmony_ci /* illegal */ 8307db96d56Sopenharmony_ci PyErr_Format(module_state->error_obj, "'%c' expected after '%c'", 8317db96d56Sopenharmony_ci dialect->delimiter, 8327db96d56Sopenharmony_ci dialect->quotechar); 8337db96d56Sopenharmony_ci return -1; 8347db96d56Sopenharmony_ci } 8357db96d56Sopenharmony_ci break; 8367db96d56Sopenharmony_ci 8377db96d56Sopenharmony_ci case EAT_CRNL: 8387db96d56Sopenharmony_ci if (c == '\n' || c == '\r') 8397db96d56Sopenharmony_ci ; 8407db96d56Sopenharmony_ci else if (c == EOL) 8417db96d56Sopenharmony_ci self->state = START_RECORD; 8427db96d56Sopenharmony_ci else { 8437db96d56Sopenharmony_ci PyErr_Format(module_state->error_obj, 8447db96d56Sopenharmony_ci "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?"); 8457db96d56Sopenharmony_ci return -1; 8467db96d56Sopenharmony_ci } 8477db96d56Sopenharmony_ci break; 8487db96d56Sopenharmony_ci 8497db96d56Sopenharmony_ci } 8507db96d56Sopenharmony_ci return 0; 8517db96d56Sopenharmony_ci} 8527db96d56Sopenharmony_ci 8537db96d56Sopenharmony_cistatic int 8547db96d56Sopenharmony_ciparse_reset(ReaderObj *self) 8557db96d56Sopenharmony_ci{ 8567db96d56Sopenharmony_ci Py_XSETREF(self->fields, PyList_New(0)); 8577db96d56Sopenharmony_ci if (self->fields == NULL) 8587db96d56Sopenharmony_ci return -1; 8597db96d56Sopenharmony_ci self->field_len = 0; 8607db96d56Sopenharmony_ci self->state = START_RECORD; 
8617db96d56Sopenharmony_ci self->numeric_field = 0; 8627db96d56Sopenharmony_ci return 0; 8637db96d56Sopenharmony_ci} 8647db96d56Sopenharmony_ci 8657db96d56Sopenharmony_cistatic PyObject * 8667db96d56Sopenharmony_ciReader_iternext(ReaderObj *self) 8677db96d56Sopenharmony_ci{ 8687db96d56Sopenharmony_ci PyObject *fields = NULL; 8697db96d56Sopenharmony_ci Py_UCS4 c; 8707db96d56Sopenharmony_ci Py_ssize_t pos, linelen; 8717db96d56Sopenharmony_ci unsigned int kind; 8727db96d56Sopenharmony_ci const void *data; 8737db96d56Sopenharmony_ci PyObject *lineobj; 8747db96d56Sopenharmony_ci 8757db96d56Sopenharmony_ci _csvstate *module_state = _csv_state_from_type(Py_TYPE(self), 8767db96d56Sopenharmony_ci "Reader.__next__"); 8777db96d56Sopenharmony_ci if (module_state == NULL) { 8787db96d56Sopenharmony_ci return NULL; 8797db96d56Sopenharmony_ci } 8807db96d56Sopenharmony_ci 8817db96d56Sopenharmony_ci if (parse_reset(self) < 0) 8827db96d56Sopenharmony_ci return NULL; 8837db96d56Sopenharmony_ci do { 8847db96d56Sopenharmony_ci lineobj = PyIter_Next(self->input_iter); 8857db96d56Sopenharmony_ci if (lineobj == NULL) { 8867db96d56Sopenharmony_ci /* End of input OR exception */ 8877db96d56Sopenharmony_ci if (!PyErr_Occurred() && (self->field_len != 0 || 8887db96d56Sopenharmony_ci self->state == IN_QUOTED_FIELD)) { 8897db96d56Sopenharmony_ci if (self->dialect->strict) 8907db96d56Sopenharmony_ci PyErr_SetString(module_state->error_obj, 8917db96d56Sopenharmony_ci "unexpected end of data"); 8927db96d56Sopenharmony_ci else if (parse_save_field(self) >= 0) 8937db96d56Sopenharmony_ci break; 8947db96d56Sopenharmony_ci } 8957db96d56Sopenharmony_ci return NULL; 8967db96d56Sopenharmony_ci } 8977db96d56Sopenharmony_ci if (!PyUnicode_Check(lineobj)) { 8987db96d56Sopenharmony_ci PyErr_Format(module_state->error_obj, 8997db96d56Sopenharmony_ci "iterator should return strings, " 9007db96d56Sopenharmony_ci "not %.200s " 9017db96d56Sopenharmony_ci "(the file should be opened in text mode)", 
9027db96d56Sopenharmony_ci Py_TYPE(lineobj)->tp_name 9037db96d56Sopenharmony_ci ); 9047db96d56Sopenharmony_ci Py_DECREF(lineobj); 9057db96d56Sopenharmony_ci return NULL; 9067db96d56Sopenharmony_ci } 9077db96d56Sopenharmony_ci if (PyUnicode_READY(lineobj) == -1) { 9087db96d56Sopenharmony_ci Py_DECREF(lineobj); 9097db96d56Sopenharmony_ci return NULL; 9107db96d56Sopenharmony_ci } 9117db96d56Sopenharmony_ci ++self->line_num; 9127db96d56Sopenharmony_ci kind = PyUnicode_KIND(lineobj); 9137db96d56Sopenharmony_ci data = PyUnicode_DATA(lineobj); 9147db96d56Sopenharmony_ci pos = 0; 9157db96d56Sopenharmony_ci linelen = PyUnicode_GET_LENGTH(lineobj); 9167db96d56Sopenharmony_ci while (linelen--) { 9177db96d56Sopenharmony_ci c = PyUnicode_READ(kind, data, pos); 9187db96d56Sopenharmony_ci if (parse_process_char(self, module_state, c) < 0) { 9197db96d56Sopenharmony_ci Py_DECREF(lineobj); 9207db96d56Sopenharmony_ci goto err; 9217db96d56Sopenharmony_ci } 9227db96d56Sopenharmony_ci pos++; 9237db96d56Sopenharmony_ci } 9247db96d56Sopenharmony_ci Py_DECREF(lineobj); 9257db96d56Sopenharmony_ci if (parse_process_char(self, module_state, EOL) < 0) 9267db96d56Sopenharmony_ci goto err; 9277db96d56Sopenharmony_ci } while (self->state != START_RECORD); 9287db96d56Sopenharmony_ci 9297db96d56Sopenharmony_ci fields = self->fields; 9307db96d56Sopenharmony_ci self->fields = NULL; 9317db96d56Sopenharmony_cierr: 9327db96d56Sopenharmony_ci return fields; 9337db96d56Sopenharmony_ci} 9347db96d56Sopenharmony_ci 9357db96d56Sopenharmony_cistatic void 9367db96d56Sopenharmony_ciReader_dealloc(ReaderObj *self) 9377db96d56Sopenharmony_ci{ 9387db96d56Sopenharmony_ci PyTypeObject *tp = Py_TYPE(self); 9397db96d56Sopenharmony_ci PyObject_GC_UnTrack(self); 9407db96d56Sopenharmony_ci tp->tp_clear((PyObject *)self); 9417db96d56Sopenharmony_ci if (self->field != NULL) { 9427db96d56Sopenharmony_ci PyMem_Free(self->field); 9437db96d56Sopenharmony_ci self->field = NULL; 9447db96d56Sopenharmony_ci } 
/* tp_traverse for the reader: report every object reference the reader
 * holds (dialect, input iterator, pending field list) plus its own type
 * object to the garbage collector's visit callback.
 * Py_VISIT returns early with the callback's result if it is non-zero.
 */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->input_iter);
    Py_VISIT(self->fields);
    Py_VISIT(Py_TYPE(self));
    return 0;
}

/* tp_clear for the reader: drop the references reported by
 * Reader_traverse (except the type) and reset the slots to NULL.
 * The raw self->field character buffer is not touched here; it is
 * released in Reader_dealloc.
 */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
(char*)Reader_Type_doc}, 9897db96d56Sopenharmony_ci {Py_tp_traverse, Reader_traverse}, 9907db96d56Sopenharmony_ci {Py_tp_iter, PyObject_SelfIter}, 9917db96d56Sopenharmony_ci {Py_tp_iternext, Reader_iternext}, 9927db96d56Sopenharmony_ci {Py_tp_methods, Reader_methods}, 9937db96d56Sopenharmony_ci {Py_tp_members, Reader_memberlist}, 9947db96d56Sopenharmony_ci {Py_tp_clear, Reader_clear}, 9957db96d56Sopenharmony_ci {Py_tp_dealloc, Reader_dealloc}, 9967db96d56Sopenharmony_ci {0, NULL} 9977db96d56Sopenharmony_ci}; 9987db96d56Sopenharmony_ci 9997db96d56Sopenharmony_ciPyType_Spec Reader_Type_spec = { 10007db96d56Sopenharmony_ci .name = "_csv.reader", 10017db96d56Sopenharmony_ci .basicsize = sizeof(ReaderObj), 10027db96d56Sopenharmony_ci .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC | 10037db96d56Sopenharmony_ci Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION), 10047db96d56Sopenharmony_ci .slots = Reader_Type_slots 10057db96d56Sopenharmony_ci}; 10067db96d56Sopenharmony_ci 10077db96d56Sopenharmony_ci 10087db96d56Sopenharmony_cistatic PyObject * 10097db96d56Sopenharmony_cicsv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) 10107db96d56Sopenharmony_ci{ 10117db96d56Sopenharmony_ci PyObject * iterator, * dialect = NULL; 10127db96d56Sopenharmony_ci _csvstate *module_state = get_csv_state(module); 10137db96d56Sopenharmony_ci ReaderObj * self = PyObject_GC_New( 10147db96d56Sopenharmony_ci ReaderObj, 10157db96d56Sopenharmony_ci module_state->reader_type); 10167db96d56Sopenharmony_ci 10177db96d56Sopenharmony_ci if (!self) 10187db96d56Sopenharmony_ci return NULL; 10197db96d56Sopenharmony_ci 10207db96d56Sopenharmony_ci self->dialect = NULL; 10217db96d56Sopenharmony_ci self->fields = NULL; 10227db96d56Sopenharmony_ci self->input_iter = NULL; 10237db96d56Sopenharmony_ci self->field = NULL; 10247db96d56Sopenharmony_ci self->field_size = 0; 10257db96d56Sopenharmony_ci self->line_num = 0; 10267db96d56Sopenharmony_ci 
10277db96d56Sopenharmony_ci if (parse_reset(self) < 0) { 10287db96d56Sopenharmony_ci Py_DECREF(self); 10297db96d56Sopenharmony_ci return NULL; 10307db96d56Sopenharmony_ci } 10317db96d56Sopenharmony_ci 10327db96d56Sopenharmony_ci if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) { 10337db96d56Sopenharmony_ci Py_DECREF(self); 10347db96d56Sopenharmony_ci return NULL; 10357db96d56Sopenharmony_ci } 10367db96d56Sopenharmony_ci self->input_iter = PyObject_GetIter(iterator); 10377db96d56Sopenharmony_ci if (self->input_iter == NULL) { 10387db96d56Sopenharmony_ci Py_DECREF(self); 10397db96d56Sopenharmony_ci return NULL; 10407db96d56Sopenharmony_ci } 10417db96d56Sopenharmony_ci self->dialect = (DialectObj *)_call_dialect(module_state, dialect, 10427db96d56Sopenharmony_ci keyword_args); 10437db96d56Sopenharmony_ci if (self->dialect == NULL) { 10447db96d56Sopenharmony_ci Py_DECREF(self); 10457db96d56Sopenharmony_ci return NULL; 10467db96d56Sopenharmony_ci } 10477db96d56Sopenharmony_ci 10487db96d56Sopenharmony_ci PyObject_GC_Track(self); 10497db96d56Sopenharmony_ci return (PyObject *)self; 10507db96d56Sopenharmony_ci} 10517db96d56Sopenharmony_ci 10527db96d56Sopenharmony_ci/* 10537db96d56Sopenharmony_ci * WRITER 10547db96d56Sopenharmony_ci */ 10557db96d56Sopenharmony_ci/* ---------------------------------------------------------------- */ 10567db96d56Sopenharmony_cistatic void 10577db96d56Sopenharmony_cijoin_reset(WriterObj *self) 10587db96d56Sopenharmony_ci{ 10597db96d56Sopenharmony_ci self->rec_len = 0; 10607db96d56Sopenharmony_ci self->num_fields = 0; 10617db96d56Sopenharmony_ci} 10627db96d56Sopenharmony_ci 10637db96d56Sopenharmony_ci#define MEM_INCR 32768 10647db96d56Sopenharmony_ci 10657db96d56Sopenharmony_ci/* Calculate new record length or append field to record. Return new 10667db96d56Sopenharmony_ci * record length. 
10677db96d56Sopenharmony_ci */ 10687db96d56Sopenharmony_cistatic Py_ssize_t 10697db96d56Sopenharmony_cijoin_append_data(WriterObj *self, unsigned int field_kind, const void *field_data, 10707db96d56Sopenharmony_ci Py_ssize_t field_len, int *quoted, 10717db96d56Sopenharmony_ci int copy_phase) 10727db96d56Sopenharmony_ci{ 10737db96d56Sopenharmony_ci DialectObj *dialect = self->dialect; 10747db96d56Sopenharmony_ci int i; 10757db96d56Sopenharmony_ci Py_ssize_t rec_len; 10767db96d56Sopenharmony_ci 10777db96d56Sopenharmony_ci#define INCLEN \ 10787db96d56Sopenharmony_ci do {\ 10797db96d56Sopenharmony_ci if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \ 10807db96d56Sopenharmony_ci goto overflow; \ 10817db96d56Sopenharmony_ci } \ 10827db96d56Sopenharmony_ci rec_len++; \ 10837db96d56Sopenharmony_ci } while(0) 10847db96d56Sopenharmony_ci 10857db96d56Sopenharmony_ci#define ADDCH(c) \ 10867db96d56Sopenharmony_ci do {\ 10877db96d56Sopenharmony_ci if (copy_phase) \ 10887db96d56Sopenharmony_ci self->rec[rec_len] = c;\ 10897db96d56Sopenharmony_ci INCLEN;\ 10907db96d56Sopenharmony_ci } while(0) 10917db96d56Sopenharmony_ci 10927db96d56Sopenharmony_ci rec_len = self->rec_len; 10937db96d56Sopenharmony_ci 10947db96d56Sopenharmony_ci /* If this is not the first field we need a field separator */ 10957db96d56Sopenharmony_ci if (self->num_fields > 0) 10967db96d56Sopenharmony_ci ADDCH(dialect->delimiter); 10977db96d56Sopenharmony_ci 10987db96d56Sopenharmony_ci /* Handle preceding quote */ 10997db96d56Sopenharmony_ci if (copy_phase && *quoted) 11007db96d56Sopenharmony_ci ADDCH(dialect->quotechar); 11017db96d56Sopenharmony_ci 11027db96d56Sopenharmony_ci /* Copy/count field data */ 11037db96d56Sopenharmony_ci /* If field is null just pass over */ 11047db96d56Sopenharmony_ci for (i = 0; field_data && (i < field_len); i++) { 11057db96d56Sopenharmony_ci Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i); 11067db96d56Sopenharmony_ci int want_escape = 0; 11077db96d56Sopenharmony_ci 
11087db96d56Sopenharmony_ci if (c == dialect->delimiter || 11097db96d56Sopenharmony_ci c == dialect->escapechar || 11107db96d56Sopenharmony_ci c == dialect->quotechar || 11117db96d56Sopenharmony_ci PyUnicode_FindChar( 11127db96d56Sopenharmony_ci dialect->lineterminator, c, 0, 11137db96d56Sopenharmony_ci PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) { 11147db96d56Sopenharmony_ci if (dialect->quoting == QUOTE_NONE) 11157db96d56Sopenharmony_ci want_escape = 1; 11167db96d56Sopenharmony_ci else { 11177db96d56Sopenharmony_ci if (c == dialect->quotechar) { 11187db96d56Sopenharmony_ci if (dialect->doublequote) 11197db96d56Sopenharmony_ci ADDCH(dialect->quotechar); 11207db96d56Sopenharmony_ci else 11217db96d56Sopenharmony_ci want_escape = 1; 11227db96d56Sopenharmony_ci } 11237db96d56Sopenharmony_ci else if (c == dialect->escapechar) { 11247db96d56Sopenharmony_ci want_escape = 1; 11257db96d56Sopenharmony_ci } 11267db96d56Sopenharmony_ci if (!want_escape) 11277db96d56Sopenharmony_ci *quoted = 1; 11287db96d56Sopenharmony_ci } 11297db96d56Sopenharmony_ci if (want_escape) { 11307db96d56Sopenharmony_ci if (dialect->escapechar == NOT_SET) { 11317db96d56Sopenharmony_ci PyErr_Format(self->error_obj, 11327db96d56Sopenharmony_ci "need to escape, but no escapechar set"); 11337db96d56Sopenharmony_ci return -1; 11347db96d56Sopenharmony_ci } 11357db96d56Sopenharmony_ci ADDCH(dialect->escapechar); 11367db96d56Sopenharmony_ci } 11377db96d56Sopenharmony_ci } 11387db96d56Sopenharmony_ci /* Copy field character into record buffer. 
11397db96d56Sopenharmony_ci */ 11407db96d56Sopenharmony_ci ADDCH(c); 11417db96d56Sopenharmony_ci } 11427db96d56Sopenharmony_ci 11437db96d56Sopenharmony_ci if (*quoted) { 11447db96d56Sopenharmony_ci if (copy_phase) 11457db96d56Sopenharmony_ci ADDCH(dialect->quotechar); 11467db96d56Sopenharmony_ci else { 11477db96d56Sopenharmony_ci INCLEN; /* starting quote */ 11487db96d56Sopenharmony_ci INCLEN; /* ending quote */ 11497db96d56Sopenharmony_ci } 11507db96d56Sopenharmony_ci } 11517db96d56Sopenharmony_ci return rec_len; 11527db96d56Sopenharmony_ci 11537db96d56Sopenharmony_ci overflow: 11547db96d56Sopenharmony_ci PyErr_NoMemory(); 11557db96d56Sopenharmony_ci return -1; 11567db96d56Sopenharmony_ci#undef ADDCH 11577db96d56Sopenharmony_ci#undef INCLEN 11587db96d56Sopenharmony_ci} 11597db96d56Sopenharmony_ci 11607db96d56Sopenharmony_cistatic int 11617db96d56Sopenharmony_cijoin_check_rec_size(WriterObj *self, Py_ssize_t rec_len) 11627db96d56Sopenharmony_ci{ 11637db96d56Sopenharmony_ci assert(rec_len >= 0); 11647db96d56Sopenharmony_ci 11657db96d56Sopenharmony_ci if (rec_len > self->rec_size) { 11667db96d56Sopenharmony_ci size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR; 11677db96d56Sopenharmony_ci Py_UCS4 *rec_new = self->rec; 11687db96d56Sopenharmony_ci PyMem_Resize(rec_new, Py_UCS4, rec_size_new); 11697db96d56Sopenharmony_ci if (rec_new == NULL) { 11707db96d56Sopenharmony_ci PyErr_NoMemory(); 11717db96d56Sopenharmony_ci return 0; 11727db96d56Sopenharmony_ci } 11737db96d56Sopenharmony_ci self->rec = rec_new; 11747db96d56Sopenharmony_ci self->rec_size = (Py_ssize_t)rec_size_new; 11757db96d56Sopenharmony_ci } 11767db96d56Sopenharmony_ci return 1; 11777db96d56Sopenharmony_ci} 11787db96d56Sopenharmony_ci 11797db96d56Sopenharmony_cistatic int 11807db96d56Sopenharmony_cijoin_append(WriterObj *self, PyObject *field, int quoted) 11817db96d56Sopenharmony_ci{ 11827db96d56Sopenharmony_ci unsigned int field_kind = -1; 11837db96d56Sopenharmony_ci const void *field_data = 
NULL; 11847db96d56Sopenharmony_ci Py_ssize_t field_len = 0; 11857db96d56Sopenharmony_ci Py_ssize_t rec_len; 11867db96d56Sopenharmony_ci 11877db96d56Sopenharmony_ci if (field != NULL) { 11887db96d56Sopenharmony_ci if (PyUnicode_READY(field) == -1) 11897db96d56Sopenharmony_ci return 0; 11907db96d56Sopenharmony_ci field_kind = PyUnicode_KIND(field); 11917db96d56Sopenharmony_ci field_data = PyUnicode_DATA(field); 11927db96d56Sopenharmony_ci field_len = PyUnicode_GET_LENGTH(field); 11937db96d56Sopenharmony_ci } 11947db96d56Sopenharmony_ci rec_len = join_append_data(self, field_kind, field_data, field_len, 11957db96d56Sopenharmony_ci "ed, 0); 11967db96d56Sopenharmony_ci if (rec_len < 0) 11977db96d56Sopenharmony_ci return 0; 11987db96d56Sopenharmony_ci 11997db96d56Sopenharmony_ci /* grow record buffer if necessary */ 12007db96d56Sopenharmony_ci if (!join_check_rec_size(self, rec_len)) 12017db96d56Sopenharmony_ci return 0; 12027db96d56Sopenharmony_ci 12037db96d56Sopenharmony_ci self->rec_len = join_append_data(self, field_kind, field_data, field_len, 12047db96d56Sopenharmony_ci "ed, 1); 12057db96d56Sopenharmony_ci self->num_fields++; 12067db96d56Sopenharmony_ci 12077db96d56Sopenharmony_ci return 1; 12087db96d56Sopenharmony_ci} 12097db96d56Sopenharmony_ci 12107db96d56Sopenharmony_cistatic int 12117db96d56Sopenharmony_cijoin_append_lineterminator(WriterObj *self) 12127db96d56Sopenharmony_ci{ 12137db96d56Sopenharmony_ci Py_ssize_t terminator_len, i; 12147db96d56Sopenharmony_ci unsigned int term_kind; 12157db96d56Sopenharmony_ci const void *term_data; 12167db96d56Sopenharmony_ci 12177db96d56Sopenharmony_ci terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator); 12187db96d56Sopenharmony_ci if (terminator_len == -1) 12197db96d56Sopenharmony_ci return 0; 12207db96d56Sopenharmony_ci 12217db96d56Sopenharmony_ci /* grow record buffer if necessary */ 12227db96d56Sopenharmony_ci if (!join_check_rec_size(self, self->rec_len + terminator_len)) 12237db96d56Sopenharmony_ci 
return 0; 12247db96d56Sopenharmony_ci 12257db96d56Sopenharmony_ci term_kind = PyUnicode_KIND(self->dialect->lineterminator); 12267db96d56Sopenharmony_ci term_data = PyUnicode_DATA(self->dialect->lineterminator); 12277db96d56Sopenharmony_ci for (i = 0; i < terminator_len; i++) 12287db96d56Sopenharmony_ci self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i); 12297db96d56Sopenharmony_ci self->rec_len += terminator_len; 12307db96d56Sopenharmony_ci 12317db96d56Sopenharmony_ci return 1; 12327db96d56Sopenharmony_ci} 12337db96d56Sopenharmony_ci 12347db96d56Sopenharmony_ciPyDoc_STRVAR(csv_writerow_doc, 12357db96d56Sopenharmony_ci"writerow(iterable)\n" 12367db96d56Sopenharmony_ci"\n" 12377db96d56Sopenharmony_ci"Construct and write a CSV record from an iterable of fields. Non-string\n" 12387db96d56Sopenharmony_ci"elements will be converted to string."); 12397db96d56Sopenharmony_ci 12407db96d56Sopenharmony_cistatic PyObject * 12417db96d56Sopenharmony_cicsv_writerow(WriterObj *self, PyObject *seq) 12427db96d56Sopenharmony_ci{ 12437db96d56Sopenharmony_ci DialectObj *dialect = self->dialect; 12447db96d56Sopenharmony_ci PyObject *iter, *field, *line, *result; 12457db96d56Sopenharmony_ci 12467db96d56Sopenharmony_ci iter = PyObject_GetIter(seq); 12477db96d56Sopenharmony_ci if (iter == NULL) { 12487db96d56Sopenharmony_ci if (PyErr_ExceptionMatches(PyExc_TypeError)) { 12497db96d56Sopenharmony_ci PyErr_Format(self->error_obj, 12507db96d56Sopenharmony_ci "iterable expected, not %.200s", 12517db96d56Sopenharmony_ci Py_TYPE(seq)->tp_name); 12527db96d56Sopenharmony_ci } 12537db96d56Sopenharmony_ci return NULL; 12547db96d56Sopenharmony_ci } 12557db96d56Sopenharmony_ci 12567db96d56Sopenharmony_ci /* Join all fields in internal buffer. 
12577db96d56Sopenharmony_ci */ 12587db96d56Sopenharmony_ci join_reset(self); 12597db96d56Sopenharmony_ci while ((field = PyIter_Next(iter))) { 12607db96d56Sopenharmony_ci int append_ok; 12617db96d56Sopenharmony_ci int quoted; 12627db96d56Sopenharmony_ci 12637db96d56Sopenharmony_ci switch (dialect->quoting) { 12647db96d56Sopenharmony_ci case QUOTE_NONNUMERIC: 12657db96d56Sopenharmony_ci quoted = !PyNumber_Check(field); 12667db96d56Sopenharmony_ci break; 12677db96d56Sopenharmony_ci case QUOTE_ALL: 12687db96d56Sopenharmony_ci quoted = 1; 12697db96d56Sopenharmony_ci break; 12707db96d56Sopenharmony_ci default: 12717db96d56Sopenharmony_ci quoted = 0; 12727db96d56Sopenharmony_ci break; 12737db96d56Sopenharmony_ci } 12747db96d56Sopenharmony_ci 12757db96d56Sopenharmony_ci if (PyUnicode_Check(field)) { 12767db96d56Sopenharmony_ci append_ok = join_append(self, field, quoted); 12777db96d56Sopenharmony_ci Py_DECREF(field); 12787db96d56Sopenharmony_ci } 12797db96d56Sopenharmony_ci else if (field == Py_None) { 12807db96d56Sopenharmony_ci append_ok = join_append(self, NULL, quoted); 12817db96d56Sopenharmony_ci Py_DECREF(field); 12827db96d56Sopenharmony_ci } 12837db96d56Sopenharmony_ci else { 12847db96d56Sopenharmony_ci PyObject *str; 12857db96d56Sopenharmony_ci 12867db96d56Sopenharmony_ci str = PyObject_Str(field); 12877db96d56Sopenharmony_ci Py_DECREF(field); 12887db96d56Sopenharmony_ci if (str == NULL) { 12897db96d56Sopenharmony_ci Py_DECREF(iter); 12907db96d56Sopenharmony_ci return NULL; 12917db96d56Sopenharmony_ci } 12927db96d56Sopenharmony_ci append_ok = join_append(self, str, quoted); 12937db96d56Sopenharmony_ci Py_DECREF(str); 12947db96d56Sopenharmony_ci } 12957db96d56Sopenharmony_ci if (!append_ok) { 12967db96d56Sopenharmony_ci Py_DECREF(iter); 12977db96d56Sopenharmony_ci return NULL; 12987db96d56Sopenharmony_ci } 12997db96d56Sopenharmony_ci } 13007db96d56Sopenharmony_ci Py_DECREF(iter); 13017db96d56Sopenharmony_ci if (PyErr_Occurred()) 13027db96d56Sopenharmony_ci return 
NULL; 13037db96d56Sopenharmony_ci 13047db96d56Sopenharmony_ci if (self->num_fields > 0 && self->rec_len == 0) { 13057db96d56Sopenharmony_ci if (dialect->quoting == QUOTE_NONE) { 13067db96d56Sopenharmony_ci PyErr_Format(self->error_obj, 13077db96d56Sopenharmony_ci "single empty field record must be quoted"); 13087db96d56Sopenharmony_ci return NULL; 13097db96d56Sopenharmony_ci } 13107db96d56Sopenharmony_ci self->num_fields--; 13117db96d56Sopenharmony_ci if (!join_append(self, NULL, 1)) 13127db96d56Sopenharmony_ci return NULL; 13137db96d56Sopenharmony_ci } 13147db96d56Sopenharmony_ci 13157db96d56Sopenharmony_ci /* Add line terminator. 13167db96d56Sopenharmony_ci */ 13177db96d56Sopenharmony_ci if (!join_append_lineterminator(self)) { 13187db96d56Sopenharmony_ci return NULL; 13197db96d56Sopenharmony_ci } 13207db96d56Sopenharmony_ci 13217db96d56Sopenharmony_ci line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, 13227db96d56Sopenharmony_ci (void *) self->rec, self->rec_len); 13237db96d56Sopenharmony_ci if (line == NULL) { 13247db96d56Sopenharmony_ci return NULL; 13257db96d56Sopenharmony_ci } 13267db96d56Sopenharmony_ci result = PyObject_CallOneArg(self->write, line); 13277db96d56Sopenharmony_ci Py_DECREF(line); 13287db96d56Sopenharmony_ci return result; 13297db96d56Sopenharmony_ci} 13307db96d56Sopenharmony_ci 13317db96d56Sopenharmony_ciPyDoc_STRVAR(csv_writerows_doc, 13327db96d56Sopenharmony_ci"writerows(iterable of iterables)\n" 13337db96d56Sopenharmony_ci"\n" 13347db96d56Sopenharmony_ci"Construct and write a series of iterables to a csv file. 
Non-string\n" 13357db96d56Sopenharmony_ci"elements will be converted to string."); 13367db96d56Sopenharmony_ci 13377db96d56Sopenharmony_cistatic PyObject * 13387db96d56Sopenharmony_cicsv_writerows(WriterObj *self, PyObject *seqseq) 13397db96d56Sopenharmony_ci{ 13407db96d56Sopenharmony_ci PyObject *row_iter, *row_obj, *result; 13417db96d56Sopenharmony_ci 13427db96d56Sopenharmony_ci row_iter = PyObject_GetIter(seqseq); 13437db96d56Sopenharmony_ci if (row_iter == NULL) { 13447db96d56Sopenharmony_ci return NULL; 13457db96d56Sopenharmony_ci } 13467db96d56Sopenharmony_ci while ((row_obj = PyIter_Next(row_iter))) { 13477db96d56Sopenharmony_ci result = csv_writerow(self, row_obj); 13487db96d56Sopenharmony_ci Py_DECREF(row_obj); 13497db96d56Sopenharmony_ci if (!result) { 13507db96d56Sopenharmony_ci Py_DECREF(row_iter); 13517db96d56Sopenharmony_ci return NULL; 13527db96d56Sopenharmony_ci } 13537db96d56Sopenharmony_ci else 13547db96d56Sopenharmony_ci Py_DECREF(result); 13557db96d56Sopenharmony_ci } 13567db96d56Sopenharmony_ci Py_DECREF(row_iter); 13577db96d56Sopenharmony_ci if (PyErr_Occurred()) 13587db96d56Sopenharmony_ci return NULL; 13597db96d56Sopenharmony_ci Py_RETURN_NONE; 13607db96d56Sopenharmony_ci} 13617db96d56Sopenharmony_ci 13627db96d56Sopenharmony_cistatic struct PyMethodDef Writer_methods[] = { 13637db96d56Sopenharmony_ci { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc}, 13647db96d56Sopenharmony_ci { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc}, 13657db96d56Sopenharmony_ci { NULL, NULL } 13667db96d56Sopenharmony_ci}; 13677db96d56Sopenharmony_ci 13687db96d56Sopenharmony_ci#define W_OFF(x) offsetof(WriterObj, x) 13697db96d56Sopenharmony_ci 13707db96d56Sopenharmony_cistatic struct PyMemberDef Writer_memberlist[] = { 13717db96d56Sopenharmony_ci { "dialect", T_OBJECT, W_OFF(dialect), READONLY }, 13727db96d56Sopenharmony_ci { NULL } 13737db96d56Sopenharmony_ci}; 13747db96d56Sopenharmony_ci 13757db96d56Sopenharmony_cistatic int 
13767db96d56Sopenharmony_ciWriter_traverse(WriterObj *self, visitproc visit, void *arg) 13777db96d56Sopenharmony_ci{ 13787db96d56Sopenharmony_ci Py_VISIT(self->dialect); 13797db96d56Sopenharmony_ci Py_VISIT(self->write); 13807db96d56Sopenharmony_ci Py_VISIT(self->error_obj); 13817db96d56Sopenharmony_ci Py_VISIT(Py_TYPE(self)); 13827db96d56Sopenharmony_ci return 0; 13837db96d56Sopenharmony_ci} 13847db96d56Sopenharmony_ci 13857db96d56Sopenharmony_cistatic int 13867db96d56Sopenharmony_ciWriter_clear(WriterObj *self) 13877db96d56Sopenharmony_ci{ 13887db96d56Sopenharmony_ci Py_CLEAR(self->dialect); 13897db96d56Sopenharmony_ci Py_CLEAR(self->write); 13907db96d56Sopenharmony_ci Py_CLEAR(self->error_obj); 13917db96d56Sopenharmony_ci return 0; 13927db96d56Sopenharmony_ci} 13937db96d56Sopenharmony_ci 13947db96d56Sopenharmony_cistatic void 13957db96d56Sopenharmony_ciWriter_dealloc(WriterObj *self) 13967db96d56Sopenharmony_ci{ 13977db96d56Sopenharmony_ci PyTypeObject *tp = Py_TYPE(self); 13987db96d56Sopenharmony_ci PyObject_GC_UnTrack(self); 13997db96d56Sopenharmony_ci tp->tp_clear((PyObject *)self); 14007db96d56Sopenharmony_ci if (self->rec != NULL) { 14017db96d56Sopenharmony_ci PyMem_Free(self->rec); 14027db96d56Sopenharmony_ci } 14037db96d56Sopenharmony_ci PyObject_GC_Del(self); 14047db96d56Sopenharmony_ci Py_DECREF(tp); 14057db96d56Sopenharmony_ci} 14067db96d56Sopenharmony_ci 14077db96d56Sopenharmony_ciPyDoc_STRVAR(Writer_Type_doc, 14087db96d56Sopenharmony_ci"CSV writer\n" 14097db96d56Sopenharmony_ci"\n" 14107db96d56Sopenharmony_ci"Writer objects are responsible for generating tabular data\n" 14117db96d56Sopenharmony_ci"in CSV format from sequence input.\n" 14127db96d56Sopenharmony_ci); 14137db96d56Sopenharmony_ci 14147db96d56Sopenharmony_cistatic PyType_Slot Writer_Type_slots[] = { 14157db96d56Sopenharmony_ci {Py_tp_doc, (char*)Writer_Type_doc}, 14167db96d56Sopenharmony_ci {Py_tp_traverse, Writer_traverse}, 14177db96d56Sopenharmony_ci {Py_tp_clear, Writer_clear}, 
14187db96d56Sopenharmony_ci {Py_tp_dealloc, Writer_dealloc}, 14197db96d56Sopenharmony_ci {Py_tp_methods, Writer_methods}, 14207db96d56Sopenharmony_ci {Py_tp_members, Writer_memberlist}, 14217db96d56Sopenharmony_ci {0, NULL} 14227db96d56Sopenharmony_ci}; 14237db96d56Sopenharmony_ci 14247db96d56Sopenharmony_ciPyType_Spec Writer_Type_spec = { 14257db96d56Sopenharmony_ci .name = "_csv.writer", 14267db96d56Sopenharmony_ci .basicsize = sizeof(WriterObj), 14277db96d56Sopenharmony_ci .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC | 14287db96d56Sopenharmony_ci Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION), 14297db96d56Sopenharmony_ci .slots = Writer_Type_slots, 14307db96d56Sopenharmony_ci}; 14317db96d56Sopenharmony_ci 14327db96d56Sopenharmony_ci 14337db96d56Sopenharmony_cistatic PyObject * 14347db96d56Sopenharmony_cicsv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) 14357db96d56Sopenharmony_ci{ 14367db96d56Sopenharmony_ci PyObject * output_file, * dialect = NULL; 14377db96d56Sopenharmony_ci _csvstate *module_state = get_csv_state(module); 14387db96d56Sopenharmony_ci WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type); 14397db96d56Sopenharmony_ci 14407db96d56Sopenharmony_ci if (!self) 14417db96d56Sopenharmony_ci return NULL; 14427db96d56Sopenharmony_ci 14437db96d56Sopenharmony_ci self->dialect = NULL; 14447db96d56Sopenharmony_ci self->write = NULL; 14457db96d56Sopenharmony_ci 14467db96d56Sopenharmony_ci self->rec = NULL; 14477db96d56Sopenharmony_ci self->rec_size = 0; 14487db96d56Sopenharmony_ci self->rec_len = 0; 14497db96d56Sopenharmony_ci self->num_fields = 0; 14507db96d56Sopenharmony_ci 14517db96d56Sopenharmony_ci self->error_obj = Py_NewRef(module_state->error_obj); 14527db96d56Sopenharmony_ci 14537db96d56Sopenharmony_ci if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) { 14547db96d56Sopenharmony_ci Py_DECREF(self); 14557db96d56Sopenharmony_ci return NULL; 
14567db96d56Sopenharmony_ci } 14577db96d56Sopenharmony_ci if (_PyObject_LookupAttr(output_file, 14587db96d56Sopenharmony_ci module_state->str_write, 14597db96d56Sopenharmony_ci &self->write) < 0) { 14607db96d56Sopenharmony_ci Py_DECREF(self); 14617db96d56Sopenharmony_ci return NULL; 14627db96d56Sopenharmony_ci } 14637db96d56Sopenharmony_ci if (self->write == NULL || !PyCallable_Check(self->write)) { 14647db96d56Sopenharmony_ci PyErr_SetString(PyExc_TypeError, 14657db96d56Sopenharmony_ci "argument 1 must have a \"write\" method"); 14667db96d56Sopenharmony_ci Py_DECREF(self); 14677db96d56Sopenharmony_ci return NULL; 14687db96d56Sopenharmony_ci } 14697db96d56Sopenharmony_ci self->dialect = (DialectObj *)_call_dialect(module_state, dialect, 14707db96d56Sopenharmony_ci keyword_args); 14717db96d56Sopenharmony_ci if (self->dialect == NULL) { 14727db96d56Sopenharmony_ci Py_DECREF(self); 14737db96d56Sopenharmony_ci return NULL; 14747db96d56Sopenharmony_ci } 14757db96d56Sopenharmony_ci PyObject_GC_Track(self); 14767db96d56Sopenharmony_ci return (PyObject *)self; 14777db96d56Sopenharmony_ci} 14787db96d56Sopenharmony_ci 14797db96d56Sopenharmony_ci/* 14807db96d56Sopenharmony_ci * DIALECT REGISTRY 14817db96d56Sopenharmony_ci */ 14827db96d56Sopenharmony_ci 14837db96d56Sopenharmony_ci/*[clinic input] 14847db96d56Sopenharmony_ci_csv.list_dialects 14857db96d56Sopenharmony_ci 14867db96d56Sopenharmony_ciReturn a list of all known dialect names. 
14877db96d56Sopenharmony_ci 14887db96d56Sopenharmony_ci names = csv.list_dialects() 14897db96d56Sopenharmony_ci[clinic start generated code]*/ 14907db96d56Sopenharmony_ci 14917db96d56Sopenharmony_cistatic PyObject * 14927db96d56Sopenharmony_ci_csv_list_dialects_impl(PyObject *module) 14937db96d56Sopenharmony_ci/*[clinic end generated code: output=a5b92b215b006a6d input=8953943eb17d98ab]*/ 14947db96d56Sopenharmony_ci{ 14957db96d56Sopenharmony_ci return PyDict_Keys(get_csv_state(module)->dialects); 14967db96d56Sopenharmony_ci} 14977db96d56Sopenharmony_ci 14987db96d56Sopenharmony_cistatic PyObject * 14997db96d56Sopenharmony_cicsv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) 15007db96d56Sopenharmony_ci{ 15017db96d56Sopenharmony_ci PyObject *name_obj, *dialect_obj = NULL; 15027db96d56Sopenharmony_ci _csvstate *module_state = get_csv_state(module); 15037db96d56Sopenharmony_ci PyObject *dialect; 15047db96d56Sopenharmony_ci 15057db96d56Sopenharmony_ci if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj)) 15067db96d56Sopenharmony_ci return NULL; 15077db96d56Sopenharmony_ci if (!PyUnicode_Check(name_obj)) { 15087db96d56Sopenharmony_ci PyErr_SetString(PyExc_TypeError, 15097db96d56Sopenharmony_ci "dialect name must be a string"); 15107db96d56Sopenharmony_ci return NULL; 15117db96d56Sopenharmony_ci } 15127db96d56Sopenharmony_ci if (PyUnicode_READY(name_obj) == -1) 15137db96d56Sopenharmony_ci return NULL; 15147db96d56Sopenharmony_ci dialect = _call_dialect(module_state, dialect_obj, kwargs); 15157db96d56Sopenharmony_ci if (dialect == NULL) 15167db96d56Sopenharmony_ci return NULL; 15177db96d56Sopenharmony_ci if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) { 15187db96d56Sopenharmony_ci Py_DECREF(dialect); 15197db96d56Sopenharmony_ci return NULL; 15207db96d56Sopenharmony_ci } 15217db96d56Sopenharmony_ci Py_DECREF(dialect); 15227db96d56Sopenharmony_ci Py_RETURN_NONE; 15237db96d56Sopenharmony_ci} 15247db96d56Sopenharmony_ci 
15257db96d56Sopenharmony_ci 15267db96d56Sopenharmony_ci/*[clinic input] 15277db96d56Sopenharmony_ci_csv.unregister_dialect 15287db96d56Sopenharmony_ci 15297db96d56Sopenharmony_ci name: object 15307db96d56Sopenharmony_ci 15317db96d56Sopenharmony_ciDelete the name/dialect mapping associated with a string name. 15327db96d56Sopenharmony_ci 15337db96d56Sopenharmony_ci csv.unregister_dialect(name) 15347db96d56Sopenharmony_ci[clinic start generated code]*/ 15357db96d56Sopenharmony_ci 15367db96d56Sopenharmony_cistatic PyObject * 15377db96d56Sopenharmony_ci_csv_unregister_dialect_impl(PyObject *module, PyObject *name) 15387db96d56Sopenharmony_ci/*[clinic end generated code: output=0813ebca6c058df4 input=6b5c1557bf60c7e7]*/ 15397db96d56Sopenharmony_ci{ 15407db96d56Sopenharmony_ci _csvstate *module_state = get_csv_state(module); 15417db96d56Sopenharmony_ci if (PyDict_DelItem(module_state->dialects, name) < 0) { 15427db96d56Sopenharmony_ci if (PyErr_ExceptionMatches(PyExc_KeyError)) { 15437db96d56Sopenharmony_ci PyErr_Format(module_state->error_obj, "unknown dialect"); 15447db96d56Sopenharmony_ci } 15457db96d56Sopenharmony_ci return NULL; 15467db96d56Sopenharmony_ci } 15477db96d56Sopenharmony_ci Py_RETURN_NONE; 15487db96d56Sopenharmony_ci} 15497db96d56Sopenharmony_ci 15507db96d56Sopenharmony_ci/*[clinic input] 15517db96d56Sopenharmony_ci_csv.get_dialect 15527db96d56Sopenharmony_ci 15537db96d56Sopenharmony_ci name: object 15547db96d56Sopenharmony_ci 15557db96d56Sopenharmony_ciReturn the dialect instance associated with name. 
15567db96d56Sopenharmony_ci 15577db96d56Sopenharmony_ci dialect = csv.get_dialect(name) 15587db96d56Sopenharmony_ci[clinic start generated code]*/ 15597db96d56Sopenharmony_ci 15607db96d56Sopenharmony_cistatic PyObject * 15617db96d56Sopenharmony_ci_csv_get_dialect_impl(PyObject *module, PyObject *name) 15627db96d56Sopenharmony_ci/*[clinic end generated code: output=aa988cd573bebebb input=edf9ddab32e448fb]*/ 15637db96d56Sopenharmony_ci{ 15647db96d56Sopenharmony_ci return get_dialect_from_registry(name, get_csv_state(module)); 15657db96d56Sopenharmony_ci} 15667db96d56Sopenharmony_ci 15677db96d56Sopenharmony_ci/*[clinic input] 15687db96d56Sopenharmony_ci_csv.field_size_limit 15697db96d56Sopenharmony_ci 15707db96d56Sopenharmony_ci new_limit: object = NULL 15717db96d56Sopenharmony_ci 15727db96d56Sopenharmony_ciSets an upper limit on parsed fields. 15737db96d56Sopenharmony_ci 15747db96d56Sopenharmony_ci csv.field_size_limit([limit]) 15757db96d56Sopenharmony_ci 15767db96d56Sopenharmony_ciReturns old limit. 
If limit is not given, no new limit is set and 15777db96d56Sopenharmony_cithe old limit is returned 15787db96d56Sopenharmony_ci[clinic start generated code]*/ 15797db96d56Sopenharmony_ci 15807db96d56Sopenharmony_cistatic PyObject * 15817db96d56Sopenharmony_ci_csv_field_size_limit_impl(PyObject *module, PyObject *new_limit) 15827db96d56Sopenharmony_ci/*[clinic end generated code: output=f2799ecd908e250b input=cec70e9226406435]*/ 15837db96d56Sopenharmony_ci{ 15847db96d56Sopenharmony_ci _csvstate *module_state = get_csv_state(module); 15857db96d56Sopenharmony_ci long old_limit = module_state->field_limit; 15867db96d56Sopenharmony_ci if (new_limit != NULL) { 15877db96d56Sopenharmony_ci if (!PyLong_CheckExact(new_limit)) { 15887db96d56Sopenharmony_ci PyErr_Format(PyExc_TypeError, 15897db96d56Sopenharmony_ci "limit must be an integer"); 15907db96d56Sopenharmony_ci return NULL; 15917db96d56Sopenharmony_ci } 15927db96d56Sopenharmony_ci module_state->field_limit = PyLong_AsLong(new_limit); 15937db96d56Sopenharmony_ci if (module_state->field_limit == -1 && PyErr_Occurred()) { 15947db96d56Sopenharmony_ci module_state->field_limit = old_limit; 15957db96d56Sopenharmony_ci return NULL; 15967db96d56Sopenharmony_ci } 15977db96d56Sopenharmony_ci } 15987db96d56Sopenharmony_ci return PyLong_FromLong(old_limit); 15997db96d56Sopenharmony_ci} 16007db96d56Sopenharmony_ci 16017db96d56Sopenharmony_cistatic PyType_Slot error_slots[] = { 16027db96d56Sopenharmony_ci {0, NULL}, 16037db96d56Sopenharmony_ci}; 16047db96d56Sopenharmony_ci 16057db96d56Sopenharmony_ciPyType_Spec error_spec = { 16067db96d56Sopenharmony_ci .name = "_csv.Error", 16077db96d56Sopenharmony_ci .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, 16087db96d56Sopenharmony_ci .slots = error_slots, 16097db96d56Sopenharmony_ci}; 16107db96d56Sopenharmony_ci 16117db96d56Sopenharmony_ci/* 16127db96d56Sopenharmony_ci * MODULE 16137db96d56Sopenharmony_ci */ 16147db96d56Sopenharmony_ci 
16157db96d56Sopenharmony_ciPyDoc_STRVAR(csv_module_doc, 16167db96d56Sopenharmony_ci"CSV parsing and writing.\n" 16177db96d56Sopenharmony_ci"\n" 16187db96d56Sopenharmony_ci"This module provides classes that assist in the reading and writing\n" 16197db96d56Sopenharmony_ci"of Comma Separated Value (CSV) files, and implements the interface\n" 16207db96d56Sopenharmony_ci"described by PEP 305. Although many CSV files are simple to parse,\n" 16217db96d56Sopenharmony_ci"the format is not formally defined by a stable specification and\n" 16227db96d56Sopenharmony_ci"is subtle enough that parsing lines of a CSV file with something\n" 16237db96d56Sopenharmony_ci"like line.split(\",\") is bound to fail. The module supports three\n" 16247db96d56Sopenharmony_ci"basic APIs: reading, writing, and registration of dialects.\n" 16257db96d56Sopenharmony_ci"\n" 16267db96d56Sopenharmony_ci"\n" 16277db96d56Sopenharmony_ci"DIALECT REGISTRATION:\n" 16287db96d56Sopenharmony_ci"\n" 16297db96d56Sopenharmony_ci"Readers and writers support a dialect argument, which is a convenient\n" 16307db96d56Sopenharmony_ci"handle on a group of settings. 
When the dialect argument is a string,\n" 16317db96d56Sopenharmony_ci"it identifies one of the dialects previously registered with the module.\n" 16327db96d56Sopenharmony_ci"If it is a class or instance, the attributes of the argument are used as\n" 16337db96d56Sopenharmony_ci"the settings for the reader or writer:\n" 16347db96d56Sopenharmony_ci"\n" 16357db96d56Sopenharmony_ci" class excel:\n" 16367db96d56Sopenharmony_ci" delimiter = ','\n" 16377db96d56Sopenharmony_ci" quotechar = '\"'\n" 16387db96d56Sopenharmony_ci" escapechar = None\n" 16397db96d56Sopenharmony_ci" doublequote = True\n" 16407db96d56Sopenharmony_ci" skipinitialspace = False\n" 16417db96d56Sopenharmony_ci" lineterminator = '\\r\\n'\n" 16427db96d56Sopenharmony_ci" quoting = QUOTE_MINIMAL\n" 16437db96d56Sopenharmony_ci"\n" 16447db96d56Sopenharmony_ci"SETTINGS:\n" 16457db96d56Sopenharmony_ci"\n" 16467db96d56Sopenharmony_ci" * quotechar - specifies a one-character string to use as the\n" 16477db96d56Sopenharmony_ci" quoting character. It defaults to '\"'.\n" 16487db96d56Sopenharmony_ci" * delimiter - specifies a one-character string to use as the\n" 16497db96d56Sopenharmony_ci" field separator. It defaults to ','.\n" 16507db96d56Sopenharmony_ci" * skipinitialspace - specifies how to interpret spaces which\n" 16517db96d56Sopenharmony_ci" immediately follow a delimiter. 
It defaults to False, which\n" 16527db96d56Sopenharmony_ci" means that spaces immediately following a delimiter is part\n" 16537db96d56Sopenharmony_ci" of the following field.\n" 16547db96d56Sopenharmony_ci" * lineterminator - specifies the character sequence which should\n" 16557db96d56Sopenharmony_ci" terminate rows.\n" 16567db96d56Sopenharmony_ci" * quoting - controls when quotes should be generated by the writer.\n" 16577db96d56Sopenharmony_ci" It can take on any of the following module constants:\n" 16587db96d56Sopenharmony_ci"\n" 16597db96d56Sopenharmony_ci" csv.QUOTE_MINIMAL means only when required, for example, when a\n" 16607db96d56Sopenharmony_ci" field contains either the quotechar or the delimiter\n" 16617db96d56Sopenharmony_ci" csv.QUOTE_ALL means that quotes are always placed around fields.\n" 16627db96d56Sopenharmony_ci" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n" 16637db96d56Sopenharmony_ci" fields which do not parse as integers or floating point\n" 16647db96d56Sopenharmony_ci" numbers.\n" 16657db96d56Sopenharmony_ci" csv.QUOTE_NONE means that quotes are never placed around fields.\n" 16667db96d56Sopenharmony_ci" * escapechar - specifies a one-character string used to escape\n" 16677db96d56Sopenharmony_ci" the delimiter when quoting is set to QUOTE_NONE.\n" 16687db96d56Sopenharmony_ci" * doublequote - controls the handling of quotes inside fields. 
When\n" 16697db96d56Sopenharmony_ci" True, two consecutive quotes are interpreted as one during read,\n" 16707db96d56Sopenharmony_ci" and when writing, each quote character embedded in the data is\n" 16717db96d56Sopenharmony_ci" written as two quotes\n"); 16727db96d56Sopenharmony_ci 16737db96d56Sopenharmony_ciPyDoc_STRVAR(csv_reader_doc, 16747db96d56Sopenharmony_ci" csv_reader = reader(iterable [, dialect='excel']\n" 16757db96d56Sopenharmony_ci" [optional keyword args])\n" 16767db96d56Sopenharmony_ci" for row in csv_reader:\n" 16777db96d56Sopenharmony_ci" process(row)\n" 16787db96d56Sopenharmony_ci"\n" 16797db96d56Sopenharmony_ci"The \"iterable\" argument can be any object that returns a line\n" 16807db96d56Sopenharmony_ci"of input for each iteration, such as a file object or a list. The\n" 16817db96d56Sopenharmony_ci"optional \"dialect\" parameter is discussed below. The function\n" 16827db96d56Sopenharmony_ci"also accepts optional keyword arguments which override settings\n" 16837db96d56Sopenharmony_ci"provided by the dialect.\n" 16847db96d56Sopenharmony_ci"\n" 16857db96d56Sopenharmony_ci"The returned object is an iterator. 
Each iteration returns a row\n" 16867db96d56Sopenharmony_ci"of the CSV file (which can span multiple input lines).\n"); 16877db96d56Sopenharmony_ci 16887db96d56Sopenharmony_ciPyDoc_STRVAR(csv_writer_doc, 16897db96d56Sopenharmony_ci" csv_writer = csv.writer(fileobj [, dialect='excel']\n" 16907db96d56Sopenharmony_ci" [optional keyword args])\n" 16917db96d56Sopenharmony_ci" for row in sequence:\n" 16927db96d56Sopenharmony_ci" csv_writer.writerow(row)\n" 16937db96d56Sopenharmony_ci"\n" 16947db96d56Sopenharmony_ci" [or]\n" 16957db96d56Sopenharmony_ci"\n" 16967db96d56Sopenharmony_ci" csv_writer = csv.writer(fileobj [, dialect='excel']\n" 16977db96d56Sopenharmony_ci" [optional keyword args])\n" 16987db96d56Sopenharmony_ci" csv_writer.writerows(rows)\n" 16997db96d56Sopenharmony_ci"\n" 17007db96d56Sopenharmony_ci"The \"fileobj\" argument can be any object that supports the file API.\n"); 17017db96d56Sopenharmony_ci 17027db96d56Sopenharmony_ciPyDoc_STRVAR(csv_register_dialect_doc, 17037db96d56Sopenharmony_ci"Create a mapping from a string name to a dialect class.\n" 17047db96d56Sopenharmony_ci" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])"); 17057db96d56Sopenharmony_ci 17067db96d56Sopenharmony_cistatic struct PyMethodDef csv_methods[] = { 17077db96d56Sopenharmony_ci { "reader", _PyCFunction_CAST(csv_reader), 17087db96d56Sopenharmony_ci METH_VARARGS | METH_KEYWORDS, csv_reader_doc}, 17097db96d56Sopenharmony_ci { "writer", _PyCFunction_CAST(csv_writer), 17107db96d56Sopenharmony_ci METH_VARARGS | METH_KEYWORDS, csv_writer_doc}, 17117db96d56Sopenharmony_ci { "register_dialect", _PyCFunction_CAST(csv_register_dialect), 17127db96d56Sopenharmony_ci METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc}, 17137db96d56Sopenharmony_ci _CSV_LIST_DIALECTS_METHODDEF 17147db96d56Sopenharmony_ci _CSV_UNREGISTER_DIALECT_METHODDEF 17157db96d56Sopenharmony_ci _CSV_GET_DIALECT_METHODDEF 17167db96d56Sopenharmony_ci _CSV_FIELD_SIZE_LIMIT_METHODDEF 17177db96d56Sopenharmony_ci { 
NULL, NULL } 17187db96d56Sopenharmony_ci}; 17197db96d56Sopenharmony_ci 17207db96d56Sopenharmony_cistatic int 17217db96d56Sopenharmony_cicsv_exec(PyObject *module) { 17227db96d56Sopenharmony_ci const StyleDesc *style; 17237db96d56Sopenharmony_ci PyObject *temp; 17247db96d56Sopenharmony_ci _csvstate *module_state = get_csv_state(module); 17257db96d56Sopenharmony_ci 17267db96d56Sopenharmony_ci temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL); 17277db96d56Sopenharmony_ci module_state->dialect_type = (PyTypeObject *)temp; 17287db96d56Sopenharmony_ci if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) { 17297db96d56Sopenharmony_ci return -1; 17307db96d56Sopenharmony_ci } 17317db96d56Sopenharmony_ci 17327db96d56Sopenharmony_ci temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL); 17337db96d56Sopenharmony_ci module_state->reader_type = (PyTypeObject *)temp; 17347db96d56Sopenharmony_ci if (PyModule_AddObjectRef(module, "Reader", temp) < 0) { 17357db96d56Sopenharmony_ci return -1; 17367db96d56Sopenharmony_ci } 17377db96d56Sopenharmony_ci 17387db96d56Sopenharmony_ci temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL); 17397db96d56Sopenharmony_ci module_state->writer_type = (PyTypeObject *)temp; 17407db96d56Sopenharmony_ci if (PyModule_AddObjectRef(module, "Writer", temp) < 0) { 17417db96d56Sopenharmony_ci return -1; 17427db96d56Sopenharmony_ci } 17437db96d56Sopenharmony_ci 17447db96d56Sopenharmony_ci /* Add version to the module. 
*/ 17457db96d56Sopenharmony_ci if (PyModule_AddStringConstant(module, "__version__", 17467db96d56Sopenharmony_ci MODULE_VERSION) == -1) { 17477db96d56Sopenharmony_ci return -1; 17487db96d56Sopenharmony_ci } 17497db96d56Sopenharmony_ci 17507db96d56Sopenharmony_ci /* Set the field limit */ 17517db96d56Sopenharmony_ci module_state->field_limit = 128 * 1024; 17527db96d56Sopenharmony_ci 17537db96d56Sopenharmony_ci /* Add _dialects dictionary */ 17547db96d56Sopenharmony_ci module_state->dialects = PyDict_New(); 17557db96d56Sopenharmony_ci if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) { 17567db96d56Sopenharmony_ci return -1; 17577db96d56Sopenharmony_ci } 17587db96d56Sopenharmony_ci 17597db96d56Sopenharmony_ci /* Add quote styles into dictionary */ 17607db96d56Sopenharmony_ci for (style = quote_styles; style->name; style++) { 17617db96d56Sopenharmony_ci if (PyModule_AddIntConstant(module, style->name, 17627db96d56Sopenharmony_ci style->style) == -1) 17637db96d56Sopenharmony_ci return -1; 17647db96d56Sopenharmony_ci } 17657db96d56Sopenharmony_ci 17667db96d56Sopenharmony_ci /* Add the CSV exception object to the module. 
*/ 17677db96d56Sopenharmony_ci PyObject *bases = PyTuple_Pack(1, PyExc_Exception); 17687db96d56Sopenharmony_ci if (bases == NULL) { 17697db96d56Sopenharmony_ci return -1; 17707db96d56Sopenharmony_ci } 17717db96d56Sopenharmony_ci module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec, 17727db96d56Sopenharmony_ci bases); 17737db96d56Sopenharmony_ci Py_DECREF(bases); 17747db96d56Sopenharmony_ci if (module_state->error_obj == NULL) { 17757db96d56Sopenharmony_ci return -1; 17767db96d56Sopenharmony_ci } 17777db96d56Sopenharmony_ci if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) { 17787db96d56Sopenharmony_ci return -1; 17797db96d56Sopenharmony_ci } 17807db96d56Sopenharmony_ci 17817db96d56Sopenharmony_ci module_state->str_write = PyUnicode_InternFromString("write"); 17827db96d56Sopenharmony_ci if (module_state->str_write == NULL) { 17837db96d56Sopenharmony_ci return -1; 17847db96d56Sopenharmony_ci } 17857db96d56Sopenharmony_ci return 0; 17867db96d56Sopenharmony_ci} 17877db96d56Sopenharmony_ci 17887db96d56Sopenharmony_cistatic PyModuleDef_Slot csv_slots[] = { 17897db96d56Sopenharmony_ci {Py_mod_exec, csv_exec}, 17907db96d56Sopenharmony_ci {0, NULL} 17917db96d56Sopenharmony_ci}; 17927db96d56Sopenharmony_ci 17937db96d56Sopenharmony_cistatic struct PyModuleDef _csvmodule = { 17947db96d56Sopenharmony_ci PyModuleDef_HEAD_INIT, 17957db96d56Sopenharmony_ci "_csv", 17967db96d56Sopenharmony_ci csv_module_doc, 17977db96d56Sopenharmony_ci sizeof(_csvstate), 17987db96d56Sopenharmony_ci csv_methods, 17997db96d56Sopenharmony_ci csv_slots, 18007db96d56Sopenharmony_ci _csv_traverse, 18017db96d56Sopenharmony_ci _csv_clear, 18027db96d56Sopenharmony_ci _csv_free 18037db96d56Sopenharmony_ci}; 18047db96d56Sopenharmony_ci 18057db96d56Sopenharmony_ciPyMODINIT_FUNC 18067db96d56Sopenharmony_ciPyInit__csv(void) 18077db96d56Sopenharmony_ci{ 18087db96d56Sopenharmony_ci return PyModuleDef_Init(&_csvmodule); 18097db96d56Sopenharmony_ci} 1810