xref: /third_party/python/Modules/_csv.c (revision 7db96d56)
1/* csv module */
2
3/*
4
5This module provides the low-level underpinnings of a CSV reading/writing
6module.  Users should not use this module directly, but import the csv.py
7module instead.
8
9*/
10
11#define MODULE_VERSION "1.0"
12
13#include "Python.h"
14#include "structmember.h"         // PyMemberDef
15#include <stdbool.h>
16
17/*[clinic input]
18module _csv
19[clinic start generated code]*/
20/*[clinic end generated code: output=da39a3ee5e6b4b0d input=385118b71aa43706]*/
21
22#include "clinic/_csv.c.h"
23#define NOT_SET ((Py_UCS4)-1)
24#define EOL ((Py_UCS4)-2)
25
26
27typedef struct {
28    PyObject *error_obj;   /* CSV exception */
29    PyObject *dialects;   /* Dialect registry */
30    PyTypeObject *dialect_type;
31    PyTypeObject *reader_type;
32    PyTypeObject *writer_type;
33    long field_limit;   /* max parsed field size */
34    PyObject *str_write;
35} _csvstate;
36
37static struct PyModuleDef _csvmodule;
38
39static inline _csvstate*
40get_csv_state(PyObject *module)
41{
42    void *state = PyModule_GetState(module);
43    assert(state != NULL);
44    return (_csvstate *)state;
45}
46
47static int
48_csv_clear(PyObject *module)
49{
50    _csvstate *module_state = PyModule_GetState(module);
51    Py_CLEAR(module_state->error_obj);
52    Py_CLEAR(module_state->dialects);
53    Py_CLEAR(module_state->dialect_type);
54    Py_CLEAR(module_state->reader_type);
55    Py_CLEAR(module_state->writer_type);
56    Py_CLEAR(module_state->str_write);
57    return 0;
58}
59
60static int
61_csv_traverse(PyObject *module, visitproc visit, void *arg)
62{
63    _csvstate *module_state = PyModule_GetState(module);
64    Py_VISIT(module_state->error_obj);
65    Py_VISIT(module_state->dialects);
66    Py_VISIT(module_state->dialect_type);
67    Py_VISIT(module_state->reader_type);
68    Py_VISIT(module_state->writer_type);
69    return 0;
70}
71
72static void
73_csv_free(void *module)
74{
75   _csv_clear((PyObject *)module);
76}
77
/* States of the reader's character-by-character parser. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;
83
/* Quoting policies a dialect can select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
87
88typedef struct {
89    QuoteStyle style;
90    const char *name;
91} StyleDesc;
92
93static const StyleDesc quote_styles[] = {
94    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
95    { QUOTE_ALL,        "QUOTE_ALL" },
96    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
97    { QUOTE_NONE,       "QUOTE_NONE" },
98    { 0 }
99};
100
101typedef struct {
102    PyObject_HEAD
103
104    char doublequote;           /* is " represented by ""? */
105    char skipinitialspace;      /* ignore spaces following delimiter? */
106    char strict;                /* raise exception on bad CSV */
107    int quoting;                /* style of quoting to write */
108    Py_UCS4 delimiter;          /* field separator */
109    Py_UCS4 quotechar;          /* quote character */
110    Py_UCS4 escapechar;         /* escape character */
111    PyObject *lineterminator;   /* string to write between records */
112
113} DialectObj;
114
115typedef struct {
116    PyObject_HEAD
117
118    PyObject *input_iter;   /* iterate over this for input lines */
119
120    DialectObj *dialect;    /* parsing dialect */
121
122    PyObject *fields;           /* field list for current record */
123    ParserState state;          /* current CSV parse state */
124    Py_UCS4 *field;             /* temporary buffer */
125    Py_ssize_t field_size;      /* size of allocated buffer */
126    Py_ssize_t field_len;       /* length of current field */
127    int numeric_field;          /* treat field as numeric */
128    unsigned long line_num;     /* Source-file line number */
129} ReaderObj;
130
131typedef struct {
132    PyObject_HEAD
133
134    PyObject *write;    /* write output lines to this file */
135
136    DialectObj *dialect;    /* parsing dialect */
137
138    Py_UCS4 *rec;            /* buffer for parser.join */
139    Py_ssize_t rec_size;        /* size of allocated record */
140    Py_ssize_t rec_len;         /* length of record */
141    int num_fields;             /* number of fields in record */
142
143    PyObject *error_obj;       /* cached error object */
144} WriterObj;
145
146/*
147 * DIALECT class
148 */
149
150static PyObject *
151get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
152{
153    PyObject *dialect_obj;
154
155    dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
156    if (dialect_obj == NULL) {
157        if (!PyErr_Occurred())
158            PyErr_Format(module_state->error_obj, "unknown dialect");
159    }
160    else
161        Py_INCREF(dialect_obj);
162
163    return dialect_obj;
164}
165
166static PyObject *
167get_char_or_None(Py_UCS4 c)
168{
169    if (c == NOT_SET) {
170        Py_RETURN_NONE;
171    }
172    else
173        return PyUnicode_FromOrdinal(c);
174}
175
176static PyObject *
177Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
178{
179    Py_XINCREF(self->lineterminator);
180    return self->lineterminator;
181}
182
183static PyObject *
184Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
185{
186    return get_char_or_None(self->delimiter);
187}
188
189static PyObject *
190Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
191{
192    return get_char_or_None(self->escapechar);
193}
194
195static PyObject *
196Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
197{
198    return get_char_or_None(self->quotechar);
199}
200
201static PyObject *
202Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
203{
204    return PyLong_FromLong(self->quoting);
205}
206
207static int
208_set_bool(const char *name, char *target, PyObject *src, bool dflt)
209{
210    if (src == NULL)
211        *target = dflt;
212    else {
213        int b = PyObject_IsTrue(src);
214        if (b < 0)
215            return -1;
216        *target = (char)b;
217    }
218    return 0;
219}
220
221static int
222_set_int(const char *name, int *target, PyObject *src, int dflt)
223{
224    if (src == NULL)
225        *target = dflt;
226    else {
227        int value;
228        if (!PyLong_CheckExact(src)) {
229            PyErr_Format(PyExc_TypeError,
230                         "\"%s\" must be an integer", name);
231            return -1;
232        }
233        value = _PyLong_AsInt(src);
234        if (value == -1 && PyErr_Occurred()) {
235            return -1;
236        }
237        *target = value;
238    }
239    return 0;
240}
241
242static int
243_set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
244{
245    if (src == NULL) {
246        *target = dflt;
247    }
248    else {
249        *target = NOT_SET;
250        if (src != Py_None) {
251            if (!PyUnicode_Check(src)) {
252                PyErr_Format(PyExc_TypeError,
253                    "\"%s\" must be string or None, not %.200s", name,
254                    Py_TYPE(src)->tp_name);
255                return -1;
256            }
257            Py_ssize_t len = PyUnicode_GetLength(src);
258            if (len < 0) {
259                return -1;
260            }
261            if (len != 1) {
262                PyErr_Format(PyExc_TypeError,
263                    "\"%s\" must be a 1-character string",
264                    name);
265                return -1;
266            }
267            /* PyUnicode_READY() is called in PyUnicode_GetLength() */
268            *target = PyUnicode_READ_CHAR(src, 0);
269        }
270    }
271    return 0;
272}
273
274static int
275_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
276{
277    if (src == NULL) {
278        *target = dflt;
279    }
280    else {
281        if (!PyUnicode_Check(src)) {
282            PyErr_Format(PyExc_TypeError,
283                         "\"%s\" must be string, not %.200s", name,
284                         Py_TYPE(src)->tp_name);
285                return -1;
286        }
287        Py_ssize_t len = PyUnicode_GetLength(src);
288        if (len < 0) {
289            return -1;
290        }
291        if (len != 1) {
292            PyErr_Format(PyExc_TypeError,
293                         "\"%s\" must be a 1-character string",
294                         name);
295            return -1;
296        }
297        /* PyUnicode_READY() is called in PyUnicode_GetLength() */
298        *target = PyUnicode_READ_CHAR(src, 0);
299    }
300    return 0;
301}
302
303static int
304_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
305{
306    if (src == NULL)
307        *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
308    else {
309        if (src == Py_None)
310            *target = NULL;
311        else if (!PyUnicode_Check(src)) {
312            PyErr_Format(PyExc_TypeError,
313                         "\"%s\" must be a string", name);
314            return -1;
315        }
316        else {
317            if (PyUnicode_READY(src) == -1)
318                return -1;
319            Py_INCREF(src);
320            Py_XSETREF(*target, src);
321        }
322    }
323    return 0;
324}
325
326static int
327dialect_check_quoting(int quoting)
328{
329    const StyleDesc *qs;
330
331    for (qs = quote_styles; qs->name; qs++) {
332        if ((int)qs->style == quoting)
333            return 0;
334    }
335    PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
336    return -1;
337}
338
339#define D_OFF(x) offsetof(DialectObj, x)
340
341static struct PyMemberDef Dialect_memberlist[] = {
342    { "skipinitialspace",   T_BOOL, D_OFF(skipinitialspace), READONLY },
343    { "doublequote",        T_BOOL, D_OFF(doublequote), READONLY },
344    { "strict",             T_BOOL, D_OFF(strict), READONLY },
345    { NULL }
346};
347
348static PyGetSetDef Dialect_getsetlist[] = {
349    { "delimiter",          (getter)Dialect_get_delimiter},
350    { "escapechar",             (getter)Dialect_get_escapechar},
351    { "lineterminator",         (getter)Dialect_get_lineterminator},
352    { "quotechar",              (getter)Dialect_get_quotechar},
353    { "quoting",                (getter)Dialect_get_quoting},
354    {NULL},
355};
356
357static void
358Dialect_dealloc(DialectObj *self)
359{
360    PyTypeObject *tp = Py_TYPE(self);
361    PyObject_GC_UnTrack(self);
362    tp->tp_clear((PyObject *)self);
363    PyObject_GC_Del(self);
364    Py_DECREF(tp);
365}
366
/* Keyword names accepted by dialect_new(), in positional order. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter", "doublequote", "escapechar", "lineterminator",
    "quotechar", "quoting", "skipinitialspace", "strict",
    NULL
};
379
380static _csvstate *
381_csv_state_from_type(PyTypeObject *type, const char *name)
382{
383    PyObject *module = PyType_GetModuleByDef(type, &_csvmodule);
384    if (module == NULL) {
385        return NULL;
386    }
387    _csvstate *module_state = PyModule_GetState(module);
388    if (module_state == NULL) {
389        PyErr_Format(PyExc_SystemError,
390                     "%s: No _csv module state found", name);
391        return NULL;
392    }
393    return module_state;
394}
395
396static PyObject *
397dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
398{
399    DialectObj *self;
400    PyObject *ret = NULL;
401    PyObject *dialect = NULL;
402    PyObject *delimiter = NULL;
403    PyObject *doublequote = NULL;
404    PyObject *escapechar = NULL;
405    PyObject *lineterminator = NULL;
406    PyObject *quotechar = NULL;
407    PyObject *quoting = NULL;
408    PyObject *skipinitialspace = NULL;
409    PyObject *strict = NULL;
410
411    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
412                                     "|OOOOOOOOO", dialect_kws,
413                                     &dialect,
414                                     &delimiter,
415                                     &doublequote,
416                                     &escapechar,
417                                     &lineterminator,
418                                     &quotechar,
419                                     &quoting,
420                                     &skipinitialspace,
421                                     &strict))
422        return NULL;
423
424    _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
425    if (module_state == NULL) {
426        return NULL;
427    }
428
429    if (dialect != NULL) {
430        if (PyUnicode_Check(dialect)) {
431            dialect = get_dialect_from_registry(dialect, module_state);
432            if (dialect == NULL)
433                return NULL;
434        }
435        else
436            Py_INCREF(dialect);
437        /* Can we reuse this instance? */
438        if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
439            delimiter == NULL &&
440            doublequote == NULL &&
441            escapechar == NULL &&
442            lineterminator == NULL &&
443            quotechar == NULL &&
444            quoting == NULL &&
445            skipinitialspace == NULL &&
446            strict == NULL)
447            return dialect;
448    }
449
450    self = (DialectObj *)type->tp_alloc(type, 0);
451    if (self == NULL) {
452        Py_CLEAR(dialect);
453        return NULL;
454    }
455    self->lineterminator = NULL;
456
457    Py_XINCREF(delimiter);
458    Py_XINCREF(doublequote);
459    Py_XINCREF(escapechar);
460    Py_XINCREF(lineterminator);
461    Py_XINCREF(quotechar);
462    Py_XINCREF(quoting);
463    Py_XINCREF(skipinitialspace);
464    Py_XINCREF(strict);
465    if (dialect != NULL) {
466#define DIALECT_GETATTR(v, n)                            \
467        do {                                             \
468            if (v == NULL) {                             \
469                v = PyObject_GetAttrString(dialect, n);  \
470                if (v == NULL)                           \
471                    PyErr_Clear();                       \
472            }                                            \
473        } while (0)
474        DIALECT_GETATTR(delimiter, "delimiter");
475        DIALECT_GETATTR(doublequote, "doublequote");
476        DIALECT_GETATTR(escapechar, "escapechar");
477        DIALECT_GETATTR(lineterminator, "lineterminator");
478        DIALECT_GETATTR(quotechar, "quotechar");
479        DIALECT_GETATTR(quoting, "quoting");
480        DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
481        DIALECT_GETATTR(strict, "strict");
482    }
483
484    /* check types and convert to C values */
485#define DIASET(meth, name, target, src, dflt) \
486    if (meth(name, target, src, dflt)) \
487        goto err
488    DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
489    DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
490    DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, NOT_SET);
491    DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
492    DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"');
493    DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
494    DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
495    DIASET(_set_bool, "strict", &self->strict, strict, false);
496
497    /* validate options */
498    if (dialect_check_quoting(self->quoting))
499        goto err;
500    if (self->delimiter == NOT_SET) {
501        PyErr_SetString(PyExc_TypeError,
502                        "\"delimiter\" must be a 1-character string");
503        goto err;
504    }
505    if (quotechar == Py_None && quoting == NULL)
506        self->quoting = QUOTE_NONE;
507    if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) {
508        PyErr_SetString(PyExc_TypeError,
509                        "quotechar must be set if quoting enabled");
510        goto err;
511    }
512    if (self->lineterminator == NULL) {
513        PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
514        goto err;
515    }
516
517    ret = (PyObject *)self;
518    Py_INCREF(self);
519err:
520    Py_CLEAR(self);
521    Py_CLEAR(dialect);
522    Py_CLEAR(delimiter);
523    Py_CLEAR(doublequote);
524    Py_CLEAR(escapechar);
525    Py_CLEAR(lineterminator);
526    Py_CLEAR(quotechar);
527    Py_CLEAR(quoting);
528    Py_CLEAR(skipinitialspace);
529    Py_CLEAR(strict);
530    return ret;
531}
532
533/* Since dialect is now a heap type, it inherits pickling method for
534 * protocol 0 and 1 from object, therefore it needs to be overridden */
535
536PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
537
538static PyObject *
539Dialect_reduce(PyObject *self, PyObject *args) {
540    PyErr_Format(PyExc_TypeError,
541        "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
542    return NULL;
543}
544
545static struct PyMethodDef dialect_methods[] = {
546    {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
547    {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
548    {NULL, NULL}
549};
550
551PyDoc_STRVAR(Dialect_Type_doc,
552"CSV dialect\n"
553"\n"
554"The Dialect type records CSV parsing and generation options.\n");
555
556static int
557Dialect_clear(DialectObj *self)
558{
559    Py_CLEAR(self->lineterminator);
560    return 0;
561}
562
563static int
564Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
565{
566    Py_VISIT(self->lineterminator);
567    Py_VISIT(Py_TYPE(self));
568    return 0;
569}
570
571static PyType_Slot Dialect_Type_slots[] = {
572    {Py_tp_doc, (char*)Dialect_Type_doc},
573    {Py_tp_members, Dialect_memberlist},
574    {Py_tp_getset, Dialect_getsetlist},
575    {Py_tp_new, dialect_new},
576    {Py_tp_methods, dialect_methods},
577    {Py_tp_dealloc, Dialect_dealloc},
578    {Py_tp_clear, Dialect_clear},
579    {Py_tp_traverse, Dialect_traverse},
580    {0, NULL}
581};
582
583PyType_Spec Dialect_Type_spec = {
584    .name = "_csv.Dialect",
585    .basicsize = sizeof(DialectObj),
586    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
587              Py_TPFLAGS_IMMUTABLETYPE),
588    .slots = Dialect_Type_slots,
589};
590
591
592/*
593 * Return an instance of the dialect type, given a Python instance or kwarg
594 * description of the dialect
595 */
596static PyObject *
597_call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
598{
599    PyObject *type = (PyObject *)module_state->dialect_type;
600    if (dialect_inst) {
601        return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
602    }
603    else {
604        return PyObject_VectorcallDict(type, NULL, 0, kwargs);
605    }
606}
607
608/*
609 * READER
610 */
611static int
612parse_save_field(ReaderObj *self)
613{
614    PyObject *field;
615
616    field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
617                                      (void *) self->field, self->field_len);
618    if (field == NULL)
619        return -1;
620    self->field_len = 0;
621    if (self->numeric_field) {
622        PyObject *tmp;
623
624        self->numeric_field = 0;
625        tmp = PyNumber_Float(field);
626        Py_DECREF(field);
627        if (tmp == NULL)
628            return -1;
629        field = tmp;
630    }
631    if (PyList_Append(self->fields, field) < 0) {
632        Py_DECREF(field);
633        return -1;
634    }
635    Py_DECREF(field);
636    return 0;
637}
638
639static int
640parse_grow_buff(ReaderObj *self)
641{
642    assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
643
644    Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
645    Py_UCS4 *field_new = self->field;
646    PyMem_Resize(field_new, Py_UCS4, field_size_new);
647    if (field_new == NULL) {
648        PyErr_NoMemory();
649        return 0;
650    }
651    self->field = field_new;
652    self->field_size = field_size_new;
653    return 1;
654}
655
656static int
657parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
658{
659    if (self->field_len >= module_state->field_limit) {
660        PyErr_Format(module_state->error_obj,
661                     "field larger than field limit (%ld)",
662                     module_state->field_limit);
663        return -1;
664    }
665    if (self->field_len == self->field_size && !parse_grow_buff(self))
666        return -1;
667    self->field[self->field_len++] = c;
668    return 0;
669}
670
671static int
672parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
673{
674    DialectObj *dialect = self->dialect;
675
676    switch (self->state) {
677    case START_RECORD:
678        /* start of record */
679        if (c == EOL)
680            /* empty line - return [] */
681            break;
682        else if (c == '\n' || c == '\r') {
683            self->state = EAT_CRNL;
684            break;
685        }
686        /* normal character - handle as START_FIELD */
687        self->state = START_FIELD;
688        /* fallthru */
689    case START_FIELD:
690        /* expecting field */
691        if (c == '\n' || c == '\r' || c == EOL) {
692            /* save empty field - return [fields] */
693            if (parse_save_field(self) < 0)
694                return -1;
695            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
696        }
697        else if (c == dialect->quotechar &&
698                 dialect->quoting != QUOTE_NONE) {
699            /* start quoted field */
700            self->state = IN_QUOTED_FIELD;
701        }
702        else if (c == dialect->escapechar) {
703            /* possible escaped character */
704            self->state = ESCAPED_CHAR;
705        }
706        else if (c == ' ' && dialect->skipinitialspace)
707            /* ignore spaces at start of field */
708            ;
709        else if (c == dialect->delimiter) {
710            /* save empty field */
711            if (parse_save_field(self) < 0)
712                return -1;
713        }
714        else {
715            /* begin new unquoted field */
716            if (dialect->quoting == QUOTE_NONNUMERIC)
717                self->numeric_field = 1;
718            if (parse_add_char(self, module_state, c) < 0)
719                return -1;
720            self->state = IN_FIELD;
721        }
722        break;
723
724    case ESCAPED_CHAR:
725        if (c == '\n' || c=='\r') {
726            if (parse_add_char(self, module_state, c) < 0)
727                return -1;
728            self->state = AFTER_ESCAPED_CRNL;
729            break;
730        }
731        if (c == EOL)
732            c = '\n';
733        if (parse_add_char(self, module_state, c) < 0)
734            return -1;
735        self->state = IN_FIELD;
736        break;
737
738    case AFTER_ESCAPED_CRNL:
739        if (c == EOL)
740            break;
741        /*fallthru*/
742
743    case IN_FIELD:
744        /* in unquoted field */
745        if (c == '\n' || c == '\r' || c == EOL) {
746            /* end of line - return [fields] */
747            if (parse_save_field(self) < 0)
748                return -1;
749            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
750        }
751        else if (c == dialect->escapechar) {
752            /* possible escaped character */
753            self->state = ESCAPED_CHAR;
754        }
755        else if (c == dialect->delimiter) {
756            /* save field - wait for new field */
757            if (parse_save_field(self) < 0)
758                return -1;
759            self->state = START_FIELD;
760        }
761        else {
762            /* normal character - save in field */
763            if (parse_add_char(self, module_state, c) < 0)
764                return -1;
765        }
766        break;
767
768    case IN_QUOTED_FIELD:
769        /* in quoted field */
770        if (c == EOL)
771            ;
772        else if (c == dialect->escapechar) {
773            /* Possible escape character */
774            self->state = ESCAPE_IN_QUOTED_FIELD;
775        }
776        else if (c == dialect->quotechar &&
777                 dialect->quoting != QUOTE_NONE) {
778            if (dialect->doublequote) {
779                /* doublequote; " represented by "" */
780                self->state = QUOTE_IN_QUOTED_FIELD;
781            }
782            else {
783                /* end of quote part of field */
784                self->state = IN_FIELD;
785            }
786        }
787        else {
788            /* normal character - save in field */
789            if (parse_add_char(self, module_state, c) < 0)
790                return -1;
791        }
792        break;
793
794    case ESCAPE_IN_QUOTED_FIELD:
795        if (c == EOL)
796            c = '\n';
797        if (parse_add_char(self, module_state, c) < 0)
798            return -1;
799        self->state = IN_QUOTED_FIELD;
800        break;
801
802    case QUOTE_IN_QUOTED_FIELD:
803        /* doublequote - seen a quote in a quoted field */
804        if (dialect->quoting != QUOTE_NONE &&
805            c == dialect->quotechar) {
806            /* save "" as " */
807            if (parse_add_char(self, module_state, c) < 0)
808                return -1;
809            self->state = IN_QUOTED_FIELD;
810        }
811        else if (c == dialect->delimiter) {
812            /* save field - wait for new field */
813            if (parse_save_field(self) < 0)
814                return -1;
815            self->state = START_FIELD;
816        }
817        else if (c == '\n' || c == '\r' || c == EOL) {
818            /* end of line - return [fields] */
819            if (parse_save_field(self) < 0)
820                return -1;
821            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
822        }
823        else if (!dialect->strict) {
824            if (parse_add_char(self, module_state, c) < 0)
825                return -1;
826            self->state = IN_FIELD;
827        }
828        else {
829            /* illegal */
830            PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
831                            dialect->delimiter,
832                            dialect->quotechar);
833            return -1;
834        }
835        break;
836
837    case EAT_CRNL:
838        if (c == '\n' || c == '\r')
839            ;
840        else if (c == EOL)
841            self->state = START_RECORD;
842        else {
843            PyErr_Format(module_state->error_obj,
844                         "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
845            return -1;
846        }
847        break;
848
849    }
850    return 0;
851}
852
853static int
854parse_reset(ReaderObj *self)
855{
856    Py_XSETREF(self->fields, PyList_New(0));
857    if (self->fields == NULL)
858        return -1;
859    self->field_len = 0;
860    self->state = START_RECORD;
861    self->numeric_field = 0;
862    return 0;
863}
864
865static PyObject *
866Reader_iternext(ReaderObj *self)
867{
868    PyObject *fields = NULL;
869    Py_UCS4 c;
870    Py_ssize_t pos, linelen;
871    unsigned int kind;
872    const void *data;
873    PyObject *lineobj;
874
875    _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
876                                                   "Reader.__next__");
877    if (module_state == NULL) {
878        return NULL;
879    }
880
881    if (parse_reset(self) < 0)
882        return NULL;
883    do {
884        lineobj = PyIter_Next(self->input_iter);
885        if (lineobj == NULL) {
886            /* End of input OR exception */
887            if (!PyErr_Occurred() && (self->field_len != 0 ||
888                                      self->state == IN_QUOTED_FIELD)) {
889                if (self->dialect->strict)
890                    PyErr_SetString(module_state->error_obj,
891                                    "unexpected end of data");
892                else if (parse_save_field(self) >= 0)
893                    break;
894            }
895            return NULL;
896        }
897        if (!PyUnicode_Check(lineobj)) {
898            PyErr_Format(module_state->error_obj,
899                         "iterator should return strings, "
900                         "not %.200s "
901                         "(the file should be opened in text mode)",
902                         Py_TYPE(lineobj)->tp_name
903                );
904            Py_DECREF(lineobj);
905            return NULL;
906        }
907        if (PyUnicode_READY(lineobj) == -1) {
908            Py_DECREF(lineobj);
909            return NULL;
910        }
911        ++self->line_num;
912        kind = PyUnicode_KIND(lineobj);
913        data = PyUnicode_DATA(lineobj);
914        pos = 0;
915        linelen = PyUnicode_GET_LENGTH(lineobj);
916        while (linelen--) {
917            c = PyUnicode_READ(kind, data, pos);
918            if (parse_process_char(self, module_state, c) < 0) {
919                Py_DECREF(lineobj);
920                goto err;
921            }
922            pos++;
923        }
924        Py_DECREF(lineobj);
925        if (parse_process_char(self, module_state, EOL) < 0)
926            goto err;
927    } while (self->state != START_RECORD);
928
929    fields = self->fields;
930    self->fields = NULL;
931err:
932    return fields;
933}
934
935static void
936Reader_dealloc(ReaderObj *self)
937{
938    PyTypeObject *tp = Py_TYPE(self);
939    PyObject_GC_UnTrack(self);
940    tp->tp_clear((PyObject *)self);
941    if (self->field != NULL) {
942        PyMem_Free(self->field);
943        self->field = NULL;
944    }
945    PyObject_GC_Del(self);
946    Py_DECREF(tp);
947}
948
949static int
950Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
951{
952    Py_VISIT(self->dialect);
953    Py_VISIT(self->input_iter);
954    Py_VISIT(self->fields);
955    Py_VISIT(Py_TYPE(self));
956    return 0;
957}
958
959static int
960Reader_clear(ReaderObj *self)
961{
962    Py_CLEAR(self->dialect);
963    Py_CLEAR(self->input_iter);
964    Py_CLEAR(self->fields);
965    return 0;
966}
967
968PyDoc_STRVAR(Reader_Type_doc,
969"CSV reader\n"
970"\n"
971"Reader objects are responsible for reading and parsing tabular data\n"
972"in CSV format.\n"
973);
974
975static struct PyMethodDef Reader_methods[] = {
976    { NULL, NULL }
977};
978#define R_OFF(x) offsetof(ReaderObj, x)
979
980static struct PyMemberDef Reader_memberlist[] = {
981    { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
982    { "line_num", T_ULONG, R_OFF(line_num), READONLY },
983    { NULL }
984};
985
986
987static PyType_Slot Reader_Type_slots[] = {
988    {Py_tp_doc, (char*)Reader_Type_doc},
989    {Py_tp_traverse, Reader_traverse},
990    {Py_tp_iter, PyObject_SelfIter},
991    {Py_tp_iternext, Reader_iternext},
992    {Py_tp_methods, Reader_methods},
993    {Py_tp_members, Reader_memberlist},
994    {Py_tp_clear, Reader_clear},
995    {Py_tp_dealloc, Reader_dealloc},
996    {0, NULL}
997};
998
999PyType_Spec Reader_Type_spec = {
1000    .name = "_csv.reader",
1001    .basicsize = sizeof(ReaderObj),
1002    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
1003              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
1004    .slots = Reader_Type_slots
1005};
1006
1007
1008static PyObject *
1009csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
1010{
1011    PyObject * iterator, * dialect = NULL;
1012    _csvstate *module_state = get_csv_state(module);
1013    ReaderObj * self = PyObject_GC_New(
1014        ReaderObj,
1015        module_state->reader_type);
1016
1017    if (!self)
1018        return NULL;
1019
1020    self->dialect = NULL;
1021    self->fields = NULL;
1022    self->input_iter = NULL;
1023    self->field = NULL;
1024    self->field_size = 0;
1025    self->line_num = 0;
1026
1027    if (parse_reset(self) < 0) {
1028        Py_DECREF(self);
1029        return NULL;
1030    }
1031
1032    if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
1033        Py_DECREF(self);
1034        return NULL;
1035    }
1036    self->input_iter = PyObject_GetIter(iterator);
1037    if (self->input_iter == NULL) {
1038        Py_DECREF(self);
1039        return NULL;
1040    }
1041    self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1042                                                keyword_args);
1043    if (self->dialect == NULL) {
1044        Py_DECREF(self);
1045        return NULL;
1046    }
1047
1048    PyObject_GC_Track(self);
1049    return (PyObject *)self;
1050}
1051
1052/*
1053 * WRITER
1054 */
1055/* ---------------------------------------------------------------- */
1056static void
1057join_reset(WriterObj *self)
1058{
1059    self->rec_len = 0;
1060    self->num_fields = 0;
1061}
1062
/* Growth granularity of the writer's record buffer: join_check_rec_size()
 * always sizes the buffer to a multiple of this many Py_UCS4 elements.
 */
#define MEM_INCR 32768
1064
1065/* Calculate new record length or append field to record.  Return new
1066 * record length.
1067 */
1068static Py_ssize_t
1069join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
1070                 Py_ssize_t field_len, int *quoted,
1071                 int copy_phase)
1072{
1073    DialectObj *dialect = self->dialect;
1074    int i;
1075    Py_ssize_t rec_len;
1076
1077#define INCLEN \
1078    do {\
1079        if (!copy_phase && rec_len == PY_SSIZE_T_MAX) {    \
1080            goto overflow; \
1081        } \
1082        rec_len++; \
1083    } while(0)
1084
1085#define ADDCH(c)                                \
1086    do {\
1087        if (copy_phase) \
1088            self->rec[rec_len] = c;\
1089        INCLEN;\
1090    } while(0)
1091
1092    rec_len = self->rec_len;
1093
1094    /* If this is not the first field we need a field separator */
1095    if (self->num_fields > 0)
1096        ADDCH(dialect->delimiter);
1097
1098    /* Handle preceding quote */
1099    if (copy_phase && *quoted)
1100        ADDCH(dialect->quotechar);
1101
1102    /* Copy/count field data */
1103    /* If field is null just pass over */
1104    for (i = 0; field_data && (i < field_len); i++) {
1105        Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1106        int want_escape = 0;
1107
1108        if (c == dialect->delimiter ||
1109            c == dialect->escapechar ||
1110            c == dialect->quotechar  ||
1111            PyUnicode_FindChar(
1112                dialect->lineterminator, c, 0,
1113                PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1114            if (dialect->quoting == QUOTE_NONE)
1115                want_escape = 1;
1116            else {
1117                if (c == dialect->quotechar) {
1118                    if (dialect->doublequote)
1119                        ADDCH(dialect->quotechar);
1120                    else
1121                        want_escape = 1;
1122                }
1123                else if (c == dialect->escapechar) {
1124                    want_escape = 1;
1125                }
1126                if (!want_escape)
1127                    *quoted = 1;
1128            }
1129            if (want_escape) {
1130                if (dialect->escapechar == NOT_SET) {
1131                    PyErr_Format(self->error_obj,
1132                                 "need to escape, but no escapechar set");
1133                    return -1;
1134                }
1135                ADDCH(dialect->escapechar);
1136            }
1137        }
1138        /* Copy field character into record buffer.
1139         */
1140        ADDCH(c);
1141    }
1142
1143    if (*quoted) {
1144        if (copy_phase)
1145            ADDCH(dialect->quotechar);
1146        else {
1147            INCLEN; /* starting quote */
1148            INCLEN; /* ending quote */
1149        }
1150    }
1151    return rec_len;
1152
1153  overflow:
1154    PyErr_NoMemory();
1155    return -1;
1156#undef ADDCH
1157#undef INCLEN
1158}
1159
1160static int
1161join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1162{
1163    assert(rec_len >= 0);
1164
1165    if (rec_len > self->rec_size) {
1166        size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1167        Py_UCS4 *rec_new = self->rec;
1168        PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1169        if (rec_new == NULL) {
1170            PyErr_NoMemory();
1171            return 0;
1172        }
1173        self->rec = rec_new;
1174        self->rec_size = (Py_ssize_t)rec_size_new;
1175    }
1176    return 1;
1177}
1178
1179static int
1180join_append(WriterObj *self, PyObject *field, int quoted)
1181{
1182    unsigned int field_kind = -1;
1183    const void *field_data = NULL;
1184    Py_ssize_t field_len = 0;
1185    Py_ssize_t rec_len;
1186
1187    if (field != NULL) {
1188        if (PyUnicode_READY(field) == -1)
1189            return 0;
1190        field_kind = PyUnicode_KIND(field);
1191        field_data = PyUnicode_DATA(field);
1192        field_len = PyUnicode_GET_LENGTH(field);
1193    }
1194    rec_len = join_append_data(self, field_kind, field_data, field_len,
1195                               &quoted, 0);
1196    if (rec_len < 0)
1197        return 0;
1198
1199    /* grow record buffer if necessary */
1200    if (!join_check_rec_size(self, rec_len))
1201        return 0;
1202
1203    self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1204                                     &quoted, 1);
1205    self->num_fields++;
1206
1207    return 1;
1208}
1209
1210static int
1211join_append_lineterminator(WriterObj *self)
1212{
1213    Py_ssize_t terminator_len, i;
1214    unsigned int term_kind;
1215    const void *term_data;
1216
1217    terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1218    if (terminator_len == -1)
1219        return 0;
1220
1221    /* grow record buffer if necessary */
1222    if (!join_check_rec_size(self, self->rec_len + terminator_len))
1223        return 0;
1224
1225    term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1226    term_data = PyUnicode_DATA(self->dialect->lineterminator);
1227    for (i = 0; i < terminator_len; i++)
1228        self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1229    self->rec_len += terminator_len;
1230
1231    return 1;
1232}
1233
1234PyDoc_STRVAR(csv_writerow_doc,
1235"writerow(iterable)\n"
1236"\n"
1237"Construct and write a CSV record from an iterable of fields.  Non-string\n"
1238"elements will be converted to string.");
1239
1240static PyObject *
1241csv_writerow(WriterObj *self, PyObject *seq)
1242{
1243    DialectObj *dialect = self->dialect;
1244    PyObject *iter, *field, *line, *result;
1245
1246    iter = PyObject_GetIter(seq);
1247    if (iter == NULL) {
1248        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
1249            PyErr_Format(self->error_obj,
1250                         "iterable expected, not %.200s",
1251                         Py_TYPE(seq)->tp_name);
1252        }
1253        return NULL;
1254    }
1255
1256    /* Join all fields in internal buffer.
1257     */
1258    join_reset(self);
1259    while ((field = PyIter_Next(iter))) {
1260        int append_ok;
1261        int quoted;
1262
1263        switch (dialect->quoting) {
1264        case QUOTE_NONNUMERIC:
1265            quoted = !PyNumber_Check(field);
1266            break;
1267        case QUOTE_ALL:
1268            quoted = 1;
1269            break;
1270        default:
1271            quoted = 0;
1272            break;
1273        }
1274
1275        if (PyUnicode_Check(field)) {
1276            append_ok = join_append(self, field, quoted);
1277            Py_DECREF(field);
1278        }
1279        else if (field == Py_None) {
1280            append_ok = join_append(self, NULL, quoted);
1281            Py_DECREF(field);
1282        }
1283        else {
1284            PyObject *str;
1285
1286            str = PyObject_Str(field);
1287            Py_DECREF(field);
1288            if (str == NULL) {
1289                Py_DECREF(iter);
1290                return NULL;
1291            }
1292            append_ok = join_append(self, str, quoted);
1293            Py_DECREF(str);
1294        }
1295        if (!append_ok) {
1296            Py_DECREF(iter);
1297            return NULL;
1298        }
1299    }
1300    Py_DECREF(iter);
1301    if (PyErr_Occurred())
1302        return NULL;
1303
1304    if (self->num_fields > 0 && self->rec_len == 0) {
1305        if (dialect->quoting == QUOTE_NONE) {
1306            PyErr_Format(self->error_obj,
1307                "single empty field record must be quoted");
1308            return NULL;
1309        }
1310        self->num_fields--;
1311        if (!join_append(self, NULL, 1))
1312            return NULL;
1313    }
1314
1315    /* Add line terminator.
1316     */
1317    if (!join_append_lineterminator(self)) {
1318        return NULL;
1319    }
1320
1321    line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1322                                     (void *) self->rec, self->rec_len);
1323    if (line == NULL) {
1324        return NULL;
1325    }
1326    result = PyObject_CallOneArg(self->write, line);
1327    Py_DECREF(line);
1328    return result;
1329}
1330
1331PyDoc_STRVAR(csv_writerows_doc,
1332"writerows(iterable of iterables)\n"
1333"\n"
1334"Construct and write a series of iterables to a csv file.  Non-string\n"
1335"elements will be converted to string.");
1336
1337static PyObject *
1338csv_writerows(WriterObj *self, PyObject *seqseq)
1339{
1340    PyObject *row_iter, *row_obj, *result;
1341
1342    row_iter = PyObject_GetIter(seqseq);
1343    if (row_iter == NULL) {
1344        return NULL;
1345    }
1346    while ((row_obj = PyIter_Next(row_iter))) {
1347        result = csv_writerow(self, row_obj);
1348        Py_DECREF(row_obj);
1349        if (!result) {
1350            Py_DECREF(row_iter);
1351            return NULL;
1352        }
1353        else
1354             Py_DECREF(result);
1355    }
1356    Py_DECREF(row_iter);
1357    if (PyErr_Occurred())
1358        return NULL;
1359    Py_RETURN_NONE;
1360}
1361
1362static struct PyMethodDef Writer_methods[] = {
1363    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1364    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1365    { NULL, NULL }
1366};
1367
1368#define W_OFF(x) offsetof(WriterObj, x)
1369
1370static struct PyMemberDef Writer_memberlist[] = {
1371    { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1372    { NULL }
1373};
1374
/* tp_traverse handler for writer objects: report every object reference the
 * writer holds (dialect, write callable, error type) plus its own type to
 * the visit callback.  Py_VISIT returns early when a visit yields non-zero.
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->write);
    Py_VISIT(self->error_obj);
    Py_VISIT(Py_TYPE(self));
    return 0;
}
1384
/* tp_clear handler for writer objects: drop the references held in the
 * dialect, write and error_obj members.  The record buffer is not touched
 * here; Writer_dealloc() frees it.  Always returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->write);
    Py_CLEAR(self->error_obj);
    return 0;
}
1393
1394static void
1395Writer_dealloc(WriterObj *self)
1396{
1397    PyTypeObject *tp = Py_TYPE(self);
1398    PyObject_GC_UnTrack(self);
1399    tp->tp_clear((PyObject *)self);
1400    if (self->rec != NULL) {
1401        PyMem_Free(self->rec);
1402    }
1403    PyObject_GC_Del(self);
1404    Py_DECREF(tp);
1405}
1406
/* Docstring of the writer type; installed through the Py_tp_doc slot. */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
1413
1414static PyType_Slot Writer_Type_slots[] = {
1415    {Py_tp_doc, (char*)Writer_Type_doc},
1416    {Py_tp_traverse, Writer_traverse},
1417    {Py_tp_clear, Writer_clear},
1418    {Py_tp_dealloc, Writer_dealloc},
1419    {Py_tp_methods, Writer_methods},
1420    {Py_tp_members, Writer_memberlist},
1421    {0, NULL}
1422};
1423
/* Type specification for "_csv.writer".  csv_exec() turns it into a heap
 * type with PyType_FromModuleAndSpec(); instances are allocated by
 * csv_writer() with PyObject_GC_New(), and the flags include
 * Py_TPFLAGS_DISALLOW_INSTANTIATION.
 */
PyType_Spec Writer_Type_spec = {
    .name = "_csv.writer",
    .basicsize = sizeof(WriterObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
    .slots = Writer_Type_slots,
};
1431
1432
1433static PyObject *
1434csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1435{
1436    PyObject * output_file, * dialect = NULL;
1437    _csvstate *module_state = get_csv_state(module);
1438    WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
1439
1440    if (!self)
1441        return NULL;
1442
1443    self->dialect = NULL;
1444    self->write = NULL;
1445
1446    self->rec = NULL;
1447    self->rec_size = 0;
1448    self->rec_len = 0;
1449    self->num_fields = 0;
1450
1451    self->error_obj = Py_NewRef(module_state->error_obj);
1452
1453    if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1454        Py_DECREF(self);
1455        return NULL;
1456    }
1457    if (_PyObject_LookupAttr(output_file,
1458                             module_state->str_write,
1459                             &self->write) < 0) {
1460        Py_DECREF(self);
1461        return NULL;
1462    }
1463    if (self->write == NULL || !PyCallable_Check(self->write)) {
1464        PyErr_SetString(PyExc_TypeError,
1465                        "argument 1 must have a \"write\" method");
1466        Py_DECREF(self);
1467        return NULL;
1468    }
1469    self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1470                                                keyword_args);
1471    if (self->dialect == NULL) {
1472        Py_DECREF(self);
1473        return NULL;
1474    }
1475    PyObject_GC_Track(self);
1476    return (PyObject *)self;
1477}
1478
1479/*
1480 * DIALECT REGISTRY
1481 */
1482
1483/*[clinic input]
1484_csv.list_dialects
1485
1486Return a list of all known dialect names.
1487
1488    names = csv.list_dialects()
1489[clinic start generated code]*/
1490
1491static PyObject *
1492_csv_list_dialects_impl(PyObject *module)
1493/*[clinic end generated code: output=a5b92b215b006a6d input=8953943eb17d98ab]*/
1494{
1495    return PyDict_Keys(get_csv_state(module)->dialects);
1496}
1497
1498static PyObject *
1499csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1500{
1501    PyObject *name_obj, *dialect_obj = NULL;
1502    _csvstate *module_state = get_csv_state(module);
1503    PyObject *dialect;
1504
1505    if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1506        return NULL;
1507    if (!PyUnicode_Check(name_obj)) {
1508        PyErr_SetString(PyExc_TypeError,
1509                        "dialect name must be a string");
1510        return NULL;
1511    }
1512    if (PyUnicode_READY(name_obj) == -1)
1513        return NULL;
1514    dialect = _call_dialect(module_state, dialect_obj, kwargs);
1515    if (dialect == NULL)
1516        return NULL;
1517    if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) {
1518        Py_DECREF(dialect);
1519        return NULL;
1520    }
1521    Py_DECREF(dialect);
1522    Py_RETURN_NONE;
1523}
1524
1525
1526/*[clinic input]
1527_csv.unregister_dialect
1528
1529    name: object
1530
1531Delete the name/dialect mapping associated with a string name.
1532
1533    csv.unregister_dialect(name)
1534[clinic start generated code]*/
1535
1536static PyObject *
1537_csv_unregister_dialect_impl(PyObject *module, PyObject *name)
1538/*[clinic end generated code: output=0813ebca6c058df4 input=6b5c1557bf60c7e7]*/
1539{
1540    _csvstate *module_state = get_csv_state(module);
1541    if (PyDict_DelItem(module_state->dialects, name) < 0) {
1542        if (PyErr_ExceptionMatches(PyExc_KeyError)) {
1543            PyErr_Format(module_state->error_obj, "unknown dialect");
1544        }
1545        return NULL;
1546    }
1547    Py_RETURN_NONE;
1548}
1549
1550/*[clinic input]
1551_csv.get_dialect
1552
1553    name: object
1554
1555Return the dialect instance associated with name.
1556
1557    dialect = csv.get_dialect(name)
1558[clinic start generated code]*/
1559
1560static PyObject *
1561_csv_get_dialect_impl(PyObject *module, PyObject *name)
1562/*[clinic end generated code: output=aa988cd573bebebb input=edf9ddab32e448fb]*/
1563{
1564    return get_dialect_from_registry(name, get_csv_state(module));
1565}
1566
1567/*[clinic input]
1568_csv.field_size_limit
1569
1570    new_limit: object = NULL
1571
1572Sets an upper limit on parsed fields.
1573
1574    csv.field_size_limit([limit])
1575
1576Returns old limit. If limit is not given, no new limit is set and
1577the old limit is returned
1578[clinic start generated code]*/
1579
1580static PyObject *
1581_csv_field_size_limit_impl(PyObject *module, PyObject *new_limit)
1582/*[clinic end generated code: output=f2799ecd908e250b input=cec70e9226406435]*/
1583{
1584    _csvstate *module_state = get_csv_state(module);
1585    long old_limit = module_state->field_limit;
1586    if (new_limit != NULL) {
1587        if (!PyLong_CheckExact(new_limit)) {
1588            PyErr_Format(PyExc_TypeError,
1589                         "limit must be an integer");
1590            return NULL;
1591        }
1592        module_state->field_limit = PyLong_AsLong(new_limit);
1593        if (module_state->field_limit == -1 && PyErr_Occurred()) {
1594            module_state->field_limit = old_limit;
1595            return NULL;
1596        }
1597    }
1598    return PyLong_FromLong(old_limit);
1599}
1600
/* Slot table for the exception type: only the terminating sentinel, i.e. no
 * slot is overridden.
 */
static PyType_Slot error_slots[] = {
    {0, NULL},
};

/* Type specification for "_csv.Error".  csv_exec() creates the type from it
 * with PyExc_Exception as the only base class.
 */
PyType_Spec error_spec = {
    .name = "_csv.Error",
    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
    .slots = error_slots,
};
1610
1611/*
1612 * MODULE
1613 */
1614
/* Module docstring; referenced from the _csvmodule definition. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305.  Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail.  The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings.  When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
"    class excel:\n"
"        delimiter = ','\n"
"        quotechar = '\"'\n"
"        escapechar = None\n"
"        doublequote = True\n"
"        skipinitialspace = False\n"
"        lineterminator = '\\r\\n'\n"
"        quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
"    * quotechar - specifies a one-character string to use as the\n"
"        quoting character.  It defaults to '\"'.\n"
"    * delimiter - specifies a one-character string to use as the\n"
"        field separator.  It defaults to ','.\n"
"    * skipinitialspace - specifies how to interpret spaces which\n"
"        immediately follow a delimiter.  It defaults to False, which\n"
"        means that spaces immediately following a delimiter is part\n"
"        of the following field.\n"
"    * lineterminator -  specifies the character sequence which should\n"
"        terminate rows.\n"
"    * quoting - controls when quotes should be generated by the writer.\n"
"        It can take on any of the following module constants:\n"
"\n"
"        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
"            field contains either the quotechar or the delimiter\n"
"        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
"        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
"            fields which do not parse as integers or floating point\n"
"            numbers.\n"
"        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
"    * escapechar - specifies a one-character string used to escape\n"
"        the delimiter when quoting is set to QUOTE_NONE.\n"
"    * doublequote - controls the handling of quotes inside fields.  When\n"
"        True, two consecutive quotes are interpreted as one during read,\n"
"        and when writing, each quote character embedded in the data is\n"
"        written as two quotes\n");
1672
/* Docstring of the module-level reader() function (see csv_methods). */
PyDoc_STRVAR(csv_reader_doc,
"    csv_reader = reader(iterable [, dialect='excel']\n"
"                        [optional keyword args])\n"
"    for row in csv_reader:\n"
"        process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list.  The\n"
"optional \"dialect\" parameter is discussed below.  The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator.  Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
1687
/* Docstring of the module-level writer() function (see csv_methods). */
PyDoc_STRVAR(csv_writer_doc,
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    for row in sequence:\n"
"        csv_writer.writerow(row)\n"
"\n"
"    [or]\n"
"\n"
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1701
1702PyDoc_STRVAR(csv_register_dialect_doc,
1703"Create a mapping from a string name to a dialect class.\n"
1704"    dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1705
/* Module-level function table.  reader(), writer() and register_dialect()
 * parse their positional and keyword arguments by hand; the remaining
 * entries come from the Argument Clinic generated *_METHODDEF macros.
 */
static struct PyMethodDef csv_methods[] = {
    { "reader", _PyCFunction_CAST(csv_reader),
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    { "writer", _PyCFunction_CAST(csv_writer),
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    { "register_dialect", _PyCFunction_CAST(csv_register_dialect),
        METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
    _CSV_LIST_DIALECTS_METHODDEF
    _CSV_UNREGISTER_DIALECT_METHODDEF
    _CSV_GET_DIALECT_METHODDEF
    _CSV_FIELD_SIZE_LIMIT_METHODDEF
    { NULL, NULL }
};
1719
1720static int
1721csv_exec(PyObject *module) {
1722    const StyleDesc *style;
1723    PyObject *temp;
1724    _csvstate *module_state = get_csv_state(module);
1725
1726    temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1727    module_state->dialect_type = (PyTypeObject *)temp;
1728    if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
1729        return -1;
1730    }
1731
1732    temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1733    module_state->reader_type = (PyTypeObject *)temp;
1734    if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
1735        return -1;
1736    }
1737
1738    temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1739    module_state->writer_type = (PyTypeObject *)temp;
1740    if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
1741        return -1;
1742    }
1743
1744    /* Add version to the module. */
1745    if (PyModule_AddStringConstant(module, "__version__",
1746                                   MODULE_VERSION) == -1) {
1747        return -1;
1748    }
1749
1750    /* Set the field limit */
1751    module_state->field_limit = 128 * 1024;
1752
1753    /* Add _dialects dictionary */
1754    module_state->dialects = PyDict_New();
1755    if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
1756        return -1;
1757    }
1758
1759    /* Add quote styles into dictionary */
1760    for (style = quote_styles; style->name; style++) {
1761        if (PyModule_AddIntConstant(module, style->name,
1762                                    style->style) == -1)
1763            return -1;
1764    }
1765
1766    /* Add the CSV exception object to the module. */
1767    PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
1768    if (bases == NULL) {
1769        return -1;
1770    }
1771    module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1772                                                       bases);
1773    Py_DECREF(bases);
1774    if (module_state->error_obj == NULL) {
1775        return -1;
1776    }
1777    if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
1778        return -1;
1779    }
1780
1781    module_state->str_write = PyUnicode_InternFromString("write");
1782    if (module_state->str_write == NULL) {
1783        return -1;
1784    }
1785    return 0;
1786}
1787
1788static PyModuleDef_Slot csv_slots[] = {
1789    {Py_mod_exec, csv_exec},
1790    {0, NULL}
1791};
1792
1793static struct PyModuleDef _csvmodule = {
1794    PyModuleDef_HEAD_INIT,
1795    "_csv",
1796    csv_module_doc,
1797    sizeof(_csvstate),
1798    csv_methods,
1799    csv_slots,
1800    _csv_traverse,
1801    _csv_clear,
1802    _csv_free
1803};
1804
/* Module initialisation entry point.  It only hands the module definition
 * to PyModuleDef_Init(); the module contents are set up by csv_exec(),
 * which is registered in csv_slots.
 */
PyMODINIT_FUNC
PyInit__csv(void)
{
    return PyModuleDef_Init(&_csvmodule);
}
1810