1/* csv module */ 2 3/* 4 5This module provides the low-level underpinnings of a CSV reading/writing 6module. Users should not use this module directly, but import the csv.py 7module instead. 8 9*/ 10 11#define MODULE_VERSION "1.0" 12 13#include "Python.h" 14#include "structmember.h" // PyMemberDef 15#include <stdbool.h> 16 17/*[clinic input] 18module _csv 19[clinic start generated code]*/ 20/*[clinic end generated code: output=da39a3ee5e6b4b0d input=385118b71aa43706]*/ 21 22#include "clinic/_csv.c.h" 23#define NOT_SET ((Py_UCS4)-1) 24#define EOL ((Py_UCS4)-2) 25 26 27typedef struct { 28 PyObject *error_obj; /* CSV exception */ 29 PyObject *dialects; /* Dialect registry */ 30 PyTypeObject *dialect_type; 31 PyTypeObject *reader_type; 32 PyTypeObject *writer_type; 33 long field_limit; /* max parsed field size */ 34 PyObject *str_write; 35} _csvstate; 36 37static struct PyModuleDef _csvmodule; 38 39static inline _csvstate* 40get_csv_state(PyObject *module) 41{ 42 void *state = PyModule_GetState(module); 43 assert(state != NULL); 44 return (_csvstate *)state; 45} 46 47static int 48_csv_clear(PyObject *module) 49{ 50 _csvstate *module_state = PyModule_GetState(module); 51 Py_CLEAR(module_state->error_obj); 52 Py_CLEAR(module_state->dialects); 53 Py_CLEAR(module_state->dialect_type); 54 Py_CLEAR(module_state->reader_type); 55 Py_CLEAR(module_state->writer_type); 56 Py_CLEAR(module_state->str_write); 57 return 0; 58} 59 60static int 61_csv_traverse(PyObject *module, visitproc visit, void *arg) 62{ 63 _csvstate *module_state = PyModule_GetState(module); 64 Py_VISIT(module_state->error_obj); 65 Py_VISIT(module_state->dialects); 66 Py_VISIT(module_state->dialect_type); 67 Py_VISIT(module_state->reader_type); 68 Py_VISIT(module_state->writer_type); 69 return 0; 70} 71 72static void 73_csv_free(void *module) 74{ 75 _csv_clear((PyObject *)module); 76} 77 78typedef enum { 79 START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD, 80 IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD, 81 EAT_CRNL,AFTER_ESCAPED_CRNL 82} ParserState; 83 84typedef enum { 85 QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE 86} QuoteStyle; 87 88typedef struct { 89 QuoteStyle style; 90 const char *name; 91} StyleDesc; 92 93static const StyleDesc quote_styles[] = { 94 { QUOTE_MINIMAL, "QUOTE_MINIMAL" }, 95 { QUOTE_ALL, "QUOTE_ALL" }, 96 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" }, 97 { QUOTE_NONE, "QUOTE_NONE" }, 98 { 0 } 99}; 100 101typedef struct { 102 PyObject_HEAD 103 104 char doublequote; /* is " represented by ""? */ 105 char skipinitialspace; /* ignore spaces following delimiter? */ 106 char strict; /* raise exception on bad CSV */ 107 int quoting; /* style of quoting to write */ 108 Py_UCS4 delimiter; /* field separator */ 109 Py_UCS4 quotechar; /* quote character */ 110 Py_UCS4 escapechar; /* escape character */ 111 PyObject *lineterminator; /* string to write between records */ 112 113} DialectObj; 114 115typedef struct { 116 PyObject_HEAD 117 118 PyObject *input_iter; /* iterate over this for input lines */ 119 120 DialectObj *dialect; /* parsing dialect */ 121 122 PyObject *fields; /* field list for current record */ 123 ParserState state; /* current CSV parse state */ 124 Py_UCS4 *field; /* temporary buffer */ 125 Py_ssize_t field_size; /* size of allocated buffer */ 126 Py_ssize_t field_len; /* length of current field */ 127 int numeric_field; /* treat field as numeric */ 128 unsigned long line_num; /* Source-file line number */ 129} ReaderObj; 130 131typedef struct { 132 PyObject_HEAD 133 134 PyObject *write; /* write output lines to this file */ 135 136 DialectObj *dialect; /* parsing dialect */ 137 138 Py_UCS4 *rec; /* buffer for parser.join */ 139 Py_ssize_t rec_size; /* size of allocated record */ 140 Py_ssize_t rec_len; /* length of record */ 141 int num_fields; /* number of fields in record */ 142 143 PyObject *error_obj; /* cached error object */ 144} WriterObj; 145 146/* 147 * DIALECT class 148 */ 149 150static PyObject * 151get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state) 152{ 153 PyObject *dialect_obj; 154 155 dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj); 156 if (dialect_obj == NULL) { 157 if (!PyErr_Occurred()) 158 PyErr_Format(module_state->error_obj, "unknown dialect"); 159 } 160 else 161 Py_INCREF(dialect_obj); 162 163 return dialect_obj; 164} 165 166static PyObject * 167get_char_or_None(Py_UCS4 c) 168{ 169 if (c == NOT_SET) { 170 Py_RETURN_NONE; 171 } 172 else 173 return PyUnicode_FromOrdinal(c); 174} 175 176static PyObject * 177Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored)) 178{ 179 Py_XINCREF(self->lineterminator); 180 return self->lineterminator; 181} 182 183static PyObject * 184Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored)) 185{ 186 return get_char_or_None(self->delimiter); 187} 188 189static PyObject * 190Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored)) 191{ 192 return get_char_or_None(self->escapechar); 193} 194 195static PyObject * 196Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored)) 197{ 198 return get_char_or_None(self->quotechar); 199} 200 201static PyObject * 202Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored)) 203{ 204 return PyLong_FromLong(self->quoting); 205} 206 207static int 208_set_bool(const char *name, char *target, PyObject *src, bool dflt) 209{ 210 if (src == NULL) 211 *target = dflt; 212 else { 213 int b = PyObject_IsTrue(src); 214 if (b < 0) 215 return -1; 216 *target = (char)b; 217 } 218 return 0; 219} 220 221static int 222_set_int(const char *name, int *target, PyObject *src, int dflt) 223{ 224 if (src == NULL) 225 *target = dflt; 226 else { 227 int value; 228 if (!PyLong_CheckExact(src)) { 229 PyErr_Format(PyExc_TypeError, 230 "\"%s\" must be an integer", name); 231 return -1; 232 } 233 value = _PyLong_AsInt(src); 234 if (value == -1 && PyErr_Occurred()) { 235 return -1; 236 } 237 *target = value; 238 } 239 return 0; 240} 241 242static int 243_set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) 244{ 245 if (src == NULL) { 246 *target = dflt; 247 } 248 else { 249 *target = NOT_SET; 250 if (src != Py_None) { 251 if (!PyUnicode_Check(src)) { 252 PyErr_Format(PyExc_TypeError, 253 "\"%s\" must be string or None, not %.200s", name, 254 Py_TYPE(src)->tp_name); 255 return -1; 256 } 257 Py_ssize_t len = PyUnicode_GetLength(src); 258 if (len < 0) { 259 return -1; 260 } 261 if (len != 1) { 262 PyErr_Format(PyExc_TypeError, 263 "\"%s\" must be a 1-character string", 264 name); 265 return -1; 266 } 267 /* PyUnicode_READY() is called in PyUnicode_GetLength() */ 268 *target = PyUnicode_READ_CHAR(src, 0); 269 } 270 } 271 return 0; 272} 273 274static int 275_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) 276{ 277 if (src == NULL) { 278 *target = dflt; 279 } 280 else { 281 if (!PyUnicode_Check(src)) { 282 PyErr_Format(PyExc_TypeError, 283 "\"%s\" must be string, not %.200s", name, 284 Py_TYPE(src)->tp_name); 285 return -1; 286 } 287 Py_ssize_t len = PyUnicode_GetLength(src); 288 if (len < 0) { 289 return -1; 290 } 291 if (len != 1) { 292 PyErr_Format(PyExc_TypeError, 293 "\"%s\" must be a 1-character string", 294 name); 295 return -1; 296 } 297 /* PyUnicode_READY() is called in PyUnicode_GetLength() */ 298 *target = PyUnicode_READ_CHAR(src, 0); 299 } 300 return 0; 301} 302 303static int 304_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) 305{ 306 if (src == NULL) 307 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL); 308 else { 309 if (src == Py_None) 310 *target = NULL; 311 else if (!PyUnicode_Check(src)) { 312 PyErr_Format(PyExc_TypeError, 313 "\"%s\" must be a string", name); 314 return -1; 315 } 316 else { 317 if (PyUnicode_READY(src) == -1) 318 return -1; 319 Py_INCREF(src); 320 Py_XSETREF(*target, src); 321 } 322 } 323 return 0; 324} 325 326static int 327dialect_check_quoting(int quoting) 328{ 329 const StyleDesc *qs; 330 331 for (qs = quote_styles; qs->name; qs++) { 332 if ((int)qs->style == quoting) 333 return 0; 334 } 335 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value"); 336 return -1; 337} 338 339#define D_OFF(x) offsetof(DialectObj, x) 340 341static struct PyMemberDef Dialect_memberlist[] = { 342 { "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY }, 343 { "doublequote", T_BOOL, D_OFF(doublequote), READONLY }, 344 { "strict", T_BOOL, D_OFF(strict), READONLY }, 345 { NULL } 346}; 347 348static PyGetSetDef Dialect_getsetlist[] = { 349 { "delimiter", (getter)Dialect_get_delimiter}, 350 { "escapechar", (getter)Dialect_get_escapechar}, 351 { "lineterminator", (getter)Dialect_get_lineterminator}, 352 { "quotechar", (getter)Dialect_get_quotechar}, 353 { "quoting", (getter)Dialect_get_quoting}, 354 {NULL}, 355}; 356 357static void 358Dialect_dealloc(DialectObj *self) 359{ 360 PyTypeObject *tp = Py_TYPE(self); 361 PyObject_GC_UnTrack(self); 362 tp->tp_clear((PyObject *)self); 363 PyObject_GC_Del(self); 364 Py_DECREF(tp); 365} 366 367static char *dialect_kws[] = { 368 "dialect", 369 "delimiter", 370 "doublequote", 371 "escapechar", 372 "lineterminator", 373 "quotechar", 374 "quoting", 375 "skipinitialspace", 376 "strict", 377 NULL 378}; 379 380static _csvstate * 381_csv_state_from_type(PyTypeObject *type, const char *name) 382{ 383 PyObject *module = PyType_GetModuleByDef(type, &_csvmodule); 384 if (module == NULL) { 385 return NULL; 386 } 387 _csvstate *module_state = PyModule_GetState(module); 388 if (module_state == NULL) { 389 PyErr_Format(PyExc_SystemError, 390 "%s: No _csv module state found", name); 391 return NULL; 392 } 393 return module_state; 394} 395 396static PyObject * 397dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) 398{ 399 DialectObj *self; 400 PyObject *ret = NULL; 401 PyObject *dialect = NULL; 402 PyObject *delimiter = NULL; 403 PyObject *doublequote = NULL; 404 PyObject *escapechar = NULL; 405 PyObject *lineterminator = NULL; 406 PyObject *quotechar = NULL; 407 PyObject *quoting = NULL; 408 PyObject *skipinitialspace = NULL; 409 PyObject *strict = NULL; 410 411 if (!PyArg_ParseTupleAndKeywords(args, kwargs, 412 "|OOOOOOOOO", dialect_kws, 413 &dialect, 414 &delimiter, 415 &doublequote, 416 &escapechar, 417 &lineterminator, 418 "echar, 419 "ing, 420 &skipinitialspace, 421 &strict)) 422 return NULL; 423 424 _csvstate *module_state = _csv_state_from_type(type, "dialect_new"); 425 if (module_state == NULL) { 426 return NULL; 427 } 428 429 if (dialect != NULL) { 430 if (PyUnicode_Check(dialect)) { 431 dialect = get_dialect_from_registry(dialect, module_state); 432 if (dialect == NULL) 433 return NULL; 434 } 435 else 436 Py_INCREF(dialect); 437 /* Can we reuse this instance? */ 438 if (PyObject_TypeCheck(dialect, module_state->dialect_type) && 439 delimiter == NULL && 440 doublequote == NULL && 441 escapechar == NULL && 442 lineterminator == NULL && 443 quotechar == NULL && 444 quoting == NULL && 445 skipinitialspace == NULL && 446 strict == NULL) 447 return dialect; 448 } 449 450 self = (DialectObj *)type->tp_alloc(type, 0); 451 if (self == NULL) { 452 Py_CLEAR(dialect); 453 return NULL; 454 } 455 self->lineterminator = NULL; 456 457 Py_XINCREF(delimiter); 458 Py_XINCREF(doublequote); 459 Py_XINCREF(escapechar); 460 Py_XINCREF(lineterminator); 461 Py_XINCREF(quotechar); 462 Py_XINCREF(quoting); 463 Py_XINCREF(skipinitialspace); 464 Py_XINCREF(strict); 465 if (dialect != NULL) { 466#define DIALECT_GETATTR(v, n) \ 467 do { \ 468 if (v == NULL) { \ 469 v = PyObject_GetAttrString(dialect, n); \ 470 if (v == NULL) \ 471 PyErr_Clear(); \ 472 } \ 473 } while (0) 474 DIALECT_GETATTR(delimiter, "delimiter"); 475 DIALECT_GETATTR(doublequote, "doublequote"); 476 DIALECT_GETATTR(escapechar, "escapechar"); 477 DIALECT_GETATTR(lineterminator, "lineterminator"); 478 DIALECT_GETATTR(quotechar, "quotechar"); 479 DIALECT_GETATTR(quoting, "quoting"); 480 DIALECT_GETATTR(skipinitialspace, "skipinitialspace"); 481 DIALECT_GETATTR(strict, "strict"); 482 } 483 484 /* check types and convert to C values */ 485#define DIASET(meth, name, target, src, dflt) \ 486 if (meth(name, target, src, dflt)) \ 487 goto err 488 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ','); 489 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true); 490 DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, NOT_SET); 491 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n"); 492 DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"'); 493 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL); 494 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false); 495 DIASET(_set_bool, "strict", &self->strict, strict, false); 496 497 /* validate options */ 498 if (dialect_check_quoting(self->quoting)) 499 goto err; 500 if (self->delimiter == NOT_SET) { 501 PyErr_SetString(PyExc_TypeError, 502 "\"delimiter\" must be a 1-character string"); 503 goto err; 504 } 505 if (quotechar == Py_None && quoting == NULL) 506 self->quoting = QUOTE_NONE; 507 if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) { 508 PyErr_SetString(PyExc_TypeError, 509 "quotechar must be set if quoting enabled"); 510 goto err; 511 } 512 if (self->lineterminator == NULL) { 513 PyErr_SetString(PyExc_TypeError, "lineterminator must be set"); 514 goto err; 515 } 516 517 ret = (PyObject *)self; 518 Py_INCREF(self); 519err: 520 Py_CLEAR(self); 521 Py_CLEAR(dialect); 522 Py_CLEAR(delimiter); 523 Py_CLEAR(doublequote); 524 Py_CLEAR(escapechar); 525 Py_CLEAR(lineterminator); 526 Py_CLEAR(quotechar); 527 Py_CLEAR(quoting); 528 Py_CLEAR(skipinitialspace); 529 Py_CLEAR(strict); 530 return ret; 531} 532 533/* Since dialect is now a heap type, it inherits pickling method for 534 * protocol 0 and 1 from object, therefore it needs to be overridden */ 535 536PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling"); 537 538static PyObject * 539Dialect_reduce(PyObject *self, PyObject *args) { 540 PyErr_Format(PyExc_TypeError, 541 "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self))); 542 return NULL; 543} 544 545static struct PyMethodDef dialect_methods[] = { 546 {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc}, 547 {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc}, 548 {NULL, NULL} 549}; 550 551PyDoc_STRVAR(Dialect_Type_doc, 552"CSV dialect\n" 553"\n" 554"The Dialect type records CSV parsing and generation options.\n"); 555 556static int 557Dialect_clear(DialectObj *self) 558{ 559 Py_CLEAR(self->lineterminator); 560 return 0; 561} 562 563static int 564Dialect_traverse(DialectObj *self, visitproc visit, void *arg) 565{ 566 Py_VISIT(self->lineterminator); 567 Py_VISIT(Py_TYPE(self)); 568 return 0; 569} 570 571static PyType_Slot Dialect_Type_slots[] = { 572 {Py_tp_doc, (char*)Dialect_Type_doc}, 573 {Py_tp_members, Dialect_memberlist}, 574 {Py_tp_getset, Dialect_getsetlist}, 575 {Py_tp_new, dialect_new}, 576 {Py_tp_methods, dialect_methods}, 577 {Py_tp_dealloc, Dialect_dealloc}, 578 {Py_tp_clear, Dialect_clear}, 579 {Py_tp_traverse, Dialect_traverse}, 580 {0, NULL} 581}; 582 583PyType_Spec Dialect_Type_spec = { 584 .name = "_csv.Dialect", 585 .basicsize = sizeof(DialectObj), 586 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC | 587 Py_TPFLAGS_IMMUTABLETYPE), 588 .slots = Dialect_Type_slots, 589}; 590 591 592/* 593 * Return an instance of the dialect type, given a Python instance or kwarg 594 * description of the dialect 595 */ 596static PyObject * 597_call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs) 598{ 599 PyObject *type = (PyObject *)module_state->dialect_type; 600 if (dialect_inst) { 601 return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs); 602 } 603 else { 604 return PyObject_VectorcallDict(type, NULL, 0, kwargs); 605 } 606} 607 608/* 609 * READER 610 */ 611static int 612parse_save_field(ReaderObj *self) 613{ 614 PyObject *field; 615 616 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, 617 (void *) self->field, self->field_len); 618 if (field == NULL) 619 return -1; 620 self->field_len = 0; 621 if (self->numeric_field) { 622 PyObject *tmp; 623 624 self->numeric_field = 0; 625 tmp = PyNumber_Float(field); 626 Py_DECREF(field); 627 if (tmp == NULL) 628 return -1; 629 field = tmp; 630 } 631 if (PyList_Append(self->fields, field) < 0) { 632 Py_DECREF(field); 633 return -1; 634 } 635 Py_DECREF(field); 636 return 0; 637} 638 639static int 640parse_grow_buff(ReaderObj *self) 641{ 642 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4)); 643 644 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096; 645 Py_UCS4 *field_new = self->field; 646 PyMem_Resize(field_new, Py_UCS4, field_size_new); 647 if (field_new == NULL) { 648 PyErr_NoMemory(); 649 return 0; 650 } 651 self->field = field_new; 652 self->field_size = field_size_new; 653 return 1; 654} 655 656static int 657parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c) 658{ 659 if (self->field_len >= module_state->field_limit) { 660 PyErr_Format(module_state->error_obj, 661 "field larger than field limit (%ld)", 662 module_state->field_limit); 663 return -1; 664 } 665 if (self->field_len == self->field_size && !parse_grow_buff(self)) 666 return -1; 667 self->field[self->field_len++] = c; 668 return 0; 669} 670 671static int 672parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c) 673{ 674 DialectObj *dialect = self->dialect; 675 676 switch (self->state) { 677 case START_RECORD: 678 /* start of record */ 679 if (c == EOL) 680 /* empty line - return [] */ 681 break; 682 else if (c == '\n' || c == '\r') { 683 self->state = EAT_CRNL; 684 break; 685 } 686 /* normal character - handle as START_FIELD */ 687 self->state = START_FIELD; 688 /* fallthru */ 689 case START_FIELD: 690 /* expecting field */ 691 if (c == '\n' || c == '\r' || c == EOL) { 692 /* save empty field - return [fields] */ 693 if (parse_save_field(self) < 0) 694 return -1; 695 self->state = (c == EOL ? START_RECORD : EAT_CRNL); 696 } 697 else if (c == dialect->quotechar && 698 dialect->quoting != QUOTE_NONE) { 699 /* start quoted field */ 700 self->state = IN_QUOTED_FIELD; 701 } 702 else if (c == dialect->escapechar) { 703 /* possible escaped character */ 704 self->state = ESCAPED_CHAR; 705 } 706 else if (c == ' ' && dialect->skipinitialspace) 707 /* ignore spaces at start of field */ 708 ; 709 else if (c == dialect->delimiter) { 710 /* save empty field */ 711 if (parse_save_field(self) < 0) 712 return -1; 713 } 714 else { 715 /* begin new unquoted field */ 716 if (dialect->quoting == QUOTE_NONNUMERIC) 717 self->numeric_field = 1; 718 if (parse_add_char(self, module_state, c) < 0) 719 return -1; 720 self->state = IN_FIELD; 721 } 722 break; 723 724 case ESCAPED_CHAR: 725 if (c == '\n' || c=='\r') { 726 if (parse_add_char(self, module_state, c) < 0) 727 return -1; 728 self->state = AFTER_ESCAPED_CRNL; 729 break; 730 } 731 if (c == EOL) 732 c = '\n'; 733 if (parse_add_char(self, module_state, c) < 0) 734 return -1; 735 self->state = IN_FIELD; 736 break; 737 738 case AFTER_ESCAPED_CRNL: 739 if (c == EOL) 740 break; 741 /*fallthru*/ 742 743 case IN_FIELD: 744 /* in unquoted field */ 745 if (c == '\n' || c == '\r' || c == EOL) { 746 /* end of line - return [fields] */ 747 if (parse_save_field(self) < 0) 748 return -1; 749 self->state = (c == EOL ? START_RECORD : EAT_CRNL); 750 } 751 else if (c == dialect->escapechar) { 752 /* possible escaped character */ 753 self->state = ESCAPED_CHAR; 754 } 755 else if (c == dialect->delimiter) { 756 /* save field - wait for new field */ 757 if (parse_save_field(self) < 0) 758 return -1; 759 self->state = START_FIELD; 760 } 761 else { 762 /* normal character - save in field */ 763 if (parse_add_char(self, module_state, c) < 0) 764 return -1; 765 } 766 break; 767 768 case IN_QUOTED_FIELD: 769 /* in quoted field */ 770 if (c == EOL) 771 ; 772 else if (c == dialect->escapechar) { 773 /* Possible escape character */ 774 self->state = ESCAPE_IN_QUOTED_FIELD; 775 } 776 else if (c == dialect->quotechar && 777 dialect->quoting != QUOTE_NONE) { 778 if (dialect->doublequote) { 779 /* doublequote; " represented by "" */ 780 self->state = QUOTE_IN_QUOTED_FIELD; 781 } 782 else { 783 /* end of quote part of field */ 784 self->state = IN_FIELD; 785 } 786 } 787 else { 788 /* normal character - save in field */ 789 if (parse_add_char(self, module_state, c) < 0) 790 return -1; 791 } 792 break; 793 794 case ESCAPE_IN_QUOTED_FIELD: 795 if (c == EOL) 796 c = '\n'; 797 if (parse_add_char(self, module_state, c) < 0) 798 return -1; 799 self->state = IN_QUOTED_FIELD; 800 break; 801 802 case QUOTE_IN_QUOTED_FIELD: 803 /* doublequote - seen a quote in a quoted field */ 804 if (dialect->quoting != QUOTE_NONE && 805 c == dialect->quotechar) { 806 /* save "" as " */ 807 if (parse_add_char(self, module_state, c) < 0) 808 return -1; 809 self->state = IN_QUOTED_FIELD; 810 } 811 else if (c == dialect->delimiter) { 812 /* save field - wait for new field */ 813 if (parse_save_field(self) < 0) 814 return -1; 815 self->state = START_FIELD; 816 } 817 else if (c == '\n' || c == '\r' || c == EOL) { 818 /* end of line - return [fields] */ 819 if (parse_save_field(self) < 0) 820 return -1; 821 self->state = (c == EOL ? START_RECORD : EAT_CRNL); 822 } 823 else if (!dialect->strict) { 824 if (parse_add_char(self, module_state, c) < 0) 825 return -1; 826 self->state = IN_FIELD; 827 } 828 else { 829 /* illegal */ 830 PyErr_Format(module_state->error_obj, "'%c' expected after '%c'", 831 dialect->delimiter, 832 dialect->quotechar); 833 return -1; 834 } 835 break; 836 837 case EAT_CRNL: 838 if (c == '\n' || c == '\r') 839 ; 840 else if (c == EOL) 841 self->state = START_RECORD; 842 else { 843 PyErr_Format(module_state->error_obj, 844 "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?"); 845 return -1; 846 } 847 break; 848 849 } 850 return 0; 851} 852 853static int 854parse_reset(ReaderObj *self) 855{ 856 Py_XSETREF(self->fields, PyList_New(0)); 857 if (self->fields == NULL) 858 return -1; 859 self->field_len = 0; 860 self->state = START_RECORD; 861 self->numeric_field = 0; 862 return 0; 863} 864 865static PyObject * 866Reader_iternext(ReaderObj *self) 867{ 868 PyObject *fields = NULL; 869 Py_UCS4 c; 870 Py_ssize_t pos, linelen; 871 unsigned int kind; 872 const void *data; 873 PyObject *lineobj; 874 875 _csvstate *module_state = _csv_state_from_type(Py_TYPE(self), 876 "Reader.__next__"); 877 if (module_state == NULL) { 878 return NULL; 879 } 880 881 if (parse_reset(self) < 0) 882 return NULL; 883 do { 884 lineobj = PyIter_Next(self->input_iter); 885 if (lineobj == NULL) { 886 /* End of input OR exception */ 887 if (!PyErr_Occurred() && (self->field_len != 0 || 888 self->state == IN_QUOTED_FIELD)) { 889 if (self->dialect->strict) 890 PyErr_SetString(module_state->error_obj, 891 "unexpected end of data"); 892 else if (parse_save_field(self) >= 0) 893 break; 894 } 895 return NULL; 896 } 897 if (!PyUnicode_Check(lineobj)) { 898 PyErr_Format(module_state->error_obj, 899 "iterator should return strings, " 900 "not %.200s " 901 "(the file should be opened in text mode)", 902 Py_TYPE(lineobj)->tp_name 903 ); 904 Py_DECREF(lineobj); 905 return NULL; 906 } 907 if (PyUnicode_READY(lineobj) == -1) { 908 Py_DECREF(lineobj); 909 return NULL; 910 } 911 ++self->line_num; 912 kind = PyUnicode_KIND(lineobj); 913 data = PyUnicode_DATA(lineobj); 914 pos = 0; 915 linelen = PyUnicode_GET_LENGTH(lineobj); 916 while (linelen--) { 917 c = PyUnicode_READ(kind, data, pos); 918 if (parse_process_char(self, module_state, c) < 0) { 919 Py_DECREF(lineobj); 920 goto err; 921 } 922 pos++; 923 } 924 Py_DECREF(lineobj); 925 if (parse_process_char(self, module_state, EOL) < 0) 926 goto err; 927 } while (self->state != START_RECORD); 928 929 fields = self->fields; 930 self->fields = NULL; 931err: 932 return fields; 933} 934 935static void 936Reader_dealloc(ReaderObj *self) 937{ 938 PyTypeObject *tp = Py_TYPE(self); 939 PyObject_GC_UnTrack(self); 940 tp->tp_clear((PyObject *)self); 941 if (self->field != NULL) { 942 PyMem_Free(self->field); 943 self->field = NULL; 944 } 945 PyObject_GC_Del(self); 946 Py_DECREF(tp); 947} 948 949static int 950Reader_traverse(ReaderObj *self, visitproc visit, void *arg) 951{ 952 Py_VISIT(self->dialect); 953 Py_VISIT(self->input_iter); 954 Py_VISIT(self->fields); 955 Py_VISIT(Py_TYPE(self)); 956 return 0; 957} 958 959static int 960Reader_clear(ReaderObj *self) 961{ 962 Py_CLEAR(self->dialect); 963 Py_CLEAR(self->input_iter); 964 Py_CLEAR(self->fields); 965 return 0; 966} 967 968PyDoc_STRVAR(Reader_Type_doc, 969"CSV reader\n" 970"\n" 971"Reader objects are responsible for reading and parsing tabular data\n" 972"in CSV format.\n" 973); 974 975static struct PyMethodDef Reader_methods[] = { 976 { NULL, NULL } 977}; 978#define R_OFF(x) offsetof(ReaderObj, x) 979 980static struct PyMemberDef Reader_memberlist[] = { 981 { "dialect", T_OBJECT, R_OFF(dialect), READONLY }, 982 { "line_num", T_ULONG, R_OFF(line_num), READONLY }, 983 { NULL } 984}; 985 986 987static PyType_Slot Reader_Type_slots[] = { 988 {Py_tp_doc, (char*)Reader_Type_doc}, 989 {Py_tp_traverse, Reader_traverse}, 990 {Py_tp_iter, PyObject_SelfIter}, 991 {Py_tp_iternext, Reader_iternext}, 992 {Py_tp_methods, Reader_methods}, 993 {Py_tp_members, Reader_memberlist}, 994 {Py_tp_clear, Reader_clear}, 995 {Py_tp_dealloc, Reader_dealloc}, 996 {0, NULL} 997}; 998 999PyType_Spec Reader_Type_spec = { 1000 .name = "_csv.reader", 1001 .basicsize = sizeof(ReaderObj), 1002 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC | 1003 Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION), 1004 .slots = Reader_Type_slots 1005}; 1006 1007 1008static PyObject * 1009csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) 1010{ 1011 PyObject * iterator, * dialect = NULL; 1012 _csvstate *module_state = get_csv_state(module); 1013 ReaderObj * self = PyObject_GC_New( 1014 ReaderObj, 1015 module_state->reader_type); 1016 1017 if (!self) 1018 return NULL; 1019 1020 self->dialect = NULL; 1021 self->fields = NULL; 1022 self->input_iter = NULL; 1023 self->field = NULL; 1024 self->field_size = 0; 1025 self->line_num = 0; 1026 1027 if (parse_reset(self) < 0) { 1028 Py_DECREF(self); 1029 return NULL; 1030 } 1031 1032 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) { 1033 Py_DECREF(self); 1034 return NULL; 1035 } 1036 self->input_iter = PyObject_GetIter(iterator); 1037 if (self->input_iter == NULL) { 1038 Py_DECREF(self); 1039 return NULL; 1040 } 1041 self->dialect = (DialectObj *)_call_dialect(module_state, dialect, 1042 keyword_args); 1043 if (self->dialect == NULL) { 1044 Py_DECREF(self); 1045 return NULL; 1046 } 1047 1048 PyObject_GC_Track(self); 1049 return (PyObject *)self; 1050} 1051 1052/* 1053 * WRITER 1054 */ 1055/* ---------------------------------------------------------------- */ 1056static void 1057join_reset(WriterObj *self) 1058{ 1059 self->rec_len = 0; 1060 self->num_fields = 0; 1061} 1062 1063#define MEM_INCR 32768 1064 1065/* Calculate new record length or append field to record. Return new 1066 * record length. 1067 */ 1068static Py_ssize_t 1069join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data, 1070 Py_ssize_t field_len, int *quoted, 1071 int copy_phase) 1072{ 1073 DialectObj *dialect = self->dialect; 1074 int i; 1075 Py_ssize_t rec_len; 1076 1077#define INCLEN \ 1078 do {\ 1079 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \ 1080 goto overflow; \ 1081 } \ 1082 rec_len++; \ 1083 } while(0) 1084 1085#define ADDCH(c) \ 1086 do {\ 1087 if (copy_phase) \ 1088 self->rec[rec_len] = c;\ 1089 INCLEN;\ 1090 } while(0) 1091 1092 rec_len = self->rec_len; 1093 1094 /* If this is not the first field we need a field separator */ 1095 if (self->num_fields > 0) 1096 ADDCH(dialect->delimiter); 1097 1098 /* Handle preceding quote */ 1099 if (copy_phase && *quoted) 1100 ADDCH(dialect->quotechar); 1101 1102 /* Copy/count field data */ 1103 /* If field is null just pass over */ 1104 for (i = 0; field_data && (i < field_len); i++) { 1105 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i); 1106 int want_escape = 0; 1107 1108 if (c == dialect->delimiter || 1109 c == dialect->escapechar || 1110 c == dialect->quotechar || 1111 PyUnicode_FindChar( 1112 dialect->lineterminator, c, 0, 1113 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) { 1114 if (dialect->quoting == QUOTE_NONE) 1115 want_escape = 1; 1116 else { 1117 if (c == dialect->quotechar) { 1118 if (dialect->doublequote) 1119 ADDCH(dialect->quotechar); 1120 else 1121 want_escape = 1; 1122 } 1123 else if (c == dialect->escapechar) { 1124 want_escape = 1; 1125 } 1126 if (!want_escape) 1127 *quoted = 1; 1128 } 1129 if (want_escape) { 1130 if (dialect->escapechar == NOT_SET) { 1131 PyErr_Format(self->error_obj, 1132 "need to escape, but no escapechar set"); 1133 return -1; 1134 } 1135 ADDCH(dialect->escapechar); 1136 } 1137 } 1138 /* Copy field character into record buffer. 1139 */ 1140 ADDCH(c); 1141 } 1142 1143 if (*quoted) { 1144 if (copy_phase) 1145 ADDCH(dialect->quotechar); 1146 else { 1147 INCLEN; /* starting quote */ 1148 INCLEN; /* ending quote */ 1149 } 1150 } 1151 return rec_len; 1152 1153 overflow: 1154 PyErr_NoMemory(); 1155 return -1; 1156#undef ADDCH 1157#undef INCLEN 1158} 1159 1160static int 1161join_check_rec_size(WriterObj *self, Py_ssize_t rec_len) 1162{ 1163 assert(rec_len >= 0); 1164 1165 if (rec_len > self->rec_size) { 1166 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR; 1167 Py_UCS4 *rec_new = self->rec; 1168 PyMem_Resize(rec_new, Py_UCS4, rec_size_new); 1169 if (rec_new == NULL) { 1170 PyErr_NoMemory(); 1171 return 0; 1172 } 1173 self->rec = rec_new; 1174 self->rec_size = (Py_ssize_t)rec_size_new; 1175 } 1176 return 1; 1177} 1178 1179static int 1180join_append(WriterObj *self, PyObject *field, int quoted) 1181{ 1182 unsigned int field_kind = -1; 1183 const void *field_data = NULL; 1184 Py_ssize_t field_len = 0; 1185 Py_ssize_t rec_len; 1186 1187 if (field != NULL) { 1188 if (PyUnicode_READY(field) == -1) 1189 return 0; 1190 field_kind = PyUnicode_KIND(field); 1191 field_data = PyUnicode_DATA(field); 1192 field_len = PyUnicode_GET_LENGTH(field); 1193 } 1194 rec_len = join_append_data(self, field_kind, field_data, field_len, 1195 "ed, 0); 1196 if (rec_len < 0) 1197 return 0; 1198 1199 /* grow record buffer if necessary */ 1200 if (!join_check_rec_size(self, rec_len)) 1201 return 0; 1202 1203 self->rec_len = join_append_data(self, field_kind, field_data, field_len, 1204 "ed, 1); 1205 self->num_fields++; 1206 1207 return 1; 1208} 1209 1210static int 1211join_append_lineterminator(WriterObj *self) 1212{ 1213 Py_ssize_t terminator_len, i; 1214 unsigned int term_kind; 1215 const void *term_data; 1216 1217 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator); 1218 if (terminator_len == -1) 1219 return 0; 1220 1221 /* grow record buffer if necessary */ 1222 if (!join_check_rec_size(self, self->rec_len + terminator_len)) 1223 return 0; 1224 1225 term_kind = PyUnicode_KIND(self->dialect->lineterminator); 1226 term_data = PyUnicode_DATA(self->dialect->lineterminator); 1227 for (i = 0; i < terminator_len; i++) 1228 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i); 1229 self->rec_len += terminator_len; 1230 1231 return 1; 1232} 1233 1234PyDoc_STRVAR(csv_writerow_doc, 1235"writerow(iterable)\n" 1236"\n" 1237"Construct and write a CSV record from an iterable of fields. Non-string\n" 1238"elements will be converted to string."); 1239 1240static PyObject * 1241csv_writerow(WriterObj *self, PyObject *seq) 1242{ 1243 DialectObj *dialect = self->dialect; 1244 PyObject *iter, *field, *line, *result; 1245 1246 iter = PyObject_GetIter(seq); 1247 if (iter == NULL) { 1248 if (PyErr_ExceptionMatches(PyExc_TypeError)) { 1249 PyErr_Format(self->error_obj, 1250 "iterable expected, not %.200s", 1251 Py_TYPE(seq)->tp_name); 1252 } 1253 return NULL; 1254 } 1255 1256 /* Join all fields in internal buffer. 1257 */ 1258 join_reset(self); 1259 while ((field = PyIter_Next(iter))) { 1260 int append_ok; 1261 int quoted; 1262 1263 switch (dialect->quoting) { 1264 case QUOTE_NONNUMERIC: 1265 quoted = !PyNumber_Check(field); 1266 break; 1267 case QUOTE_ALL: 1268 quoted = 1; 1269 break; 1270 default: 1271 quoted = 0; 1272 break; 1273 } 1274 1275 if (PyUnicode_Check(field)) { 1276 append_ok = join_append(self, field, quoted); 1277 Py_DECREF(field); 1278 } 1279 else if (field == Py_None) { 1280 append_ok = join_append(self, NULL, quoted); 1281 Py_DECREF(field); 1282 } 1283 else { 1284 PyObject *str; 1285 1286 str = PyObject_Str(field); 1287 Py_DECREF(field); 1288 if (str == NULL) { 1289 Py_DECREF(iter); 1290 return NULL; 1291 } 1292 append_ok = join_append(self, str, quoted); 1293 Py_DECREF(str); 1294 } 1295 if (!append_ok) { 1296 Py_DECREF(iter); 1297 return NULL; 1298 } 1299 } 1300 Py_DECREF(iter); 1301 if (PyErr_Occurred()) 1302 return NULL; 1303 1304 if (self->num_fields > 0 && self->rec_len == 0) { 1305 if (dialect->quoting == QUOTE_NONE) { 1306 PyErr_Format(self->error_obj, 1307 "single empty field record must be quoted"); 1308 return NULL; 1309 } 1310 self->num_fields--; 1311 if (!join_append(self, NULL, 1)) 1312 return NULL; 1313 } 1314 1315 /* Add line terminator. 1316 */ 1317 if (!join_append_lineterminator(self)) { 1318 return NULL; 1319 } 1320 1321 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, 1322 (void *) self->rec, self->rec_len); 1323 if (line == NULL) { 1324 return NULL; 1325 } 1326 result = PyObject_CallOneArg(self->write, line); 1327 Py_DECREF(line); 1328 return result; 1329} 1330 1331PyDoc_STRVAR(csv_writerows_doc, 1332"writerows(iterable of iterables)\n" 1333"\n" 1334"Construct and write a series of iterables to a csv file. Non-string\n" 1335"elements will be converted to string."); 1336 1337static PyObject * 1338csv_writerows(WriterObj *self, PyObject *seqseq) 1339{ 1340 PyObject *row_iter, *row_obj, *result; 1341 1342 row_iter = PyObject_GetIter(seqseq); 1343 if (row_iter == NULL) { 1344 return NULL; 1345 } 1346 while ((row_obj = PyIter_Next(row_iter))) { 1347 result = csv_writerow(self, row_obj); 1348 Py_DECREF(row_obj); 1349 if (!result) { 1350 Py_DECREF(row_iter); 1351 return NULL; 1352 } 1353 else 1354 Py_DECREF(result); 1355 } 1356 Py_DECREF(row_iter); 1357 if (PyErr_Occurred()) 1358 return NULL; 1359 Py_RETURN_NONE; 1360} 1361 1362static struct PyMethodDef Writer_methods[] = { 1363 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc}, 1364 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc}, 1365 { NULL, NULL } 1366}; 1367 1368#define W_OFF(x) offsetof(WriterObj, x) 1369 1370static struct PyMemberDef Writer_memberlist[] = { 1371 { "dialect", T_OBJECT, W_OFF(dialect), READONLY }, 1372 { NULL } 1373}; 1374 1375static int 1376Writer_traverse(WriterObj *self, visitproc visit, void *arg) 1377{ 1378 Py_VISIT(self->dialect); 1379 Py_VISIT(self->write); 1380 Py_VISIT(self->error_obj); 1381 Py_VISIT(Py_TYPE(self)); 1382 return 0; 1383} 1384 1385static int 1386Writer_clear(WriterObj *self) 1387{ 1388 Py_CLEAR(self->dialect); 1389 Py_CLEAR(self->write); 1390 Py_CLEAR(self->error_obj); 1391 return 0; 1392} 1393 1394static void 1395Writer_dealloc(WriterObj *self) 1396{ 1397 PyTypeObject *tp = Py_TYPE(self); 1398 PyObject_GC_UnTrack(self); 1399 tp->tp_clear((PyObject *)self); 1400 if (self->rec != NULL) { 1401 PyMem_Free(self->rec); 1402 } 1403 PyObject_GC_Del(self); 1404 Py_DECREF(tp); 1405} 1406 1407PyDoc_STRVAR(Writer_Type_doc, 1408"CSV writer\n" 1409"\n" 1410"Writer objects are responsible for generating tabular data\n" 1411"in CSV format from sequence input.\n" 1412); 1413 1414static PyType_Slot Writer_Type_slots[] = { 1415 {Py_tp_doc, (char*)Writer_Type_doc}, 1416 {Py_tp_traverse, Writer_traverse}, 1417 {Py_tp_clear, Writer_clear}, 1418 {Py_tp_dealloc, Writer_dealloc}, 1419 {Py_tp_methods, Writer_methods}, 1420 {Py_tp_members, Writer_memberlist}, 1421 {0, NULL} 1422}; 1423 1424PyType_Spec Writer_Type_spec = { 1425 .name = "_csv.writer", 1426 .basicsize = sizeof(WriterObj), 1427 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC | 1428 Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION), 1429 .slots = Writer_Type_slots, 1430}; 1431 1432 1433static PyObject * 1434csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) 1435{ 1436 PyObject * output_file, * dialect = NULL; 1437 _csvstate *module_state = get_csv_state(module); 1438 WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type); 1439 1440 if (!self) 1441 return NULL; 1442 1443 self->dialect = NULL; 1444 self->write = NULL; 1445 1446 self->rec = NULL; 1447 self->rec_size = 0; 1448 self->rec_len = 0; 1449 self->num_fields = 0; 1450 1451 self->error_obj = Py_NewRef(module_state->error_obj); 1452 1453 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) { 1454 Py_DECREF(self); 1455 return NULL; 1456 } 1457 if (_PyObject_LookupAttr(output_file, 1458 module_state->str_write, 1459 &self->write) < 0) { 1460 Py_DECREF(self); 1461 return NULL; 1462 } 1463 if (self->write == NULL || !PyCallable_Check(self->write)) { 1464 PyErr_SetString(PyExc_TypeError, 1465 "argument 1 must have a \"write\" method"); 1466 Py_DECREF(self); 1467 return NULL; 1468 } 1469 self->dialect = (DialectObj *)_call_dialect(module_state, dialect, 1470 keyword_args); 1471 if (self->dialect == NULL) { 1472 Py_DECREF(self); 1473 return NULL; 1474 } 1475 PyObject_GC_Track(self); 1476 return (PyObject *)self; 1477} 1478 1479/* 1480 * DIALECT REGISTRY 1481 */ 1482 1483/*[clinic input] 1484_csv.list_dialects 1485 1486Return a list of all known dialect names. 1487 1488 names = csv.list_dialects() 1489[clinic start generated code]*/ 1490 1491static PyObject * 1492_csv_list_dialects_impl(PyObject *module) 1493/*[clinic end generated code: output=a5b92b215b006a6d input=8953943eb17d98ab]*/ 1494{ 1495 return PyDict_Keys(get_csv_state(module)->dialects); 1496} 1497 1498static PyObject * 1499csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) 1500{ 1501 PyObject *name_obj, *dialect_obj = NULL; 1502 _csvstate *module_state = get_csv_state(module); 1503 PyObject *dialect; 1504 1505 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj)) 1506 return NULL; 1507 if (!PyUnicode_Check(name_obj)) { 1508 PyErr_SetString(PyExc_TypeError, 1509 "dialect name must be a string"); 1510 return NULL; 1511 } 1512 if (PyUnicode_READY(name_obj) == -1) 1513 return NULL; 1514 dialect = _call_dialect(module_state, dialect_obj, kwargs); 1515 if (dialect == NULL) 1516 return NULL; 1517 if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) { 1518 Py_DECREF(dialect); 1519 return NULL; 1520 } 1521 Py_DECREF(dialect); 1522 Py_RETURN_NONE; 1523} 1524 1525 1526/*[clinic input] 1527_csv.unregister_dialect 1528 1529 name: object 1530 1531Delete the name/dialect mapping associated with a string name. 1532 1533 csv.unregister_dialect(name) 1534[clinic start generated code]*/ 1535 1536static PyObject * 1537_csv_unregister_dialect_impl(PyObject *module, PyObject *name) 1538/*[clinic end generated code: output=0813ebca6c058df4 input=6b5c1557bf60c7e7]*/ 1539{ 1540 _csvstate *module_state = get_csv_state(module); 1541 if (PyDict_DelItem(module_state->dialects, name) < 0) { 1542 if (PyErr_ExceptionMatches(PyExc_KeyError)) { 1543 PyErr_Format(module_state->error_obj, "unknown dialect"); 1544 } 1545 return NULL; 1546 } 1547 Py_RETURN_NONE; 1548} 1549 1550/*[clinic input] 1551_csv.get_dialect 1552 1553 name: object 1554 1555Return the dialect instance associated with name. 1556 1557 dialect = csv.get_dialect(name) 1558[clinic start generated code]*/ 1559 1560static PyObject * 1561_csv_get_dialect_impl(PyObject *module, PyObject *name) 1562/*[clinic end generated code: output=aa988cd573bebebb input=edf9ddab32e448fb]*/ 1563{ 1564 return get_dialect_from_registry(name, get_csv_state(module)); 1565} 1566 1567/*[clinic input] 1568_csv.field_size_limit 1569 1570 new_limit: object = NULL 1571 1572Sets an upper limit on parsed fields. 1573 1574 csv.field_size_limit([limit]) 1575 1576Returns old limit. If limit is not given, no new limit is set and 1577the old limit is returned 1578[clinic start generated code]*/ 1579 1580static PyObject * 1581_csv_field_size_limit_impl(PyObject *module, PyObject *new_limit) 1582/*[clinic end generated code: output=f2799ecd908e250b input=cec70e9226406435]*/ 1583{ 1584 _csvstate *module_state = get_csv_state(module); 1585 long old_limit = module_state->field_limit; 1586 if (new_limit != NULL) { 1587 if (!PyLong_CheckExact(new_limit)) { 1588 PyErr_Format(PyExc_TypeError, 1589 "limit must be an integer"); 1590 return NULL; 1591 } 1592 module_state->field_limit = PyLong_AsLong(new_limit); 1593 if (module_state->field_limit == -1 && PyErr_Occurred()) { 1594 module_state->field_limit = old_limit; 1595 return NULL; 1596 } 1597 } 1598 return PyLong_FromLong(old_limit); 1599} 1600 1601static PyType_Slot error_slots[] = { 1602 {0, NULL}, 1603}; 1604 1605PyType_Spec error_spec = { 1606 .name = "_csv.Error", 1607 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, 1608 .slots = error_slots, 1609}; 1610 1611/* 1612 * MODULE 1613 */ 1614 1615PyDoc_STRVAR(csv_module_doc, 1616"CSV parsing and writing.\n" 1617"\n" 1618"This module provides classes that assist in the reading and writing\n" 1619"of Comma Separated Value (CSV) files, and implements the interface\n" 1620"described by PEP 305. Although many CSV files are simple to parse,\n" 1621"the format is not formally defined by a stable specification and\n" 1622"is subtle enough that parsing lines of a CSV file with something\n" 1623"like line.split(\",\") is bound to fail. The module supports three\n" 1624"basic APIs: reading, writing, and registration of dialects.\n" 1625"\n" 1626"\n" 1627"DIALECT REGISTRATION:\n" 1628"\n" 1629"Readers and writers support a dialect argument, which is a convenient\n" 1630"handle on a group of settings. When the dialect argument is a string,\n" 1631"it identifies one of the dialects previously registered with the module.\n" 1632"If it is a class or instance, the attributes of the argument are used as\n" 1633"the settings for the reader or writer:\n" 1634"\n" 1635" class excel:\n" 1636" delimiter = ','\n" 1637" quotechar = '\"'\n" 1638" escapechar = None\n" 1639" doublequote = True\n" 1640" skipinitialspace = False\n" 1641" lineterminator = '\\r\\n'\n" 1642" quoting = QUOTE_MINIMAL\n" 1643"\n" 1644"SETTINGS:\n" 1645"\n" 1646" * quotechar - specifies a one-character string to use as the\n" 1647" quoting character. It defaults to '\"'.\n" 1648" * delimiter - specifies a one-character string to use as the\n" 1649" field separator. It defaults to ','.\n" 1650" * skipinitialspace - specifies how to interpret spaces which\n" 1651" immediately follow a delimiter. It defaults to False, which\n" 1652" means that spaces immediately following a delimiter is part\n" 1653" of the following field.\n" 1654" * lineterminator - specifies the character sequence which should\n" 1655" terminate rows.\n" 1656" * quoting - controls when quotes should be generated by the writer.\n" 1657" It can take on any of the following module constants:\n" 1658"\n" 1659" csv.QUOTE_MINIMAL means only when required, for example, when a\n" 1660" field contains either the quotechar or the delimiter\n" 1661" csv.QUOTE_ALL means that quotes are always placed around fields.\n" 1662" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n" 1663" fields which do not parse as integers or floating point\n" 1664" numbers.\n" 1665" csv.QUOTE_NONE means that quotes are never placed around fields.\n" 1666" * escapechar - specifies a one-character string used to escape\n" 1667" the delimiter when quoting is set to QUOTE_NONE.\n" 1668" * doublequote - controls the handling of quotes inside fields. When\n" 1669" True, two consecutive quotes are interpreted as one during read,\n" 1670" and when writing, each quote character embedded in the data is\n" 1671" written as two quotes\n"); 1672 1673PyDoc_STRVAR(csv_reader_doc, 1674" csv_reader = reader(iterable [, dialect='excel']\n" 1675" [optional keyword args])\n" 1676" for row in csv_reader:\n" 1677" process(row)\n" 1678"\n" 1679"The \"iterable\" argument can be any object that returns a line\n" 1680"of input for each iteration, such as a file object or a list. The\n" 1681"optional \"dialect\" parameter is discussed below. The function\n" 1682"also accepts optional keyword arguments which override settings\n" 1683"provided by the dialect.\n" 1684"\n" 1685"The returned object is an iterator. Each iteration returns a row\n" 1686"of the CSV file (which can span multiple input lines).\n"); 1687 1688PyDoc_STRVAR(csv_writer_doc, 1689" csv_writer = csv.writer(fileobj [, dialect='excel']\n" 1690" [optional keyword args])\n" 1691" for row in sequence:\n" 1692" csv_writer.writerow(row)\n" 1693"\n" 1694" [or]\n" 1695"\n" 1696" csv_writer = csv.writer(fileobj [, dialect='excel']\n" 1697" [optional keyword args])\n" 1698" csv_writer.writerows(rows)\n" 1699"\n" 1700"The \"fileobj\" argument can be any object that supports the file API.\n"); 1701 1702PyDoc_STRVAR(csv_register_dialect_doc, 1703"Create a mapping from a string name to a dialect class.\n" 1704" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])"); 1705 1706static struct PyMethodDef csv_methods[] = { 1707 { "reader", _PyCFunction_CAST(csv_reader), 1708 METH_VARARGS | METH_KEYWORDS, csv_reader_doc}, 1709 { "writer", _PyCFunction_CAST(csv_writer), 1710 METH_VARARGS | METH_KEYWORDS, csv_writer_doc}, 1711 { "register_dialect", _PyCFunction_CAST(csv_register_dialect), 1712 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc}, 1713 _CSV_LIST_DIALECTS_METHODDEF 1714 _CSV_UNREGISTER_DIALECT_METHODDEF 1715 _CSV_GET_DIALECT_METHODDEF 1716 _CSV_FIELD_SIZE_LIMIT_METHODDEF 1717 { NULL, NULL } 1718}; 1719 1720static int 1721csv_exec(PyObject *module) { 1722 const StyleDesc *style; 1723 PyObject *temp; 1724 _csvstate *module_state = get_csv_state(module); 1725 1726 temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL); 1727 module_state->dialect_type = (PyTypeObject *)temp; 1728 if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) { 1729 return -1; 1730 } 1731 1732 temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL); 1733 module_state->reader_type = (PyTypeObject *)temp; 1734 if (PyModule_AddObjectRef(module, "Reader", temp) < 0) { 1735 return -1; 1736 } 1737 1738 temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL); 1739 module_state->writer_type = (PyTypeObject *)temp; 1740 if (PyModule_AddObjectRef(module, "Writer", temp) < 0) { 1741 return -1; 1742 } 1743 1744 /* Add version to the module. */ 1745 if (PyModule_AddStringConstant(module, "__version__", 1746 MODULE_VERSION) == -1) { 1747 return -1; 1748 } 1749 1750 /* Set the field limit */ 1751 module_state->field_limit = 128 * 1024; 1752 1753 /* Add _dialects dictionary */ 1754 module_state->dialects = PyDict_New(); 1755 if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) { 1756 return -1; 1757 } 1758 1759 /* Add quote styles into dictionary */ 1760 for (style = quote_styles; style->name; style++) { 1761 if (PyModule_AddIntConstant(module, style->name, 1762 style->style) == -1) 1763 return -1; 1764 } 1765 1766 /* Add the CSV exception object to the module. */ 1767 PyObject *bases = PyTuple_Pack(1, PyExc_Exception); 1768 if (bases == NULL) { 1769 return -1; 1770 } 1771 module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec, 1772 bases); 1773 Py_DECREF(bases); 1774 if (module_state->error_obj == NULL) { 1775 return -1; 1776 } 1777 if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) { 1778 return -1; 1779 } 1780 1781 module_state->str_write = PyUnicode_InternFromString("write"); 1782 if (module_state->str_write == NULL) { 1783 return -1; 1784 } 1785 return 0; 1786} 1787 1788static PyModuleDef_Slot csv_slots[] = { 1789 {Py_mod_exec, csv_exec}, 1790 {0, NULL} 1791}; 1792 1793static struct PyModuleDef _csvmodule = { 1794 PyModuleDef_HEAD_INIT, 1795 "_csv", 1796 csv_module_doc, 1797 sizeof(_csvstate), 1798 csv_methods, 1799 csv_slots, 1800 _csv_traverse, 1801 _csv_clear, 1802 _csv_free 1803}; 1804 1805PyMODINIT_FUNC 1806PyInit__csv(void) 1807{ 1808 return PyModuleDef_Init(&_csvmodule); 1809} 1810