17db96d56Sopenharmony_ci/* Accumulator struct implementation */ 27db96d56Sopenharmony_ci 37db96d56Sopenharmony_ci#include "Python.h" 47db96d56Sopenharmony_ci#include "pycore_accu.h" 57db96d56Sopenharmony_ci 67db96d56Sopenharmony_cistatic PyObject * 77db96d56Sopenharmony_cijoin_list_unicode(PyObject *lst) 87db96d56Sopenharmony_ci{ 97db96d56Sopenharmony_ci /* return ''.join(lst) */ 107db96d56Sopenharmony_ci PyObject *sep, *ret; 117db96d56Sopenharmony_ci sep = PyUnicode_FromStringAndSize("", 0); 127db96d56Sopenharmony_ci ret = PyUnicode_Join(sep, lst); 137db96d56Sopenharmony_ci Py_DECREF(sep); 147db96d56Sopenharmony_ci return ret; 157db96d56Sopenharmony_ci} 167db96d56Sopenharmony_ci 177db96d56Sopenharmony_ciint 187db96d56Sopenharmony_ci_PyAccu_Init(_PyAccu *acc) 197db96d56Sopenharmony_ci{ 207db96d56Sopenharmony_ci /* Lazily allocated */ 217db96d56Sopenharmony_ci acc->large = NULL; 227db96d56Sopenharmony_ci acc->small = PyList_New(0); 237db96d56Sopenharmony_ci if (acc->small == NULL) 247db96d56Sopenharmony_ci return -1; 257db96d56Sopenharmony_ci return 0; 267db96d56Sopenharmony_ci} 277db96d56Sopenharmony_ci 287db96d56Sopenharmony_cistatic int 297db96d56Sopenharmony_ciflush_accumulator(_PyAccu *acc) 307db96d56Sopenharmony_ci{ 317db96d56Sopenharmony_ci Py_ssize_t nsmall = PyList_GET_SIZE(acc->small); 327db96d56Sopenharmony_ci if (nsmall) { 337db96d56Sopenharmony_ci int ret; 347db96d56Sopenharmony_ci PyObject *joined; 357db96d56Sopenharmony_ci if (acc->large == NULL) { 367db96d56Sopenharmony_ci acc->large = PyList_New(0); 377db96d56Sopenharmony_ci if (acc->large == NULL) 387db96d56Sopenharmony_ci return -1; 397db96d56Sopenharmony_ci } 407db96d56Sopenharmony_ci joined = join_list_unicode(acc->small); 417db96d56Sopenharmony_ci if (joined == NULL) 427db96d56Sopenharmony_ci return -1; 437db96d56Sopenharmony_ci if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) { 447db96d56Sopenharmony_ci Py_DECREF(joined); 457db96d56Sopenharmony_ci return -1; 467db96d56Sopenharmony_ci } 477db96d56Sopenharmony_ci ret = PyList_Append(acc->large, joined); 487db96d56Sopenharmony_ci Py_DECREF(joined); 497db96d56Sopenharmony_ci return ret; 507db96d56Sopenharmony_ci } 517db96d56Sopenharmony_ci return 0; 527db96d56Sopenharmony_ci} 537db96d56Sopenharmony_ci 547db96d56Sopenharmony_ciint 557db96d56Sopenharmony_ci_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode) 567db96d56Sopenharmony_ci{ 577db96d56Sopenharmony_ci Py_ssize_t nsmall; 587db96d56Sopenharmony_ci assert(PyUnicode_Check(unicode)); 597db96d56Sopenharmony_ci 607db96d56Sopenharmony_ci if (PyList_Append(acc->small, unicode)) 617db96d56Sopenharmony_ci return -1; 627db96d56Sopenharmony_ci nsmall = PyList_GET_SIZE(acc->small); 637db96d56Sopenharmony_ci /* Each item in a list of unicode objects has an overhead (in 64-bit 647db96d56Sopenharmony_ci * builds) of: 657db96d56Sopenharmony_ci * - 8 bytes for the list slot 667db96d56Sopenharmony_ci * - 56 bytes for the header of the unicode object 677db96d56Sopenharmony_ci * that is, 64 bytes. 100000 such objects waste more than 6 MiB 687db96d56Sopenharmony_ci * compared to a single concatenated string. 697db96d56Sopenharmony_ci */ 707db96d56Sopenharmony_ci if (nsmall < 100000) 717db96d56Sopenharmony_ci return 0; 727db96d56Sopenharmony_ci return flush_accumulator(acc); 737db96d56Sopenharmony_ci} 747db96d56Sopenharmony_ci 757db96d56Sopenharmony_ciPyObject * 767db96d56Sopenharmony_ci_PyAccu_FinishAsList(_PyAccu *acc) 777db96d56Sopenharmony_ci{ 787db96d56Sopenharmony_ci int ret; 797db96d56Sopenharmony_ci PyObject *res; 807db96d56Sopenharmony_ci 817db96d56Sopenharmony_ci ret = flush_accumulator(acc); 827db96d56Sopenharmony_ci Py_CLEAR(acc->small); 837db96d56Sopenharmony_ci if (ret) { 847db96d56Sopenharmony_ci Py_CLEAR(acc->large); 857db96d56Sopenharmony_ci return NULL; 867db96d56Sopenharmony_ci } 877db96d56Sopenharmony_ci res = acc->large; 887db96d56Sopenharmony_ci acc->large = NULL; 897db96d56Sopenharmony_ci return res; 907db96d56Sopenharmony_ci} 917db96d56Sopenharmony_ci 927db96d56Sopenharmony_ciPyObject * 937db96d56Sopenharmony_ci_PyAccu_Finish(_PyAccu *acc) 947db96d56Sopenharmony_ci{ 957db96d56Sopenharmony_ci PyObject *list, *res; 967db96d56Sopenharmony_ci if (acc->large == NULL) { 977db96d56Sopenharmony_ci list = acc->small; 987db96d56Sopenharmony_ci acc->small = NULL; 997db96d56Sopenharmony_ci } 1007db96d56Sopenharmony_ci else { 1017db96d56Sopenharmony_ci list = _PyAccu_FinishAsList(acc); 1027db96d56Sopenharmony_ci if (!list) 1037db96d56Sopenharmony_ci return NULL; 1047db96d56Sopenharmony_ci } 1057db96d56Sopenharmony_ci res = join_list_unicode(list); 1067db96d56Sopenharmony_ci Py_DECREF(list); 1077db96d56Sopenharmony_ci return res; 1087db96d56Sopenharmony_ci} 1097db96d56Sopenharmony_ci 1107db96d56Sopenharmony_civoid 1117db96d56Sopenharmony_ci_PyAccu_Destroy(_PyAccu *acc) 1127db96d56Sopenharmony_ci{ 1137db96d56Sopenharmony_ci Py_CLEAR(acc->small); 1147db96d56Sopenharmony_ci Py_CLEAR(acc->large); 1157db96d56Sopenharmony_ci} 116