Lines Matching defs:u8c

64 // extern int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
66 // extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
75 // extern int utf8byte(struct utf8cursor *u8c);
657 * u8c : pointer to cursor.
664 static int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
671 u8c->data = data;
672 u8c->s = s;
673 u8c->p = NULL;
674 u8c->ss = NULL;
675 u8c->sp = NULL;
676 u8c->len = len;
677 u8c->slen = 0;
678 u8c->ccc = STOPPER;
679 u8c->nccc = STOPPER;
681 if (u8c->len != len)
693 * u8c : pointer to cursor.
699 static int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
702 return utf8ncursor(u8c, data, s, (unsigned int)-1);
707 * Get one byte from the normalized form of the string described by u8c.
711 * The cursor keeps track of the location in the string in u8c->s.
713 * u8c->p, and u8c->s is set to the start of the decomposition. Note
714 * that bytes from a decomposition do not count against u8c->len.
716 * Characters are emitted if they match the current CCC in u8c->ccc.
717 * Hitting end-of-string while u8c->ccc == STOPPER means we're done,
721 * values of u8c->s and u8c->p are stored in u8c->ss and u8c->sp at
723 * emitted and stores it in u8c->nccc, the second pass emits the
729 * u8c->p != NULL -> a decomposition is being scanned.
730 * u8c->ss != NULL -> this is a repeating scan.
731 * u8c->ccc == -1 -> this is the first scan of a repeating scan.
733 static int utf8byte(struct utf8cursor *u8c)
740 if (u8c->p && *u8c->s == '\0') {
741 u8c->s = u8c->p;
742 u8c->p = NULL;
746 if (!u8c->p && (u8c->len == 0 || *u8c->s == '\0')) {
748 if (u8c->ccc == STOPPER)
753 } else if ((*u8c->s & 0xC0) == 0x80) {
755 if (!u8c->p)
756 u8c->len--;
757 return (unsigned char)*u8c->s++;
761 if (u8c->p) {
762 leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s);
764 leaf = utf8nlookup(u8c->data, u8c->hangul,
765 u8c->s, u8c->len);
774 if (utf8agetab[LEAF_GEN(leaf)] > u8c->data->maxage) {
777 u8c->len -= utf8clen(u8c->s);
778 u8c->p = u8c->s + utf8clen(u8c->s);
779 u8c->s = LEAF_STR(leaf);
781 if (*u8c->s == '\0') {
782 if (u8c->ccc == STOPPER)
788 leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s);
798 if (ccc != STOPPER && u8c->ccc < ccc && ccc < u8c->nccc)
799 u8c->nccc = ccc;
805 if (ccc == u8c->ccc) {
806 if (!u8c->p)
807 u8c->len--;
808 return (unsigned char)*u8c->s++;
813 if (u8c->nccc == STOPPER) {
819 u8c->ccc = MINCCC - 1;
820 u8c->nccc = ccc;
821 u8c->sp = u8c->p;
822 u8c->ss = u8c->s;
823 u8c->slen = u8c->len;
824 if (!u8c->p)
825 u8c->len -= utf8clen(u8c->s);
826 u8c->s += utf8clen(u8c->s);
829 if (!u8c->p)
830 u8c->len -= utf8clen(u8c->s);
831 u8c->s += utf8clen(u8c->s);
832 } else if (u8c->nccc != MAXCCC + 1) {
834 u8c->ccc = u8c->nccc;
835 u8c->nccc = MAXCCC + 1;
836 u8c->s = u8c->ss;
837 u8c->p = u8c->sp;
838 u8c->len = u8c->slen;
841 u8c->ccc = STOPPER;
842 u8c->nccc = STOPPER;
843 u8c->sp = NULL;
844 u8c->ss = NULL;
845 u8c->slen = 0;