Lines Matching defs:width
85 """Represents the width of a Unicode character. All East Asian Width classes resolve into
108 # matches a width assignment for a single codepoint, i.e. "1F336;N # ..."
110 # matches a width assignment for a range of codepoints, i.e. "3001..3003;W # ..."
112 # map between width category code and condensed width
122 raw_data = None # (low, high, width)
131 width = width_codes[raw_data[2]]
136 # All such codepoints are implicitly given Neutral width (resolves to narrow)
137 width_map.append(EffectiveWidth.NARROW if current < low else width)
141 # Catch any leftover codepoints and assign them implicit Neutral/narrow width.
148 """Returns a list `l` where `l[c]` is true if codepoint `c` is considered a zero-width
149 character. `c` is considered a zero-width character if `c` is in general categories
167 # if name ends with Last, we backfill the width value to all codepoints since
171 # unassigned characters are implicitly given Neutral width, which is nonzero
176 # Catch any leftover codepoints. They must be unassigned (so nonzero width)
183 """A bucket contains a group of codepoints and an ordered width list. If one bucket's width
184 list overlaps with another's width list, those buckets can be merged via `try_extend`."""
191 def append(self, codepoint: Codepoint, width: EffectiveWidth):
192 """Adds a codepoint/width pair to the bucket, and appends `width` to the width list."""
193 self.entry_set.add((codepoint, width))
194 self.widths.append(width)
197 """If either `self` or `attempt`'s width list starts with the other bucket's width list,
198 set `self`'s width list to the longer of the two, add all of `attempt`'s codepoints
210 """Return a list of the codepoint/width pairs in this bucket, sorted by codepoint."""
215 def width(self) -> "EffectiveWidth":
216 """If all codepoints in this bucket have the same width, return that width; otherwise,
221 for width in self.widths[1:]:
222 if potential_width != width:
235 for (codepoint, width) in entries:
236 buckets[(codepoint >> low_bit) & mask].append(codepoint, width)
249 same width list, which means that they can be merged into the same bucket.
287 self.entries = list(map(lambda i: int(self.indexed[i].width()), self.entries))
350 /// that this version of unicode-width is based on.
360 /// Returns the [UAX #11](https://www.unicode.org/reports/tr11/) based width of `c` by
362 /// If `is_cjk == true`, ambiguous width characters are treated as double width; otherwise,
363 /// they're treated as single width.
384 // Since this is the last table, each entry represents an encoded width.
387 // Extract the packed width
388 let width = packed_widths >> (2 * (cp & 0b11)) & 0b11;
390 // A width of 3 signifies that the codepoint is ambiguous width.
391 if width == 3 {
398 width.into()
406 /// Returns the [UAX #11](https://www.unicode.org/reports/tr11/) based width of `c`, or
408 /// If `is_cjk == true`, ambiguous width characters are treated as double width; otherwise,
409 /// they're treated as single width.
411 pub fn width(c: char, is_cjk: bool) -> Option<usize> {
414 // U+0020 to U+007F (exclusive) are single-width ASCII codepoints
457 lookup table for character width, and write a Rust module utilizing that table to
461 - The soft hyphen (`U+00AD`) is single-width.
462 - Hangul Jamo medial vowels & final consonants (`U+1160..=U+11FF`) are zero-width.
463 - All codepoints in general categories `Cc`, `Cf`, `Mn`, and `Me` are zero-width.
464 - All codepoints with an East Asian Width of `Ambiguous` are ambiguous-width.
465 - All codepoints with an East Asian Width of `Wide` or `Fullwidth` are double-width.
467 of `Neutral`, `Narrow`, or `Halfwidth`) are single-width.
477 # Characters marked as zero-width in zw_map should be zero-width in the final map