Automated update
This commit is contained in:
		
							
								
								
									
										357
									
								
								qutebrowser/venv/lib/python3.11/site-packages/urwid/str_util.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										357
									
								
								qutebrowser/venv/lib/python3.11/site-packages/urwid/str_util.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,357 @@ | ||||
| # Urwid unicode character processing tables | ||||
| #    Copyright (C) 2004-2011  Ian Ward | ||||
| # | ||||
| #    This library is free software; you can redistribute it and/or | ||||
| #    modify it under the terms of the GNU Lesser General Public | ||||
| #    License as published by the Free Software Foundation; either | ||||
| #    version 2.1 of the License, or (at your option) any later version. | ||||
| # | ||||
| #    This library is distributed in the hope that it will be useful, | ||||
| #    but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
| #    Lesser General Public License for more details. | ||||
| # | ||||
| #    You should have received a copy of the GNU Lesser General Public | ||||
| #    License along with this library; if not, write to the Free Software | ||||
| #    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA | ||||
| # | ||||
| # Urwid web site: https://urwid.org/ | ||||
|  | ||||
|  | ||||
| from __future__ import annotations | ||||
|  | ||||
| import re | ||||
| import typing | ||||
| import warnings | ||||
|  | ||||
| import wcwidth | ||||
|  | ||||
| if typing.TYPE_CHECKING: | ||||
|     from typing_extensions import Literal | ||||
|  | ||||
# Matches a string consisting only of printable ASCII characters (space through "~").
# NOTE: "$" also matches just before a single trailing newline.
SAFE_ASCII_RE = re.compile("^[ -~]*$")
# Bytes counterpart of SAFE_ASCII_RE.
SAFE_ASCII_BYTES_RE = re.compile(b"^[ -~]*$")

# Module-wide mode describing how byte strings are interpreted by the functions in this module.
# One of "utf8", "narrow" or "wide"; changed through set_byte_encoding().
_byte_encoding: Literal["utf8", "narrow", "wide"] = "narrow"
|  | ||||
|  | ||||
def get_char_width(char: str) -> Literal[0, 1, 2]:
    """Return the number of screen columns used by the single character ``char``.

    The value comes from ``wcwidth.wcwidth``; a negative result is reported as 0.
    """
    return max(wcwidth.wcwidth(char), 0)
|  | ||||
|  | ||||
def get_width(o: int) -> Literal[0, 1, 2]:
    """Return the screen column width of the character with unicode ordinal ``o``."""
    character = chr(o)
    return get_char_width(character)
|  | ||||
|  | ||||
def decode_one(text: bytes | str, pos: int) -> tuple[int, int]:
    """
    Return (ordinal at pos, next position) for UTF-8 encoded text.

    :param text: UTF-8 encoded data; for ``bytes`` the byte values are used directly,
        for ``str`` each character's ``ord`` is used as one encoded unit
    :param pos: index of the first unit of the sequence to decode
    :returns: ``(ordinal, next_pos)``. A malformed, truncated, overlong or out-of-range
        sequence yields ``(ord("?"), pos + 1)`` so that callers can resynchronise on the next unit.
    :raises ValueError: if the units starting at ``pos`` cannot be read
        (for example ``pos`` is past the end of ``text``)
    """
    lt = len(text) - pos

    b2 = 0  # Fallback, not changing anything
    b3 = 0  # Fallback, not changing anything
    b4 = 0  # Fallback, not changing anything

    try:
        if isinstance(text, str):
            b1 = ord(text[pos])
            if lt > 1:
                b2 = ord(text[pos + 1])
            if lt > 2:
                b3 = ord(text[pos + 2])
            if lt > 3:
                b4 = ord(text[pos + 3])
        else:
            b1 = text[pos]
            if lt > 1:
                b2 = text[pos + 1]
            if lt > 2:
                b3 = text[pos + 2]
            if lt > 3:
                b4 = text[pos + 3]
    except Exception as e:
        raise ValueError(f"{e}: text={text!r}, pos={pos!r}, lt={lt!r}").with_traceback(e.__traceback__) from e

    # Single unit (high bit clear): the value is the ordinal itself.
    if not b1 & 0x80:
        return b1, pos + 1
    error = ord("?"), pos + 1

    if lt < 2:
        return error
    # Two-unit sequence: 110xxxxx 10xxxxxx
    if b1 & 0xE0 == 0xC0:
        if b2 & 0xC0 != 0x80:
            return error
        o = ((b1 & 0x1F) << 6) | (b2 & 0x3F)
        if o < 0x80:  # overlong encoding
            return error
        return o, pos + 2
    if lt < 3:
        return error
    # Three-unit sequence: 1110xxxx 10xxxxxx 10xxxxxx
    if b1 & 0xF0 == 0xE0:
        if b2 & 0xC0 != 0x80:
            return error
        if b3 & 0xC0 != 0x80:
            return error
        o = ((b1 & 0x0F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F)
        if o < 0x800:  # overlong encoding
            return error
        return o, pos + 3
    if lt < 4:
        return error
    # Four-unit sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    if b1 & 0xF8 == 0xF0:
        if b2 & 0xC0 != 0x80:
            return error
        if b3 & 0xC0 != 0x80:
            return error
        if b4 & 0xC0 != 0x80:
            return error
        o = ((b1 & 0x07) << 18) | ((b2 & 0x3F) << 12) | ((b3 & 0x3F) << 6) | (b4 & 0x3F)
        # Reject overlong encodings and values above U+10FFFF: the latter are not
        # valid code points and would make ``chr(o)`` raise ValueError downstream.
        if o < 0x10000 or o > 0x10FFFF:
            return error
        return o, pos + 4
    return error
|  | ||||
|  | ||||
def decode_one_uni(text: str, i: int) -> tuple[int, int]:
    """
    Counterpart of decode_one for unicode strings.

    Returns (ordinal of the character at index i, i + 1).
    """
    ordinal = ord(text[i])
    following = i + 1
    return ordinal, following
|  | ||||
|  | ||||
def decode_one_right(text: bytes, pos: int) -> tuple[int, int] | None:
    """
    Return (ordinal at pos, next position) for UTF-8 encoded text.
    pos is assumed to be on the trailing byte of a utf-8 sequence.

    The text is scanned backwards from ``pos`` for the first byte that is not a
    UTF-8 continuation byte (``10xxxxxx``); the sequence is decoded from there and
    the returned "next position" is the index just before that byte.

    :param text: UTF-8 encoded bytes
    :param pos: index of the last byte of the sequence to decode
    :returns: ``(ordinal, previous_pos)``; ``(ord("?"), pos - 1)`` when four continuation
        bytes in a row are found (no valid sequence is that long); ``None`` when the
        start of ``text`` is reached without finding a leading byte.
    :raises TypeError: if ``text`` is not ``bytes``
    """
    if not isinstance(text, bytes):
        raise TypeError(text)
    error = ord("?"), pos - 1
    p = pos
    while p >= 0:
        if text[p] & 0xC0 != 0x80:
            o, _next_pos = decode_one(text, p)
            return o, p - 1
        p -= 1
        # A UTF-8 sequence has at most three continuation bytes, so after skipping
        # four of them the input is malformed. (Compare against the starting
        # position: comparing ``p`` with itself could never be true.)
        if p == pos - 4:
            return error
    return None
|  | ||||
|  | ||||
def set_byte_encoding(enc: Literal["utf8", "narrow", "wide"]) -> None:
    """Select how byte strings are interpreted by this module.

    :param enc: one of ``"utf8"``, ``"narrow"`` or ``"wide"``
    :raises ValueError: if ``enc`` is not one of the supported names
    """
    global _byte_encoding  # noqa: PLW0603  # pylint: disable=global-statement
    if enc in {"utf8", "narrow", "wide"}:
        _byte_encoding = enc
        return
    raise ValueError(enc)
|  | ||||
|  | ||||
def get_byte_encoding() -> Literal["utf8", "narrow", "wide"]:
    """Return the byte encoding mode currently stored in the module-level setting."""
    current = _byte_encoding
    return current
|  | ||||
|  | ||||
def calc_string_text_pos(text: str, start_offs: int, end_offs: int, pref_col: int) -> tuple[int, int]:
    """
    Find the position in ``text`` closest to screen column ``pref_col``.

    ``start_offs`` is the offset treated as screen column 0 and ``end_offs``
    is where the search stops.

    :param text: string
    :param start_offs: starting text position
    :param end_offs: ending text position
    :param pref_col: target column
    :returns: (position, actual_col)
    :raises ValueError: if ``start_offs`` is greater than ``end_offs``

    .. note:: this is a simplified version of `wcwidth.wcswidth` and ideally should be in the wcwidth package.
    """
    if end_offs < start_offs:
        raise ValueError((start_offs, end_offs))

    columns = 0
    stop_at = end_offs
    for index in range(start_offs, end_offs):
        char_cols = get_char_width(text[index])
        if columns + char_cols > pref_col:
            # This character would overshoot the target column: stop in front of it.
            stop_at = index
            break
        columns += char_cols

    return stop_at, columns
|  | ||||
|  | ||||
def calc_text_pos(text: str | bytes, start_offs: int, end_offs: int, pref_col: int) -> tuple[int, int]:
    """
    Find the position in ``text`` closest to screen column ``pref_col``.

    ``start_offs`` is the offset treated as screen column 0 and ``end_offs``
    is where the search stops.

    text may be unicode or a byte string in the target _byte_encoding

    Returns (position, actual_col).
    Raises ValueError when start_offs > end_offs and TypeError when text is neither str nor bytes.
    """
    if end_offs < start_offs:
        raise ValueError((start_offs, end_offs))

    if isinstance(text, str):
        return calc_string_text_pos(text, start_offs, end_offs, pref_col)

    if not isinstance(text, bytes):
        raise TypeError(text)

    if _byte_encoding != "utf8":
        # "wide" and "narrow": one byte per column
        target = start_offs + pref_col
        if target >= end_offs:
            return end_offs, end_offs - start_offs
        if _byte_encoding == "wide" and within_double_byte(text, start_offs, target) == 2:
            # Never stop on the second half of a double-byte character.
            target -= 1
        return target, target - start_offs

    # "utf8": walk the text one decoded character at a time.
    position = start_offs
    columns = 0
    while position < end_offs:
        ordinal, following = decode_one(text, position)
        total = columns + get_width(ordinal)
        if total > pref_col:
            break
        position, columns = following, total
    return position, columns
|  | ||||
|  | ||||
def calc_width(text: str | bytes, start_offs: int, end_offs: int) -> int:
    """
    Return the screen column width of text between start_offs and end_offs.

    text may be unicode or a byte string in the target _byte_encoding

    Some characters are wide (take two columns) and others affect the
    previous character (take zero columns).  Per-character widths are
    obtained through get_char_width / get_width.

    :param text: unicode string or byte string
    :param start_offs: starting text position
    :param end_offs: ending text position
    :returns: number of screen columns used by ``text[start_offs:end_offs]``
    :raises ValueError: if ``start_offs`` is greater than ``end_offs``
    """

    if start_offs > end_offs:
        raise ValueError((start_offs, end_offs))

    if isinstance(text, str):
        return sum(get_char_width(char) for char in text[start_offs:end_offs])

    if _byte_encoding == "utf8":
        try:
            return sum(get_char_width(char) for char in text[start_offs:end_offs].decode("utf-8"))
        except UnicodeDecodeError as exc:
            # The two literals are concatenated: the trailing space keeps the words apart.
            warnings.warn(
                "`calc_width` with text encoded to bytes can produce incorrect results "
                f"due to possible offset in the middle of character: {exc}",
                UnicodeWarning,
                stacklevel=2,
            )

        # Strict decoding failed: fall back to the tolerant decoder, which
        # substitutes "?" for malformed sequences.
        i = start_offs
        sc = 0
        while i < end_offs:
            o, i = decode_one(text, i)
            sc += get_width(o)
        return sc
    # "wide", "narrow" or all printable ASCII, just return the character count
    return end_offs - start_offs
|  | ||||
|  | ||||
def is_wide_char(text: str | bytes, offs: int) -> bool:
    """
    Report whether the character located at ``offs`` in ``text`` is wide.

    text may be unicode or a byte string in the target _byte_encoding

    Raises TypeError when text is neither str nor bytes.
    """
    if isinstance(text, str):
        return get_char_width(text[offs]) == 2
    if not isinstance(text, bytes):
        raise TypeError(text)
    if _byte_encoding == "wide":
        # Wide when offs sits on the first half of a double-byte character.
        return within_double_byte(text, offs, offs) == 1
    if _byte_encoding != "utf8":
        return False
    ordinal, _following = decode_one(text, offs)
    return get_width(ordinal) == 2
|  | ||||
|  | ||||
def move_prev_char(text: str | bytes, start_offs: int, end_offs: int) -> int:
    """
    Return the position of the character before end_offs.

    :param text: unicode string or byte string in the target _byte_encoding
    :param start_offs: lowest position that may be returned
    :param end_offs: position to step back from
    :returns: position of the previous character
    :raises ValueError: if ``start_offs`` is not below ``end_offs``
    :raises TypeError: if ``text`` is neither ``str`` nor ``bytes``
    """
    if start_offs >= end_offs:
        raise ValueError((start_offs, end_offs))
    if isinstance(text, str):
        return end_offs - 1
    if not isinstance(text, bytes):
        raise TypeError(text)
    if _byte_encoding == "utf8":
        o = end_offs - 1
        # Skip continuation bytes, but never step in front of start_offs: on malformed
        # input an unbounded scan would run into negative indices and wrap around.
        while o > start_offs and text[o] & 0xC0 == 0x80:
            o -= 1
        return o
    if _byte_encoding == "wide" and within_double_byte(text, start_offs, end_offs - 1) == 2:
        return end_offs - 2
    return end_offs - 1
|  | ||||
|  | ||||
def move_next_char(text: str | bytes, start_offs: int, end_offs: int) -> int:
    """
    Return the position of the character following start_offs.

    Raises ValueError when start_offs is not below end_offs and TypeError
    when text is neither str nor bytes.
    """
    if start_offs >= end_offs:
        raise ValueError((start_offs, end_offs))
    if isinstance(text, str):
        return start_offs + 1
    if not isinstance(text, bytes):
        raise TypeError(text)
    if _byte_encoding == "utf8":
        # The next character starts at the first byte that is not a continuation byte.
        for candidate in range(start_offs + 1, end_offs):
            if text[candidate] & 0xC0 != 0x80:
                return candidate
        return end_offs
    on_first_half = _byte_encoding == "wide" and within_double_byte(text, start_offs, start_offs) == 1
    return start_offs + 2 if on_first_half else start_offs + 1
|  | ||||
|  | ||||
def within_double_byte(text: bytes, line_start: int, pos: int) -> Literal[0, 1, 2]:
    """Tell whether ``pos`` falls inside a double-byte encoded character.

    text -- byte string in question
    line_start -- offset of beginning of line (<= pos)
    pos -- offset in question

    Return values:
    0 -- pos is not within a double-byte character
    1 -- pos is on the 1st half of a double-byte character
    2 -- pos is on the 2nd half of a double-byte character

    Raises TypeError when text is not bytes.
    """
    if not isinstance(text, bytes):
        raise TypeError(text)
    current = text[pos]

    # Bytes below 0x40, and 0x7F itself, are never part of a double-byte character.
    if current < 0x40 or current == 0x7F:
        return 0

    if current < 0x7F:
        # 0x40..0x7E: might be second half of big5, uhc or gbk encoding
        if pos == line_start:
            return 0
        previous_is_lead = text[pos - 1] >= 0x81 and within_double_byte(text, line_start, pos - 1) == 1
        return 2 if previous_is_lead else 0

    # High byte: find where the run of high bytes ending at pos begins.
    run_start = pos
    while run_start > line_start and text[run_start - 1] >= 0x80:
        run_start -= 1

    # An even offset inside the run is a first half, an odd offset a second half.
    if (pos - run_start) % 2 == 0:
        return 1
    return 2
		Reference in New Issue
	
	Block a user
	 klein panic
					klein panic