Module bases.alphabet.range_alphabet
Alphabets implicitly specified by Unicode codepoint range.
Expand source code
"""
Alphabets implicitly specified by Unicode codepoint range.
"""
from typing import Any, Iterator, Mapping, overload, Union
from .abstract import Alphabet
from .string_alphabet import StringAlphabet
class RangeAlphabet(Alphabet):
    """
    Class for alphabets implicitly specified by a range of Unicode codepoints
    and optional case sensitivity (default: case-sensitive).

    Example usage:

    ```py
    >>> from bases.alphabet import RangeAlphabet
    >>> RangeAlphabet(range(0x00, 0x100))
    RangeAlphabet(range(0x0, 0x100))
    ```
    """

    _codepoints: range
    _revdir: Mapping[str, int]
    _case_sensitive: bool

    def __init__(self, codepoints: range, *,
                 case_sensitive: bool = True):
        """
        Creates the alphabet whose character at position `i` is
        `chr(codepoints[i])`.

        Raises `ValueError` if the range has fewer than two codepoints, or if
        the alphabet is case-insensitive and contains both the lowercase and
        the uppercase version of some character.
        """
        # NOTE(review): the base class is not visible here; it presumably
        # stores the flag that is read back through `self.case_sensitive`.
        super().__init__(case_sensitive)
        self._codepoints = codepoints
        self._revdir = _RangeAlphabetRevdir(self)
        self.__validate_init()

    def __validate_init(self) -> None:
        """
        Checks the constructor arguments, raising `ValueError` on failure.
        """
        codepoints = self._codepoints
        if len(codepoints) <= 1:
            raise ValueError("Alphabet must have at least two characters.")
        if self.case_sensitive:
            return
        for i in codepoints:
            c = chr(i)
            upper, lower = c.upper(), c.lower()
            # Uncased characters (digits, punctuation, ...) are their own
            # upper and lower form: they can never cause a case clash.
            if upper == lower:
                continue
            # Some case mappings expand to several characters (e.g. the
            # uppercase of U+00DF is 'SS'); such a mapping cannot be a single
            # alphabet character and `ord` would reject it.
            if len(upper) != 1 or len(lower) != 1:
                continue
            # Membership of an int in a range is O(1): no auxiliary set needed.
            if ord(upper) in codepoints and ord(lower) in codepoints:
                raise ValueError("Alphabet contains lowercase and uppercase versions of the same character, "
                                 "encoding must be case-sensitive.")

    @property
    def codepoints(self) -> range:
        """
        The codepoint range that defines this alphabet.

        Example usage:

        ```py
        >>> RangeAlphabet(range(0x00, 0x100)).codepoints
        range(0, 256)
        ```
        """
        return self._codepoints

    @property
    def revdir(self) -> Mapping[str, int]:
        """
        Read-only mapping from characters of this alphabet to their position.
        """
        return self._revdir

    def __len__(self) -> int:
        return len(self._codepoints)

    @overload
    def __getitem__(self, idx: int) -> str:
        ...

    @overload
    def __getitem__(self, idx: slice) -> "RangeAlphabet":
        ...

    def __getitem__(self, idx: Union[int, slice]) -> Union[str, "RangeAlphabet"]:
        """
        Returns the character at an integer position, or a new range alphabet
        (same case sensitivity) for a slice. The sliced alphabet goes through
        the usual constructor validation.
        """
        if isinstance(idx, slice):
            new_codepoints = self._codepoints[idx]
            return RangeAlphabet(new_codepoints, case_sensitive=self.case_sensitive)
        return chr(self._codepoints[idx])

    def with_case_sensitivity(self, case_sensitive: bool) -> "RangeAlphabet":
        """
        Returns an alphabet over the same codepoints with the given case
        sensitivity; returns `self` when nothing would change.
        """
        if case_sensitive == self.case_sensitive:
            return self
        return RangeAlphabet(self.codepoints, case_sensitive=case_sensitive)

    def as_string_alphabet(self) -> StringAlphabet:
        """
        Converts this alphabet into a string alphabet explicitly defined
        by the string containing all characters in the codepoint range.

        Example usage:

        ```py
        >>> RangeAlphabet(range(0x20, 0x7E)).as_string_alphabet()
        StringAlphabet(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN
                        OPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}')
        ```
        """
        chars = "".join(self)
        return StringAlphabet(chars, case_sensitive=self.case_sensitive)

    def upper(self) -> StringAlphabet:
        """
        Returns the string alphabet built from the uppercased characters of
        this alphabet, with the same case sensitivity.
        """
        chars = "".join(self).upper()
        return StringAlphabet(chars, case_sensitive=self.case_sensitive)

    def lower(self) -> StringAlphabet:
        """
        Returns the string alphabet built from the lowercased characters of
        this alphabet, with the same case sensitivity.
        """
        chars = "".join(self).lower()
        return StringAlphabet(chars, case_sensitive=self.case_sensitive)

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, RangeAlphabet):
            return NotImplemented
        return self.codepoints == other.codepoints and self.case_sensitive == other.case_sensitive

    def __hash__(self) -> int:
        return hash((type(self), self.codepoints, self.case_sensitive))

    def __repr__(self) -> str:
        codepoints = self.codepoints
        range_args = [hex(codepoints.start), hex(codepoints.stop)]
        # Slicing can produce stepped or reversed ranges: show the step so the
        # repr describes the actual alphabet (omitted for the common step 1).
        if codepoints.step != 1:
            range_args.append(str(codepoints.step))
        codepoints_str = f"range({', '.join(range_args)})"
        if self.case_sensitive:
            return f"RangeAlphabet({codepoints_str})"
        case_sensitive_str = f"case_sensitive={self.case_sensitive}"
        return f"RangeAlphabet({codepoints_str}, {case_sensitive_str})"
class _RangeAlphabetRevdir(Mapping[str, int]):
    """
    Read-only mapping from the characters of a range alphabet to their
    position in it, computed on the fly from the codepoint range.
    """

    _alphabet: "RangeAlphabet"

    def __init__(self, alphabet: "RangeAlphabet"):
        self._alphabet = alphabet

    def __iter__(self) -> Iterator[str]:
        return iter(self._alphabet)

    def __len__(self) -> int:
        return len(self._alphabet)

    def _position(self, char: Any) -> int:
        """
        Returns the position of `char` in the alphabet, or -1 if `char` is
        not a single character belonging to the alphabet.

        The character itself is tried first; for case-insensitive alphabets
        its uppercase and lowercase versions are tried next, in that order.
        """
        if not isinstance(char, str) or len(char) != 1:
            return -1
        alphabet = self._alphabet
        codepoints = alphabet.codepoints
        candidates = [char]
        if not alphabet.case_sensitive:
            candidates.extend((char.upper(), char.lower()))
        for candidate in candidates:
            # Case mappings may expand to several characters (e.g. the
            # uppercase of U+00DF is 'SS'): those cannot be in the alphabet.
            if len(candidate) != 1:
                continue
            codepoint = ord(candidate)
            if codepoint in codepoints:
                # `range.index` accounts for the range step, so positions stay
                # correct for sliced (stepped or reversed) alphabets.
                return codepoints.index(codepoint)
        return -1

    def __contains__(self, char: Any) -> bool:
        return self._position(char) >= 0

    def __getitem__(self, char: str) -> int:
        position = self._position(char)
        if position < 0:
            raise KeyError(f"Character {repr(char)} not in alphabet.")
        return position
Classes
class RangeAlphabet (codepoints: range, *, case_sensitive: bool = True)
-
Class for alphabets implicitly specified by a range of Unicode codepoints and optional case sensitivity (default: case-sensitive).
Example usage:
>>> from bases.alphabet import RangeAlphabet
>>> RangeAlphabet(range(0x00, 0x100))
RangeAlphabet(range(0x0, 0x100))
Expand source code
class RangeAlphabet(Alphabet):
    """
    Class for alphabets implicitly specified by a range of Unicode codepoints
    and optional case sensitivity (default: case-sensitive).

    Example usage:

    ```py
    >>> from bases.alphabet import RangeAlphabet
    >>> RangeAlphabet(range(0x00, 0x100))
    RangeAlphabet(range(0x0, 0x100))
    ```
    """

    _codepoints: range
    _revdir: Mapping[str, int]
    _case_sensitive: bool

    def __init__(self, codepoints: range, *,
                 case_sensitive: bool = True):
        """
        Creates the alphabet whose character at position `i` is
        `chr(codepoints[i])`.

        Raises `ValueError` if the range has fewer than two codepoints, or if
        the alphabet is case-insensitive and contains both the lowercase and
        the uppercase version of some character.
        """
        # NOTE(review): the base class is not visible here; it presumably
        # stores the flag that is read back through `self.case_sensitive`.
        super().__init__(case_sensitive)
        self._codepoints = codepoints
        self._revdir = _RangeAlphabetRevdir(self)
        self.__validate_init()

    def __validate_init(self) -> None:
        """
        Checks the constructor arguments, raising `ValueError` on failure.
        """
        codepoints = self._codepoints
        if len(codepoints) <= 1:
            raise ValueError("Alphabet must have at least two characters.")
        if self.case_sensitive:
            return
        for i in codepoints:
            c = chr(i)
            upper, lower = c.upper(), c.lower()
            # Uncased characters (digits, punctuation, ...) are their own
            # upper and lower form: they can never cause a case clash.
            if upper == lower:
                continue
            # Some case mappings expand to several characters (e.g. the
            # uppercase of U+00DF is 'SS'); such a mapping cannot be a single
            # alphabet character and `ord` would reject it.
            if len(upper) != 1 or len(lower) != 1:
                continue
            # Membership of an int in a range is O(1): no auxiliary set needed.
            if ord(upper) in codepoints and ord(lower) in codepoints:
                raise ValueError("Alphabet contains lowercase and uppercase versions of the same character, "
                                 "encoding must be case-sensitive.")

    @property
    def codepoints(self) -> range:
        """
        The codepoint range that defines this alphabet.

        Example usage:

        ```py
        >>> RangeAlphabet(range(0x00, 0x100)).codepoints
        range(0, 256)
        ```
        """
        return self._codepoints

    @property
    def revdir(self) -> Mapping[str, int]:
        """
        Read-only mapping from characters of this alphabet to their position.
        """
        return self._revdir

    def __len__(self) -> int:
        return len(self._codepoints)

    @overload
    def __getitem__(self, idx: int) -> str:
        ...

    @overload
    def __getitem__(self, idx: slice) -> "RangeAlphabet":
        ...

    def __getitem__(self, idx: Union[int, slice]) -> Union[str, "RangeAlphabet"]:
        """
        Returns the character at an integer position, or a new range alphabet
        (same case sensitivity) for a slice. The sliced alphabet goes through
        the usual constructor validation.
        """
        if isinstance(idx, slice):
            new_codepoints = self._codepoints[idx]
            return RangeAlphabet(new_codepoints, case_sensitive=self.case_sensitive)
        return chr(self._codepoints[idx])

    def with_case_sensitivity(self, case_sensitive: bool) -> "RangeAlphabet":
        """
        Returns an alphabet over the same codepoints with the given case
        sensitivity; returns `self` when nothing would change.
        """
        if case_sensitive == self.case_sensitive:
            return self
        return RangeAlphabet(self.codepoints, case_sensitive=case_sensitive)

    def as_string_alphabet(self) -> StringAlphabet:
        """
        Converts this alphabet into a string alphabet explicitly defined
        by the string containing all characters in the codepoint range.

        Example usage:

        ```py
        >>> RangeAlphabet(range(0x20, 0x7E)).as_string_alphabet()
        StringAlphabet(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN
                        OPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}')
        ```
        """
        chars = "".join(self)
        return StringAlphabet(chars, case_sensitive=self.case_sensitive)

    def upper(self) -> StringAlphabet:
        """
        Returns the string alphabet built from the uppercased characters of
        this alphabet, with the same case sensitivity.
        """
        chars = "".join(self).upper()
        return StringAlphabet(chars, case_sensitive=self.case_sensitive)

    def lower(self) -> StringAlphabet:
        """
        Returns the string alphabet built from the lowercased characters of
        this alphabet, with the same case sensitivity.
        """
        chars = "".join(self).lower()
        return StringAlphabet(chars, case_sensitive=self.case_sensitive)

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, RangeAlphabet):
            return NotImplemented
        return self.codepoints == other.codepoints and self.case_sensitive == other.case_sensitive

    def __hash__(self) -> int:
        return hash((type(self), self.codepoints, self.case_sensitive))

    def __repr__(self) -> str:
        codepoints = self.codepoints
        range_args = [hex(codepoints.start), hex(codepoints.stop)]
        # Slicing can produce stepped or reversed ranges: show the step so the
        # repr describes the actual alphabet (omitted for the common step 1).
        if codepoints.step != 1:
            range_args.append(str(codepoints.step))
        codepoints_str = f"range({', '.join(range_args)})"
        if self.case_sensitive:
            return f"RangeAlphabet({codepoints_str})"
        case_sensitive_str = f"case_sensitive={self.case_sensitive}"
        return f"RangeAlphabet({codepoints_str}, {case_sensitive_str})"
Ancestors
- Alphabet
- abc.ABC
- collections.abc.Sequence
- collections.abc.Reversible
- collections.abc.Collection
- collections.abc.Sized
- collections.abc.Iterable
- collections.abc.Container
- typing.Generic
Instance variables
var codepoints : range
-
The codepoint range that defines this alphabet.
Example usage:
>>> RangeAlphabet(range(0x00, 0x100)).codepoints
range(0, 256)
Expand source code
@property
def codepoints(self) -> range:
    """
    Range of Unicode codepoints backing this alphabet, exactly as stored
    at construction time.

    Example usage:

    ```py
    >>> RangeAlphabet(range(0x00, 0x100)).codepoints
    range(0, 256)
    ```
    """
    backing_range = self._codepoints
    return backing_range
Methods
def as_string_alphabet(self) ‑> StringAlphabet
-
Converts this alphabet into a string alphabet explicitly defined by the string containing all characters in the codepoint range.
Example usage:
>>> RangeAlphabet(range(0x20, 0x7E)).as_string_alphabet()
StringAlphabet(' !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}')
Expand source code
def as_string_alphabet(self) -> StringAlphabet:
    """
    Builds the explicit string alphabet made of every character in this
    alphabet's codepoint range, keeping the same case sensitivity.

    Example usage:

    ```py
    >>> RangeAlphabet(range(0x20, 0x7E)).as_string_alphabet()
    StringAlphabet(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN
                    OPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}')
    ```
    """
    # Iterating the alphabet yields its characters in codepoint-range order.
    return StringAlphabet("".join(self), case_sensitive=self.case_sensitive)
Inherited members