Module bases.encoding.fixchar

Fixed-character base encodings.

Expand source code
"""
    Fixed-character base encodings.
"""

import math
from typing import Any, Dict, List, Mapping, Optional, Union
from typing_extensions import Literal

from bases.alphabet import Alphabet
from .base import BaseEncoding
from .errors import DecodingError, InvalidDigitError, PaddingError

PaddingOptions = Literal["ignore", "include", "require"]
"""
    Type of allowed padding options for fixed-character encoding:

    ```py
    PaddingOptions = Literal["ignore", "include", "require"]
    ```

    See `FixcharBaseEncoding.padding`.
"""

def _lcm(a: int, b: int) -> int:
    # math.lcm only available in Python 3.8+
    return a*b//math.gcd(a, b)

class FixcharBaseEncoding(BaseEncoding):
    """
        Fixed-character encodings, generalising those described by [rfc4648](https://datatracker.ietf.org/doc/html/rfc4648.html).

        Constructor options:

        - `char_nbits: Union[int, Literal["auto"]] = "auto"`, number of bits per character
        - `pad_char: Optional[str] = None`, optional padding character
        - `padding: PaddingOptions = "ignore"`, padding style

        If `char_nbits` is set to `"auto"` (by default), it is automatically computed as:

        ```py
        int(math.ceil(math.log2(base)))
        ```

        From `char_nbits`, size of a block (in bytes and chars) is computed as:

        ```py
        block_nbytes = lcm(char_nbits, 8)//8
        block_nchars = lcm(char_nbits, 8)//char_nbits
        ```

        The value `block_nbytes` is presently not used, while the `block_nchars` is used for padding.

        The `padding` option must be set to `"ignore"` if a padding character is not specified (i.e. if `pad_char` is `None`).
        If a padding character is specified, it must be a character (string of length 1) not in the encoding alphabet:
        it is allowed in decoding strings, but only at then end (so that `s.rstrip(pad_char)` removes all padding).

        The padding behaviour is determined by the value of `padding`:

        - `"ignore"`: no padding included on encoding, no padding required on decoding
        - `"include"`: padding included on encoding, no padding required on decoding
        - `"require"`: padding included on encoding, padding required on decoding

        Encoding of a bytestring `b`:

        1. compute the minimum number `extra_nbits` of additional bits necessary to make `8*len(b)` an integral multiple of `char_nbits`
        2. convert `b` to an unsigned integer `i` (big-endian)
        3. left-shift `i` by `extra_nbits` bits, introducing the necessary zero _pad bits_
        4. converts `i` to the encoding base, using the encoding alphabet for digits
        5. if `padding` is `"include"` or `"require"`, append the minimum number of padding characters necessary to make the encoded
           string length an integral multiple of `block_nchars`

        Decoding of a string `s`:

        1. if `pad_char` is not `None`, count the number N of contiguous padding characters at the end of `s` and strip them, obtaining `s_stripped`
        2. if `padding` is `"require"`, ensure that N is exactly the minimum number of padding characters that must be appended to `s_stripped`
           to make its length an integral multiple of `block_nchars`
        3. converts `s` to an unsigned integer `i`, using the encoding alphabet for digits of the encoding base
        4. compute the number `extra_nbits = (char_nbits*len(s))%8` of pad bits: if this is not smaller than `char_nbits`,
           raise `bases.encoding.errors.DecodingError`
        5. extract the value `i%(2**extra_nbits)` of the pad bits: if this is not zero, raise `bases.encoding.errors.DecodingError`
        6. compute the number of bytes in the decoded bytestring as `original_nbytes = (char_nbits*len(s))//8`
        7. right-shift `i` by `extra_nbits` bits, removing the zero pad bits
        8. converts `i` to its minimal byte representation (big-endian), then zero-pad on the left to reach `original_nbytes` bytes
    """

    _char_nbits: int
    _init_char_nbits: Union[int, Literal["auto"]]
    _pad_char: Optional[str] = None
    _padding: PaddingOptions = "ignore"
    _block_nbytes: int
    _block_nchars: int

    def __init__(self, alphabet: Union[str, range, Alphabet], *,
                 case_sensitive: Optional[bool] = None,
                 char_nbits: Union[int, Literal["auto"]] = "auto",
                 pad_char: Optional[str] = None,
                 padding: PaddingOptions = "ignore"):
        if padding not in ("ignore", "include", "require"):
            raise TypeError("Allowed padding options are: 'ignore', 'include' and 'require'.")
        super().__init__(alphabet, case_sensitive=case_sensitive)
        self._init_char_nbits = char_nbits
        if char_nbits == "auto":
            char_nbits = int(math.ceil(math.log2(self.base)))
        self._char_nbits = char_nbits
        self._pad_char = pad_char
        self._padding = padding
        self.__validate_init()
        l = _lcm(char_nbits, 8)
        self._block_nbytes = l//8
        self._block_nchars = l//char_nbits

    def __validate_init(self) -> None:
        alphabet = self.alphabet
        pad_char = self.pad_char
        if pad_char is None:
            if self.padding != "ignore":
                raise ValueError("If padding is not 'ignore', a padding character must be specified.")
        else:
            if len(pad_char) != 1:
                raise ValueError("If specified, padding character must have length 1.")
            if pad_char in alphabet:
                raise ValueError("Padding character cannot be in the alphabet.")
        char_nbits = self.char_nbits
        if char_nbits is not None:
            if char_nbits <= 0:
                raise ValueError("If specified, number of bits per character must be positive.")
            if 2**char_nbits < self.base:
                raise ValueError(f"Number of bits per character is insufficient to cover the whole alphabet. This is likely a mistake. "
                                 f"If it isn't, please truncate the alphabet to {2**char_nbits} characters (or less).")

    @property
    def char_nbits(self) -> int:
        """
            Number of bits per character.
        """
        return self._char_nbits

    @property
    def block_nchars(self) -> int:
        """
            Number of characters in a block.
        """
        return self._block_nchars

    @property
    def effective_base(self) -> int:
        """
            Effective base used when decoding is `2**char_nbits`.
        """
        effective_base: int = 2**self.char_nbits
        return effective_base

    @property
    def padding(self) -> PaddingOptions:
        """
            Padding style:

            - `"ignore"`: no padding included on encoding, no padding required on decoding
            - `"include"`: padding included on encoding, no padding required on decoding
            - `"require"`: padding included on encoding, padding required on decoding
        """
        return self._padding

    @property
    def include_padding(self) -> bool:
        """
            Whether padding is included on encoding (derived from `FixcharBaseEncoding.padding`).
        """
        return self.padding in ("include", "require")

    @property
    def require_padding(self) -> bool:
        """
            Whether padding is required on decoding (derived from `FixcharBaseEncoding.padding`).
        """
        return self.padding == "require"

    @property
    def pad_char(self) -> Optional[str]:
        """
            An optional character to be used for padding of encoded strings.
            In [rfc4648](https://datatracker.ietf.org/doc/html/rfc4648.html), this is `"="` for both base64 and base32.
        """
        return self._pad_char

    def pad(self, require: bool = False) -> "FixcharBaseEncoding":
        """
            Returns a copy of this encoding which includes paddding (and optionally requires it).

            Example usage, from `"include"` to `"require"`:

            ```py
            >>> encoding.base32
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False),
                pad_char='=', padding='include')
            >>> encoding.base32.pad(require=True)
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False),
                pad_char='=', padding='require')
            ```

            Example usage, from `"ignore"` to `"include"`:

            ```py
            >>> encoding.base32z
            FixcharBaseEncoding(
                StringAlphabet('ybndrfg8ejkmcpqxot1uwisza345h769',
                               case_sensitive=False))
            >>> encoding.base32z.with_pad_char("=")
            FixcharBaseEncoding(
                StringAlphabet('ybndrfg8ejkmcpqxot1uwisza345h769',
                               case_sensitive=False),
                pad_char='=')
            >>> encoding.base32z.with_pad_char("=").pad()
            FixcharBaseEncoding(
                StringAlphabet('ybndrfg8ejkmcpqxot1uwisza345h769',
                               case_sensitive=False),
                pad_char='=', padding='include')
            ```
        """
        options = dict(padding="require" if require else "include")
        return self.with_options(**options)

    def nopad(self, allow: bool = True) -> "FixcharBaseEncoding":
        """
            Returns a copy of this encoding which does not include/require paddding
            (and optionally disallows it by removing the padding character).

            Example usage:

            ```py
            >>> encoding.base32
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False),
                pad_char='=', padding='include')
            >>> encoding.base32.nopad()
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False),
                pad_char='=')
            >>> encoding.base32.nopad(allow=False)
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False))
            ```
        """
        options = dict(padding="ignore", pad_char=self.pad_char if allow else None)
        return self.with_options(**options)

    def with_pad_char(self, pad_char: Optional[str]) -> "FixcharBaseEncoding":
        """
            Returns a copy of this encoding with a different padding character
            (or without a padding character if `pad_char` is `None`).

            Example usage:

            ```py
            >>> encoding.base32
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False),
                pad_char='=', padding='include')
            >>> encoding.base32.with_pad_char("~")
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False),
                pad_char='~', padding='include')
            ```

        """
        options: Dict[str, Any] = dict(pad_char=pad_char)
        if pad_char is None:
            options["padding"] = "ignore"
        return self.with_options(**options)

    def pad_string(self, s: str) -> str:
        """
            If no padding character is specified for this encoding, returns the input string unchanged.
            If a padding character is specified for this encoding, pads the input string by appending the
            minimum number of padding characters necessary to make its length an integral multiple of the
            block char size (given by `FixcharBaseEncoding.block_nchars`).

            The value of `FixcharBaseEncoding.padding` is irrelevant to this method.
        """
        if not isinstance(s, str):
            raise TypeError()
        pad_char = self.pad_char
        block_nchars = self._block_nchars
        # no padding available for this encoding scheme
        if pad_char is None:
            return s
        # padding available, but no need for padding
        if len(s)%block_nchars == 0:
            return s
        # compute require padding length
        pad_len = block_nchars-(len(s)%block_nchars)
        # return padded string
        return s+pad_char*pad_len

    def strip_string(self, s: str) -> str:
        """
            If no padding character is specified for this encoding, returns the input string unchanged.
            If a padding character is specified for this encoding, strips the input string of any padding
            characters it might have to the right.
            If `FixcharBaseEncoding.padding` is set to `"require"`, checks that the correct number of
            padding characters were included and raises `bases.encoding.errors.PaddingError` if not.
        """
        if not isinstance(s, str):
            raise TypeError()
        pad_char = self.pad_char
        case_sensitive = self.case_sensitive
        block_nchars = self._block_nchars
        # no padding available for this encoding scheme
        if pad_char is None:
            return s
        # padding character(s) to strip from the right of the string
        pad_chars = pad_char
        if not case_sensitive:
            pad_chars += pad_char.lower()+pad_char.upper()
        # strip padding from string
        s_stripped = s.rstrip(pad_chars)
        # if padding is required on decoding, check the correct amount was included
        if self.require_padding:
            padding = len(s)-len(s_stripped)
            extra_nchars = len(s_stripped)%block_nchars
            expected_padding = 0 if extra_nchars == 0 else block_nchars-extra_nchars
            if padding != expected_padding:
                raise PaddingError(padding, expected_padding)
        return s_stripped

    def canonical_bytes(self, b: bytes) -> bytes:
        self._validate_bytes(b)
        return b

    def canonical_string(self, s: str) -> str:
        if self.include_padding:
            return self.pad_string(s)
        return self.strip_string(s)

    def _validate_string(self, s: str) -> str:
        s = self.strip_string(s)
        return super()._validate_string(s)

    def _encode(self, b: bytes) -> str:
        alphabet = self.alphabet
        base = self.base
        char_nbits = self.char_nbits
        effective_base = self.effective_base
        # bytes as unsigned integer
        i = int.from_bytes(b, byteorder="big")
        # add padding bits (align to integral number of characters)
        nchars, extra_nbits = divmod((8*len(b)), char_nbits)
        if extra_nbits > 0:
            i <<= char_nbits-extra_nbits # pad bits set to 0
            nchars += 1
        # compute characters in reverse order
        revchars: List[str] = []
        for _ in range(nchars):
            # extract next digit by consuming rightmost char_nbits
            # Same as: d = i % (2**char_nbits); i >>= char_nbits
            i, d = divmod(i, effective_base)
            # ensure digit is valid for actual base (number of characters in the alphabet)
            if not d < base:
                raise InvalidDigitError(d, base)
            # add the next character to the list
            revchars.append(alphabet[d])
        # join characters, pad string (if padding is to be included) and return
        s = "".join(reversed(revchars))
        if not self.include_padding:
            return s
        return self.pad_string(s)

    def _decode(self, s: str) -> bytes:
        base = self.base
        char_nbits = self.char_nbits
        alphabet_revdir = self.alphabet.revdir
        # decode string into unsigned integer
        i = 0
        for c in s:
            d = alphabet_revdir[c]
            i = i*base + d
        # remove padding bits (ensure that there are not too many and that they are all set to zero)
        original_nbytes, extra_nbits = divmod((char_nbits*len(s)), 8)
        if extra_nbits >= char_nbits:
            raise DecodingError(f"More pad bits found ({extra_nbits}) than bits per character ({char_nbits}).")
        if extra_nbits > 0:
            i, pad_bits = divmod(i, 2**extra_nbits)
            if pad_bits != 0:
                raise DecodingError("Pad bits must be zero.")
        # convert unsigned integer into the required number of bytes (zero-pad to the left)
        return i.to_bytes(length=original_nbytes, byteorder="big")

    def options(self, skip_defaults: bool = False) -> Mapping[str, Any]:
        options: Dict[str, Any] = {}
        if not skip_defaults or self._init_char_nbits != "auto":
            options["char_nbits"] = self._init_char_nbits
        if not skip_defaults or self.pad_char is not None:
            options["pad_char"] = self.pad_char
        if not skip_defaults or self.padding != "ignore":
            options["padding"] = self.padding
        return options

Global variables

var PaddingOptions

Type of allowed padding options for fixed-character encoding:

PaddingOptions = Literal["ignore", "include", "require"]

See FixcharBaseEncoding.padding.

Classes

class FixcharBaseEncoding (alphabet: Union[str, range, Alphabet], *, case_sensitive: Optional[bool] = None, char_nbits: Union[int, Literal['auto']] = 'auto', pad_char: Optional[str] = None, padding: Literal['ignore', 'include', 'require'] = 'ignore')

Fixed-character encodings, generalising those described by rfc4648.

Constructor options:

  • char_nbits: Union[int, Literal["auto"]] = "auto", number of bits per character
  • pad_char: Optional[str] = None, optional padding character
  • padding: PaddingOptions = "ignore", padding style

If char_nbits is set to "auto" (by default), it is automatically computed as:

int(math.ceil(math.log2(base)))

From char_nbits, size of a block (in bytes and chars) is computed as:

block_nbytes = lcm(char_nbits, 8)//8
block_nchars = lcm(char_nbits, 8)//char_nbits

The value block_nbytes is presently not used, while the block_nchars is used for padding.

The padding option must be set to "ignore" if a padding character is not specified (i.e. if pad_char is None). If a padding character is specified, it must be a character (string of length 1) not in the encoding alphabet: it is allowed in decoding strings, but only at then end (so that s.rstrip(pad_char) removes all padding).

The padding behaviour is determined by the value of padding:

  • "ignore": no padding included on encoding, no padding required on decoding
  • "include": padding included on encoding, no padding required on decoding
  • "require": padding included on encoding, padding required on decoding

Encoding of a bytestring b:

  1. compute the minimum number extra_nbits of additional bits necessary to make 8*len(b) an integral multiple of char_nbits
  2. convert b to an unsigned integer i (big-endian)
  3. left-shift i by extra_nbits bits, introducing the necessary zero pad bits
  4. converts i to the encoding base, using the encoding alphabet for digits
  5. if padding is "include" or "require", append the minimum number of padding characters necessary to make the encoded string length an integral multiple of block_nchars

Decoding of a string s:

  1. if pad_char is not None, count the number N of contiguous padding characters at the end of s and strip them, obtaining s_stripped
  2. if padding is "require", ensure that N is exactly the minimum number of padding characters that must be appended to s_stripped to make its length an integral multiple of block_nchars
  3. converts s to an unsigned integer i, using the encoding alphabet for digits of the encoding base
  4. compute the number extra_nbits = (char_nbits*len(s))%8 of pad bits: if this is not smaller than char_nbits, raise DecodingError
  5. extract the value i%(2**extra_nbits) of the pad bits: if this is not zero, raise DecodingError
  6. compute the number of bytes in the decoded bytestring as original_nbytes = (char_nbits*len(s))//8
  7. right-shift i by extra_nbits bits, removing the zero pad bits
  8. converts i to its minimal byte representation (big-endian), then zero-pad on the left to reach original_nbytes bytes
Expand source code
class FixcharBaseEncoding(BaseEncoding):
    """
        Fixed-character encodings, generalising those described by [rfc4648](https://datatracker.ietf.org/doc/html/rfc4648.html).

        Constructor options:

        - `char_nbits: Union[int, Literal["auto"]] = "auto"`, number of bits per character
        - `pad_char: Optional[str] = None`, optional padding character
        - `padding: PaddingOptions = "ignore"`, padding style

        If `char_nbits` is set to `"auto"` (by default), it is automatically computed as:

        ```py
        int(math.ceil(math.log2(base)))
        ```

        From `char_nbits`, size of a block (in bytes and chars) is computed as:

        ```py
        block_nbytes = lcm(char_nbits, 8)//8
        block_nchars = lcm(char_nbits, 8)//char_nbits
        ```

        The value `block_nbytes` is presently not used, while the `block_nchars` is used for padding.

        The `padding` option must be set to `"ignore"` if a padding character is not specified (i.e. if `pad_char` is `None`).
        If a padding character is specified, it must be a character (string of length 1) not in the encoding alphabet:
        it is allowed in decoding strings, but only at then end (so that `s.rstrip(pad_char)` removes all padding).

        The padding behaviour is determined by the value of `padding`:

        - `"ignore"`: no padding included on encoding, no padding required on decoding
        - `"include"`: padding included on encoding, no padding required on decoding
        - `"require"`: padding included on encoding, padding required on decoding

        Encoding of a bytestring `b`:

        1. compute the minimum number `extra_nbits` of additional bits necessary to make `8*len(b)` an integral multiple of `char_nbits`
        2. convert `b` to an unsigned integer `i` (big-endian)
        3. left-shift `i` by `extra_nbits` bits, introducing the necessary zero _pad bits_
        4. converts `i` to the encoding base, using the encoding alphabet for digits
        5. if `padding` is `"include"` or `"require"`, append the minimum number of padding characters necessary to make the encoded
           string length an integral multiple of `block_nchars`

        Decoding of a string `s`:

        1. if `pad_char` is not `None`, count the number N of contiguous padding characters at the end of `s` and strip them, obtaining `s_stripped`
        2. if `padding` is `"require"`, ensure that N is exactly the minimum number of padding characters that must be appended to `s_stripped`
           to make its length an integral multiple of `block_nchars`
        3. converts `s` to an unsigned integer `i`, using the encoding alphabet for digits of the encoding base
        4. compute the number `extra_nbits = (char_nbits*len(s))%8` of pad bits: if this is not smaller than `char_nbits`,
           raise `bases.encoding.errors.DecodingError`
        5. extract the value `i%(2**extra_nbits)` of the pad bits: if this is not zero, raise `bases.encoding.errors.DecodingError`
        6. compute the number of bytes in the decoded bytestring as `original_nbytes = (char_nbits*len(s))//8`
        7. right-shift `i` by `extra_nbits` bits, removing the zero pad bits
        8. converts `i` to its minimal byte representation (big-endian), then zero-pad on the left to reach `original_nbytes` bytes
    """

    _char_nbits: int
    _init_char_nbits: Union[int, Literal["auto"]]
    _pad_char: Optional[str] = None
    _padding: PaddingOptions = "ignore"
    _block_nbytes: int
    _block_nchars: int

    def __init__(self, alphabet: Union[str, range, Alphabet], *,
                 case_sensitive: Optional[bool] = None,
                 char_nbits: Union[int, Literal["auto"]] = "auto",
                 pad_char: Optional[str] = None,
                 padding: PaddingOptions = "ignore"):
        if padding not in ("ignore", "include", "require"):
            raise TypeError("Allowed padding options are: 'ignore', 'include' and 'require'.")
        super().__init__(alphabet, case_sensitive=case_sensitive)
        self._init_char_nbits = char_nbits
        if char_nbits == "auto":
            char_nbits = int(math.ceil(math.log2(self.base)))
        self._char_nbits = char_nbits
        self._pad_char = pad_char
        self._padding = padding
        self.__validate_init()
        l = _lcm(char_nbits, 8)
        self._block_nbytes = l//8
        self._block_nchars = l//char_nbits

    def __validate_init(self) -> None:
        alphabet = self.alphabet
        pad_char = self.pad_char
        if pad_char is None:
            if self.padding != "ignore":
                raise ValueError("If padding is not 'ignore', a padding character must be specified.")
        else:
            if len(pad_char) != 1:
                raise ValueError("If specified, padding character must have length 1.")
            if pad_char in alphabet:
                raise ValueError("Padding character cannot be in the alphabet.")
        char_nbits = self.char_nbits
        if char_nbits is not None:
            if char_nbits <= 0:
                raise ValueError("If specified, number of bits per character must be positive.")
            if 2**char_nbits < self.base:
                raise ValueError(f"Number of bits per character is insufficient to cover the whole alphabet. This is likely a mistake. "
                                 f"If it isn't, please truncate the alphabet to {2**char_nbits} characters (or less).")

    @property
    def char_nbits(self) -> int:
        """
            Number of bits per character.
        """
        return self._char_nbits

    @property
    def block_nchars(self) -> int:
        """
            Number of characters in a block.
        """
        return self._block_nchars

    @property
    def effective_base(self) -> int:
        """
            Effective base used when decoding is `2**char_nbits`.
        """
        effective_base: int = 2**self.char_nbits
        return effective_base

    @property
    def padding(self) -> PaddingOptions:
        """
            Padding style:

            - `"ignore"`: no padding included on encoding, no padding required on decoding
            - `"include"`: padding included on encoding, no padding required on decoding
            - `"require"`: padding included on encoding, padding required on decoding
        """
        return self._padding

    @property
    def include_padding(self) -> bool:
        """
            Whether padding is included on encoding (derived from `FixcharBaseEncoding.padding`).
        """
        return self.padding in ("include", "require")

    @property
    def require_padding(self) -> bool:
        """
            Whether padding is required on decoding (derived from `FixcharBaseEncoding.padding`).
        """
        return self.padding == "require"

    @property
    def pad_char(self) -> Optional[str]:
        """
            An optional character to be used for padding of encoded strings.
            In [rfc4648](https://datatracker.ietf.org/doc/html/rfc4648.html), this is `"="` for both base64 and base32.
        """
        return self._pad_char

    def pad(self, require: bool = False) -> "FixcharBaseEncoding":
        """
            Returns a copy of this encoding which includes paddding (and optionally requires it).

            Example usage, from `"include"` to `"require"`:

            ```py
            >>> encoding.base32
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False),
                pad_char='=', padding='include')
            >>> encoding.base32.pad(require=True)
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False),
                pad_char='=', padding='require')
            ```

            Example usage, from `"ignore"` to `"include"`:

            ```py
            >>> encoding.base32z
            FixcharBaseEncoding(
                StringAlphabet('ybndrfg8ejkmcpqxot1uwisza345h769',
                               case_sensitive=False))
            >>> encoding.base32z.with_pad_char("=")
            FixcharBaseEncoding(
                StringAlphabet('ybndrfg8ejkmcpqxot1uwisza345h769',
                               case_sensitive=False),
                pad_char='=')
            >>> encoding.base32z.with_pad_char("=").pad()
            FixcharBaseEncoding(
                StringAlphabet('ybndrfg8ejkmcpqxot1uwisza345h769',
                               case_sensitive=False),
                pad_char='=', padding='include')
            ```
        """
        options = dict(padding="require" if require else "include")
        return self.with_options(**options)

    def nopad(self, allow: bool = True) -> "FixcharBaseEncoding":
        """
            Returns a copy of this encoding which does not include/require paddding
            (and optionally disallows it by removing the padding character).

            Example usage:

            ```py
            >>> encoding.base32
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False),
                pad_char='=', padding='include')
            >>> encoding.base32.nopad()
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False),
                pad_char='=')
            >>> encoding.base32.nopad(allow=False)
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False))
            ```
        """
        options = dict(padding="ignore", pad_char=self.pad_char if allow else None)
        return self.with_options(**options)

    def with_pad_char(self, pad_char: Optional[str]) -> "FixcharBaseEncoding":
        """
            Returns a copy of this encoding with a different padding character
            (or without a padding character if `pad_char` is `None`).

            Example usage:

            ```py
            >>> encoding.base32
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False),
                pad_char='=', padding='include')
            >>> encoding.base32.with_pad_char("~")
            FixcharBaseEncoding(
                StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                               case_sensitive=False),
                pad_char='~', padding='include')
            ```

        """
        options: Dict[str, Any] = dict(pad_char=pad_char)
        if pad_char is None:
            options["padding"] = "ignore"
        return self.with_options(**options)

    def pad_string(self, s: str) -> str:
        """
            If no padding character is specified for this encoding, returns the input string unchanged.
            If a padding character is specified for this encoding, pads the input string by appending the
            minimum number of padding characters necessary to make its length an integral multiple of the
            block char size (given by `FixcharBaseEncoding.block_nchars`).

            The value of `FixcharBaseEncoding.padding` is irrelevant to this method.
        """
        if not isinstance(s, str):
            raise TypeError()
        pad_char = self.pad_char
        block_nchars = self._block_nchars
        # no padding available for this encoding scheme
        if pad_char is None:
            return s
        # padding available, but no need for padding
        if len(s)%block_nchars == 0:
            return s
        # compute require padding length
        pad_len = block_nchars-(len(s)%block_nchars)
        # return padded string
        return s+pad_char*pad_len

    def strip_string(self, s: str) -> str:
        """
            If no padding character is specified for this encoding, returns the input string unchanged.
            If a padding character is specified for this encoding, strips the input string of any padding
            characters it might have to the right.
            If `FixcharBaseEncoding.padding` is set to `"require"`, checks that the correct number of
            padding characters were included and raises `bases.encoding.errors.PaddingError` if not.
        """
        if not isinstance(s, str):
            raise TypeError()
        pad_char = self.pad_char
        case_sensitive = self.case_sensitive
        block_nchars = self._block_nchars
        # no padding available for this encoding scheme
        if pad_char is None:
            return s
        # padding character(s) to strip from the right of the string
        pad_chars = pad_char
        if not case_sensitive:
            pad_chars += pad_char.lower()+pad_char.upper()
        # strip padding from string
        s_stripped = s.rstrip(pad_chars)
        # if padding is required on decoding, check the correct amount was included
        if self.require_padding:
            padding = len(s)-len(s_stripped)
            extra_nchars = len(s_stripped)%block_nchars
            expected_padding = 0 if extra_nchars == 0 else block_nchars-extra_nchars
            if padding != expected_padding:
                raise PaddingError(padding, expected_padding)
        return s_stripped

    def canonical_bytes(self, b: bytes) -> bytes:
        self._validate_bytes(b)
        return b

    def canonical_string(self, s: str) -> str:
        if self.include_padding:
            return self.pad_string(s)
        return self.strip_string(s)

    def _validate_string(self, s: str) -> str:
        s = self.strip_string(s)
        return super()._validate_string(s)

    def _encode(self, b: bytes) -> str:
        alphabet = self.alphabet
        base = self.base
        char_nbits = self.char_nbits
        effective_base = self.effective_base
        # bytes as unsigned integer
        i = int.from_bytes(b, byteorder="big")
        # add padding bits (align to integral number of characters)
        nchars, extra_nbits = divmod((8*len(b)), char_nbits)
        if extra_nbits > 0:
            i <<= char_nbits-extra_nbits # pad bits set to 0
            nchars += 1
        # compute characters in reverse order
        revchars: List[str] = []
        for _ in range(nchars):
            # extract next digit by consuming rightmost char_nbits
            # Same as: d = i % (2**char_nbits); i >>= char_nbits
            i, d = divmod(i, effective_base)
            # ensure digit is valid for actual base (number of characters in the alphabet)
            if not d < base:
                raise InvalidDigitError(d, base)
            # add the next character to the list
            revchars.append(alphabet[d])
        # join characters, pad string (if padding is to be included) and return
        s = "".join(reversed(revchars))
        if not self.include_padding:
            return s
        return self.pad_string(s)

    def _decode(self, s: str) -> bytes:
        base = self.base
        char_nbits = self.char_nbits
        alphabet_revdir = self.alphabet.revdir
        # decode string into unsigned integer
        i = 0
        for c in s:
            d = alphabet_revdir[c]
            i = i*base + d
        # remove padding bits (ensure that there are not too many and that they are all set to zero)
        original_nbytes, extra_nbits = divmod((char_nbits*len(s)), 8)
        if extra_nbits >= char_nbits:
            raise DecodingError(f"More pad bits found ({extra_nbits}) than bits per character ({char_nbits}).")
        if extra_nbits > 0:
            i, pad_bits = divmod(i, 2**extra_nbits)
            if pad_bits != 0:
                raise DecodingError("Pad bits must be zero.")
        # convert unsigned integer into the required number of bytes (zero-pad to the left)
        return i.to_bytes(length=original_nbytes, byteorder="big")

    def options(self, skip_defaults: bool = False) -> Mapping[str, Any]:
        options: Dict[str, Any] = {}
        if not skip_defaults or self._init_char_nbits != "auto":
            options["char_nbits"] = self._init_char_nbits
        if not skip_defaults or self.pad_char is not None:
            options["pad_char"] = self.pad_char
        if not skip_defaults or self.padding != "ignore":
            options["padding"] = self.padding
        return options

Ancestors

Instance variables

var block_nchars : int

Number of characters in a block.

Expand source code
@property
def block_nchars(self) -> int:
    """
        Number of characters in a block.
    """
    return self._block_nchars
var char_nbits : int

Number of bits per character.

Expand source code
@property
def char_nbits(self) -> int:
    """
        Number of bits per character.
    """
    return self._char_nbits
var effective_base : int

Effective base used when decoding is 2**char_nbits.

Expand source code
@property
def effective_base(self) -> int:
    """
        Effective base used when decoding is `2**char_nbits`.
    """
    effective_base: int = 2**self.char_nbits
    return effective_base
var include_padding : bool

Whether padding is included on encoding (derived from FixcharBaseEncoding.padding).

Expand source code
@property
def include_padding(self) -> bool:
    """
        Whether padding is included on encoding (derived from `FixcharBaseEncoding.padding`).
    """
    return self.padding in ("include", "require")
var pad_char : Optional[str]

An optional character to be used for padding of encoded strings. In rfc4648, this is "=" for both base64 and base32.

Expand source code
@property
def pad_char(self) -> Optional[str]:
    """
        An optional character to be used for padding of encoded strings.
        In [rfc4648](https://datatracker.ietf.org/doc/html/rfc4648.html), this is `"="` for both base64 and base32.
    """
    return self._pad_char
var padding : Literal['ignore', 'include', 'require']

Padding style:

  • "ignore": no padding included on encoding, no padding required on decoding
  • "include": padding included on encoding, no padding required on decoding
  • "require": padding included on encoding, padding required on decoding
Expand source code
@property
def padding(self) -> PaddingOptions:
    """
        Padding style:

        - `"ignore"`: no padding included on encoding, no padding required on decoding
        - `"include"`: padding included on encoding, no padding required on decoding
        - `"require"`: padding included on encoding, padding required on decoding
    """
    return self._padding
var require_padding : bool

Whether padding is required on decoding (derived from FixcharBaseEncoding.padding).

Expand source code
@property
def require_padding(self) -> bool:
    """
        Whether padding is required on decoding (derived from `FixcharBaseEncoding.padding`).
    """
    return self.padding == "require"

Methods

def nopad(self, allow: bool = True) ‑> FixcharBaseEncoding

Returns a copy of this encoding which does not include/require paddding (and optionally disallows it by removing the padding character).

Example usage:

>>> encoding.base32
FixcharBaseEncoding(
    StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                   case_sensitive=False),
    pad_char='=', padding='include')
>>> encoding.base32.nopad()
FixcharBaseEncoding(
    StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                   case_sensitive=False),
    pad_char='=')
>>> encoding.base32.nopad(allow=False)
FixcharBaseEncoding(
    StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                   case_sensitive=False))
Expand source code
def nopad(self, allow: bool = True) -> "FixcharBaseEncoding":
    """
        Returns a copy of this encoding which does not include/require paddding
        (and optionally disallows it by removing the padding character).

        Example usage:

        ```py
        >>> encoding.base32
        FixcharBaseEncoding(
            StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                           case_sensitive=False),
            pad_char='=', padding='include')
        >>> encoding.base32.nopad()
        FixcharBaseEncoding(
            StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                           case_sensitive=False),
            pad_char='=')
        >>> encoding.base32.nopad(allow=False)
        FixcharBaseEncoding(
            StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                           case_sensitive=False))
        ```
    """
    options = dict(padding="ignore", pad_char=self.pad_char if allow else None)
    return self.with_options(**options)
def pad(self, require: bool = False) ‑> FixcharBaseEncoding

Returns a copy of this encoding which includes paddding (and optionally requires it).

Example usage, from "include" to "require":

>>> encoding.base32
FixcharBaseEncoding(
    StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                   case_sensitive=False),
    pad_char='=', padding='include')
>>> encoding.base32.pad(require=True)
FixcharBaseEncoding(
    StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                   case_sensitive=False),
    pad_char='=', padding='require')

Example usage, from "ignore" to "include":

>>> encoding.base32z
FixcharBaseEncoding(
    StringAlphabet('ybndrfg8ejkmcpqxot1uwisza345h769',
                   case_sensitive=False))
>>> encoding.base32z.with_pad_char("=")
FixcharBaseEncoding(
    StringAlphabet('ybndrfg8ejkmcpqxot1uwisza345h769',
                   case_sensitive=False),
    pad_char='=')
>>> encoding.base32z.with_pad_char("=").pad()
FixcharBaseEncoding(
    StringAlphabet('ybndrfg8ejkmcpqxot1uwisza345h769',
                   case_sensitive=False),
    pad_char='=', padding='include')
Expand source code
def pad(self, require: bool = False) -> "FixcharBaseEncoding":
    """
        Returns a copy of this encoding which includes paddding (and optionally requires it).

        Example usage, from `"include"` to `"require"`:

        ```py
        >>> encoding.base32
        FixcharBaseEncoding(
            StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                           case_sensitive=False),
            pad_char='=', padding='include')
        >>> encoding.base32.pad(require=True)
        FixcharBaseEncoding(
            StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                           case_sensitive=False),
            pad_char='=', padding='require')
        ```

        Example usage, from `"ignore"` to `"include"`:

        ```py
        >>> encoding.base32z
        FixcharBaseEncoding(
            StringAlphabet('ybndrfg8ejkmcpqxot1uwisza345h769',
                           case_sensitive=False))
        >>> encoding.base32z.with_pad_char("=")
        FixcharBaseEncoding(
            StringAlphabet('ybndrfg8ejkmcpqxot1uwisza345h769',
                           case_sensitive=False),
            pad_char='=')
        >>> encoding.base32z.with_pad_char("=").pad()
        FixcharBaseEncoding(
            StringAlphabet('ybndrfg8ejkmcpqxot1uwisza345h769',
                           case_sensitive=False),
            pad_char='=', padding='include')
        ```
    """
    options = dict(padding="require" if require else "include")
    return self.with_options(**options)
def pad_string(self, s: str) ‑> str

If no padding character is specified for this encoding, returns the input string unchanged. If a padding character is specified for this encoding, pads the input string by appending the minimum number of padding characters necessary to make its length an integral multiple of the block char size (given by FixcharBaseEncoding.block_nchars).

The value of FixcharBaseEncoding.padding is irrelevant to this method.

Expand source code
def pad_string(self, s: str) -> str:
    """
        If no padding character is specified for this encoding, returns the input string unchanged.
        If a padding character is specified for this encoding, pads the input string by appending the
        minimum number of padding characters necessary to make its length an integral multiple of the
        block char size (given by `FixcharBaseEncoding.block_nchars`).

        The value of `FixcharBaseEncoding.padding` is irrelevant to this method.
    """
    if not isinstance(s, str):
        raise TypeError()
    pad_char = self.pad_char
    block_nchars = self._block_nchars
    # no padding available for this encoding scheme
    if pad_char is None:
        return s
    # padding available, but no need for padding
    if len(s)%block_nchars == 0:
        return s
    # compute require padding length
    pad_len = block_nchars-(len(s)%block_nchars)
    # return padded string
    return s+pad_char*pad_len
def strip_string(self, s: str) ‑> str

If no padding character is specified for this encoding, returns the input string unchanged. If a padding character is specified for this encoding, strips the input string of any padding characters it might have to the right. If FixcharBaseEncoding.padding is set to "require", checks that the correct number of padding characters were included and raises PaddingError if not.

Expand source code
def strip_string(self, s: str) -> str:
    """
        If no padding character is specified for this encoding, returns the input string unchanged.
        If a padding character is specified for this encoding, strips the input string of any padding
        characters it might have to the right.
        If `FixcharBaseEncoding.padding` is set to `"require"`, checks that the correct number of
        padding characters were included and raises `bases.encoding.errors.PaddingError` if not.
    """
    if not isinstance(s, str):
        raise TypeError()
    pad_char = self.pad_char
    case_sensitive = self.case_sensitive
    block_nchars = self._block_nchars
    # no padding available for this encoding scheme
    if pad_char is None:
        return s
    # padding character(s) to strip from the right of the string
    pad_chars = pad_char
    if not case_sensitive:
        pad_chars += pad_char.lower()+pad_char.upper()
    # strip padding from string
    s_stripped = s.rstrip(pad_chars)
    # if padding is required on decoding, check the correct amount was included
    if self.require_padding:
        padding = len(s)-len(s_stripped)
        extra_nchars = len(s_stripped)%block_nchars
        expected_padding = 0 if extra_nchars == 0 else block_nchars-extra_nchars
        if padding != expected_padding:
            raise PaddingError(padding, expected_padding)
    return s_stripped
def with_pad_char(self, pad_char: Optional[str]) ‑> FixcharBaseEncoding

Returns a copy of this encoding with a different padding character (or without a padding character if pad_char is None).

Example usage:

>>> encoding.base32
FixcharBaseEncoding(
    StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                   case_sensitive=False),
    pad_char='=', padding='include')
>>> encoding.base32.with_pad_char("~")
FixcharBaseEncoding(
    StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                   case_sensitive=False),
    pad_char='~', padding='include')
Expand source code
def with_pad_char(self, pad_char: Optional[str]) -> "FixcharBaseEncoding":
    """
        Returns a copy of this encoding with a different padding character
        (or without a padding character if `pad_char` is `None`).

        Example usage:

        ```py
        >>> encoding.base32
        FixcharBaseEncoding(
            StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                           case_sensitive=False),
            pad_char='=', padding='include')
        >>> encoding.base32.with_pad_char("~")
        FixcharBaseEncoding(
            StringAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567',
                           case_sensitive=False),
            pad_char='~', padding='include')
        ```

    """
    options: Dict[str, Any] = dict(pad_char=pad_char)
    if pad_char is None:
        options["padding"] = "ignore"
    return self.with_options(**options)

Inherited members