Module shaystack.haysonparser

Parse Json file conform with the specification describe here (https://www.project-haystack.org/doc/Json) and produce a Grid instance.

Expand source code
# -*- coding: utf-8 -*-
# JSON Grid Parser
# See the accompanying LICENSE file.
# (C) 2018 VRT Systems
# (C) 2021 Engie Digital
#
# vim: set ts=4 sts=4 et tw=78 sw=4 si:

"""
Parse Json file conform with the specification describe
here (https://www.project-haystack.org/doc/Json)
and produce a `Grid` instance.
"""

import copy
import datetime
import functools
import json
import re
import sys
from typing import Any, List, Dict, Union

import iso8601

from .datatypes import Quantity, Coordinate, Ref, Bin, Uri, \
    MARKER, NA, REMOVE, XStr
from .grid import Grid
from .metadata import MetadataObject
from .type import Entity
from .version import LATEST_VER, Version
from .zoneinfo import timezone

URI_META = 'Uri'
GRID_SEP = re.compile(r'\n\n+')

# Type regular expressions
MARKER_STR = 'Marker'
NA_STR = 'NA'
REMOVE_STR = 'Remove'
NUMBER_STR = 'Num'
REF = 'Ref'
DATE = 'Date'
DATE_RE = re.compile(r'^(\d{4})-(\d{2})-(\d{2})$', flags=re.MULTILINE)
TIME = 'Time'
TIME_RE = re.compile(r'^(\d{2}):(\d{2})(:?:(\d{2}(:?\.\d+)?))?$',
                     flags=re.MULTILINE)
DATETIME = 'DateTime'
DATETIME_RE = re.compile(r'^(\d{4}-\d{2}-\d{2}T'
                         r'\d{2}:\d{2}(:?:\d{2}(:?\.\d+)?)'
                         r'(:?[zZ]|[+\-]\d+:?\d*))(:? ([A-Za-z\-+_0-9]+))?$',
                         flags=re.MULTILINE)

URI = 'Uri'
BIN = 'Bin'
COORD = 'Coord'


def _parse_metadata(meta: Entity, version: Version) -> MetadataObject:
    metadata = MetadataObject()
    for name, value in meta.items():
        metadata[name] = _parse_embedded_scalar(value, version=version)
    return metadata


def _parse_cols(grid: Grid, parsed: List[Entity], version: Version) -> None:
    for col in parsed:
        name = col.pop('name')
        meta = {}
        for key, value in col.items():
            value = _parse_embedded_scalar(value, version=version)
            if value is not None:
                meta[key] = value
        grid.column[name] = meta


def _parse_row(row: Dict[str, Any], version: Version) -> Entity:
    parsed_row = {}
    for col, value in row.items():
        value = _parse_embedded_scalar(value, version=version)
        if value is not None:
            parsed_row[col] = value
    return parsed_row


def _parse_embedded_scalar(scalar: Union[None, List, Dict, str],
                           version: Version = LATEST_VER) -> Any:  # pylint: disable=too-many-locals
    if isinstance(scalar, list):
        # We support this only in version 3.0 and up.
        return list(map(functools.partial(parse_scalar, version=version),
                        scalar))

    if isinstance(scalar, dict):
        kind = scalar.get('_kind')
        if kind:

            if kind == MARKER_STR:
                return MARKER
            if kind == NA_STR:
                return NA
            if kind == REMOVE_STR:
                return REMOVE
            if kind == NUMBER_STR:
                value = scalar.get('val')
                if value == 'INF':
                    return float('INF')
                if value == '-INF':
                    return -float('INF')
                if value == 'NaN':
                    return float('nan')
                if scalar.get('unit'):
                    return Quantity(value, scalar.get('unit'))
                return Quantity(value)
            # Conversion to dict of float value turn them into float
            # so regex won't work... better just return them
            if isinstance(scalar, (float, int)):
                return scalar


            # Is it a xstr?
            if kind == 'XStr':
                return XStr(scalar.get('type'), scalar.get('val'))

            # Is it a reference?
            if kind == REF:
                return Ref(scalar.get('val'), scalar.get('dis'))

            # Is it a date?
            if kind == DATE:
                match = DATE_RE.match(scalar.get('val'))
                (year, month, day) = match.groups()
                return datetime.date(year=int(year), month=int(month), day=int(day))

            # Is it a time?
            if kind == TIME:
                match = TIME_RE.match(scalar.get('val'))
                if match:
                    (hour, minute, _, second, _) = match.groups()
                    # Convert second to seconds and microseconds
                    if second is None:
                        sec = 0
                        usec = 0
                    elif '.' in second:
                        (whole_sec, frac_sec) = second.split('.', 1)
                        sec = int(whole_sec)
                        usec = int(frac_sec[:6].ljust(6, '0'))
                    else:
                        sec = int(second)
                        usec = 0
                    return datetime.time(hour=int(hour), minute=int(minute),
                                         second=sec, microsecond=usec)

            # Is it a date/time?
            if kind == DATETIME:
                match = DATETIME_RE.match(scalar.get('val'))
                if match:
                    matches = match.groups()
                    # Parse ISO8601 component
                    iso_date = iso8601.parse_date(matches[0])
                    # Parse timezone
                    tzname = scalar.get('tz')
                    if tzname is None:
                        return iso_date  # No timezone given
                    try:
                        time_zone = timezone(tzname)
                        return iso_date.astimezone(time_zone)
                    except TypeError:  # noqa: E722 pragma: no cover
                        # Unlikely code path.
                        return iso_date

            # Is it a URI?
            if kind == URI:
                return Uri(scalar.get('val'))

            # Is it a Bin?
            if kind == BIN:
                return Bin(scalar.get('val'))

            # Is it a co-ordinate?
            if kind == COORD:
                return Coordinate(float(scalar.get('lat')), scalar.get('lng'))
            return scalar
        # We support this only in version 3.0 and up.
        if sys.version_info[0] < 3 and {"meta", "cols", "rows"} <= scalar.viewkeys() \
                or {"meta", "cols", "rows"} <= scalar.keys():  # Check if grid in grid
            return parse_grid(scalar)
        return {k: parse_scalar(v, version=version) for (k, v) in scalar.items()}

    return scalar

def parse_scalar(scalar: Union[str, bool, float, int, list, dict], version: Version = LATEST_VER) -> Any:
    """
    Parse a scalar.
    Args:
        scalar: The string with the scalar value.
        version: The Haysack version
    Returns:
        The scalar value.
    """
    if scalar is None:
        return None
    if isinstance(scalar, (bool, float, int)):
        return scalar
    if isinstance(scalar, str) and \
            (len(scalar) >= 2) and \
            (scalar[0] in ('"', '[', '{')) and \
            (scalar[-1] in ('"', ']', '}')):
        scalar = json.loads(scalar)

    return _parse_embedded_scalar(scalar, version=version)


def parse_grid(grid_str: Union[str, Dict[str, Any]]) -> Grid:
    """
    Parse a grid from json string.
    Args:
        grid_str: The json string
    Returns:
        The corresponding grid.
    """
    if isinstance(grid_str, str):
        parsed = json.loads(grid_str)
    else:
        parsed = copy.deepcopy(grid_str)
    meta = parsed.pop('meta')
    # Decode version
    version = Version(meta.pop('ver'))

    # Parse the remaining elements
    metadata = _parse_metadata(meta, version)

    grid = Grid(version=version, metadata=metadata)

    # Grab the columns in the order given
    _parse_cols(grid, parsed.pop('cols'), version)

    # Parse the rows
    for row in (parsed.pop('rows', []) or []):
        parsed_row = _parse_row(row, version)
        grid.append(parsed_row)

    return grid

Functions

def parse_grid(grid_str: Union[str, Dict[str, Any]]) ‑> shaystack.grid.Grid

Parse a grid from json string.

Args

grid_str
The json string

Returns

The corresponding grid.

Expand source code
def parse_grid(grid_str: Union[str, Dict[str, Any]]) -> Grid:
    """
    Parse a grid from json string.
    Args:
        grid_str: The json string
    Returns:
        The corresponding grid.
    """
    if isinstance(grid_str, str):
        parsed = json.loads(grid_str)
    else:
        parsed = copy.deepcopy(grid_str)
    meta = parsed.pop('meta')
    # Decode version
    version = Version(meta.pop('ver'))

    # Parse the remaining elements
    metadata = _parse_metadata(meta, version)

    grid = Grid(version=version, metadata=metadata)

    # Grab the columns in the order given
    _parse_cols(grid, parsed.pop('cols'), version)

    # Parse the rows
    for row in (parsed.pop('rows', []) or []):
        parsed_row = _parse_row(row, version)
        grid.append(parsed_row)

    return grid
def parse_scalar(scalar: Union[str, bool, float, int, list, dict], version: shaystack.version.Version = <shaystack.version.Version object>) ‑> Any

Parse a scalar.

Args

scalar
The string with the scalar value.
version
The Haysack version

Returns

The scalar value.

Expand source code
def parse_scalar(scalar: Union[str, bool, float, int, list, dict], version: Version = LATEST_VER) -> Any:
    """
    Parse a scalar.
    Args:
        scalar: The string with the scalar value.
        version: The Haysack version
    Returns:
        The scalar value.
    """
    if scalar is None:
        return None
    if isinstance(scalar, (bool, float, int)):
        return scalar
    if isinstance(scalar, str) and \
            (len(scalar) >= 2) and \
            (scalar[0] in ('"', '[', '{')) and \
            (scalar[-1] in ('"', ']', '}')):
        scalar = json.loads(scalar)

    return _parse_embedded_scalar(scalar, version=version)