"""Provides SumoLogic search wrappers."""

import datetime
import time
from typing import Iterable, List

from hutch.security.sumologic import base
from hutch.security.sumologic.constants import (
    PAGE_SIZE,
    SEARCH_STATE_CANCELLED,
    SEARCH_STATE_DONE,
)
from hutch.security.sumologic.exceptions import SearchException, SearchTimeout
from hutch.security.sumologic.models import SearchJob, SearchJobMessage, SearchJobRecord


class Client(base.Client):
    """Provides a SumoLogic search client."""
    def records(self, job_id: str) -> Iterable[List[SearchJobRecord]]:
        """Yields pages of records until there are none left.

        :param job_id: The search job identifier to return records for.
        """
        offset = 0

        while True:
            result = self.client.search_job_records(
                {"id": job_id}, limit=PAGE_SIZE, offset=offset
            )

            # We can't deserialise directly into the model here, as we want to
            # unnest all data from under 'map'.
            count = len(result.get("records", []))
            offset += count

            records = []
            for record in result.get("records", []):
                records.append(SearchJobRecord(**record["map"]))

            # Loop until we receive fewer results than we requested.
            if count == PAGE_SIZE:
                yield records
            else:
                break

        # Yield the final page (which may be empty).
        yield records
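
    # A minimal consumption sketch for records(). The job identifier below is
    # hypothetical; it would come from a previously created search job:
    #
    #     for page in client.records("ABCDEF0123456789"):
    #         for record in page:
    #             ...  # each page is a list of SearchJobRecord objects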
    def messages(self, job_id: str) -> Iterable[List[SearchJobMessage]]:
        """Yields pages of messages until there are none left.

        :param job_id: The search job identifier to return messages for.
        """
        offset = 0

        while True:
            result = self.client.search_job_messages(
                {"id": job_id}, limit=PAGE_SIZE, offset=offset
            )

            # We can't deserialise directly into the model here, as we want to
            # unnest all data from under 'map'.
            count = len(result.get("messages", []))
            offset += count

            messages = []
            for message in result.get("messages", []):
                messages.append(SearchJobMessage(**message["map"]))

            # Loop until we receive fewer messages than we requested.
            if count == PAGE_SIZE:
                yield messages
            else:
                break

        # Yield the final page (which may be empty).
        yield messages
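
    # As messages() yields pages (lists) rather than individual messages, the
    # pages can be flattened into one stream of SearchJobMessage objects; a
    # sketch, assuming 'client' and 'job_id' are already in scope:
    #
    #     import itertools
    #
    #     for message in itertools.chain.from_iterable(client.messages(job_id)):
    #         ...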
    def query(
        self,
        query: str,
        start: datetime.datetime,
        end: datetime.datetime,
        timeout: int = 600,
        interval: int = 10,
    ) -> SearchJob:
        """Synchronously execute a query and return the search job metadata.

        :param query: A search query to execute.
        :param start: A date stamp to constrain the query (from). Must be
            timezone-aware.
        :param end: A date stamp to constrain the query (to). Must be
            timezone-aware.
        :param timeout: The maximum duration to wait for results from the API,
            in seconds.
        :param interval: The time to wait between requests to the API to check
            query status, in seconds.
        :return: Search job metadata.
        """
        time_start = time.time()
        epoch = datetime.datetime.fromtimestamp(0, tz=datetime.timezone.utc)

        # Queries are submitted with time components as milliseconds since the
        # epoch, to avoid the need to parse and pass a timezone to the API.
        job = self.client.search_job(
            query,
            fromTime=int((start - epoch).total_seconds() * 1000),
            toTime=int((end - epoch).total_seconds() * 1000),
        )

        # Poll the job status until timeout, or until the API indicates the
        # job has finished (done or cancelled).
        while True:
            status = self.client.search_job_status(job)

            if status.get("state") in (SEARCH_STATE_DONE, SEARCH_STATE_CANCELLED):
                break

            # Wait and check for timeout.
            time.sleep(interval)

            if time.time() - time_start > timeout:
                raise SearchTimeout(
                    "Search did not complete before a client-side timeout was "
                    "reached."
                )

        # Raise any errors reported by the API as exceptions.
        errors = status.get("pendingErrors", [])
        if len(errors) > 0:
            raise SearchException(f"Search query returned errors: {', '.join(errors)}")

        return SearchJob(id=job.get("id"), **status)
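
# End-to-end usage sketch. The Client constructor arguments are hypothetical
# (they are defined by base.Client, which is not shown here), and the query
# string is illustrative only. Note that start and end must be timezone-aware:
#
#     client = Client(...)
#
#     end = datetime.datetime.now(tz=datetime.timezone.utc)
#     start = end - datetime.timedelta(hours=1)
#
#     # query() blocks until the job completes, is cancelled, or the
#     # client-side timeout is reached, then returns the SearchJob metadata.
#     job = client.query("error | count by _sourceCategory", start, end)
#
#     for page in client.records(job.id):
#         for record in page:
#             print(record)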