"""Provides JupiterOne query wrappers."""
import json
import time
from typing import Any, Dict, Optional
import jmespath
import requests
from hutch.security.jupiterone.constants import (
API_ENDPOINT,
API_ENDPOINT_GRAPHQL,
JMESPATH_J1QL_DATA,
QUERY_J1QL_DEFERRED,
STATUS_QUERY_IN_PROG,
)
from hutch.security.jupiterone.exceptions import QueryException, QueryTimeout
from hutch.security.jupiterone.models import (
DeferredQuery,
DeferredQueryStatus,
QueryResponse,
)
class Client:
"""Provides a JupiterOne query client."""
def __init__(self, account: str, token: str, api_url: str = API_ENDPOINT):
"""Initialise a JupiterOne query client.
:param account: The JupiterOne account ID to authenticate with.
:param token: The JupiterOne token to authenticate with.
:param api_url: The JupiterOne API endpoint to interact with.
"""
self.api = api_url.rstrip("/")
self.headers = {
"Authorization": f"Bearer {token}",
"LifeOmic-Account": account,
}
def _parse_graphql_errors(self, body: str) -> str:
"""Parses errors from a GraphQL response, returning a string.
This method accepts a string, rather than JSON, as it may be called in exception
handlers where attempts to call .json() on a response object would also need
to be handled.
:param body: The response body from the API, as a string.
:return: A string containing errors encountered.
"""
try:
response = json.loads(body)
errors = response.get("errors", [])
except json.JSONDecodeError:
return ""
return ", ".join(error.get("message") for error in errors)
def _get(
self,
uri: str,
headers: Optional[Dict[str, Any]] = None,
params: Optional[Dict[str, Any]] = None,
) -> requests.Response:
"""Performs an HTTP GET request.
This method wraps requests to ensure application / GraphQL errors are returned
as part of the exception.
:param uri: URL to perform the HTTP GET against.
:param headers: Dictionary of headers to add to the request.
:param params: HTTP parameters to add to the request.
:return: A requests Response object.
"""
try:
response = requests.get(uri, headers=headers, params=params)
response.raise_for_status()
except requests.exceptions.RequestException as err:
raise QueryException(err) from None
# Parse out any application errors and raise an exception if present.
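# GraphQL style APIs commonly report application level failures in an "errors"
# array in the response body while still returning an HTTP 2xx status, which is
# why the body is inspected even after raise_for_status() succeeds.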
errors = self._parse_graphql_errors(response.text)
if errors:
raise QueryException(errors) from None
return response
def _post(
self,
uri: str,
headers: Optional[Dict[str, Any]] = None,
payload: Optional[Dict[str, Any]] = None,
params: Optional[Dict[str, Any]] = None,
) -> requests.Response:
"""Performs an HTTP POST request.
This method wraps requests to ensure application / GraphQL errors are returned
as part of the exception.
:param uri: URL to perform the HTTP POST against.
:param headers: Dictionary of headers to add to the request.
:param payload: Dictionary of data to pass as JSON in the request.
:param params: HTTP parameters to add to the request.
:return: A requests Response object.
"""
try:
response = requests.post(uri, headers=headers, json=payload, params=params)
response.raise_for_status()
except requests.exceptions.RequestException as err:
raise QueryException(err) from None
# Parse out any application errors and raise an exception if present.
errors = self._parse_graphql_errors(response.text)
if errors:
raise QueryException(errors) from None
return response
def deferred_status(self, status_url: str) -> DeferredQueryStatus:
"""Returns the status of a deferred query.
:param status_url: The status URL of the deferred query.
:return: A deferred query status object.
"""
try:
status = self._get(status_url)
except QueryException as err:
raise QueryException(f"Failed to get results for query: {err}") from None
# Return the deferred query status.
return DeferredQueryStatus(status_url=status_url, **status.json())
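# Note: the status payload is expanded directly into DeferredQueryStatus, and is
# expected to include at least a "status" field (compared against
# STATUS_QUERY_IN_PROG by query()) and a "url" field pointing at the results.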
def deferred(
self,
query: str,
cursor: str = "",
include_deleted: bool = False,
) -> DeferredQuery:
"""Performs a deferred query, returning a deferred query object.
:param query: The JupiterOne query to execute.
:param cursor: The cursor for the next page of results to fetch, returned from
a previous query.
:param include_deleted: Whether to include 'recently deleted' objects from
JupiterOne.
:return: A deferred query object.
"""
endpoint = f"{self.api}{API_ENDPOINT_GRAPHQL}"
# It's a little confusing, but the query provided by the user is actually a
# variable passed to the canned J1QL_DEFERRED GraphQL query. Additionally,
# all original parameters must be provided when cursoring / paging over results.
payload = {
"query": QUERY_J1QL_DEFERRED,
"variables": {
"query": query,
"cursor": cursor,
"includeDeleted": include_deleted,
"deferredResponse": "FORCE",
},
}
# Schedule the query.
try:
schedule = self._post(endpoint, headers=self.headers, payload=payload)
except QueryException as err:
raise QueryException(f"Failed to submit query to J1: {err}") from None
# Return a deferred query object, rather than querying for status and returning
# a status object. This is done as the J1 API doesn't appear to allow returning
# the original query as part of a GraphQL response, and requires that the
# original query be specified when paging over results.
status = jmespath.search(JMESPATH_J1QL_DATA, schedule.json())
return DeferredQuery(
query=query,
include_deleted=include_deleted,
status_url=status.get("url"),
)
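# Illustrative sketch (commented out) of driving the deferred API by hand; the
# account, token, and J1QL query below are placeholders:
#
#   client = Client(account="example-account-id", token="example-api-token")
#   deferred = client.deferred("FIND Host")
#   status = client.deferred_status(deferred.status_url)
#   if status.status != STATUS_QUERY_IN_PROG:
#       print(status.url)  # The results can now be fetched from this URL.
#
# Most callers should prefer query(), below, which wraps this polling for them.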
def query(
self,
query: str,
cursor: str = "",
interval: int = 5,
timeout: int = 300,
include_deleted: bool = False,
) -> QueryResponse:
"""Syncronously performs a JupiterOne search, returning a response object.
:param query: The JupiterOne query to execute.
:param cursor: The cursor to use when retrieving results, used for pagination.
:param interval: The time to wait between requests to the API to check query
status, in seconds.
:param timeout: The maximum time to wait for results, in seconds.
:param include_deleted: Whether to include 'recently deleted' objects from
JupiterOne.
:return: A Query response object.
"""
time_start = time.time()
# Schedule the query, and poll for status change. All queries use the deferred
# API, as otherwise long queries will timeout.
scheduled = self.deferred(query, cursor=cursor, include_deleted=include_deleted)
while True:
try:
status = self.deferred_status(scheduled.status_url)
except QueryException as err:
raise QueryException(f"Failed to get status for query: {err}") from None
# Check if the query has completed.
if status.status != STATUS_QUERY_IN_PROG:
break
# Wait and check for timeout.
time.sleep(interval)
if time.time() - time_start > timeout:
message = "Search did not complete before a client timeout was reached."
raise QueryTimeout(message) from None
# Fetch and return a results object. Note: this final request falls outside
# of the timeout block, as the results are ready and just need to be fetched.
try:
results = self._get(status.url)
except QueryException as err:
raise QueryException(f"Failed to get results for query: {err}") from None
# Generate results and return to the caller. Note: It's up to the caller to
# paginate, to reduce memory footprint.
data = results.json()
return QueryResponse(
query=query,
count=data.get("totalCount"),
cursor=data.get("cursor", None),
results=data.get("data", []),
include_deleted=include_deleted,
)
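# Illustrative end-to-end example (commented out), including cursor based
# pagination; the account, token, and J1QL query are placeholders, and the
# "results" / "cursor" attribute names assume QueryResponse exposes the fields
# populated above:
#
#   client = Client(account="example-account-id", token="example-api-token")
#   cursor = ""
#   while True:
#       page = client.query("FIND Host", cursor=cursor)
#       for entity in page.results:
#           print(entity)
#       if not page.cursor:
#           break
#       cursor = page.cursor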