---
title: Base dataset
keywords: fastai
sidebar: home_sidebar
summary: "Base class for dataset module."
description: "Base class for dataset module."
nb_path: "nbs/datasets/base.ipynb"
---
{% raw %}
{% endraw %} {% raw %}
{% endraw %}

AbstractDataset

v1

{% raw %}

class AbstractDataset[source]

AbstractDataset(args)

{% endraw %} {% raw %}
{% endraw %}

v2

{% raw %}

class AbstractDatasetv2[source]

AbstractDatasetv2(args)

{% endraw %} {% raw %}
{% endraw %}

Dataset

SessionDataset

v1

{% raw %}

class SessionDataset[source]

SessionDataset(root, process_method, min_date=None, session_length=None, min_session_length=None, min_item_support=None, num_slices=None, days_offset=None, days_shift=None, days_train=None, days_test=None, data=None) :: Dataset

Session dataset base class.

Args:

- root (string): Root directory where the dataset should be saved.
- process_method (string): Splitting strategy. One of:
    - last: last day => test set
    - last_min_date: last day => test set, but from a minimal date onwards
    - days_test: last N days => test set
    - slice: create multiple train-test combinations with a sliding-window approach
- min_date (string): Minimum date.
- session_length (int): Session time length in seconds (default: 30 * 60, i.e. 30 minutes).
- min_session_length (int): Minimum number of items for a session to be valid.
- min_item_support (int): Minimum number of interactions for an item to be valid.
- num_slices (int): Number of slices.
- days_offset (int): Offset in days from the first date in the data set.
- days_shift (int): Number of days the training start date is shifted after creating one slice.
- days_train (int): Days in train set in each slice.
- days_test (int): Days in test set in each slice.

{% endraw %} {% raw %}
{% endraw %}

v2

{% raw %}

class SessionDatasetv2[source]

SessionDatasetv2(root, column_names) :: Dataset

Dataset base class

{% endraw %} {% raw %}
{% endraw %}

v3

{% raw %}

class SessionDatasetv3[source]

SessionDatasetv3(root) :: Dataset

Dataset base class

{% endraw %} {% raw %}
{% endraw %}

GraphData

{% raw %}

class GraphData[source]

GraphData(data, shuffle=False, graph=None)

{% endraw %} {% raw %}
{% endraw %} {% raw %}
train_data = ([[1, 2, 3], [2, 3, 4], [1, 2, 4], [2, 3], [1]], 
              [4, 5, 5, 4, 2])

tds = GraphData(train_data, shuffle=False)
print(tds.generate_batch(1))
print(tds.generate_batch(2))
print(tds.inputs)

tds = GraphData(train_data, shuffle=True)
print(tds.generate_batch(1))
print(tds.generate_batch(2))
print(tds.inputs)
[array([0]), array([1]), array([2]), array([3]), array([4])]
[array([0, 1]), array([2, 3]), array([4])]
[[1 2 3]
 [2 3 4]
 [1 2 4]
 [2 3 0]
 [1 0 0]]
[array([0]), array([1]), array([2]), array([3]), array([4])]
[array([0, 1]), array([2, 3]), array([4])]
[[1 2 4]
 [2 3 4]
 [1 0 0]
 [2 3 0]
 [1 2 3]]
{% endraw %}