---
title: Data Loader
keywords: fastai
sidebar: home_sidebar
summary: "Generic data ingestion routines to ingest data from files to databases."
description: "Generic data ingestion routines to ingest data from files to databases."
nb_path: "01_data.loader.ipynb"
---
{% raw %}
{% endraw %} {% raw %}
{% endraw %}

Decorator

{% raw %}

auto_str[source]

auto_str()

Auto generate str

{% endraw %} {% raw %}
{% endraw %}

Attribute Delegation

{% raw %}

class GetAttr[source]

GetAttr()

Inherit from this to have all attr accesses in self._xtra passed down to self.default

{% endraw %} {% raw %}
{% endraw %}

ObjectFactory, DbTargetProvider and FileSourceProvider are the factory classes.

{% raw %}

class ObjectFactory[source]

ObjectFactory()

Generic object factory

{% endraw %} {% raw %}
{% endraw %} {% raw %}

class DbTargetProvider[source]

DbTargetProvider() :: ObjectFactory

Database provider

{% endraw %} {% raw %}
{% endraw %} {% raw %}

class FileSourceProvider[source]

FileSourceProvider() :: ObjectFactory

Supported file sources

{% endraw %} {% raw %}
{% endraw %}

DatabaseTarget and FileSource are the enumerations of supported database targets and file sources.

{% raw %}

DatabaseTarget[source]

Enum = [PostgreSQL, MySQL]

An enumeration.

{% endraw %} {% raw %}
{% endraw %} {% raw %}

FileSource[source]

Enum = [CSV, Excel]

An enumeration.

{% endraw %} {% raw %}
{% endraw %}

Target Databases

PostgreSQL

{% raw %}

class PgSqlDbBuilder[source]

PgSqlDbBuilder()

PostgreSQL database builder.

{% endraw %} {% raw %}

class PgSqlDb[source]

PgSqlDb(host, port, db, user, password)

PostgreSQL database destination.

{% endraw %} {% raw %}
{% endraw %}

MySQL

{% raw %}

class MySqlDbBuilder[source]

MySqlDbBuilder()

MySQL database builder.

{% endraw %} {% raw %}

class MySqlDb[source]

MySqlDb(host, port, db, user, password)

MySQL database destination.

{% endraw %} {% raw %}
{% endraw %}

Supported Sources

{% raw %}

create_excel_file_source[source]

create_excel_file_source(file_path, **args)

Create Excel file source.

{% endraw %} {% raw %}

create_csv_file_source[source]

create_csv_file_source(file_path, **args)

Create CSV file source.

{% endraw %} {% raw %}

class ExcelSource[source]

ExcelSource(file_path, **args)

Excel file source.

{% endraw %} {% raw %}

class CSVSource[source]

CSVSource(file_path, **args)

CSV file source.

{% endraw %} {% raw %}
{% endraw %}

Ingestion

{% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}

ingest[source]

ingest(file_source, target_db, table_name, if_exists='append', method='multi', schema=None)

Ingest the file into the database table.

{% endraw %} {% raw %}
{% endraw %} {% raw %}
excel_source = file_sources.get(FileSource.Excel, file_path="data/accounts.xlsx")
excel_source.get_data()
user_id username password email created_on last_login
0 1 user1 user11 user1@abc.com 2020-12-06 04:00:00.000003 2020-12-07 13:00:00.000003
1 2 user2 user22 user2@abc.com 2020-12-06 04:00:00.000003 2020-12-07 13:00:00.000003
2 3 user3 user33 user3@abc.com 2020-12-06 04:00:00.000003 2020-12-07 13:00:00.000003
3 4 user4 user44 user4@abc.com 2020-12-06 04:00:00.000003 2020-12-07 13:00:00.000003
4 5 user5 user55 user5@abc.com 2020-12-06 04:00:00.000003 2020-12-07 13:00:00.000003
... ... ... ... ... ... ...
95 96 user96 user9696 user96@abc.com 2020-12-06 04:00:00.000003 2020-12-07 13:00:00.000003
96 97 user97 user9797 user97@abc.com 2020-12-06 04:00:00.000003 2020-12-07 13:00:00.000003
97 98 user98 user9898 user98@abc.com 2020-12-06 04:00:00.000003 2020-12-07 13:00:00.000003
98 99 user99 user9999 user99@abc.com 2020-12-06 04:00:00.000003 2020-12-07 13:00:00.000003
99 100 user100 user100100 user100@abc.com 2020-12-06 04:00:00.000003 2020-12-07 13:00:00.000003

100 rows × 6 columns

{% endraw %} {% raw %}
csv_source = file_sources.get(FileSource.CSV, file_path="data/accounts.csv")
csv_source.get_data()
user_id username password email created_on last_login
0 1 user1 user11 user1@abc.com 2020-12-06 04:00:00 2020-12-07 13:00:00
1 2 user2 user22 user2@abc.com 2020-12-06 04:00:00 2020-12-07 13:00:00
2 3 user3 user33 user3@abc.com 2020-12-06 04:00:00 2020-12-07 13:00:00
3 4 user4 user44 user4@abc.com 2020-12-06 04:00:00 2020-12-07 13:00:00
4 5 user5 user55 user5@abc.com 2020-12-06 04:00:00 2020-12-07 13:00:00
... ... ... ... ... ... ...
95 96 user96 user9696 user96@abc.com 2020-12-06 04:00:00 2020-12-07 13:00:00
96 97 user97 user9797 user97@abc.com 2020-12-06 04:00:00 2020-12-07 13:00:00
97 98 user98 user9898 user98@abc.com 2020-12-06 04:00:00 2020-12-07 13:00:00
98 99 user99 user9999 user99@abc.com 2020-12-06 04:00:00 2020-12-07 13:00:00
99 100 user100 user100100 user100@abc.com 2020-12-06 04:00:00 2020-12-07 13:00:00

100 rows × 6 columns

{% endraw %} {% raw %}
config = {
    'host': 'localhost',
    'port': 5432,
    'db': 'testdb',
    'user': 'user1',
    'password': 'userpwd'
}
pgsql_target = db_targets.get(DatabaseTarget.PostgreSQL, **config)
pgsql_target.get_conn_str()
'postgresql+psycopg2://user1:userpwd@localhost:5432/testdb'
{% endraw %} {% raw %}
ingest(excel_source, pgsql_target, 'accounts')
2020-12-12 21:33:38,062 INFO(): {'user_id': INTEGER(), 'username': VARCHAR(length=50), 'password': VARCHAR(length=50), 'email': VARCHAR(length=255), 'created_on': TIMESTAMP(), 'last_login': TIMESTAMP()}
Total records in data/accounts.xlsx - 100
user_id - 100
username - 100
password - 100
email - 100
created_on - 1
last_login - 1
{% endraw %} {% raw %}
ingest(excel_source, pgsql_target, 'accounts', if_exists='replace')
2020-12-12 21:33:38,214 INFO(): {'user_id': INTEGER(), 'username': VARCHAR(length=50), 'password': VARCHAR(length=50), 'email': VARCHAR(length=255), 'created_on': TIMESTAMP(), 'last_login': TIMESTAMP()}
Total records in data/accounts.xlsx - 100
user_id - 100
username - 100
password - 100
email - 100
created_on - 1
last_login - 1
{% endraw %} {% raw %}
config = {
    'host': 'localhost',
    'port': 3306,
    'db': 'testdb',
    'user': 'user1',
    'password': 'userpwd'
}
mysql_target = db_targets.get(DatabaseTarget.MySQL, **config)
mysql_target.get_conn_str()
'mysql+pymysql://user1:userpwd@localhost:3306/testdb?charset=utf8mb4'
{% endraw %} {% raw %}
ingest(excel_source, mysql_target, 'accounts')
2020-12-12 21:33:38,710 INFO(): {'user_id': INTEGER(), 'username': VARCHAR(length=50), 'password': VARCHAR(length=50), 'email': VARCHAR(length=255), 'created_on': TIMESTAMP(), 'last_login': TIMESTAMP()}
Total records in data/accounts.xlsx - 100
user_id - 100
username - 100
password - 100
email - 100
created_on - 1
last_login - 1
{% endraw %} {% raw %}
ingest(excel_source, mysql_target, 'accounts', if_exists='replace')
2020-12-12 21:33:39,131 INFO(): {'user_id': INTEGER(), 'username': VARCHAR(length=50), 'password': VARCHAR(length=50), 'email': VARCHAR(length=255), 'created_on': TIMESTAMP(), 'last_login': TIMESTAMP()}
Total records in data/accounts.xlsx - 100
user_id - 100
username - 100
password - 100
email - 100
created_on - 1
last_login - 1
{% endraw %}