Source code for etler.transform.cutter

from etler.load.logger import LoggingStream
import pandas as pd


def chunk(
    filename,
    chunkdir,
    chunksize,
    sep=",",
    chunk_prefix="chunk",
    index=False,
    should_log=True,
    logname="bad_lines.log",
):
    """Split a delimited text file into numbered CSV chunk files.

    The input is read with ``pandas.read_csv`` in pieces of ``chunksize``
    rows, and each piece is written to
    ``chunkdir / f"{chunk_prefix}_{idx}.csv"`` where ``idx`` counts from 0.
    Malformed lines are skipped with a warning instead of aborting the read.

    Args:
        filename: Path or buffer accepted by ``pandas.read_csv``.
        chunkdir: Output directory; must support ``.mkdir(...)`` and the
            ``/`` operator (e.g. a ``pathlib.Path``). It is created,
            including parents, if it does not exist.
        chunksize: Number of rows per output file.
        sep: Field delimiter of the input file.
        chunk_prefix: Filename prefix for every chunk file.
        index: Forwarded to ``DataFrame.to_csv(index=...)``.
        should_log: When true, ``logname`` is opened for writing (truncating
            any existing file) and handed to ``LoggingStream``; when false,
            ``LoggingStream`` receives ``None`` for both streams.
        logname: Path of the log file used when ``should_log`` is true.

    Returns:
        None. The chunk files are the only output.
    """
    import inspect

    chunkdir.mkdir(parents=True, exist_ok=True)

    # pandas 1.3 introduced ``on_bad_lines`` and deprecated the
    # ``error_bad_lines`` / ``warn_bad_lines`` pair; pandas 2.0 removed the
    # pair entirely. Pick whichever spelling the installed pandas accepts so
    # "skip the bad line and warn" keeps working on both sides of the change.
    if "on_bad_lines" in inspect.signature(pd.read_csv).parameters:
        bad_line_kwargs = {"on_bad_lines": "warn"}
    else:
        bad_line_kwargs = {"error_bad_lines": False, "warn_bad_lines": True}

    log = open(logname, "w") if should_log else None
    try:
        # NOTE(review): LoggingStream presumably redirects the process's
        # output streams into ``log`` so that pandas' bad-line warnings end
        # up in the file -- confirm against etler.load.logger.
        with LoggingStream(log, log):
            reader = pd.read_csv(
                filename,
                sep=sep,
                chunksize=chunksize,
                **bad_line_kwargs,
            )
            # ``frame`` rather than ``chunk`` to avoid shadowing this function.
            for idx, frame in enumerate(reader):
                frame.to_csv(
                    chunkdir / f"{chunk_prefix}_{idx}.csv",
                    index=index,
                )
    finally:
        # Always release the log handle, even if reading or writing failed.
        # Closing an already-closed file is a no-op, so this is safe even if
        # LoggingStream closes its streams on exit.
        if log is not None:
            log.close()