Source code for output.timeseries

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

[docs]def get_ts_coords(tsfile): """Given a CrunchFlow time series ouput file, return the coordinate at which that time series was output. Parameters ---------- tsfile : str filename containing timeseries output Returns ------- coords : tuple of int Coordinates of the form (x, y, z)""" # Open the file and read in the first line with open(tsfile) as f: for i, line in enumerate(f): if i == 0: fields = line.split()[-3:] # Final 3 fields are x, y, and z x = int(fields[0]) y = int(fields[1]) z = int(fields[2]) # Return tuple coords = (x, y, z) return coords
[docs]def get_ts_duplicates(tsfile): """Find duplicate columns in a time series file. Useful when a user specifies `time_series_print all` in CrunchFlow; the time series file includes primary species printed twice. Parameters ---------- tsfile : str path to the time series file Returns ------- columns : list list of column headings without duplicates nondup_indices : list list of column indices without duplicates """ # Open the file and read in the second line with open(tsfile) as f: for i, line in enumerate(f): if i == 1: columns = line.split() # Get list of duplicates and how many times each is seen seen = {} # Count occurrences of each item duplicates = [] # List of duplicate items dup_indices = [] # List of first occurrences of each duplicate item for col in columns: if col not in seen: seen[col] = 1 else: if seen[col] == 1: duplicates.append(col) idx = columns.index(col) dup_indices.append(idx) seen[col] += 1 # Do not go through if seen[col] == 1 again # Total number of columns in the file, including duplicates totcols = len(columns) nondup_indices = list(range(totcols)) # List of non-duplicated indices # Delete each duplicate, but reverse sort to avoid throwing off indices # after deleting earlier elements for idx in sorted(dup_indices, reverse=True): del columns[idx] del nondup_indices[idx] return columns, nondup_indices
[docs]class timeseries: """This is the timeseries class for working with CrunchFlow time series output files. Attributes ---------- coords : tuple of int x, y and z coordinates of the time series timeunit : str time unit used in the CrunchFlow input unit : str Concentration units included in the file. Automatically set to the default CrunchFlow concentration units (mol/kgw) species : list of str list of aqueous species in the file data : ndarray of float Numpy array of all data. First col is the time step and remaining cols are species in the same order as self.species list df : dataframe of float Pandas dataframe of all data. Index is the time step and columns #are the aqueous species Methods ------- convert_mgL(database='datacom.dbs', folder='.') Convert time series concentrations from mol/kgw to mg/L (ppm). plot(species, units='mg/L', **kwargs) Plot the time series of one or more species. Examples -------- >>> ts = timeseries('Well1-1.txt') >>> ts.convert_mgL() >>> calcium = ts.df['Ca++'] >>> ts.plot('Ca++') """ def __init__(self, tsfile, folder='.'): """Read in and get basic info about the timeseries file `tsfile`. Parameters ---------- tsfile : str Name of the CrunchFlow time series file folder : str Path to the CrunchFlow time series file """ tsfilepath = os.path.join(folder, tsfile) # Get coordinates at which time series was output self.coords = get_ts_coords(tsfilepath) # Get list of duplicates and their indices self.columns, indices = get_ts_duplicates(tsfilepath) # Assume that, if there are duplicates, it's because user # specified `time_series_print all` in CrunchFlow, in which # case non-duplicate columns are printed in log format ncols = len(self.columns) lastcol = max(indices) + 1 # If idx of the last column is greater than the # cols, # duplicates were deleted, so set logformat = True if lastcol > ncols: logformat = True else: logformat = False # List of species (columns without the time column) self.species = self.columns[1:] # Set time and concentration units t = self.columns[0] self.timeunit = t[t.find("(")+1:t.find(")")] self.unit = 'mol/L' # Load data into numpy array self.data = np.genfromtxt(tsfilepath, skip_header=2, usecols=indices, missing_values=['Infinity', 'NaN', '-Infinity']) # If necessary, convert from log to real format if logformat: if self.columns[1] != 'pH': self.data[:, 1:] = 10**self.data[:, 1:] else: # Skip first two cols, which are time and pH self.data[:, 2:] = 10**self.data[:, 2:] # Load into a pandas dataframe as well self.df = pd.DataFrame(data=self.data[:, 1:], index=self.data[:, 0], columns=self.species) self.df.index.name = 'time'
[docs] def convert_mgL(self, database='datacom.dbs', folder='.'): """Convert time series concentrations from mol/kgw to mg/L (ppm). Note that this assumes that 1 kg water = 1 L water. Parameters ---------- database : str name of the CrunchFlow database. The default is 'datacom.dbs' folder : str path to the database. The default is current directory. Returns ------- None. Modifies timeseries object in place. """ databasepath = os.path.join(folder, database) # If units are already mg/L, no need to do anything if self.unit == 'mg/L': return # Check if database exists if not os.path.exists(databasepath): raise OSError('Could not find ' + databasepath) molar_mass = {} # Open the database and get the molar mass of each species with open(databasepath) as db: for line in db: for spec in self.species: # Database format is, e.g., "'Ca++' 6.0 2.0 40.0780", # where the last value is the molar mass if line.split()[0] == "'{}'".format(spec): molar_mass[spec] = float(line.split()[-1]) # Delete keys with molar masses of 0 (e.g., tracers) # and do not convert them to mg/L del_keys = [] for key, value in molar_mass.items(): if value == 0: del_keys.append(key) # Cannot delete key within loop, otherwise # it changes size on each iteration for key in del_keys: del molar_mass[key] for spec in self.species: if spec not in molar_mass.keys(): print('Warning -- Did not convert {} to mg/L'.format(spec)) else: idx = self.columns.index(spec) # Only need to convert .data since .data and .df are linked self.data[:, idx] = self.data[:, idx]*molar_mass[spec]*1000 # Update the unit attribute self.unit = 'mg/L'
[docs] def plot(self, species, units='mg/L', **kwargs): """Plot the time series of one or more species. Parameters ---------- species : str or list of str Either single species or list of species to be plotted units : str Concentration units to use for plotting. The default is 'mg/L' **kwargs : dict keyword arguments passed to plt.subplots (e.g., figsize) Returns ------- fig : pyplot object figure handle for current plot ax : pyplot object axis handle for current plot """ if units == 'mg/L' and self.unit != 'mg/L': # Raise error if cannot find datacom.dbs if not os.path.exists('./datacom.dbs'): raise OSError('Could not find default database. \ Plot with other units or convert to mg/L first using the \ convert_mgL method. See convert_mgL.__doc__ for more info.') self.convert_mgL() # Accept both str and list input, so if str, convert to list if isinstance(species, str): species = [species] fig, ax = plt.subplots(**kwargs) for spec in species: ax.plot(self.df.index, self.df[spec], label=spec) ax.set(xlabel='Time ({})'.format(self.timeunit), ylabel='Concentration ({})'.format(units)) ax.legend(); return fig, ax
if __name__ == '__main__': print(timeseries.__doc__)