"""
Class that implements the common functionality for reading and writing CSV-files
"""
from optparse import OptionGroup
from os import path
from PyFoam.ThirdParty.six import print_
class CommonReadWriteCSV(object):
    """Command line options and helpers shared by the CSV reading/writing tools.

    The class keeps no state of its own.  Its methods use attributes that are
    not defined anywhere in this class and therefore have to be provided by
    the class it is combined with:

    - ``self.parser``: optparse parser that receives the option groups
      (used by ``addOptions``)
    - ``self.opts``: the parsed option values (used by all other methods)
    - ``self.error(*args)``: called to report a malformed specification
    """

    # File formats accepted by --default-read-format
    validFormats = ["csv", "xls", "xlsx", "txt", "smpl"]

    def addOptions(self):
        """Register the option groups for CSV handling on ``self.parser``.

        The groups are attached in the order Calculations, Info, Data,
        Format, Column names, which is the order they show up in the help.
        """
        # ---- Calculations -------------------------------------------------
        calc = OptionGroup(
            self.parser,
            "Calculations",
            "Calculations to be performed on the data. Format is '<name>:::<expr>' (three colons should not appear in variable names. This can be modified with --specification-separator). In the expressions there are variables that correspond to the column names. Also a variable 'data' that can be subscripted (for columns that are not valid variable names)")
        self.parser.add_option_group(calc)
        calc.add_option(
            "--recalc-columns",
            action="append",
            dest="recalcColumns",
            default=[],
            help="Columns that should be recalculated after reading. Can be specified more than once. In the expression a variable 'this' can reference the variable itself")
        calc.add_option(
            "--regular-expression-for-recalculation",
            action="store_true",
            dest="regularExpressionRecalc",
            default=False,
            help="The name in recalculations is a regular expression that must match the existing names")
        calc.add_option(
            "--raw-data-add-column",
            action="append",
            dest="rawAddColumns",
            default=[],
            help="Columns that should be added to the data after reading")
        calc.add_option(
            "--joined-data-add-column",
            action="append",
            dest="joinedAddColumns",
            default=[],
            help="Columns that should be added to the data before writing")
        calc.add_option(
            "--specification-separator",
            action="store",
            dest="specSeparator",
            default=":::",
            help="Separator used for specifications instead of %default")

        # ---- Info ---------------------------------------------------------
        info = OptionGroup(
            self.parser,
            "Info",
            "Information about the data")
        self.parser.add_option_group(info)
        # The dest is spelled "printColums" (sic); printColumns() reads it
        # under exactly that name, so it must not be "corrected" here alone.
        info.add_option(
            "--print-columns",
            action="store_true",
            dest="printColums",
            default=False,
            help="Print the column names found")

        # ---- Data ---------------------------------------------------------
        data = OptionGroup(
            self.parser,
            "Data",
            "Specification on the data that is read in")
        self.parser.add_option_group(data)
        data.add_option(
            "--time-name",
            action="store",
            dest="time",
            default=None,
            help="Name of the time column")
        data.add_option(
            "--set-names",
            action="store",
            dest="setNames",
            default=None,
            help="Comma-separated list of names to be used (instead of reading the names from the files)")
        data.add_option(
            "--column-names",
            action="append",
            default=[],
            dest="columns",
            help="The columns (names) which should be copied to the CSV. All if unset")
        data.add_option(
            "--regexp-column-names",
            action="store_true",
            default=False,
            dest="columnsRegexp",
            help="The column names should be matched as regular expressions")
        data.add_option(
            "--skip-header-lines",
            action="store",
            type="int",
            default=0,
            dest="skipHeaderLines",
            help="Number of lines to skip for the header")

        # ---- Format -------------------------------------------------------
        formt = OptionGroup(
            self.parser,
            "Format",
            "Specification on the format of the data read and written")
        self.parser.add_option_group(formt)
        formt.add_option(
            "--write-excel-file",
            action="store_true",
            dest="writeExcel",
            default=False,
            help="Write to Excel-file instead of plain CSV. Onle works with the python-libraries pandas and xlwt")
        formt.add_option(
            "--read-excel-file",
            action="store_true",
            dest="readExcel",
            default=False,
            help="Read from Excel-file instead of plain CSV. Onle works with the python-libraries pandas and xlrd")
        formt.add_option(
            "--automatic-format",
            action="store_true",
            dest="automaticFormat",
            default=False,
            help="Determine from the file extension whether the files are CSV, Excel or plain text")
        formt.add_option(
            "--delimiter",
            action="store",
            dest="delimiter",
            default=',',
            help="Delimiter to be used between the values. Default: %default")
        formt.add_option(
            "--default-read-format",
            choices=CommonReadWriteCSV.validFormats,
            default="csv",
            dest="defaultReadFormat",
            help="If --automatic is not used then it is assumed that all input files are of this format. Default is %default. Valid options are " + ", ".join(CommonReadWriteCSV.validFormats))

        # ---- Column names -------------------------------------------------
        colNames = OptionGroup(
            self.parser,
            "Column names",
            "Transformations on the column names before they are written")
        self.parser.add_option_group(colNames)
        colNames.add_option(
            "--write-time-name",
            action="store",
            dest="writeTimeName",
            default=None,
            help="Renaming the time name for the written data")
        colNames.add_option(
            "--column-name-replacements",
            action="append",
            dest="colNameReplacements",
            default=[],
            help="Replacements in the column names. Format is 'orig:::replace'. Can be specified more than once. Instead of ::: the value set with --specification-separator can be used")
        colNames.add_option(
            "--column-name-transformation",
            action="append",
            dest="colNameTransformation",
            default=[],
            help="Transform the column names with a Python lambda-function. Can be specified more than once. For instance 'lambda s:s.upper()' transforms the column names to upper case")

    @property
    def names(self):
        """Names given with --set-names as a tuple, or None if it was not used."""
        if self.opts.setNames is None:
            return None
        return tuple(self.opts.setNames.split(","))

    def printColumns(self, fName, data):
        """Print the column names of ``data`` if --print-columns was given.

        :param fName: name that is printed to identify the data
        :param data: object with a ``names()`` method and an
            ``eliminatedNames`` attribute
        """
        if not self.opts.printColums:
            return

        delim = "\n "
        print_("Columns in", fName, ":", delim.join([""] + list(data.names())))
        # NOTE(review): treated as part of the --print-columns output; the
        # nesting was reconstructed from a copy without indentation - confirm
        if data.eliminatedNames:
            print_("Eliminated from", fName, ":", ", ".join(data.eliminatedNames))

    def recalcColumns(self, data):
        """Apply the --recalc-columns specifications to ``data``."""
        self.__processColumns(data, self.opts.recalcColumns)

    def rawAddColumns(self, data):
        """Apply the --raw-data-add-column specifications to ``data``."""
        self.__processColumns(data, self.opts.rawAddColumns, create=True)

    def joinedAddColumns(self, data):
        """Apply the --joined-data-add-column specifications to ``data``."""
        self.__processColumns(data, self.opts.joinedAddColumns, create=True)

    def __processColumns(self, data, specs, create=False):
        """Hand every '<name><separator><expr>' specification to ``data.recalcData``.

        :param data: object providing ``recalcData`` and ``names()``
        :param specs: list of specification strings
        :param create: passed on to ``recalcData``; if it is set the name is
            never interpreted as a regular expression

        A specification that does not split into exactly two parts is
        reported through ``self.error`` and skipped.
        """
        if not specs:
            return

        separator = self.opts.specSeparator
        # Same for every specification, so decide (and import) only once
        useRegexp = not create and self.opts.regularExpressionRecalc
        if useRegexp:
            import re

        for spec in specs:
            try:
                name, expr = spec.split(separator)
            except ValueError:
                # Report with the separator that is really in use, not the
                # default one
                self.error(spec,
                           "can not be split correctly with '%s':" % separator,
                           spec.split(separator))
                # Only reached if error() returns: name/expr are not valid
                # for this specification
                continue

            if useRegexp:
                pattern = re.compile(name)
                for colName in data.names():
                    if pattern.match(colName):
                        data.recalcData(colName, expr)
            else:
                data.recalcData(name, expr, create)

    def processName(self, name):
        """Return the column name as it should be written.

        Applied in this order: renaming of the time column
        (--write-time-name), the textual replacements
        (--column-name-replacements) and the transformation functions
        (--column-name-transformation).

        :param name: the original column name
        :return: the transformed name
        """
        if name == self.opts.time and self.opts.writeTimeName:
            name = self.opts.writeTimeName

        for replacement in self.opts.colNameReplacements:
            separator = self.opts.specSeparator
            try:
                orig, repl = replacement.split(separator)
            except ValueError:
                # Report with the separator that is really in use, not the
                # default one
                self.error(replacement,
                           "can not be split correctly with '%s':" % separator,
                           replacement.split(separator))
                # Only reached if error() returns: orig/repl are not valid
                # for this replacement
                continue
            name = name.replace(orig, repl)

        for transformation in self.opts.colNameTransformation:
            # SECURITY: this evaluates text from the command line as Python
            # code. Never feed these options from an untrusted source.
            name = eval(transformation)(name)

        return name
# Should work with Python3 and Python2