"""
Application-class that implements pyFoamConvertToCSV.py
"""
from optparse import OptionGroup
from .PyFoamApplication import PyFoamApplication
from .CommonReadWriteCSV import CommonReadWriteCSV
from PyFoam.Basics.SpreadsheetData import SpreadsheetData
from PyFoam.ThirdParty.six import print_
from os import path,listdir
from copy import deepcopy
from glob import glob
# xlsxwriter is an optional dependency: in this file it is only used to add
# formula columns to excel output, so a missing module just clears the flag.
try:
    import xlsxwriter
except ImportError:
    hasXlsxWriter=False
else:
    hasXlsxWriter=True
class ConvertToCSV(PyFoamApplication,
                   CommonReadWriteCSV):
    """Join one or more column-oriented data files into one CSV or excel file.

    The last command line argument is the destination, all arguments before
    it are sources. The first source is read as the master data set; the
    columns of every further source are resampled onto it and appended,
    prefixed with the part of the file path that differs between the sources.

    Reading options, column post-processing and the output format are
    handled by methods and options that are not defined in this class
    (presumably they come from CommonReadWriteCSV).
    """

    def __init__(self,
                 args=None,
                 **kwargs):
        """Build the description and initialize both base classes.

        :param args: command line arguments, passed on to PyFoamApplication
        :param kwargs: further keyword arguments, passed on to PyFoamApplication
        """
        description="""\
Takes a plain file with column-oriented data and converts it to a
csv-file. If more than one file are specified, they are joined
according to the first column.
Note: the first file determines the resolution of the time-axis
"""
        if not hasXlsxWriter:
            description+="""
Warning: The module 'xlsxwriter' is not installed. Therefor no addition of formulas
to excel files is possible"""

        CommonReadWriteCSV.__init__(self)
        PyFoamApplication.__init__(self,
                                   args=args,
                                   description=description,
                                   usage="%prog <source> ... <dest.csv>",
                                   interspersed=True,
                                   changeVersion=False,
                                   nr=2,
                                   exactNr=False,
                                   **kwargs)

    def addOptions(self):
        """Register the command line options of this application.

        Adds the options of CommonReadWriteCSV first, then the groups
        "Input", "How" and "Excel". The formula option is only offered
        when xlsxwriter could be imported.
        """
        CommonReadWriteCSV.addOptions(self)

        inp=OptionGroup(self.parser,
                        "Input",
                        "Manipulating the input data")
        self.parser.add_option_group(inp)
        inp.add_option("--strip-characters",
                       action="store",
                       dest="stripCharacters",
                       default=None,
                       help="A string with the characters that should be stripped from the input file before it is processed. For instance '()'")
        inp.add_option("--replace-first-line",
                       action="store",
                       dest="replaceFirstLine",
                       default=None,
                       help="Replace the first line of the input with this string")

        how=OptionGroup(self.parser,
                        "How",
                        "How the data should be joined")
        self.parser.add_option_group(how)
        how.add_option("--force",
                       action="store_true",
                       dest="force",
                       default=False,
                       help="Overwrite the destination csv if it already exists")
        how.add_option("--extend-data",
                       action="store_true",
                       dest="extendData",
                       default=False,
                       help="Extend the time range if other files exceed the range of the first file")
        how.add_option("--names-from-filename",
                       action="store_true",
                       dest="namesFromFilename",
                       default=False,
                       help="Read the value names from the file-name (assuming that names are split by _ and the names are in the tail - front is the general filename)")
        how.add_option("--add-times",
                       action="store_true",
                       dest="addTimes",
                       default=False,
                       help="Actually add the times from the second file instead of interpolating")
        how.add_option("--interpolate-new-times",
                       action="store_true",
                       dest="interpolateNewTime",
                       default=False,
                       help="Interpolate data if new times are added")
        how.add_option("--new-data-no-interpolate",
                       action="store_false",
                       dest="newDataInterpolate",
                       default=True,
                       help="Don't interpolate new data fields to the existing times")

        excel=OptionGroup(self.parser,
                          "Excel",
                          "Stuff for excel file output")
        self.parser.add_option_group(excel)
        excel.add_option("--add-sheets",
                         action="store_true",
                         dest="addSheets",
                         default=False,
                         help="Add the input data in unmodified form as additional sheets to the excel file")
        if hasXlsxWriter:
            excel.add_option("--add-formula-to-sheet",
                             action="append",
                             dest="addFormulas",
                             default=[],
                             help="Add columns with formulas calculated from other data. This only works when writing XLSX-files. The formula is 'name:::ExcelFormula'. In the ExcelFormula the written column names can be used. These have to be enclosed in '' (this is necessary to allow names with spaces and digits). Can be used more than once")

    def run(self):
        """Read all sources, join them and write the destination file.

        Calls self.error() if the destination exists and --force is not
        set, or if no source file could be determined.
        """
        args=self.parser.getArgs()
        dest=args[-1]
        if path.exists(dest) and not self.opts.force:
            self.error("CSV-file",dest,"exists already. Use --force to overwrite")

        sources=self._collectSources(args[:-1])
        if not sources:
            # Happens if every source is a lastTime-pattern that matches
            # nothing. Without this check sources[0] fails with IndexError
            self.error("No source files found for",args[:-1])

        diffs=self._differingPathParts(sources)
        data,rawData=self._readAndJoin(sources,diffs)

        self.joinedAddColumns(data)
        data.rename(self.processName,renameTime=True)
        data.rename(lambda c:c.strip())
        data.eliminatedNames=None
        if len(sources)>1:
            self.printColumns("written data",data)

        if self.opts.automaticFormat and self.getDataFormat(dest)=="excel":
            self.opts.writeExcel=True

        if self.opts.writeExcel:
            self._writeExcel(dest,data,rawData)
        else:
            data.writeCSV(dest,
                          delimiter=self.opts.delimiter)

    def _collectSources(self,specs):
        """Expand the source arguments into a list of file names.

        An argument containing '/*lastTime*/' is split at the first
        occurrence. The front part is used as a glob pattern. In every
        matching directory the sub-directory with the numerically largest
        name that contains the back part is selected. Other arguments are
        used unchanged.

        :param specs: the source arguments from the command line
        :return: list of source file names (may be empty)
        """
        marker="/*lastTime*/"
        sources=[]
        for spec in specs:
            if marker not in spec:
                sources.append(spec)
                continue
            front,back=spec.split(marker,1)
            for d in glob(front):
                if not path.isdir(d):
                    # the pattern may also match plain files. listdir()
                    # would raise for them
                    continue
                lastTime=None
                for f in listdir(d):
                    if not path.exists(path.join(d,f,back)):
                        continue
                    try:
                        t=float(f)
                    except ValueError:
                        # name is not a number
                        continue
                    if lastTime is None or t>float(lastTime):
                        lastTime=f
                if lastTime is not None:
                    sources.append(path.join(d,lastTime,back))
        return sources

    def _firstMismatch(self,a,b):
        """Index of the first position where two strings differ.

        If no difference is found within the length of the shorter string
        the last index of the shorter string is returned (0 if it is empty).
        """
        limit=min(len(a),len(b))
        for i in range(limit):
            if a[i]!=b[i]:
                return i
        return max(limit-1,0)

    def _differingPathParts(self,sources):
        """Find the part of each absolute source path that is not common.

        The common start and common end are determined by comparing every
        source with the first one. The smallest values are used for all.

        :param sources: list of source file names
        :return: [None] for a single source, otherwise one string per source
        """
        if len(sources)<2:
            return [None]

        paths=[path.abspath(s) for s in sources]
        first=paths[0]
        others=paths[1:]

        commonStart=min(self._firstMismatch(first,o) for o in others)
        # The common end is found by comparing the reversed strings. It is
        # limited to what the common start leaves over of the shorter path:
        # otherwise start and end can overlap (for instance aa.txt and
        # aaa.txt) which gives empty parts for both files
        commonEnd=min(min(self._firstMismatch(first[::-1],o[::-1]),
                          min(len(first),len(o))-commonStart)
                      for o in others)

        if commonEnd>0:
            return [p[commonStart:-commonEnd] for p in paths]
        return [p[commonStart:] for p in paths]

    def _readSpreadsheet(self,source,names,title):
        """Read one source file with the options from the command line.

        :param source: name of the file (only used to get the format options)
        :param names: column names or None
        :param title: title for the data or None
        """
        return SpreadsheetData(names=names,
                               timeName=self.opts.time,
                               validData=self.opts.columns,
                               skip_header=self.opts.skipHeaderLines,
                               stripCharacters=self.opts.stripCharacters,
                               replaceFirstLine=self.opts.replaceFirstLine,
                               validMatchRegexp=self.opts.columnsRegexp,
                               title=title,
                               **self.dataFormatOptions(source))

    def _readAndJoin(self,sources,diffs):
        """Read all sources and append the later ones to the first.

        Sets self.opts.time to the time name of the first file if it was
        not specified.

        :param sources: list of source file names (at least one)
        :param diffs: prefixes as returned by _differingPathParts
        :return: tuple with the joined data and a list with a copy of
        every source as it was read
        """
        names=self.names
        title=path.splitext(path.basename(sources[0]))[0]
        if self.opts.namesFromFilename:
            if names is not None:
                self.error("Names already specified as",names,". Can't calc from filename")
            names=title.split("_")
            title=None

        data=self._readSpreadsheet(sources[0],names,title)
        # copy before the columns are manipulated
        rawData=[deepcopy(data)]
        self.printColumns(sources[0],data)
        self.recalcColumns(data)
        self.rawAddColumns(data)

        if self.opts.time is None:
            self.opts.time=data.timeName()

        if diffs[0] is not None:
            data.rename(lambda c:diffs[0]+" "+c)

        for s,prefix in zip(sources[1:],diffs[1:]):
            names=None
            title=path.splitext(path.basename(s))[0]
            if self.opts.namesFromFilename:
                names=title.split("_")
                title=None
            sData=self._readSpreadsheet(s,names,title)
            # copy like for the first file: the calls below get the same
            # object and --add-sheets promises unmodified data
            rawData.append(deepcopy(sData))
            self.printColumns(s,sData)
            self.recalcColumns(sData)
            self.rawAddColumns(sData)

            if self.opts.addTimes:
                data.addTimes(time=self.opts.time,
                              times=sData.data[self.opts.time],
                              interpolate=self.opts.interpolateNewTime)

            for n in sData.names():
                if n==self.opts.time:
                    continue
                if self.opts.columns==[] or data.validName(n,self.opts.columns,True):
                    d=data.resample(sData,
                                    n,
                                    time=self.opts.time,
                                    extendData=self.opts.extendData,
                                    noInterpolation=not self.opts.newDataInterpolate)
                    data.append(prefix+" "+n,d)

        return data,rawData

    def _writeExcel(self,dest,data,rawData):
        """Write the joined data (and optionally the inputs) to an excel file.

        :param dest: name of the file to write
        :param data: the joined data. Goes to the sheet "Data"
        :param rawData: the data of the sources. Only written with --add-sheets
        """
        from pandas import ExcelWriter

        with ExcelWriter(dest) as writer:
            data.getData().to_excel(writer,sheet_name="Data")
            if self.opts.addSheets:
                for n,d in enumerate(rawData):
                    d.getData().to_excel(writer,
                                         sheet_name="Original file %d" % n)
            # the option addFormulas only exists if xlsxwriter was found
            if hasXlsxWriter and len(self.opts.addFormulas)>0:
                self._addFormulaColumns(writer.sheets["Data"],data)
                print_("Formulas written. In LibreOffice recalculate with Ctrl+Shift+F9")

    def _addFormulaColumns(self,sheet,data):
        """Append one column per --add-formula-to-sheet option to the sheet.

        :param sheet: the worksheet with the data. Must have a write-method
        that takes row, column and value
        :param data: the data that was written to the sheet
        """
        from xlsxwriter.utility import xl_rowcol_to_cell as rowCol2Cell

        rows=len(data.getData())
        names=list(data.names())
        # NOTE(review): assumes that the position in names() is the column
        # in the sheet - confirm against SpreadsheetData.getData()
        cols=dict((n,i) for i,n in enumerate(names))
        newC=len(names)-1

        for f in self.opts.addFormulas:
            if ":::" not in f:
                self.error("Formula",f,"is not of the form name:::ExcelFormula")
            # only split at the first separator: the rest is the formula
            name,formula=f.split(":::",1)
            newC+=1
            sheet.write(0,newC,name)
            cols[name]=newC

            pieces=self._splitFormula(formula,cols)
            for row in range(rows):
                # row 0 is the header
                cellFormula="="+"".join(rowCol2Cell(row+1,p) if isinstance(p,int) else p
                                         for p in pieces)
                sheet.write(row+1,newC,cellFormula)

    def _splitFormula(self,formula,cols):
        """Split a formula into plain text and references to columns.

        Names enclosed in '' are looked up in cols. Calls self.error() for
        an unclosed ' or an unknown name.

        :param formula: the formula text
        :param cols: dictionary that maps column names to column indices
        :return: list where text is a string and a column is its index (int)
        """
        pieces=[]
        pos=0
        while pos<len(formula):
            quote=formula.find("'",pos)
            if quote<0:
                # no further names: the rest is plain text
                pieces.append(formula[pos:])
                break
            if quote>pos:
                pieces.append(formula[pos:quote])
            closing=formula.find("'",quote+1)
            if closing<0:
                self.error("No closing ' in formula",formula)
            column=formula[quote+1:closing]
            if column not in cols:
                self.error("Name",column,"not in column names",cols.keys())
            pieces.append(cols[column])
            pos=closing+1
        return pieces
# Should work with Python3 and Python2