# ICE Revision: $Id: $
"""
Collects data about runs in a small SQLite database
"""
# don't look at it too closely. It's my first sqlite-code
import sqlite3
from os import path
import datetime
import re
import sys
from PyFoam.Error import error
from .CSVCollection import CSVCollection
from PyFoam.ThirdParty.six import print_,iteritems,integer_types
from PyFoam.ThirdParty.six import u as uniCode
[docs]class RunDatabase(object):
"""
Database with information about runs. To be queried etc
"""
separator = "//"
unique_id = "uniqueid"
run_id = "runId"
TheRunsName = "theRuns"
def __init__(self,
name,
create=False,
verbose=False):
""":param name: name of the file
:param create: should the database be created if it does not exist"""
self.verbose=verbose
if not path.exists(name):
if create==False:
error("Database",name,"does not exist")
else:
self.initDatabase(name)
self.db=sqlite3.connect(name)
self.db.row_factory=sqlite3.Row
[docs] def initDatabase(self,name):
"""Create a new database file"""
db=sqlite3.connect(name)
with db:
db.row_factory=sqlite3.Row
cursor=db.cursor()
cursor.execute("CREATE TABLE theRuns({} INTEGER PRIMARY KEY, ".format(self.run_id) +
self.__normalize("insertionTime") + " TIMESTAMP)")
cursor.close()
[docs] def id_exists(self, theId):
return self.get_runID(theId) is not None
[docs] def get_runID(self, uniqueId):
if uniqueId is None:
return None
runCols = self.__getColumns(self.TheRunsName)
if self.unique_id not in runCols:
return None
dataCursor = self.db.cursor()
dataCursor.execute("SELECT * FROM {} WHERE {}=?".format(self.TheRunsName,
self.unique_id),
(uniqueId, ))
data = dataCursor.fetchall()
if len(data) < 1:
return None
else:
return data[0][self.run_id]
[docs] def add(self, data, update_existing=False):
"""Add a dictionary with data to the database"""
self.__adaptDatabase(data)
try:
unique = data[self.unique_id]
except KeyError:
unique = None
exists = self.id_exists(unique)
if exists and not update_existing:
raise KeyError("Run with key {} already in database".format(unique))
runData = dict([("insertionTime", datetime.datetime.now())] +
[(k, v) for k, v in iteritems(data) if type(v) != dict])
update_run = self.get_runID(unique)
runID = self.__addContent(self.TheRunsName,
runData,
update_run=update_run)
if update_run is not None:
runID = update_run
subtables = dict([(k, v) for k, v in iteritems(data) if type(v) == dict])
for tn, content in iteritems(subtables):
self.__addContent(tn+"Data",
dict(list(self.__flattenDict(content).items())+
[(self.run_id, runID)]),
update_run=update_run)
self.db.commit()
[docs] def modify(self, unique_id, data):
"""Modify a dataset for which we have a unique id with the data in a dictionary"""
update_run = self.get_runID(unique_id)
if update_run is None:
raise KeyError("Unique ID {} not in database".format(unique_id))
self.__adaptDatabase(data)
runData = dict([(k, v) for k, v in iteritems(data) if type(v) != dict])
runID = self.__addContent(self.TheRunsName,
runData,
update_run=update_run)
if update_run is not None:
runID = update_run
subtables = dict([(k, v) for k, v in iteritems(data) if type(v) == dict])
for tn, content in iteritems(subtables):
self.__addContent(tn+"Data",
dict(list(self.__flattenDict(content).items())+
[(self.run_id, runID)]),
update_run=update_run)
self.db.commit()
specialChars={
'[':'bro',
']':'brc',
'{':'cro',
'}':'crc',
'(':'pro',
')':'prc',
'|':'pip',
}
specialString="_specialChar"
def __normalize(self,s):
"""Normalize a column-name so that the case-insensitve column-names of SQlite
are no problem"""
if s in [self.run_id,"dataId"]:
return s
result=""
for c in s:
if c.isupper() or c=="_":
result+="_"+c.lower()
elif c in RunDatabase.specialChars:
result+=RunDatabase.specialString+RunDatabase.specialChars[c]
else:
result+=c
return result
def __denormalize(self,s):
"""Denormalize the column name that was normalized by _normalize"""
while s.find(RunDatabase.specialString)>=0:
pre,post=s.split(RunDatabase.specialString,maxsplit=1)
spec=post[0:3]
for k,v in iteritems(RunDatabase.specialChars):
if spec==v:
s=pre+k+post[3:]
break
else:
error("No special character for encoding",spec,"found")
result=""
underFound=False
for c in s:
if underFound:
underFound=False
result+=c.upper()
elif c=="_":
underFound=True
else:
result+=c
if underFound:
error("String",s,"was not correctly encoded")
return result
def __addContent(self, table, data, update_run=None):
cursor = self.db.cursor()
if len(data) == 0:
if self.verbose:
print_("No data. Nothing done")
cursor.close()
return None
runData={}
for k,v in iteritems(data):
if k==self.run_id:
runData[k]=v
elif isinstance(v,integer_types+(float,)):
runData[k]=float(v)
else:
runData[k]=uniCode(str(v))
cols=self.__getColumns(table)[1:]
addData=[]
for c in cols:
try:
addData.append(runData[c])
except KeyError:
addData.append(None)
addData=tuple(addData)
if update_run is None:
cSQL = "insert into "+table+" ("+ \
",".join(['"'+self.__normalize(c)+'"' for c in cols])+ \
") values ("+",".join(["?"]*len(addData))+")"
sqlData = addData
else:
dataCursor = self.db.cursor()
dataCursor.execute("SELECT * FROM {} WHERE {}=?".format(table,
self.run_id),
(update_run, ))
dataHere = dataCursor.fetchall()
if len(dataHere) < 1:
cSQL = "insert into {} ( {} ) values ( {} )".format(table,
self.run_id,
update_run)
if self.verbose:
print_("Execute SQL", cSQL, "to add an aerelmost empty row")
cursor.execute(cSQL)
cols = [c for c in cols if c in data]
cSQL = "update " + table + " set " + \
" , ".join(['"{}" = ?'.format(self.__normalize(c)) for c in cols]) + \
" where {} = ?".format(self.run_id)
sqlData = tuple(runData[c] for c in cols) + (update_run,)
if self.verbose:
print_("Execute SQL",cSQL,"with",sqlData)
try:
cursor.execute(cSQL, sqlData)
except Exception:
e = sys.exc_info()[1] # Needed because python 2.5 does not support 'as e'
print_("SQL-Expression:",cSQL)
print_("AddData:",addData)
raise e
lastrow=cursor.lastrowid
cursor.close()
return lastrow
def __adaptDatabase(self,data):
"""Make sure that all the required columns and tables are there"""
c=self.db.execute('SELECT name FROM sqlite_master WHERE type = "table"')
tables=[ x["name"] for x in c.fetchall() ]
indata=dict([(k,v) for k,v in iteritems(data) if type(v)!=dict])
subtables=dict([(k,v) for k,v in iteritems(data) if type(v)==dict])
self.__addColumnsToTable(self.TheRunsName,indata)
for tn,content in iteritems(subtables):
if tn+"Data" not in tables:
if self.verbose:
print_("Adding table",tn)
self.db.execute(
"CREATE TABLE {}Data (dataId INTEGER PRIMARY KEY, {} INTEGER)".format(
tn, self.run_id))
self.__addColumnsToTable(tn+"Data",
self.__flattenDict(content))
def __flattenDict(self,oData,prefix=""):
data=[(prefix+k,v) for k,v in iteritems(oData) if type(v)!=dict]
subtables=dict([(k,v) for k,v in iteritems(oData) if type(v)==dict])
for name,val in iteritems(subtables):
data+=list(self.__flattenDict(val,prefix+name+self.separator).items())
if self.verbose:
print_("Flattened",oData,"to",data)
return dict(data)
def __getColumns(self,tablename):
c=self.db.execute('SELECT * from '+tablename)
result=[]
for desc in c.description:
if desc[0] in ['dataId',self.run_id]:
result.append(desc[0])
else:
result.append(self.__denormalize(desc[0]))
return result
def __addColumnsToTable(self,table,data):
columns=self.__getColumns(table)
for k,v in iteritems(data):
if k not in columns:
if self.verbose:
print_("Adding:",k,"to",table,"(normalized:",
self.__normalize(k),")")
if isinstance(v,integer_types+(float,)):
self.db.execute('ALTER TABLE "%s" ADD COLUMN "%s" REAL' %
(table,self.__normalize(k)))
else:
self.db.execute('ALTER TABLE "%s" ADD COLUMN "%s" TEXT' %
(table,self.__normalize(k)))
[docs] def dumpToCSV(self,
fname,
selection=None,
disableRunData=None,
pandasFormat=True,
excel=False):
"""Dump the contents of the database to a csv-file
:param name: the CSV-file
:param selection: list of regular expressions. Only data
entries fitting those will be added to the CSV-file (except
for the basic run). If unset all data will be written"""
file=CSVCollection(fname)
runCursor=self.db.cursor()
runCursor.execute("SELECT * from theRuns")
c=self.db.execute('SELECT name FROM sqlite_master WHERE type = "table"')
tables=[ x["name"] for x in c.fetchall() ]
allData=set()
writtenData=set()
disabledStandard=set()
for d in runCursor:
id=d[self.run_id]
if self.verbose:
print_("Dumping run",id)
for k in list(d.keys()):
writeEntry=True
if disableRunData:
for e in disableRunData:
exp=re.compile(e)
if not exp.search(self.__denormalize(k)) is None:
writeEntry=False
break
if writeEntry:
file[k]=d[k]
else:
disabledStandard.add(k)
for t in tables:
if t==self.TheRunsName:
namePrefix="runInfo"
else:
namePrefix=t[:-4]
dataCursor=self.db.cursor()
dataCursor.execute("SELECT * FROM "+t+" WHERE {}=?".format(self.run_id),
(str(id),))
data=dataCursor.fetchall()
if len(data)>1:
error(len(data),"data items found for id ",id,
"in table",t,".Need exactly 1")
elif len(data)<1:
continue
for k in list(data[0].keys()):
if k in ["dataId", self.run_id]:
continue
if k in disabledStandard:
continue
name=namePrefix+self.separator+self.__denormalize(k)
allData.add(name)
writeEntry=True
if selection:
writeEntry=False
for e in selection:
exp=re.compile(e)
if exp.search(name):
writeEntry=True
break
if writeEntry:
writtenData.add(name)
file[name]=data[0][k]
file.write()
if self.verbose:
sep="\n "
if allData==writtenData:
print_("Added all data entries:",sep,sep.join(sorted(allData)),sep="")
else:
print_("Added parameters:",sep,sep.join(sorted(writtenData)),
"\nUnwritten data:",sep,sep.join(sorted(allData-writtenData)),sep="")
if len(disabledStandard)>0:
print_("Disabled standard entries:",sep,sep.join(sorted(disabledStandard)),sep="")
f=file(pandasFormat)
if excel:
file(True).to_excel(fname)
if not f is None:
return f
else:
# retry by forcing to numpy
return file(False)
# Should work with Python3 and Python2