Coverage for /home/agp/Documents/me/code/gutools/gutools/persistence.py : 0%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
2"""This module provides a persistence layer using sqlite.
3"""
5import sqlite3
6import time
7from datetime import datetime
8import os
9import random
10import threading
11import hashlib
12import yaml
13import re
14import ujson as json
15from collections import namedtuple, defaultdict
16from typing import Iterable
17from weakref import WeakKeyDictionary
19from aiopb.aiopb import Hub
20from gutools.tools import uidfrom, snake_case, expandpath, retry, identity, \
21 yaml_encode, yaml_decode, test_pid, walk, rebuild, serializable_container
22from gutools.uobjects import UObject
class FileLock(object):
    """Open a file with exclusive access, guarded by a ``<path>.lock`` file.

    FileLock is used as a context manager returning the file stream opened
    with the desired access mode ('w', 'r', 'a').

    Acquisition proceeds as follows:

    - read ``<path>.lock``;
    - if it is missing/empty, or ``test_pid`` rejects the pid stored in it,
      write our own fingerprint ``<pid>.<thread name>.<random>`` into it;
    - read the lock file again and check the content is our fingerprint;
    - open the real file and hand it to the caller;
    - on exit, close the real file and remove the lock file.

    NOTE(review): the lock file is written with a plain ``open(..., 'w')``,
    not an exclusive create, so the write-then-read-back check narrows the
    race window between contenders but does not look fully atomic.
    """

    def __init__(self, path, mode='w', timeout=0):
        """
        - ``path``: the file to get access to.
        - ``mode``: the desired access mode.
        - ``timeout``: seconds to keep trying to get the lock.  A falsy
          value (the default ``0``) makes ``__enter__`` raise
          ``TimeoutError`` without attempting to acquire the lock.
        """
        self.path = path
        self.lock_file = path + '.lock'
        self.mode = mode
        self.timeout = timeout
        self.fd = None
        self.lock_fd = None

    def _read_lock(self):
        """Return the lock file content, or '' when it cannot be read."""
        try:
            with open(self.lock_file, 'r') as stream:
                return stream.read()
        except OSError:
            return ''

    def _remove_lock(self):
        """Delete the lock file, tolerating that it is already gone."""
        try:
            os.unlink(self.lock_file)
        except FileNotFoundError:
            pass

    def __enter__(self):
        """Write a fingerprint in the lock file and check that the content
        is still ours afterwards.  If it matches, access is granted and the
        real file, opened in the requested mode, is returned.

        Raises ``TimeoutError`` when the lock is not obtained in time.
        """
        content = f'{os.getpid()}.{threading.current_thread().name}.{random.random()}'

        now = time.time()
        while self.timeout and time.time() - now < self.timeout:
            content2 = self._read_lock()
            if content == content2:
                break  # the lock file holds our fingerprint: lock acquired
            # No owner, or the pid recorded by the owner is rejected by
            # test_pid (presumably a dead process -- helper defined elsewhere).
            if not content2 or not test_pid(content2.split('.')[0]):
                with open(self.lock_file, 'w') as stream:
                    stream.write(content)
                continue  # re-read at once to confirm we won
            time.sleep(random.random() / 10.0)
        else:
            raise TimeoutError(f"Locking {self.path} for {self.mode}")

        try:
            self.fd = open(self.path, self.mode)
        except Exception:
            # Do not leave a lock behind that nobody is going to release.
            self._remove_lock()
            raise
        return self.fd

    def __exit__(self, *args):
        """Close the file and then remove the lock.

        The file is closed first so buffered data reaches disk before any
        other contender can take the lock.
        """
        try:
            if self.fd is not None:
                self.fd.close()
        finally:
            self.fd = None
            self._remove_lock()
class FSLayout(object):
    """Handle file contents based on predetermined patterns and structure.

    A *key* (``'out'``, ``'etc'``, ``'<folder>'``, ...) selects one of the
    templates in ``patterns``; the positional ``args`` are joined with dots
    to build the file name.  The file extension selects the encoder/decoder
    used to write/read the content.
    """
    # Matches ``<workspace>/<key>/<name>[.<label>].<key>``; the trailing
    # back-reference forces the extension to be equal to the key folder.
    reg_file_params = re.compile(
        r'(?P<workspace>.*)/(?P<key>[^/]+)/(?P<name>\w+-\w+)(\.(?P<label>.*))?\.(\2)',
        re.DOTALL)

    # extension -> callable turning the raw text into a python value
    ext_decoder = {
        'yaml': yaml_decode,
        'json': json.decode,
        'pid': int,
        'out': identity,
        'err': identity,
    }

    # extension -> callable turning a python value into text
    ext_encoder = {
        'yaml': yaml_encode,
        'json': json.encode,
        'pid': str,
        'out': identity,
        'err': identity,
    }

    # extensions handled exactly like another one
    ext_aliases = {
        'fp': 'yaml',
    }

    for k, v in ext_aliases.items():
        ext_decoder[k] = ext_decoder[v]
        ext_encoder[k] = ext_encoder[v]

    # tuple of keys -> path template
    patterns = {
        ('out', 'err', 'pid', 'fp', 'db'): '{root:}/{key:}/{name:}.{key:}',
        ('etc', ): '{root:}/etc/{name:}.yaml',
        ('<folder>', ) : '{root:}/{name:}'
    }

    def __init__(self, path):
        """``path`` is the root folder of the layout (passed to ``expandpath``)."""
        self.path = expandpath(path)
        # path -> [mtime, (key, args)] with the last known modification of
        # each file; updated by ``get_content()`` / ``set_content()``.
        self.stat = dict()

    def get_path(self, key, *args, touch=False):
        """Get the file (or folder) of the key expanded with the given args.

        When ``touch=True`` the necessary folders are created: the path
        itself for ``'<folder>'`` keys, the parent folder otherwise.  The
        file itself is not created.

        Raises ``RuntimeError`` when no pattern handles ``key``.
        """
        root = self.path
        name = '.'.join('{}'.format(arg) for arg in args)
        for pattern, fmt in self.patterns.items():
            if key in pattern:
                path = fmt.format(root=root, key=key, name=name)
                break
        else:
            raise RuntimeError(f"Key '{key}' not found to expand path")

        assert path
        if touch:
            if key in ('<folder>', ):
                os.makedirs(path, exist_ok=True)
            else:
                parent = os.path.dirname(path)
                os.makedirs(parent, exist_ok=True)
        return path

    def get_content(self, key, *args, default=None):
        """Get the content of a file in the layout structure.

        When the file does not exist and ``default`` is not None, the
        default is written to the file and returned.
        """
        path = self.get_path(key, *args)
        content = self._get_content(path)
        if content is None and default is not None:
            content = default
            self.set_content(key, content, *args)
        try:
            self.stat[path] = [os.stat(path).st_mtime, (key, args)]
        except FileNotFoundError:
            pass
        return content

    def set_content(self, key, content, *args):
        """Set the content of a key/args in the layout structure.

        Uses a FileLock to have exclusive access to the file.  Returns the
        path written, or ``None`` when encoding/writing the content failed.
        A lock timeout is not swallowed: it propagates to the caller.
        """
        path = self.get_path(key, *args)
        parent = os.path.dirname(path)
        os.makedirs(parent, exist_ok=True)
        content = serializable_container(content)

        with FileLock(path, timeout=3) as f:
            try:
                func = self.ext_encoder.get(path.split('.')[-1], identity)
                f.write(func(content))
                # Flush before stat so the recorded mtime already includes
                # this write and it is not reported later as external.
                f.flush()
                self.stat[path] = [os.stat(path).st_mtime, (key, args)]
            except Exception:
                # best effort: the failure is signalled by returning None
                path = None
        return path

    def update_content(self, key, content, *args):
        """Update the content of a key/args in the layout structure.

        - read the current content (``content`` is used when missing)
        - merge ``content`` into it
        - save the merged result
        - return the content last state
        """
        content2 = self.get_content(key, *args, default=content)
        content2.update(content)
        # Save the merged mapping; saving only ``content`` would drop the
        # keys that were already stored in the file.
        self.set_content(key, content2, *args)
        return content2

    def iter_file_content(self, key, name='', label='', *args):
        """Iterate over known files handled by the layout structure,
        filtering by name and label when they are provided.

        Yields ``(params, filename, content)`` where ``params`` is the
        group dict of ``reg_file_params``.  Extra ``args`` are ignored.
        """
        dummy = self.get_path(key, '__dummy__')
        top = os.path.dirname(dummy)
        for root, _, files in os.walk(top):
            for name_ in files:
                filename = os.path.join(root, name_)
                m = self.reg_file_params.search(filename)
                if not m:
                    continue
                d = m.groupdict()
                if name in ('', d['name']) and label in ('', d['label']):
                    content = self._get_content(filename)
                    if content is not None:
                        yield d, filename, content

    def iter_external_modifications(self):
        """Iterate over known files modified after the last recorded mtime.

        Yields ``(path, (key, args))``.  Files that can no longer be
        stat'ed are forgotten.
        """
        for path, (mtime, params) in list(self.stat.items()):
            try:
                # compare timestamps, not the whole stat_result structure
                mtime2 = os.stat(path).st_mtime
                if mtime2 > mtime:
                    yield path, params
            except OSError:
                self.stat.pop(path)

    def set_alias(self, key, alias, *args):
        """Set an alias of a file by creating a symbolic
        link between files.
        """
        src, dst = self._alias_info(key, alias, *args)
        if src != dst:
            assert os.path.exists(src)
            if os.path.exists(dst):
                os.remove(dst)
            # A relative link (chdir + symlink of the basename) was tried
            # to make `ls -ls` less annoying, but it did not work, so the
            # link points to the full source path.
            os.symlink(src, dst)
            assert os.path.islink(dst)

    def remove_alias(self, key, alias, *args):
        """Remove an alias of a file."""
        src, dst = self._alias_info(key, alias, *args)
        assert os.path.islink(dst)
        os.remove(dst)

    def _alias_info(self, key, alias, *args):
        "Compute the source and target for an alias."
        src = self.get_path(key, *args)
        # keep folder and extension, replace only the base name
        dst, ext = os.path.splitext(src)
        dst = list(os.path.split(dst))
        dst[-1] = alias + ext
        dst = os.path.join(*dst)
        return src, dst

    def _get_content(self, filename, default=None):
        "Get the decoded content of a file, returning default if it does not exist."
        try:
            with open(filename) as f:
                content = f.read()
            func = self.ext_decoder.get(filename.split('.')[-1], identity)
            content = func(content)
        except FileNotFoundError:
            content = default
        return content
282# --------------------------------------------------
283# Configurable
284# --------------------------------------------------
class Config(dict):
    """Dictionary of settings persisted through a layout object.

    The layout is expected to offer ``get_content(key, *args, default=)``
    and ``set_content(key, content, *args)``; the configuration is stored
    under the ``'etc'`` key with the configuration name as argument.
    """

    def __init__(self, layout, name, default=None):
        """
        - ``layout``: object used to read/write the configuration.
        - ``name``: name of the configuration.
        - ``default``: optional initial values.
        """
        self.layout = layout
        self.name = name
        if default:
            self.update(default)

    def load(self, default=None):
        """Replace the current values with the stored ones.

        ``default`` (an empty dict when omitted) is handed to the layout
        to be used when there is nothing stored yet.
        """
        default = {} if default is None else default
        self.clear()
        data = self.layout.get_content(
            'etc', self.name,
            default=default)
        self.update(data)

    def save(self, name=None):
        """Store the values under ``name`` (``self.name`` when omitted).

        The value returned by the layout is kept in ``self.config``.
        """
        name = name or self.name
        self.config = self.layout.set_content(
            'etc', self, name)

    def update_key(self, key, values):
        """Merge ``values`` into the value stored at ``key`` and save.

        - dict + dict: the stored dict is updated in place.
        - list + iterable: both are merged as a set (order is not
          preserved and duplicates are dropped).
        - otherwise ``values`` replaces the stored value.
        """
        v0 = self.get(key)
        if isinstance(v0, dict) and isinstance(values, dict):
            v0.update(values)
        elif isinstance(v0, list) and isinstance(values, Iterable):
            merged = set(v0)
            merged.update(values)
            self[key] = list(merged)
        else:
            self[key] = values

        self.save()
class DB(object):  # , metaclass=Xingleton):
    """Persistence of logging events using sqlite.

    As sqlite does not support sharing a connection between threads,
    a simple connection factory keeps one connection per thread.
    """
    scheme = ""

    def __init__(self, path=None, delete=False):
        """
        - ``path``: database file (passed to ``expandpath``).
        - ``delete``: remove an existing database file first.
        """
        self.path = expandpath(path)
        self.conn__ = dict()  # thread id -> sqlite3 connection
        self.workspaces = WeakKeyDictionary()

        if delete and self.path and os.path.exists(self.path):
            os.unlink(self.path)

    def __del__(self):
        # Commit only a connection that already exists for this thread:
        # going through ``self.conn`` would open a new one during
        # finalization (and fail if ``__init__`` never completed).
        conn = getattr(self, 'conn__', {}).get(threading.get_ident())
        if conn is not None:
            try:
                conn.commit()
            except sqlite3.Error:
                pass

    def __enter__(self):
        return self

    def __exit__(self, *_exc):
        self.conn.commit()

    def __str__(self):
        return f"<{self.__class__.__name__}: {self.path}>"

    def __repr__(self):
        return str(self)

    @property
    def conn(self):
        "Connection factory: one connection per thread, created on demand."
        tid = threading.get_ident()

        conn = self.conn__.get(tid)
        if conn is None:
            self.conn__[tid] = conn = sqlite3.connect(self.path)
        return conn

    def close(self):
        """Commit and close the connections with the database.

        Connections that could be closed are forgotten, so a later use of
        ``conn`` opens a fresh one instead of failing on a closed handle.
        Connections that refuse to close with ``ProgrammingError`` (e.g.
        owned by another thread) are left registered.
        """
        for tid, conn in list(self.conn__.items()):
            try:
                conn.commit()
            except sqlite3.ProgrammingError:
                pass
            try:
                conn.close()
            except sqlite3.ProgrammingError:
                continue
            self.conn__.pop(tid, None)

    def execute(self, query, *args, **kw):
        """Execute ``query`` on the connection of the current thread.

        Positional ``args`` are bound when given, otherwise the keyword
        arguments are bound by name.  Returns the sqlite3 cursor.
        """
        conn = self.conn
        if args:
            r = conn.execute(query, args)
        else:
            r = conn.execute(query, kw)
        return r

    def executescript(self, script):
        """Run a SQL script and commit.

        An ``OperationalError`` is reported on stdout and not re-raised.
        """
        conn = self.conn
        try:
            conn.executescript(script)
        except sqlite3.OperationalError as why:
            print('FAILED TO CREATE DB SCHEME: {}'.format(why))
            print(script)
        conn.commit()

    # workspace management
    @property
    def ready(self):
        "True when a database path has been set."
        return self.path is not None

    def attach(self, workspace):
        """Register a workspace and notify it when the database is ready."""
        self.workspaces[workspace] = True  # keep a live reference
        if self.ready:
            workspace.db_ready()

    def change_db(self, path):
        """Point to another database file and notify the workspaces."""
        self.path = path
        # force new operations to create a new sqlite3 connection per thread
        self.conn__.clear()
        # iterate over a snapshot: notified workspaces may touch the registry
        for workspace in list(self.workspaces):
            workspace.db_ready()
class DBWorkspace(object):
    """Store and retrieve uobjects in the tables of a ``DB``.

    The table name defaults to the snake_case name of the object class.
    Values are always bound as named parameters; table and column names
    are interpolated into the SQL text, so they must come from trusted
    code (class names, ``__slots__``), never from external input.
    """
    scheme = ""
    REPLACE = 'REPLACE INTO {table:} ({}) VALUES (:{})'
    INSERT = 'INSERT INTO {table:} ({}) VALUES (:{})'
    SELECT = 'SELECT * FROM {table:} WHERE {where:}'
    DELETE = 'DELETE FROM {table:} WHERE {}'

    uobject_table = dict()  # class -> table name cache

    def __init__(self, db):
        self.db = db
        self.db.attach(self)

    def db_ready(self):
        """Called by the database once it is usable: create the schema."""
        if self.db.ready:
            self.__create_squema__()

    def __create_squema__(self):
        try:
            self.db.executescript(self.scheme)
            self.db.conn.commit()
        except Exception as why:
            print(why)
            retry(1)

    @classmethod
    def _get_table(cls, klass):
        """Return (and cache) the table name used for ``klass``."""
        table = cls.uobject_table.get(klass)
        if table is None:
            table = snake_case(klass.__name__)
            cls.uobject_table[klass] = table
        return table

    def update(self, uobject, sql=None, table=None, **kwargs):
        """Write ``uobject`` in its table (``REPLACE`` by default).

        ``kwargs`` are forwarded to ``uobject.asdict``.  The commit is
        retried up to 20 times on ``OperationalError``, sleeping a random
        fraction of a second between tries; if all of them fail the
        method returns without raising.
        """
        table = table or self._get_table(uobject.__class__)
        sql = sql or self.REPLACE

        kw = uobject.asdict(skip_nones=True, **kwargs)

        if 'date' in kw and kw.get('date') is None:
            kw['date'] = datetime.now()

        columns = list(kw)
        sql = sql.format(','.join(columns),
                         ',:'.join(columns),
                         table=table, )
        self.db.execute(sql, **kw)
        for _ in range(20):
            try:
                self.db.conn.commit()
                break
            except sqlite3.OperationalError:
                time.sleep(random.random())

    def xupdate(self, uobject, **kw):
        """Write ``uobject`` and return it as read back from the database
        (``None`` when it cannot be found)."""
        self.update(uobject, **kw)
        kw.update(uobject.asdict())
        for full_object in self.find(uobject.__class__, **kw):
            return full_object

    def replace(self, uobject, **kw):
        """Write ``uobject`` only when no matching row exists yet.

        Returns the stored object when it had to be written and ``None``
        when a matching row was already present.
        """
        kw.update(uobject.asdict())
        for full_object in self.find(uobject.__class__, **kw):
            break
        else:
            return self.xupdate(uobject, **kw)

    def delete_item(self, uobject):
        """Delete the rows matching every non-None field of ``uobject``."""
        table = self._get_table(uobject.__class__)
        kw = uobject.asdict(skip_nones=True)
        sql = self.DELETE.format(' AND '.join([f'{k}=:{k}' for k in kw.keys()]),
                                 table=table, )
        self.db.execute(sql, **kw)
        self.db.conn.commit()

    def delete(self, klass, join='AND', sql=None, table=None, **kw):
        """Delete the rows of ``klass`` matching the given field values.

        The statement is executed and committed right away (as
        ``delete_item`` does) instead of waiting for the result to be
        iterated.  Returns an iterator of ``klass`` instances built from
        the rows produced by the statement, if any.
        """
        table = table or self._get_table(klass)
        sql = sql or self.DELETE

        rows = self._execute(klass, sql, join=join, table=table, **kw).fetchall()
        self.db.conn.commit()
        return (klass(*row) for row in rows)

    def find(self, klass, join='AND', sql=None, table=None, **kw):
        """Yield the ``klass`` instances matching the given field values."""
        table = table or self._get_table(klass)
        sql = sql or self.SELECT

        for row in self._execute(klass, sql, join=join, table=table, **kw):
            item = klass(*row)
            yield item

    def _execute(self, klass, sql, join='AND', **kw):
        """Build the WHERE clause from the keywords that are slots of
        ``klass`` with a non-None value, and execute the statement.

        With no usable keyword the clause is ``1`` (every row matches).
        """
        where = [k for k in set(kw).intersection(
            klass.__slots__) if kw[k] is not None]

        where = f' {join} '.join([f'{k}=:{k}' for k in where]) or '1'

        # The clause is passed both by position and by name so templates
        # written with ``{}`` (DELETE) and with ``{where:}`` (SELECT) work.
        sql = sql.format(where, where=where, **kw)
        iterator = self.db.execute(sql, **kw)
        return iterator
def test_file_locking():
    """Smoke test: request a second lock on a file that is already locked.

    A small configuration file is written through the layout, then the
    same path is locked twice, the inner request being made while the
    outer lock is still held.
    """
    layout = FSLayout(path='/tmp/kk')
    path = layout.set_content('etc', dict(foo=1, bar='dos'), 'buzz')

    with FileLock(path, mode='a', timeout=100) as outer:
        with FileLock(path, mode='a', timeout=2) as inner:
            pass


if __name__ == '__main__':
    test_file_locking()