#!/usr/bin/env python

"""\
Module for reading and writing txtdb databases.

A txtdb database is a text file of records separated by lines
consisting of a form-feed only.  Each record consists of lines with a
field name and a value separated by a colon and optionally surrounded
by whitespace.  If a line ends with a backslash, it is continued on
the next line, with nothing, not even a newline or other whitespace,
between the lines.  A line starting with whitespace is concatenated
to the previous line, with a newline joining the two lines.

The records need not have any unique keys; indeed, there may be
several identical records in the database.

Lines with a hash sign (#) in position zero are considered comments,
and are ignored, unless the previous line ended with a backslash.
Comment lines are not retained when saving the database.

The database format is specifically designed to be easy to edit in a
normal text editor.

* Function open(filename, autoreload=1):
    Open the database in FILENAME and return the Txtdb object.
    See open.__doc__ for more information.

* Class Txtdb:
    The class of the database objects returned by txtdb.open().
    See Txtdb.__doc__ for more information.

* Class Record:
    The class of all records returned by the database.

* Classes DbError and UseError:
    The classes of exceptions raised by the txtdb module.

The implementation probably doesn't scale very well to large
databases, since it uses lists that are scanned linearly for matching
records.
"""

__author__ = "Thomas Bellman "
__rcsId__ = """$Id: txtdb.py,v 1.6 2001/12/29 21:52:36 bellman Exp $"""


import __builtin__
import string
import os
import fileinput
import re
import stat
import cPickle


class _Error:
    msg = None

    def __init__(self, **data):
        for k, v in data.items():
            setattr(self, k, v)

    def __repr__(self):
        if hasattr(self, "line") and self.line is not None:
            return "%s: %s at line %d" % (__name__, self.msg, self.line)
        else:
            return "%s: %s" % (__name__, self.msg)


class DbError(_Error):
    """Exception raised for errors in the database file."""
    line = None


class UseError(_Error):
    """Exception raised for errors in using the txtdb module."""
    pass


class _ContlineInput:
    # Sequence wrapper that joins backslash-continued lines from SEQ.
    # It must be indexed in order (the old for-loop protocol).  Not
    # used by Txtdb itself, which handles continuation lines in load().

    def __init__(self, seq):
        self.__seq = seq
        self.__i = -1
        self.__j = 0
        self.__line = None

    def __getitem__(self, i):
        if i == self.__i:
            if self.__line is None:
                raise IndexError, "End of File"
            return self.__line
        if i != self.__i + 1:
            raise IndexError, "Must be indexed in order"
        self.__i = i
        line = ""
        while 1:
            try:
                l = self.__seq[self.__j]
                self.__j = self.__j + 1
            except IndexError:
                break
            line = line + l
            if l[-1:] == "\\":
                line = line[:-1]
            elif l[-2:] == "\\\n":
                line = line[:-2]
            else:
                break
        if line == "":
            self.__line = None
            raise IndexError, "End of File"
        self.__line = line
        return line


class Record:
    def __init__(self, values):
        for k, v in values.items():
            setattr(self, k, v)

    def _keys(self):
        return self.__dict__.keys()

    def _values(self):
        return self.__dict__.values()

    def _items(self):
        return self.__dict__.items()


class Txtdb:
    """Class for holding a txtdb database in memory.

    The class automatically reloads itself if the source file has
    changed.  Warning: this can sometimes lead to surprising results.

    Methods:
     - search(predicate):  Finds all records for which PREDICATE is true.
     - select_one(**keys): Returns the single record matching KEYS.
     - select(**keys):     Returns all records matching KEYS.
     - save():             Saves the database back to file.
     - add(rec):           Adds a record to the database.
     - remove(rec):        Removes a record from the database.
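
    Illustrative use (the file name and the 'status' and 'name' fields
    below are made up for this example; a real database may contain
    any fields):

        db = txtdb.open('/var/lib/example.db')
        active = db.select(status='active')
        person = db.select_one(name='Example Person')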
""" def __init__(self, filename, autoreload=1): self.__filename = filename self.__autoreload = autoreload self._records = [] self.__fieldnames = [] self.reload() class __find_matcher: def __init__(self, keys): self.keys = keys def __call__(self, rec): for k,v in self.keys.items(): if not hasattr(rec, k) or getattr(rec, k) != v: return None return rec class __find_re_matcher: def __init__(self, reflags, keys): self.keys = {} for v,rx in keys.items(): self.keys[v] = re.compile(rx, reflags) def __call__(self, rec): for k,v in self.keys.items(): if not hasattr(rec, k) or not v.search(getattr(rec, k)): return None return rec def search(self, predicate): """Search for all records for which PREDICATE returns true. The callable object PREDICATE will be applied to every record in the database, and a list of all records for which PREDICATE returns true will be returned. """ self.maybe_reload() l = [] for r in self._records: if predicate(r): l.append(r) return l def select(self, **keys): """Returns a list of all records matching the named arguments. """ return self.search(self.__find_matcher(keys)) def select_one(self, **keys): """Returns the single record that matches the named arguments. An exception is raised if no records or more than one record matches the query. """ l = apply(self.select, (), keys) if len(l) == 0: raise KeyError, "No such element" if len(l) > 1: raise KeyError, "Not unique" return l[0] def re_select(self, _reflags=0, **keys): """Returns a list of all records matching the named arguments. """ return self.search(self.__find_re_matcher(_reflags, keys)) def __getitem__(self, i): # Don't call maybe_reload(), so a loop over the database will # give consistent results return self._records[i] def __getslice__(self, start, end): return self._records[start:end] def needs_reload(self): from stat import ST_MTIME, ST_INO, ST_DEV if not self.__autoreload: return 0 if not self.__filename: return 0 finfo = os.stat(self.__filename) return ( finfo[ST_MTIME] != self.__finfo[ST_MTIME] or finfo[ST_INO] != self.__finfo[ST_INO] or finfo[ST_DEV] != self.__finfo[ST_DEV]) def maybe_reload(self): if self.needs_reload(): self.reload() def __pickle_filename(self, filename=None): if filename is None: filename = self.__filename return filename + ",p" def loadpickle(self, fp=None, _filename=None): """Load pickled database to memory.""" if fp is None: fp = __builtin__.open(self.__pickle_filename(_filename)) db = cPickle.load(fp) self._records, self.__fieldnames = db def savepickle(self, fp=None, _filename=None): if fp is None: fp = __builtin__.open(self.__pickle_filename(_filename), "w") db = (self._records, self.__fieldnames) cPickle.dump(db, fp, 1) # If we got passed a file object to save to, we don't want to close # that for the caller; they might want to use it further. Thus, we # let the garbage collector dispose and close fp for us. def reload(self): """Load database file to memory.""" if not self.__filename: return # Keep info about file, so we can know if it needs to be # reloaded. self.__finfo = os.stat(self.__filename) try: picklestat = os.stat(self.__pickle_filename()) if picklestat[stat.ST_MTIME] > self.__finfo[stat.ST_MTIME]: self.loadpickle() return except: pass self.load(__builtin__.open(self.__filename)) try: self.savepickle() except: # Oh, well, we'll just have to load it the hard way next time too. pass def load(self, file): """Load database from file object FILE. 
""" self._records = [] self.__fieldnames = [] values = {} is_cont = 0 key = None lineno = 0 for line in file.readlines(): lineno = lineno + 1 was_cont = is_cont if line[-1:] == '\n': line = line[:-1] if line == '\f': if values: self.add(Record(values)) values = {}; is_cont = 0; key = None continue if line[-1:] == '\\': is_cont = 1; line = line[:-1] else: is_cont = 0 if string.strip(line) == "": continue if was_cont: # Append new line to last key val = string.rstrip(line) if type(values[key]) == type([]): values[key][-1] = values[key][-1] + val else: values[key] = values[key] + val elif line[0] in string.whitespace: val = string.strip(line) if type(values[key]) == type([]): values[key][-1] = values[key][-1] + "\n" + val else: values[key] = values[key] + "\n" + val elif line[:1] == "#": continue else: # Split on colon and asign tmp = string.split(line, ":", 1) if len(tmp) < 2: raise DbError(line=lineno, msg="Field name and value not separated") [key, val] = tmp key = string.lower(string.strip(key)) val = string.strip(val) if key[-2:] == "[]": key = key[:-2]; val = [val] if values.has_key(key): values[key] = values[key] + val else: values[key] = val # We must add the fieldnames here, not just in the add() # method, to get the correct order from the file if not key in self.__fieldnames: self.__fieldnames.append(key) # Any "leftovers"? if values: self.add(Record(values)) def format_field(self, key, rec): """Format a field for writing to the database.""" val = getattr(rec, key) if type(val) == type([]): key = key + "[]" else: val = [val] fieldlist = [] key = string.capwords(key) + ":" if len(key) < 8: key = key + "\t" for v in val: fieldlist.append(key + string.replace(v, "\n", "\n\t") + "\n") field = string.join(fieldlist, "") return field def store(self, file): """Write the database to file object FILE. """ for rec in self._records: for key in self.__fieldnames: if not hasattr(rec, key): continue file.write(self.format_field(key, rec)) for key in dir(rec): if key[:1] == "_": continue if key in self.__fieldnames: continue file.write(self.format_field(key, rec)) if rec is not self._records[-1]: file.write("\f\n") file.close() def save(self, use_rename=1, filename=None): """Save the database back to disk. If parameter FILENAME is given, database is saved to that filename, otherwise it will be saved to the file where it was loaded from. If USE_RENAME is true (default, unless FILE is given), save() writes to a temporary file and uses os.rename() afterwards. This means that the update of the file is atomic for other processes, but means that hard links will point to the old file. If USE_RENAME is false, save() writes to the original file, which can lead to other processes loading only a partial database. Any changes made to the database file by other processes (or other instances of the Txtdb class) will be lost. No attempts to lock the file is done. """ if not filename: filename = self.__filename if use_rename: tmpname = filename + ".NEW" file = __builtin__.open(tmpname, "w") else: file = __builtin__.open(filename, "w") self.store(file) if use_rename: os.rename(tmpname, filename) self.__finfo = os.stat(self.__filename) # XXX: Is this correct? try: self.savepickle(_filename=filename) except: # Oh, well, we'll just have to load it the hard way next time. pass def add(self, rec): """Add a record REC to the in memory database. 
""" # if self.__autoreload: # raise UseError(msg="Can't modify an autoreloading database") self._records.append(rec) for key in dir(rec): if key[:1] != "_" and not key in self.__fieldnames: self.__fieldnames.append(key) def remove(self, rec): """Remove the record REC from the in memory database. REC *must* be an object returned by one of the select*() methods. """ self._records.remove(rec) def __len__(self): return len(self._records) def open(filename, autoreload=1): """Open the txtdb database in FILENAME. If AUTORELOAD is true (default), every select*() operation on the opened database will automatically check if the file on disk has changed, and reload itself if that is the case. This can sometimes be confusing. Note especially that changes to the in core database will be lost if the database decides to reload itself. """ db = Txtdb(filename, autoreload) return db def consistency(db, required, no_others, unique_key): """Check a txtdb database for consistency. Checks that all records in DB has all fields named in REQUIRED. If NO_OTHERS is true, checks that no record has any field except those in REQUIRED. UNIQUE_KEY is a list of fields that should form a unique key in the database, i.e no two records should have the same values for that set of fields. """ misses = [] morefields = [] uniques = {} non_uniques = [] for rec in db: fields = rec._keys()[:] for fld in required: if not hasattr(rec, fld): if rec not in misses: misses.append(rec) else: fields.remove(fld) if no_others and len(fields) > 0: morefields.append(rec) # Check for uniqueness of key if unique_key and not rec in misses: key = [] for fld in unique_key: key.append(getattr(rec, fld)) key = tuple(key) if uniques.has_key(key): non_uniques.append(rec) else: uniques[key] = rec if misses or morefields or non_uniques: return (misses, morefields, non_uniques) return 0