Source code for markovchain.base

from .storage import JsonStorage
from .scanner import Scanner
from .parser import ParserBase, Parser
from .util import load, DOC_INHERIT, state_size_dataset


class Markov(metaclass=DOC_INHERIT):
    """Markov chain generator base class.

    Attributes
    ----------
    DEFAULT_SCANNER : `type`
        Default scanner class.
    DEFAULT_PARSER : `type`
        Default parser class.
    DEFAULT_STORAGE : `type`
        Default storage class.
    scanner : `markovchain.scanner.Scanner`
    parser : `markovchain.parser.ParserBase`
    storage : `markovchain.storage.Storage`
    """

    DEFAULT_SCANNER = Scanner
    DEFAULT_PARSER = Parser
    DEFAULT_STORAGE = JsonStorage

    def __init__(self, scanner=None, parser=None, storage=None):
        """Markov chain generator base class constructor.

        Parameters
        ----------
        scanner : `dict` or `markovchain.scanner.Scanner`, optional
            Scanner (default: `DEFAULT_SCANNER()`).
        parser : `dict` or `markovchain.parser.ParserBase`, optional
            Parser (default: `DEFAULT_PARSER()`).
        storage : `markovchain.storage.Storage`, optional
            Storage (default: `DEFAULT_STORAGE()`).
        """
        if storage is None:
            storage = self.DEFAULT_STORAGE()
        # NOTE: falling back to settings kept in the storage is disabled.
        # The parser branch below assigns to `scanner`; fix that before
        # re-enabling this code.
        #if scanner is None:
        #    scanner = storage.settings.get('scanner', None)
        #if parser is None:
        #    scanner = storage.settings.get('parser', None)
        self.storage = storage
        # `load` is given the value, the expected base class and the default
        # class; presumably it instantiates from a `dict` or the default when
        # needed -- confirm in `markovchain.util`.
        self.scanner = load(scanner, Scanner, self.DEFAULT_SCANNER)
        self.parser = load(parser, ParserBase, self.DEFAULT_PARSER)

    def __eq__(self, markov):
        """Compare two generators by scanner, parser and storage.

        Parameters
        ----------
        markov : `object`
            Object to compare with.

        Returns
        -------
        `bool` or `NotImplemented`
            `NotImplemented` if `markov` is not a `Markov` instance, so that
            comparing with an unrelated object evaluates to `False` instead
            of raising `AttributeError`.
        """
        if not isinstance(markov, Markov):
            return NotImplemented
        return (self.scanner == markov.scanner
                and self.parser == markov.parser
                and self.storage == markov.storage)

    def data(self, data, part=False, dataset=''):
        """Parse data and update links.

        Parameters
        ----------
        data
            Data to parse.
        part : `bool`, optional
            True if data is partial (default: `False`).
        dataset : `str`, optional
            Dataset key prefix (default: '').
        """
        # Scanner output is fed to the parser; the resulting links are
        # added to the storage.
        links = self.parser(self.scanner(data, part), part, dataset)
        self.storage.add_links(links)

    def generate(self, state_size=None, start=(), dataset='', backward=False):
        """Generate a sequence.

        Parameters
        ----------
        state_size : `int`, optional
            State size (default: first item of `parser.state_sizes`).
        start : `str` or `iterable` of `str`, optional
            Initial state (default: ()).
        dataset : `str`, optional
            Dataset key prefix.
        backward : `bool`, optional
            Link direction.

        Returns
        -------
        `generator` of `str` or `None`
            State generator, or `None` if `state_size` is not given and
            `parser.state_sizes` is empty.
        """
        if state_size is None:
            try:
                state_size = next(iter(self.parser.state_sizes))
            except StopIteration:
                # No state sizes available: nothing to generate.
                return None
        # NOTE: validation of an explicit state size is disabled.
        #elif (self.parser is not None
        #      and state_size not in self.parser.state_sizes):
        #    raise ValueError('invalid state size: {0}: not in {1}'
        #                     .format(state_size, self.parser.state_sizes))
        # The dataset key is the caller's prefix followed by a suffix
        # derived from the state size.
        dataset += state_size_dataset(state_size)
        return self.storage.generate(start, state_size, dataset, backward)

    def get_settings_json(self):
        """Convert generator settings to JSON.

        Returns
        -------
        `dict`
            JSON data with the keys 'scanner' and 'parser'; a value is
            `None` when the corresponding attribute is `None`.
        """
        return {
            'scanner': None if self.scanner is None else self.scanner.save(),
            'parser': None if self.parser is None else self.parser.save()
        }

    def save(self, fp=None):
        """Save to file.

        Parameters
        ----------
        fp : `file`, optional
            Output file.
        """
        # Store the generator settings in the storage so that
        # `from_storage` can read them back.
        self.storage.settings['markov'] = self.get_settings_json()
        self.storage.save(fp)

    def close(self):
        """Close the underlying storage."""
        self.storage.close()

    @classmethod
    def from_storage(cls, storage):
        """Load from storage.

        Parameters
        ----------
        storage : `markovchain.storage.Storage`

        Returns
        -------
        `markovchain.Markov`
        """
        # Copy the stored settings so that adding the 'storage' key does not
        # modify `storage.settings`.
        args = dict(storage.settings.get('markov', {}))
        args['storage'] = storage
        return cls(**args)

    @classmethod
    def from_file(cls, fp, storage=None):
        """Load from file.

        Parameters
        ----------
        fp : `str` or `file`
            File or path.
        storage : `type`, optional
            Storage class (default: cls.DEFAULT_STORAGE)

        Returns
        -------
        `markovchain.Markov`
        """
        if storage is None:
            storage = cls.DEFAULT_STORAGE
        return cls.from_storage(storage.load(fp))

    @classmethod
    def from_settings(cls, settings=None, storage=None):
        """Create from settings.

        Parameters
        ----------
        settings : `dict`, optional
            Settings (default: None).
        storage : `type`, optional
            Storage class (default: cls.DEFAULT_STORAGE)

        Returns
        -------
        `markovchain.Markov`
        """
        if storage is None:
            storage = cls.DEFAULT_STORAGE
        return cls.from_storage(storage(settings=settings))