Source code for markovchain.text.util

import re
import enum


RE_PUNCT = re.compile(r'^[^\w\s]+$')
RE_WORD = re.compile(r'\w+')

RE_FLAGS = 'AILMSUX'
RE_CUSTOM_FLAGS = 'O'


[docs]class ReplyMode(enum.IntEnum): """Text reply mode. """ END = 0 START = 1 REPLY = 2
[docs]class CharCase(enum.IntEnum): """Character case.""" PRESERVE = 0 TITLE = 1 UPPER = 2 LOWER = 3
[docs] def convert(self, string): """Return a copy of string converted to case. Parameters ---------- string : `str` Returns ------- `str` Examples -------- >>> CharCase.LOWER.convert('sTr InG') 'str ing' >>> CharCase.UPPER.convert('sTr InG') 'STR ING' >>> CharCase.TITLE.convert('sTr InG') 'Str ing' >>> CharCase.PRESERVE.convert('sTr InG') 'sTr InG' """ if self == self.__class__.TITLE: return capitalize(string) if self == self.__class__.UPPER: return string.upper() if self == self.__class__.LOWER: return string.lower() return string
[docs]class ReFlags(enum.IntEnum): """Custom regexp flags. Attributes ---------- O : `int` OVERLAP : `int` Replace overlapping occurrences of pattern. """ O = 1 OVERLAP = 1
[docs]def ispunct(string): """Return `True` if all characters in a string are punctuation and it is not empty. Parameters ---------- string : `str` Returns ------- `bool` Examples -------- >>> ispunct('.,?') True >>> ispunct('.x.') False >>> ispunct('. ') False >>> ispunct('') False """ return RE_PUNCT.match(string) is not None
[docs]def get_words(string): """Find all words in a string. Parameters ---------- string : `str` Returns ------- `list` of `str` Examples -------- >>> get_words(' ..?!word , (Word).. word') ['word', 'Word', 'word'] """ return RE_WORD.findall(string)
[docs]def lstrip_ws_and_chars(string, chars): """Remove leading whitespace and characters from a string. Parameters ---------- string : `str` String to strip. chars : `str` Characters to remove. Returns ------- `str` Stripped string. Examples -------- >>> lstrip_ws_and_chars(' \\t.\\n , .x. ', '.,?!') 'x. ' """ res = string.lstrip().lstrip(chars) while len(res) != len(string): string = res res = string.lstrip().lstrip(chars) return res
[docs]def capitalize(string): """Capitalize a sentence. Parameters ---------- string : `str` String to capitalize. Returns ------- `str` Capitalized string. Examples -------- >>> capitalize('worD WORD WoRd') 'Word word word' """ if not string: return string if len(string) == 1: return string.upper() return string[0].upper() + string[1:].lower()
[docs]def re_flags(flags, custom=ReFlags): """Parse regexp flag string. Parameters ---------- flags: `str` Flag string. custom: `IntEnum`, optional Custom flag enum (default: None). Returns ------- (`int`, `int`) (flags for `re.compile`, custom flags) Raises ------ ValueError """ re_, custom_ = 0, 0 for flag in flags.upper(): try: re_ |= getattr(re, flag) except AttributeError: if custom is not None: try: custom_ |= getattr(custom, flag) except AttributeError: raise ValueError('Invalid custom flag "%s"' % flag) else: raise ValueError('Invalid regexp flag "%s"' % flag) return re_, custom_
[docs]def re_flags_str(flags, custom_flags): """Convert regexp flags to string. Parameters ---------- flags : `int` Flags. custom_flags : `int` Custom flags. Returns ------- `str` Flag string. """ res = '' for flag in RE_FLAGS: if flags & getattr(re, flag): res += flag for flag in RE_CUSTOM_FLAGS: if custom_flags & getattr(ReFlags, flag): res += flag return res
[docs]def re_sub(pattern, repl, string, count=0, flags=0, custom_flags=0): """Replace regular expression. Parameters ---------- pattern : `str` or `_sre.SRE_Pattern` Compiled regular expression. repl : `str` or `function` Replacement. string : `str` Input string. count: `int` Maximum number of pattern occurrences. flags : `int` Flags. custom_flags : `int` Custom flags. """ if custom_flags & ReFlags.OVERLAP: prev_string = None while string != prev_string: prev_string = string string = re.sub(pattern, repl, string, count, flags) return string return re.sub(pattern, repl, string, count, flags)