Module idict.function
Expand source code
import re
def isplit(source, sep=None, regex=False):
    """
    Lazily split ``source`` on ``sep``; generator version of str.split().

    Adapted from https://stackoverflow.com/a/9773142/9681577

    :param source:
        source string (unicode or bytes)
    :param sep:
        separator to split on. When None, ``source`` is stripped and split
        on runs of whitespace, and nothing is yielded if it is then empty
        (same result as ``source.split()``). When ``regex`` is true, an
        object that already has a ``finditer`` method (e.g. a compiled
        pattern) is used as-is.
    :param regex:
        if True, will treat sep as regular expression.
    :returns:
        generator yielding elements of string.
    :raises ValueError:
        if ``sep`` is empty and ``regex`` is false. Because this is a
        generator, the error surfaces on the first iteration step, not at
        call time.
    """
    if sep is None:
        # mimic default python behavior
        source = source.strip()
        if not source:
            # "".split() and "   ".split() both return []: yield nothing.
            return
        sep = "\\s+"
        if isinstance(source, bytes):
            # A bytes subject needs a bytes pattern.
            sep = sep.encode("ascii")
        regex = True

    if regex:
        # version using re.finditer()
        if not hasattr(sep, "finditer"):
            sep = re.compile(sep)
        start = 0
        for m in sep.finditer(source):
            idx = m.start()
            # Sanity check: matches must arrive left to right, non-overlapping.
            assert idx >= start
            yield source[start:idx]
            start = m.end()
        # Whatever follows the last match (the whole source if no match).
        yield source[start:]
    else:
        # version using str.find(), less overhead than re.finditer()
        sepsize = len(sep)
        if sepsize == 0:
            # find("") always succeeds at ``start`` and would never advance,
            # producing an endless stream of empty strings.
            raise ValueError("empty separator")
        start = 0
        while True:
            idx = source.find(sep, start)
            if idx == -1:
                yield source[start:]
                return
            yield source[start:idx]
            start = idx + sepsize
Sub-modules
idict.function.data
-
Functions to be used directly within an idict workflow
idict.function.dataset
-
Functions to be used directly within an idict workflow
idict.function.evaluation
idict.function.manipulation
idict.function.model
idict.function.text
idict.function.vizualization
idict.function.wrapper
Functions
def isplit(source, sep=None, regex=False)
-
https://stackoverflow.com/a/9773142/9681577
generator version of str.split()
:param source: source string (unicode or bytes)
:param sep: separator to split on.
:param regex: if True, will treat sep as regular expression.
:returns: generator yielding elements of string.
Expand source code
def isplit(source, sep=None, regex=False):
    """
    Lazily split ``source`` on ``sep``; generator version of str.split().

    Adapted from https://stackoverflow.com/a/9773142/9681577

    :param source: source string (unicode or bytes)
    :param sep: separator to split on. When None, ``source`` is stripped and
        split on runs of whitespace; nothing is yielded if it is then empty.
        When ``regex`` is true, an object that already has a ``finditer``
        method (e.g. a compiled pattern) is used as-is.
    :param regex: if True, will treat sep as regular expression.
    :returns: generator yielding elements of string.
    :raises ValueError: if ``sep`` is empty and ``regex`` is false (raised on
        the first iteration step, since this is a generator).
    """
    if sep is None:
        # mimic default python behavior
        source = source.strip()
        if not source:
            # str.split() returns [] for empty / whitespace-only input.
            return
        sep = "\\s+"
        if isinstance(source, bytes):
            # A bytes subject needs a bytes pattern.
            sep = sep.encode("ascii")
        regex = True

    if regex:
        # version using re.finditer()
        if not hasattr(sep, "finditer"):
            sep = re.compile(sep)
        start = 0
        for m in sep.finditer(source):
            idx = m.start()
            # Sanity check: matches must arrive left to right, non-overlapping.
            assert idx >= start
            yield source[start:idx]
            start = m.end()
        # Remainder after the last match (the whole source if no match).
        yield source[start:]
    else:
        # version using str.find(), less overhead than re.finditer()
        sepsize = len(sep)
        if sepsize == 0:
            # An empty separator would never advance ``start`` and loop forever.
            raise ValueError("empty separator")
        start = 0
        while True:
            idx = source.find(sep, start)
            if idx == -1:
                yield source[start:]
                return
            yield source[start:idx]
            start = idx + sepsize