Module idict.data.compression
Expand source code
# Copyright (c) 2021. Davi Pereira dos Santos
# This file is part of the i-dict project.
# Please respect the license - more about this in the section (*) below.
#
# idict is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# idict is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with idict. If not, see <http://www.gnu.org/licenses/>.
#
# (*) Removing authorship by any means, e.g. by distribution of derived
# works or verbatim, obfuscated, compiled or rewritten versions of any
# part of this work is illegal and unethical regarding the effort and
# time spent here.
import pickle
import lz4.frame as lz4
from idict.config import GLOBAL
def pack(obj, ensure_determinism=True):
    # r"""
    # >>> from idict import setup
    # >>> setup(compression_cachelimit_MB=0.000_100)
    # >>> memo = GLOBAL["compression_cache"] = {}
    # >>> GLOBAL["compression_cachesize"] = 0
    # >>> b = b"000011"
    # >>> pack(b)
    # b'pckl_\x04"M\x18h@\x15\x00\x00\x00\x00\x00\x00\x006\x13\x00\x00\x00R\x80\x05\x95\n\x00\x01\x00\xa0C\x06000011\x94.\x00\x00\x00\x00'
    # >>> memo[id(b)]["unpacked"]
    # b'000011'
    # >>> len(memo), GLOBAL["compression_cachesize"], GLOBAL["compression_cachelimit"]
    # (1, 47, 100)
    # >>> pack(b"asd")
    # b'pckl_\x04"M\x18h@\x12\x00\x00\x00\x00\x00\x00\x00\xd9\x10\x00\x00\x00R\x80\x05\x95\x07\x00\x01\x00pC\x03asd\x94.\x00\x00\x00\x00'
    # >>> len(memo), GLOBAL["compression_cachesize"], GLOBAL["compression_cachelimit"]
    # (2, 91, 100)
    # >>> len(pack(b"123"))
    # 44
    # >>> len(memo), GLOBAL["compression_cachesize"], GLOBAL["compression_cachelimit"]
    # (2, 88, 100)
    # """
    # # memid = id(obj)
    # # memo = GLOBAL["compression_cache"]
    # # if memid in memo:
    # #     if obj is memo[memid]["unpacked"]:
    # #         return memo[memid]["packed"]
    # #     else:
    # #         # rare collision
    # #         GLOBAL["compression_cachesize"] -= memo[memid]["packed"]
    # #         del memo[memid]
    #
    try:
        try:
            dump = pickle.dumps(obj, protocol=5)
            prefix = b"pckl_"
        except:
            if ensure_determinism:  # pragma: no cover
                raise NondeterminismException("Cannot serialize deterministically.")
            import dill
            dump = dill.dumps(obj, protocol=5)
            prefix = b"dill_"
        blob = prefix + lz4.compress(dump)
        # # GLOBAL["compression_cachesize"] += len(blob)
        # # memo[memid] = {"unpacked": obj, "packed": blob}
        #
        # # # LRU
        # # keys = iter(list(memo.keys()))
        # # while len(memo) > 0 and GLOBAL["compression_cachesize"] > GLOBAL["compression_cachelimit"]:
        # #     k = next(keys)
        # #     v = memo.pop(k)["packed"]
        # #     GLOBAL["compression_cachesize"] -= len(v)
        #
        return blob
    except KeyError as e:  # pragma: no cover
        if str(e) == "'__getstate__'":  # pragma: no cover
            raise Exception("Unpickable value:", type(obj))
        else:
            raise e


def unpack(blob):
    r"""
    >>> unpack(b'pckl_\x04"M\x18h@\x15\x00\x00\x00\x00\x00\x00\x006\x13\x00\x00\x00R\x80\x05\x95\n\x00\x01\x00\xa0C\x06000011\x94.\x00\x00\x00\x00')
    b'000011'
    """
    prefix = blob[:5]
    blob = blob[5:]
    if prefix == b"pckl_":
        return pickle.loads(lz4.decompress(blob))
    elif prefix == b"dill_":
        import dill
        return dill.loads(lz4.decompress(blob))


class NondeterminismException(Exception):
    pass
Functions
def pack(obj, ensure_determinism=True)
Expand source code
def pack(obj, ensure_determinism=True):
    # r"""
    # >>> from idict import setup
    # >>> setup(compression_cachelimit_MB=0.000_100)
    # >>> memo = GLOBAL["compression_cache"] = {}
    # >>> GLOBAL["compression_cachesize"] = 0
    # >>> b = b"000011"
    # >>> pack(b)
    # b'pckl_\x04"M\x18h@\x15\x00\x00\x00\x00\x00\x00\x006\x13\x00\x00\x00R\x80\x05\x95\n\x00\x01\x00\xa0C\x06000011\x94.\x00\x00\x00\x00'
    # >>> memo[id(b)]["unpacked"]
    # b'000011'
    # >>> len(memo), GLOBAL["compression_cachesize"], GLOBAL["compression_cachelimit"]
    # (1, 47, 100)
    # >>> pack(b"asd")
    # b'pckl_\x04"M\x18h@\x12\x00\x00\x00\x00\x00\x00\x00\xd9\x10\x00\x00\x00R\x80\x05\x95\x07\x00\x01\x00pC\x03asd\x94.\x00\x00\x00\x00'
    # >>> len(memo), GLOBAL["compression_cachesize"], GLOBAL["compression_cachelimit"]
    # (2, 91, 100)
    # >>> len(pack(b"123"))
    # 44
    # >>> len(memo), GLOBAL["compression_cachesize"], GLOBAL["compression_cachelimit"]
    # (2, 88, 100)
    # """
    # # memid = id(obj)
    # # memo = GLOBAL["compression_cache"]
    # # if memid in memo:
    # #     if obj is memo[memid]["unpacked"]:
    # #         return memo[memid]["packed"]
    # #     else:
    # #         # rare collision
    # #         GLOBAL["compression_cachesize"] -= memo[memid]["packed"]
    # #         del memo[memid]
    #
    try:
        try:
            dump = pickle.dumps(obj, protocol=5)
            prefix = b"pckl_"
        except:
            if ensure_determinism:  # pragma: no cover
                raise NondeterminismException("Cannot serialize deterministically.")
            import dill
            dump = dill.dumps(obj, protocol=5)
            prefix = b"dill_"
        blob = prefix + lz4.compress(dump)
        # # GLOBAL["compression_cachesize"] += len(blob)
        # # memo[memid] = {"unpacked": obj, "packed": blob}
        #
        # # # LRU
        # # keys = iter(list(memo.keys()))
        # # while len(memo) > 0 and GLOBAL["compression_cachesize"] > GLOBAL["compression_cachelimit"]:
        # #     k = next(keys)
        # #     v = memo.pop(k)["packed"]
        # #     GLOBAL["compression_cachesize"] -= len(v)
        #
        return blob
    except KeyError as e:  # pragma: no cover
        if str(e) == "'__getstate__'":  # pragma: no cover
            raise Exception("Unpickable value:", type(obj))
        else:
            raise e
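Since pack's own doctest is commented out in the source above, the following round-trip is offered as an illustrative sketch derived from that listing rather than from the library's docstrings: a picklable object is serialized with pickle protocol 5, lz4-compressed, and tagged with the 5-byte prefix b'pckl_'.

>>> from idict.data.compression import pack, unpack
>>> blob = pack([1, 2, 3])    # picklable, so the pickle path is taken
>>> blob[:5]                  # the 5-byte prefix records the serializer
b'pckl_'
>>> unpack(blob)              # the round trip restores an equal object
[1, 2, 3]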
def unpack(blob)
>>> unpack(b'pckl_\x04"M\x18h@\x15\x00\x00\x00\x00\x00\x00\x006\x13\x00\x00\x00R\x80\x05\x95\n\x00\x01\x00\xa0C\x06000011\x94.\x00\x00\x00\x00')
b'000011'
Expand source code
def unpack(blob):
    r"""
    >>> unpack(b'pckl_\x04"M\x18h@\x15\x00\x00\x00\x00\x00\x00\x006\x13\x00\x00\x00R\x80\x05\x95\n\x00\x01\x00\xa0C\x06000011\x94.\x00\x00\x00\x00')
    b'000011'
    """
    prefix = blob[:5]
    blob = blob[5:]
    if prefix == b"pckl_":
        return pickle.loads(lz4.decompress(blob))
    elif prefix == b"dill_":
        import dill
        return dill.loads(lz4.decompress(blob))
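Everything unpack needs is carried in the blob itself: the first 5 bytes name the serializer and the remainder is a plain lz4 frame. The snippet below is a sketch of that layout based on the listing above, not an official example; note also that a blob with an unrecognized prefix falls through both branches and yields None.

>>> import pickle
>>> import lz4.frame as lz4
>>> from idict.data.compression import pack
>>> blob = pack("hello")
>>> blob[:5]
b'pckl_'
>>> pickle.loads(lz4.decompress(blob[5:]))    # what unpack does for b'pckl_'
'hello'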
Classes
class NondeterminismException (*args, **kwargs)
Common base class for all non-exit exceptions.
Expand source code
class NondeterminismException(Exception):
    pass
Ancestors
- builtins.Exception
- builtins.BaseException
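A sketch of when this exception surfaces, inferred from the pack listing above: an object that pickle rejects (a lambda, for instance) raises it while ensure_determinism keeps its default of True; passing ensure_determinism=False switches to the dill fallback instead, which assumes the optional dill package is installed.

>>> from idict.data.compression import NondeterminismException, pack, unpack
>>> try:
...     pack(lambda x: x + 1)    # pickle cannot serialize a lambda
... except NondeterminismException as e:
...     print(e)
Cannot serialize deterministically.
>>> blob = pack(lambda x: x + 1, ensure_determinism=False)    # dill fallback
>>> blob[:5]
b'dill_'
>>> unpack(blob)(41)
42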