Module hdict.content.aux_value
Expand source code
# Copyright (c) 2023. Davi Pereira dos Santos
# This file is part of the hdict project.
# Please respect the license - more about this in the section (*) below.
#
# hdict is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# hdict is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with hdict. If not, see <http://www.gnu.org/licenses/>.
#
# (*) Removing authorship by any means, e.g. by distribution of derived
# works or verbatim, obfuscated, compiled or rewritten versions of any
# part of this work is illegal and it is unethical regarding the effort and
# time spent here.
#
import dis
import re
from collections import OrderedDict
from inspect import signature, isbuiltin, isclass
from pickle import dumps
from hosh import Hosh, ø
def v2hosh(value: object) -> Hosh:
    """
    Convert any value to a hosh object.

    `value` should be serializable (by pickle).
    Adopt pickle for hoshfication because it is faster.

    Resolution order:

    1. If `value` has a `hosh` attribute, that attribute is returned unchanged.
    2. If `value` is callable, the result of `f2hosh(value)` is returned.
    3. Otherwise `value` is pickled (protocol 5) and the bytes are wrapped in `Hosh`.

    A `TypeError` raised during steps 2 or 3 whose message mentions
    "disassemble _PredictScorer" or "disassemble _ProbaScorer" triggers a fallback
    to plain pickling of `value`. Any other `TypeError` is re-raised as a generic
    `Exception` chained to the original error.

    >>> obj = {"x": 3, "l": [1, 2, "5"]}
    >>> print(v2hosh(obj))
    z0cJsCBPY7cHt.oJnrpyk23FOlPdCcYvcX8x7jg6
    >>> # We encapsulate 'obj' as built-in types cannot be easily patched.
    >>> obj = OrderedDict(obj)
    >>> obj.hosh = ø * "My-custom-identifier-arbitrarily-defined"
    >>> print(v2hosh(obj))
    My-custom-identifier-arbitrarily-defined
    """
    # A user-provided identifier always wins over any computed one.
    if hasattr(value, "hosh"):
        return value.hosh
    try:
        if callable(value):
            return f2hosh(value)
        # REMINDER: pickle is the fastest serialization
        return Hosh(dumps(value, protocol=5))
    except TypeError as e:  # pragma: no cover
        # Some callables cannot be disassembled; identify them by their pickled form instead.
        # NOTE(review): the names look like scikit-learn scorer classes - confirm.
        if "disassemble _PredictScorer" in str(e) or "disassemble _ProbaScorer" in str(e):
            return Hosh(dumps(value, protocol=5))
        # Chain explicitly so the original TypeError is reported as the direct cause.
        raise Exception(f"Cannot pickle. Pickling is needed to hosh hdict values ({value}): {e}") from e
def f2hosh(function: callable):
    """
    Convert a function to a hosh object.

    Adopt pickle(bytecode) for hoshfication because it is faster than other serializations of bytecode.

    Resolution order:

    1. If `function` has a `hosh` attribute, that attribute is returned unchanged.
    2. Builtins and classes are identified by the encoded `str(function)`.
    3. Otherwise the identifier is built from the parameter names with their
       defaults (a parameter without default is recorded as `None`) and from the
       cleaned text of the disassembled bytecode.

    >>> fun = lambda x, y: x + y
    >>> print(f2hosh(fun))
    vD8.I-NU3x5hzmj-m1EJgeAIE-.H.HGnWxqvZng0
    >>> fun.hosh = ø * "My-custom-identifier-arbitrarily-defined"
    >>> print(f2hosh(fun))
    My-custom-identifier-arbitrarily-defined
    """
    if hasattr(function, "hosh"):
        return function.hosh
    if isbuiltin(function) or isclass(function):
        return Hosh(str(function).encode())

    parameters = signature(function).parameters.values()
    fields_and_params = {p.name: None if p.default is p.empty else p.default for p in parameters}

    # Remove line numbers.
    # The disassembly text is cut at blank lines; empty chunks are discarded.
    groups = [group for group in dis.Bytecode(function).dis().split("\n\n") if group]

    # NOTE: `fields_and_params` is intentionally the very same object here and in `code` below;
    # changing that would change the pickled bytes and therefore every resulting identifier.
    clean_lines = [fields_and_params]
    for group in groups:
        # Replace memory addresses and file names by just the object name.
        # The address class is spelled out as hexadecimal digits (both cases) instead of the
        # accidental range `0-f`, which also covered punctuation and every uppercase letter.
        group = re.sub(r'<code object (.+) at 0x[0-9a-fA-F]+, file ".+", line \d+>', r"\1", group)
        # Split on runs of spaces, strip leading digits/spaces from each token (purely numeric
        # tokens become empty strings) and drop the first token.
        lines = [re.sub(r"^[\d ]+", "", segment) for segment in re.split(" +", group)][1:]
        clean_lines.append(lines)

    code = [fields_and_params, clean_lines]
    # REMINDER: pickle chosen because it is the fastest serialization (see bottom of the file)
    return Hosh(dumps(code, protocol=5))
# Timing:
"""
l = [{"x": 3}, 530934590.435903475, "4p9fj24gifh2430g8h230g82h34g0p2843hg02h"] * 100
def f():
pack(l, ensure_determinism=True, unsafe_fallback=False, compressed=False)
def g():
dumps(l)
def h():
pack(l, ensure_determinism=True, unsafe_fallback=False, compressed=True)
print(timeit(f, number=1000))
print(timeit(g, number=1000))
print(timeit(h, number=1000))
"""
Functions
def f2hosh(function: callable)
-
Convert a function to a hosh object.
Adopt pickle(bytecode) for hoshfication because it is faster than other serializations of bytecode.
>>> fun = lambda x, y: x + y
>>> print(f2hosh(fun))
vD8.I-NU3x5hzmj-m1EJgeAIE-.H.HGnWxqvZng0
>>> fun.hosh = ø * "My-custom-identifier-arbitrarily-defined"
>>> print(f2hosh(fun))
My-custom-identifier-arbitrarily-defined
Expand source code
def f2hosh(function: callable):
    """
    Convert a function to a hosh object.

    Adopt pickle(bytecode) for hoshfication because it is faster than other serializations of bytecode.

    Resolution order:

    1. If `function` has a `hosh` attribute, that attribute is returned unchanged.
    2. Builtins and classes are identified by the encoded `str(function)`.
    3. Otherwise the identifier is built from the parameter names with their
       defaults (a parameter without default is recorded as `None`) and from the
       cleaned text of the disassembled bytecode.

    >>> fun = lambda x, y: x + y
    >>> print(f2hosh(fun))
    vD8.I-NU3x5hzmj-m1EJgeAIE-.H.HGnWxqvZng0
    >>> fun.hosh = ø * "My-custom-identifier-arbitrarily-defined"
    >>> print(f2hosh(fun))
    My-custom-identifier-arbitrarily-defined
    """
    if hasattr(function, "hosh"):
        return function.hosh
    if isbuiltin(function) or isclass(function):
        return Hosh(str(function).encode())

    parameters = signature(function).parameters.values()
    fields_and_params = {p.name: None if p.default is p.empty else p.default for p in parameters}

    # Remove line numbers.
    # The disassembly text is cut at blank lines; empty chunks are discarded.
    groups = [group for group in dis.Bytecode(function).dis().split("\n\n") if group]

    # NOTE: `fields_and_params` is intentionally the very same object here and in `code` below;
    # changing that would change the pickled bytes and therefore every resulting identifier.
    clean_lines = [fields_and_params]
    for group in groups:
        # Replace memory addresses and file names by just the object name.
        # The address class is spelled out as hexadecimal digits (both cases) instead of the
        # accidental range `0-f`, which also covered punctuation and every uppercase letter.
        group = re.sub(r'<code object (.+) at 0x[0-9a-fA-F]+, file ".+", line \d+>', r"\1", group)
        # Split on runs of spaces, strip leading digits/spaces from each token (purely numeric
        # tokens become empty strings) and drop the first token.
        lines = [re.sub(r"^[\d ]+", "", segment) for segment in re.split(" +", group)][1:]
        clean_lines.append(lines)

    code = [fields_and_params, clean_lines]
    # REMINDER: pickle chosen because it is the fastest serialization (see bottom of the file)
    return Hosh(dumps(code, protocol=5))
def v2hosh(value: object) ‑> hosh.hosh_.Hosh
-
Convert any value to a hosh object.
`value` should be serializable (by pickle). Adopt pickle for hoshfication because it is faster.
>>> obj = {"x": 3, "l": [1, 2, "5"]}
>>> print(v2hosh(obj))
z0cJsCBPY7cHt.oJnrpyk23FOlPdCcYvcX8x7jg6
>>> # We encapsulate 'obj' as built-in types cannot be easily patched.
>>> obj = OrderedDict(obj)
>>> obj.hosh = ø * "My-custom-identifier-arbitrarily-defined"
>>> print(v2hosh(obj))
My-custom-identifier-arbitrarily-defined
Expand source code
def v2hosh(value: object) -> Hosh:
    """
    Convert any value to a hosh object.

    `value` should be serializable (by pickle).
    Adopt pickle for hoshfication because it is faster.

    Resolution order:

    1. If `value` has a `hosh` attribute, that attribute is returned unchanged.
    2. If `value` is callable, the result of `f2hosh(value)` is returned.
    3. Otherwise `value` is pickled (protocol 5) and the bytes are wrapped in `Hosh`.

    A `TypeError` raised during steps 2 or 3 whose message mentions
    "disassemble _PredictScorer" or "disassemble _ProbaScorer" triggers a fallback
    to plain pickling of `value`. Any other `TypeError` is re-raised as a generic
    `Exception` chained to the original error.

    >>> obj = {"x": 3, "l": [1, 2, "5"]}
    >>> print(v2hosh(obj))
    z0cJsCBPY7cHt.oJnrpyk23FOlPdCcYvcX8x7jg6
    >>> # We encapsulate 'obj' as built-in types cannot be easily patched.
    >>> obj = OrderedDict(obj)
    >>> obj.hosh = ø * "My-custom-identifier-arbitrarily-defined"
    >>> print(v2hosh(obj))
    My-custom-identifier-arbitrarily-defined
    """
    # A user-provided identifier always wins over any computed one.
    if hasattr(value, "hosh"):
        return value.hosh
    try:
        if callable(value):
            return f2hosh(value)
        # REMINDER: pickle is the fastest serialization
        return Hosh(dumps(value, protocol=5))
    except TypeError as e:  # pragma: no cover
        # Some callables cannot be disassembled; identify them by their pickled form instead.
        # NOTE(review): the names look like scikit-learn scorer classes - confirm.
        if "disassemble _PredictScorer" in str(e) or "disassemble _ProbaScorer" in str(e):
            return Hosh(dumps(value, protocol=5))
        # Chain explicitly so the original TypeError is reported as the direct cause.
        raise Exception(f"Cannot pickle. Pickling is needed to hosh hdict values ({value}): {e}") from e