Module idict.function.vizualization

Expand source code
from itertools import repeat

from idict.macro import isnumber


def X2histogram(col=0, input="X", output="histogram", bins=8, **kwargs):
    """
    Build histogram data for one column of a matrix.

    The result is a list of ``{"x": label, "count": n}`` dicts. If the first value
    of the column is numeric (according to ``isnumber``), every value is converted
    to float and binned with ``pandas.cut(values, bins, duplicates="drop")``; bins
    without any value are reported with a count of zero. Otherwise the occurrences
    of each distinct value are counted.

    Parameters
    ----------
    col
        Positional index of the column to summarize.
    input
        Name of the keyword argument holding the matrix: a pandas ``DataFrame`` or
        any object supporting ``X[:, col]`` (e.g. a 2D numpy array).
    output
        Key under which the histogram is returned.
    bins
        Forwarded to ``pandas.cut`` when the column is numeric.
    **kwargs
        Must contain the matrix under the name given by ``input``.

    Returns
    -------
    dict
        ``{output: [...], "_history": ...}``. The list is empty for an empty column.

    >>> import numpy as np
    >>> from idict import let
    >>> X = np.array([["a", 2.1, 1.6], ["a", 3, 2], ["b", 7, 3]])
    >>> X2histogram(X=X, col=1, bins=2)
    {'histogram': [{'x': '(2.095, 4.55]', 'count': 2}, {'x': '(4.55, 7.0]', 'count': 1}], '_history': Ellipsis}
    >>> from idict import idict
    >>> from idict.function.dataset import df2Xy
    >>> d = idict.fromtoy(output_format="df") >> df2Xy >> X2histogram
    >>> d.histogram
    [{'x': '(0.069, 3.975]', 'count': 11}, {'x': '(3.975, 7.85]', 'count': 5}, {'x': '(7.85, 11.725]', 'count': 3}, {'x': '(11.725, 15.6]', 'count': 0}, {'x': '(15.6, 19.475]', 'count': 0}, {'x': '(19.475, 23.35]', 'count': 0}, {'x': '(23.35, 27.225]', 'count': 0}, {'x': '(27.225, 31.1]', 'count': 1}]
    """
    import numpy as np
    import pandas

    X = kwargs[input]
    vals = X.iloc[:, col] if hasattr(X, "iloc") else X[:, col]
    if len(vals) == 0:
        # Nothing to count; avoids indexing the first element of an empty column.
        return {output: [], "_history": ...}

    # A pandas Series must be indexed by position: `vals[0]` would be a label
    # lookup and fail whenever the index does not contain the label 0.
    first = vals.iloc[0] if hasattr(vals, "iloc") else vals[0]
    if isnumber(first):
        cut = pandas.cut(np.array(list(map(float, vals))), bins, duplicates="drop")
        # observed=False is passed explicitly so that empty bins are kept (with
        # count 0) regardless of the default of the installed pandas version.
        counts = pandas.DataFrame(cut).groupby(cut, observed=False).count().to_dict()[0]
    else:
        counts = pandas.Series(vals).value_counts()
    # int() normalizes numpy integer counts to plain Python ints in both branches.
    result = [{"x": str(k), "count": int(v)} for k, v in counts.items()]
    return {output: result, "_history": ...}


def tofloat(X, k, col):
    """
    Return the cell at row ``k``, column ``col`` of ``X`` as a float.

    ``X`` may be a pandas object (anything exposing ``iloc``, which is then used
    for positional access) or any object supporting ``X[k, col]`` and
    ``X[:, col]``, such as a 2D numpy array.

    * A value accepted by ``float()`` is converted directly.
    * A value rejected with ``ValueError`` (e.g. a non-numeric string) is encoded
      as the position of its first occurrence in the column.
    * A value rejected with ``TypeError`` (e.g. ``None``) triggers a printed
      warning and is converted to ``0.0``.
    """
    if hasattr(X, "iloc"):
        X = X.iloc
    val = X[k, col]
    try:
        return float(val)
    except ValueError:
        # Not parseable as a number: use the index of the first equal value in
        # the column as a numeric stand-in.
        return float(list(X[:, col]).index(val))
    except TypeError:
        print(
            f"Warning: Wrong type {type(val)} converted to zero. Look for '?' characters if you provided an ARFF file."
        )
        # Return a float like every other path (the original returned the int 0).
        return 0.0


def Xy2scatterplot(colx=0, coly=1, Xin="X", yin="y", output="scatterplot", **kwargs):
    """
    Build scatterplot data: one series of ``{"x": ..., "y": ...}`` points per label.

    Rows of the matrix are grouped by their label in ``y``. Series appear in the
    order in which their label first occurs; the series ``id`` is ``str(label)``.
    String labels are matched as they are, any other label is matched through
    ``str(float(label))``. Point coordinates are produced by ``tofloat``.

    Parameters
    ----------
    colx, coly
        Positional indexes of the columns used as x and y coordinates.
    Xin, yin
        Names of the keyword arguments holding the matrix and the labels.
    output
        Key under which the list of series is returned.
    **kwargs
        Must contain the matrix and the labels under the names given by ``Xin``
        and ``yin``.

    Returns
    -------
    dict
        ``{output: [{"id": ..., "data": [...]}, ...], "_history": ...}``

    Raises
    ------
    IndexError
        If ``y`` has fewer labels than the matrix has rows.

    >>> import numpy as np
    >>> X = np.array([["c1", 2.1, 1.6], ["c2", 3.2, 2.3], ["c3", 7, 3]])
    >>> y = np.array(["a", "b", "c"])
    >>> Xy2scatterplot(X=X, y=y, colx=1, coly=2)
    {'scatterplot': [{'id': 'a', 'data': [{'x': 2.1, 'y': 1.6}]}, {'id': 'b', 'data': [{'x': 3.2, 'y': 2.3}]}, {'id': 'c', 'data': [{'x': 7.0, 'y': 3.0}]}], '_history': Ellipsis}
    >>> Xy2scatterplot(X=X, y=y, colx=1, coly=0)
    {'scatterplot': [{'id': 'a', 'data': [{'x': 2.1, 'y': 0.0}]}, {'id': 'b', 'data': [{'x': 3.2, 'y': 1.0}]}, {'id': 'c', 'data': [{'x': 7.0, 'y': 2.0}]}], '_history': Ellipsis}
    >>> from idict import idict
    >>> from idict.function.dataset import df2Xy
    >>> d = idict.fromtoy(output_format="df") >> df2Xy >> Xy2scatterplot
    >>> d.scatterplot
    [{'id': '0', 'data': [{'x': 5.1, 'y': 6.4}, {'x': 6.1, 'y': 3.6}, {'x': 3.1, 'y': 2.5}, {'x': 9.1, 'y': 3.5}, {'x': 9.1, 'y': 7.2}, {'x': 7.1, 'y': 6.6}, {'x': 2.1, 'y': 0.1}, {'x': 5.1, 'y': 4.5}, {'x': 1.1, 'y': 3.2}, {'x': 3.1, 'y': 2.5}]}, {'id': '1', 'data': [{'x': 1.1, 'y': 2.5}, {'x': 1.1, 'y': 3.5}, {'x': 4.7, 'y': 4.9}, {'x': 8.3, 'y': 2.9}, {'x': 2.5, 'y': 4.5}, {'x': 0.1, 'y': 4.3}, {'x': 0.1, 'y': 4.0}, {'x': 31.1, 'y': 4.7}, {'x': 2.2, 'y': 8.5}, {'x': 1.1, 'y': 8.5}]}]
    """
    X = kwargs[Xin]
    y = kwargs[yin]

    def match_key(label):
        # Strings are compared as they are; anything else through its float form.
        return label if isinstance(label, str) else str(float(label))

    # Materializing y gives positional access even when it is a pandas Series,
    # where `y[k]` would be a label lookup.
    labels = list(y)
    n_rows = len(X)
    if len(labels) < n_rows:
        raise IndexError(f"y has {len(labels)} labels but X has {n_rows} rows")

    # Single pass over the rows instead of rescanning all rows for every label.
    points_by_key = {}
    for k, label in enumerate(labels[:n_rows]):
        point = {"x": tofloat(X, k, colx), "y": tofloat(X, k, coly)}
        points_by_key.setdefault(match_key(label), []).append(point)

    result = []
    for m in dict.fromkeys(labels):  # distinct labels, first-occurrence order
        # Without rows nothing can match, so labels are not even normalized.
        data = list(points_by_key.get(match_key(m), ())) if n_rows else []
        result.append({"id": str(m), "data": data})
    return {output: result, "_history": ...}


# Descriptive metadata attached to each function object as a plain attribute.
# "parameters" and "code" hold the Ellipsis placeholder here; presumably idict
# fills them in elsewhere -- TODO confirm.
X2histogram.metadata = dict(
    id="-----------------------------X2histogram",
    name="X2histogram",
    description="Generate a histogram for the specified column of a field.",
    parameters=...,
    code=...,
)
Xy2scatterplot.metadata = dict(
    id="--------------------------Xy2scatterplot",
    name="Xy2scatterplot",
    description="Generate a scatterplot for the specified two columns of a field.",
    parameters=...,
    code=...,
)

Functions

def X2histogram(col=0, input='X', output='histogram', bins=8, **kwargs)
>>> import numpy as np
>>> from idict import let
>>> X = np.array([["a", 2.1, 1.6], ["a", 3, 2], ["b", 7, 3]])
>>> X2histogram(X=X, col=1, bins=2)
{'histogram': [{'x': '(2.095, 4.55]', 'count': 2}, {'x': '(4.55, 7.0]', 'count': 1}], '_history': Ellipsis}
>>> from idict import idict
>>> from idict.function.dataset import df2Xy
>>> d = idict.fromtoy(output_format="df") >> df2Xy >> X2histogram
>>> d.histogram
[{'x': '(0.069, 3.975]', 'count': 11}, {'x': '(3.975, 7.85]', 'count': 5}, {'x': '(7.85, 11.725]', 'count': 3}, {'x': '(11.725, 15.6]', 'count': 0}, {'x': '(15.6, 19.475]', 'count': 0}, {'x': '(19.475, 23.35]', 'count': 0}, {'x': '(23.35, 27.225]', 'count': 0}, {'x': '(27.225, 31.1]', 'count': 1}]
Expand source code
def X2histogram(col=0, input="X", output="histogram", bins=8, **kwargs):
    """
    Build histogram data for one column of a matrix.

    The result is a list of ``{"x": label, "count": n}`` dicts. If the first value
    of the column is numeric (according to ``isnumber``), every value is converted
    to float and binned with ``pandas.cut(values, bins, duplicates="drop")``; bins
    without any value are reported with a count of zero. Otherwise the occurrences
    of each distinct value are counted.

    Parameters
    ----------
    col
        Positional index of the column to summarize.
    input
        Name of the keyword argument holding the matrix: a pandas ``DataFrame`` or
        any object supporting ``X[:, col]`` (e.g. a 2D numpy array).
    output
        Key under which the histogram is returned.
    bins
        Forwarded to ``pandas.cut`` when the column is numeric.
    **kwargs
        Must contain the matrix under the name given by ``input``.

    Returns
    -------
    dict
        ``{output: [...], "_history": ...}``. The list is empty for an empty column.

    >>> import numpy as np
    >>> from idict import let
    >>> X = np.array([["a", 2.1, 1.6], ["a", 3, 2], ["b", 7, 3]])
    >>> X2histogram(X=X, col=1, bins=2)
    {'histogram': [{'x': '(2.095, 4.55]', 'count': 2}, {'x': '(4.55, 7.0]', 'count': 1}], '_history': Ellipsis}
    >>> from idict import idict
    >>> from idict.function.dataset import df2Xy
    >>> d = idict.fromtoy(output_format="df") >> df2Xy >> X2histogram
    >>> d.histogram
    [{'x': '(0.069, 3.975]', 'count': 11}, {'x': '(3.975, 7.85]', 'count': 5}, {'x': '(7.85, 11.725]', 'count': 3}, {'x': '(11.725, 15.6]', 'count': 0}, {'x': '(15.6, 19.475]', 'count': 0}, {'x': '(19.475, 23.35]', 'count': 0}, {'x': '(23.35, 27.225]', 'count': 0}, {'x': '(27.225, 31.1]', 'count': 1}]
    """
    import numpy as np
    import pandas

    X = kwargs[input]
    vals = X.iloc[:, col] if hasattr(X, "iloc") else X[:, col]
    if len(vals) == 0:
        # Nothing to count; avoids indexing the first element of an empty column.
        return {output: [], "_history": ...}

    # A pandas Series must be indexed by position: `vals[0]` would be a label
    # lookup and fail whenever the index does not contain the label 0.
    first = vals.iloc[0] if hasattr(vals, "iloc") else vals[0]
    if isnumber(first):
        cut = pandas.cut(np.array(list(map(float, vals))), bins, duplicates="drop")
        # observed=False is passed explicitly so that empty bins are kept (with
        # count 0) regardless of the default of the installed pandas version.
        counts = pandas.DataFrame(cut).groupby(cut, observed=False).count().to_dict()[0]
    else:
        counts = pandas.Series(vals).value_counts()
    # int() normalizes numpy integer counts to plain Python ints in both branches.
    result = [{"x": str(k), "count": int(v)} for k, v in counts.items()]
    return {output: result, "_history": ...}
def Xy2scatterplot(colx=0, coly=1, Xin='X', yin='y', output='scatterplot', **kwargs)
>>> import numpy as np
>>> X = np.array([["c1", 2.1, 1.6], ["c2", 3.2, 2.3], ["c3", 7, 3]])
>>> y = np.array(["a", "b", "c"])
>>> Xy2scatterplot(X=X, y=y, colx=1, coly=2)
{'scatterplot': [{'id': 'a', 'data': [{'x': 2.1, 'y': 1.6}]}, {'id': 'b', 'data': [{'x': 3.2, 'y': 2.3}]}, {'id': 'c', 'data': [{'x': 7.0, 'y': 3.0}]}], '_history': Ellipsis}
>>> Xy2scatterplot(X=X, y=y, colx=1, coly=0)
{'scatterplot': [{'id': 'a', 'data': [{'x': 2.1, 'y': 0.0}]}, {'id': 'b', 'data': [{'x': 3.2, 'y': 1.0}]}, {'id': 'c', 'data': [{'x': 7.0, 'y': 2.0}]}], '_history': Ellipsis}
>>> from idict import idict
>>> from idict.function.dataset import df2Xy
>>> d = idict.fromtoy(output_format="df") >> df2Xy >> Xy2scatterplot
>>> d.scatterplot
[{'id': '0', 'data': [{'x': 5.1, 'y': 6.4}, {'x': 6.1, 'y': 3.6}, {'x': 3.1, 'y': 2.5}, {'x': 9.1, 'y': 3.5}, {'x': 9.1, 'y': 7.2}, {'x': 7.1, 'y': 6.6}, {'x': 2.1, 'y': 0.1}, {'x': 5.1, 'y': 4.5}, {'x': 1.1, 'y': 3.2}, {'x': 3.1, 'y': 2.5}]}, {'id': '1', 'data': [{'x': 1.1, 'y': 2.5}, {'x': 1.1, 'y': 3.5}, {'x': 4.7, 'y': 4.9}, {'x': 8.3, 'y': 2.9}, {'x': 2.5, 'y': 4.5}, {'x': 0.1, 'y': 4.3}, {'x': 0.1, 'y': 4.0}, {'x': 31.1, 'y': 4.7}, {'x': 2.2, 'y': 8.5}, {'x': 1.1, 'y': 8.5}]}]
Expand source code
def Xy2scatterplot(colx=0, coly=1, Xin="X", yin="y", output="scatterplot", **kwargs):
    """
    Build scatterplot data: one series of ``{"x": ..., "y": ...}`` points per label.

    Rows of the matrix are grouped by their label in ``y``. Series appear in the
    order in which their label first occurs; the series ``id`` is ``str(label)``.
    String labels are matched as they are, any other label is matched through
    ``str(float(label))``. Point coordinates are produced by ``tofloat``.

    Parameters
    ----------
    colx, coly
        Positional indexes of the columns used as x and y coordinates.
    Xin, yin
        Names of the keyword arguments holding the matrix and the labels.
    output
        Key under which the list of series is returned.
    **kwargs
        Must contain the matrix and the labels under the names given by ``Xin``
        and ``yin``.

    Returns
    -------
    dict
        ``{output: [{"id": ..., "data": [...]}, ...], "_history": ...}``

    Raises
    ------
    IndexError
        If ``y`` has fewer labels than the matrix has rows.

    >>> import numpy as np
    >>> X = np.array([["c1", 2.1, 1.6], ["c2", 3.2, 2.3], ["c3", 7, 3]])
    >>> y = np.array(["a", "b", "c"])
    >>> Xy2scatterplot(X=X, y=y, colx=1, coly=2)
    {'scatterplot': [{'id': 'a', 'data': [{'x': 2.1, 'y': 1.6}]}, {'id': 'b', 'data': [{'x': 3.2, 'y': 2.3}]}, {'id': 'c', 'data': [{'x': 7.0, 'y': 3.0}]}], '_history': Ellipsis}
    >>> Xy2scatterplot(X=X, y=y, colx=1, coly=0)
    {'scatterplot': [{'id': 'a', 'data': [{'x': 2.1, 'y': 0.0}]}, {'id': 'b', 'data': [{'x': 3.2, 'y': 1.0}]}, {'id': 'c', 'data': [{'x': 7.0, 'y': 2.0}]}], '_history': Ellipsis}
    >>> from idict import idict
    >>> from idict.function.dataset import df2Xy
    >>> d = idict.fromtoy(output_format="df") >> df2Xy >> Xy2scatterplot
    >>> d.scatterplot
    [{'id': '0', 'data': [{'x': 5.1, 'y': 6.4}, {'x': 6.1, 'y': 3.6}, {'x': 3.1, 'y': 2.5}, {'x': 9.1, 'y': 3.5}, {'x': 9.1, 'y': 7.2}, {'x': 7.1, 'y': 6.6}, {'x': 2.1, 'y': 0.1}, {'x': 5.1, 'y': 4.5}, {'x': 1.1, 'y': 3.2}, {'x': 3.1, 'y': 2.5}]}, {'id': '1', 'data': [{'x': 1.1, 'y': 2.5}, {'x': 1.1, 'y': 3.5}, {'x': 4.7, 'y': 4.9}, {'x': 8.3, 'y': 2.9}, {'x': 2.5, 'y': 4.5}, {'x': 0.1, 'y': 4.3}, {'x': 0.1, 'y': 4.0}, {'x': 31.1, 'y': 4.7}, {'x': 2.2, 'y': 8.5}, {'x': 1.1, 'y': 8.5}]}]
    """
    X = kwargs[Xin]
    y = kwargs[yin]

    def match_key(label):
        # Strings are compared as they are; anything else through its float form.
        return label if isinstance(label, str) else str(float(label))

    # Materializing y gives positional access even when it is a pandas Series,
    # where `y[k]` would be a label lookup.
    labels = list(y)
    n_rows = len(X)
    if len(labels) < n_rows:
        raise IndexError(f"y has {len(labels)} labels but X has {n_rows} rows")

    # Single pass over the rows instead of rescanning all rows for every label.
    points_by_key = {}
    for k, label in enumerate(labels[:n_rows]):
        point = {"x": tofloat(X, k, colx), "y": tofloat(X, k, coly)}
        points_by_key.setdefault(match_key(label), []).append(point)

    result = []
    for m in dict.fromkeys(labels):  # distinct labels, first-occurrence order
        # Without rows nothing can match, so labels are not even normalized.
        data = list(points_by_key.get(match_key(m), ())) if n_rows else []
        result.append({"id": str(m), "data": data})
    return {output: result, "_history": ...}
def tofloat(X, k, col)
Expand source code
def tofloat(X, k, col):
    """
    Return the cell at row ``k``, column ``col`` of ``X`` as a float.

    ``X`` may be a pandas object (anything exposing ``iloc``, which is then used
    for positional access) or any object supporting ``X[k, col]`` and
    ``X[:, col]``, such as a 2D numpy array.

    * A value accepted by ``float()`` is converted directly.
    * A value rejected with ``ValueError`` (e.g. a non-numeric string) is encoded
      as the position of its first occurrence in the column.
    * A value rejected with ``TypeError`` (e.g. ``None``) triggers a printed
      warning and is converted to ``0.0``.
    """
    if hasattr(X, "iloc"):
        X = X.iloc
    val = X[k, col]
    try:
        return float(val)
    except ValueError:
        # Not parseable as a number: use the index of the first equal value in
        # the column as a numeric stand-in.
        return float(list(X[:, col]).index(val))
    except TypeError:
        print(
            f"Warning: Wrong type {type(val)} converted to zero. Look for '?' characters if you provided an ARFF file."
        )
        # Return a float like every other path (the original returned the int 0).
        return 0.0