Module idict.function.evaluation

Expand source code
#  Copyright (c) 2021. Davi Pereira dos Santos
#  This file is part of the i-dict project.
#  Please respect the license - more about this in the section (*) below.
#
#  i-dict is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  i-dict is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with i-dict.  If not, see <http://www.gnu.org/licenses/>.
#
#  (*) Removing authorship by any means, e.g. by distribution of derived
#  works or verbatim, obfuscated, compiled or rewritten versions of any
#  part of this work is illegal and it is unethical regarding the effort and
#  time spent here.
#
from sklearn.model_selection import train_test_split


# TODO (minor): enforce versions by runtime checking (sklearn etc)


def split(
    input=["X", "y"],
    output=["Xtr", "Xts", "ytr", "yts"],
    config={"test_size": 0.33, "shuffle": True, "stratify": "y", "random_state": 0},
    **kwargs,
):
    r"""
    Split the fields named in `input` into train/test parts using sklearn's `train_test_split`.

    Parameters
    ----------
    input
        Names of the fields to split; each name is looked up in `kwargs`.
    output
        Names given, in order, to the values returned by `train_test_split`.
        Pairing is done with `zip`, so surplus names or surplus values are dropped.
    config
        Keyword arguments forwarded to `train_test_split`. When `config["stratify"]` is a
        string, it is treated as the name of a field listed in `input` and replaced by that
        field's value before the call. `config` itself is never modified.
    kwargs
        The actual field values, keyed by field name.

    Returns
    -------
    dict
        Mapping from each name in `output` to the corresponding split part.

    Raises
    ------
    KeyError
        If a name listed in `input` is absent from `kwargs`.
    Exception
        If `config["stratify"]` is a string that is not listed in `input`.

    >>> from idict import idict, let
    >>> d = idict.fromtoy() >> split  # doctest: +SKIP
    >>> d.show(colored=False)  # doctest: +SKIP
    {
        "Xtr": "→(input output config X y)",
        "Xts": "→(input output config X y)",
        "ytr": "→(input output config X y)",
        "yts": "→(input output config X y)",
        "_history": "idict--------------sklearn-1.0.1---split",
        "X": "«{'attr1': {0: 5.1, 1: 1.1, 2: 6.1, 3: 1.1, 4: 3.1, 5: 4.7, 6: 9.1, 7: 8.3, 8: 9.1, 9: 2.5, 10: 7.1, 11: 0.1, 12: 2.1, 13: 0.1, 14: 5.1, 15: 31.1, 16: 1.1, 17: 2.2, 18: 3.1, 19: 1.1}, 'attr2': {0: 6.4, 1: 2.5, 2: 3.6, 3: 3.5, 4: 2.5, 5: 4.9, 6: 3.5, 7: 2.9, 8: 7.2, 9: 4.5, 10: 6.6, 11: 4.3, 12: 0.1, 13: 4.0, 14: 4.5, 15: 4.7, 16: 3.2, 17: 8.5, 18: 2.5, 19: 8.5}}»",
        "y": "«[0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1]»",
        "_id": "8eGk98oY-ZJCTDqlI6cQpv.ScL12.0.1---split",
        "_ids": {
            "Xtr": "sgAswz-TNQOyqNXEOZlDdPMGJzavRgf.FjBFx6Ju",
            "Xts": "fb8kj0bb0D8DvZUHPGTTPC8buLIAQJv-25ZHJT65",
            "ytr": "o.tZeOrqlsvd7-ms6Z6utR3HQEYTPTaN-srQovsp",
            "yts": "-VNSnTA0JvBAZGBoUKB9h0eEomYUqh5jiBwiNT1w",
            "_history": "ZVpX52If8SF51gVRHZMdRxUllQxT.4lyvEaXf1Pt",
            "X": "md_cb8b0c76becc1de32236764e91e8e457e826f (content: 34_1738c83af436029507def2710bc5125f58d0e)",
            "y": "Kp_697ef70c1a293f50ed352ea5775bba6d9b1f4 (content: S0_b6360d62ccafa275d4051dfd02b939104feac)"
        }
    }
    >>> d.yts  # doctest: +SKIP
    array([1, 0, 1, 0, 0, 0, 1])
    """
    # Multidynamic input is only detected when the kwargs index is also indexed by something.
    # NOTE(review): the `kwargs[input[i]]` form is kept on purpose; presumably idict relies on
    # this exact indexing pattern to detect the dynamic inputs - confirm before simplifying.
    multidynamic_input = {}
    for i, _ in enumerate(input):
        multidynamic_input[input[i]] = kwargs[input[i]]

    # Resolve `stratify` on a shallow copy. Writing the array back into `config` would
    # overwrite the shared default dict (and any dict passed by the caller), so later calls
    # would silently stratify on the labels of the first call.
    params = dict(config)
    if "stratify" in params and isinstance(params["stratify"], str):
        if params["stratify"] not in input:  # pragma: no cover
            raise Exception(f"Missing field {params['stratify']} for stratification.")
        params["stratify"] = multidynamic_input[params["stratify"]]
    result = train_test_split(*multidynamic_input.values(), **params)
    # Multidynamic output cannot be detected, so it can only be defined as metadata.
    return dict(zip(output, result))


# Metadata attached to `split` as a function attribute.
# NOTE(review): presumably read by idict when the function is applied (`d >> split`) - confirm.
split.metadata = {
    # Same string that appears as "_history" in the docstring example of `split`.
    "id": "idict--------------sklearn-1.0.1---split",
    "name": "split",
    "description": "Split data in two sets.",
    # Ellipsis placeholders. NOTE(review): presumably filled in by idict - confirm.
    "parameters": ...,
    "code": ...,
    # The result field names come from the `output` parameter of `split`; per the comment
    # inside `split`, such multidynamic output cannot be detected, hence it is declared here.
    "output": {"fields": [], "auto": ["_history"], "meta": [], "dynamic": ["output"]},
}

Functions

def split(input=['X', 'y'], output=['Xtr', 'Xts', 'ytr', 'yts'], config={'test_size': 0.33, 'shuffle': True, 'stratify': 'y', 'random_state': 0}, **kwargs)
>>> from idict import idict, let
>>> d = idict.fromtoy() >> split  # doctest: +SKIP
>>> d.show(colored=False)  # doctest: +SKIP
{
    "Xtr": "→(input output config X y)",
    "Xts": "→(input output config X y)",
    "ytr": "→(input output config X y)",
    "yts": "→(input output config X y)",
    "_history": "idict--------------sklearn-1.0.1---split",
    "X": "«{'attr1': {0: 5.1, 1: 1.1, 2: 6.1, 3: 1.1, 4: 3.1, 5: 4.7, 6: 9.1, 7: 8.3, 8: 9.1, 9: 2.5, 10: 7.1, 11: 0.1, 12: 2.1, 13: 0.1, 14: 5.1, 15: 31.1, 16: 1.1, 17: 2.2, 18: 3.1, 19: 1.1}, 'attr2': {0: 6.4, 1: 2.5, 2: 3.6, 3: 3.5, 4: 2.5, 5: 4.9, 6: 3.5, 7: 2.9, 8: 7.2, 9: 4.5, 10: 6.6, 11: 4.3, 12: 0.1, 13: 4.0, 14: 4.5, 15: 4.7, 16: 3.2, 17: 8.5, 18: 2.5, 19: 8.5}}»",
    "y": "«[0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1]»",
    "_id": "8eGk98oY-ZJCTDqlI6cQpv.ScL12.0.1---split",
    "_ids": {
        "Xtr": "sgAswz-TNQOyqNXEOZlDdPMGJzavRgf.FjBFx6Ju",
        "Xts": "fb8kj0bb0D8DvZUHPGTTPC8buLIAQJv-25ZHJT65",
        "ytr": "o.tZeOrqlsvd7-ms6Z6utR3HQEYTPTaN-srQovsp",
        "yts": "-VNSnTA0JvBAZGBoUKB9h0eEomYUqh5jiBwiNT1w",
        "_history": "ZVpX52If8SF51gVRHZMdRxUllQxT.4lyvEaXf1Pt",
        "X": "md_cb8b0c76becc1de32236764e91e8e457e826f (content: 34_1738c83af436029507def2710bc5125f58d0e)",
        "y": "Kp_697ef70c1a293f50ed352ea5775bba6d9b1f4 (content: S0_b6360d62ccafa275d4051dfd02b939104feac)"
    }
}
>>> d.yts  # doctest: +SKIP
array([1, 0, 1, 0, 0, 0, 1])
Expand source code
def split(
    input=["X", "y"],
    output=["Xtr", "Xts", "ytr", "yts"],
    config={"test_size": 0.33, "shuffle": True, "stratify": "y", "random_state": 0},
    **kwargs,
):
    r"""
    Split the fields named in `input` into train/test parts using sklearn's `train_test_split`.

    Parameters
    ----------
    input
        Names of the fields to split; each name is looked up in `kwargs`.
    output
        Names given, in order, to the values returned by `train_test_split`.
        Pairing is done with `zip`, so surplus names or surplus values are dropped.
    config
        Keyword arguments forwarded to `train_test_split`. When `config["stratify"]` is a
        string, it is treated as the name of a field listed in `input` and replaced by that
        field's value before the call. `config` itself is never modified.
    kwargs
        The actual field values, keyed by field name.

    Returns
    -------
    dict
        Mapping from each name in `output` to the corresponding split part.

    Raises
    ------
    KeyError
        If a name listed in `input` is absent from `kwargs`.
    Exception
        If `config["stratify"]` is a string that is not listed in `input`.

    >>> from idict import idict, let
    >>> d = idict.fromtoy() >> split  # doctest: +SKIP
    >>> d.show(colored=False)  # doctest: +SKIP
    {
        "Xtr": "→(input output config X y)",
        "Xts": "→(input output config X y)",
        "ytr": "→(input output config X y)",
        "yts": "→(input output config X y)",
        "_history": "idict--------------sklearn-1.0.1---split",
        "X": "«{'attr1': {0: 5.1, 1: 1.1, 2: 6.1, 3: 1.1, 4: 3.1, 5: 4.7, 6: 9.1, 7: 8.3, 8: 9.1, 9: 2.5, 10: 7.1, 11: 0.1, 12: 2.1, 13: 0.1, 14: 5.1, 15: 31.1, 16: 1.1, 17: 2.2, 18: 3.1, 19: 1.1}, 'attr2': {0: 6.4, 1: 2.5, 2: 3.6, 3: 3.5, 4: 2.5, 5: 4.9, 6: 3.5, 7: 2.9, 8: 7.2, 9: 4.5, 10: 6.6, 11: 4.3, 12: 0.1, 13: 4.0, 14: 4.5, 15: 4.7, 16: 3.2, 17: 8.5, 18: 2.5, 19: 8.5}}»",
        "y": "«[0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1]»",
        "_id": "8eGk98oY-ZJCTDqlI6cQpv.ScL12.0.1---split",
        "_ids": {
            "Xtr": "sgAswz-TNQOyqNXEOZlDdPMGJzavRgf.FjBFx6Ju",
            "Xts": "fb8kj0bb0D8DvZUHPGTTPC8buLIAQJv-25ZHJT65",
            "ytr": "o.tZeOrqlsvd7-ms6Z6utR3HQEYTPTaN-srQovsp",
            "yts": "-VNSnTA0JvBAZGBoUKB9h0eEomYUqh5jiBwiNT1w",
            "_history": "ZVpX52If8SF51gVRHZMdRxUllQxT.4lyvEaXf1Pt",
            "X": "md_cb8b0c76becc1de32236764e91e8e457e826f (content: 34_1738c83af436029507def2710bc5125f58d0e)",
            "y": "Kp_697ef70c1a293f50ed352ea5775bba6d9b1f4 (content: S0_b6360d62ccafa275d4051dfd02b939104feac)"
        }
    }
    >>> d.yts  # doctest: +SKIP
    array([1, 0, 1, 0, 0, 0, 1])
    """
    # Multidynamic input is only detected when the kwargs index is also indexed by something.
    # NOTE(review): the `kwargs[input[i]]` form is kept on purpose; presumably idict relies on
    # this exact indexing pattern to detect the dynamic inputs - confirm before simplifying.
    multidynamic_input = {}
    for i, _ in enumerate(input):
        multidynamic_input[input[i]] = kwargs[input[i]]

    # Resolve `stratify` on a shallow copy. Writing the array back into `config` would
    # overwrite the shared default dict (and any dict passed by the caller), so later calls
    # would silently stratify on the labels of the first call.
    params = dict(config)
    if "stratify" in params and isinstance(params["stratify"], str):
        if params["stratify"] not in input:  # pragma: no cover
            raise Exception(f"Missing field {params['stratify']} for stratification.")
        params["stratify"] = multidynamic_input[params["stratify"]]
    result = train_test_split(*multidynamic_input.values(), **params)
    # Multidynamic output cannot be detected, so it can only be defined as metadata.
    return dict(zip(output, result))