Module `idict.function.dataset`

Functions to be used directly within an idict workflow

Expand source code

#  Copyright (c) 2021. Davi Pereira dos Santos
#  This file is part of the i-dict project.
#  Please respect the license - more about this in the section (*) below.
#
#  i-dict is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  i-dict is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with i-dict.  If not, see <http://www.gnu.org/licenses/>.
#
#  (*) Removing authorship by any means, e.g. by distribution of derived
#  works or verbatim, obfuscated, compiled or rewritten versions of any
#  part of this work is illegal and it is unethical regarding the effort and
#  time spent here.
#

"""
Functions to be used directly within an idict workflow
"""
from io import StringIO

from idict.function import isplit
import arff as liacarff
import pandas as pd


# library arff2pandas simply disappeared...

def liac2pandas(arff):
    """Build a pandas DataFrame from a decoded ARFF dictionary.

    Every column label has the form ``name@type``. When the attribute type is
    a list (nominal values), the label becomes ``name@{v1,v2,...}``.

    Args:
        arff: mapping holding ``'attributes'`` (a sequence of ``(name, type)``
            entries) and ``'data'`` (the rows).

    Returns:
        A ``pandas.DataFrame`` over ``arff['data']`` using the encoded labels.
    """

    def _label(attribute):
        # Nominal types arrive as a list of values; render them as "{a,b,c}".
        name, kind = attribute[0], attribute[1]
        if isinstance(kind, list):
            kind = "{%s}" % ','.join(kind)
        return "%s@%s" % (name, kind)

    labels = [_label(attribute) for attribute in arff['attributes']]
    return pd.DataFrame(data=arff['data'], columns=labels)


def load(fp):
    """Decode ARFF content from ``fp`` and return it as a DataFrame.

    ``fp`` is handed unchanged to ``liacarff.load``; the decoded result is
    converted by ``liac2pandas``.
    """
    return liac2pandas(liacarff.load(fp))


def loads(s):
    """Decode the ARFF text ``s`` and return it as a DataFrame.

    ``s`` is handed unchanged to ``liacarff.loads``; the decoded result is
    converted by ``liac2pandas``.
    """
    return liac2pandas(liacarff.loads(s))


def df2liac(df, relation='data', description=''):
    """Convert a DataFrame with ``name@type`` column labels into an ARFF dictionary.

    Each column label is split at its first ``'@'`` into the attribute name and
    the attribute type. A type containing both ``'{'`` and ``'}'`` is treated as
    nominal: the braces are removed and the rest is split on commas into the
    list of allowed values. Any other type is passed through as a string.

    Args:
        df: DataFrame whose column labels follow the ``name@type`` convention.
        relation: value stored under the ``'relation'`` key.
        description: value stored under the ``'description'`` key.

    Returns:
        A dict with the keys ``'attributes'``, ``'data'``, ``'description'``
        and ``'relation'``.

    Raises:
        IndexError: if a column label contains no ``'@'``.
    """
    attrs = []
    for col in df.columns:
        # Split on the first '@' only, so that nominal values containing '@'
        # (e.g. "cls@{a@b,c}") are not silently truncated.
        parts = col.split('@', 1)
        name, spec = parts[0], parts[1]
        if '{' in spec and '}' in spec:
            values = spec.replace('{', '').replace('}', '').split(',')
            attrs.append((name, values))
        else:
            attrs.append((name, spec))

    return {'attributes': attrs, 'data': list(df.values), 'description': description, 'relation': relation}


def dump(df, fp):
    """Write ``df`` as ARFF to ``fp``.

    The DataFrame is first converted by ``df2liac`` (default relation and
    description) and then passed, together with ``fp``, to ``liacarff.dump``.
    """
    liacarff.dump(df2liac(df), fp)


def dumps(df):
    """Return ``df`` encoded as ARFF.

    The DataFrame is first converted by ``df2liac`` (default relation and
    description); the result of ``liacarff.dumps`` is returned.
    """
    return liacarff.dumps(df2liac(df))


def Xy2M(input=["X", "y"], output="M", **kwargs):
    """
    Column-stack two fields into a single matrix.

    `input` holds the names of the two fields read from `kwargs`; the first one
    provides the leading columns and the second one is appended after them.
    `output` is the name under which the stacked matrix is returned.
    The "_history" entry is returned as an Ellipsis placeholder
    (presumably filled in by idict -- TODO confirm).

    >>> from idict import idict
    >>> from idict.function.dataset import df2Xy
    >>> d = idict.fromtoy(output_format="df")
    >>> d = d >> df2Xy >> Xy2M
    >>> d.M
    array([[ 5.1,  6.4,  0. ],
           [ 1.1,  2.5,  1. ],
           [ 6.1,  3.6,  0. ],
           [ 1.1,  3.5,  1. ],
           [ 3.1,  2.5,  0. ],
           [ 4.7,  4.9,  1. ],
           [ 9.1,  3.5,  0. ],
           [ 8.3,  2.9,  1. ],
           [ 9.1,  7.2,  0. ],
           [ 2.5,  4.5,  1. ],
           [ 7.1,  6.6,  0. ],
           [ 0.1,  4.3,  1. ],
           [ 2.1,  0.1,  0. ],
           [ 0.1,  4. ,  1. ],
           [ 5.1,  4.5,  0. ],
           [31.1,  4.7,  1. ],
           [ 1.1,  3.2,  0. ],
           [ 2.2,  8.5,  1. ],
           [ 3.1,  2.5,  0. ],
           [ 1.1,  8.5,  1. ]])
    """
    # numpy is imported at call time, inside the function body.
    import numpy

    # NOTE(review): the framework appears to derive field names from this
    # function's signature and source; keep the lookups and the returned
    # dict literal in this exact form unless that is confirmed otherwise.
    return {output: numpy.column_stack((kwargs[input[0]], kwargs[input[1]])), "_history": ...}


def df2Xy(input="df", Xout="X", yout="y", **kwargs):
    """
    Split a DataFrame into features and a label-encoded target.

    The field named by `input` is read from `kwargs`. Every column but the last
    is returned under the name given by `Xout`; the last column is passed
    through sklearn's LabelEncoder and returned under the name given by `yout`.

    >>> from idict import let, idict
    >>> d = idict.fromminiarff()
    >>> d >>= df2Xy
    >>> d.show(colored=False)
    {
        "X": "→(input Xout yout df)",
        "y": "→(input Xout yout df)",
        "_history": "idict-pandas-1.3.4--sklearn-1.0.1--df2Xy",
        "df": "«{'attr1@REAL': {0: 5.1, 1: 3.1}, 'attr2@REAL': {0: 3.5, 1: 4.5}, 'class@{0,1}': {0: '0', 1: '1'}}»",
        "_id": "0GLFP05fQeW7dVFF0m50N8yVTgl-1.0.1--df2Xy",
        "_ids": {
            "X": "NeRaymrp6pcRWPAFc4t8A9.BnboEM0.1BllNL8yo",
            "y": "0u5TDWnV.PZO-bmzBnZveLLnojWlh-1ZsEFsvVoa",
            "_history": "OdE7kT5FmO.bsbvUAU9a3FA-v5EAQaOdihxMcNv6",
            "df": "q3_b71eb05c4be05eba7b6ae5a9245d5dd70b81b (content: 6X_dc8ccea3b2e46f1c78967fae98b692701dc99)"
        }
    }
    >>> d.y
    array([0, 1])
    """
    from sklearn.preprocessing import LabelEncoder

    df = kwargs[input]
    # The last column is the target; everything before it is a feature.
    target = df.columns[-1]
    X_ = df.drop(columns=[target])
    y_ = LabelEncoder().fit_transform(df[target])
    return {Xout: X_, yout: y_, "_history": ...}


def df2arff(input="df", output="arff", **kwargs):
    """
    Serialise a DataFrame field to ARFF text.

    The field named by `input` is read from `kwargs` and encoded by `dumps`;
    the result is returned under the name given by `output`.
    The "_history" entry is returned as an Ellipsis placeholder
    (presumably filled in by idict -- TODO confirm).

    >>> from idict import let, idict
    >>> d = idict.fromminiarff()
    >>> d >>= let(df2arff, output="a")
    >>> d.show(colored=False)
    {
        "a": "→(input output df)",
        "_history": "idict---------arff2pandas-1.0.1--df2arff",
        "df": "«{'attr1@REAL': {0: 5.1, 1: 3.1}, 'attr2@REAL': {0: 3.5, 1: 4.5}, 'class@{0,1}': {0: '0', 1: '1'}}»",
        "_id": "Ojq9k7ZSbVjwLGlZ7uuqIyhoJPo.p36mAmav2Wul",
        "_ids": {
            "a": "wVPgESiPwobfFLLm591BKw6i2Zn.p36mAmav2Wul",
            "_history": "D2NpAYrhyJW-.nOhh91ttSjrbGw.nZ8qxps9giws",
            "df": "q3_b71eb05c4be05eba7b6ae5a9245d5dd70b81b (content: 6X_dc8ccea3b2e46f1c78967fae98b692701dc99)"
        }
    }
    >>> d.a
    '@RELATION data\\n\\n@ATTRIBUTE attr1 REAL\\n@ATTRIBUTE attr2 REAL\\n@ATTRIBUTE class {0, 1}\\n\\n@DATA\\n5.1,3.5,0\\n3.1,4.5,1\\n'
    """
    # NOTE(review): the framework appears to derive field names from this
    # function's signature and source; keep the returned dict literal as is.
    return {output: dumps(kwargs[input]), "_history": ...}


def openml(Xout="X", yout="y", name="iris", version=1):
    """
    Fetch a dataset from OpenML through sklearn's `fetch_openml`.

    `name` and `version` select the dataset. The call asks for pandas output
    (as_frame=True) split into features and target (return_X_y=True), which are
    returned under the names given by `Xout` and `yout`.
    The "_history" entry is returned as an Ellipsis placeholder
    (presumably filled in by idict -- TODO confirm).

    The examples below are commented out with '#', so doctest does not run them
    (presumably because they need network access -- TODO confirm).

    #>>> from idict import Ø
    #>>> (Ø >> openml).show(colored=False)
    #{
        #"X": "→(Xout yout name version)",
        #"y": "→(Xout yout name version)",
        #"_history": "idict--------------sklearn-1.0.1--openml",
        #"_id": "idict--------------sklearn-1.0.1--openml",
        #"_ids": {
            #"X": "KkSoAvgPmGq52PvPBqGFCEp9cSUCfb7eht9VpUdr",
            #"y": "HOE9E-HnFYen6JNRC9cHgAamJlOWGYTPIVKvQu8W",
            #"_history": "Efw0ebrPTxTiuJ.ZpVt-MvB62ja.kmrOYzrn-ljf"
        #}
    #}
    #>>> (Ø >> openml).X.head()
       #sepallength  sepalwidth  petallength  petalwidth
    #0          5.1         3.5          1.4         0.2
    #1          4.9         3.0          1.4         0.2
    #2          4.7         3.2          1.3         0.2
    #3          4.6         3.1          1.5         0.2
    #4          5.0         3.6          1.4         0.2
    #>>> (Ø >> openml).y.head()
    #0    Iris-setosa
    #1    Iris-setosa
    #2    Iris-setosa
    #3    Iris-setosa
    #4    Iris-setosa
    #Name: class, dtype: category
    #Categories (3, object): ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
    """
    # sklearn is imported at call time, inside the function body.
    from sklearn.datasets import fetch_openml

    X, y = fetch_openml(name=name, version=version, as_frame=True, return_X_y=True)
    return {Xout: X, yout: y, "_history": ...}


def arff2df(input="arff", output="df", **kwargs):
    r"""
    Parse ARFF text into a DataFrame and extract the relation name.

    The field named by `input` is read from `kwargs` and decoded by `loads`;
    the DataFrame is returned under the name given by `output`. The text is
    also scanned line by line for the first @RELATION directive
    (case-insensitive, leading whitespace tolerated); what follows it, stripped,
    is returned as "_name". When no directive is found, "_name" is "<Unnamed>".

    >>> from idict import let, idict
    >>> d = idict.fromminiarff(output=["arff"], output_format="arff")
    >>> d.arff
    '@RELATION mini\n@ATTRIBUTE attr1\tREAL\n@ATTRIBUTE attr2 \tREAL\n@ATTRIBUTE class \t{0,1}\n@DATA\n5.1,3.5,0\n3.1,4.5,1'
    >>> d >>= arff2df  # doctest: +SKIP
    >>> d.show(colored=False)    # doctest: +SKIP +ELLIPSIS
    {
        "df": "→(input output arff)",
        "_name": "→(input output arff)",
        "_history": "idict---------arff2pandas-1.0.1--arff2df",
        "arff": "@RELATION mini\n@ATTRIBUTE attr1\tREAL\n@ATTRIBUTE attr2 \tREAL\n@ATTRIBUTE class \t{0,1}\n@DATA\n5.1,3.5,0\n3.1,4.5,1",
        "_id": "XraBH1sOCC.9ohqV3hfIRTE5FV3.0.1--arff2df",
        "_ids": {
            "df": "VNy1otmyPSLXWCKAYbZRtNqMdW3.0.1--arff2df",
            "_name": "K4gZ2YDXBHOHwPzYufHChxkfhSz7ZAr--YbuHM-o",
            "_history": "VaU.REn8zFBQtEWngL.FYvWCowT0aeZPmXg35hCs",
            "arff": "Z._c3e2b235b697e9734b9ec13084129dc30e45b (content: Ev_8bb973161e5ae900c5743b3c332b4a64d1955)"
        }
    }
    >>> d.df   # doctest: +SKIP
       attr1@REAL  attr2@REAL class@{0,1}
    0         5.1         3.5           0
    1         3.1         4.5           1
    """
    relation = "<Unnamed>"
    # Read the text directly; copying it through an in-memory buffer added nothing.
    text = kwargs[input]
    df = loads(text)
    for line in isplit(text, "\n"):
        # Tolerate indentation before the directive, so a header the parser
        # accepted does not fall back to the default name.
        header = line.lstrip()
        if header[:9].upper() == "@RELATION":
            relation = header[9:].strip()
            break

    return {output: df, "_name": relation, "_history": ...}


# Each workflow function carries a `metadata` attribute with a fixed id, a
# name and a human-readable description. The Ellipsis values under
# "parameters" and "code" are placeholders (presumably filled in by idict
# when the function is applied -- TODO confirm).
# NOTE(review): the "id" strings also appear in the "_history" / "_id" values
# shown in the doctests above, so they must not be edited casually.
Xy2M.metadata = {
    "id": "idict--pandas-1.3.4--sklearn-1.0.1--Xy2M",
    "name": "Xy2M",
    "description": "X,y (pandas/numpy) to M (numpy) column concatenator.",
    "parameters": ...,
    "code": ...,
}
df2Xy.metadata = {
    "id": "idict-pandas-1.3.4--sklearn-1.0.1--df2Xy",
    "name": "df2Xy",
    "description": "DataFrame (pandas) to X,y (pandas) converter.",
    "parameters": ...,
    "code": ...,
}
df2arff.metadata = {
    "id": "idict---------arff2pandas-1.0.1--df2arff",
    "name": "df2arff",
    "description": "DataFrame (pandas) to ARFF converter.",
    "parameters": ...,
    "code": ...,
}
openml.metadata = {
    "id": "idict--------------sklearn-1.0.1--openml",
    "name": "openml",
    "description": "Fetch DataFrame+Series (pandas) from OpenML.",
    "parameters": ...,
    "code": ...,
}
arff2df.metadata = {
    "id": "idict---------arff2pandas-1.0.1--arff2df",
    "name": "arff2df",
    "description": "ARFF to DataFrame (pandas) converter.",
    "parameters": ...,
    "code": ...,
}

Functions

def Xy2M(input=['X', 'y'], output='M', **kwargs)

>>> from idict import idict
>>> from idict.function.dataset import df2Xy
>>> d = idict.fromtoy(output_format="df")
>>> d = d >> df2Xy >> Xy2M
>>> d.M
array([[ 5.1,  6.4,  0. ],
       [ 1.1,  2.5,  1. ],
       [ 6.1,  3.6,  0. ],
       [ 1.1,  3.5,  1. ],
       [ 3.1,  2.5,  0. ],
       [ 4.7,  4.9,  1. ],
       [ 9.1,  3.5,  0. ],
       [ 8.3,  2.9,  1. ],
       [ 9.1,  7.2,  0. ],
       [ 2.5,  4.5,  1. ],
       [ 7.1,  6.6,  0. ],
       [ 0.1,  4.3,  1. ],
       [ 2.1,  0.1,  0. ],
       [ 0.1,  4. ,  1. ],
       [ 5.1,  4.5,  0. ],
       [31.1,  4.7,  1. ],
       [ 1.1,  3.2,  0. ],
       [ 2.2,  8.5,  1. ],
       [ 3.1,  2.5,  0. ],
       [ 1.1,  8.5,  1. ]])

Expand source code

def Xy2M(input=["X", "y"], output="M", **kwargs):
    """
    Column-stack two fields into a single matrix.

    `input` holds the names of the two fields read from `kwargs`; the first one
    provides the leading columns and the second one is appended after them.
    `output` is the name under which the stacked matrix is returned.
    The "_history" entry is returned as an Ellipsis placeholder
    (presumably filled in by idict -- TODO confirm).

    >>> from idict import idict
    >>> from idict.function.dataset import df2Xy
    >>> d = idict.fromtoy(output_format="df")
    >>> d = d >> df2Xy >> Xy2M
    >>> d.M
    array([[ 5.1,  6.4,  0. ],
           [ 1.1,  2.5,  1. ],
           [ 6.1,  3.6,  0. ],
           [ 1.1,  3.5,  1. ],
           [ 3.1,  2.5,  0. ],
           [ 4.7,  4.9,  1. ],
           [ 9.1,  3.5,  0. ],
           [ 8.3,  2.9,  1. ],
           [ 9.1,  7.2,  0. ],
           [ 2.5,  4.5,  1. ],
           [ 7.1,  6.6,  0. ],
           [ 0.1,  4.3,  1. ],
           [ 2.1,  0.1,  0. ],
           [ 0.1,  4. ,  1. ],
           [ 5.1,  4.5,  0. ],
           [31.1,  4.7,  1. ],
           [ 1.1,  3.2,  0. ],
           [ 2.2,  8.5,  1. ],
           [ 3.1,  2.5,  0. ],
           [ 1.1,  8.5,  1. ]])
    """
    # numpy is imported at call time, inside the function body.
    import numpy

    # NOTE(review): the framework appears to derive field names from this
    # function's signature and source; keep the lookups and the returned
    # dict literal in this exact form unless that is confirmed otherwise.
    return {output: numpy.column_stack((kwargs[input[0]], kwargs[input[1]])), "_history": ...}

def arff2df(input='arff', output='df', **kwargs)

>>> from idict import let, idict
>>> d = idict.fromminiarff(output=["arff"], output_format="arff")
>>> d.arff
'@RELATION mini\n@ATTRIBUTE attr1\tREAL\n@ATTRIBUTE attr2 \tREAL\n@ATTRIBUTE class \t{0,1}\n@DATA\n5.1,3.5,0\n3.1,4.5,1'
>>> d >>= arff2df  # doctest: +SKIP
>>> d.show(colored=False)    # doctest: +SKIP +ELLIPSIS
{
    "df": "→(input output arff)",
    "_name": "→(input output arff)",
    "_history": "idict---------arff2pandas-1.0.1--arff2df",
    "arff": "@RELATION mini\n@ATTRIBUTE attr1\tREAL\n@ATTRIBUTE attr2 \tREAL\n@ATTRIBUTE class \t{0,1}\n@DATA\n5.1,3.5,0\n3.1,4.5,1",
    "_id": "XraBH1sOCC.9ohqV3hfIRTE5FV3.0.1--arff2df",
    "_ids": {
        "df": "VNy1otmyPSLXWCKAYbZRtNqMdW3.0.1--arff2df",
        "_name": "K4gZ2YDXBHOHwPzYufHChxkfhSz7ZAr--YbuHM-o",
        "_history": "VaU.REn8zFBQtEWngL.FYvWCowT0aeZPmXg35hCs",
        "arff": "Z._c3e2b235b697e9734b9ec13084129dc30e45b (content: Ev_8bb973161e5ae900c5743b3c332b4a64d1955)"
    }
}
>>> d.df   # doctest: +SKIP
   attr1@REAL  attr2@REAL class@{0,1}
0         5.1         3.5           0
1         3.1         4.5           1

Expand source code

def arff2df(input="arff", output="df", **kwargs):
    r"""
    Parse ARFF text into a DataFrame and extract the relation name.

    The field named by `input` is read from `kwargs` and decoded by `loads`;
    the DataFrame is returned under the name given by `output`. The text is
    also scanned line by line for the first @RELATION directive
    (case-insensitive, leading whitespace tolerated); what follows it, stripped,
    is returned as "_name". When no directive is found, "_name" is "<Unnamed>".

    >>> from idict import let, idict
    >>> d = idict.fromminiarff(output=["arff"], output_format="arff")
    >>> d.arff
    '@RELATION mini\n@ATTRIBUTE attr1\tREAL\n@ATTRIBUTE attr2 \tREAL\n@ATTRIBUTE class \t{0,1}\n@DATA\n5.1,3.5,0\n3.1,4.5,1'
    >>> d >>= arff2df  # doctest: +SKIP
    >>> d.show(colored=False)    # doctest: +SKIP +ELLIPSIS
    {
        "df": "→(input output arff)",
        "_name": "→(input output arff)",
        "_history": "idict---------arff2pandas-1.0.1--arff2df",
        "arff": "@RELATION mini\n@ATTRIBUTE attr1\tREAL\n@ATTRIBUTE attr2 \tREAL\n@ATTRIBUTE class \t{0,1}\n@DATA\n5.1,3.5,0\n3.1,4.5,1",
        "_id": "XraBH1sOCC.9ohqV3hfIRTE5FV3.0.1--arff2df",
        "_ids": {
            "df": "VNy1otmyPSLXWCKAYbZRtNqMdW3.0.1--arff2df",
            "_name": "K4gZ2YDXBHOHwPzYufHChxkfhSz7ZAr--YbuHM-o",
            "_history": "VaU.REn8zFBQtEWngL.FYvWCowT0aeZPmXg35hCs",
            "arff": "Z._c3e2b235b697e9734b9ec13084129dc30e45b (content: Ev_8bb973161e5ae900c5743b3c332b4a64d1955)"
        }
    }
    >>> d.df   # doctest: +SKIP
       attr1@REAL  attr2@REAL class@{0,1}
    0         5.1         3.5           0
    1         3.1         4.5           1
    """
    relation = "<Unnamed>"
    # Read the text directly; copying it through an in-memory buffer added nothing.
    text = kwargs[input]
    df = loads(text)
    for line in isplit(text, "\n"):
        # Tolerate indentation before the directive, so a header the parser
        # accepted does not fall back to the default name.
        header = line.lstrip()
        if header[:9].upper() == "@RELATION":
            relation = header[9:].strip()
            break

    return {output: df, "_name": relation, "_history": ...}

def df2Xy(input='df', Xout='X', yout='y', **kwargs)

>>> from idict import let, idict
>>> d = idict.fromminiarff()
>>> d >>= df2Xy
>>> d.show(colored=False)
{
    "X": "→(input Xout yout df)",
    "y": "→(input Xout yout df)",
    "_history": "idict-pandas-1.3.4--sklearn-1.0.1--df2Xy",
    "df": "«{'attr1@REAL': {0: 5.1, 1: 3.1}, 'attr2@REAL': {0: 3.5, 1: 4.5}, 'class@{0,1}': {0: '0', 1: '1'}}»",
    "_id": "0GLFP05fQeW7dVFF0m50N8yVTgl-1.0.1--df2Xy",
    "_ids": {
        "X": "NeRaymrp6pcRWPAFc4t8A9.BnboEM0.1BllNL8yo",
        "y": "0u5TDWnV.PZO-bmzBnZveLLnojWlh-1ZsEFsvVoa",
        "_history": "OdE7kT5FmO.bsbvUAU9a3FA-v5EAQaOdihxMcNv6",
        "df": "q3_b71eb05c4be05eba7b6ae5a9245d5dd70b81b (content: 6X_dc8ccea3b2e46f1c78967fae98b692701dc99)"
    }
}
>>> d.y
array([0, 1])

Expand source code

def df2Xy(input="df", Xout="X", yout="y", **kwargs):
    """
    Split a DataFrame into features and a label-encoded target.

    The field named by `input` is read from `kwargs`. Every column but the last
    is returned under the name given by `Xout`; the last column is passed
    through sklearn's LabelEncoder and returned under the name given by `yout`.

    >>> from idict import let, idict
    >>> d = idict.fromminiarff()
    >>> d >>= df2Xy
    >>> d.show(colored=False)
    {
        "X": "→(input Xout yout df)",
        "y": "→(input Xout yout df)",
        "_history": "idict-pandas-1.3.4--sklearn-1.0.1--df2Xy",
        "df": "«{'attr1@REAL': {0: 5.1, 1: 3.1}, 'attr2@REAL': {0: 3.5, 1: 4.5}, 'class@{0,1}': {0: '0', 1: '1'}}»",
        "_id": "0GLFP05fQeW7dVFF0m50N8yVTgl-1.0.1--df2Xy",
        "_ids": {
            "X": "NeRaymrp6pcRWPAFc4t8A9.BnboEM0.1BllNL8yo",
            "y": "0u5TDWnV.PZO-bmzBnZveLLnojWlh-1ZsEFsvVoa",
            "_history": "OdE7kT5FmO.bsbvUAU9a3FA-v5EAQaOdihxMcNv6",
            "df": "q3_b71eb05c4be05eba7b6ae5a9245d5dd70b81b (content: 6X_dc8ccea3b2e46f1c78967fae98b692701dc99)"
        }
    }
    >>> d.y
    array([0, 1])
    """
    from sklearn.preprocessing import LabelEncoder

    df = kwargs[input]
    # The last column is the target; everything before it is a feature.
    target = df.columns[-1]
    X_ = df.drop(columns=[target])
    y_ = LabelEncoder().fit_transform(df[target])
    return {Xout: X_, yout: y_, "_history": ...}

def df2arff(input='df', output='arff', **kwargs)

>>> from idict import let, idict
>>> d = idict.fromminiarff()
>>> d >>= let(df2arff, output="a")
>>> d.show(colored=False)
{
    "a": "→(input output df)",
    "_history": "idict---------arff2pandas-1.0.1--df2arff",
    "df": "«{'attr1@REAL': {0: 5.1, 1: 3.1}, 'attr2@REAL': {0: 3.5, 1: 4.5}, 'class@{0,1}': {0: '0', 1: '1'}}»",
    "_id": "Ojq9k7ZSbVjwLGlZ7uuqIyhoJPo.p36mAmav2Wul",
    "_ids": {
        "a": "wVPgESiPwobfFLLm591BKw6i2Zn.p36mAmav2Wul",
        "_history": "D2NpAYrhyJW-.nOhh91ttSjrbGw.nZ8qxps9giws",
        "df": "q3_b71eb05c4be05eba7b6ae5a9245d5dd70b81b (content: 6X_dc8ccea3b2e46f1c78967fae98b692701dc99)"
    }
}
>>> d.a
'@RELATION data\n\n@ATTRIBUTE attr1 REAL\n@ATTRIBUTE attr2 REAL\n@ATTRIBUTE class {0, 1}\n\n@DATA\n5.1,3.5,0\n3.1,4.5,1\n'

Expand source code

def df2arff(input="df", output="arff", **kwargs):
    """
    Serialise a DataFrame field to ARFF text.

    The field named by `input` is read from `kwargs` and encoded by `dumps`;
    the result is returned under the name given by `output`.
    The "_history" entry is returned as an Ellipsis placeholder
    (presumably filled in by idict -- TODO confirm).

    >>> from idict import let, idict
    >>> d = idict.fromminiarff()
    >>> d >>= let(df2arff, output="a")
    >>> d.show(colored=False)
    {
        "a": "→(input output df)",
        "_history": "idict---------arff2pandas-1.0.1--df2arff",
        "df": "«{'attr1@REAL': {0: 5.1, 1: 3.1}, 'attr2@REAL': {0: 3.5, 1: 4.5}, 'class@{0,1}': {0: '0', 1: '1'}}»",
        "_id": "Ojq9k7ZSbVjwLGlZ7uuqIyhoJPo.p36mAmav2Wul",
        "_ids": {
            "a": "wVPgESiPwobfFLLm591BKw6i2Zn.p36mAmav2Wul",
            "_history": "D2NpAYrhyJW-.nOhh91ttSjrbGw.nZ8qxps9giws",
            "df": "q3_b71eb05c4be05eba7b6ae5a9245d5dd70b81b (content: 6X_dc8ccea3b2e46f1c78967fae98b692701dc99)"
        }
    }
    >>> d.a
    '@RELATION data\\n\\n@ATTRIBUTE attr1 REAL\\n@ATTRIBUTE attr2 REAL\\n@ATTRIBUTE class {0, 1}\\n\\n@DATA\\n5.1,3.5,0\\n3.1,4.5,1\\n'
    """
    # NOTE(review): the framework appears to derive field names from this
    # function's signature and source; keep the returned dict literal as is.
    return {output: dumps(kwargs[input]), "_history": ...}

def df2liac(df, relation='data', description='')

Expand source code

def df2liac(df, relation='data', description=''):
    """Convert a DataFrame with ``name@type`` column labels into an ARFF dictionary.

    Each column label is split at its first ``'@'`` into the attribute name and
    the attribute type. A type containing both ``'{'`` and ``'}'`` is treated as
    nominal: the braces are removed and the rest is split on commas into the
    list of allowed values. Any other type is passed through as a string.

    Args:
        df: DataFrame whose column labels follow the ``name@type`` convention.
        relation: value stored under the ``'relation'`` key.
        description: value stored under the ``'description'`` key.

    Returns:
        A dict with the keys ``'attributes'``, ``'data'``, ``'description'``
        and ``'relation'``.

    Raises:
        IndexError: if a column label contains no ``'@'``.
    """
    attrs = []
    for col in df.columns:
        # Split on the first '@' only, so that nominal values containing '@'
        # (e.g. "cls@{a@b,c}") are not silently truncated.
        parts = col.split('@', 1)
        name, spec = parts[0], parts[1]
        if '{' in spec and '}' in spec:
            values = spec.replace('{', '').replace('}', '').split(',')
            attrs.append((name, values))
        else:
            attrs.append((name, spec))

    return {'attributes': attrs, 'data': list(df.values), 'description': description, 'relation': relation}

def dump(df, fp)

Expand source code

def dump(df, fp):
    """Write ``df`` as ARFF to ``fp``.

    The DataFrame is first converted by ``df2liac`` (default relation and
    description) and then passed, together with ``fp``, to ``liacarff.dump``.
    """
    liacarff.dump(df2liac(df), fp)

def dumps(df)

Expand source code

def dumps(df):
    """Return ``df`` encoded as ARFF.

    The DataFrame is first converted by ``df2liac`` (default relation and
    description); the result of ``liacarff.dumps`` is returned.
    """
    return liacarff.dumps(df2liac(df))

def liac2pandas(arff)

Expand source code

def liac2pandas(arff):
    """Build a pandas DataFrame from a decoded ARFF dictionary.

    Every column label has the form ``name@type``. When the attribute type is
    a list (nominal values), the label becomes ``name@{v1,v2,...}``.

    Args:
        arff: mapping holding ``'attributes'`` (a sequence of ``(name, type)``
            entries) and ``'data'`` (the rows).

    Returns:
        A ``pandas.DataFrame`` over ``arff['data']`` using the encoded labels.
    """

    def _label(attribute):
        # Nominal types arrive as a list of values; render them as "{a,b,c}".
        name, kind = attribute[0], attribute[1]
        if isinstance(kind, list):
            kind = "{%s}" % ','.join(kind)
        return "%s@%s" % (name, kind)

    labels = [_label(attribute) for attribute in arff['attributes']]
    return pd.DataFrame(data=arff['data'], columns=labels)

def load(fp)

Expand source code

def load(fp):
    """Decode ARFF content from ``fp`` and return it as a DataFrame.

    ``fp`` is handed unchanged to ``liacarff.load``; the decoded result is
    converted by ``liac2pandas``.
    """
    return liac2pandas(liacarff.load(fp))

def loads(s)

Expand source code

def loads(s):
    """Decode the ARFF text ``s`` and return it as a DataFrame.

    ``s`` is handed unchanged to ``liacarff.loads``; the decoded result is
    converted by ``liac2pandas``.
    """
    return liac2pandas(liacarff.loads(s))

def openml(Xout='X', yout='y', name='iris', version=1)

#>>> from idict import Ø
#>>> (Ø >> openml).show(colored=False)
#{
    #"X": "→(Xout yout name version)",
    #"y": "→(Xout yout name version)",
    #"_history": "idict--------------sklearn-1.0.1--openml",
    #"_id": "idict--------------sklearn-1.0.1--openml",
    #"_ids": {
        #"X": "KkSoAvgPmGq52PvPBqGFCEp9cSUCfb7eht9VpUdr",
        #"y": "HOE9E-HnFYen6JNRC9cHgAamJlOWGYTPIVKvQu8W",
        #"_history": "Efw0ebrPTxTiuJ.ZpVt-MvB62ja.kmrOYzrn-ljf"
    #}
#}
#>>> (Ø >> openml).X.head()
   #sepallength  sepalwidth  petallength  petalwidth
#0          5.1         3.5          1.4         0.2
#1          4.9         3.0          1.4         0.2
#2          4.7         3.2          1.3         0.2
#3          4.6         3.1          1.5         0.2
#4          5.0         3.6          1.4         0.2
#>>> (Ø >> openml).y.head()
#0    Iris-setosa
#1    Iris-setosa
#2    Iris-setosa
#3    Iris-setosa
#4    Iris-setosa
#Name: class, dtype: category
#Categories (3, object): ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

Expand source code

def openml(Xout="X", yout="y", name="iris", version=1):
    """
    Fetch a dataset from OpenML through sklearn's `fetch_openml`.

    `name` and `version` select the dataset. The call asks for pandas output
    (as_frame=True) split into features and target (return_X_y=True), which are
    returned under the names given by `Xout` and `yout`.
    The "_history" entry is returned as an Ellipsis placeholder
    (presumably filled in by idict -- TODO confirm).

    The examples below are commented out with '#', so doctest does not run them
    (presumably because they need network access -- TODO confirm).

    #>>> from idict import Ø
    #>>> (Ø >> openml).show(colored=False)
    #{
        #"X": "→(Xout yout name version)",
        #"y": "→(Xout yout name version)",
        #"_history": "idict--------------sklearn-1.0.1--openml",
        #"_id": "idict--------------sklearn-1.0.1--openml",
        #"_ids": {
            #"X": "KkSoAvgPmGq52PvPBqGFCEp9cSUCfb7eht9VpUdr",
            #"y": "HOE9E-HnFYen6JNRC9cHgAamJlOWGYTPIVKvQu8W",
            #"_history": "Efw0ebrPTxTiuJ.ZpVt-MvB62ja.kmrOYzrn-ljf"
        #}
    #}
    #>>> (Ø >> openml).X.head()
       #sepallength  sepalwidth  petallength  petalwidth
    #0          5.1         3.5          1.4         0.2
    #1          4.9         3.0          1.4         0.2
    #2          4.7         3.2          1.3         0.2
    #3          4.6         3.1          1.5         0.2
    #4          5.0         3.6          1.4         0.2
    #>>> (Ø >> openml).y.head()
    #0    Iris-setosa
    #1    Iris-setosa
    #2    Iris-setosa
    #3    Iris-setosa
    #4    Iris-setosa
    #Name: class, dtype: category
    #Categories (3, object): ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
    """
    # sklearn is imported at call time, inside the function body.
    from sklearn.datasets import fetch_openml

    X, y = fetch_openml(name=name, version=version, as_frame=True, return_X_y=True)
    return {Xout: X, yout: y, "_history": ...}