Module idict.data.load

Expand source code
#  Copyright (c) 2021. Davi Pereira dos Santos
#  This file is part of the i-dict project.
#  Please respect the license - more about this in the section (*) below.
#
#  i-dict is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  i-dict is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with i-dict.  If not, see <http://www.gnu.org/licenses/>.
#
#  (*) Removing authorship by any means, e.g. by distribution of derived
#  works or verbatim, obfuscated, compiled or rewritten versions of any
#  part of this work is illegal and it is unethical regarding the effort and
#  time spent here.
#
from idict.function.dataset import load


def file2df(name):
    """Read a dataset file and return its content together with a dataset name.

    The format is selected by the (case-sensitive) file extension:

    * ``.arff``: the file is parsed by ``load``; the dataset name is the text
      following the first ``@RELATION`` keyword (keyword matched
      case-insensitively), with surrounding whitespace removed.
      When there is no such line, or the text is empty, the file name is used.
    * ``.csv``: the file is parsed by ``pandas.read_csv``; the dataset name is
      the file name exactly as given.

    Parameters
    ----------
    name
        Path of the file to read (a ``str``).

    Returns
    -------
    tuple
        ``(data, dataset_name)``. ``data`` is whatever ``load`` / ``read_csv``
        returns (presumably a DataFrame in both cases, as the function name
        suggests - confirm against ``load``).

    Raises
    ------
    ValueError
        If the extension is neither ``.arff`` nor ``.csv``.
    """
    if name.endswith(".arff"):
        relation = None
        with open(name) as f:
            for line in f:
                if line[:9].upper() == "@RELATION":
                    # strip() rather than [9:-1]: drops the separator between
                    # the keyword and the name, and does not eat the last
                    # character when the line has no trailing newline.
                    relation = line[9:].strip()
                    break
        # Reopen so that the parser starts from the beginning of the file.
        with open(name) as f:
            df = load(f)
        # An absent or empty relation name falls back to the file name.
        return df, relation or name
    elif name.endswith(".csv"):
        from pandas import read_csv

        return read_csv(name), name
    else:  # pragma: no cover
        # ValueError is a subclass of Exception: existing handlers keep working.
        raise ValueError(f"Unknown extension {name.split('.')[-1]}.")

Functions

def file2df(name)
Expand source code
def file2df(name):
    """Read a dataset file and return its content together with a dataset name.

    The format is selected by the (case-sensitive) file extension:

    * ``.arff``: the file is parsed by ``load``; the dataset name is the text
      following the first ``@RELATION`` keyword (keyword matched
      case-insensitively), with surrounding whitespace removed.
      When there is no such line, or the text is empty, the file name is used.
    * ``.csv``: the file is parsed by ``pandas.read_csv``; the dataset name is
      the file name exactly as given.

    Parameters
    ----------
    name
        Path of the file to read (a ``str``).

    Returns
    -------
    tuple
        ``(data, dataset_name)``. ``data`` is whatever ``load`` / ``read_csv``
        returns (presumably a DataFrame in both cases, as the function name
        suggests - confirm against ``load``).

    Raises
    ------
    ValueError
        If the extension is neither ``.arff`` nor ``.csv``.
    """
    if name.endswith(".arff"):
        relation = None
        with open(name) as f:
            for line in f:
                if line[:9].upper() == "@RELATION":
                    # strip() rather than [9:-1]: drops the separator between
                    # the keyword and the name, and does not eat the last
                    # character when the line has no trailing newline.
                    relation = line[9:].strip()
                    break
        # Reopen so that the parser starts from the beginning of the file.
        with open(name) as f:
            df = load(f)
        # An absent or empty relation name falls back to the file name.
        return df, relation or name
    elif name.endswith(".csv"):
        from pandas import read_csv

        return read_csv(name), name
    else:  # pragma: no cover
        # ValueError is a subclass of Exception: existing handlers keep working.
        raise ValueError(f"Unknown extension {name.split('.')[-1]}.")