# Source code for streamad.util.dataset

import warnings
from os.path import dirname, join
from typing import Union

import numpy as np
import pandas as pd

# Silence every FutureWarning for the whole process as soon as this module
# is imported (the filter is global, not limited to this module).
warnings.simplefilter("ignore", FutureWarning)


class DS:
    """Base class that loads a table and splits it into data, dates and labels.

    Subclasses assign ``self.path`` (a CSV file path, a ``np.ndarray`` or a
    ``pd.DataFrame``) and then call :meth:`preprocess`, which fills in:

    * ``data``: ``np.ndarray`` holding only the feature columns.
    * ``date``: values of the ``"timestamp"`` column, or the row index values
      when no such column exists.
    * ``label``: values of the ``"label"`` column when present; otherwise the
      attribute keeps whatever value it already had.
    * ``features``: names of the feature columns, in sorted order.
    * ``names``: all column names of the loaded table.
    """

    # Default so that reading ``self.path`` before a subclass assigns it
    # produces a clear TypeError in ``preprocess_data`` instead of an
    # AttributeError about a missing attribute.
    path = None

    def __init__(self) -> None:
        self.data = None
        self.date = None
        self.label = None
        self.features = None
        self.names = None

    def preprocess(self) -> None:
        """Run every preprocessing step in the order the later steps rely on."""
        self.preprocess_data()
        self.preprocess_timestamp()
        self.preprocess_label()
        # Must be last: it replaces ``self.data`` (a DataFrame up to here)
        # with a plain ndarray of the feature columns.
        self.preprocess_feature()

    def preprocess_data(self) -> None:
        """Load ``self.path`` into a DataFrame and record its column names.

        Raises:
            FileNotFoundError: If ``self.path`` is a string naming a file
                that does not exist (raised by ``pd.read_csv``).
            TypeError: If ``self.path`` is not a ``str``, ``np.ndarray`` or
                ``pd.DataFrame``.
        """
        source = self.path
        if isinstance(source, str):
            # A missing file surfaces as FileNotFoundError from pandas; it is
            # deliberately not swallowed because nothing below can work
            # without the loaded table.
            self.data = pd.read_csv(source)
        elif isinstance(source, np.ndarray):
            self.data = pd.DataFrame(source)
        elif isinstance(source, pd.DataFrame):
            self.data = source
        else:
            raise TypeError(
                "Dataset must be a file path (str), np.ndarray or "
                f"pd.DataFrame, got {type(source).__name__}"
            )
        self.names = self.data.columns.values

    def preprocess_timestamp(self) -> None:
        """Take dates from the ``"timestamp"`` column, else from the index."""
        if "timestamp" in self.names.tolist():
            self.date = self.data["timestamp"].values
        else:
            self.date = self.data.index.values

    def preprocess_label(self) -> None:
        """Copy the ``"label"`` column into ``self.label`` when it exists.

        When the column is absent ``self.label`` is left untouched, so a
        value assigned beforehand is preserved.
        """
        if "label" in self.names.tolist():
            self.label = np.array(self.data["label"].values)

    def preprocess_feature(self) -> None:
        """Reduce ``self.data`` to an ndarray of the feature columns.

        Feature columns are all columns except ``"label"`` and
        ``"timestamp"``. ``np.setdiff1d`` returns sorted unique values, so
        the columns of ``self.data`` follow that sorted order rather than
        the order in the input table.
        """
        self.features = np.setdiff1d(
            self.names, np.array(["label", "timestamp"])
        )
        self.data = np.array(self.data[self.features])


class MultivariateDS(DS):
    """
    Load multivariate dataset.

    Reads ``data/multiDS.csv`` from the directory containing this module and
    preprocesses it on construction.

    Args:
        has_names (bool, optional): Not used by this class; kept so that
            existing callers passing it keep working. Defaults to False.
    """

    def __init__(self, has_names=False) -> None:
        super().__init__()
        # The CSV is resolved relative to this module, not the working dir.
        self.path = join(dirname(__file__), "data", "multiDS.csv")
        self.preprocess()


class UnivariateDS(DS):
    """
    Load univariate dataset.

    Reads ``data/uniDS.csv`` from the directory containing this module and
    preprocesses it on construction.
    """

    def __init__(self) -> None:
        super().__init__()
        # The CSV is resolved relative to this module, not the working dir.
        self.path = join(dirname(__file__), "data", "uniDS.csv")
        self.preprocess()


class CustomDS(DS):
    """
    Load custom dataset.

    Args:
        f_path (Union[str, np.ndarray, pd.DataFrame]): Dataset or the path
            of a CSV file holding it.
        label (np.ndarray, optional): Anomaly labels for dataset. Defaults
            to None. If the dataset has a ``"label"`` column, that column
            replaces this value during preprocessing.
    """

    def __init__(
        self,
        f_path: Union[str, np.ndarray, pd.DataFrame],
        label: Union[np.ndarray, None] = None,
    ) -> None:
        super().__init__()
        self.path = f_path
        # Assigned before preprocess() so it survives when the dataset has
        # no "label" column of its own.
        self.label = label
        self.preprocess()