# Source code for streamad.util.dataset

import warnings
from os.path import dirname, join
from typing import Union

import numpy as np
import pandas as pd

# Install an "ignore" filter for every FutureWarning as soon as this module is
# imported; the filter is global, so it also affects code outside this module.
# NOTE(review): presumably meant to silence numpy/pandas deprecation notices
# triggered by the preprocessing below — confirm.
warnings.simplefilter(action="ignore", category=FutureWarning)


class DS:
    """Base dataset container that turns a data source into NumPy arrays.

    Subclasses are expected to assign ``self.path`` and then call
    :meth:`preprocess`. ``self.path`` may be a CSV file path (``str``), a
    ``numpy.ndarray`` or a ``pandas.DataFrame``.
    """

    def __init__(self) -> None:
        # Feature values; a DataFrame while preprocessing runs, replaced by a
        # NumPy array of the feature columns in ``preprocess_feature``.
        self.data = None
        # Values of the "timestamp" column, or the frame's index values when
        # no such column exists.
        self.date = None
        # Values of the "label" column when present; otherwise left untouched.
        self.label = None
        # Column names used as features (everything but label/timestamp).
        self.features = None
        # All column names of the loaded frame.
        self.names = None

    def preprocess(self) -> None:
        """Run all preprocessing steps in order.

        The order matters: ``preprocess_data`` loads the frame the later steps
        read from, and ``preprocess_feature`` must come last because it
        replaces ``self.data`` with a plain array.
        """
        self.preprocess_data()
        self.preprocess_timestamp()
        self.preprocess_label()
        self.preprocess_feature()

    def preprocess_data(self) -> None:
        """Load ``self.path`` into a DataFrame and record its column names.

        Raises:
            FileNotFoundError: If ``self.path`` is a string naming a file that
                does not exist.
            TypeError: If ``self.path`` is unset or is not a ``str``,
                ``numpy.ndarray`` or ``pandas.DataFrame``.
        """
        # ``path`` is assigned by subclasses, not by ``DS.__init__``; read it
        # defensively so a missing attribute gives the TypeError below.
        source = getattr(self, "path", None)

        if isinstance(source, str):
            try:
                self.data = pd.read_csv(source)
            except FileNotFoundError as err:
                # A missing file raises FileNotFoundError, not FileExistsError;
                # keep the exception type but add the intended message.
                raise FileNotFoundError(
                    f"Cannot read this file: {source}"
                ) from err
        elif isinstance(source, np.ndarray):
            self.data = pd.DataFrame(source)
        elif isinstance(source, pd.DataFrame):
            self.data = source
        else:
            # Fail here with a clear message instead of an AttributeError on
            # ``None.columns`` further down.
            raise TypeError(
                "Dataset source must be a file path (str), numpy.ndarray or "
                f"pandas.DataFrame, got {type(source).__name__}"
            )

        self.names = self.data.columns.values

    def preprocess_timestamp(self) -> None:
        """Set ``self.date`` from the "timestamp" column, else from the index."""
        if "timestamp" in self.names.tolist():
            self.date = self.data["timestamp"].values
        else:
            self.date = self.data.index.values

    def preprocess_label(self) -> None:
        """Set ``self.label`` from the "label" column when it exists.

        When there is no such column, any previously assigned ``self.label``
        is kept as is.
        """
        if "label" in self.names.tolist():
            self.label = np.array(self.data["label"].values)

    def preprocess_feature(self) -> None:
        """Select the feature columns and convert ``self.data`` to an array."""
        # NOTE: np.setdiff1d returns sorted unique values, so the feature
        # columns follow that sorted order rather than the source order.
        self.features = np.setdiff1d(
            self.names, np.array(["label", "timestamp"])
        )
        self.data = np.array(self.data[self.features])


class MultivariateDS(DS):
    """
    Load multivariate dataset.

    Reads ``data/multiDS.csv`` located next to this module and preprocesses
    it on construction.

    Args:
        has_names (bool, optional): Currently unused by this constructor;
            kept for backward compatibility. Defaults to False.
    """

    def __init__(self, has_names=False) -> None:
        super().__init__()
        # Resolve the CSV relative to this module so loading does not depend
        # on the current working directory.
        module_path = dirname(__file__)
        self.path = join(module_path, "data", "multiDS.csv")
        self.preprocess()
class UnivariateDS(DS):
    """
    Load univariate dataset.

    Reads ``data/uniDS.csv`` located next to this module and preprocesses it
    on construction.
    """

    def __init__(self) -> None:
        super().__init__()
        # Resolve the CSV relative to this module so loading does not depend
        # on the current working directory.
        module_path = dirname(__file__)
        self.path = join(module_path, "data", "uniDS.csv")
        self.preprocess()
class CustomDS(DS):
    """
    Load custom dataset.

    Args:
        f_path (Union[str, np.ndarray, pd.DataFrame]): Dataset or its path.
        label (np.ndarray, optional): Anomaly labels for dataset. Replaced by
            the dataset's own "label" column when it has one.
            Defaults to None.
    """

    def __init__(
        self,
        f_path: Union[str, np.ndarray, pd.DataFrame],
        label: Union[np.ndarray, None] = None,
    ):
        super().__init__()
        self.path = f_path
        # Assigned before preprocessing: preprocessing only overwrites the
        # label when the data carries a "label" column.
        self.label = label
        self.preprocess()