Source code for streamad.util.math_toolkit
#!/usr/bin/env python
# coding=utf-8
#
# Author: liufr
# Github: https://github.com/Fengrui-Liu
# LastEditTime: 2021-01-09 19:24:46
# Copyright 2021 liufr
# Description:
import collections
import math
import numpy as np
[docs]class StreamStatistic:
"""Data statistics for the streaming data."""
[docs] def __init__(self):
"""Statistic for stream data
We support max, min, sum, mean, sum of squares, var, std and standard scaler for streaming data.
"""
self._is_uni = False
self._num_items = 0
self._max = collections.defaultdict(lambda: -math.inf)
self._min = collections.defaultdict(lambda: math.inf)
self._sum = collections.defaultdict(float)
self._mean = collections.defaultdict(float)
self._sum_squares = collections.defaultdict(float)
self._var = collections.defaultdict(float)
self._std = collections.defaultdict(float)
[docs] def update(self, X: np.ndarray):
"""Update a pd.Series to stream
Args:
X (np.ndarray): An item from StreamGenerator
"""
self._num_items += 1
if not isinstance(X, np.ndarray):
X = [X]
self._is_uni = True
tmp = collections.defaultdict(float)
for index, item in enumerate(X):
self._max[index] = self._max[index] if self._max[index] > item else item
self._min[index] = self._min[index] if self._min[index] < item else item
self._sum[index] += X[index]
old_mean = self._mean[index]
tmp[index] = item - self._mean[index]
self._mean[index] = self._sum[index] / self._num_items
self._sum_squares[index] += (X[index] - old_mean) * (
X[index] - self._mean[index]
)
self._var[index] = self._sum_squares[index] / self._num_items
self._std[index] = math.sqrt(self._var[index])
[docs] def get_max(self):
"""
Get max stattistic.
"""
result = [_ for _ in self._max.values()]
if self._is_uni:
return result[0]
return np.array(result)
[docs] def get_min(self):
"""
Get min stattistic.
"""
result = [_ for _ in self._min.values()]
if self._is_uni:
return result[0]
return np.array(result)
[docs] def get_mean(self):
"""
Get mean stattistic.
"""
result = [_ for _ in self._mean.values()]
if self._is_uni:
return result[0]
return np.array(result)
[docs] def get_std(self):
"""
Get max stattistic.
"""
result = [_ for _ in self._std.values()]
if self._is_uni:
return result[0]
return np.array(result)
[docs] def get_sum(self):
"""
Get sum stattistic.
"""
result = [_ for _ in self._sum.values()]
if self._is_uni:
return result[0]
return np.array(result)
[docs] def get_var(self):
"""
Get var stattistic.
"""
result = [_ for _ in self._var.values()]
if self._is_uni:
return result[0]
return np.array(result)