Skip to content

statistic_utils

iqr_calculation(input_data, percent=50)

Return the `percent`-th percentile of `input_data` (the median when `percent` is 50).

Source code in exe_kg_lib/utils/task_utils/statistic_utils.py
54
55
56
def iqr_calculation(input_data: np.ndarray, percent: int = 50) -> np.ndarray:
    """Return the ``percent``-th percentile of ``input_data``.

    This is a thin wrapper around ``np.percentile`` called with its default
    settings.

    Args:
        input_data: Numeric data to summarise.
        percent: Percentile to compute; the default of 50 yields the median.

    Returns:
        Whatever ``np.percentile`` returns for the given arguments.
    """
    percentile_value = np.percentile(input_data, q=percent)
    return percentile_value

outlier_calculation(input, iq1=None, iq3=None)

Return one boolean flag per element of the data, marking whether that element is an outlier.

Source code in exe_kg_lib/utils/task_utils/statistic_utils.py
59
60
61
62
63
64
65
66
67
68
def outlier_calculation(input: np.ndarray, iq1: float = None, iq3: float = None) -> np.ndarray:
    """Flag the elements of ``input`` that lie far away from its median.

    An element is flagged when it is more than ``1.5 * (iq3 - iq1)`` below or
    above the median of ``input``.

    Args:
        input: One-dimensional numeric data (NumPy array or pandas Series).
        iq1: Lower quartile. When ``None`` it is computed as the 25th
            percentile of ``input``.
        iq3: Upper quartile. When ``None`` it is computed as the 75th
            percentile of ``input``.

    Returns:
        A list with one boolean per element of ``input``; ``True`` marks an
        outlier.
    """
    # Compare against None instead of relying on truthiness, so that a
    # legitimate quartile value of 0 passed by the caller is not silently
    # replaced by a recomputed percentile.
    iq1 = iqr_calculation(input, 25) if iq1 is None else iq1
    iq3 = iqr_calculation(input, 75) if iq3 is None else iq3

    median = np.median(input)
    margin = 1.5 * (iq3 - iq1)

    # NOTE(review): the fences are centred on the median rather than on
    # iq1/iq3 as in Tukey's rule -- kept as-is, confirm this is intended.
    below_fence = input < median - margin
    above_fence = input > median + margin

    # Combine the masks element-wise; going through np.asarray makes this work
    # for plain NumPy arrays as well as pandas Series.
    return list(np.asarray(below_fence | above_fence))

trend_calculation(input_data, half_window_size=2, padding='same')

calculate the trend of the data, which is the sliding-window average

Source code in exe_kg_lib/utils/task_utils/statistic_utils.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
def trend_calculation(
    input_data: np.ndarray,
    half_window_size: int = 2,
    padding: str = "same",
) -> np.ndarray:
    """Calculate the trend of the data as a centred sliding-window average.

    Each output value is the mean of the ``2 * half_window_size + 1`` samples
    centred on the corresponding input sample. The series is extended at both
    ends by repeating its first and last value, so the output has as many rows
    as the input has samples.

    Args:
        input_data: One-dimensional numeric data (NumPy array, pandas Series
            or any other array-like).
        half_window_size: Number of neighbours taken on each side of a sample.
        padding: Only edge-replicating ``"same"`` padding is implemented. The
            argument is accepted for interface compatibility and every value
            currently behaves like ``"same"``.

    Returns:
        A float array of shape ``(len(input_data), 1)`` holding the averages.

    Raises:
        ValueError: If ``half_window_size`` is negative or ``input_data`` is
            not one-dimensional.
    """
    if half_window_size < 0:
        raise ValueError("half_window_size must be non-negative")

    values = np.asarray(input_data)
    if values.ndim != 1:
        raise ValueError("input_data must be one-dimensional")

    sample_count = values.shape[0]
    if sample_count == 0:
        # Nothing to average; keep the single-column output layout.
        return np.empty((0, 1), dtype=float)

    # Repeat the first/last sample so that the window is always full.
    padded = np.pad(values, half_window_size, mode="edge")
    window_size = 2 * half_window_size + 1

    # Add up the shifted views of the padded series; position k of the sum
    # then holds the total of the window centred on sample k.
    window_sum = 0
    for offset in range(window_size):
        window_sum = window_sum + padded[offset : offset + sample_count]

    # Single-column 2-D result, one row per input sample.
    return (window_sum / window_size).reshape(-1, 1)

Last update: October 20, 2023