42 lines
1.0 KiB
Python
42 lines
1.0 KiB
Python
import os
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
# Resolve the directory that holds this script, so data files stored next to
# it can be opened no matter which working directory the script is run from.
_scriptDir = os.path.dirname(__file__)
__location__ = os.path.realpath(os.path.join(os.getcwd(), _scriptDir))

# Load the table (columns "x" and "freq") from the CSV file that sits beside
# this script.
_csvPath = os.path.join(__location__, 'vl2-varianz-v1.csv')
df = pd.read_csv(_csvPath)
|
|
|
|
# Show the raw table.
print(df)
#    x  freq
# 0  1     9
# 1  2     7
# 2  3     5
# 3  4     4
# 4  5     2

# Column totals; kept in `sums` because the statistics below reuse them.
sums = df.sum()
print(sums)
# x       15
# freq    27

# Sum of the frequencies, i.e. the number of observations n.
print(sums['freq'])
# 27
|
|
|
|
# Calculate the mean, respecting frequencies:
#   mean = Σ(x * freq) / Σ(freq)
# The products are formed column-wise in one vectorised step instead of
# walking the rows with iterrows().
rowSum = (df.x * df.freq).sum()
mean = rowSum / sums.freq
print("mean: ", mean)
|
|
|
|
# Calculate the variance, respecting frequencies.
# Sample variance: σ̂² = 1 / (n - 1) * Σ(freq * (x - mean)²), with n = Σ(freq)
variancePart1 = 1 / (sums.freq - 1)

# Frequency-weighted squared deviation of every row, computed column-wise,
# together with its running total (used only for the trace output below).
weightedSquares = df.freq * (df.x - mean) ** 2
runningTotal = weightedSquares.cumsum()

# Trace: one line per row showing x, freq and the sum accumulated so far.
for x, freq, partial in zip(df.x, df.freq, runningTotal):
    print(x, freq, partial)

variancePart2 = weightedSquares.sum()
variance = variancePart1 * variancePart2
print("variance: ", variance)
# variance:  1.703703703703704