# Source: ProgrammierungUndDatenanalyse/Sonstiges/STAT2/vl2-varianz-v1.py
# 2021-11-14 17:24:14 +01:00
# 42 lines, 1.0 KiB, Python

import os
import pandas as pd
import numpy as np
# Build the absolute path of the directory that holds this script, so the
# CSV file stored next to it can be opened from there.
script_dir = os.path.dirname(__file__)
__location__ = os.path.realpath(os.path.join(os.getcwd(), script_dir))

# Load the frequency table (columns used below: 'x' and 'freq').
csv_path = os.path.join(__location__, 'vl2-varianz-v1.csv')
df = pd.read_csv(csv_path)
# Show the raw frequency table.
print(df)
#    x  freq
# 0  1     9
# 1  2     7
# 2  3     5
# 3  4     4
# 4  5     2

# Column totals; the 'freq' total is used below as the sample size n.
sums = df.sum()
print(sums)
# x       15
# freq    27
print(sums['freq'])
# 27
# Calculate the mean, weighting every value x by its frequency:
#   mean = Σ(freq * x) / Σ(freq)
# The element-wise column product replaces a per-row iterrows() loop; it
# yields the same total without building a Series object for each row.
rowSum = (df['x'] * df['freq']).sum()
mean = rowSum / sums.freq
print("mean: ", mean)
# Calculate the sample variance, weighting every value x by its frequency:
#   σ̂² = 1 / (n - 1) * Σ(freq * (x - mean)²),  with n = Σ(freq)
variancePart1 = 1 / (sums.freq - 1)

# Accumulate the weighted squared deviations row by row.
# itertuples() is used instead of iterrows(): it is cheaper and keeps each
# column's own dtype rather than coercing the row into a single Series.
variancePart2 = 0
for row in df.itertuples(index=False):
    variancePart2 += row.freq * (row.x - mean) ** 2
    # Trace output: current x, its frequency, and the running sum so far.
    print(row.x, row.freq, variancePart2)

variance = variancePart1 * variancePart2
print("variance: ", variance)
# variance:  1.703703703703704