"""Mean and sample variance of a frequency table read from a CSV file.

The CSV is expected to provide the columns ``x`` (observed value) and
``freq`` (how often that value occurred).
"""
import os

import numpy as np
import pandas as pd

# Directory of this script, so the CSV is opened next to the py-script
# regardless of the current working directory.
__location__ = os.path.realpath(
    os.path.join(os.getcwd(), os.path.dirname(__file__)))


def _as_float_arrays(values, frequencies):
    """Convert both inputs to equally shaped float arrays or raise ValueError."""
    values = np.asarray(values, dtype=float)
    frequencies = np.asarray(frequencies, dtype=float)
    if values.shape != frequencies.shape:
        raise ValueError("values and frequencies must have the same shape")
    return values, frequencies


def weighted_mean(values, frequencies):
    """Return the mean of *values*, each counted *frequencies* times.

    Args:
        values: Sequence of observed values.
        frequencies: Sequence of occurrence counts, aligned with *values*.

    Returns:
        The frequency-weighted arithmetic mean as a float.

    Raises:
        ValueError: If the shapes differ or the total frequency is not
            positive (the mean would be undefined).
    """
    values, frequencies = _as_float_arrays(values, frequencies)
    total = frequencies.sum()
    if total <= 0:
        raise ValueError("total frequency must be positive")
    return float((values * frequencies).sum() / total)


def weighted_sample_variance(values, frequencies):
    """Return the sample variance of a frequency table.

    Sample variance: σ̂² = (1 / (n - 1)) * Σ(freq * (x - mean)²),
    where n is the sum of all frequencies.

    Args:
        values: Sequence of observed values.
        frequencies: Sequence of occurrence counts, aligned with *values*.

    Returns:
        The sample variance as a float.

    Raises:
        ValueError: If the shapes differ or the total frequency is not
            greater than 1 (the n - 1 denominator would be zero or negative).
    """
    values, frequencies = _as_float_arrays(values, frequencies)
    total = frequencies.sum()
    if total <= 1:
        raise ValueError(
            "sample variance needs a total frequency greater than 1")
    mean = (values * frequencies).sum() / total
    squared_terms = frequencies * (values - mean) ** 2
    return float(squared_terms.sum() / (total - 1))


def main():
    """Load the frequency table, then print its sums, mean and variance."""
    df = pd.read_csv(os.path.join(__location__, 'vl2-varianz-v1.csv'))

    # Dataframe, e.g.:
    #    x  freq
    # 0  1     9
    # 1  2     7
    # 2  3     5
    # 3  4     4
    # 4  5     2
    print(df)

    # Column sums, e.g. x = 15 and freq = 27 for the table above.
    sums = df.sum()
    print(sums)
    print(sums['freq'])

    # Mean, respecting frequencies.
    mean = weighted_mean(df['x'], df['freq'])
    print("mean: ", mean)

    # Trace of the running sum Σ(freq * (x - mean)²), one line per row.
    squared_terms = df['freq'] * (df['x'] - mean) ** 2
    for x, freq, partial in zip(df['x'], df['freq'], squared_terms.cumsum()):
        print(x, freq, partial)

    # Variance, respecting frequencies (about 1.7037 for the table above).
    variance = weighted_sample_variance(df['x'], df['freq'])
    print("variance: ", variance)


if __name__ == "__main__":
    main()