Lineare Regression

This commit is contained in:
dev weycloud
2021-11-14 17:24:14 +01:00
parent 402383f289
commit ea08ba9b18
7 changed files with 174 additions and 4 deletions

View File

@@ -1,7 +1,12 @@
import os
import pandas as pd
import numpy as np
df = pd.read_csv('/home/pi/Documents/Code/Python/ProgrammierungUndDatenanalyse/Sonstiges/STAT2/vl2-varianz-v1.csv')
# location will help to open files in the same directory as the py-script
__location__ = os.path.realpath(
os.path.join(os.getcwd(), os.path.dirname(__file__)))
df = pd.read_csv(os.path.join(__location__, 'vl2-varianz-v1.csv'))
# Dataframe
print(df)

View File

@@ -1,7 +1,12 @@
import os
import pandas as pd
import numpy as np
df = pd.read_csv('/home/pi/Documents/Code/Python/ProgrammierungUndDatenanalyse/Sonstiges/STAT2/vl3-standardfehler.csv')
# location will help to open files in the same directory as the py-script
__location__ = os.path.realpath(
os.path.join(os.getcwd(), os.path.dirname(__file__)))
df = pd.read_csv(os.path.join(__location__, 'vl3-standardfehler.csv'))
# Dataframe
print(df)
@@ -27,14 +32,28 @@ for index, row in df.iterrows():
summeQuadrierteAbweichungen = summeQuadrierteAbweichungen + (row.freq * (row.x - mean)**2)
print(row['x'], row['freq'], 'summe²abweichungen: ', summeQuadrierteAbweichungen)
variance = variancePart1 * summeQuadrierteAbweichungen
print("variance: ", variance)
print("pop variance: ", variance)
# √(^σ²)
standardDev = variance**(1/2) # √n = n^1/2
print("Standardabweichung: ", standardDev)
print("pop Standardabweichung: ", standardDev)
# Standard error of the mean: ŝd / √freq
standardfehler = standardDev / sums.freq**(1/2) # √n = n^1/2
print("Standardfehler des Mittelwerts: ", standardfehler)
# "Bonus":
# Mittelwertsverteilung bei 2 Würfeln
print()
import random

View File

@@ -0,0 +1,6 @@
x,freq
1,159
2,500
3,674
4,110
5,21
1 x freq
2 1 159
3 2 500
4 3 674
5 4 110
6 5 21

View File

@@ -0,0 +1,57 @@
"""Frequency-weighted descriptive statistics for the satisfaction survey.

Reads a frequency table with the columns ``x`` (rating value) and ``freq``
(how often that rating occurred) and prints the weighted mean, the estimated
population variance and standard deviation, the standard error of the mean,
two confidence intervals and the z-scores of the ratings 1 and 5.
"""
import os

import pandas as pd
import numpy as np

# Resolve the data file relative to this script (same approach as the other
# scripts in this directory) so it no longer depends on one machine's
# absolute path or on the current working directory.
__location__ = os.path.realpath(
    os.path.join(os.getcwd(), os.path.dirname(__file__)))
CSV_PATH = os.path.join(__location__, 'vl4-zufriedenheit.csv')


def compute_statistics(df, z_values=(1, 5)):
    """Return the frequency-weighted statistics of a frequency table.

    Args:
        df: DataFrame with a numeric column ``x`` (observed value) and a
            numeric column ``freq`` (number of observations of that value).
        z_values: Raw values for which a z-score is computed.

    Returns:
        A dict with the keys ``total_freq``, ``mean``, ``running_sq_dev``
        (cumulative sum of ``freq * (x - mean)**2`` per row), ``variance``,
        ``std_dev``, ``std_error``, ``ci_955``, ``ci_95`` (both
        ``(lower, upper)`` tuples) and ``z_scores`` (dict value -> z-score).

    Raises:
        ValueError: If the total frequency is below 2, because the variance
            estimate divides by ``total_freq - 1``.
    """
    total_freq = df['freq'].sum()
    if total_freq < 2:
        raise ValueError(
            'total frequency must be at least 2 to estimate the variance')

    # Weighted mean: Σ(freq * x) / Σ(freq)
    mean = (df['x'] * df['freq']).sum() / total_freq

    # Estimated population variance: σ̂² = 1 / (n - 1) * Σ(freq * (x - mean)²)
    # The cumulative sum keeps the per-row running total for reporting; its
    # last entry is the full sum of squared deviations.
    running_sq_dev = (df['freq'] * (df['x'] - mean) ** 2).cumsum()
    variance = (1 / (total_freq - 1)) * running_sq_dev.iloc[-1]

    # Estimated population standard deviation: σ̂ = √(σ̂²)
    std_dev = np.sqrt(variance)

    # Standard error of the mean: √(σ̂² / n), which equals σ̂ / √n
    std_error = np.sqrt(variance / total_freq)

    return {
        'total_freq': total_freq,
        'mean': mean,
        'running_sq_dev': running_sq_dev,
        'variance': variance,
        'std_dev': std_dev,
        'std_error': std_error,
        # 95.5 % interval: mean ± 2 standard errors
        'ci_955': (mean - 2 * std_error, mean + 2 * std_error),
        # 95 % interval: mean ± 1.96 standard errors
        'ci_95': (mean - 1.96 * std_error, mean + 1.96 * std_error),
        # z-score: (x_i - mean) / σ̂
        'z_scores': {value: (value - mean) / std_dev for value in z_values},
    }


def main():
    """Load the survey frequency table and print all statistics."""
    df = pd.read_csv(CSV_PATH)

    # Dataframe
    print(df)
    print(df.sum())

    stats = compute_statistics(df)
    print('Summierte Häufigkeit: ', stats['total_freq'])
    print("mean: ", stats['mean'])

    # One line per row showing the running sum of squared deviations.
    for x, freq, running in zip(df['x'], df['freq'], stats['running_sq_dev']):
        print(x, freq, 'summe²abweichungen: ', running)

    print("pop variance: ", stats['variance'])
    print("pop Standardabweichung: ", stats['std_dev'])
    print("Standardfehler des Mittelwerts: ", stats['std_error'])

    print("95,5% Konfidenzintervall ", stats['ci_955'][0], stats['ci_955'][1])
    print("95% Konfidenzintervall ", stats['ci_95'][0], stats['ci_95'][1])

    # z-score of 1.00 ("excellent") and of 5.00 ("poor")
    print("zwert1", stats['z_scores'][1])
    print("zwert5", stats['z_scores'][5])


if __name__ == '__main__':
    main()