Code fuer Hausarbeit a-c,e
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1 +1,3 @@
|
|||||||
.vscode/settings.json
|
.vscode/settings.json
|
||||||
|
Hausarbeit/__pycache__
|
||||||
|
Beispielcode*
|
||||||
|
|||||||
@@ -1,84 +0,0 @@
|
|||||||
```
|
|
||||||
# https://towardsdatascience.com/five-regression-python-modules-that-every-data-scientist-must-know-a4e03a886853
|
|
||||||
# based on: https://machinelearningmastery.com/how-to-use-correlation-to-understand-the-relationship-between-variables/
|
|
||||||
|
|
||||||
# pip3 install openpyxl
|
|
||||||
import os
|
|
||||||
import pandas as pd # To read data
|
|
||||||
import math as m
|
|
||||||
import numpy as np
|
|
||||||
import scipy as sp
|
|
||||||
|
|
||||||
from scipy import stats
|
|
||||||
import matplotlib.pyplot as plt # To visualize
|
|
||||||
|
|
||||||
# Resolve the folder that contains this script, so the workbook next to it is
# found regardless of the working directory the script is started from.
__location__ = os.path.realpath(
    os.path.join(os.getcwd(), os.path.dirname(__file__))
)

# Read the survey workbook and coerce every cell to a number in one pass.
# Cells that cannot be parsed as numbers (e.g. a text value such as "CodeXYZ")
# become NaN instead of raising.
df = pd.read_excel(
    os.path.join(__location__, 'Daten_Umfrage_SPSS_20211113.xlsx')
).apply(pd.to_numeric, errors='coerce')

# Quick sanity check of what was loaded: dimensions first, then both ends.
print("Dataframe (Zeilen, Spalten, ...) inkl. NaN:", df.shape)
print(df.head(10))
# Example output of head():
#    Code  SE01_01  SE01_02  SE02_01  SE02_02  SE03_01
# 0   NaN      NaN      NaN      NaN      NaN      NaN
# 1   NaN      6.0      4.0      7.0      4.0      5.0
# ...

# NOTE: a blanket df.dropna() is deliberately NOT applied here, because it
# would discard every row that contains even a single NaN.

print(df.tail(10))
# Example output of tail():
#      Code  SE01_01  SE01_02  SE02_01  SE02_02  SE03_01
# 155   NaN      4.0      4.0      3.0      5.0      1.0
# 156   NaN      NaN      NaN      NaN      NaN      NaN
# (End of File)
|
|
||||||
|
|
||||||
#print(df["HO_Score_Bewerbung_Gewichtet"][105:110])
|
|
||||||
#for col in df.columns:
|
|
||||||
#print(col)
|
|
||||||
|
|
||||||
# Mean, normalized mean and inverted value for the survey item "HO01_Diff".
#
# Rows 1..155 are used (positional slice; row 0 and the trailing row hold no
# data in the sample output above). skipna=True ignores NaN cells so that
# missing answers do not turn the mean into NaN. The mean is rounded to two
# decimals.
mwHO01_Diff = round(df["HO01_Diff"].iloc[1:156].mean(skipna=True), 2)

# Normalize to the magnitude of the mean relative to 6.
# NOTE(review): 6 is presumably the largest possible difference on the survey
# scale - confirm against the questionnaire.
# m.fabs() replaces the former sqrt(x**2): it yields the absolute value
# directly, without the rounding error / underflow of squaring and taking the
# root, and still returns a plain Python float.
# NOTE: the normalized value is derived from the already rounded mean.
normHO01_Diff = m.fabs(mwHO01_Diff / 6)

# Invert so that a small difference results in a high score.
invHO01_Diff = 1 - normHO01_Diff
# etc. (repeat for the remaining items)

print("HO01_Diff Mittelwert:", mwHO01_Diff)
print("HO01_Diff Normiert:", normHO01_Diff)
print("HO01_Diff Invertiert:", invHO01_Diff)
# etc. (repeat for the remaining items)
|
|
||||||
|
|
||||||
# Scatter plot of SS_Score against HO_Score_Bewerbung_Gewichtet together with
# the least-squares regression line.

# Select the two score columns for rows 1..155 and drop every (x, y) pair in
# which either value is missing. This is limited to these two columns, so rows
# are not lost because of NaN values in unrelated columns. Without this step a
# single NaN makes linregress return NaN for slope and intercept, and sorting
# values that contain NaN gives an unreliable order.
dfScores = df[["SS_Score", "HO_Score_Bewerbung_Gewichtet"]].iloc[1:156].dropna()
dfColumnX = dfScores["SS_Score"]
dfColumnY = dfScores["HO_Score_Bewerbung_Gewichtet"]

# Convert the columns to numpy arrays holding the X and Y values.
arrX = np.asarray(dfColumnX)
arrY = np.asarray(dfColumnY)

# Prepare the plot.
plt.xlabel('SS_Score', color='black')
plt.ylabel('HO_Score_Bewerbung_Gewichtet', color='black')
plt.xlim([0, 50])  # visible range of the x axis: [from, to]
plt.scatter(arrX, arrY)

# Sort both arrays in sync, by x first and by y for equal x. lexsort treats
# its LAST key as the primary one. Indexing with the resulting order keeps
# both arrays as numpy arrays, so no re-conversion is needed afterwards.
sortOrder = np.lexsort((arrY, arrX))
arrX = arrX[sortOrder]
arrY = arrY[sortOrder]

# Least-squares linear regression from SciPy stats.
regr_results = stats.linregress(arrX, arrY)

steigung = regr_results.slope          # slope m
yAchsAbschn = regr_results.intercept   # y-axis intercept n
# y = m * x + n evaluated for every x value gives the points on the line.
arrYpredicted = steigung * arrX + yAchsAbschn

print("y =", steigung, "* x +", yAchsAbschn)

# Draw the regression line on top of the scatter plot and show the figure.
plt.plot(arrX, arrYpredicted, label='Lin Regr', color='red', linestyle='solid')
plt.show()
|
|
||||||
```
|
|
||||||
Binary file not shown.
Reference in New Issue
Block a user