diff --git a/.gitignore b/.gitignore index c6f9a44..b97b406 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ .vscode/settings.json +Hausarbeit/__pycache__ +Beispielcode* diff --git a/Hausarbeit/Beispielcode von mir.md b/Hausarbeit/Beispielcode von mir.md deleted file mode 100644 index 206cdf8..0000000 --- a/Hausarbeit/Beispielcode von mir.md +++ /dev/null @@ -1,84 +0,0 @@ -``` -# https://towardsdatascience.com/five-regression-python-modules-that-every-data-scientist-must-know-a4e03a886853 -# based on: https://machinelearningmastery.com/how-to-use-correlation-to-understand-the-relationship-between-variables/ - -# pip3 install openpyxl -import os -import pandas as pd # To read data -import math as m -import numpy as np -import scipy as sp - -from scipy import stats -import matplotlib.pyplot as plt # To visualize - -# location will help to open files in the same directory as the py-script -__location__ = os.path.realpath( - os.path.join(os.getcwd(), os.path.dirname(__file__))) - -df = pd.read_excel(os.path.join(__location__,'Daten_Umfrage_SPSS_20211113.xlsx')) - -df = df.apply(pd.to_numeric, errors='coerce') # convert non-numeric values to NaN, e.g. Header "row 1" "CodeXYZ" -> "row 1" "NaN" -print("Dataframe (Zeilen, Spalten, ...) inkl. NaN:", df.shape) -print(df.head(10)) -# Code SE01_01 SE01_02 SE02_01 SE02_02 SE03_01 -# 0 NaN NaN NaN NaN NaN NaN -# 1 NaN 6.0 4.0 7.0 4.0 5.0 -# ... -#df = df.dropna() # CAUTION: drops every row that even contains single NaN ! - -print(df.tail(10)) -# Code SE01_01 SE01_02 SE02_01 SE02_02 SE03_01 -# 155 NaN 4.0 4.0 3.0 5.0 1.0 -# 156 NaN NaN NaN NaN NaN NaN -# (End of File) - -#print(df["HO_Score_Bewerbung_Gewichtet"][105:110]) -#for col in df.columns: - #print(col) - -# Calculate Mean, gew, inv -mwHO01_Diff = df["HO01_Diff"][1:156] # Limit to Column and row Amount -mwHO01_Diff = mwHO01_Diff.mean(skipna=True) # Columns arithm. mean, skipna to ignore NaN rows -mwHO01_Diff = round(mwHO01_Diff, 2) -normHO01_Diff = m.sqrt((mwHO01_Diff / 6)**2) # Norm -invHO01_Diff = 1 - normHO01_Diff # invert -# usw -print("HO01_Diff Mittelwert:", mwHO01_Diff) -print("HO01_Diff Normiert:", normHO01_Diff) -print("HO01_Diff Invertiert:", invHO01_Diff) -# usw - -# Choose Dataframe Columns and row Amount -dfColumnX = df["SS_Score"][1:156] -dfColumnY = df["HO_Score_Bewerbung_Gewichtet"][1:156] - -# Convert Dataframe Columns to Array containing the X- and Y- Values -arrX = np.asarray(dfColumnX) # convert that dataframe column to numpy array -arrY = np.asarray(dfColumnY) - -# Prepare Plot Image -plt.xlabel('SS_Score', color='black') -plt.ylabel('HO_Score_Bewerbung_Gewichtet', color='black') -plt.xlim([0,50]) # set x-Axis View Range,[from,to] -plt.scatter(arrX, arrY) - -arrX, arrY = zip(*sorted(zip(arrX,arrY))) # sort 2 arrays in sync - -# Convert again, as sorting seemed to break the numpy array data format -arrX = np.asarray(arrX) # before: "1 16.0" after: "[16. 18. 21. ...]" -arrY = np.asarray(arrY) - -# Use least Square Linear Regression from SciPy Stats -regr_results = sp.stats.linregress(arrX, arrY) - -steigung = regr_results.slope -yAchsAbschn = regr_results.intercept -arrYpredicted = steigung * arrX + yAchsAbschn # using y = m*x + n, calculate every single Y-Value fitting the regression Lines X-Values - -print("y =", steigung, "* x +", yAchsAbschn) - -# Plot Linear Regression Line -plt.plot(arrX, arrYpredicted, label='Lin Regr', color='red', linestyle='solid') # https://scriptverse.academy/tutorials/python-matplotlib-plot-straight-line.html -plt.show() -``` \ No newline at end of file diff --git a/Hausarbeit/__pycache__/mobile_device_data.cpython-37.pyc b/Hausarbeit/__pycache__/mobile_device_data.cpython-37.pyc deleted file mode 100644 index 1b6bad8..0000000 Binary files a/Hausarbeit/__pycache__/mobile_device_data.cpython-37.pyc and /dev/null differ