Code fuer Hausarbeit a-c,e
This commit is contained in:
BIN
Hausarbeit/Aufgabenstellung Hausarbeit_mobile_device_data.pdf
Normal file
BIN
Hausarbeit/Aufgabenstellung Hausarbeit_mobile_device_data.pdf
Normal file
Binary file not shown.
84
Hausarbeit/Beispielcode von mir.md
Normal file
84
Hausarbeit/Beispielcode von mir.md
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
```
|
||||||
|
# https://towardsdatascience.com/five-regression-python-modules-that-every-data-scientist-must-know-a4e03a886853
|
||||||
|
# based on: https://machinelearningmastery.com/how-to-use-correlation-to-understand-the-relationship-between-variables/
|
||||||
|
|
||||||
|
# pip3 install openpyxl
|
||||||
|
import os
|
||||||
|
import pandas as pd # To read data
|
||||||
|
import math as m
|
||||||
|
import numpy as np
|
||||||
|
import scipy as sp
|
||||||
|
|
||||||
|
from scipy import stats
|
||||||
|
import matplotlib.pyplot as plt # To visualize
|
||||||
|
|
||||||
|
# Example script: load the survey workbook, print a quick overview, derive the
# normalised/inverted mean of one column, and fit + plot a least-squares
# regression line for two score columns.

# __location__ helps to open files that live in the same directory as this script.
__location__ = os.path.realpath(
    os.path.join(os.getcwd(), os.path.dirname(__file__)))

df = pd.read_excel(os.path.join(__location__, 'Daten_Umfrage_SPSS_20211113.xlsx'))

# Convert non-numeric values to NaN, e.g. Header "row 1" "CodeXYZ" -> "row 1" "NaN"
df = df.apply(pd.to_numeric, errors='coerce')
print("Dataframe (Zeilen, Spalten, ...) inkl. NaN:", df.shape)
print(df.head(10))
# Sample output:
# Code SE01_01 SE01_02 SE02_01 SE02_02 SE03_01
# 0 NaN NaN NaN NaN NaN NaN
# 1 NaN 6.0 4.0 7.0 4.0 5.0
# ...
# df = df.dropna()  # CAUTION: drops every row that contains even a single NaN!

print(df.tail(10))
# Sample output:
# Code SE01_01 SE01_02 SE02_01 SE02_02 SE03_01
# 155 NaN 4.0 4.0 3.0 5.0 1.0
# 156 NaN NaN NaN NaN NaN NaN
# (End of File)

# Debug helpers (disabled):
# print(df["HO_Score_Bewerbung_Gewichtet"][105:110])
# for col in df.columns:
#     print(col)

# Mean, normalised mean and inverted mean of one column.
# The slice [1:156] limits the calculation to rows 1..155.
mwHO01_Diff = round(df["HO01_Diff"][1:156].mean(skipna=True), 2)  # skipna ignores NaN rows
normHO01_Diff = abs(mwHO01_Diff / 6)  # normalise; abs() replaces the former sqrt(x**2)
invHO01_Diff = 1 - normHO01_Diff      # invert
# etc.
print("HO01_Diff Mittelwert:", mwHO01_Diff)
print("HO01_Diff Normiert:", normHO01_Diff)
print("HO01_Diff Invertiert:", invHO01_Diff)
# etc.

# Choose the DataFrame columns and the row range for the regression.
dfColumnX = df["SS_Score"][1:156]
dfColumnY = df["HO_Score_Bewerbung_Gewichtet"][1:156]

# Convert the columns to NumPy arrays holding the X and Y values.
arrX = np.asarray(dfColumnX, dtype=float)
arrY = np.asarray(dfColumnY, dtype=float)

# Prepare the plot.
plt.xlabel('SS_Score', color='black')
plt.ylabel('HO_Score_Bewerbung_Gewichtet', color='black')
plt.xlim([0, 50])  # visible x-axis range [from, to]
plt.scatter(arrX, arrY)

# A single NaN in either array would turn the whole regression result into NaN
# and makes the sort order undefined, so only complete pairs are kept.
valid = np.isfinite(arrX) & np.isfinite(arrY)
arrX = arrX[valid]
arrY = arrY[valid]

# Sort both arrays in sync (by X, ties broken by Y). lexsort keeps them as
# NumPy arrays, so no re-conversion is needed afterwards.
order = np.lexsort((arrY, arrX))
arrX = arrX[order]
arrY = arrY[order]

# Least-squares linear regression from SciPy Stats.
regr_results = stats.linregress(arrX, arrY)

steigung = regr_results.slope
yAchsAbschn = regr_results.intercept
# y = m*x + n: the Y value on the regression line for every X value.
arrYpredicted = steigung * arrX + yAchsAbschn

print("y =", steigung, "* x +", yAchsAbschn)

# Plot the regression line.
# https://scriptverse.academy/tutorials/python-matplotlib-plot-straight-line.html
plt.plot(arrX, arrYpredicted, label='Lin Regr', color='red', linestyle='solid')
plt.legend()  # without a legend the label given above is never displayed
plt.show()
|
||||||
|
```
|
||||||
9
Hausarbeit/UnivariateAnalyse.csv
Normal file
9
Hausarbeit/UnivariateAnalyse.csv
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
,battery_power,int_memory,ram
|
||||||
|
count,200.0,200.0,200.0
|
||||||
|
mean,1264.56,33.485,2153.125
|
||||||
|
std,441.5502229610862,17.79559470640905,1140.4263724281582
|
||||||
|
min,504.0,2.0,263.0
|
||||||
|
20%,857.6,16.0,870.8000000000001
|
||||||
|
50%,1249.5,33.0,2172.5
|
||||||
|
80%,1721.4000000000003,51.0,3317.600000000001
|
||||||
|
max,1999.0,64.0,3976.0
|
||||||
|
BIN
Hausarbeit/__pycache__/mobile_device_data.cpython-37.pyc
Normal file
BIN
Hausarbeit/__pycache__/mobile_device_data.cpython-37.pyc
Normal file
Binary file not shown.
211
Hausarbeit/mobile_device_data.csv
Normal file
211
Hausarbeit/mobile_device_data.csv
Normal file
@@ -0,0 +1,211 @@
|
|||||||
|
id,battery_power,bluetooth,dual_sim,4G,int_memory,ram
|
||||||
|
1,1043,1,1,0,5,3476
|
||||||
|
2,841,1,1,1,61,3895
|
||||||
|
3,1807,1,0,0,27,2396
|
||||||
|
4,1546,0,1,1,25,3893
|
||||||
|
5,1434,0,0,1,49,1773
|
||||||
|
6,1464,1,1,1,50,3506
|
||||||
|
7,1718,0,0,0,47,3873
|
||||||
|
8,833,0,1,0,62,1495
|
||||||
|
9,1111,1,1,1,25,3485
|
||||||
|
10,1520,0,0,0,25,651
|
||||||
|
11,1500,0,0,0,55,3866
|
||||||
|
12,1343,0,0,1,34,3911
|
||||||
|
13,900,1,1,0,30,439
|
||||||
|
14,1190,1,1,0,19,992
|
||||||
|
15,630,0,0,1,51,2751
|
||||||
|
16,1846,1,0,1,53,563
|
||||||
|
17,1985,0,1,1,26,2083
|
||||||
|
18,1042,0,0,1,48,2187
|
||||||
|
19,1231,1,1,1,37,3902
|
||||||
|
20,1488,0,0,0,37,2524
|
||||||
|
21,968,0,0,1,7,1357
|
||||||
|
22,529,0,1,0,60,3456
|
||||||
|
23,1558,0,1,0,50,1641
|
||||||
|
24,533,1,1,0,58,2322
|
||||||
|
25,1037,0,1,0,5,3862
|
||||||
|
26,1025,0,1,1,43,557
|
||||||
|
27,1858,0,1,0,17,2427
|
||||||
|
28,980,0,0,0,8,625
|
||||||
|
29,644,1,1,0,15,3836
|
||||||
|
30,1024,1,1,1,38,854
|
||||||
|
31,1981,0,1,1,11,2383
|
||||||
|
32,1380,0,1,1,56,926
|
||||||
|
33,1557,1,1,0,32,3048
|
||||||
|
34,1201,1,1,0,49,824
|
||||||
|
35,1074,1,0,0,45,874
|
||||||
|
36,1175,1,0,0,33,1617
|
||||||
|
37,1280,0,1,1,8,3309
|
||||||
|
38,1715,0,1,1,28,1786
|
||||||
|
39,1165,0,0,0,40,3128
|
||||||
|
40,567,0,0,0,2,2243
|
||||||
|
41,1952,1,0,0,47,994
|
||||||
|
42,822,1,1,0,43,2749
|
||||||
|
43,685,1,1,0,12,793
|
||||||
|
44,1388,1,1,1,17,324
|
||||||
|
45,1972,0,0,0,48,366
|
||||||
|
46,1411,1,0,1,57,1228
|
||||||
|
47,1094,0,1,0,20,1413
|
||||||
|
48,1653,0,1,1,13,2617
|
||||||
|
49,916,0,1,0,33,1798
|
||||||
|
50,1712,0,0,0,29,2693
|
||||||
|
51,882,1,1,0,32,2437
|
||||||
|
52,632,0,0,1,33,837
|
||||||
|
53,1442,1,1,0,56,2815
|
||||||
|
54,900,0,0,0,47,1440
|
||||||
|
55,1630,1,1,0,20,3470
|
||||||
|
56,1596,1,1,0,24,1251
|
||||||
|
57,1272,1,0,1,54,3181
|
||||||
|
58,1640,0,1,0,7,1361
|
||||||
|
59,1111,0,1,0,15,1630
|
||||||
|
60,1889,1,0,0,25,3689
|
||||||
|
61,1907,1,1,0,22,3586
|
||||||
|
62,529,0,1,0,56,3942
|
||||||
|
63,578,0,0,1,38,1431
|
||||||
|
64,1634,1,1,1,39,2167
|
||||||
|
65,1533,1,1,1,22,1248
|
||||||
|
66,660,0,0,1,47,1986
|
||||||
|
67,1847,1,0,1,28,1378
|
||||||
|
68,1206,1,1,0,10,2959
|
||||||
|
69,549,1,0,1,53,1893
|
||||||
|
70,1705,1,0,1,23,1676
|
||||||
|
71,1366,0,1,0,36,1912
|
||||||
|
72,1991,0,0,0,58,1947
|
||||||
|
73,1102,1,0,1,40,2734
|
||||||
|
74,1452,0,0,1,32,3617
|
||||||
|
75,1810,1,1,0,63,3414
|
||||||
|
76,1166,1,1,1,41,482
|
||||||
|
77,881,1,1,0,6,2813
|
||||||
|
78,1134,1,1,1,64,661
|
||||||
|
79,1031,1,1,1,20,2546
|
||||||
|
80,1376,1,1,1,34,3922
|
||||||
|
81,1391,0,1,1,56,1199
|
||||||
|
82,979,0,0,0,24,3779
|
||||||
|
83,1075,1,0,0,36,3870
|
||||||
|
84,968,0,1,1,61,858
|
||||||
|
85,1999,0,1,1,15,3840
|
||||||
|
86,1626,0,0,0,20,454
|
||||||
|
87,942,0,1,0,28,3953
|
||||||
|
88,1182,0,0,1,42,1633
|
||||||
|
89,1982,1,1,0,48,3035
|
||||||
|
90,1373,0,1,0,27,966
|
||||||
|
91,1151,0,1,0,44,1761
|
||||||
|
92,1650,1,1,0,51,1938
|
||||||
|
93,1663,1,1,0,17,2820
|
||||||
|
94,1965,1,0,1,3,305
|
||||||
|
95,679,0,1,0,41,2838
|
||||||
|
96,1465,0,0,0,3,2042
|
||||||
|
97,1809,1,1,0,27,700
|
||||||
|
98,757,1,1,1,26,2593
|
||||||
|
99,1034,1,0,1,47,1835
|
||||||
|
100,1119,1,0,0,23,3121
|
||||||
|
101,559,1,1,1,24,2023
|
||||||
|
102,1204,1,0,0,19,737
|
||||||
|
103,1008,0,1,1,15,450
|
||||||
|
104,1397,1,1,0,19,2928
|
||||||
|
105,697,0,1,1,34,1694
|
||||||
|
106,1939,1,0,0,58,2593
|
||||||
|
107,1039,0,1,0,15,745
|
||||||
|
108,1605,1,0,1,51,1310
|
||||||
|
109,1094,0,1,0,34,2743
|
||||||
|
110,769,1,1,1,30,3976
|
||||||
|
111,861,1,1,0,40,3931
|
||||||
|
112,504,0,1,0,63,3455
|
||||||
|
113,1930,1,1,1,64,1533
|
||||||
|
114,1795,1,1,0,52,3876
|
||||||
|
115,1363,0,1,1,2,3239
|
||||||
|
116,1376,1,0,1,38,3628
|
||||||
|
117,1981,1,0,0,19,3663
|
||||||
|
118,1901,0,0,0,62,1786
|
||||||
|
119,1319,0,1,0,5,3502
|
||||||
|
120,859,1,0,0,58,815
|
||||||
|
121,1664,1,0,0,25,275
|
||||||
|
122,955,0,1,0,62,3758
|
||||||
|
123,517,0,1,0,38,2747
|
||||||
|
124,1806,0,0,1,64,1809
|
||||||
|
125,1348,0,0,1,50,2086
|
||||||
|
126,1455,0,0,0,16,444
|
||||||
|
127,1611,0,1,NaN,25,3467
|
||||||
|
128,1573,1,1,1,24,2776
|
||||||
|
129,557,0,0,1,23,3170
|
||||||
|
130,1599,1,0,0,31,1234
|
||||||
|
131,1051,1,1,0,16,305
|
||||||
|
132,1857,1,1,0,14,1745
|
||||||
|
133,1986,0,0,0,24,1707
|
||||||
|
134,591,1,0,0,21,2220
|
||||||
|
135,1140,0,1,1,56,3130
|
||||||
|
136,923,1,0,1,10,1500
|
||||||
|
137,1582,NaN,0,0,33,2145
|
||||||
|
138,723,0,0,0,63,488
|
||||||
|
139,1251,1,1,1,54,3863
|
||||||
|
140,574,1,1,1,60,2913
|
||||||
|
141,948,1,1,1,60,2094
|
||||||
|
142,1571,0,1,0,44,3141
|
||||||
|
143,564,1,0,0,33,2573
|
||||||
|
144,1466,0,1,1,57,
|
||||||
|
145,597,1,1,0,16,3788
|
||||||
|
146,895,1,1,1,9,3445
|
||||||
|
147,1535,0,1,1,37,3241
|
||||||
|
148,1832,1,0,0,44,2976
|
||||||
|
149,1045,1,0,1,58,2241
|
||||||
|
150,1483,1,0,1,61,3843
|
||||||
|
151,976,0,0,1, ,3261
|
||||||
|
152,1840,1,1,0,46,3264
|
||||||
|
153,1807,0,1,0,8,826
|
||||||
|
154,624,1,1,0,30,1314
|
||||||
|
155,1963,0,0,1,38,2699
|
||||||
|
156,1307,1,0,1,4,2565
|
||||||
|
157,1933,1,1, ,23,3973
|
||||||
|
158,1496,1,1,0,42,3537
|
||||||
|
159,1532,1,0,0,7,3449
|
||||||
|
160,1004,0,1,1,8,3895
|
||||||
|
161,1483,1,1,1,38,2777
|
||||||
|
162,945,0,0,0,47,2904
|
||||||
|
163,1081,NaN,0,1,17,3774
|
||||||
|
164,1012,0,1,1,32,3034
|
||||||
|
165,1762,0,0,0,50,2940
|
||||||
|
166,796,1,1,1,36,
|
||||||
|
167,1547,0,0,1,50,1168
|
||||||
|
168,988,0,0,1,12,376
|
||||||
|
169,1180,1,0,1,43,3510
|
||||||
|
170,852,1,0,1,3,542
|
||||||
|
171,607,1,0,1,18,550
|
||||||
|
172,1765,0,0,0,24,791
|
||||||
|
173,1250,0,0,0,63,1895
|
||||||
|
174,1577,0,1,0,55,609
|
||||||
|
175,1153,1,1,0,14,263
|
||||||
|
176,651,1,0,0, ,2141
|
||||||
|
177,1186,1,1,0,25,1270
|
||||||
|
178,1429,1,0,0,33,403
|
||||||
|
179,556,0,0,1,10,2040
|
||||||
|
180,1735,1,0,1,21,2597
|
||||||
|
181,1859,0,0,1,42,714
|
||||||
|
182,915,0,0,0,10,423
|
||||||
|
183,890,NaN,0,0,9,1238
|
||||||
|
184,758,0,0,0,48,926
|
||||||
|
185,541,0,0,1,28,2704
|
||||||
|
186,586,0,1,1,33,2817
|
||||||
|
187,762,1,1,1,39,2271
|
||||||
|
188,683,0,1,1,9,1513
|
||||||
|
189,1526,0,1,0,39,339
|
||||||
|
190,1771,1,0,0,39,794
|
||||||
|
191,639,0,0,0,28,391
|
||||||
|
192,1783,1,0,1,23,541
|
||||||
|
193,1933,0,0,1,55,648
|
||||||
|
194,1384,,1,1,62,3386
|
||||||
|
195,1770,0,0,0,26,1651
|
||||||
|
196,1202,0,0,0,49,1177
|
||||||
|
197,885,0,1,0,3,2887
|
||||||
|
198,1629,1,0,1,2,2178
|
||||||
|
199,1072,1,1,1,4,2878
|
||||||
|
200,1863,1,1,0,64,3201
|
||||||
|
201,1739,0,1,0,51,1490
|
||||||
|
202,895,1,0,0,23,2724
|
||||||
|
203,1278,1,1,0,56,3032
|
||||||
|
204,562,1,1,1,43,3352
|
||||||
|
205,1249,1,1,0,38,3195
|
||||||
|
206,1811,0,0,1,25,1677
|
||||||
|
207,560,1,1,1,12,2620
|
||||||
|
208,1773,1,0,1,61,1061
|
||||||
|
209,1715,1,0,0,11,1018
|
||||||
|
210,725,0,1,1,26,1370
|
||||||
|
107
Hausarbeit/mobile_device_data.py
Normal file
107
Hausarbeit/mobile_device_data.py
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
import numpy as np # pip3 install numpy
|
||||||
|
import pandas as pd # pip3 install pandas
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import scipy as sp
|
||||||
|
from scipy import stats
|
||||||
|
|
||||||
|
### a) Read the source data
# Use pandas' read_csv(); the column named "id" is left out by requesting
# only the remaining columns through usecols.
gewuenschteSpalten = [
    "battery_power",
    "bluetooth",
    "dual_sim",
    "4G",
    "int_memory",
    "ram",
]
df = pd.read_csv('mobile_device_data.csv', usecols=gewuenschteSpalten)

# Print the first 12 rows (row 0 to 11 = 12 rows) via DataFrame.head().
print(df.head(12))
# Output:
# battery_power bluetooth dual_sim 4G int_memory ram
# 0 1043 1.0 1 0 5 3476
# ... ... ... ... ... ... ...
# 11 1343 0.0 0 1 34 3911
|
||||||
|
|
||||||
|
### b) Cleaning
# The DataFrame contains different kinds of cells that cannot be evaluated:
# 1. cells that already hold NaN (Not a Number)
# 2. cells that hold a whitespace-only string (e.g. ' ')
#    -> convert these to NaN with DataFrame.replace(). The regular expression
#       matches any cell consisting solely of whitespace, not just a single
#       blank, so variants such as '  ' are caught as well.
df.replace(to_replace=r'^\s*$', value=np.nan, regex=True, inplace=True)

# Cases 1 and 2 can now be reported the same way. There are 4 rows whose
# bluetooth field is NaN, and 6 rows in total whose 4G, int_memory or ram
# field is NaN:
# print(df.isna().sum())
# Output:
# battery_power 0
# bluetooth 4
# dual_sim 0
# 4G 2
# int_memory 2
# ram 2

# Call DataFrame.dropna() to delete such entries:
# axis=0       : rows are dropped
# how='any'    : a single empty cell is enough to drop the row
# inplace=True : the operation is applied directly to the DataFrame
df.dropna(axis=0, how='any', inplace=True)

# After this change only 200 rows remain in the DataFrame.
print(len(df.index))
# Output:
# 200
|
||||||
|
|
||||||
|
### c) Univariate analysis
# The data types of the desired features are not all interpreted as integers
# from the start (cf. print(df.info())), so they are converted first.
# Without the conversion .describe() cannot deal properly with the mixed
# column types; the sample output would otherwise be: "mean 1264.560000 NaN NaN"
metrischeSpalten = ["battery_power", "int_memory", "ram"]
dfMetrischeMerkmale = df[metrischeSpalten].astype(int)

# Summary statistics with the 20 %, 50 % and 80 % percentiles, saved as CSV.
dfUnivariateAnalyse = dfMetrischeMerkmale.describe(
    include='all',
    percentiles=[0.2, 0.5, 0.8],
)
dfUnivariateAnalyse.to_csv('UnivariateAnalyse.csv')
# Output:
# battery_power int_memory ram
# count 200.000000 200.000000 200.000000
# mean 1264.560000 33.485000 2153.125000
# std 441.550223 17.795595 1140.426372
# min 504.000000 2.000000 263.000000
# 20% 857.600000 16.000000 870.800000
# 50% 1249.500000 33.000000 2172.500000
# 80% 1721.400000 51.000000 3317.600000
# max 1999.000000 64.000000 3976.000000
|
||||||
|
|
||||||
|
|
||||||
|
### d) Bar charts
# Nominal (0/1) features as integers.
nominaleSpalten = ["bluetooth", "dual_sim", "4G"]
dfNominaleMerkmale = df.loc[:, nominaleSpalten].astype(int)

# The plotting code for this section is still commented out:
# dfNominaleSummen = dfNominaleMerkmale["bluetooth", "dual_sim", "4G"].sum()
# print(dfNominaleMerkmale)
# dfNominaleMerkmale.plot(kind="hist")
# plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### e) Pearson correlations and linear regression of two features
print(dfMetrischeMerkmale.corr(method="pearson"))
# Output:
# battery_power int_memory ram
# battery_power 1.000000 0.050449 -0.069141
# int_memory 0.050449 1.000000 0.047475
# ram -0.069141 0.047475 1.000000
# -> ram and battery_power show the strongest correlation, albeit a negative one.

# Least-squares linear regression via the SciPy Stats module.
werteListeX = dfMetrischeMerkmale["ram"]
werteListeY = dfMetrischeMerkmale["battery_power"]
regrErgebnisse = stats.linregress(werteListeX, werteListeY)

# Rounded coefficients are used for the printed equation only.
steigung = round(regrErgebnisse.slope, 4)
yAchsAbschn = round(regrErgebnisse.intercept, 4)

# y = m*x + n: the Y value on the regression line for every X value. The
# unrounded coefficients are used so that the plotted line is the actual
# least-squares fit rather than an approximation of it.
arrYpredicted = regrErgebnisse.slope * werteListeX + regrErgebnisse.intercept
print("Regressionsgleichung:", "y =", steigung, "* x +", yAchsAbschn)

# Plot the regression line.
# https://scriptverse.academy/tutorials/python-matplotlib-plot-straight-line.html
plt.plot(werteListeX, arrYpredicted, label='Lin Regression', color='red', linestyle='solid')

# Label the axes, add the data points and show the figure.
plt.xlabel('ram', color='black')
plt.ylabel('battery_power', color='black')
# plt.xlim([0, 50])  # visible x-axis range [from, to]
plt.scatter(werteListeX, werteListeY)
plt.legend()  # without a legend the label given above is never displayed
plt.show()
|
||||||
|
|
||||||
|
|
||||||
@@ -91,18 +91,21 @@ df["HO_Score_erklaert"] = yAchsAbschn + df["SS_Score"] * steigung
|
|||||||
|
|
||||||
# Vorlesung 6
|
# Vorlesung 6
|
||||||
df["Diff_roh_erklaert"] = df["HO_Score_Bewerbung_Roh"] - df["HO_Score_erklaert"]
|
df["Diff_roh_erklaert"] = df["HO_Score_Bewerbung_Roh"] - df["HO_Score_erklaert"]
|
||||||
|
print("Diff_roh_erklaert_Mean", df["Diff_roh_erklaert"].mean())
|
||||||
df.loc[df.Diff_roh_erklaert < 0, "Diff_roh_erklaert"] = 0 # Differenz soll minimal 0 sein dürfen
|
df.loc[df.Diff_roh_erklaert < 0, "Diff_roh_erklaert"] = 0 # Differenz soll minimal 0 sein dürfen
|
||||||
|
|
||||||
df["H0_Final"] = df["HO_Score_Bewerbung_Gewichtet"] + df["Diff_roh_erklaert"]
|
df["H0_Final"] = df["HO_Score_Bewerbung_Gewichtet"] + df["Diff_roh_erklaert"]
|
||||||
|
|
||||||
df["H0_Wert_Z_Wert"] = (df["H0_Final"] - df["H0_Final"].mean()) / df["H0_Final"].std()
|
df["H0_Wert_Z_Wert"] = (df["H0_Final"] - df["H0_Final"].mean()) / df["H0_Final"].std()
|
||||||
|
print("HO_Final mean", df["H0_Final"].mean(), "HO_Final std:", df["H0_Final"].std())
|
||||||
|
print("SS_Mean", df["SS_Score"].mean())
|
||||||
|
|
||||||
df["H0_Wert_7er_Skala"] = (df["H0_Wert_Z_Wert"] * 1.5) + 4
|
df["H0_Wert_7er_Skala"] = (df["H0_Wert_Z_Wert"] * 1.5) + 4
|
||||||
df["H0_Wert_7er_Skala"] = round(df["H0_Wert_7er_Skala"], 2)
|
df["H0_Wert_7er_Skala"] = round(df["H0_Wert_7er_Skala"], 2)
|
||||||
df.loc[df.H0_Wert_7er_Skala < 1, "H0_Wert_7er_Skala"] = 1 # ausreisser festlegen auf Min 1
|
df.loc[df.H0_Wert_7er_Skala < 1, "H0_Wert_7er_Skala"] = 1 # ausreisser festlegen auf Min 1
|
||||||
df.loc[df.H0_Wert_7er_Skala > 7, "H0_Wert_7er_Skala"] = 7 # ausreisser festelegen auf Max 7
|
df.loc[df.H0_Wert_7er_Skala > 7, "H0_Wert_7er_Skala"] = 7 # ausreisser festelegen auf Max 7
|
||||||
|
|
||||||
#df.to_csv(__location__ + '/tmpViewFile.csv', sep=";")
|
df.to_csv(__location__ + '/tmpViewFile.csv', sep=";")
|
||||||
|
|
||||||
# Open Dataframe in Webbrowser:
|
# Open Dataframe in Webbrowser:
|
||||||
def showDf(df):
|
def showDf(df):
|
||||||
|
|||||||
Reference in New Issue
Block a user