32번째 줄: | 32번째 줄: | ||
<source lang="python3"> | <source lang="python3"> | ||
from sklearn.svm import SVC | from sklearn.svm import SVC | ||
+ | from matplotlib import pyplot as plt | ||
import pickle | import pickle | ||
clf= SVC(kernel='poly', probability=True) | clf= SVC(kernel='poly', probability=True) | ||
x = pickle.load(open('preprocessing2016.pkl', "rb")) | x = pickle.load(open('preprocessing2016.pkl', "rb")) | ||
y = pickle.load(open('preprocessing2015.pkl', "rb")) | y = pickle.load(open('preprocessing2015.pkl', "rb")) | ||
− | + | ||
− | + | x = [x[i] for i in range(len(x)) if i % 1000 == 0] | |
+ | y = [y[i] for i in range(len(y)) if i % 1000 == 0] | ||
data_test = [[row["weight"], row["height"]] for row in y] | data_test = [[row["weight"], row["height"]] for row in y] | ||
− | labels_test = [row["overweight"] for row in y] | + | labels_test = [row["overweight"] > 2 for row in y] |
data_train = [[row["weight"], row["height"]] for row in x] | data_train = [[row["weight"], row["height"]] for row in x] | ||
− | labels_train = [row["overweight"] for row in x] | + | |
+ | labels_train = [row["overweight"] > 2 for row in x] | ||
+ | |||
+ | |||
## 3-1) training | ## 3-1) training | ||
clf.fit(data_train, labels_train) | clf.fit(data_train, labels_train) | ||
48번째 줄: | 53번째 줄: | ||
probability_test= clf.predict_proba(data_test)[:,1] | probability_test= clf.predict_proba(data_test)[:,1] | ||
## 4. Evaluate the model | ## 4. Evaluate the model | ||
− | + | from sklearn.metrics import confusion_matrix, roc_auc_score | |
## 4-1) AUC | ## 4-1) AUC | ||
− | auc= roc_auc_score(labels_test, | + | #auc= roc_auc_score(labels_test, probability_test) |
## 4-2) Confusion matrix | ## 4-2) Confusion matrix | ||
− | tn, fp, fn, tp= confusion_matrix(labels_test, | + | tn, fp, fn, tp= confusion_matrix(labels_test, prediction_test).ravel() |
accuracy = (tp+ tn) / (tn+ fp+ fn+ tp) | accuracy = (tp+ tn) / (tn+ fp+ fn+ tp) | ||
sensitivity = tp/ (tp+ fn) | sensitivity = tp/ (tp+ fn) | ||
59번째 줄: | 64번째 줄: | ||
recall = tp/ (tp+ fn) | recall = tp/ (tp+ fn) | ||
fscore= 2 * precision * recall / (precision + recall) | fscore= 2 * precision * recall / (precision + recall) | ||
+ | |||
+ | fig, axs = plt.subplots(1, 2, figsize=(1000, 1000)) | ||
+ | axs[0].scatter([row[1] for row in data_test], [row[0] for row in data_test],s = None,c = labels_test) | ||
+ | axs[1].scatter([row[1] for row in data_test], [row[0] for row in data_test],s = None,c = prediction_test) | ||
+ | |||
+ | plt.show() | ||
</source> | </source> |
2018년 11월 15일 (목) 17:15 판
import csv
import pickle
# Column names as they appear in the header row of the NHIS CSV file.
HEIGHT_COLUMN = "신장(5Cm단위)"
WEIGHT_COLUMN = "체중(5Kg 단위)"
WAIST_COLUMN = "허리둘레"


def compute_bmi(height_cm, weight_kg):
    """Return the body-mass index rounded to two decimals.

    BMI is weight in kilograms divided by the square of the height in
    metres; with the height given in centimetres that is
    ``10000 * weight / height ** 2``.  (The previous revision had height
    and weight swapped in this formula.)
    """
    return round(10000 * weight_kg / height_cm ** 2, 2)


def classify_bmi(bmi):
    """Map a BMI value to the ``overweight`` label 1-4.

    Thresholds are checked from the highest down with ``elif`` so that a
    larger BMI is never overwritten by a lower class (the previous chain of
    independent ``if`` statements always ended on the last matching one).
    """
    if bmi > 40:
        return 4
    elif bmi > 35:
        return 3
    elif bmi > 30:
        return 2
    return 1


def build_record(row):
    """Convert one CSV row (a mapping keyed by header name) to a record.

    Returns ``None`` when height, weight or waist is missing/empty, or when
    the height is not positive (BMI would be undefined), so the caller can
    skip the row.
    """
    if not all(row.get(col) for col in (HEIGHT_COLUMN, WEIGHT_COLUMN, WAIST_COLUMN)):
        return None
    height = int(row[HEIGHT_COLUMN])
    weight = int(row[WEIGHT_COLUMN])
    if height <= 0:
        return None
    return {
        "height": height,
        "weight": weight,
        "waist": int(row[WAIST_COLUMN]),
        "overweight": classify_bmi(compute_bmi(height, weight)),
    }


def preprocess(csv_path="./NHIS_OPEN_GJ_2015.csv",
               pkl_path="./preprocessing2015.pkl",
               encoding=None):
    """Read the NHIS CSV, keep complete rows, and pickle the records.

    ``encoding=None`` keeps the platform default, as before.
    NOTE(review): the file may need an explicit encoding such as "cp949"
    on non-Korean locales -- confirm against the actual data file.

    Returns the list of record dicts that was written to ``pkl_path``.
    """
    # newline="" is what the csv module documents for files handed to a reader.
    with open(csv_path, newline="", encoding=encoding) as csvfile:
        # DictReader consumes the header row and yields one dict per data row.
        records = (build_record(row) for row in csv.DictReader(csvfile))
        table = [record for record in records if record is not None]
    with open(pkl_path, "wb+") as fw:
        pickle.dump(table, fw, protocol=pickle.HIGHEST_PROTOCOL)
    return table


if __name__ == "__main__":
    preprocess()
SVM
from sklearn.svm import SVC
from matplotlib import pyplot as plt
import pickle
TRAIN_PICKLE = "preprocessing2016.pkl"
TEST_PICKLE = "preprocessing2015.pkl"
SAMPLE_STEP = 1000  # keep every 1000th record so SVC training stays small


def load_records(path, step=SAMPLE_STEP):
    """Load a pickled list of records and keep every ``step``-th one.

    NOTE: pickle can execute arbitrary code while loading; only use this on
    files produced by the preprocessing script, never on untrusted input.
    """
    # A context manager closes the handle; the bare open() call leaked it.
    with open(path, "rb") as fh:
        records = pickle.load(fh)
    return records[::step]


def split_features_labels(records):
    """Return ``(features, labels)`` for a list of record dicts.

    Each feature row is ``[weight, height]``; the label is True when the
    record's ``overweight`` class is greater than 2.
    """
    features = [[row["weight"], row["height"]] for row in records]
    labels = [row["overweight"] > 2 for row in records]
    return features, labels


def _ratio(numerator, denominator):
    """Divide, returning NaN instead of failing on a zero denominator."""
    return numerator / denominator if denominator else float("nan")


def compute_metrics(tn, fp, fn, tp):
    """Derive the evaluation metrics from the four confusion-matrix counts.

    Returns a dict with accuracy, sensitivity, specificity, precision,
    recall and fscore.  A ratio whose denominator is zero is NaN.
    """
    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    return {
        "accuracy": _ratio(tp + tn, tn + fp + fn + tp),
        "sensitivity": recall,
        "specificity": _ratio(tn, fp + tn),
        "precision": precision,
        "recall": recall,
        "fscore": _ratio(2 * precision * recall, precision + recall),
    }


def train_and_evaluate():
    """Train a polynomial-kernel SVC, print its test metrics and plot them.

    Trains on the sampled 2016 records, evaluates on the sampled 2015
    records, prints the metric dict, then shows two scatter plots of the
    test set coloured by actual and by predicted label.  Returns the metric
    dict.
    """
    ## 4. Evaluate the model (imported here so the helpers stay importable
    ## without scikit-learn)
    from sklearn.metrics import confusion_matrix, roc_auc_score

    data_train, labels_train = split_features_labels(load_records(TRAIN_PICKLE))
    data_test, labels_test = split_features_labels(load_records(TEST_PICKLE))

    ## 3-1) training
    clf = SVC(kernel="poly", probability=True)
    clf.fit(data_train, labels_train)

    ## 3-2) prediction
    prediction_test = clf.predict(data_test)
    probability_test = clf.predict_proba(data_test)[:, 1]

    ## 4-2) Confusion matrix
    # Fixing the label order guarantees a 2x2 matrix, so the four-way unpack
    # works even if the test sample happens to contain a single class.
    tn, fp, fn, tp = confusion_matrix(
        labels_test, prediction_test, labels=[False, True]
    ).ravel()
    metrics = compute_metrics(tn, fp, fn, tp)

    ## 4-1) AUC
    # roc_auc_score raises ValueError when only one class is present in the
    # true labels, so it is only computed when both classes occur.
    if len(set(labels_test)) > 1:
        metrics["auc"] = roc_auc_score(labels_test, probability_test)
    else:
        metrics["auc"] = float("nan")
    print(metrics)

    # figsize is in inches; the previous (1000, 1000) asked for a canvas far
    # beyond what any backend can allocate.
    fig, axs = plt.subplots(1, 2, figsize=(12, 6))
    heights = [row[1] for row in data_test]
    weights = [row[0] for row in data_test]
    axs[0].scatter(heights, weights, c=labels_test)
    axs[0].set_title("actual")
    axs[1].scatter(heights, weights, c=prediction_test)
    axs[1].set_title("predicted")
    for ax in axs:
        ax.set_xlabel("height")
        ax.set_ylabel("weight")
    plt.show()
    return metrics


if __name__ == "__main__":
    train_and_evaluate()