"빅데이터 입문 기말 리포트"의 두 판 사이의 차이

58번째 줄: 58번째 줄:
 
## 4-2) Confusion matrix
 
## 4-2) Confusion matrix
 
tn, fp, fn, tp= confusion_matrix(labels_test, prediction_test).ravel()
 
tn, fp, fn, tp= confusion_matrix(labels_test, prediction_test).ravel()
 +
accuracy = (tp+ tn) / (tn+ fp+ fn+ tp)
 +
sensitivity = tp/ (tp+ fn)
 +
specificity = tn/ (fp+ tn)
 +
precision = tp/ (tp+ fp)
 +
recall = tp/ (tp+ fn)
 +
fscore= 2 * precision * recall / (precision + recall)
 +
 +
fig, axs = plt.subplots(1, 2, figsize=(1000, 1000))
 +
axs[0].scatter([row[1] for row in data_test], [row[0] for row in data_test],s = None,c = labels_test)
 +
axs[1].scatter([row[1] for row in data_test], [row[0] for row in data_test],s = None,c = prediction_test)
 +
 +
plt.show()
 +
</source>
 +
==Deeplearning==
 +
<source lang="python3">
 +
import pickle
 +
from matplotlib import pyplot as plt
 +
from sklearn.neural_network import MLPClassifier
 +
clf= MLPClassifier(hidden_layer_sizes=(1024,256,32),
 +
    activation='logistic', # sigmoid
 +
    solver='adam', # adamgradient optimizer
 +
    learning_rate_init=0.01)
 +
x = pickle.load(open('preprocessing2016.pkl', "rb"))
 +
y = pickle.load(open('preprocessing2015.pkl', "rb"))
 +
 +
x = [x[i] for i in range(len(x)) if i % 100 == 0]
 +
y = [y[i] for i in range(len(y)) if i % 100 == 0]
 +
data_test = [[row["weight"], row["height"]] for row in y]
 +
labels_test = [row["overweight"] for row in y]
 +
data_train = [[row["weight"], row["height"]] for row in x]
 +
 +
labels_train = [row["overweight"] for row in x]
 +
 +
 +
## 3-1) training
 +
clf.fit(data_train, labels_train)
 +
## 3-2) prediction
 +
prediction_test= clf.predict(data_test)
 +
probability_test= clf.predict_proba(data_test)[:,1]
 +
## 4. Evaluate the model
 +
from sklearn.metrics import confusion_matrix, roc_auc_score
 +
## 4-1) AUC
 +
#auc= roc_auc_score(labels_test, probability_test)
 +
## 4-2) Confusion matrix
 +
#tn, fp, fn, tp= confusion_matrix(labels_test, prediction_test).ravel()
 
accuracy = (tp+ tn) / (tn+ fp+ fn+ tp)
 
accuracy = (tp+ tn) / (tn+ fp+ fn+ tp)
 
sensitivity = tp/ (tp+ fn)
 
sensitivity = tp/ (tp+ fn)

2018년 11월 15일 (목) 17:50 판

import csv
import pickle
# Column names exactly as they appear in the NHIS CSV header.
HEIGHT_COLUMN = "신장(5Cm단위)"
WEIGHT_COLUMN = "체중(5Kg 단위)"
WAIST_COLUMN = "허리둘레"


def compute_bmi(height_cm, weight_kg):
    """Return the body-mass index, rounded to two decimals.

    BMI is weight in kilograms divided by the square of the height in
    metres; with the height given in centimetres that is
    ``10000 * weight / height ** 2``.
    """
    return round(10000 * weight_kg / (height_cm ** 2), 2)


def obesity_label(bmi):
    """Map a BMI value to the label stored under ``"overweight"``.

    Returns 4 for BMI > 40, 3 for BMI > 35, 2 for BMI > 30, otherwise 1.
    The thresholds are tested from highest to lowest with early returns so
    that a lower threshold cannot overwrite a higher label.
    """
    if bmi > 40:
        return 4
    if bmi > 35:
        return 3
    if bmi > 30:
        return 2
    return 1


def convert_row(row):
    """Convert one CSV record (column name -> string) to a feature dict.

    Returns ``None`` when height, weight or waist is missing, or when the
    height is not positive (the BMI would be undefined), so that the caller
    can skip the record.
    """
    raw_height = row[HEIGHT_COLUMN]
    raw_weight = row[WEIGHT_COLUMN]
    raw_waist = row[WAIST_COLUMN]
    # Empty string = blank cell; None = cell absent in a short row.
    if not raw_height or not raw_weight or not raw_waist:
        return None

    height = int(raw_height)
    weight = int(raw_weight)
    if height <= 0:
        return None

    return {
        "height": height,
        "weight": weight,
        "waist": int(raw_waist),
        "overweight": obesity_label(compute_bmi(height, weight)),
    }


def main(csv_path="./NHIS_OPEN_GJ_2015.csv",
         pkl_path="./preprocessing2015.pkl"):
    """Read the NHIS CSV, keep the usable records and pickle them as a list."""
    # NOTE(review): the file is opened with the platform default encoding,
    # as before; pass an explicit encoding if the CSV is not in that codec.
    with open(csv_path, newline="") as csvfile:
        converted = (convert_row(row) for row in csv.DictReader(csvfile))
        table = [record for record in converted if record is not None]

    with open(pkl_path, "wb") as fw:
        pickle.dump(table, fw, protocol=pickle.HIGHEST_PROTOCOL)


if __name__ == "__main__":
    main()

SVM

from sklearn.svm import SVC
from matplotlib import pyplot as plt
import pickle
# Polynomial-kernel SVM; probability=True is required for predict_proba below.
clf = SVC(kernel='poly', probability=True)

# NOTE(review): pickle.load can execute arbitrary code; only load files that
# were produced by the preprocessing script.
with open('preprocessing2016.pkl', "rb") as train_file:
    x = pickle.load(train_file)
with open('preprocessing2015.pkl', "rb") as test_file:
    y = pickle.load(test_file)

# Subsample: keep every 1000th record of each set.
x = x[::1000]
y = y[::1000]

# Features are [weight, height]; the target is True when the stored
# "overweight" label is greater than 2.
data_test = [[row["weight"], row["height"]] for row in y]
labels_test = [row["overweight"] > 2 for row in y]
data_train = [[row["weight"], row["height"]] for row in x]
labels_train = [row["overweight"] > 2 for row in x]

## 3-1) training
clf.fit(data_train, labels_train)
## 3-2) prediction
prediction_test = clf.predict(data_test)
probability_test = clf.predict_proba(data_test)[:, 1]
## 4. Evaluate the model
from sklearn.metrics import confusion_matrix, roc_auc_score
## 4-1) AUC
#auc= roc_auc_score(labels_test, probability_test)
## 4-2) Confusion matrix
# Pin the label order so the matrix is always 2x2 and the four-way unpack
# works even when the test sample or the predictions contain a single class.
tn, fp, fn, tp = confusion_matrix(
    labels_test, prediction_test, labels=[False, True]
).ravel()
accuracy = (tp + tn) / (tn + fp + fn + tp)
sensitivity = tp / (tp + fn)
specificity = tn / (fp + tn)
precision = tp / (tp + fp)
recall = tp / (tp + fn)
fscore = 2 * precision * recall / (precision + recall)

# Side-by-side scatter plots (x = height, y = weight): true labels on the
# left, predictions on the right. figsize is in inches, so keep it small.
heights = [row[1] for row in data_test]
weights = [row[0] for row in data_test]
fig, axs = plt.subplots(1, 2, figsize=(12, 6))
axs[0].scatter(heights, weights, c=labels_test)
axs[0].set_title("labels")
axs[1].scatter(heights, weights, c=prediction_test)
axs[1].set_title("predictions")
for ax in axs:
    ax.set_xlabel("height")
    ax.set_ylabel("weight")

plt.show()

Deep learning

import pickle
from matplotlib import pyplot as plt
from sklearn.neural_network import MLPClassifier
# Multi-layer perceptron with three hidden layers (1024, 256, 32 units).
clf = MLPClassifier(
    hidden_layer_sizes=(1024, 256, 32),
    activation='logistic',  # sigmoid
    solver='adam',  # Adam gradient-based optimizer
    learning_rate_init=0.01,
)

# NOTE(review): pickle.load can execute arbitrary code; only load files that
# were produced by the preprocessing script.
with open('preprocessing2016.pkl', "rb") as train_file:
    x = pickle.load(train_file)
with open('preprocessing2015.pkl', "rb") as test_file:
    y = pickle.load(test_file)

# Subsample: keep every 100th record of each set.
x = x[::100]
y = y[::100]

# Features are [weight, height]; the target is the raw "overweight" label,
# so this is a multi-class problem (unlike the thresholded SVM target).
data_test = [[row["weight"], row["height"]] for row in y]
labels_test = [row["overweight"] for row in y]
data_train = [[row["weight"], row["height"]] for row in x]
labels_train = [row["overweight"] for row in x]

## 3-1) training
clf.fit(data_train, labels_train)
## 3-2) prediction
prediction_test = clf.predict(data_test)
# Column 1 of the probability matrix, i.e. the second entry of clf.classes_.
probability_test = clf.predict_proba(data_test)[:, 1]
## 4. Evaluate the model
from sklearn.metrics import confusion_matrix, roc_auc_score
## 4-1) AUC
#auc= roc_auc_score(labels_test, probability_test)
## 4-2) Confusion matrix
# The matrix is K x K here, so it cannot be unpacked into four scalars.
# Derive one-vs-rest counts per class instead (rows = true labels,
# columns = predictions); tp, fp, fn and tn are arrays with one entry per class.
cm = confusion_matrix(labels_test, prediction_test)
tp = cm.diagonal()
fp = cm.sum(axis=0) - tp
fn = cm.sum(axis=1) - tp
tn = cm.sum() - (tp + fp + fn)
# Overall accuracy: correctly classified samples over all samples.
accuracy = tp.sum() / cm.sum()
# The remaining metrics are per-class arrays.
sensitivity = tp / (tp + fn)
specificity = tn / (fp + tn)
precision = tp / (tp + fp)
recall = tp / (tp + fn)
fscore = 2 * precision * recall / (precision + recall)

# Side-by-side scatter plots (x = height, y = weight): true labels on the
# left, predictions on the right. figsize is in inches, so keep it small.
heights = [row[1] for row in data_test]
weights = [row[0] for row in data_test]
fig, axs = plt.subplots(1, 2, figsize=(12, 6))
axs[0].scatter(heights, weights, c=labels_test)
axs[0].set_title("labels")
axs[1].scatter(heights, weights, c=prediction_test)
axs[1].set_title("predictions")
for ax in axs:
    ax.set_xlabel("height")
    ax.set_ylabel("weight")

plt.show()