# 1. 패키지 로딩 (Load packages)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, roc_auc_score, precision_score
# 2. 하이퍼 파라미터 설정 (Set hyperparameters)
# Hyperparameters used by the model-building, splitting and training steps.
MY_SPLIT = 0.4   # test_size of the first train_test_split call
MY_BATCH = 32    # batch_size passed to model.fit
MY_EPOCH = 100   # epochs passed to model.fit
INPUT_DIM = 13   # input_dim of the first Dense layer
# 3. 데이터 로딩 및 확인 (Load and inspect the data)
# Load 'heart.xls' into a DataFrame and print a quick overview of it.
data = pd.read_excel('heart.xls')
print(data.shape)
# NOTE(review): display() is not imported in this file; presumably it is the
# IPython/Jupyter built-in — confirm the script is only run in a notebook.
display(data.head())
# describe() only returns the summary table; pass it to display() so the
# statistics are actually shown instead of being computed and discarded.
display(data.describe())
# info() prints its report itself, so it needs no wrapper.
data.info()

# Check for missing values: count of NaN entries per column.
print(data.isna().sum())
# 4. 데이터 스케일링 (Scale the data)
from sklearn.preprocessing import StandardScaler

# Separate the 'target' label column from the feature columns.
y = data['target']
X = data.drop('target', axis=1)

# Standardize the feature columns and wrap the resulting array back into a
# DataFrame so the original column names are kept.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# further down, so held-out rows influence the scaling statistics — confirm
# whether that is intended.
scaler = StandardScaler()
scaled_data = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
print(scaled_data.describe())

# Box plot of the scaled features; showmeans=True also marks each mean.
boxplot = scaled_data.boxplot(figsize=(10,7), showmeans=True)
plt.show()
# 5. 학습/평가 데이터 분할 (Split into training/evaluation data)
# First split: hold out a MY_SPLIT fraction of the rows, train on the rest.
X_train, X_test, y_train, y_test = train_test_split(scaled_data, y, test_size=MY_SPLIT, random_state=10)
# Print the shapes explicitly: a bare tuple expression is only echoed when it
# is the last statement of a notebook cell and is silently dropped otherwise.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# Second split: divide the held-out rows in half into validation and test sets.
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=0)
print(X_val.shape, X_test.shape, y_val.shape, y_test.shape)
# 6. 모델 생성 (Build the model)
from keras import regularizers
from keras.layers import Dropout

# Network: two 1000-unit tanh layers with L2 kernel regularization, a 50%
# dropout layer, and a single sigmoid output unit.
model = Sequential([
    Dense(
        1000,
        activation='tanh',
        input_dim=INPUT_DIM,
        kernel_regularizer=regularizers.l2(0.02),
    ),
    Dense(
        1000,
        activation='tanh',
        kernel_regularizer=regularizers.l2(0.1),
    ),
    Dropout(rate=0.5),
    Dense(1, activation='sigmoid'),
])

# In the summary's "Output Shape" column, e.g. (None, 1000), None stands for
# the batch size, which is shown as None because it is not fixed yet.
model.summary()
# 7. 모델 컴파일 및 학습 (Compile and train the model)
import datetime

from keras.callbacks import EarlyStopping
from keras.callbacks import TensorBoard

# log_dir: directory the TensorBoard logs are written to; a timestamp suffix
# gives every run its own folder. (Original author's note: the path must not
# contain Korean characters.)
log_dir = 'c:\\Logs\\'+datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Stop training once the validation loss has not improved for 3 epochs.
# The monitored name must be 'val_loss': the previous spelling 'val_loass'
# matched no logged metric, so early stopping never took effect.
early_stop = EarlyStopping(monitor='val_loss', mode='min', patience=3)

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(
    X_train,
    y_train,
    batch_size=MY_BATCH,
    epochs=MY_EPOCH,
    validation_data=(X_val, y_val),
    verbose=1,
    callbacks=[tensorboard, early_stop],
)

# Save the trained model to disk.
model.save('heart-disease.h5')
# 8. 예측 및 모델 평가 (Predict and evaluate the model)
# Model outputs for the test rows, thresholded at 0.5 into boolean labels.
y_pred_prob = model.predict(X_test)
y_pred = y_pred_prob > 0.5

print('\n== CONFUSION MATRIX ==')
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

# evaluate() result: element 0 is reported as the loss and element 1 as the
# accuracy (the single metric the model was compiled with).
score = model.evaluate(X_test, y_test, verbose=1)
loss_value, accuracy_value = score[0], score[1]
print('Loss:', loss_value)
print('Accuracy:', accuracy_value)

# Precision uses the thresholded labels; AUC uses the raw model outputs.
precision = precision_score(y_test, y_pred)
print('Precision:', precision)
auc = roc_auc_score(y_test, y_pred_prob)
print('AUC:', auc)
# 댓글 영역 (comments section)