import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import models, layers
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
# --- Hyperparameters ---
# Window length: the previous 12 months of passenger counts are used to
# predict the passenger count of the 13th month.
MY_PAST: int = 12
# Fraction of the windowed samples placed in the training set.
MY_SPLIT: float = 0.8
# Number of hidden-state units.
MY_NEURON: int = 500
# Shape of one input sample: MY_PAST time steps with one value each.
MY_SHAPE: tuple = (MY_PAST, 1)
# Training schedule: number of epochs and mini-batch size.
MY_EPOCH: int = 100
MY_BATCH: int = 64
# Load only column index 1 (the passenger counts). The CSV is read with
# header=None, so its first line is treated as data, not as column names.
df = pd.read_csv('airline.csv', header=None, usecols=[1])
# NOTE(review): `display` is neither defined nor imported in this file; it
# is presumably the IPython/Jupyter built-in -- confirm before running this
# as a plain script.
display(df.head())

# Convert the pandas data into a float NumPy array.
raw_DB = np.array(df).astype(float)
print('== DB INFO (RAW) ==')
print('DB shape: ', raw_DB.shape)
print('All data before transformation:')
print(raw_DB.flatten())

# Fit the min-max scaler on the raw values, then rescale them. The fitted
# `scaler` object is kept so results can be mapped back to the original
# units later on.
scaler = MinMaxScaler().fit(raw_DB)
raw_DB = scaler.transform(raw_DB)
print('\nAll data after transformation:')
print(raw_DB.flatten())
# Cut the series into overlapping sliding windows of MY_PAST + 1
# consecutive rows: window i covers rows i .. i + MY_PAST (inclusive),
# i.e. rows 0-12, 1-13, 2-14, ... when MY_PAST is 12.
# An RNN predicts a future value from the given past values, so the data
# is split into time-step windows to build the training samples.
if len(raw_DB) <= MY_PAST:
    # Without this check an empty window list only fails further down with
    # an unhelpful IndexError.
    raise ValueError(
        'need more than {} rows to build a window, got {}'.format(
            MY_PAST, len(raw_DB)
        )
    )
data = [raw_DB[i:i + MY_PAST + 1] for i in range(len(raw_DB) - MY_PAST)]
reshaped_data = np.array(data)

# Shuffle the windows in place along the first axis; only the order of the
# windows changes, the rows inside each window stay in sequence.
# NOTE(review): neighbouring windows overlap and the train/test split is
# taken after this shuffle, so test windows share rows with training
# windows -- consider a chronological split if that leakage matters.
np.random.shuffle(reshaped_data)
print('\n== DB INFO (SPLIT) ==')
print('DB shape:', reshaped_data.shape)
print('\nGroup 0:\n', reshaped_data[0])
print('\nGroup 1:\n', reshaped_data[1])
# Separate every window into model input and target: the first MY_PAST
# rows (indices 0 .. MY_PAST - 1) are the input sequence, the last row of
# the window is the value to predict.
Y_data = reshaped_data[:, -1]
X_data = reshaped_data[:, :MY_PAST]

# The leading MY_SPLIT fraction of the windows becomes the training set,
# everything after the boundary becomes the test set.
split_boundary = int(len(reshaped_data) * MY_SPLIT)
X_train, X_test = X_data[:split_boundary], X_data[split_boundary:]
Y_train, Y_test = Y_data[:split_boundary], Y_data[split_boundary:]

# Report the shapes of the four resulting arrays.
print('\n== DB SHAPE INFO ==')
print('X_train shape = ', X_train.shape)
print('X_test shape = ', X_test.shape)
print('Y_train shape = ', Y_train.shape)
print('Y_test shape = ', Y_test.shape)
# Network: an LSTM layer that takes MY_SHAPE-shaped samples, followed by a
# ReLU Dense layer of the same width and a single linear output unit.
model = Sequential([
    LSTM(MY_NEURON, input_shape=MY_SHAPE),
    Dense(MY_NEURON, activation='relu'),
    Dense(1, activation='linear'),
])
model.summary()
# Compile with mean-squared-error loss and the Adam optimizer, train on
# the training windows, then save the trained model to 'chap4.h5'.
model.compile(optimizer='adam', loss='mse')
model.fit(
    X_train,
    Y_train,
    batch_size=MY_BATCH,
    epochs=MY_EPOCH,
    verbose=1,
)
model.save('chap4.h5')
# Predict on the test windows and map the scaled values back to the
# original range. This must use the same `scaler` that performed the
# initial transformation, because it holds the minimum and maximum that
# were fitted on the raw data.
predict = scaler.inverse_transform(model.predict(X_test))
Y_test = scaler.inverse_transform(Y_test)

# Plot predicted output (red dotted line) against the real output
# (green solid line).
plt.plot(predict, 'r:')
plt.plot(Y_test, 'g-')
plt.legend(['predict', 'true'])
plt.show()
05. LSTM을 이용한 기온 예측 (다변량 시계열 데이터 예측) (1) | 2023.12.26 |
---|---|
03. LSTM(Long Short-Term Memory) (0) | 2023.12.21 |
02. RNN이란 무엇인가? (0) | 2023.12.21 |
댓글 영역