AI/Machine Learning

[ML] sklearn LinearRegression 튜토리얼

운호(Noah) 2020. 11. 11. 11:19
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

# Load the input table; the path is resolved relative to the current
# working directory.
input_df = pd.read_csv('./spark-memory.csv')

# Separate the predictor columns (features) from the column to predict (target).
# NOTE(review): the target label below is a Korean phrase rather than an
# identifier-style name like the feature columns -- confirm it matches the
# CSV header exactly.
feature_columns = ['matrix_dimension', 'left_nnz', 'right_nnz']
feature_df = input_df[feature_columns]
target_df = input_df['왼쪽, 오른쪽 희소행렬 저장']

# Split into training and test sets, holding out 20% of the rows for testing.
# No random_state is passed, so the partition can differ from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    feature_df,
    target_df,
    test_size=0.2,
)

# MAPE (mean absolute percentage error)
def mean_absolute_percentage_error(y_test, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Computes ``mean(|y_test - y_pred| / |y_test|) * 100`` element-wise.

    Args:
        y_test: Array-like of reference (true) values.
        y_pred: Array-like of predicted values, compatible in shape with
            ``y_test``.

    Returns:
        The error as a NumPy float in percent (``10.0`` means 10 %).

    Notes:
        Both inputs are converted to float64 before subtracting, so
        unsigned-integer inputs cannot wrap around. The denominator is
        clamped to machine epsilon, so a zero reference value yields a
        finite (very large) contribution when the prediction is wrong and
        zero when it is exact, instead of ``inf``/``nan``.
    """
    y_test = np.asarray(y_test, dtype=np.float64)
    y_pred = np.asarray(y_pred, dtype=np.float64)

    # Same zero-division guard scikit-learn applies in its metric of the
    # same name: never divide by anything smaller than float64 epsilon.
    denominator = np.maximum(np.abs(y_test), np.finfo(np.float64).eps)
    return np.mean(np.abs(y_test - y_pred) / denominator) * 100

# Train a linear regression model on the training split.
reg = LinearRegression()
reg.fit(X_train, y_train)

# Predict the target for the held-out test rows.
y_pred = reg.predict(X_test)

# Compare the predictions with the actual values and report each metric.
mape_value = mean_absolute_percentage_error(y_test, y_pred)
print("MAPE : ", mape_value)
r2_value = r2_score(y_test, y_pred)
print("R^2 : ", r2_value)

# Print the fitted weights (coefficients) and bias (intercept).
# Configure NumPy array printing first: 10 digits of precision, with
# scientific notation suppressed.
np.set_printoptions(precision=10, suppress=True)
print("\nweight : ", reg.coef_)
print("bias : ", reg.intercept_)