There are two ways to train an XGBoost model: the native xgboost library API and the scikit-learn (sklearn) interface. Either one is fine to use, but when you swap in a custom evaluation function or objective (loss) function in place of the built-in ones, the usage differs between the two, so I am writing this post to record those differences.
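In short, the two interfaces expect different callback signatures; both are shown in full in the sections below.

# Native API:   obj(preds, dtrain)        / custom_metric(preds, dtrain) -> (name, value)
# Sklearn API:  objective(labels, preds)  / eval_metric(labels, preds)   -> value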
XGBoost
Basic training code
import xgboost as xgb

X_train, y_train = ...
X_valid, y_valid = ...

# The native API takes data wrapped in DMatrix objects
dtrain = xgb.DMatrix(X_train, label=y_train)
dvalid = xgb.DMatrix(X_valid, label=y_valid)

params = {
    'eta': 0.1,
    'n_jobs': -1,
}

model = xgb.train(
    params,
    dtrain,
    num_boost_round=1000,
    early_stopping_rounds=10,
    evals=[(dtrain, 'train'), (dvalid, 'valid')],
)
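After early stopping, the booster remembers the best round. A minimal prediction sketch (iteration_range assumes xgboost >= 1.4; older versions used ntree_limit instead):

# Predict with only the trees up to the best round found by early stopping
preds = model.predict(dvalid, iteration_range=(0, model.best_iteration + 1))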
Changing the evaluation and loss functions
I used SMAPE as the evaluation function and a weighted MSE as the loss function.
import numpy as np

def SMAPE(true, pred):
    return np.mean((np.abs(true - pred)) / (np.abs(true) + np.abs(pred))) * 100

def weighted_mse_obj(alpha=1):
    # Weighted MSE: under-predictions (residual > 0) are penalized alpha times harder
    def weighted_mse(preds, dtrain):
        labels = dtrain.get_label()
        residuals = labels - preds
        grad = np.where(residuals > 0, 2 * alpha * residuals, 2 * residuals)
        hess = np.where(residuals > 0, 2 * alpha, 2.0)
        # XGBoost expects the gradient w.r.t. the prediction; since
        # residuals = labels - preds, the sign is flipped on return
        return -grad, hess
    return weighted_mse

def smape_eval(preds, dtrain):
    labels = dtrain.get_label()
    return 'smape_eval', SMAPE(labels, preds)
params = {
    'eta': 0.1,
    'n_jobs': -1,
}

model = xgb.train(
    params,
    dtrain,
    num_boost_round=1000,
    early_stopping_rounds=10,
    evals=[(dtrain, 'train'), (dvalid, 'valid')],
    obj=weighted_mse_obj(2),
    custom_metric=smape_eval,  # xgboost >= 1.6; older versions use feval instead
)
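With early stopping enabled, the best round and the corresponding validation score are stored on the returned booster; the last entry of evals ('valid' here) is the one that drives early stopping:

print(model.best_iteration, model.best_score)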
Scikit-Learn Interface
Basic training code
from xgboost import XGBRegressor

X_train, y_train = ...
X_valid, y_valid = ...

reg = XGBRegressor(
    learning_rate=0.1,
    n_estimators=1000,
    early_stopping_rounds=10,  # constructor argument in xgboost >= 1.6; older versions pass it to fit()
    n_jobs=-1)

reg.fit(
    X_train, y_train,
    eval_set=[(X_train, y_train), (X_valid, y_valid)],
    verbose=True)
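The fitted estimator behaves like any scikit-learn regressor; in recent xgboost versions, predict() automatically uses the best iteration found by early stopping:

preds = reg.predict(X_valid)
print(reg.best_iteration, reg.best_score)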
Changing the evaluation and loss functions
I used SMAPE as the evaluation function and a weighted MSE as the loss function.
import numpy as np

def weighted_mse_obj(alpha=1):
    # Same loss as above, but the sklearn interface passes (labels, preds)
    # directly instead of (preds, dtrain)
    def weighted_mse(labels, preds):
        residuals = labels - preds
        grad = np.where(residuals > 0, 2 * alpha * residuals, 2 * residuals)
        hess = np.where(residuals > 0, 2 * alpha, 2.0)
        return -grad, hess
    return weighted_mse

def SMAPE(true, pred):
    return np.mean((np.abs(true - pred)) / (np.abs(true) + np.abs(pred))) * 100
reg = XGBRegressor(
    learning_rate=0.1,
    n_estimators=1000,
    objective=weighted_mse_obj(alpha=2),
    eval_metric=SMAPE,  # a plain (y_true, y_pred) callable works in xgboost >= 1.6
    early_stopping_rounds=10,
    n_jobs=-1)

reg.fit(
    X_train, y_train,
    eval_set=[(X_train, y_train), (X_valid, y_valid)],
    verbose=True)
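The per-round history of the custom metric can be inspected after fitting. The exact dictionary keys depend on the xgboost version; a typical layout is sketched below:

history = reg.evals_result()
# e.g. history['validation_1']['SMAPE'] holds the validation SMAPE per round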
Evaluation Functions
import numpy as np

def MSE(true, pred):
    return ((pred - true) ** 2).mean()

def MAE(true, pred):
    return np.abs(pred - true).mean()

def RMSE(true, pred):
    return np.sqrt(MSE(true, pred))

def MAPE(true, pred):
    return np.mean(np.abs((true - pred) / true)) * 100

def MPE(true, pred):
    return np.mean((true - pred) / true) * 100

def SMAPE(true, pred):
    return np.mean((np.abs(true - pred)) / (np.abs(true) + np.abs(pred))) * 100
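A quick sanity check with toy arrays (the values are chosen arbitrarily for illustration):

true = np.array([100.0, 200.0, 300.0])
pred = np.array([110.0, 190.0, 330.0])
print(MAE(true, pred), RMSE(true, pred), SMAPE(true, pred))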
Loss Functions
def log_cosh_obj(labels, preds):
    # log-cosh loss: behaves like MSE near zero and like MAE for large residuals
    residuals = labels - preds
    grad = np.tanh(residuals)
    hess = 1 - np.tanh(residuals) ** 2
    return -grad, hess

def pseudo_huber_obj(labels, preds):
    # Pseudo-Huber loss: a smooth approximation of the Huber loss
    residuals = labels - preds
    delta = 2
    scale = 1.0 + (residuals / delta) ** 2
    grad = residuals / np.sqrt(scale)
    hess = 1.0 / (scale * np.sqrt(scale))  # i.e. 1.0 / scale ** 1.5
    return -grad, hess

def assym_obj(labels, preds):
    # Asymmetric MSE: over-predictions (residual < 0) are penalized 50x harder
    residuals = labels - preds
    grad = np.where(residuals < 0, 2 * 50.0 * residuals, 2 * residuals)
    hess = np.where(residuals < 0, 2 * 50.0, 2.0)
    return -grad, hess

def cubic_obj(labels, preds):
    # Quartic loss (its gradient is cubic): punishes large residuals very hard
    residuals = labels - preds
    grad = 4 * residuals ** 3
    hess = 12 * residuals ** 2
    return -grad, hess

def weighted_mse_obj(alpha=1):
    # Weighted MSE: under-predictions (residual > 0) are penalized alpha times harder
    def weighted_mse(labels, preds):
        residuals = labels - preds
        grad = np.where(residuals > 0, 2 * alpha * residuals, 2 * residuals)
        hess = np.where(residuals > 0, 2 * alpha, 2.0)
        return -grad, hess
    return weighted_mse
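Since every objective above returns the gradient with respect to the prediction, a quick way to verify the sign convention is to compare weighted_mse_obj with alpha=1 against the plain MSE gradient 2 * (pred - label):

labels = np.array([1.0, 2.0, 3.0])
preds = np.array([1.5, 1.0, 3.0])
g, h = weighted_mse_obj(alpha=1)(labels, preds)
assert np.allclose(g, 2 * (preds - labels))  # d/dpred of (label - pred)^2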
Things to watch out for
- Check the order of the labels and preds arguments: the native API passes (preds, dtrain), while the sklearn interface passes (labels, preds).
- residual = actual value - predicted value (the sign makes a big difference; see the sketch below).
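A small illustration of the second point, using weighted_mse_obj with alpha=2: swapping the argument order flips the residual sign, so the heavier penalty lands on the wrong side.

labels = np.array([2.0])
preds = np.array([1.0])                           # an under-prediction
g1, _ = weighted_mse_obj(alpha=2)(labels, preds)  # correct order
g2, _ = weighted_mse_obj(alpha=2)(preds, labels)  # swapped order
print(g1, g2)  # [-4.] vs [2.]: only the correct order applies the alpha weight here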