1. Random Forest baseline
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE

# Load the data
file_path = "/content/drive/My Drive/Colab Notebooks/appendix_cancer dataset/appendix_cancer_prediction_dataset.csv"
df = pd.read_csv(file_path)

# Drop the identifier column and rows with missing values
# (dropna runs before encoding so NaNs never reach LabelEncoder)
df = df.drop(columns=['Patient_ID'])
df = df.dropna()

# Preprocessing: convert categorical features to integers (label encoding)
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])

# Separate features (X) and target (y)
X = df.drop(columns=['Appendix_Cancer_Prediction'])
y = df['Appendix_Cancer_Prediction']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Oversample the minority class in the training set with SMOTE
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Build and train the Random Forest model
# (hyperparameters presumably from a prior search; see the sketch after this section)
optimal_depth = 24
optimal_estimator = 200
model = RandomForestClassifier(n_estimators=optimal_estimator, max_depth=optimal_depth, random_state=42)
model.fit(X_train_resampled, y_train_resampled)

# Predict on the test set
y_pred = model.predict(X_test)

# Compute evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")



2. XGBoost baseline 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)
import shap

# Load the data
file_path = "/content/drive/My Drive/Colab Notebooks/appendix_cancer dataset/appendix_cancer_prediction_dataset.csv"
df = pd.read_csv(file_path)

# Drop the identifier column and rows with missing values
df = df.drop(columns=['Patient_ID'])
df = df.dropna()

# Label-encode categorical variables
label_encoders = {}
categorical_cols = df.select_dtypes(include=['object']).columns
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Separate features (X) and target (y)
target_variable = 'Appendix_Cancer_Prediction'
X = df.drop(columns=[target_variable])
y = df[target_variable]

# Check class imbalance
print("Class Distribution Before SMOTE:")
print(y.value_counts())

# Stratified train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Oversample the training set with SMOTE
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Optional scaling; wrap the results in DataFrames so SHAP keeps feature names
scaler = StandardScaler()
X_train_resampled = pd.DataFrame(scaler.fit_transform(X_train_resampled), columns=X.columns)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X.columns, index=X_test.index)

# Class distribution after SMOTE
print("\nClass Distribution After SMOTE:")
print(pd.Series(y_train_resampled).value_counts())

# Hyperparameter tuning for XGBoost
param_grid = {
    'max_depth': [3, 5, 7, 10],
    'n_estimators': [100, 200, 300, 400, 500]
}

xgb_model = XGBClassifier(
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    gamma=0.1,
    reg_lambda=1,
    random_state=42,
    eval_metric='logloss'  # use_label_encoder was deprecated and removed in XGBoost 2.x
)

grid_search = GridSearchCV(xgb_model, param_grid, cv=3, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train_resampled, y_train_resampled)

# Report the best hyperparameters
best_params = grid_search.best_params_
print(f"Best max_depth: {best_params['max_depth']}")
print(f"Best n_estimators: {best_params['n_estimators']}")

# Retrain with the best hyperparameters
xgb_best_model = XGBClassifier(
    max_depth=best_params['max_depth'],
    n_estimators=best_params['n_estimators'],
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    gamma=0.1,
    reg_lambda=1,
    random_state=42,
    eval_metric='logloss'
)
xgb_best_model.fit(X_train_resampled, y_train_resampled)
y_pred_xgb = xgb_best_model.predict(X_test)
y_prob_xgb = xgb_best_model.predict_proba(X_test)[:, 1]  # probabilities for ROC-AUC

# Evaluation helper; ROC-AUC is computed from predicted probabilities, not hard labels
def evaluate_model(y_test, y_pred, y_prob, model_name):
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_prob)

    print(f"\n{model_name} Metrics:")
    print(f"  Accuracy: {acc:.4f}")
    print(f"  Precision: {precision:.4f}")
    print(f"  Recall: {recall:.4f}")
    print(f"  F1-score: {f1:.4f}")
    print(f"  ROC-AUC: {roc_auc:.4f}")

# Evaluate the tuned model
evaluate_model(y_test, y_pred_xgb, y_prob_xgb, "XGBoost (Tuned)")

# Visualize the confusion matrix
plt.figure(figsize=(6, 5))
sns.heatmap(confusion_matrix(y_test, y_pred_xgb), annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

# SHAP analysis (feature importance visualization)
explainer = shap.Explainer(xgb_best_model)
shap_values = explainer(X_test)

shap.summary_plot(shap_values, X_test)
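
For a compact global ranking to complement the summary plot, shap's bar plot of mean |SHAP| values can be drawn from the same explanation object; a minimal sketch (the max_display cutoff is an illustrative choice):

# Mean-|SHAP| bar chart from the explanation computed above
shap.plots.bar(shap_values, max_display=15)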

3. LightGBM baseline 
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
import lightgbm as lgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import matplotlib.pyplot as plt

# File path
file_path = "/content/drive/My Drive/Colab Notebooks/appendix_cancer dataset/appendix_cancer_prediction_dataset.csv"

# Load the data
df = pd.read_csv(file_path)

# Drop the identifier column
df.drop(columns=['Patient_ID'], inplace=True)

# Label-encode categorical variables
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # kept for later interpretation

# Separate features and target
X = df.drop(columns=['Appendix_Cancer_Prediction'])
y = df['Appendix_Cancer_Prediction']

# Stratified train/test split; SMOTE is applied to the training fold only,
# so no synthetic samples leak into the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

# Wrap the data in LightGBM Datasets
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

# LightGBM parameters
params = {
    'objective': 'binary',
    'metric': 'binary_error',
    'boosting_type': 'gbdt',
    'learning_rate': 0.05,
    'num_leaves': 31,
    'max_depth': -1,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'scale_pos_weight': sum(y_train == 0) / sum(y_train == 1),  # ~1 after SMOTE balancing; kept for generality
    'seed': 42
}

# Train with early stopping
num_round = 100
model = lgb.train(params, train_data, num_boost_round=num_round, valid_sets=[test_data], callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])

# Predict
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

# Compute evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print evaluation results
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

# Feature importance analysis
feature_importance = model.feature_importance()
feature_names = X.columns
sorted_idx = np.argsort(feature_importance)

# Drop zero-importance features (the trained model above is unchanged; see the retraining sketch after this section)
zero_importance_features = [feature_names[i] for i in range(len(feature_importance)) if feature_importance[i] == 0]
print("\nRemoving Features with Zero Importance:", zero_importance_features)
X_train = X_train.drop(columns=zero_importance_features)
X_test = X_test.drop(columns=zero_importance_features)

# Plot feature importance
plt.figure(figsize=(10, 6))
plt.barh(range(len(sorted_idx)), feature_importance[sorted_idx], align='center')
plt.yticks(range(len(sorted_idx)), [feature_names[i] for i in sorted_idx])
plt.xlabel("Feature Importance")
plt.title("LightGBM Feature Importance")
plt.show()
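
Note that the zero-importance columns are dropped above but the model is never refit, so the metrics reported earlier still reflect the full feature set. A minimal retraining sketch on the reduced data, reusing the same parameters (this step is an assumption; the original script stops at the drop):

# Rebuild the Datasets on the reduced feature set and retrain with identical params
train_data_reduced = lgb.Dataset(X_train, label=y_train)
test_data_reduced = lgb.Dataset(X_test, label=y_test, reference=train_data_reduced)
model_reduced = lgb.train(
    params, train_data_reduced, num_boost_round=num_round,
    valid_sets=[test_data_reduced],
    callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)],
)
y_pred_reduced = (model_reduced.predict(X_test) > 0.5).astype(int)
print(f"Accuracy (reduced features): {accuracy_score(y_test, y_pred_reduced):.4f}")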

4. LightGBM feature selection
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
import lightgbm as lgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import matplotlib.pyplot as plt
import shap

# File path
file_path = "/content/drive/My Drive/Colab Notebooks/appendix_cancer dataset/appendix_cancer_prediction_dataset.csv"

# Load the data
df = pd.read_csv(file_path)

# Drop the identifier column
df.drop(columns=['Patient_ID'], inplace=True)

# Label-encode categorical variables
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # kept for later interpretation

# Separate features and target
X = df.drop(columns=['Appendix_Cancer_Prediction'])
y = df['Appendix_Cancer_Prediction']

# Stratified train/test split; SMOTE is applied to the training fold only
# to keep synthetic samples out of the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

# Search for the best max_depth via cross-validation
best_depth = -1
best_score = 0
for depth in [3, 5, 7, 10, 15, 20, -1]:
    temp_model = lgb.LGBMClassifier(objective='binary', metric='binary_error', boosting_type='gbdt', max_depth=depth, seed=42)
    scores = cross_val_score(temp_model, X_train, y_train, cv=3, scoring='accuracy')
    mean_score = scores.mean()
    print(f"max_depth={depth}, Accuracy={mean_score:.4f}")
    if mean_score > best_score:
        best_score = mean_score
        best_depth = depth

print(f"\nOptimal max_depth: {best_depth}")

# Train the model with the best max_depth
best_model = lgb.LGBMClassifier(objective='binary', metric='binary_error', boosting_type='gbdt', max_depth=best_depth, seed=42)
best_model.fit(X_train, y_train, eval_set=[(X_test, y_test)], callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])

# SHAP analysis
explainer = shap.Explainer(best_model, X_train)
shap_values = explainer(X_test)

# SHAP Summary Plot
shap.summary_plot(shap_values, X_test)

# Select the top 15 features by mean |SHAP|
shap_importance = np.abs(shap_values.values).mean(axis=0)
top_15_features = X_train.columns[np.argsort(shap_importance)[-15:]]
print("\nTop 15 Features Based on SHAP:", top_15_features.tolist())

# Rebuild the data with only the top 15 features
X_train_top15 = X_train[top_15_features]
X_test_top15 = X_test[top_15_features]

# Retrain on the top-15 features with the best max_depth
best_model.fit(X_train_top15, y_train, eval_set=[(X_test_top15, y_test)], callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])

# Predict
y_pred_prob = best_model.predict_proba(X_test_top15)[:, 1]
y_pred = (y_pred_prob > 0.5).astype(int)

# Compute evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print evaluation results
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

5. LightGBM feature interaction
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import shap
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
import lightgbm as lgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve

# File path
file_path = "/content/drive/My Drive/Colab Notebooks/appendix_cancer dataset/appendix_cancer_prediction_dataset.csv"

# Load the data
df = pd.read_csv(file_path)

# Drop the identifier column
df.drop(columns=['Patient_ID'], inplace=True)

# Label-encode categorical variables
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # kept for later interpretation

# Add an interaction feature (only for features that will be retained)
df["Chronic_Severity"] = df["Chronic_Diseases"] * df["Symptom_Severity"]

# Separate features and target
X = df.drop(columns=['Appendix_Cancer_Prediction'])
y = df['Appendix_Cancer_Prediction']

# Stratified train/test split; SMOTE is applied to the training fold only
# to keep synthetic samples out of the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

# Search for the best max_depth via cross-validation
best_depth = -1
best_score = 0
for depth in [3, 5, 7, 10, 15, 20, -1]:
    temp_model = lgb.LGBMClassifier(objective='binary', metric='binary_error', boosting_type='gbdt', max_depth=depth, seed=42)
    scores = cross_val_score(temp_model, X_train, y_train, cv=3, scoring='accuracy')
    mean_score = scores.mean()
    print(f"max_depth={depth}, Accuracy={mean_score:.4f}")
    if mean_score > best_score:
        best_score = mean_score
        best_depth = depth

print(f"\nOptimal max_depth: {best_depth}")

# Train with the best max_depth
optimal_model = lgb.LGBMClassifier(objective='binary', metric='binary_error', boosting_type='gbdt', max_depth=best_depth, seed=42)
optimal_model.fit(X_train, y_train)

# Compute SHAP values on the training set
explainer = shap.Explainer(optimal_model, X_train)
shap_values = explainer(X_train)

# SHAP-based feature importance
shap_importance = np.abs(shap_values.values).mean(axis=0)
shap_importance_df = pd.DataFrame({'Feature': X_train.columns, 'Importance': shap_importance})
shap_importance_df = shap_importance_df.sort_values(by='Importance', ascending=False)

# Select the top 15 features (Chronic_Severity is always included)
top_features = ['Chronic_Severity'] + shap_importance_df[shap_importance_df['Feature'] != 'Chronic_Severity'].head(14)['Feature'].tolist()

X_train_selected = X_train[top_features]
X_test_selected = X_test[top_features]

# Retrain on the selected features
selected_model = lgb.LGBMClassifier(objective='binary', metric='binary_error', boosting_type='gbdt', max_depth=best_depth, seed=42)
selected_model.fit(X_train_selected, y_train)

# Final performance evaluation
y_pred_prob_selected = selected_model.predict_proba(X_test_selected)[:, 1]
y_pred_selected = (y_pred_prob_selected > 0.5).astype(int)

accuracy_selected = accuracy_score(y_test, y_pred_selected)
precision_selected = precision_score(y_test, y_pred_selected)
recall_selected = recall_score(y_test, y_pred_selected)
f1_selected = f1_score(y_test, y_pred_selected)
auc_selected = roc_auc_score(y_test, y_pred_prob_selected)

print("\n✅ Final Model Performance after SHAP-based Feature Selection")
print(f"Accuracy: {accuracy_selected:.4f}")
print(f"Precision: {precision_selected:.4f}")
print(f"Recall: {recall_selected:.4f}")
print(f"F1 Score: {f1_selected:.4f}")
print(f"AUC: {auc_selected:.4f}")

# Plot the ROC curve
fpr, tpr, _ = roc_curve(y_test, y_pred_prob_selected)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='blue', label=f'ROC Curve (AUC = {auc_selected:.4f})')
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend()
plt.show()

# SHAP summary plot (shap.summary_plot creates its own figure)
shap.summary_plot(shap_values, X_train)
print("To visualize SHAP values, a summary plot was generated to illustrate the contribution of each feature to the model's predictions.")


6. LightGBM feature weighting
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import shap
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
import lightgbm as lgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# File path
file_path = "/content/drive/My Drive/Colab Notebooks/appendix_cancer dataset/appendix_cancer_prediction_dataset.csv"

# Load the data
df = pd.read_csv(file_path)

# Drop the identifier column
df.drop(columns=['Patient_ID'], inplace=True)

# Label-encode categorical variables
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # kept for later interpretation

# Separate features and target
X = df.drop(columns=['Appendix_Cancer_Prediction'])
y = df['Appendix_Cancer_Prediction']

# Stratified train/test split; SMOTE is applied to the training fold only
# to keep synthetic samples out of the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

# Search for the best max_depth via cross-validation
best_depth = -1
best_score = 0
for depth in [3, 5, 7, 10, 15, 20, -1]:
    temp_model = lgb.LGBMClassifier(objective='binary', metric='binary_error', boosting_type='gbdt', max_depth=depth, seed=42)
    scores = cross_val_score(temp_model, X_train, y_train, cv=3, scoring='accuracy')
    mean_score = scores.mean()
    print(f"max_depth={depth}, Accuracy={mean_score:.4f}")
    if mean_score > best_score:
        best_score = mean_score
        best_depth = depth

print(f"\nOptimal max_depth: {best_depth}")

# Train with the best max_depth
optimal_model = lgb.LGBMClassifier(objective='binary', metric='binary_error', boosting_type='gbdt', max_depth=best_depth, seed=42)
optimal_model.fit(X_train, y_train)

# Built-in LightGBM feature importance (note: feature_importances_ counts splits by default, not gain)
feature_importance = optimal_model.feature_importances_
feature_names = X_train.columns

# Sort features by importance
important_features = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importance})
important_features = important_features.sort_values(by='Importance', ascending=False)

# Plot full feature importance
plt.figure(figsize=(12, 8))
plt.barh(important_features['Feature'], important_features['Importance'], color='lightblue')
plt.xlabel("Feature Importance")
plt.ylabel("Features")
plt.title("Feature Importance (LightGBM)")
plt.gca().invert_yaxis()
plt.show()

# Keep the top 15 features
important_features = important_features.head(15)
selected_features = important_features['Feature'].tolist()

# Reduce the datasets to the selected features
X_train_selected = X_train[selected_features]
X_test_selected = X_test[selected_features]

# Train a model on the selected features with the best max_depth
selected_model = lgb.LGBMClassifier(objective='binary', metric='binary_error', boosting_type='gbdt', max_depth=best_depth, seed=42)
selected_model.fit(X_train_selected, y_train)

# Predict and evaluate; LGBMClassifier.predict already returns class labels
y_pred_selected = selected_model.predict(X_test_selected)

accuracy = accuracy_score(y_test, y_pred_selected)
precision = precision_score(y_test, y_pred_selected)
recall = recall_score(y_test, y_pred_selected)
f1 = f1_score(y_test, y_pred_selected)

print("\n✅ 상위 15개 Feature로 학습된 최적 max_depth 적용 LightGBM 모델 성능")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

# Build the SHAP explainer
explainer = shap.Explainer(selected_model, X_train_selected)
shap_values = explainer(X_test_selected)

# SHAP-based feature weights
shap_importance = np.abs(shap_values.values).mean(axis=0)
shap_weights = shap_importance / np.max(shap_importance)  # normalize to the [0, 1] range

# Plot the feature weights
plt.figure(figsize=(12, 8))
plt.barh(selected_features, shap_weights, color='salmon')
plt.xlabel("Feature Weight (Normalized SHAP Values)")
plt.ylabel("Features")
plt.title("SHAP-based Feature Weighting")
plt.gca().invert_yaxis()
plt.show()

# Apply the weights to the data
# (note: tree models are largely invariant to positive per-feature rescaling,
# so this mainly serves as an interpretability experiment)
X_train_weighted = X_train_selected * shap_weights
X_test_weighted = X_test_selected * shap_weights

# Train a model on the weighted features
weighted_model = lgb.LGBMClassifier(objective='binary', metric='binary_error', boosting_type='gbdt', max_depth=best_depth, seed=42)
weighted_model.fit(X_train_weighted, y_train)

# Predict and evaluate the feature-weighted model
y_pred_weighted = weighted_model.predict(X_test_weighted)

accuracy_weighted = accuracy_score(y_test, y_pred_weighted)
precision_weighted = precision_score(y_test, y_pred_weighted)
recall_weighted = recall_score(y_test, y_pred_weighted)
f1_weighted = f1_score(y_test, y_pred_weighted)

print("\n✅ SHAP Value 기반 Feature Weighting 적용 LightGBM 모델 성능")
print(f"Accuracy: {accuracy_weighted:.4f}")
print(f"Precision: {precision_weighted:.4f}")
print(f"Recall: {recall_weighted:.4f}")
print(f"F1 Score: {f1_weighted:.4f}")