# Model performance: one row per model, with its R² and RMSE on the test set
model_summary = pd.DataFrame(
    {'Model': ['Multiple Linear Regression', 'Random Forest (Regression)']}
)
model_summary['R² (Test)'] = [r2_lr, r2_rf]
model_summary['RMSE (Test)'] = [rmse_lr, rmse_rf]
print(model_summary)
# Visualization of model results: a 2x2 grid with three bar panels and one
# scatter panel (bottom-right)
fig = make_subplots(
    rows=2,
    cols=2,
    specs=[
        [dict(type="bar"), dict(type="bar")],
        [dict(type="bar"), dict(type="scatter")],
    ],
    subplot_titles=[
        'R² Comparison (Test Set)',
        'RMSE Comparison (Test Set)',
        'Skills vs Salary Impact',
        'Predicted vs Actual Salary',
    ],
)
# Row 1: one bar per model for each metric (the same x labels feed both panels)
models = ['Multiple linear regression', 'Random Forest']
r2_values = [r2_lr, r2_rf]
rmse_values = [rmse_lr, rmse_rf]

# Row 1, Col 1: R² comparison
fig.add_trace(
    go.Bar(
        name="R² Score",
        x=models,
        y=r2_values,
        marker=dict(color=['steelblue', 'darkgreen']),
    ),
    row=1,
    col=1,
)

# Row 1, Col 2: RMSE comparison
fig.add_trace(
    go.Bar(
        name="RMSE",
        x=models,
        y=rmse_values,
        marker=dict(color=['coral', 'orange']),
    ),
    row=1,
    col=2,
)
# Row 2, Col 1: Skills vs salary impact (top 10)
# NOTE(review): assumes skill_importance is a sequence of (feature, importance)
# pairs already sorted by descending importance — confirm where it is built.
top_skills_salary = skill_importance[:10]
fig.add_trace(
    go.Bar(
        x=[pair[1] for pair in top_skills_salary],  # bar length: importance value
        y=[pair[0] for pair in top_skills_salary],  # bar label: feature name
        orientation='h',
        name="Feature Importance",
        marker_color='purple',
    ),
    row=2,
    col=1,
)
# Plotly places categories on the y axis from bottom to top in order of
# appearance, which would draw the first entry of the slice at the bottom of
# the panel. Reversing the axis puts the first entry at the top instead.
fig.update_yaxes(autorange="reversed", row=2, col=1)
# Row 2, Col 2: Predicted vs Actual for Random Forest
# Plot at most 500 of the test points, sampled without replacement. The sample
# comes from a locally seeded generator so the figure is identical on every run
# and NumPy's global random state is left untouched.
sample_size = min(500, len(y_test))
sample_indices = np.random.default_rng(42).choice(
    len(y_test), size=sample_size, replace=False
)
fig.add_trace(
    go.Scatter(
        x=y_test.iloc[sample_indices],
        # Index predictions by position, matching the .iloc lookup on y_test.
        y=np.asarray(y_pred_rf)[sample_indices],
        mode='markers',
        name='RF Predictions',
        marker=dict(color='darkgreen', size=5, opacity=0.6),
    ),
    row=2,
    col=2,
)

# Prediction line: the y = x diagonal across the full range of actual and
# predicted values (computed on all test points, not just the plotted sample).
min_val = min(y_test.min(), y_pred_rf.min())
max_val = max(y_test.max(), y_pred_rf.max())
fig.add_trace(
    go.Scatter(
        x=[min_val, max_val],
        y=[min_val, max_val],
        mode='lines',
        name='Perfect Prediction',
        line=dict(color='red', dash='dash'),
    ),
    row=2,
    col=2,
)
# Axis labels, one entry per subplot: (row, col, x-axis title, y-axis title)
subplot_axis_titles = [
    (1, 1, "Model", "R² Score"),
    (1, 2, "Model", "RMSE ($)"),
    (2, 1, "Importance", "Feature"),
    (2, 2, "Actual Salary ($)", "Predicted Salary ($)"),
]
for panel_row, panel_col, x_axis_title, y_axis_title in subplot_axis_titles:
    fig.update_xaxes(title_text=x_axis_title, row=panel_row, col=panel_col)
    fig.update_yaxes(title_text=y_axis_title, row=panel_row, col=panel_col)
# Figure-level layout: overall height, theme, hidden legend and a title centred
# near the top edge (with top margin reserved for it); then render the figure.
fig.update_layout(
    title=dict(
        text="Regression Model Comparison - BA/ML/DS Salary Prediction",
        x=0.5,
        y=0.98,
        xanchor='center',
        yanchor='top',
    ),
    template="plotly_white",
    showlegend=False,
    height=800,
    margin={'t': 80},
)
fig.show()