# Risk_Manager/visualizations/segment_ranking_old.py
# Last commit by GenAICoder (9dad52c, verified):
#   "Rename visualizations/segment_ranking.py to visualizations/segment_ranking_old.py"
# Previously: visualizations/segment_ranking.py
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from metrics.metric_registry import METRIC_FUNCTIONS
from analytics.performance_analysis import generate_metric_view
def calculate_segment_risk_score(
    df,
    metric_name,
    category
):
    """
    Calculate risk scores for each segment in a category.

    The metric view for ``metric_name`` grouped by ``category`` is obtained
    from ``generate_metric_view`` and collapsed to one row per segment:
    the rate column is averaged (unweighted mean over the view's rows) and
    ``total_accounts`` / ``total_balance`` are summed.

    NOTE(review): this assumes ``generate_metric_view`` returns a DataFrame
    containing the ``category`` column, a column whose name contains "rate",
    and ``total_accounts`` / ``total_balance`` columns - confirm against
    analytics.performance_analysis.

    Args:
        df: Master dataframe
        metric_name: Metric name for risk calculation
        category: Segmentation category

    Returns:
        DataFrame with columns "Segment", "Risk_Score", "total_accounts"
        and "total_balance" (one row per segment)

    Raises:
        IndexError: If the metric view has no rate column. The exception
            type matches what the bare ``[0]`` lookup used to raise, but
            the message now names the metric, category and columns seen.
    """
    result = generate_metric_view(
        df=df,
        metric_name=metric_name,
        group_col=category
    )

    # Exclude the grouping column itself: a category whose name happens to
    # contain "rate" must not be mistaken for the metric's rate column.
    # str() keeps the check safe for non-string column labels.
    rate_candidates = [
        col for col in result.columns
        if col != category and "rate" in str(col).lower()
    ]
    if not rate_candidates:
        raise IndexError(
            f"No rate column found for metric {metric_name!r} grouped by "
            f"{category!r}; available columns: {list(result.columns)}"
        )
    rate_col = rate_candidates[0]

    # Calculate average rate per segment
    segment_risk = (
        result.groupby(category)
        .agg({
            rate_col: "mean",
            "total_accounts": "sum",
            "total_balance": "sum"
        })
        .reset_index()
    )

    segment_risk = segment_risk.rename(
        columns={
            category: "Segment",
            rate_col: "Risk_Score"
        }
    )

    return segment_risk
def generate_segment_risk_heatmap(
    df,
    metrics=None,
    categories=None
):
    """
    Generate heatmap showing risk scores across segments and metrics.

    Every (metric, category) pair is scored with
    ``calculate_segment_risk_score``. A pair that raises is reported on
    stdout and skipped, so one bad combination does not abort the chart.
    Rows (segments) are ordered by their mean score across metrics,
    highest first.

    Args:
        df: Master dataframe
        metrics: List of metrics to evaluate. Defaults to
            ["30+@3", "30+@6", "60+@6", "Yr1 NCL"].
        categories: List of categories to analyze. Defaults to
            fico_band, sourcing_channel, city_tier and occupation_type.

    Returns:
        Plotly figure with heatmap
    """
    if metrics is None:
        metrics = ["30+@3", "30+@6", "60+@6", "Yr1 NCL"]

    if categories is None:
        categories = [
            "fico_band",
            "sourcing_channel",
            "city_tier",
            "occupation_type"
        ]

    # Prepare data for heatmap: {metric: {segment_key: score}}
    heatmap_data = {}
    # segment_key -> human-readable row label
    all_segments = {}

    for metric in metrics:
        metric_scores = {}

        for category in categories:
            try:
                segment_risk = calculate_segment_risk_score(
                    df=df,
                    metric_name=metric,
                    category=category
                )
                category_label = category.replace('_', ' ').title()

                # Iterate the two columns directly instead of iterrows():
                # iterrows() builds one Series per row and does not preserve
                # dtypes, so an integer segment (e.g. tier 1) could be
                # rendered as "1.0" in keys and labels.
                for segment, score in zip(
                    segment_risk["Segment"],
                    segment_risk["Risk_Score"]
                ):
                    segment_key = f"{category}_{segment}"
                    metric_scores[segment_key] = score
                    all_segments[segment_key] = f"{category_label}: {segment}"

            except Exception as e:
                # Deliberate best-effort: report and continue with the rest.
                print(f"Error processing {metric} x {category}: {e}")

        heatmap_data[metric] = metric_scores

    # Create DataFrame for heatmap (rows: segment keys, columns: metrics).
    # Combinations with no score are shown as 0.
    heatmap_df = pd.DataFrame(heatmap_data)
    heatmap_df = heatmap_df.fillna(0)

    # Sort by average risk, highest first. Sorting through the index avoids
    # adding a temporary column that could clash with a metric's name.
    risk_order = heatmap_df.mean(axis=1).sort_values(ascending=False).index
    heatmap_df = heatmap_df.loc[risk_order]

    # Create heatmap
    fig = go.Figure(
        data=go.Heatmap(
            z=heatmap_df.values,
            x=heatmap_df.columns,
            y=[all_segments.get(idx, idx) for idx in heatmap_df.index],
            colorscale="RdYlGn_r",
            hovertemplate=(
                "<b>Segment: %{y}</b><br>" +
                "<b>Metric: %{x}</b><br>" +
                "Risk Score: %{z:.2f}%<br>" +
                "<extra></extra>"
            ),
            text=[[f"{val:.2f}%" for val in row] for row in heatmap_df.values],
            texttemplate="%{text}",
            textfont={"size": 10},
            colorbar=dict(
                title="Risk Score<br>(%)"
            )
        )
    )

    fig.update_layout(
        title="Segment Risk Heatmap Across Delinquency Metrics",
        xaxis_title="Delinquency Metrics",
        yaxis_title="Segments",
        # Grow with the number of segment rows, but never below 400px.
        height=max(400, len(heatmap_df) * 25),
        template="plotly_white",
        hovermode="closest"
    )

    return fig
def generate_segment_risk_ranking(
    df,
    metric_name,
    category,
    top_n=10
):
    """
    Generate bar chart ranking segments by risk within a category.

    Segments are scored with ``calculate_segment_risk_score``; the
    ``top_n`` highest-scoring segments are drawn as horizontal bars.
    Bars are red above a score of 10, orange above 5, and green otherwise.

    Args:
        df: Master dataframe
        metric_name: Metric name for risk calculation
        category: Segmentation category
        top_n: Number of top risk segments to display

    Returns:
        Plotly bar chart figure
    """
    segment_risk = calculate_segment_risk_score(
        df=df,
        metric_name=metric_name,
        category=category
    )

    # sort_values() places NaN last by default, so without this step
    # tail(top_n) would pick segments with an undefined score as the
    # "riskiest" ones.
    segment_risk = segment_risk.dropna(subset=["Risk_Score"])

    # Sort ascending and keep the last top_n rows, i.e. the top_n highest
    # scores with the largest one last (with Plotly's default bottom-to-top
    # category order this puts the riskiest segment at the top of the chart).
    segment_risk = segment_risk.sort_values(
        "Risk_Score",
        ascending=True
    ).tail(top_n)

    # Color code by risk level
    colors = [
        "#d62728" if score > 10 else "#ff7f0e" if score > 5 else "#2ca02c"
        for score in segment_risk["Risk_Score"]
    ]

    fig = go.Figure(
        data=go.Bar(
            y=segment_risk["Segment"],
            x=segment_risk["Risk_Score"],
            orientation="h",
            marker=dict(
                color=colors,
                line=dict(color="white", width=1)
            ),
            text=segment_risk["Risk_Score"],
            texttemplate="%{text:.2f}%",
            textposition="outside",
            hovertemplate=(
                "<b>Segment: %{y}</b><br>" +
                "Risk Score: %{x:.2f}%<br>" +
                "Accounts: %{customdata[0]}<br>" +
                "Balance: %{customdata[1]:,.0f}<br>" +
                "<extra></extra>"
            ),
            # Extra per-bar values referenced by the hover template above.
            customdata=segment_risk[["total_accounts", "total_balance"]].values
        )
    )

    fig.update_layout(
        title=f"Top {top_n} High-Risk Segments: {metric_name} by {category.replace('_', ' ').title()}",
        xaxis_title="Risk Score (%)",
        yaxis_title=category.replace('_', ' ').title(),
        height=400 + (top_n * 15),
        template="plotly_white",
        hovermode="closest"
    )

    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor="lightgray"
    )

    return fig
def generate_multi_category_risk_comparison(
    df,
    metric_name,
    categories=None,
    top_n=5
):
    """
    Compare risk across all categories for a single metric.

    One horizontal bar subplot is drawn per category, showing that
    category's ``top_n`` highest-scoring segments. Subplots are laid out
    two per row. A category that raises while being scored or drawn is
    reported on stdout and its subplot is left empty.

    Args:
        df: Master dataframe
        metric_name: Metric name for risk calculation
        categories: Categories to compare. Defaults to fico_band,
            sourcing_channel, city_tier and occupation_type (a 2x2 grid).
        top_n: Number of highest-risk segments shown per category

    Returns:
        Plotly figure with subplots (one per category)
    """
    from plotly.subplots import make_subplots

    if categories is None:
        categories = [
            "fico_band",
            "sourcing_channel",
            "city_tier",
            "occupation_type"
        ]
    else:
        # Materialize so the categories can be iterated more than once.
        categories = list(categories)

    # Two subplots per row; ceil division, with at least one row so that
    # make_subplots always receives a valid grid.
    n_cols = 2
    n_rows = max(1, -(-len(categories) // n_cols))

    # Create subplots
    fig = make_subplots(
        rows=n_rows,
        cols=n_cols,
        subplot_titles=[cat.replace('_', ' ').title() for cat in categories],
        specs=[
            [{"type": "bar"} for _ in range(n_cols)]
            for _ in range(n_rows)
        ]
    )

    for position, category in enumerate(categories):
        # Fill the grid row by row; Plotly subplot indices are 1-based.
        row, col = divmod(position, n_cols)
        row += 1
        col += 1

        try:
            segment_risk = calculate_segment_risk_score(
                df=df,
                metric_name=metric_name,
                category=category
            )

            # sort_values() places NaN last by default; drop undefined
            # scores so tail() does not select them as the riskiest.
            segment_risk = segment_risk.dropna(subset=["Risk_Score"])

            # Ascending sort + tail keeps the top_n highest scores.
            segment_risk = segment_risk.sort_values(
                "Risk_Score",
                ascending=True
            ).tail(top_n)

            fig.add_trace(
                go.Bar(
                    y=segment_risk["Segment"],
                    x=segment_risk["Risk_Score"],
                    orientation="h",
                    name=category,
                    showlegend=False,
                    marker=dict(
                        color=segment_risk["Risk_Score"],
                        colorscale="Reds",
                        showscale=False
                    ),
                    text=segment_risk["Risk_Score"],
                    texttemplate="%{text:.2f}%",
                    textposition="outside",
                    hovertemplate=(
                        "<b>%{y}</b><br>" +
                        "Risk Score: %{x:.2f}%<br>" +
                        "<extra></extra>"
                    )
                ),
                row=row,
                col=col
            )

            fig.update_xaxes(
                title_text="Risk Score (%)",
                row=row,
                col=col
            )

        except Exception as e:
            # Deliberate best-effort: report and continue with the rest.
            print(f"Error processing category {category}: {e}")

    fig.update_layout(
        title_text=f"High-Risk Segments Across Categories: {metric_name}",
        # 400px per row of subplots (800 for the default 2x2 grid).
        height=400 * n_rows,
        template="plotly_white",
        hovermode="closest"
    )

    return fig
def calculate_portfolio_risk_summary(
    df,
    metrics=None,
    categories=None,
    high_risk_threshold=10
):
    """
    Calculate overall portfolio risk summary across metrics and categories.

    For every (metric, category) pair the per-segment scores from
    ``calculate_segment_risk_score`` are reduced to their mean, their
    maximum, and the number of segments scoring above
    ``high_risk_threshold``. A pair that raises is reported on stdout and
    left out of the summary.

    Args:
        df: Master dataframe
        metrics: List of metrics to evaluate. Defaults to
            ["30+@3", "30+@6", "60+@6", "Yr1 NCL"].
        categories: List of categories to analyze. Defaults to
            fico_band, sourcing_channel, city_tier and occupation_type.
        high_risk_threshold: Score above which a segment counts as
            high risk

    Returns:
        DataFrame with portfolio risk summary. The columns "Metric",
        "Category", "Avg_Risk", "Max_Risk" and "High_Risk_Segments" are
        always present, even when no pair could be evaluated.
    """
    if metrics is None:
        metrics = ["30+@3", "30+@6", "60+@6", "Yr1 NCL"]

    if categories is None:
        categories = [
            "fico_band",
            "sourcing_channel",
            "city_tier",
            "occupation_type"
        ]
    else:
        # Materialize: the categories are iterated once per metric.
        categories = list(categories)

    summary_columns = [
        "Metric",
        "Category",
        "Avg_Risk",
        "Max_Risk",
        "High_Risk_Segments"
    ]
    summary_data = []

    for metric in metrics:
        for category in categories:
            try:
                segment_risk = calculate_segment_risk_score(
                    df=df,
                    metric_name=metric,
                    category=category
                )
                scores = segment_risk["Risk_Score"]

                summary_data.append({
                    "Metric": metric,
                    "Category": category.replace('_', ' ').title(),
                    "Avg_Risk": scores.mean(),
                    "Max_Risk": scores.max(),
                    "High_Risk_Segments": int((scores > high_risk_threshold).sum())
                })

            except Exception as e:
                # Deliberate best-effort: report and continue with the rest.
                print(f"Error calculating summary for {metric} x {category}: {e}")

    # Passing the columns explicitly keeps the schema stable when
    # summary_data is empty, so callers can still select columns by name.
    summary_df = pd.DataFrame(summary_data, columns=summary_columns)

    return summary_df