GenAICoder committed on
Commit
2b968b1
·
verified ·
1 Parent(s): 9dad52c

Create segment_ranking.py

Browse files
Files changed (1) hide show
  1. visualizations/segment_ranking.py +391 -0
visualizations/segment_ranking.py ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # visualizations/segment_ranking.py
2
+
3
+ import plotly.graph_objects as go
4
+ import plotly.express as px
5
+ from plotly.subplots import make_subplots
6
+ import pandas as pd
7
+ from metrics.metric_registry import METRIC_FUNCTIONS
8
+ from analytics.performance_analysis import generate_metric_view
9
+
10
+
11
def calculate_segment_risk_score(
    df,
    metric_name,
    category
):
    """
    Calculate a risk score for each segment in a category.

    The metric view for ``metric_name`` grouped by ``category`` is collapsed
    to one row per segment: the rate column is averaged, while the account
    and balance totals are summed.

    Args:
        df: Master dataframe
        metric_name: Metric name for risk calculation
        category: Segmentation category (column the view is grouped by)

    Returns:
        DataFrame with the columns "Segment", "Risk_Score",
        "total_accounts" and "total_balance" (one row per segment).

    Raises:
        IndexError: If the metric view has no column whose name
            contains "rate".
    """

    result = generate_metric_view(
        df=df,
        metric_name=metric_name,
        group_col=category
    )

    # The rate column is located by name: the first column containing "rate"
    # (case-insensitive) is used.
    rate_columns = [
        col for col in result.columns
        if "rate" in str(col).lower()
    ]

    if not rate_columns:
        # Same exception type the bare ``[0]`` lookup used to raise, but with
        # enough context to diagnose which metric/category is affected.
        raise IndexError(
            f"No rate column found for metric '{metric_name}' grouped by "
            f"'{category}'; available columns: {list(result.columns)}"
        )

    rate_col = rate_columns[0]

    # NOTE(review): Risk_Score is the *unweighted* mean of the rate column
    # over the rows of each segment. It is not a balance-weighted ratio
    # (total bad balance / total balance) -- confirm which one is intended.
    segment_risk = (
        result.groupby(category)
        .agg({
            rate_col: "mean",
            "total_accounts": "sum",
            "total_balance": "sum"
        })
        .reset_index()
    )

    return segment_risk.rename(
        columns={
            category: "Segment",
            rate_col: "Risk_Score"
        }
    )
59
+
60
+
61
def generate_segment_risk_heatmap(
    df,
    metrics=None,
    categories=None
):
    """
    Generate heatmap showing risk scores across segments and metrics.

    Each row of the heatmap is one (category, segment) pair and each column
    is one metric. Rows are ordered by their average risk score across the
    metrics.

    Args:
        df: Master dataframe
        metrics: List of metrics to evaluate. Defaults to
            ["30+@3", "30+@6", "60+@6", "Yr1 NCL"].
        categories: List of categories to analyze. Defaults to
            ["fico_band", "sourcing_channel", "city_tier", "occupation_type"].

    Returns:
        Plotly figure with heatmap
    """

    if metrics is None:
        metrics = ["30+@3", "30+@6", "60+@6", "Yr1 NCL"]

    if categories is None:
        categories = [
            "fico_band",
            "sourcing_channel",
            "city_tier",
            "occupation_type"
        ]

    # metric -> {segment key -> risk score}
    heatmap_data = {}
    # segment key -> human-readable axis label
    all_segments = {}

    for metric in metrics:

        metric_scores = {}

        for category in categories:

            category_label = category.replace('_', ' ').title()

            try:
                segment_risk = calculate_segment_risk_score(
                    df=df,
                    metric_name=metric,
                    category=category
                )

                # Iterate the columns directly instead of iterrows():
                # iterrows() upcasts each row to a common dtype, which turns
                # integer segment values (e.g. tier 1) into "1.0" in the
                # keys and labels.
                for segment, score in zip(
                    segment_risk["Segment"],
                    segment_risk["Risk_Score"]
                ):
                    segment_key = f"{category}_{segment}"
                    metric_scores[segment_key] = score
                    all_segments[segment_key] = f"{category_label}: {segment}"

            except Exception as e:
                # Best effort: a failing metric/category pair is reported and
                # skipped so the remaining cells can still be drawn.
                print(f"Error processing {metric} x {category}: {e}")

        heatmap_data[metric] = metric_scores

    # Rows are segment keys, columns are metrics. Cells that were never
    # produced (failed calculation or segment missing for a metric) are
    # filled with 0.
    heatmap_df = pd.DataFrame(heatmap_data)
    heatmap_df = heatmap_df.fillna(0)

    # Order rows by average risk using a temporary helper column
    heatmap_df["avg_risk"] = heatmap_df.mean(axis=1)
    heatmap_df = heatmap_df.sort_values("avg_risk", ascending=False)
    heatmap_df = heatmap_df.drop("avg_risk", axis=1)

    cell_text = [
        [f"{val:.2f}%" for val in row]
        for row in heatmap_df.values
    ]

    fig = go.Figure(
        data=go.Heatmap(
            z=heatmap_df.values,
            x=heatmap_df.columns,
            y=[all_segments.get(idx, idx) for idx in heatmap_df.index],
            colorscale="RdYlGn_r",
            hovertemplate=(
                "<b>Segment: %{y}</b><br>" +
                "<b>Metric: %{x}</b><br>" +
                "Risk Score: %{z:.2f}%<br>" +
                "<extra></extra>"
            ),
            text=cell_text,
            texttemplate="%{text}",
            textfont={"size": 10},
            colorbar=dict(
                title="Risk Score<br>(%)"
            )
        )
    )

    fig.update_layout(
        title="Segment Risk Heatmap Across Delinquency Metrics",
        xaxis_title="Delinquency Metrics",
        yaxis_title="Segments",
        # Grow with the number of rows, but never below 400
        height=max(400, len(heatmap_df) * 25),
        template="plotly_white",
        hovermode="closest"
    )

    return fig
157
+
158
+
159
def generate_segment_risk_ranking(
    df,
    metric_name,
    category,
    top_n=10
):
    """
    Generate bar chart ranking segments by risk within a category.

    Args:
        df: Master dataframe
        metric_name: Metric name for risk calculation
        category: Segmentation category
        top_n: Maximum number of highest-risk segments to display

    Returns:
        Plotly bar chart figure (horizontal bars, one per segment)
    """

    segment_risk = calculate_segment_risk_score(
        df=df,
        metric_name=metric_name,
        category=category
    )

    # Sort ascending and keep the last top_n rows: these are the top_n
    # highest scores, ordered from lowest to highest.
    segment_risk = segment_risk.sort_values(
        "Risk_Score",
        ascending=True
    ).tail(top_n)

    # The category may have fewer segments than requested; size and label
    # the chart by what is actually drawn.
    shown = len(segment_risk)

    # Color code by risk level: above 10 red, above 5 orange, otherwise green
    colors = [
        "#d62728" if score > 10 else "#ff7f0e" if score > 5 else "#2ca02c"
        for score in segment_risk["Risk_Score"]
    ]

    category_label = category.replace('_', ' ').title()

    fig = go.Figure(
        data=go.Bar(
            y=segment_risk["Segment"],
            x=segment_risk["Risk_Score"],
            orientation="h",
            marker=dict(
                color=colors,
                line=dict(color="white", width=1)
            ),
            text=segment_risk["Risk_Score"],
            texttemplate="%{text:.2f}%",
            textposition="outside",
            hovertemplate=(
                "<b>Segment: %{y}</b><br>" +
                "Risk Score: %{x:.2f}%<br>" +
                "Accounts: %{customdata[0]}<br>" +
                "Balance: %{customdata[1]:,.0f}<br>" +
                "<extra></extra>"
            ),
            # customdata[0] / customdata[1] feed the hover template above
            customdata=segment_risk[["total_accounts", "total_balance"]].values
        )
    )

    fig.update_layout(
        title=f"Top {shown} High-Risk Segments: {metric_name} by {category_label}",
        xaxis_title="Risk Score (%)",
        yaxis_title=category_label,
        height=400 + (shown * 15),
        template="plotly_white",
        hovermode="closest"
    )

    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor="lightgray"
    )

    return fig
233
+
234
+
235
def generate_multi_category_risk_comparison(
    df,
    metric_name,
    categories=None,
    top_n=5
):
    """
    Compare risk across categories for a single metric.

    One horizontal bar chart is drawn per category, showing that category's
    highest-risk segments. Subplots are laid out two per row (a single
    column when only one category is given).

    Args:
        df: Master dataframe
        metric_name: Metric name for risk calculation
        categories: List of categories to compare. Defaults to
            ["fico_band", "sourcing_channel", "city_tier", "occupation_type"].
        top_n: Maximum number of highest-risk segments shown per category

    Returns:
        Plotly figure with subplots (one per category)
    """

    if categories is None:
        categories = [
            "fico_band",
            "sourcing_channel",
            "city_tier",
            "occupation_type"
        ]
    else:
        categories = list(categories)

    # Grid dimensions: ceil(len / n_cols) rows, at least one row so the
    # figure can still be created for an empty category list.
    n_cols = 2 if len(categories) > 1 else 1
    n_rows = max(1, (len(categories) + n_cols - 1) // n_cols)

    fig = make_subplots(
        rows=n_rows,
        cols=n_cols,
        subplot_titles=[cat.replace('_', ' ').title() for cat in categories],
        specs=[
            [{"type": "bar"} for _ in range(n_cols)]
            for _ in range(n_rows)
        ]
    )

    for index, category in enumerate(categories):

        # Fill the grid row by row; subplot coordinates are 1-based
        row = index // n_cols + 1
        col = index % n_cols + 1

        try:
            segment_risk = calculate_segment_risk_score(
                df=df,
                metric_name=metric_name,
                category=category
            )

            # Ascending sort + tail keeps the top_n highest scores, ordered
            # from lowest to highest.
            segment_risk = segment_risk.sort_values(
                "Risk_Score",
                ascending=True
            ).tail(top_n)

            fig.add_trace(
                go.Bar(
                    y=segment_risk["Segment"],
                    x=segment_risk["Risk_Score"],
                    orientation="h",
                    name=category,
                    showlegend=False,
                    marker=dict(
                        color=segment_risk["Risk_Score"],
                        colorscale="Reds",
                        showscale=False
                    ),
                    text=segment_risk["Risk_Score"],
                    texttemplate="%{text:.2f}%",
                    textposition="outside",
                    hovertemplate=(
                        "<b>%{y}</b><br>" +
                        "Risk Score: %{x:.2f}%<br>" +
                        "<extra></extra>"
                    )
                ),
                row=row,
                col=col
            )

            fig.update_xaxes(
                title_text="Risk Score (%)",
                row=row,
                col=col
            )

        except Exception as e:
            # Best effort: a failing category leaves its subplot empty
            # instead of aborting the whole figure.
            print(f"Error processing category {category}: {e}")

    fig.update_layout(
        title_text=f"High-Risk Segments Across Categories: {metric_name}",
        # 400 per subplot row (800 for the default 2x2 grid)
        height=400 * n_rows,
        template="plotly_white",
        hovermode="closest"
    )

    return fig
336
+
337
+
338
def calculate_portfolio_risk_summary(
    df,
    metrics=None,
    categories=None
):
    """
    Calculate overall portfolio risk summary across metrics and categories.

    For every metric/category pair the per-segment risk scores are reduced
    to their mean, their maximum and the number of segments scoring above 10.

    Args:
        df: Master dataframe
        metrics: List of metrics to evaluate. Defaults to
            ["30+@3", "30+@6", "60+@6", "Yr1 NCL"].
        categories: List of categories to evaluate. Defaults to
            ["fico_band", "sourcing_channel", "city_tier", "occupation_type"].

    Returns:
        DataFrame with the columns "Metric", "Category", "Avg_Risk",
        "Max_Risk" and "High_Risk_Segments", one row per metric/category
        pair that could be computed. The columns are present even when
        there are no rows.
    """

    if metrics is None:
        metrics = ["30+@3", "30+@6", "60+@6", "Yr1 NCL"]

    if categories is None:
        categories = [
            "fico_band",
            "sourcing_channel",
            "city_tier",
            "occupation_type"
        ]

    # Fixed schema so an empty result still exposes the expected columns
    summary_columns = [
        "Metric",
        "Category",
        "Avg_Risk",
        "Max_Risk",
        "High_Risk_Segments"
    ]

    summary_data = []

    for metric in metrics:
        for category in categories:
            try:
                segment_risk = calculate_segment_risk_score(
                    df=df,
                    metric_name=metric,
                    category=category
                )

                risk_scores = segment_risk["Risk_Score"]

                summary_data.append({
                    "Metric": metric,
                    "Category": category.replace('_', ' ').title(),
                    "Avg_Risk": risk_scores.mean(),
                    "Max_Risk": risk_scores.max(),
                    # Segments whose score exceeds 10
                    "High_Risk_Segments": int((risk_scores > 10).sum())
                })

            except Exception as e:
                # Best effort: report the failing pair and keep going
                print(f"Error calculating summary for {metric} x {category}: {e}")

    return pd.DataFrame(summary_data, columns=summary_columns)