Spaces:
Running
Running
| # visualizations/segment_ranking.py | |
| import plotly.graph_objects as go | |
| import plotly.express as px | |
| import pandas as pd | |
| from metrics.metric_registry import METRIC_FUNCTIONS | |
| from analytics.performance_analysis import generate_metric_view | |
def calculate_segment_risk_score(
    df,
    metric_name,
    category
):
    """
    Calculate risk scores for each segment in a category.

    The metric view is built by ``generate_metric_view`` grouped on
    ``category``. The first column of that view (other than the grouping
    column) whose name contains "rate", case-insensitively, is used as the
    risk score. The view must also carry ``total_accounts`` and
    ``total_balance`` columns, which are summed per segment.

    Args:
        df: Master dataframe
        metric_name: Metric name for risk calculation
        category: Segmentation category (column the view is grouped on)

    Returns:
        DataFrame with one row per segment and the columns ``Segment``,
        ``Risk_Score`` (mean of the rate column), ``total_accounts`` and
        ``total_balance``.

    Raises:
        ValueError: If the metric view contains no rate column.
    """
    result = generate_metric_view(
        df=df,
        metric_name=metric_name,
        group_col=category
    )

    # Exclude the grouping column: a category whose own name contains
    # "rate" must not be mistaken for the metric column. str() keeps the
    # search working when a column label is not a string.
    rate_col = next(
        (
            col for col in result.columns
            if col != category and "rate" in str(col).lower()
        ),
        None
    )
    if rate_col is None:
        raise ValueError(
            f"No rate column found for metric {metric_name!r} grouped by "
            f"{category!r}; available columns: {list(result.columns)}"
        )

    # Calculate average rate per segment.
    # NOTE(review): this is an unweighted mean of the rows in the view. If
    # the view can hold several rows per segment, an account- or
    # balance-weighted mean may be what is intended - confirm.
    segment_risk = (
        result.groupby(category)
        .agg({
            rate_col: "mean",
            "total_accounts": "sum",
            "total_balance": "sum"
        })
        .reset_index()
    )

    return segment_risk.rename(
        columns={
            category: "Segment",
            rate_col: "Risk_Score"
        }
    )
def generate_segment_risk_heatmap(
    df,
    metrics=None,
    categories=None
):
    """
    Generate heatmap showing risk scores across segments and metrics.

    Each row is one segment of one category and each column is one metric.
    Rows are ordered by their mean score over the metrics that could be
    computed, highest first. A metric/category combination that raises is
    reported on stdout and left as a gap in the heatmap instead of being
    drawn as a 0% score.

    Args:
        df: Master dataframe
        metrics: List of metrics to evaluate (defaults to the four
            delinquency metrics listed below)
        categories: List of categories to analyze (defaults to the four
            segmentation columns listed below)

    Returns:
        Plotly figure with heatmap
    """
    if metrics is None:
        metrics = ["30+@3", "30+@6", "60+@6", "Yr1 NCL"]
    if categories is None:
        categories = [
            "fico_band",
            "sourcing_channel",
            "city_tier",
            "occupation_type"
        ]

    # Prepare data for heatmap: metric -> {segment key -> score}
    heatmap_data = {}
    # segment key -> human readable axis label
    all_segments = {}

    for metric in metrics:
        metric_scores = {}
        for category in categories:
            try:
                segment_risk = calculate_segment_risk_score(
                    df=df,
                    metric_name=metric,
                    category=category
                )
                category_label = category.replace('_', ' ').title()
                # zip over the two columns keeps every segment value in its
                # own dtype; iterrows() upcasts mixed numeric rows, which
                # turns an integer segment such as 1 into 1.0 in the label.
                for segment, score in zip(
                    segment_risk["Segment"],
                    segment_risk["Risk_Score"]
                ):
                    segment_key = f"{category}_{segment}"
                    metric_scores[segment_key] = score
                    all_segments[segment_key] = f"{category_label}: {segment}"
            except Exception as e:
                # Best effort: one failing combination must not prevent the
                # rest of the heatmap from being drawn.
                print(f"Error processing {metric} x {category}: {e}")
        heatmap_data[metric] = metric_scores

    # Create DataFrame for heatmap. Missing combinations stay NaN so they
    # are not presented as a real 0% risk score.
    heatmap_df = pd.DataFrame(heatmap_data, dtype=float)

    # Sort by average risk (NaN cells are skipped by mean). Ordering through
    # a separate Series avoids adding a helper column that could clash with
    # a metric name.
    row_order = heatmap_df.mean(axis=1).sort_values(ascending=False).index
    heatmap_df = heatmap_df.loc[row_order]

    cell_text = [
        ["" if pd.isna(val) else f"{val:.2f}%" for val in row]
        for row in heatmap_df.values
    ]

    # Create heatmap
    fig = go.Figure(
        data=go.Heatmap(
            z=heatmap_df.values,
            x=list(heatmap_df.columns),
            y=[all_segments.get(idx, idx) for idx in heatmap_df.index],
            colorscale="RdYlGn_r",
            # No hover label on cells that have no score.
            hoverongaps=False,
            hovertemplate=(
                "<b>Segment: %{y}</b><br>" +
                "<b>Metric: %{x}</b><br>" +
                "Risk Score: %{z:.2f}%<br>" +
                "<extra></extra>"
            ),
            text=cell_text,
            texttemplate="%{text}",
            textfont={"size": 10},
            colorbar=dict(
                title="Risk Score<br>(%)"
            )
        )
    )

    fig.update_layout(
        title="Segment Risk Heatmap Across Delinquency Metrics",
        xaxis_title="Delinquency Metrics",
        yaxis_title="Segments",
        # 25px per row, but never shorter than 400px
        height=max(400, len(heatmap_df) * 25),
        template="plotly_white",
        hovermode="closest"
    )

    return fig
def generate_segment_risk_ranking(
    df,
    metric_name,
    category,
    top_n=10
):
    """
    Build a horizontal bar chart of the riskiest segments of one category.

    Args:
        df: Master dataframe
        metric_name: Metric name for risk calculation
        category: Segmentation category
        top_n: Number of top risk segments to display

    Returns:
        Plotly bar chart figure
    """
    def _risk_color(score):
        # Red above 10, orange above 5, green otherwise.
        if score > 10:
            return "#d62728"
        if score > 5:
            return "#ff7f0e"
        return "#2ca02c"

    scores = calculate_segment_risk_score(
        df=df,
        metric_name=metric_name,
        category=category
    )

    # Ascending sort followed by tail() keeps the top_n highest scores and
    # leaves the highest one last in the trace.
    ranked = scores.sort_values("Risk_Score", ascending=True).tail(top_n)

    bar_colors = [_risk_color(score) for score in ranked["Risk_Score"]]

    hover_text = (
        "<b>Segment: %{y}</b><br>"
        "Risk Score: %{x:.2f}%<br>"
        "Accounts: %{customdata[0]}<br>"
        "Balance: %{customdata[1]:,.0f}<br>"
        "<extra></extra>"
    )

    bars = go.Bar(
        y=ranked["Segment"],
        x=ranked["Risk_Score"],
        orientation="h",
        marker=dict(
            color=bar_colors,
            line=dict(color="white", width=1)
        ),
        text=ranked["Risk_Score"],
        texttemplate="%{text:.2f}%",
        textposition="outside",
        hovertemplate=hover_text,
        # Account count and balance are only shown in the hover label.
        customdata=ranked[["total_accounts", "total_balance"]].values
    )
    fig = go.Figure(data=bars)

    category_label = category.replace('_', ' ').title()
    fig.update_layout(
        title=f"Top {top_n} High-Risk Segments: {metric_name} by {category_label}",
        xaxis_title="Risk Score (%)",
        yaxis_title=category_label,
        height=400 + (top_n * 15),
        template="plotly_white",
        hovermode="closest"
    )
    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor="lightgray"
    )

    return fig
def generate_multi_category_risk_comparison(
    df,
    metric_name
):
    """
    Compare risk across all categories for a single metric.

    Draws a 2x2 grid of horizontal bar charts, one per category, each
    showing the five segments with the highest risk score. A category that
    raises while being processed is reported on stdout and its subplot is
    left empty.

    Args:
        df: Master dataframe
        metric_name: Metric name for risk calculation

    Returns:
        Plotly figure with subplots (one per category)
    """
    # Imported locally because the module does not import it at file level.
    from plotly.subplots import make_subplots

    categories = [
        "fico_band",
        "sourcing_channel",
        "city_tier",
        "occupation_type"
    ]

    # Create subplots
    fig = make_subplots(
        rows=2,
        cols=2,
        subplot_titles=[cat.replace('_', ' ').title() for cat in categories],
        specs=[
            [{"type": "bar"}, {"type": "bar"}],
            [{"type": "bar"}, {"type": "bar"}]
        ]
    )

    # Grid cell (row, col) for each category, in the same order as above.
    positions = [
        (1, 1),
        (1, 2),
        (2, 1),
        (2, 2)
    ]

    for category, (row, col) in zip(categories, positions):
        try:
            segment_risk = calculate_segment_risk_score(
                df=df,
                metric_name=metric_name,
                category=category
            )

            # Ascending sort + tail keeps the five highest scores, with the
            # highest one last in the trace.
            segment_risk = segment_risk.sort_values(
                "Risk_Score",
                ascending=True
            ).tail(5)

            fig.add_trace(
                go.Bar(
                    y=segment_risk["Segment"],
                    x=segment_risk["Risk_Score"],
                    orientation="h",
                    name=category,
                    showlegend=False,
                    marker=dict(
                        # Bar shade follows the score itself.
                        color=segment_risk["Risk_Score"],
                        colorscale="Reds",
                        showscale=False
                    ),
                    text=segment_risk["Risk_Score"],
                    texttemplate="%{text:.2f}%",
                    textposition="outside",
                    hovertemplate=(
                        "<b>%{y}</b><br>" +
                        "Risk Score: %{x:.2f}%<br>" +
                        "<extra></extra>"
                    )
                ),
                row=row,
                col=col
            )
            fig.update_xaxes(
                title_text="Risk Score (%)",
                row=row,
                col=col
            )
        except Exception as e:
            # Best effort: keep the remaining subplots when one fails.
            print(f"Error processing category {category}: {e}")

    fig.update_layout(
        title_text=f"High-Risk Segments Across Categories: {metric_name}",
        height=800,
        template="plotly_white",
        hovermode="closest"
    )

    return fig
def calculate_portfolio_risk_summary(
    df,
    metrics=None,
    categories=None
):
    """
    Calculate overall portfolio risk summary across metrics and categories.

    One row is produced per metric/category combination that can be
    computed. A combination that raises is reported on stdout and skipped.
    The returned frame always has the same columns, even when it has no
    rows.

    Args:
        df: Master dataframe
        metrics: List of metrics to evaluate (defaults to the four
            delinquency metrics listed below)
        categories: List of categories to analyze (defaults to the four
            segmentation columns listed below)

    Returns:
        DataFrame with the columns ``Metric``, ``Category``, ``Avg_Risk``,
        ``Max_Risk`` and ``High_Risk_Segments`` (number of segments whose
        risk score is above 10).
    """
    if metrics is None:
        metrics = ["30+@3", "30+@6", "60+@6", "Yr1 NCL"]
    if categories is None:
        categories = [
            "fico_band",
            "sourcing_channel",
            "city_tier",
            "occupation_type"
        ]

    summary_columns = [
        "Metric",
        "Category",
        "Avg_Risk",
        "Max_Risk",
        "High_Risk_Segments"
    ]
    summary_data = []

    for metric in metrics:
        for category in categories:
            try:
                segment_risk = calculate_segment_risk_score(
                    df=df,
                    metric_name=metric,
                    category=category
                )
                scores = segment_risk["Risk_Score"]
                summary_data.append({
                    "Metric": metric,
                    "Category": category.replace('_', ' ').title(),
                    "Avg_Risk": scores.mean(),
                    "Max_Risk": scores.max(),
                    "High_Risk_Segments": int((scores > 10).sum())
                })
            except Exception as e:
                # Best effort: keep the remaining combinations when one fails.
                print(f"Error calculating summary for {metric} x {category}: {e}")

    # Passing the column list keeps the schema stable when no row could be
    # computed; without it the result would have no columns at all.
    return pd.DataFrame(summary_data, columns=summary_columns)