# visualizations/segment_ranking.py
"""Segment-level risk ranking charts and summaries.

Every public function here is built on ``calculate_segment_risk_score``,
which reduces the output of ``generate_metric_view`` to one row per segment.
The charts render risk scores with a ``%`` suffix, so scores are assumed to
already be expressed in percent (TODO confirm against ``generate_metric_view``).
"""

import pandas as pd
import plotly.express as px  # unused in this module; kept for importers
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from analytics.performance_analysis import generate_metric_view
from metrics.metric_registry import METRIC_FUNCTIONS  # unused here; kept for importers

# Defaults shared by the heatmap, the multi-category comparison and the summary.
_DEFAULT_METRICS = ("30+@3", "30+@6", "60+@6", "Yr1 NCL")
_DEFAULT_CATEGORIES = (
    "fico_band",
    "sourcing_channel",
    "city_tier",
    "occupation_type",
)

# Risk-score cut-offs (same unit as Risk_Score) used for colouring and counting.
_HIGH_RISK_THRESHOLD = 10
_MEDIUM_RISK_THRESHOLD = 5

# 2x2 grid cells used by the multi-category comparison, in category order.
_SUBPLOT_POSITIONS = ((1, 1), (1, 2), (2, 1), (2, 2))

_SUMMARY_COLUMNS = [
    "Metric",
    "Category",
    "Avg_Risk",
    "Max_Risk",
    "High_Risk_Segments",
]


def _humanize(category):
    """Turn a snake_case column name into a display label ("fico_band" -> "Fico Band")."""
    return category.replace("_", " ").title()


def _risk_color(score):
    """Map a risk score to a red / orange / green bar colour."""
    if score > _HIGH_RISK_THRESHOLD:
        return "#d62728"
    if score > _MEDIUM_RISK_THRESHOLD:
        return "#ff7f0e"
    return "#2ca02c"


def calculate_segment_risk_score(df, metric_name, category):
    """
    Calculate risk scores for each segment in a category.

    The metric view is grouped by ``category``; the first column whose name
    contains "rate" (case-insensitive) is averaged per segment, while
    ``total_accounts`` and ``total_balance`` are summed. Note that the average
    is a plain (unweighted) mean of the rows returned by
    ``generate_metric_view``.

    Args:
        df: Master dataframe
        metric_name: Metric name for risk calculation
        category: Segmentation category

    Returns:
        DataFrame with columns ``Segment``, ``Risk_Score``,
        ``total_accounts`` and ``total_balance``

    Raises:
        IndexError: If the metric view has no column containing "rate".
    """
    result = generate_metric_view(
        df=df,
        metric_name=metric_name,
        group_col=category,
    )

    rate_cols = [col for col in result.columns if "rate" in col.lower()]
    if not rate_cols:
        # Same exception type the bare ``[...][0]`` lookup produced, but with
        # enough context to diagnose which metric/category failed.
        raise IndexError(
            f"No rate column found in metric view for {metric_name!r} "
            f"grouped by {category!r}"
        )
    rate_col = rate_cols[0]

    # Average rate per segment; volumes are additive so they are summed.
    segment_risk = (
        result.groupby(category)
        .agg(
            {
                rate_col: "mean",
                "total_accounts": "sum",
                "total_balance": "sum",
            }
        )
        .reset_index()
    )

    return segment_risk.rename(
        columns={
            category: "Segment",
            rate_col: "Risk_Score",
        }
    )


def generate_segment_risk_heatmap(df, metrics=None, categories=None):
    """
    Generate heatmap showing risk scores across segments and metrics.

    Rows are "<Category>: <segment>" labels, columns are metrics. A
    metric/category combination that fails is reported on stdout and skipped;
    cells without a score are filled with 0.

    Args:
        df: Master dataframe
        metrics: List of metrics to evaluate (defaults to the standard
            delinquency metrics)
        categories: List of categories to analyze (defaults to the standard
            segmentation columns)

    Returns:
        Plotly figure with heatmap
    """
    if metrics is None:
        metrics = list(_DEFAULT_METRICS)
    if categories is None:
        categories = list(_DEFAULT_CATEGORIES)

    # metric -> {segment_key: score}; segment_key -> display label
    heatmap_data = {}
    segment_labels = {}

    for metric in metrics:
        metric_scores = {}
        for category in categories:
            try:
                segment_risk = calculate_segment_risk_score(
                    df=df,
                    metric_name=metric,
                    category=category,
                )
                category_label = _humanize(category)
                # Column-wise iteration keeps each segment's own dtype;
                # iterrows() would upcast e.g. integer tiers to floats ("1.0").
                for segment, score in zip(
                    segment_risk["Segment"], segment_risk["Risk_Score"]
                ):
                    segment_key = f"{category}_{segment}"
                    metric_scores[segment_key] = score
                    segment_labels[segment_key] = f"{category_label}: {segment}"
            except Exception as e:
                # Best effort: one bad combination must not abort the chart.
                print(f"Error processing {metric} x {category}: {e}")
        heatmap_data[metric] = metric_scores

    heatmap_df = pd.DataFrame(heatmap_data).fillna(0)

    # Order rows by average risk (highest first). Sorting a separate Series
    # avoids adding a temporary column that could clash with a metric name.
    row_order = heatmap_df.mean(axis=1).sort_values(ascending=False).index
    heatmap_df = heatmap_df.loc[row_order]

    fig = go.Figure(
        data=go.Heatmap(
            z=heatmap_df.values,
            x=heatmap_df.columns,
            y=[segment_labels.get(idx, idx) for idx in heatmap_df.index],
            colorscale="RdYlGn_r",
            hovertemplate=(
                "Segment: %{y}<br>"
                "Metric: %{x}<br>"
                "Risk Score: %{z:.2f}%<br>"
                "<extra></extra>"
            ),
            text=[[f"{val:.2f}%" for val in row] for row in heatmap_df.values],
            texttemplate="%{text}",
            textfont={"size": 10},
            colorbar=dict(title="Risk Score<br>(%)"),
        )
    )

    fig.update_layout(
        title="Segment Risk Heatmap Across Delinquency Metrics",
        xaxis_title="Delinquency Metrics",
        yaxis_title="Segments",
        height=max(400, len(heatmap_df) * 25),
        template="plotly_white",
        hovermode="closest",
    )

    return fig


def generate_segment_risk_ranking(df, metric_name, category, top_n=10):
    """
    Generate bar chart ranking segments by risk within a category.

    Bars are coloured red above the high-risk threshold (10), orange above the
    medium threshold (5) and green otherwise.

    Args:
        df: Master dataframe
        metric_name: Metric name for risk calculation
        category: Segmentation category
        top_n: Number of top risk segments to display

    Returns:
        Plotly bar chart figure
    """
    segment_risk = calculate_segment_risk_score(
        df=df,
        metric_name=metric_name,
        category=category,
    )

    # Keep the top_n highest scores, sorted ascending: a horizontal bar chart
    # draws the first row at the bottom, so the riskiest segment ends up on top.
    segment_risk = segment_risk.sort_values("Risk_Score", ascending=True).tail(top_n)

    colors = [_risk_color(score) for score in segment_risk["Risk_Score"]]
    category_label = _humanize(category)

    fig = go.Figure(
        data=go.Bar(
            y=segment_risk["Segment"],
            x=segment_risk["Risk_Score"],
            orientation="h",
            marker=dict(
                color=colors,
                line=dict(color="white", width=1),
            ),
            text=segment_risk["Risk_Score"],
            texttemplate="%{text:.2f}%",
            textposition="outside",
            hovertemplate=(
                "Segment: %{y}<br>"
                "Risk Score: %{x:.2f}%<br>"
                "Accounts: %{customdata[0]}<br>"
                "Balance: %{customdata[1]:,.0f}<br>"
                "<extra></extra>"
            ),
            customdata=segment_risk[["total_accounts", "total_balance"]].values,
        )
    )

    fig.update_layout(
        title=f"Top {top_n} High-Risk Segments: {metric_name} by {category_label}",
        xaxis_title="Risk Score (%)",
        yaxis_title=category_label,
        height=400 + (top_n * 15),
        template="plotly_white",
        hovermode="closest",
    )

    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor="lightgray",
    )

    return fig


def generate_multi_category_risk_comparison(df, metric_name):
    """
    Compare risk across all categories for a single metric.

    Draws a 2x2 grid with one horizontal bar chart per default category, each
    showing that category's five highest-risk segments. A category that fails
    is reported on stdout and its subplot is left empty.

    Args:
        df: Master dataframe
        metric_name: Metric name for risk calculation

    Returns:
        Plotly figure with subplots (one per category)
    """
    categories = list(_DEFAULT_CATEGORIES)

    fig = make_subplots(
        rows=2,
        cols=2,
        subplot_titles=[_humanize(cat) for cat in categories],
        specs=[
            [{"type": "bar"}, {"type": "bar"}],
            [{"type": "bar"}, {"type": "bar"}],
        ],
    )

    for category, (row, col) in zip(categories, _SUBPLOT_POSITIONS):
        try:
            segment_risk = calculate_segment_risk_score(
                df=df,
                metric_name=metric_name,
                category=category,
            )

            # Top 5 by score, ascending so the riskiest bar is drawn on top.
            segment_risk = segment_risk.sort_values(
                "Risk_Score", ascending=True
            ).tail(5)

            fig.add_trace(
                go.Bar(
                    y=segment_risk["Segment"],
                    x=segment_risk["Risk_Score"],
                    orientation="h",
                    name=category,
                    showlegend=False,
                    marker=dict(
                        color=segment_risk["Risk_Score"],
                        colorscale="Reds",
                        showscale=False,
                    ),
                    text=segment_risk["Risk_Score"],
                    texttemplate="%{text:.2f}%",
                    textposition="outside",
                    hovertemplate=(
                        "%{y}<br>"
                        "Risk Score: %{x:.2f}%<br>"
                        "<extra></extra>"
                    ),
                ),
                row=row,
                col=col,
            )

            fig.update_xaxes(
                title_text="Risk Score (%)",
                row=row,
                col=col,
            )
        except Exception as e:
            # Best effort: keep the remaining subplots even if one fails.
            print(f"Error processing category {category}: {e}")

    fig.update_layout(
        title_text=f"High-Risk Segments Across Categories: {metric_name}",
        height=800,
        template="plotly_white",
        hovermode="closest",
    )

    return fig


def calculate_portfolio_risk_summary(df, metrics=None):
    """
    Calculate overall portfolio risk summary across metrics and categories.

    For every metric/category pair this records the mean and maximum segment
    risk score and the number of segments above the high-risk threshold (10).
    Pairs that fail are reported on stdout and omitted from the result.

    Args:
        df: Master dataframe
        metrics: List of metrics to evaluate (defaults to the standard
            delinquency metrics)

    Returns:
        DataFrame with columns ``Metric``, ``Category``, ``Avg_Risk``,
        ``Max_Risk`` and ``High_Risk_Segments``; the columns are present even
        when no pair could be computed
    """
    if metrics is None:
        metrics = list(_DEFAULT_METRICS)

    summary_data = []

    for metric in metrics:
        for category in _DEFAULT_CATEGORIES:
            try:
                segment_risk = calculate_segment_risk_score(
                    df=df,
                    metric_name=metric,
                    category=category,
                )
                scores = segment_risk["Risk_Score"]

                summary_data.append(
                    {
                        "Metric": metric,
                        "Category": _humanize(category),
                        "Avg_Risk": scores.mean(),
                        "Max_Risk": scores.max(),
                        "High_Risk_Segments": int(
                            (scores > _HIGH_RISK_THRESHOLD).sum()
                        ),
                    }
                )
            except Exception as e:
                # Best effort: skip this pair and keep building the summary.
                print(f"Error calculating summary for {metric} x {category}: {e}")

    # Explicit columns keep the schema stable when summary_data is empty.
    return pd.DataFrame(summary_data, columns=_SUMMARY_COLUMNS)