import os
import time
import logging
import sys

import gradio as gr
from pinecone import Pinecone, ServerlessSpec

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, Settings
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.readers.file import PDFReader
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding


# -----------------------------
# Logging
# -----------------------------
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logger = logging.getLogger(__name__)


# -----------------------------
# Environment Variables
# Add these in Hugging Face Spaces Secrets
# -----------------------------
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME", "dds-hr-chatbot")
PINECONE_CLOUD = os.getenv("PINECONE_CLOUD", "aws")
PINECONE_REGION = os.getenv("PINECONE_REGION", "us-east-1")

REINDEX_ON_STARTUP = os.getenv("REINDEX_ON_STARTUP", "false").lower() == "true"

DATA_DIR = "data"

# Upper bound (seconds) on how long startup waits for a freshly created
# Pinecone index to report ready, and the pause between status polls.
INDEX_READY_TIMEOUT_SECONDS = 300
INDEX_READY_POLL_SECONDS = 2

if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY is missing. Please add it in Hugging Face Spaces secrets.")

if not PINECONE_API_KEY:
    raise ValueError("PINECONE_API_KEY is missing. Please add it in Hugging Face Spaces secrets.")


# -----------------------------
# System Prompt
# -----------------------------
system_prompt = """
You are Ayesha, the Decoding Data Science (DDS) Enterprise HR Chatbot.

Your role is to answer questions using only the uploaded DDS HR Handbook.

Core rules:
- Answer only DDS HR policy questions that are supported by the handbook.
- Do not answer questions outside HR policy scope.
- Do not answer confidential questions, salary questions, legal questions, or old-policy questions.
- If the answer is not available in the handbook, politely say that the information is not available and direct the user to connect@decodingdatascience.com.
- Do not reveal internal reasoning.
- Keep answers concise, professional, and helpful.
- Never invent information.

For forbidden, confidential, unsupported, or out-of-scope topics, respond with:
“I’m sorry, I can only answer questions about the latest DDS HR policies. For confidential or other queries, please email connect@decodingdatascience.com.”

Remember:
You are Ayesha, the DDS Enterprise HR Chatbot.
You must only answer from the authorized HR handbook content.
"""


# -----------------------------
# LlamaIndex Settings
# -----------------------------
# The system prompt is attached to the LLM itself so that it accompanies every
# LLM call made by the query engine.
# NOTE(review): `as_query_engine(system_prompt=...)` appears to be swallowed by
# **kwargs in current LlamaIndex releases, which would leave the HR guardrails
# unapplied - confirm against the pinned llama-index version.
Settings.llm = OpenAI(
    model="gpt-4o-mini",
    temperature=0.2,
    api_key=OPENAI_API_KEY,
    system_prompt=system_prompt,
)

Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-ada-002",
    api_key=OPENAI_API_KEY,
)

Settings.chunk_size = 600
Settings.chunk_overlap = 200


# -----------------------------
# Pinecone Setup
# -----------------------------
def get_existing_index_names(pc):
    """
    Return the names of the indexes visible to the Pinecone client ``pc``.

    Handles different Pinecone SDK return styles safely: a listing object
    exposing ``.names()``, or an iterable of dicts / objects carrying a
    ``name`` entry. Entries without a usable name are dropped.
    """
    listing = pc.list_indexes()

    try:
        return listing.names()
    except Exception:
        # Best-effort fallback for SDK variants without a working `.names()`;
        # the listing fetched above is reused instead of calling the API again.
        pass

    names = []
    for index_info in listing:
        if isinstance(index_info, dict):
            name = index_info.get("name")
        else:
            name = getattr(index_info, "name", None)
        if name:
            names.append(name)
    return names


def _index_is_ready(description):
    """Return True when an index description reports a ready status."""
    status = description.status
    try:
        return bool(status["ready"])
    except Exception:
        # Some SDK variants expose the status as an object rather than a mapping.
        return bool(getattr(status, "ready", False))


def setup_pinecone_index():
    """
    Return a handle to the configured Pinecone index, creating it if needed.

    When ``PINECONE_INDEX_NAME`` is not among the existing indexes, a
    serverless index (dimension 1536, cosine metric) is created in
    ``PINECONE_CLOUD`` / ``PINECONE_REGION`` and this function polls until
    the index reports ready.

    Raises:
        TimeoutError: the new index did not become ready within
            ``INDEX_READY_TIMEOUT_SECONDS``.
    """
    pc = Pinecone(api_key=PINECONE_API_KEY)

    if PINECONE_INDEX_NAME in get_existing_index_names(pc):
        logger.info("Using existing Pinecone index: %s", PINECONE_INDEX_NAME)
        return pc.Index(PINECONE_INDEX_NAME)

    logger.info("Creating Pinecone index: %s", PINECONE_INDEX_NAME)
    pc.create_index(
        name=PINECONE_INDEX_NAME,
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(
            cloud=PINECONE_CLOUD,
            region=PINECONE_REGION,
        ),
    )

    # Bounded wait: without a deadline a stuck index would hang startup forever.
    deadline = time.monotonic() + INDEX_READY_TIMEOUT_SECONDS
    while not _index_is_ready(pc.describe_index(PINECONE_INDEX_NAME)):
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Pinecone index '{PINECONE_INDEX_NAME}' was not ready after "
                f"{INDEX_READY_TIMEOUT_SECONDS} seconds."
            )
        logger.info("Waiting for Pinecone index to be ready...")
        time.sleep(INDEX_READY_POLL_SECONDS)

    return pc.Index(PINECONE_INDEX_NAME)


# -----------------------------
# Load or Create LlamaIndex Query Engine
# -----------------------------
def build_query_engine():
    """
    Build the query engine backed by the Pinecone vector store.

    If the Pinecone index reports zero vectors, or ``REINDEX_ON_STARTUP`` is
    set, the PDFs under ``DATA_DIR`` are loaded and indexed; otherwise the
    index is loaded from the vectors already stored.

    Raises:
        ValueError: ``DATA_DIR`` is not a directory, or it yielded no PDF
            documents.
    """
    pinecone_index = setup_pinecone_index()

    vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    index_stats = pinecone_index.describe_index_stats()
    total_vectors = index_stats.get("total_vector_count", 0)

    if total_vectors == 0 or REINDEX_ON_STARTUP:
        # NOTE(review): when REINDEX_ON_STARTUP is true and vectors already
        # exist, documents are inserted again without clearing the index
        # first, which presumably duplicates content - confirm whether the
        # index should be wiped before re-indexing.
        logger.info("Loading documents and creating vector index...")

        # isdir (not exists): a plain file named "data" is not a usable folder.
        if not os.path.isdir(DATA_DIR):
            raise ValueError(
                "The 'data' folder is missing. Please create a data folder and upload your PDF file inside it."
            )

        documents = SimpleDirectoryReader(
            input_dir=DATA_DIR,
            required_exts=[".pdf"],
            file_extractor={".pdf": PDFReader()},
        ).load_data()

        if not documents:
            raise ValueError("No PDF documents were loaded from the 'data' folder.")

        index = VectorStoreIndex.from_documents(
            documents,
            storage_context=storage_context,
        )

        logger.info("Documents indexed successfully.")
    else:
        logger.info("Existing Pinecone vectors found. Loading index from vector store.")
        index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

    # `system_prompt` is kept here for backward compatibility; the prompt is
    # also configured on Settings.llm above.
    return index.as_query_engine(
        similarity_top_k=5,
        system_prompt=system_prompt,
    )


query_engine = build_query_engine()


# -----------------------------
# Query Function
# -----------------------------
def query_doc(prompt):
    """
    Run ``prompt`` through the query engine and return the answer as text.

    Any exception is logged with its traceback and replaced by a generic
    apology so the chat UI never surfaces raw errors.
    """
    try:
        return str(query_engine.query(prompt))
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Error while answering query: %s", e)
        return "Sorry, something went wrong while processing your question. Please try again."
# -----------------------------
# Example Questions
# -----------------------------
example_questions = [
    "What is the leave policy?",
    "What is the work from home policy?",
    "What is the probation policy?",
    "What are the employee code of conduct rules?",
    "Who should I contact for confidential HR questions?",
]


# -----------------------------
# Chat Functions
# -----------------------------
initial_chat = [
    {
        "role": "assistant",
        "content": "Hello, I am Ayesha, the DDS Enterprise HR Chatbot. Ask me a question about DDS HR policies.",
    }
]


def _new_chat():
    """Return a fresh chat history whose message dicts are not shared with ``initial_chat``."""
    # list.copy() is shallow: the dicts would be shared with the module-level
    # template, so copy each message as well.
    return [dict(entry) for entry in initial_chat]


def respond(message, chat_history):
    """
    Handle one chat turn.

    Args:
        message: Text typed by the user.
        chat_history: Current list of ``{"role", "content"}`` messages, or
            ``None`` to start from the greeting.

    Returns:
        A tuple ``("", chat_history)``: the empty string clears the input
        box and the history carries the new messages. For a blank message
        only an assistant reminder is appended and no query is made.
    """
    if chat_history is None:
        chat_history = _new_chat()

    if not message or not message.strip():
        chat_history.append(
            {
                "role": "assistant",
                "content": "Please enter a question about the DDS HR handbook.",
            }
        )
        return "", chat_history

    answer = query_doc(message)

    chat_history.extend(
        [
            {"role": "user", "content": message},
            {"role": "assistant", "content": answer},
        ]
    )
    return "", chat_history


def clear_chat():
    """Reset the conversation to the initial greeting."""
    return _new_chat()


def set_example_question(question):
    """Return ``question`` unchanged so it can be placed into the input box."""
    return question


# -----------------------------
# Professional Gradio UI
# -----------------------------
DDS_LOGO_URL = "https://raw.githubusercontent.com/Decoding-Data-Science/airesidency/main/dds-logo-removebg-preview.png"

custom_css = """
body {
    background: linear-gradient(135deg, #f8fafc 0%, #eef2ff 45%, #f8fafc 100%);
}

.gradio-container {
    font-family: Inter, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
}

.main-container {
    max-width: 1250px;
    margin: auto;
}

.header-card {
    background: rgba(255, 255, 255, 0.95);
    border-radius: 24px;
    padding: 26px;
    box-shadow: 0 16px 40px rgba(15, 23, 42, 0.08);
    border: 1px solid #e5e7eb;
    margin-bottom: 20px;
}

.sidebar-card {
    background: rgba(255, 255, 255, 0.96);
    border-radius: 24px;
    padding: 24px;
    box-shadow: 0 16px 40px rgba(15, 23, 42, 0.08);
    border: 1px solid #e5e7eb;
    height: 100%;
}

.chat-card {
    background: rgba(255, 255, 255, 0.96);
    border-radius: 24px;
    padding: 22px;
    box-shadow: 0 16px 40px rgba(15, 23, 42, 0.08);
    border: 1px solid #e5e7eb;
}

.logo-img {
    max-width: 175px;
    margin-bottom: 8px;
}

.title-text {
    font-size: 32px;
    font-weight: 850;
    color: #111827;
    margin-bottom: 8px;
    letter-spacing: -0.03em;
}

.subtitle-text {
    font-size: 16px;
    color: #4b5563;
    line-height: 1.65;
    max-width: 850px;
}

.badge {
    display: inline-block;
    background: #eef2ff;
    color: #3730a3;
    padding: 7px 13px;
    border-radius: 999px;
    font-size: 13px;
    font-weight: 650;
    margin-right: 7px;
    margin-bottom: 8px;
}

.status-box {
    background: #f8fafc;
    border: 1px solid #e5e7eb;
    padding: 14px;
    border-radius: 16px;
    font-size: 14px;
    color: #374151;
    line-height: 1.6;
}

.small-note {
    font-size: 13px;
    color: #6b7280;
    line-height: 1.55;
}

.footer-note {
    font-size: 13px;
    color: #6b7280;
    text-align: center;
    margin-top: 18px;
}

#chatbot {
    min-height: 540px;
    border-radius: 18px;
    border: 1px solid #e5e7eb;
}

#question_box textarea {
    border-radius: 16px !important;
}

.example-button {
    margin-bottom: 8px !important;
    border-radius: 14px !important;
    white-space: normal !important;
    text-align: left !important;
}

.primary-action {
    border-radius: 14px !important;
}

.clear-action {
    border-radius: 14px !important;
}
"""


# NOTE(review): the HTML snippets below wrap their text in the classes
# declared in custom_css (.logo-img, .title-text, .subtitle-text, .badge,
# .status-box, .small-note) and the logo uses DDS_LOGO_URL; without that
# markup the classes and the URL were unused. Confirm the exact markup
# against the intended design.
with gr.Blocks(title="DDS Enterprise HR Chatbot") as demo:
    with gr.Column(elem_classes=["main-container"]):

        # -----------------------------
        # Header
        # -----------------------------
        with gr.Row(elem_classes=["header-card"]):
            with gr.Column(scale=1, min_width=190):
                gr.HTML(
                    f"""
                    <img src="{DDS_LOGO_URL}" alt="DDS Logo" class="logo-img">
                    """
                )

            with gr.Column(scale=5):
                gr.HTML(
                    """
                    <div class="title-text">DDS Enterprise HR Chatbot</div>
                    <div class="subtitle-text">
                        A professional HR policy assistant built for Decoding Data Science. Ask questions from the uploaded DDS HR Handbook and get clear, concise answers based on the available document content.
                    </div>
                    <br>
                    <div>
                        <span class="badge">HR Handbook Q&A</span>
                        <span class="badge">LlamaIndex</span>
                        <span class="badge">Pinecone</span>
                        <span class="badge">OpenAI</span>
                        <span class="badge">Gradio</span>
                    </div>
                    """
                )

        # -----------------------------
        # Two Column Layout
        # -----------------------------
        with gr.Row():

            # Left Sidebar
            with gr.Column(scale=1, min_width=300, elem_classes=["sidebar-card"]):
                gr.Markdown(
                    """
                    ### What this assistant can help with

                    This chatbot answers questions only from the uploaded DDS HR Handbook.

                    **You can ask about:**
                    - Leave policies
                    - Work from home rules
                    - Probation guidelines
                    - Code of conduct
                    - Employee handbook policies
                    - HR contact process
                    """
                )

                gr.HTML(
                    """
                    <div class="status-box">
                        <b>Scope:</b> DDS HR policies only<br>
                        <b>Data source:</b> Uploaded HR handbook<br>
                        <b>Confidential questions:</b> Redirected to HR email
                    </div>
                    """
                )

                gr.Markdown("### Quick questions")

                # One button per example question; kept in order so they can
                # be paired with example_questions when wiring click handlers.
                example_buttons = [
                    gr.Button(
                        question,
                        variant="secondary",
                        size="sm",
                        elem_classes=["example-button"],
                    )
                    for question in example_questions
                ]

                gr.HTML(
                    """
                    <div class="small-note">
                        <b>Important:</b><br>
                        This chatbot does not answer salary, confidential, legal, or non-HR questions. For confidential queries, contact connect@decodingdatascience.com.
                    </div>
                    """
                )

            # Right Chat Area
            with gr.Column(scale=3, elem_classes=["chat-card"]):
                chatbot = gr.Chatbot(
                    label="DDS HR Assistant",
                    elem_id="chatbot",
                    value=_new_chat(),
                    height=540,
                )

                user_input = gr.Textbox(
                    label="Ask your HR policy question",
                    placeholder="Example: What is the leave policy?",
                    lines=2,
                    elem_id="question_box",
                )

                with gr.Row():
                    submit_btn = gr.Button(
                        "Ask Question",
                        variant="primary",
                        elem_classes=["primary-action"],
                    )

                    clear_btn = gr.Button(
                        "Clear Chat",
                        variant="secondary",
                        elem_classes=["clear-action"],
                    )

                gr.Markdown(
                    """
                    **Tip:** Ask specific questions for better answers.
                    Example: “What does the handbook say about probation?” instead of “Tell me everything.”
                    """
                )

        # -----------------------------
        # Button Actions
        # -----------------------------
        # The button and the Enter key in the textbox trigger the same handler.
        submit_btn.click(
            fn=respond,
            inputs=[user_input, chatbot],
            outputs=[user_input, chatbot],
        )

        user_input.submit(
            fn=respond,
            inputs=[user_input, chatbot],
            outputs=[user_input, chatbot],
        )

        clear_btn.click(
            fn=clear_chat,
            inputs=None,
            outputs=chatbot,
        )

        # Each example button copies its own question into the input box; the
        # question is carried through a per-button gr.State value.
        for btn, question in zip(example_buttons, example_questions):
            btn.click(
                fn=set_example_question,
                inputs=gr.State(question),
                outputs=user_input,
            )

        # -----------------------------
        # Footer
        # -----------------------------
        # NOTE(review): no footer text is present in the source; the
        # .footer-note class in custom_css suggests one was intended.
        gr.HTML(
            """
            """
        )


if __name__ == "__main__":
    # NOTE(review): passing theme/css to launch() presumably targets Gradio 6+,
    # where these options moved from gr.Blocks() - confirm the pinned version.
    demo.launch(
        theme=gr.themes.Soft(
            primary_hue="indigo",
            neutral_hue="slate",
        ),
        css=custom_css,
    )