decodingdatascience committed on
Commit
bcf910e
·
verified ·
1 Parent(s): 262b7c1

Create app2.py

Browse files
Files changed (1) hide show
  1. app2.py +588 -0
app2.py ADDED
@@ -0,0 +1,588 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import logging
4
+ import sys
5
+
6
+ import gradio as gr
7
+ from pinecone import Pinecone, ServerlessSpec
8
+
9
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, Settings
10
+ from llama_index.vector_stores.pinecone import PineconeVectorStore
11
+ from llama_index.readers.file import PDFReader
12
+ from llama_index.llms.openai import OpenAI
13
+ from llama_index.embeddings.openai import OpenAIEmbedding
14
+
15
+
16
+ # -----------------------------
17
+ # Logging
18
+ # -----------------------------
19
# Root logging goes to stdout at INFO level; `logger` is this module's logger.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
logger = logging.getLogger(__name__)


# -----------------------------
# Environment Variables
# Add these in Hugging Face Spaces Secrets
# -----------------------------
# Credentials: no default, both are validated further down.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

# Pinecone index location, each overridable through the environment.
PINECONE_INDEX_NAME = os.environ.get("PINECONE_INDEX_NAME", "dds-hr-chatbot")
PINECONE_CLOUD = os.environ.get("PINECONE_CLOUD", "aws")
PINECONE_REGION = os.environ.get("PINECONE_REGION", "us-east-1")

# Only the value "true" (in any letter case) turns re-indexing on.
REINDEX_ON_STARTUP = os.environ.get("REINDEX_ON_STARTUP", "false").lower() == "true"

# Folder that is scanned for PDF files when the index has to be built.
DATA_DIR = "data"

# Fail at import time when a credential is unset or empty.
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY is missing. Please add it in Hugging Face Spaces secrets.")

if not PINECONE_API_KEY:
    raise ValueError("PINECONE_API_KEY is missing. Please add it in Hugging Face Spaces secrets.")
43
+
44
+
45
+ # -----------------------------
46
+ # LlamaIndex Settings
47
+ # -----------------------------
48
# Register the chat model on the shared LlamaIndex `Settings` object.
Settings.llm = OpenAI(
    model="gpt-4o-mini",
    temperature=0.2,
    api_key=OPENAI_API_KEY,
)

# Register the embedding model on the same `Settings` object.
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-ada-002",
    api_key=OPENAI_API_KEY,
)

# Chunking parameters, also stored on `Settings`.
Settings.chunk_size = 600
Settings.chunk_overlap = 200
61
+
62
+
63
+ # -----------------------------
64
+ # System Prompt
65
+ # -----------------------------
66
# Persona and answering rules for the assistant. build_query_engine() passes
# this text to index.as_query_engine(system_prompt=...).
system_prompt = """
You are Ayesha, the Decoding Data Science (DDS) Enterprise HR Chatbot.

Your role is to answer questions using only the uploaded DDS HR Handbook.

Core rules:
- Answer only DDS HR policy questions that are supported by the handbook.
- Do not answer questions outside HR policy scope.
- Do not answer confidential questions, salary questions, legal questions, or old-policy questions.
- If the answer is not available in the handbook, politely say that the information is not available and direct the user to connect@decodingdatascience.com.
- Do not reveal internal reasoning.
- Keep answers concise, professional, and helpful.
- Never invent information.

For forbidden, confidential, unsupported, or out-of-scope topics, respond with:
“I’m sorry, I can only answer questions about the latest DDS HR policies. For confidential or other queries, please email connect@decodingdatascience.com.”

Remember: You are Ayesha, the DDS Enterprise HR Chatbot. You must only answer from the authorized HR handbook content.
"""
85
+
86
+
87
+ # -----------------------------
88
+ # Pinecone Setup
89
+ # -----------------------------
90
def get_existing_index_names(pc):
    """Return the names of the indexes reported by a Pinecone client.

    ``pc.list_indexes()`` is called exactly once and its result is handled
    according to its shape, so a genuine API failure propagates to the caller
    instead of being swallowed and retried:

    * an object with a callable ``names()``: that result is returned as a list;
    * a dict holding the entries under an ``"indexes"`` key;
    * any iterable whose entries are plain strings, dicts with a ``"name"``
      key, or objects with a ``name`` attribute.

    Args:
        pc: Client object exposing ``list_indexes()``.

    Returns:
        list: Index names in listing order. Entries without a usable name
        are skipped.
    """
    listing = pc.list_indexes()

    names_method = getattr(listing, "names", None)
    if callable(names_method):
        return list(names_method())

    if isinstance(listing, dict):
        # Raw payload shape: the entries live under the "indexes" key.
        listing = listing.get("indexes") or []

    names = []
    for entry in listing:
        if isinstance(entry, str):
            name = entry
        elif isinstance(entry, dict):
            name = entry.get("name")
        else:
            name = getattr(entry, "name", None)
        if name:
            names.append(name)
    return names
107
+
108
+
109
def _index_is_ready(description):
    """Return True when an index description reports ``status.ready``."""
    status = description.status
    try:
        return bool(status["ready"])
    except Exception:
        # Deliberately broad: the status may be an attribute-style object, and
        # the error raised by subscripting it depends on the SDK version.
        return bool(getattr(status, "ready", False))


def setup_pinecone_index(ready_timeout=300.0, poll_interval=2.0):
    """Return a handle to the configured Pinecone index, creating it if absent.

    When ``PINECONE_INDEX_NAME`` is not among the existing indexes, a
    serverless index is created (1536 dimensions, cosine metric, in
    ``PINECONE_CLOUD`` / ``PINECONE_REGION``) and the function polls until the
    index reports ready.

    Args:
        ready_timeout: Maximum number of seconds to wait for a newly created
            index to become ready.
        poll_interval: Seconds to sleep between readiness checks.

    Returns:
        The object returned by ``pc.Index(PINECONE_INDEX_NAME)``.

    Raises:
        TimeoutError: If a newly created index is still not ready after
            ``ready_timeout`` seconds.
    """
    pc = Pinecone(api_key=PINECONE_API_KEY)

    if PINECONE_INDEX_NAME in get_existing_index_names(pc):
        logger.info("Using existing Pinecone index: %s", PINECONE_INDEX_NAME)
        return pc.Index(PINECONE_INDEX_NAME)

    logger.info("Creating Pinecone index: %s", PINECONE_INDEX_NAME)
    pc.create_index(
        name=PINECONE_INDEX_NAME,
        # NOTE(review): must equal the output size of the embedding model set
        # on Settings.embed_model -- confirm if that model is ever changed.
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(
            cloud=PINECONE_CLOUD,
            region=PINECONE_REGION,
        ),
    )

    # Bounded wait: without a deadline a stuck index would block startup forever.
    deadline = time.monotonic() + ready_timeout
    while not _index_is_ready(pc.describe_index(PINECONE_INDEX_NAME)):
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Pinecone index '{PINECONE_INDEX_NAME}' was not ready "
                f"after {ready_timeout} seconds."
            )
        logger.info("Waiting for Pinecone index to be ready...")
        time.sleep(poll_interval)

    return pc.Index(PINECONE_INDEX_NAME)
145
+
146
+
147
+ # -----------------------------
148
+ # Load or Create LlamaIndex Query Engine
149
+ # -----------------------------
150
def build_query_engine():
    """Build the query engine backed by the Pinecone index.

    If the index already holds vectors and ``REINDEX_ON_STARTUP`` is false,
    the engine is attached to the existing vectors. Otherwise every PDF in
    ``DATA_DIR`` is loaded and indexed. On a forced re-index the vectors
    already stored are deleted first, so repeated restarts do not pile up
    duplicate or outdated chunks; the deletion only happens after the PDFs
    have been loaded successfully.

    Returns:
        The query engine from ``index.as_query_engine`` with
        ``similarity_top_k=5`` and the module ``system_prompt``.

    Raises:
        ValueError: If ``DATA_DIR`` is not an existing directory, or if no
            PDF documents could be loaded from it.

    NOTE(review): confirm that ``as_query_engine`` really forwards
    ``system_prompt`` to the LLM in the installed LlamaIndex version.
    """
    pinecone_index = setup_pinecone_index()
    vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

    index_stats = pinecone_index.describe_index_stats()
    # `or 0` also covers a count that is present but None.
    total_vectors = index_stats.get("total_vector_count", 0) or 0

    if total_vectors > 0 and not REINDEX_ON_STARTUP:
        logger.info("Existing Pinecone vectors found. Loading index from vector store.")
        index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
    else:
        logger.info("Loading documents and creating vector index...")

        # isdir, not exists: a regular file named "data" is not a usable folder.
        if not os.path.isdir(DATA_DIR):
            raise ValueError(
                "The 'data' folder is missing. Please create a data folder and upload your PDF file inside it."
            )

        documents = SimpleDirectoryReader(
            input_dir=DATA_DIR,
            required_exts=[".pdf"],
            file_extractor={".pdf": PDFReader()},
        ).load_data()

        if not documents:
            raise ValueError("No PDF documents were loaded from the 'data' folder.")

        if total_vectors > 0:
            # Forced re-index: clear what is stored so the new upload replaces
            # it instead of being added next to it.
            # NOTE(review): this clears the default namespace, which is where
            # the vector store above is assumed to write -- confirm.
            logger.info("Removing %s existing vectors before re-indexing.", total_vectors)
            pinecone_index.delete(delete_all=True)

        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        index = VectorStoreIndex.from_documents(
            documents,
            storage_context=storage_context,
        )
        logger.info("Documents indexed successfully.")

    return index.as_query_engine(
        similarity_top_k=5,
        system_prompt=system_prompt,
    )
201
+
202
+
203
# Built once at import time; query_doc() below uses this shared instance.
query_engine = build_query_engine()
204
+
205
+
206
+ # -----------------------------
207
+ # Query Function
208
+ # -----------------------------
209
def query_doc(prompt):
    """Answer a question with the module-level query engine.

    Args:
        prompt: The question text passed to ``query_engine.query``.

    Returns:
        str: The engine response converted to a string, or a fixed apology
        message when answering raised an exception.
    """
    try:
        return str(query_engine.query(prompt))
    except Exception as e:
        # UI boundary: the user gets a generic message, while the log keeps
        # the full traceback (logger.exception) for diagnosis.
        logger.exception("Error while answering query: %s", e)
        return "Sorry, something went wrong while processing your question. Please try again."
217
+
218
+
219
+ # -----------------------------
220
+ # Example Questions
221
+ # -----------------------------
222
# Texts of the quick-question buttons; the UI creates one button per entry,
# in this order.
example_questions = [
    "What is the leave policy?",
    "What is the work from home policy?",
    "What is the probation policy?",
    "What are the employee code of conduct rules?",
    "Who should I contact for confidential HR questions?",
]
229
+
230
+
231
+ # -----------------------------
232
+ # Chat Functions
233
+ # -----------------------------
234
# Greeting that seeds the chat window; respond() and clear_chat() hand out
# copies of this list.
initial_chat = [
    {
        "role": "assistant",
        "content": "Hello, I am Ayesha, the DDS Enterprise HR Chatbot. Ask me a question about DDS HR policies.",
    },
]
240
+
241
+
242
def respond(message, chat_history):
    """Handle one chat turn and return the values for the textbox and chatbot.

    Args:
        message: Text typed by the user; may be ``None`` or blank.
        chat_history: List of ``{"role", "content"}`` dicts. It is extended in
            place. ``None`` is replaced by a copy of ``initial_chat``.

    Returns:
        tuple: ``("", history)`` -- the empty string clears the textbox and
        ``history`` is the updated message list.
    """
    history = initial_chat.copy() if chat_history is None else chat_history

    # Blank input: add a reminder and do not query the engine.
    if not (message and message.strip()):
        history.append(
            {
                "role": "assistant",
                "content": "Please enter a question about the DDS HR handbook.",
            }
        )
        return "", history

    # The answer is computed before the history is touched.
    answer = query_doc(message)
    history.extend(
        (
            {"role": "user", "content": message},
            {"role": "assistant", "content": answer},
        )
    )
    return "", history
272
+
273
+
274
def clear_chat():
    """Return a new list holding the initial greeting, used to reset the chat."""
    return list(initial_chat)
276
+
277
+
278
def set_example_question(question):
    """Return ``question`` unchanged.

    Used as the click handler of the quick-question buttons, whose output is
    the question textbox.
    """
    return question
280
+
281
+
282
+ # -----------------------------
283
+ # Professional Gradio UI
284
+ # -----------------------------
285
# Logo shown in the page header (inserted into an <img> tag by the UI block).
DDS_LOGO_URL = "https://raw.githubusercontent.com/Decoding-Data-Science/airesidency/main/dds-logo-removebg-preview.png"

# Stylesheet passed to gr.Blocks(css=...). The class names and ids used here
# are attached to components through elem_classes / elem_id or appear in the
# HTML snippets of the UI block.
custom_css = """
body {
background: linear-gradient(135deg, #f8fafc 0%, #eef2ff 45%, #f8fafc 100%);
}

.gradio-container {
font-family: Inter, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
}

.main-container {
max-width: 1250px;
margin: auto;
}

.header-card {
background: rgba(255, 255, 255, 0.95);
border-radius: 24px;
padding: 26px;
box-shadow: 0 16px 40px rgba(15, 23, 42, 0.08);
border: 1px solid #e5e7eb;
margin-bottom: 20px;
}

.sidebar-card {
background: rgba(255, 255, 255, 0.96);
border-radius: 24px;
padding: 24px;
box-shadow: 0 16px 40px rgba(15, 23, 42, 0.08);
border: 1px solid #e5e7eb;
height: 100%;
}

.chat-card {
background: rgba(255, 255, 255, 0.96);
border-radius: 24px;
padding: 22px;
box-shadow: 0 16px 40px rgba(15, 23, 42, 0.08);
border: 1px solid #e5e7eb;
}

.logo-img {
max-width: 175px;
margin-bottom: 8px;
}

.title-text {
font-size: 32px;
font-weight: 850;
color: #111827;
margin-bottom: 8px;
letter-spacing: -0.03em;
}

.subtitle-text {
font-size: 16px;
color: #4b5563;
line-height: 1.65;
max-width: 850px;
}

.badge {
display: inline-block;
background: #eef2ff;
color: #3730a3;
padding: 7px 13px;
border-radius: 999px;
font-size: 13px;
font-weight: 650;
margin-right: 7px;
margin-bottom: 8px;
}

.status-box {
background: #f8fafc;
border: 1px solid #e5e7eb;
padding: 14px;
border-radius: 16px;
font-size: 14px;
color: #374151;
line-height: 1.6;
}

.small-note {
font-size: 13px;
color: #6b7280;
line-height: 1.55;
}

.footer-note {
font-size: 13px;
color: #6b7280;
text-align: center;
margin-top: 18px;
}

#chatbot {
min-height: 540px;
border-radius: 18px;
border: 1px solid #e5e7eb;
}

#question_box textarea {
border-radius: 16px !important;
}

.example-button {
margin-bottom: 8px !important;
border-radius: 14px !important;
white-space: normal !important;
text-align: left !important;
}

.primary-action {
border-radius: 14px !important;
}

.clear-action {
border-radius: 14px !important;
}
"""
407
+
408
+
409
# Assemble the Gradio page as `demo`: a header, a sidebar with scope notes and
# quick-question buttons, a chat panel, the event wiring, and a footer.
# NOTE(review): indentation is not visible in this view, so the exact nesting
# of the layout containers below cannot be confirmed from here.
+ with gr.Blocks(
410
+ title="DDS Enterprise HR Chatbot",
411
+ css=custom_css,
412
+ theme=gr.themes.Soft(
413
+ primary_hue="indigo",
414
+ neutral_hue="slate"
415
+ )
416
+ ) as demo:
417
+
418
+ with gr.Column(elem_classes=["main-container"]):
419
+
420
+ # -----------------------------
421
+ # Header
422
+ # -----------------------------
423
+ with gr.Row(elem_classes=["header-card"]):
424
+ with gr.Column(scale=1, min_width=190):
425
+ gr.HTML(
426
+ f"""
427
+ <img src="{DDS_LOGO_URL}" class="logo-img" alt="DDS Logo">
428
+ """
429
+ )
430
+
431
+ with gr.Column(scale=5):
432
+ gr.HTML(
433
+ """
434
+ <div class="title-text">DDS Enterprise HR Chatbot</div>
435
+ <div class="subtitle-text">
436
+ A professional HR policy assistant built for Decoding Data Science.
437
+ Ask questions from the uploaded DDS HR Handbook and get clear, concise answers
438
+ based on the available document content.
439
+ </div>
440
+ <br>
441
+ <span class="badge">HR Handbook Q&A</span>
442
+ <span class="badge">LlamaIndex</span>
443
+ <span class="badge">Pinecone</span>
444
+ <span class="badge">OpenAI</span>
445
+ <span class="badge">Gradio</span>
446
+ """
447
+ )
448
+
449
+ # -----------------------------
450
+ # Two Column Layout
451
+ # -----------------------------
452
+ with gr.Row():
453
+
454
+ # Left Sidebar
455
+ with gr.Column(scale=1, min_width=300, elem_classes=["sidebar-card"]):
456
+
457
+ gr.Markdown(
458
+ """
459
+ ### What this assistant can help with
460
+
461
+ This chatbot answers questions only from the uploaded DDS HR Handbook.
462
+
463
+ **You can ask about:**
464
+
465
+ - Leave policies
466
+ - Work from home rules
467
+ - Probation guidelines
468
+ - Code of conduct
469
+ - Employee handbook policies
470
+ - HR contact process
471
+ """
472
+ )
473
+
474
+ gr.HTML(
475
+ """
476
+ <div class="status-box">
477
+ <strong>Scope:</strong> DDS HR policies only<br>
478
+ <strong>Data source:</strong> Uploaded HR handbook<br>
479
+ <strong>Confidential questions:</strong> Redirected to HR email
480
+ </div>
481
+ """
482
+ )
483
+
484
+ gr.Markdown("### Quick questions")
485
+
# One button per entry of example_questions, collected so the click handlers
# can be attached further down.
486
+ example_buttons = []
487
+
488
+ for question in example_questions:
489
+ btn = gr.Button(
490
+ question,
491
+ variant="secondary",
492
+ size="sm",
493
+ elem_classes=["example-button"]
494
+ )
495
+ example_buttons.append(btn)
496
+
497
+ gr.HTML(
498
+ """
499
+ <hr>
500
+ <div class="small-note">
501
+ <strong>Important:</strong><br>
502
+ This chatbot does not answer salary, confidential, legal, or non-HR questions.
503
+ For confidential queries, contact
504
+ <strong>connect@decodingdatascience.com</strong>.
505
+ </div>
506
+ """
507
+ )
508
+
509
+ # Right Chat Area
510
+ with gr.Column(scale=3, elem_classes=["chat-card"]):
511
+
# The chat window starts with a copy of the initial greeting.
512
+ chatbot = gr.Chatbot(
513
+ label="DDS HR Assistant",
514
+ type="messages",
515
+ elem_id="chatbot",
516
+ value=initial_chat.copy(),
517
+ height=540
518
+ )
519
+
520
+ user_input = gr.Textbox(
521
+ label="Ask your HR policy question",
522
+ placeholder="Example: What is the leave policy?",
523
+ lines=2,
524
+ elem_id="question_box"
525
+ )
526
+
527
+ with gr.Row():
528
+ submit_btn = gr.Button(
529
+ "Ask Question",
530
+ variant="primary",
531
+ elem_classes=["primary-action"]
532
+ )
533
+
534
+ clear_btn = gr.Button(
535
+ "Clear Chat",
536
+ variant="secondary",
537
+ elem_classes=["clear-action"]
538
+ )
539
+
540
+ gr.Markdown(
541
+ """
542
+ **Tip:** Ask specific questions for better answers.
543
+ Example: “What does the handbook say about probation?” instead of “Tell me everything.”
544
+ """
545
+ )
546
+
547
+ # -----------------------------
548
+ # Button Actions
549
+ # -----------------------------
# The "Ask Question" click and the textbox submit both call respond() with the
# textbox text and the chat history, and write its two return values back to
# the same two components.
550
+ submit_btn.click(
551
+ fn=respond,
552
+ inputs=[user_input, chatbot],
553
+ outputs=[user_input, chatbot]
554
+ )
555
+
556
+ user_input.submit(
557
+ fn=respond,
558
+ inputs=[user_input, chatbot],
559
+ outputs=[user_input, chatbot]
560
+ )
561
+
# "Clear Chat" replaces the chat history with the value of clear_chat().
562
+ clear_btn.click(
563
+ fn=clear_chat,
564
+ inputs=None,
565
+ outputs=chatbot
566
+ )
567
+
# Each quick-question button writes its own question text into the textbox;
# the handler only returns the text and does not call respond().
568
+ for btn, question in zip(example_buttons, example_questions):
569
+ btn.click(
570
+ fn=set_example_question,
571
+ inputs=gr.State(question),
572
+ outputs=user_input
573
+ )
574
+
575
+ # -----------------------------
576
+ # Footer
577
+ # -----------------------------
578
+ gr.HTML(
579
+ """
580
+ <div class="footer-note">
581
+ Built by Decoding Data Science | Enterprise HR Chatbot Demo
582
+ </div>
583
+ """
584
+ )
585
+
586
+
587
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()