diff --git a/api/Assistant/assistant_model.py b/api/Assistant/assistant_model.py index 45512b8..2d6d306 100644 --- a/api/Assistant/assistant_model.py +++ b/api/Assistant/assistant_model.py @@ -28,7 +28,6 @@ class Context(Base): id = Column(UUID(as_uuid=True), primary_key=True, default=uuid4) content=Column(Text,nullable=True) - file_url=Column(Text,nullable=True) pecha_title = Column(String(255), nullable=True) pecha_text_id = Column(String(255), nullable=True) assistant_id = Column(UUID(as_uuid=True),ForeignKey("assistant.id", ondelete="CASCADE"),nullable=False) diff --git a/api/Assistant/assistant_response_model.py b/api/Assistant/assistant_response_model.py index 80f0724..191dbd2 100644 --- a/api/Assistant/assistant_response_model.py +++ b/api/Assistant/assistant_response_model.py @@ -4,14 +4,12 @@ class ContextRequest(BaseModel): content: Optional[str] = None - file_url: Optional[str] = None pecha_title: Optional[str] = None pecha_text_id: Optional[str] = None class ContextResponse(BaseModel): id: UUID content: Optional[str] = None - file_url: Optional[str] = None pecha_title: Optional[str] = None pecha_text_id: Optional[str] = None diff --git a/api/Assistant/assistant_service.py b/api/Assistant/assistant_service.py index 4997095..914bd99 100644 --- a/api/Assistant/assistant_service.py +++ b/api/Assistant/assistant_service.py @@ -1,4 +1,3 @@ -import logging from api.Users.user_service import validate_and_extract_user_email from api.db.pg_database import SessionLocal from api.Assistant.assistant_repository import get_all_assistants, get_assistant_by_id_repository, delete_assistant_repository, update_assistant_repository @@ -8,16 +7,15 @@ from api.Assistant.assistant_model import Assistant, Context from uuid import UUID from datetime import datetime, timezone -from fastapi import HTTPException, status +from fastapi import HTTPException, status, UploadFile from api.error_constant import ErrorConstants -from api.upload.S3_utils import generate_presigned_access_url, delete_file -from api.config import get from api.cache.cache_enums import CacheType from api.Assistant.assistant_cache_service import ( get_assistant_detail_cache, set_assistant_detail_cache, delete_assistant_detail_cache, ) +from api.utils import Utils def _build_context_responses(contexts) -> List[ContextResponse]: @@ -25,12 +23,6 @@ def _build_context_responses(contexts) -> List[ContextResponse]: ContextResponse( id=context.id, content=context.content, - file_url=( - generate_presigned_access_url( - bucket_name=get("AWS_BUCKET_NAME"), - s3_key=context.file_url - ) if context.file_url else None - ), pecha_title=context.pecha_title, pecha_text_id=context.pecha_text_id ) for context in contexts @@ -79,21 +71,35 @@ def get_assistants(skip: 0, limit: 20) -> AssistantResponse: return assistant_response -def create_assistant_service(token: str, assistant_request: AssistantRequest): - current_user_email=validate_and_extract_user_email(token=token) +async def create_assistant_service(token: str, assistant_request: AssistantRequest, files: List[UploadFile] = None): + current_user_email = validate_and_extract_user_email(token=token) + contexts_list = [] + for ctx in assistant_request.contexts: + contexts_list.append( + Context(content=ctx.content, pecha_title=ctx.pecha_title, pecha_text_id=ctx.pecha_text_id) + ) + + if files: + for file in files: + if file.filename: + file_bytes = await file.read() + try: + Utils.validate_file(file.filename, len(file_bytes)) + extracted_content = Utils.extract_content_from_file(file_bytes, file.filename) + contexts_list.append(Context(content=extracted_content)) + except ValueError as e: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) + with SessionLocal() as db_session: assistant = Assistant( - name=assistant_request.name, - source_type=assistant_request.source_type, - description=assistant_request.description, - system_prompt=assistant_request.system_prompt, - system_assistance=assistant_request.system_assistance, - created_by=current_user_email, - contexts=[ - Context(content=ctx.content, file_url=ctx.file_url, pecha_title=ctx.pecha_title, pecha_text_id=ctx.pecha_text_id) - for ctx in assistant_request.contexts - ] - ) + name=assistant_request.name, + source_type=assistant_request.source_type, + description=assistant_request.description, + system_prompt=assistant_request.system_prompt, + system_assistance=assistant_request.system_assistance, + created_by=current_user_email, + contexts=contexts_list + ) create_assistant_repository(db=db_session, assistant=assistant) async def get_assistant_by_id_service(assistant_id: UUID) -> AssistantInfoResponse: @@ -118,7 +124,7 @@ async def get_assistant_by_id_service(assistant_id: UUID) -> AssistantInfoRespon return assistant_info async def delete_assistant_service(assistant_id: UUID, token: str): - current_user_email=validate_and_extract_user_email(token=token) + current_user_email = validate_and_extract_user_email(token=token) with SessionLocal() as db_session: assistant = get_assistant_by_id_repository(db=db_session, assistant_id=assistant_id) if current_user_email != assistant.created_by: @@ -126,13 +132,6 @@ async def delete_assistant_service(assistant_id: UUID, token: str): if assistant.system_assistance: raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=ErrorConstants.FORBIDDEN_ERROR_MESSAGE) - for context in assistant.contexts: - if context.file_url: - try: - delete_file(context.file_url) - except Exception as e: - logging.error(f"Failed to delete S3 file {context.file_url}: {str(e)}") - delete_assistant_repository(db=db_session, assistant_id=assistant_id) await delete_assistant_detail_cache( @@ -161,7 +160,7 @@ async def update_assistant_service(assistant_id: UUID, update_request: UpdateAss for context in assistant.contexts: db_session.delete(context) assistant.contexts = [ - Context(content=ctx.content, file_url=ctx.file_url, pecha_title=ctx.pecha_title, pecha_text_id=ctx.pecha_text_id) + Context(content=ctx.content, pecha_title=ctx.pecha_title, pecha_text_id=ctx.pecha_text_id) for ctx in update_request.contexts ] diff --git a/api/Assistant/assistant_view.py b/api/Assistant/assistant_view.py index e435f98..69c1b50 100644 --- a/api/Assistant/assistant_view.py +++ b/api/Assistant/assistant_view.py @@ -1,12 +1,13 @@ -from fastapi import APIRouter +from fastapi import APIRouter, UploadFile, File, Form from starlette import status from api.Assistant.assistant_response_model import AssistantResponse, AssistantRequest, AssistantInfoResponse, UpdateAssistantRequest from fastapi import Query, Depends from api.Assistant.assistant_service import create_assistant_service, get_assistant_by_id_service, get_assistants, delete_assistant_service, update_assistant_service from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials -from typing import Annotated +from typing import Annotated, Optional, List from uuid import UUID from api.constant import Constant +import json oauth2_scheme = HTTPBearer() assistant_router=APIRouter( @@ -21,8 +22,30 @@ async def get_all_assistants( return get_assistants(skip=skip, limit=limit) @assistant_router.post("", status_code=status.HTTP_201_CREATED) -async def create_assistant(assistant_request: AssistantRequest, authentication_credential: Annotated[HTTPAuthorizationCredentials, Depends(oauth2_scheme)]): - create_assistant_service(token=authentication_credential.credentials, assistant_request=assistant_request) +async def create_assistant( + authentication_credential: Annotated[HTTPAuthorizationCredentials, Depends(oauth2_scheme)], + name: str = Form(...), + system_prompt: str = Form(...), + source_type: Optional[str] = Form(None), + description: Optional[str] = Form(None), + system_assistance: bool = Form(False), + contexts: Optional[str] = Form(None), + files: List[UploadFile] = File(default=[]) +): + contexts_data = json.loads(contexts) if contexts else [] + assistant_request = AssistantRequest( + name=name, + source_type=source_type, + description=description, + system_prompt=system_prompt, + contexts=contexts_data, + system_assistance=system_assistance + ) + await create_assistant_service( + token=authentication_credential.credentials, + assistant_request=assistant_request, + files=files + ) return {"message": Constant.CREATED_ASSISTANT_MESSAGE} @assistant_router.get("/{assistant_id}", status_code=status.HTTP_200_OK) diff --git a/api/langgraph/context_processor.py b/api/langgraph/context_processor.py deleted file mode 100644 index d46bb03..0000000 --- a/api/langgraph/context_processor.py +++ /dev/null @@ -1,95 +0,0 @@ -import logging -from typing import List, Optional -from io import BytesIO -from pypdf import PdfReader -from docx import Document -from api.Assistant.assistant_response_model import ContextRequest -from api.upload.S3_utils import download_file_from_s3 -from api.config import get - - -def extract_text_from_pdf(pdf_bytes: BytesIO) -> str: - try: - reader = PdfReader(pdf_bytes) - pages = [] - - for page in reader.pages: - page_text = page.extract_text() - if page_text: - pages.append(page_text.strip()) - - return "\n\n".join(pages).strip() - except Exception as e: - logging.error(f"Failed to extract text from PDF: {e}") - raise - - - -def extract_text_from_txt(file_bytes: BytesIO) -> str: - try: - return file_bytes.read().decode('utf-8') - except Exception as e: - logging.error(f"Failed to read text file: {e}") - raise - - -def extract_text_from_docx(file_bytes: BytesIO) -> str: - try: - doc = Document(file_bytes) - paragraphs = [] - - for para in doc.paragraphs: - para_text = para.text.strip() - if para_text: - paragraphs.append(para_text) - - return "\n\n".join(paragraphs).strip() - except Exception as e: - logging.error(f"Failed to extract text from DOCX: {e}") - raise - - -def process_file_context(file_url: str) -> str: - bucket_name = get("AWS_BUCKET_NAME") - file_bytes = download_file_from_s3(bucket_name, file_url) - - if file_url.lower().endswith('.pdf'): - text = extract_text_from_pdf(file_bytes) - elif file_url.lower().endswith(('.txt', '.text')): - text = extract_text_from_txt(file_bytes) - elif file_url.lower().endswith(('.docx')): - text = extract_text_from_docx(file_bytes) - else: - raise ValueError(f"Unsupported file type: {file_url}") - return text - - -def process_contexts(contexts: List[ContextRequest]) -> Optional[List[str]]: - if not contexts: - return None - - processed_contexts = [] - - for idx, ctx in enumerate(contexts, 1): - try: - if ctx.content: - processed_contexts.append(ctx.content) - - elif ctx.file_url: - file_text = process_file_context(ctx.file_url) - processed_contexts.append(file_text) - - elif ctx.pecha_title and ctx.pecha_text_id: - pecha_context = f"[Pecha: {ctx.pecha_title}, ID: {ctx.pecha_text_id}]" - processed_contexts.append(pecha_context) - - else: - logging.warning(f"Empty context #{idx}, skipping") - - except Exception as e: - error_msg = f"Failed to process context #{idx}: {str(e)}" - logging.error(error_msg) - - if not processed_contexts: - return None - return processed_contexts diff --git a/api/langgraph/nodes/node_initialize.py b/api/langgraph/nodes/node_initialize.py index c4faa72..f90635e 100644 --- a/api/langgraph/nodes/node_initialize.py +++ b/api/langgraph/nodes/node_initialize.py @@ -3,7 +3,7 @@ from datetime import datetime from api.langgraph.workflow_type import WorkflowState,Batch -from api.langgraph.context_processor import process_contexts +from api.utils import Utils from api import config DEFAULT_MAX_BATCH_SIZE = 2 @@ -17,7 +17,7 @@ def initialize_workflow(state: WorkflowState) -> WorkflowState: batch_size = init_size - processed_contexts = process_contexts(request.contexts) if request.contexts else None + processed_contexts = Utils.process_contexts(request.contexts) if request.contexts else None for i in range(0, len(texts), batch_size): batch_texts = texts[i : i + batch_size] diff --git a/api/ui.py b/api/ui.py index 0761fd3..1a8131d 100644 --- a/api/ui.py +++ b/api/ui.py @@ -608,7 +608,7 @@ async def serve_ui():
@@ -819,8 +819,7 @@ async def serve_ui(): if (activeAssistant.contexts && activeAssistant.contexts.length) { ctxDiv.innerHTML = activeAssistant.contexts.map((c,i) => { if (c.pecha_title) return `Pecha: ${esc(c.pecha_title)}`; - if (c.content) return `Text #${i+1}`; - if (c.file_url) return `File: ${esc(c.file_url)}`; + if (c.content) return `Context #${i+1}`; return `Context #${i+1}`; }).join(''); } else { @@ -859,10 +858,8 @@ async def serve_ui(): activeAssistant.contexts.forEach(c => { let type = 'content'; if (c.pecha_text_id) type = 'search'; - else if (c.file_url) type = 'file'; addContextEntry(type, { content: c.content, - file_url: c.file_url, pecha_title: c.pecha_title, pecha_text_id: c.pecha_text_id }); @@ -906,7 +903,6 @@ async def serve_ui(): let selectedType = type; if (data.pecha_text_id) selectedType = 'search'; - else if (data.file_url) selectedType = 'file'; else if (data.content) selectedType = 'content'; // Store type as data attribute for retrieval @@ -914,7 +910,7 @@ async def serve_ui(): // Add type label and remove button only let typeLabel = 'Content'; - if (selectedType === 'file') typeLabel = 'File URL'; + if (selectedType === 'file') typeLabel = 'File Upload'; else if (selectedType === 'search') typeLabel = 'Search Pecha'; div.innerHTML = ` @@ -928,7 +924,7 @@ async def serve_ui(): if (selectedType === 'content') { renderContentField(fieldArea, data.content || ''); } else if (selectedType === 'file') { - renderFileField(fieldArea, data.file_url || ''); + renderFileField(fieldArea, ''); } else if (selectedType === 'search') { renderSearchField(fieldArea, data.pecha_title || '', data.pecha_text_id || ''); // If editing and has content, create a mock search result with the content @@ -954,82 +950,71 @@ async def serve_ui(): `; } -function renderFileField(container, value) { - const hasFile = value && value.trim(); +function renderFileField(container, fileName) { + const hasFile = fileName && fileName.trim(); container.innerHTML = `