diff --git a/.gitignore b/.gitignore index d2cdbd62..984178dd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ config/env/* !config/env/*.example -.idea/ \ No newline at end of file +.idea/ +.DS_Store \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index 712082e7..b9f417e7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -147,6 +147,16 @@ Each module contains: - Auth endpoints via Djoser: `/auth/` - JWT token lifetime: 60 minutes (access), 1 day (refresh) +#### API Documentation +- Auto-generated using **drf-spectacular** (OpenAPI 3.0) +- **Swagger UI**: `http://localhost:8000/api/docs/` — interactive API explorer +- **ReDoc**: `http://localhost:8000/api/redoc/` — readable reference docs +- **Raw schema**: `http://localhost:8000/api/schema/` +- Configuration in `SPECTACULAR_SETTINGS` in `settings.py` +- Views use `@extend_schema` decorators and `serializer_class` attributes for schema generation +- JWT auth is configured in the schema — use `JWT ` (not `Bearer`) in Swagger UI's Authorize dialog +- To document a new endpoint: add `serializer_class` to the view if it has one, or add `@extend_schema` with `inline_serializer` for views returning raw dicts + #### Key Data Models - **Medication** (`api.views.listMeds.models`) - Medication catalog with benefits/risks - **MedRule** (`api.models.model_medRule`) - Include/Exclude rules for medications based on patient history diff --git a/README.md b/README.md index e5a246b1..0d7d531a 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ for patients with bipolar disorder, helping them shorten their journey to stabil ## Usage -You can view the current build of the website here: [https://balancertestsite.com](https://balancertestsite.com/) +You can view the current build of the website here: [https://balancerproject.org/](https://balancerproject.org/) ## Contributing @@ -53,7 +53,7 @@ The application supports connecting to PostgreSQL databases via: See [Database Connection 
Documentation](./docs/DATABASE_CONNECTION.md) for detailed configuration. **Local Development:** -- Download a sample of papers to upload from [https://balancertestsite.com](https://balancertestsite.com/) +- Download a sample of papers to upload from [https://balancerproject.org/](https://balancerproject.org/) - The email and password of `pgAdmin` are specified in `balancer-main/docker-compose.yml` - The first time you use `pgAdmin` after building the Docker containers you will need to register the server. - The `Host name/address` is the Postgres server service name in the Docker Compose file @@ -74,6 +74,23 @@ df = pd.read_sql(query, engine) #### Django REST - The email and password are set in `server/api/management/commands/createsu.py` +## API Documentation + +Interactive API docs are auto-generated using [drf-spectacular](https://drf-spectacular.readthedocs.io/) and available at: + +- **Swagger UI**: [http://localhost:8000/api/docs/](http://localhost:8000/api/docs/) — interactive explorer with "Try it out" functionality +- **ReDoc**: [http://localhost:8000/api/redoc/](http://localhost:8000/api/redoc/) — clean, readable reference docs +- **Raw schema**: [http://localhost:8000/api/schema/](http://localhost:8000/api/schema/) — OpenAPI 3.0 JSON/YAML + +### Testing authenticated endpoints + +Most endpoints require JWT authentication. To test them in Swagger UI: + +1. **Get a token**: Find the `POST /auth/jwt/create/` endpoint in Swagger UI, click **Try it out**, enter an authorized `email` and `password`, and click **Execute**. Copy the `access` token from the response. +2. **Authorize**: Click the **Authorize** button (lock icon) at the top of the page. Enter `JWT ` in the value field. The prefix must be `JWT`, not `Bearer`. +3. **Test endpoints**: All subsequent requests will include your token. Use **Try it out** on any protected endpoint. +4. **Token refresh**: Access tokens expire after 60 minutes. 
Use `POST /auth/jwt/refresh/` with your `refresh` token, or repeat step 1. + ## Architecture The Balancer website is a Postgres, Django REST, and React project. The source code layout is: diff --git a/docker-compose.yml b/docker-compose.yml index 9182cdb6..7a6e7fe9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,11 +18,6 @@ services: networks: app_net: ipv4_address: 192.168.0.2 - healthcheck: - test: ["CMD-SHELL", "pg_isready -U balancer -d balancer_dev"] - interval: 5s - timeout: 5s - retries: 5 pgadmin: image: dpage/pgadmin4 diff --git a/frontend/src/components/Footer/Footer.tsx b/frontend/src/components/Footer/Footer.tsx index d656f5ad..977c59d4 100644 --- a/frontend/src/components/Footer/Footer.tsx +++ b/frontend/src/components/Footer/Footer.tsx @@ -62,11 +62,11 @@ function Footer() { > Leave feedback - - Donate + Support Development = ({ isAuthenticated, isSuperuser }) => { Leave Feedback - Donate + Support Development {isAuthenticated && isSuperuser && (
{
  • - - Donate + Support Development
  • {isAuthenticated && diff --git a/frontend/src/pages/About/About.tsx b/frontend/src/pages/About/About.tsx index c50f6705..9481c74d 100644 --- a/frontend/src/pages/About/About.tsx +++ b/frontend/src/pages/About/About.tsx @@ -77,9 +77,9 @@ function About() {
    - + diff --git a/frontend/src/pages/DocumentManager/UploadFile.tsx b/frontend/src/pages/DocumentManager/UploadFile.tsx index 2ee7b5db..32b727e8 100644 --- a/frontend/src/pages/DocumentManager/UploadFile.tsx +++ b/frontend/src/pages/DocumentManager/UploadFile.tsx @@ -1,5 +1,5 @@ import React, { useState, useRef } from "react"; -import axios from "axios"; +import { adminApi } from "../../api/apiClient"; import TypingAnimation from "../../components/Header/components/TypingAnimation.tsx"; import Layout from "../Layout/Layout.tsx"; @@ -22,14 +22,9 @@ const UploadFile: React.FC = () => { formData.append("file", file); try { - const response = await axios.post( + const response = await adminApi.post( `/api/v1/api/uploadFile`, formData, - { - headers: { - "Content-Type": "multipart/form-data" - }, - } ); console.log("File uploaded successfully", response.data); } catch (error) { diff --git a/frontend/src/pages/Files/ListOfFiles.tsx b/frontend/src/pages/Files/ListOfFiles.tsx index b6fff4ee..37bd459a 100644 --- a/frontend/src/pages/Files/ListOfFiles.tsx +++ b/frontend/src/pages/Files/ListOfFiles.tsx @@ -61,7 +61,7 @@ const ListOfFiles: React.FC<{ showTable?: boolean }> = ({ const handleDownload = async (guid: string, fileName: string) => { try { setDownloading(guid); - const { data } = await publicApi.get(`/v1/api/uploadFile/${guid}`, { responseType: 'blob' }); + const { data } = await publicApi.get(`/api/v1/api/uploadFile/${guid}`, { responseType: 'blob' }); const url = window.URL.createObjectURL(new Blob([data])); const link = document.createElement("a"); @@ -82,7 +82,7 @@ const ListOfFiles: React.FC<{ showTable?: boolean }> = ({ const handleOpen = async (guid: string) => { try { setOpening(guid); - const { data } = await publicApi.get(`/v1/api/uploadFile/${guid}`, { responseType: 'arraybuffer' }); + const { data } = await publicApi.get(`/api/v1/api/uploadFile/${guid}`, { responseType: 'arraybuffer' }); const file = new Blob([data], { type: 'application/pdf' }); const 
fileURL = window.URL.createObjectURL(file); diff --git a/server/api/views/ai_promptStorage/views.py b/server/api/views/ai_promptStorage/views.py index 7354feb3..cc50f22e 100644 --- a/server/api/views/ai_promptStorage/views.py +++ b/server/api/views/ai_promptStorage/views.py @@ -1,10 +1,12 @@ from rest_framework import status from rest_framework.decorators import api_view from rest_framework.response import Response +from drf_spectacular.utils import extend_schema from .models import AI_PromptStorage from .serializers import AI_PromptStorageSerializer +@extend_schema(request=AI_PromptStorageSerializer, responses={201: AI_PromptStorageSerializer}) @api_view(['POST']) # @permission_classes([IsAuthenticated]) def store_prompt(request): @@ -21,6 +23,7 @@ def store_prompt(request): return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) +@extend_schema(responses={200: AI_PromptStorageSerializer(many=True)}) @api_view(['GET']) def get_all_prompts(request): """ diff --git a/server/api/views/ai_settings/views.py b/server/api/views/ai_settings/views.py index 349b9fd9..9ee6aad7 100644 --- a/server/api/views/ai_settings/views.py +++ b/server/api/views/ai_settings/views.py @@ -2,10 +2,12 @@ from rest_framework.decorators import api_view, permission_classes from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response +from drf_spectacular.utils import extend_schema from .models import AI_Settings from .serializers import AISettingsSerializer +@extend_schema(request=AISettingsSerializer, responses={200: AISettingsSerializer(many=True), 201: AISettingsSerializer}) @api_view(['GET', 'POST']) @permission_classes([IsAuthenticated]) def settings_view(request): diff --git a/server/api/views/assistant/sanitizer.py b/server/api/views/assistant/sanitizer.py index bdbbc77f..fd851df6 100644 --- a/server/api/views/assistant/sanitizer.py +++ b/server/api/views/assistant/sanitizer.py @@ -1,26 +1,76 @@ import re import logging + logger = 
logger = logging.getLogger(__name__)

# Whole <script>...</script> / <style>...</style> elements, body included.
# DOTALL lets the body span several lines; \1 forces the closing tag to match
# the opening one.
_SCRIPT_STYLE_RE = re.compile(
    r'<(script|style)\b[^>]*>.*?</\1\s*>', re.IGNORECASE | re.DOTALL
)
# Any remaining single tag, e.g. <b> or </p>.
_TAG_RE = re.compile(r'<.*?>')
_EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
# Simple "MRN", optional colon/whitespace, then digits.
_MRN_RE = re.compile(r'\bMRN[:\s]*\d+\b', re.IGNORECASE)
_PHONE_RE = re.compile(r'\+?\d[\d -]{8,}\d')
_WHITESPACE_RE = re.compile(r'\s+')

# (pattern, replacement) pairs applied in order by normalize_pronouns.
# Matching is case-sensitive, so only the exact spellings below are rewritten.
_PRONOUN_RULES = (
    (re.compile(r'\bMy\b'), "The patient's"),
    (re.compile(r'\bmy\b'), "the patient's"),
    (re.compile(r'\bI\b'), 'the patient'),
    (re.compile(r'\bme\b'), 'the patient'),
    (re.compile(r'\bmyself\b'), 'the patient'),
    (re.compile(r'\bYour\b'), 'the clinician'),
)


def sanitize_input(user_input: str, *, max_length: int = 5000) -> str:
    """
    Sanitize user input: strip markup, redact identifiers, normalize pronouns.

    Steps, in order:
      1. Remove <script>/<style> elements together with their contents.
      2. Remove any remaining HTML tags.
      3. Redact e-mail addresses, medical record numbers and phone numbers.
      4. Rewrite first-person pronouns (see normalize_pronouns).
      5. Collapse every run of whitespace to a single space.
      6. Truncate to ``max_length`` characters and strip the ends.

    Args:
        user_input (str): The raw input string from the user.
        max_length (int): Maximum number of characters kept (default 5000).

    Returns:
        str: The sanitized input string, or "" if sanitizing failed
        (for example when ``user_input`` is not a string).
    """
    try:
        sanitized = _SCRIPT_STYLE_RE.sub('', user_input)
        sanitized = _TAG_RE.sub('', sanitized)

        # E-mail and MRN are redacted before phone numbers: the phone pattern
        # matches any long digit run, so running it first would relabel long
        # MRNs as phone numbers and break e-mail addresses containing digits.
        sanitized = _EMAIL_RE.sub('[Email Address]', sanitized)
        sanitized = _MRN_RE.sub('[Medical Record Number]', sanitized)
        sanitized = _PHONE_RE.sub('[Phone Number]', sanitized)

        sanitized = normalize_pronouns(sanitized)

        # Collapse whitespace to one space. Replacing with '' would glue all
        # words together and make the text unreadable.
        sanitized = _WHITESPACE_RE.sub(' ', sanitized)

        # Bound the size of the text passed on downstream.
        return sanitized[:max_length].strip()
    except Exception as e:
        # Best effort: a sanitizer failure yields an empty string, not a crash.
        logger.error(f"Error sanitizing input: {e}")
        return ""


def normalize_pronouns(text: str) -> str:
    """
    Rewrite patient-centric pronouns into neutral clinical wording.

    Replacements (case-sensitive, whole words only):
      My -> The patient's, my -> the patient's,
      I / me / myself -> the patient,
      Your -> the clinician.

    Args:
        text (str): The input text containing pronouns.

    Returns:
        str: The text with the pronouns above replaced.
    """
    # NOTE(review): "you", "your" (lowercase), "mine" and capitalised "Me" are
    # not rewritten, and "Your" is not made possessive - confirm intent.
    # NOTE(review): \bI\b also matches the numeral in "Bipolar I" and the "I"
    # of contractions such as "I'm" - confirm whether that is acceptable.
    for pattern, replacement in _PRONOUN_RULES:
        text = pattern.sub(replacement, text)
    return text
index eeb68809..de927cf1 100644 --- a/server/api/views/conversations/views.py +++ b/server/api/views/conversations/views.py @@ -16,6 +16,8 @@ from .models import Conversation, Message from .serializers import ConversationSerializer from ...services.tools.tools import tools, execute_tool +from drf_spectacular.utils import extend_schema, inline_serializer +from rest_framework import serializers as drf_serializers @csrf_exempt @@ -95,6 +97,21 @@ def destroy(self, request, *args, **kwargs): self.perform_destroy(instance) return Response(status=status.HTTP_204_NO_CONTENT) + @extend_schema( + request=inline_serializer(name='ContinueConversationRequest', fields={ + 'message': drf_serializers.CharField(help_text='User message to continue the conversation'), + 'page_context': drf_serializers.CharField(required=False, help_text='Optional page context'), + }), + responses={ + 200: inline_serializer(name='ContinueConversationResponse', fields={ + 'response': drf_serializers.CharField(), + 'title': drf_serializers.CharField(), + }), + 400: inline_serializer(name='ContinueConversationBadRequest', fields={ + 'error': drf_serializers.CharField(), + }), + } + ) @action(detail=True, methods=['post']) def continue_conversation(self, request, pk=None): conversation = self.get_object() @@ -123,6 +140,20 @@ def continue_conversation(self, request, pk=None): return Response({"response": chatgpt_response, "title": conversation.title}) + @extend_schema( + request=inline_serializer(name='UpdateTitleRequest', fields={ + 'title': drf_serializers.CharField(help_text='New conversation title'), + }), + responses={ + 200: inline_serializer(name='UpdateTitleResponse', fields={ + 'status': drf_serializers.CharField(), + 'title': drf_serializers.CharField(), + }), + 400: inline_serializer(name='UpdateTitleBadRequest', fields={ + 'error': drf_serializers.CharField(), + }), + } + ) @action(detail=True, methods=['patch']) def update_title(self, request, pk=None): conversation = self.get_object() diff 
--git a/server/api/views/embeddings/embeddingsView.py b/server/api/views/embeddings/embeddingsView.py index d0bdd8ca..ebcf0774 100644 --- a/server/api/views/embeddings/embeddingsView.py +++ b/server/api/views/embeddings/embeddingsView.py @@ -1,8 +1,9 @@ from rest_framework.views import APIView from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response -from rest_framework import status +from rest_framework import status, serializers as drf_serializers from django.http import StreamingHttpResponse +from drf_spectacular.utils import extend_schema, inline_serializer, OpenApiParameter from ...services.embedding_services import get_closest_embeddings from ...services.conversions_services import convert_uuids from ...services.openai_services import openAIServices @@ -15,6 +16,26 @@ class AskEmbeddingsAPIView(APIView): permission_classes = [IsAuthenticated] + @extend_schema( + parameters=[ + OpenApiParameter(name='guid', type=str, location=OpenApiParameter.QUERY, required=False, description='Optional file GUID to filter embeddings'), + OpenApiParameter(name='stream', type=bool, location=OpenApiParameter.QUERY, required=False, description='Enable streaming response'), + ], + request=inline_serializer(name='AskEmbeddingsRequest', fields={ + 'message': drf_serializers.CharField(help_text='Question to ask against embedded documents'), + }), + responses={ + 200: inline_serializer(name='AskEmbeddingsResponse', fields={ + 'question': drf_serializers.CharField(), + 'llm_response': drf_serializers.CharField(), + 'embeddings_info': drf_serializers.CharField(), + 'sent_to_llm': drf_serializers.CharField(), + }), + 400: inline_serializer(name='AskEmbeddingsBadRequest', fields={ + 'error': drf_serializers.CharField(), + }), + } + ) def post(self, request, *args, **kwargs): try: user = request.user diff --git a/server/api/views/feedback/views.py b/server/api/views/feedback/views.py index d0f0e1da..424e0758 100644 --- 
a/server/api/views/feedback/views.py +++ b/server/api/views/feedback/views.py @@ -9,6 +9,7 @@ class FeedbackView(APIView): permission_classes = [AllowAny] + serializer_class = FeedbackSerializer def post(self, request, *args, **kwargs): serializer = FeedbackSerializer(data=request.data) diff --git a/server/api/views/listMeds/views.py b/server/api/views/listMeds/views.py index fcd0edf2..1b199a7e 100644 --- a/server/api/views/listMeds/views.py +++ b/server/api/views/listMeds/views.py @@ -1,7 +1,8 @@ -from rest_framework import status +from rest_framework import status, serializers as drf_serializers from rest_framework.permissions import AllowAny from rest_framework.response import Response from rest_framework.views import APIView +from drf_spectacular.utils import extend_schema, inline_serializer from .models import Diagnosis, Medication, Suggestion from .serializers import MedicationSerializer @@ -24,6 +25,33 @@ class GetMedication(APIView): permission_classes = [AllowAny] + @extend_schema( + request=inline_serializer( + name='GetMedicationRequest', + fields={ + 'state': drf_serializers.CharField(help_text='Diagnosis state, e.g. 
"depressed", "manic"'), + 'suicideHistory': drf_serializers.BooleanField(default=False), + 'kidneyHistory': drf_serializers.BooleanField(default=False), + 'liverHistory': drf_serializers.BooleanField(default=False), + 'bloodPressureHistory': drf_serializers.BooleanField(default=False), + 'weightGainConcern': drf_serializers.BooleanField(default=False), + 'priorMedications': drf_serializers.CharField(required=False, default='', help_text='Comma-separated medication names'), + } + ), + responses={ + 200: inline_serializer( + name='GetMedicationResponse', + fields={ + 'first': drf_serializers.ListField(child=drf_serializers.DictField()), + 'second': drf_serializers.ListField(child=drf_serializers.DictField()), + 'third': drf_serializers.ListField(child=drf_serializers.DictField()), + } + ), + 404: inline_serializer(name='GetMedicationNotFound', fields={ + 'error': drf_serializers.CharField(), + }), + } + ) def post(self, request): data = request.data state_query = data.get('state', '') @@ -75,6 +103,7 @@ def post(self, request): class ListOrDetailMedication(APIView): permission_classes = [AllowAny] + serializer_class = MedicationSerializer def get(self, request): name_query = request.query_params.get('name', None) @@ -98,6 +127,7 @@ class AddMedication(APIView): """ API endpoint to add a medication to the database with its risks and benefits. """ + serializer_class = MedicationSerializer def post(self, request): data = request.data @@ -129,6 +159,22 @@ class DeleteMedication(APIView): API endpoint to delete medication if medication in database. 
""" + @extend_schema( + request=inline_serializer(name='DeleteMedicationRequest', fields={ + 'name': drf_serializers.CharField(), + }), + responses={ + 200: inline_serializer(name='DeleteMedicationSuccess', fields={ + 'success': drf_serializers.CharField(), + }), + 400: inline_serializer(name='DeleteMedicationBadRequest', fields={ + 'error': drf_serializers.CharField(), + }), + 404: inline_serializer(name='DeleteMedicationNotFound', fields={ + 'error': drf_serializers.CharField(), + }), + } + ) def delete(self, request): data = request.data name = data.get('name', '').strip() diff --git a/server/api/views/medRules/serializers.py b/server/api/views/medRules/serializers.py index df5e3663..e0d7d3f3 100644 --- a/server/api/views/medRules/serializers.py +++ b/server/api/views/medRules/serializers.py @@ -1,4 +1,5 @@ from rest_framework import serializers +from drf_spectacular.utils import extend_schema_field from ...models.model_medRule import MedRule, MedRuleSource from ..listMeds.serializers import MedicationSerializer from ...models.model_embeddings import Embeddings @@ -30,6 +31,7 @@ class Meta: "medication_sources", ] + @extend_schema_field(MedicationWithSourcesSerializer(many=True)) def get_medication_sources(self, obj): medrule_sources = MedRuleSource.objects.filter(medrule=obj).select_related( "medication", "embedding" diff --git a/server/api/views/medRules/views.py b/server/api/views/medRules/views.py index 2fae140b..2f80f8f3 100644 --- a/server/api/views/medRules/views.py +++ b/server/api/views/medRules/views.py @@ -1,9 +1,10 @@ from rest_framework.views import APIView from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response -from rest_framework import status +from rest_framework import status, serializers as drf_serializers from django.utils.decorators import method_decorator from django.views.decorators.csrf import csrf_exempt +from drf_spectacular.utils import extend_schema, inline_serializer from 
...models.model_medRule import MedRule from .serializers import MedRuleSerializer # You'll need to create this from ..listMeds.models import Medication @@ -13,6 +14,7 @@ @method_decorator(csrf_exempt, name='dispatch') class MedRules(APIView): permission_classes = [IsAuthenticated] + serializer_class = MedRuleSerializer def get(self, request, format=None): # Get all med rules @@ -29,6 +31,27 @@ def get(self, request, format=None): return Response(data, status=status.HTTP_200_OK) + @extend_schema( + request=inline_serializer(name='MedRuleCreateRequest', fields={ + 'rule_type': drf_serializers.CharField(help_text='INCLUDE or EXCLUDE'), + 'history_type': drf_serializers.CharField(help_text='e.g. DIAGNOSIS_DEPRESSED, DIAGNOSIS_MANIC'), + 'reason': drf_serializers.CharField(), + 'label': drf_serializers.CharField(), + 'explanation': drf_serializers.CharField(), + 'medication_names': drf_serializers.ListField(child=drf_serializers.CharField()), + 'chunk_ids': drf_serializers.ListField(child=drf_serializers.IntegerField()), + 'file_guid': drf_serializers.CharField(), + }), + responses={ + 201: MedRuleSerializer, + 400: inline_serializer(name='MedRuleCreateBadRequest', fields={ + 'error': drf_serializers.CharField(), + }), + 404: inline_serializer(name='MedRuleCreateNotFound', fields={ + 'error': drf_serializers.CharField(), + }), + } + ) def post(self, request): data = request.data diff --git a/server/api/views/risk/views_riskWithSources.py b/server/api/views/risk/views_riskWithSources.py index c02908fc..26cad9f8 100644 --- a/server/api/views/risk/views_riskWithSources.py +++ b/server/api/views/risk/views_riskWithSources.py @@ -1,7 +1,8 @@ from rest_framework.views import APIView from rest_framework.response import Response -from rest_framework import status +from rest_framework import status, serializers as drf_serializers from rest_framework.permissions import AllowAny +from drf_spectacular.utils import extend_schema, inline_serializer from api.views.listMeds.models 
import Medication from api.models.model_medRule import MedRule, MedRuleSource import openai @@ -11,6 +12,28 @@ class RiskWithSourcesView(APIView): permission_classes = [AllowAny] + @extend_schema( + request=inline_serializer(name='RiskWithSourcesRequest', fields={ + 'drug': drf_serializers.CharField(help_text='Medication name'), + 'source': drf_serializers.CharField(required=False, help_text='One of: include, diagnosis, diagnosis_depressed, diagnosis_manic, diagnosis_hypomanic, diagnosis_euthymic'), + }), + responses={ + 200: inline_serializer(name='RiskWithSourcesResponse', fields={ + 'benefits': drf_serializers.ListField(child=drf_serializers.CharField()), + 'risks': drf_serializers.ListField(child=drf_serializers.CharField()), + 'sources': drf_serializers.ListField(child=drf_serializers.DictField()), + 'medrules_found': drf_serializers.IntegerField(required=False), + 'source_type': drf_serializers.CharField(required=False), + 'note': drf_serializers.CharField(required=False), + }), + 400: inline_serializer(name='RiskWithSourcesBadRequest', fields={ + 'error': drf_serializers.CharField(), + }), + 404: inline_serializer(name='RiskWithSourcesNotFound', fields={ + 'error': drf_serializers.CharField(), + }), + } + ) def post(self, request): openai.api_key = os.environ.get("OPENAI_API_KEY") diff --git a/server/api/views/text_extraction/views.py b/server/api/views/text_extraction/views.py index e4122851..020740ad 100644 --- a/server/api/views/text_extraction/views.py +++ b/server/api/views/text_extraction/views.py @@ -9,6 +9,8 @@ from django.utils.decorators import method_decorator from django.views.decorators.csrf import csrf_exempt import anthropic +from drf_spectacular.utils import extend_schema, inline_serializer, OpenApiParameter +from rest_framework import serializers as drf_serializers from ...services.openai_services import openAIServices from api.models.model_embeddings import Embeddings @@ -97,6 +99,20 @@ class RuleExtractionAPIView(APIView): 
permission_classes = [IsAuthenticated] + @extend_schema( + parameters=[ + OpenApiParameter(name='guid', type=str, location=OpenApiParameter.QUERY, required=True, description='File GUID to extract rules from'), + ], + responses={ + 200: inline_serializer(name='RuleExtractionResponse', fields={ + 'texts': drf_serializers.CharField(), + 'cited_texts': drf_serializers.CharField(), + }), + 500: inline_serializer(name='RuleExtractionError', fields={ + 'error': drf_serializers.CharField(), + }), + } + ) def get(self, request): try: @@ -141,6 +157,19 @@ def openai_extraction(content_chunks, user_prompt): class RuleExtractionAPIOpenAIView(APIView): permission_classes = [IsAuthenticated] + @extend_schema( + parameters=[ + OpenApiParameter(name='guid', type=str, location=OpenApiParameter.QUERY, required=True, description='File GUID to extract rules from'), + ], + responses={ + 200: inline_serializer(name='RuleExtractionOpenAIResponse', fields={ + 'rules': drf_serializers.ListField(child=drf_serializers.DictField()), + }), + 500: inline_serializer(name='RuleExtractionOpenAIError', fields={ + 'error': drf_serializers.CharField(), + }), + } + ) def get(self, request): try: user_prompt = """ diff --git a/server/api/views/uploadFile/test_title.py b/server/api/views/uploadFile/test_title.py index 69979620..ef694e14 100644 --- a/server/api/views/uploadFile/test_title.py +++ b/server/api/views/uploadFile/test_title.py @@ -4,6 +4,39 @@ from . import title +def make_page_dict(blocks): + """Helper to build a get_text("dict") return value from a simple list of blocks. + Each block is a list of (text, font_size) tuples representing spans. + """ + dict_blocks = [] + for spans in blocks: + dict_blocks.append({ + "type": 0, + "lines": [{ + "spans": [{"text": text, "size": size} for text, size in spans] + }] + }) + return {"blocks": dict_blocks} + + +def make_mock_doc(pages_data, metadata=None): + """Build a mock fitz.Document. + pages_data: list of block lists, one per page. 
Each block is a list of (text, size) tuples. + """ + doc = MagicMock() + doc.metadata = metadata or {"title": None} + doc.__len__ = lambda self: len(pages_data) + + mock_pages = [] + for page_blocks in pages_data: + page = MagicMock() + page.get_text.return_value = make_page_dict(page_blocks) + mock_pages.append(page) + + doc.__getitem__ = lambda self, idx: mock_pages[idx] + return doc + + class TestGenerateTitle(unittest.TestCase): def test_prefers_metadata_title_if_valid(self): doc = MagicMock() @@ -11,59 +44,112 @@ def test_prefers_metadata_title_if_valid(self): self.assertEqual( "A Study Regarding The Efficacy of Drugs", title.generate_title(doc)) - def test_falls_back_to_first_page_text_if_metadata_title_is_empty(self): - doc = MagicMock() - doc.metadata = {"title": ""} - doc[0].get_text = MagicMock() - - foo_block = [None] * 7 - foo_block[4] = "foo" - foo_block[6] = 0 - - title_block = [None] * 7 - title_block[4] = "Advances in Mood Disorder Pharmacotherapy: Evaluating New Antipsychotics and Mood Stabilizers for Bipolar Disorder and Schizophrenia" - title_block[6] = 0 - - bar_block = [None] * 7 - bar_block[4] = "bar" - bar_block[6] = 0 - doc[0].get_text.return_value = [foo_block, title_block, bar_block] - + def test_falls_back_to_font_size_if_metadata_title_is_empty(self): + doc = make_mock_doc( + pages_data=[[ + [("foo", 10.0)], + [("Advances in Mood Disorder Pharmacotherapy: Evaluating New Antipsychotics and Mood Stabilizers for Bipolar Disorder and Schizophrenia", 18.0)], + [("bar", 10.0)], + ]], + metadata={"title": ""}, + ) expected_title = "Advances in Mood Disorder Pharmacotherapy: Evaluating New Antipsychotics and Mood Stabilizers for Bipolar Disorder and Schizophrenia" self.assertEqual(expected_title, title.generate_title(doc)) - def test_falls_back_to_first_page_text_if_metadata_title_does_not_match_regex(self): - doc = MagicMock() - doc.metadata = {"title": "abcd1234"} - doc[0].get_text = MagicMock() - - foo_block = [None] * 7 - foo_block[4] = 
"foo" - foo_block[6] = 0 - - title_block = [None] * 7 - title_block[4] = "Advances in Mood Disorder Pharmacotherapy: Evaluating New Antipsychotics and Mood Stabilizers for Bipolar Disorder and Schizophrenia" - title_block[6] = 0 - - bar_block = [None] * 7 - bar_block[4] = "bar" - bar_block[6] = 0 - doc[0].get_text.return_value = [foo_block, title_block, bar_block] - + def test_falls_back_to_font_size_if_metadata_title_does_not_match_regex(self): + doc = make_mock_doc( + pages_data=[[ + [("foo", 10.0)], + [("Advances in Mood Disorder Pharmacotherapy: Evaluating New Antipsychotics and Mood Stabilizers for Bipolar Disorder and Schizophrenia", 18.0)], + [("bar", 10.0)], + ]], + metadata={"title": "abcd1234"}, + ) expected_title = "Advances in Mood Disorder Pharmacotherapy: Evaluating New Antipsychotics and Mood Stabilizers for Bipolar Disorder and Schizophrenia" self.assertEqual(expected_title, title.generate_title(doc)) - @patch("api.services.openai_services.openAIServices.openAI") + @patch("api.views.uploadFile.title.openAIServices.openAI") def test_falls_back_to_chatgpt_if_no_title_found(self, mock_openAI): - doc = MagicMock() - doc.metadata = {"title": None} - doc.get_text.return_value = [] + doc = make_mock_doc( + pages_data=[[]] # no blocks at all + ) - mock_response = MagicMock() - mock_response.choices = [MagicMock()] - mock_response.choices[0].message.content = "A Study Regarding The Efficacy of Drugs" - mock_openAI.return_value = mock_response + mock_openAI.return_value = "A Study Regarding The Efficacy of Drugs" - title.generate_title(doc) + result = title.generate_title(doc) self.assertTrue(mock_openAI.called) + self.assertEqual(result, "A Study Regarding The Efficacy of Drugs") + + @patch("api.views.uploadFile.title.openAIServices.openAI") + def test_strips_quotes_from_openai_title(self, mock_openAI): + doc = make_mock_doc(pages_data=[[]]) + + mock_openAI.return_value = '"Updated CANMAT/ISBD Guidelines for Treating Mixed Features in Bipolar Disorder"' + + 
result = title.generate_title(doc) + + self.assertEqual(result, "Updated CANMAT/ISBD Guidelines for Treating Mixed Features in Bipolar Disorder") + + @patch("api.views.uploadFile.title.openAIServices.openAI") + def test_truncates_long_openai_title(self, mock_openAI): + doc = make_mock_doc(pages_data=[[]]) + + mock_openAI.return_value = "A" * 300 + + result = title.generate_title(doc) + + # Ensure the title is truncated to fit the UploadFile model's title field (max_length=255), since OpenAI responses may exceed this limit + self.assertLessEqual(len(result), 255) + + def test_font_size_joins_adjacent_spans_in_same_block(self): + """A title split across multiple spans in the same block should be joined.""" + doc = make_mock_doc( + pages_data=[[ + [("Author Name", 10.0)], + [("Advances in Mood Disorder", 18.0), ("Pharmacotherapy", 18.0)], + [("Some journal info", 10.0)], + ]], + ) + result = title.extract_title_by_font_size(doc) + self.assertEqual(result, "Advances in Mood Disorder Pharmacotherapy") + + def test_font_size_ignores_short_spans(self): + """Superscript markers and other tiny spans should be filtered out.""" + doc = make_mock_doc( + pages_data=[[ + [("Advances in Mood Disorder Pharmacotherapy", 18.0), ("*", 18.0)], + [("Author Name et al.", 10.0)], + ]], + ) + # The "*" span is < 2 chars, so it should be ignored; title is just the real text + result = title.extract_title_by_font_size(doc) + self.assertEqual(result, "Advances in Mood Disorder Pharmacotherapy") + + def test_font_size_returns_none_when_no_regex_match(self): + """If the largest-font text doesn't match the title regex, return None.""" + doc = make_mock_doc( + pages_data=[[ + # Only 2 words — regex requires at least 3 + [("Psychiatry Research", 18.0)], + [("Author Name et al.", 10.0)], + ]], + ) + result = title.extract_title_by_font_size(doc) + self.assertIsNone(result) + + def test_font_size_finds_title_on_later_page(self): + """Title on page 2 should still be found if it has the largest 
font.""" + doc = make_mock_doc( + pages_data=[ + [ # page 1: cover page with smaller text + [("Some preamble text here", 12.0)], + ], + [ # page 2: actual title in larger font + [("Advances in Mood Disorder Pharmacotherapy", 18.0)], + [("Author Name et al.", 10.0)], + ], + ], + ) + result = title.extract_title_by_font_size(doc) + self.assertEqual(result, "Advances in Mood Disorder Pharmacotherapy") diff --git a/server/api/views/uploadFile/title.py b/server/api/views/uploadFile/title.py index 06e0ce0c..38dcd5d5 100644 --- a/server/api/views/uploadFile/title.py +++ b/server/api/views/uploadFile/title.py @@ -6,44 +6,89 @@ # regular expression to match common research white paper titles. Created by Chat-gpt -# requires at least 3 words, no dates, no version numbers. +# requires at least 3 words, no version numbers. title_regex = re.compile( - r'^(?=(?:\b\w+\b[\s:,\-\(\)]*){3,})(?!.*\b(?:19|20)\d{2}\b)(?!.*\bv\d+\b)[A-Za-z0-9][\w\s:,\-\(\)]*[A-Za-z\)]$', re.IGNORECASE) + r"^(?=(?:\b\w+\b[^A-Za-z0-9]*){3,})(?!.*\bv\d+\b)[A-Za-z0-9].+[A-Za-z\)?!]$", re.IGNORECASE) def generate_title(pdf: fitz.Document) -> str | None: document_metadata_title = pdf.metadata["title"] if document_metadata_title is not None and document_metadata_title != "": if title_regex.match(document_metadata_title): - print("suitable title was found in metadata") return document_metadata_title.strip() - else: - print("metadata title did not match regex") - print("Looking for title in first page text") - first_page = pdf[0] - first_page_blocks = first_page.get_text("blocks") - text_blocks = [ - block[4].strip().replace("\n", " ") - for block in first_page_blocks - if block[6] == 0 # only include text blocks. - ] - - # For some reason, extracted PDF text has extra spaces. Collapse them here. 
- regex = r"\s{2,}" - text_blocks = [re.sub(regex, " ", text) for text in text_blocks] - - if len(text_blocks) != 0: - for text in text_blocks: - if title_regex.match(text): - return text - - print( - "no suitable title found in first page text. Using GPT-4 to summarize the PDF") + font_title = extract_title_by_font_size(pdf) + if font_title: + return font_title + gpt_title = summarize_pdf(pdf) return gpt_title or None +def extract_title_by_font_size(pdf: fitz.Document, max_pages: int = 3) -> str | None: + """ + Extract the title by finding the largest font size across the first few pages + and collecting contiguous runs of text at that size. + """ + pages_to_scan = min(max_pages, len(pdf)) + + # First pass: collect all spans with their font size, and find the max font size. + all_spans = [] + max_font_size = 0.0 + + for page_idx in range(pages_to_scan): + page_dict = pdf[page_idx].get_text("dict") + for block in page_dict["blocks"]: + if block.get("type") != 0: + continue + for line in block["lines"]: + for span in line["spans"]: + text = span["text"].strip() + size = span["size"] + if len(text) < 2 or size < 6.0: + continue + all_spans.append({"text": text, "size": size}) + if size > max_font_size: + max_font_size = size + + if max_font_size == 0.0: + return None + + # Second pass: gather contiguous runs of spans at the max font size. + # Runs continue across block boundaries so multi-block titles (e.g., + # "BIPOLAR DISORDER IN PRIMARY CARE:" in one block and "DIAGNOSIS AND + # MANAGEMENT" in the next) are joined into a single candidate. + # A run only ends when a non-max-size span interrupts it. 
+ candidates = [] + current_run = [] + + for span in all_spans: + if span["size"] == max_font_size: + current_run.append(span["text"]) + else: + if current_run: + candidates.append(" ".join(current_run)) + current_run = [] + + if current_run: + candidates.append(" ".join(current_run)) + + # Collapse extra whitespace, validate against title regex, and pick the longest match. + # Longest wins because real titles are typically longer than section headers + # (e.g., "About the Author") that may share the same max font size. + best = None + for candidate in candidates: + cleaned = re.sub(r"\s{2,}", " ", candidate).strip() + if title_regex.match(cleaned): + if best is None or len(cleaned) > len(best): + best = cleaned + + if best: + return best[:255] + + return None + + def summarize_pdf(pdf: fitz.Document) -> str: """ Summarize a PDF document using OpenAI's GPT-4 model. @@ -58,4 +103,6 @@ def summarize_pdf(pdf: fitz.Document) -> str: prompt = "Please provide a title for this document. The title should be less than 256 characters and will be displayed on a webpage." 
response = openAIServices.openAI( first_page_content, prompt, model='gpt-4o', temp=0.0) - return response.choices[0].message.content + title = response.strip().strip('"').strip("'") + # Truncate to fit UploadFile model's max_length=255 title field as a final safeguard + return title[:255] diff --git a/server/api/views/uploadFile/views.py b/server/api/views/uploadFile/views.py index 69dfb996..eda43b76 100644 --- a/server/api/views/uploadFile/views.py +++ b/server/api/views/uploadFile/views.py @@ -1,8 +1,9 @@ from rest_framework.views import APIView from rest_framework.permissions import AllowAny, IsAuthenticated from rest_framework.response import Response -from rest_framework import status +from rest_framework import status, serializers as drf_serializers from rest_framework.generics import UpdateAPIView +from drf_spectacular.utils import extend_schema, inline_serializer, OpenApiResponse import pdfplumber from .models import UploadFile # Import your UploadFile model from .serializers import UploadFileSerializer @@ -12,9 +13,14 @@ import fitz from django.db import transaction from .title import generate_title +import logging + +logger = logging.getLogger(__name__) class UploadFileView(APIView): + serializer_class = UploadFileSerializer + def get_permissions(self): if self.request.method == 'GET': return [AllowAny()] # Public access @@ -28,6 +34,23 @@ def get(self, request, format=None): serializer = UploadFileSerializer(files, many=True) return Response(serializer.data) + @extend_schema( + request={'multipart/form-data': inline_serializer( + name='UploadFileRequest', + fields={ + 'file': drf_serializers.FileField(help_text='PDF file to upload'), + } + )}, + responses={ + 201: inline_serializer(name='UploadFileSuccess', fields={ + 'message': drf_serializers.CharField(), + 'file_id': drf_serializers.IntegerField(), + }), + 400: inline_serializer(name='UploadFileBadRequest', fields={ + 'message': drf_serializers.CharField(), + }), + } + ) def post(self, request, 
format=None): print(request.auth) print(f"UploadFileView post called. Path: {request.path}") @@ -124,9 +147,26 @@ def post(self, request, format=None): ) except Exception as e: # Handle potential errors + logger.exception("File upload failed for '%s': %s", uploaded_file.name, e) return Response({"message": f"Error processing file and embeddings: {str(e)}"}, status=status.HTTP_400_BAD_REQUEST) + @extend_schema( + request=inline_serializer(name='DeleteFileRequest', fields={ + 'guid': drf_serializers.CharField(help_text='GUID of file to delete'), + }), + responses={ + 200: inline_serializer(name='DeleteFileSuccess', fields={ + 'message': drf_serializers.CharField(), + }), + 403: inline_serializer(name='DeleteFileForbidden', fields={ + 'message': drf_serializers.CharField(), + }), + 404: inline_serializer(name='DeleteFileNotFound', fields={ + 'message': drf_serializers.CharField(), + }), + } + ) def delete(self, request, format=None): guid = request.data.get('guid') if not guid: @@ -157,6 +197,14 @@ def delete(self, request, format=None): class RetrieveUploadFileView(APIView): permission_classes = [AllowAny] + @extend_schema( + responses={ + (200, 'application/pdf'): OpenApiResponse(description='PDF file binary content'), + 404: inline_serializer(name='RetrieveFileNotFound', fields={ + 'message': drf_serializers.CharField(), + }), + } + ) def get(self, request, guid, format=None): try: file = UploadFile.objects.get(guid=guid) diff --git a/server/api/views/version/views.py b/server/api/views/version/views.py index b79d6577..af59e9e0 100644 --- a/server/api/views/version/views.py +++ b/server/api/views/version/views.py @@ -3,11 +3,18 @@ from rest_framework.permissions import AllowAny from rest_framework.views import APIView from rest_framework.response import Response +from rest_framework import serializers as drf_serializers +from drf_spectacular.utils import extend_schema, inline_serializer class VersionView(APIView): permission_classes = [AllowAny] + @extend_schema( + 
responses={200: inline_serializer(name='VersionResponse', fields={ + 'version': drf_serializers.CharField(), + })} + ) def get(self, request, *args, **kwargs): version = os.environ.get("VERSION") or "dev" return Response({"version": version}) diff --git a/server/balancer_backend/settings.py b/server/balancer_backend/settings.py index 9f917a94..a4ccaaae 100644 --- a/server/balancer_backend/settings.py +++ b/server/balancer_backend/settings.py @@ -51,6 +51,7 @@ "corsheaders", "rest_framework", "djoser", + 'drf_spectacular', ] MIDDLEWARE = [ @@ -195,8 +196,19 @@ "DEFAULT_AUTHENTICATION_CLASSES": ( "rest_framework_simplejwt.authentication.JWTAuthentication", ), + 'DEFAULT_SCHEMA_CLASS': 'drf_spectacular.openapi.AutoSchema', } +SPECTACULAR_SETTINGS = { + 'TITLE': 'Balancer API', + 'DESCRIPTION': 'API for the Balancer medication decision support tool', + 'VERSION': '1.0.0', + 'SERVE_INCLUDE_SCHEMA': False, + 'SECURITY': [{'jwtAuth': []}], + 'SWAGGER_UI_SETTINGS': { + 'persistAuthorization': True, + }, +} SIMPLE_JWT = { "AUTH_HEADER_TYPES": ("JWT",), diff --git a/server/balancer_backend/urls.py b/server/balancer_backend/urls.py index c8bd290d..55bd2032 100644 --- a/server/balancer_backend/urls.py +++ b/server/balancer_backend/urls.py @@ -6,6 +6,9 @@ # Import TemplateView for rendering templates from django.views.generic import TemplateView import importlib # Import the importlib module for dynamic module importing +from drf_spectacular.views import SpectacularAPIView, SpectacularSwaggerView, SpectacularRedocView + + # Define a list of URL patterns for the application # Keep admin outside /api/ prefix @@ -50,6 +53,9 @@ # Wrap all API routes under /api/ prefix urlpatterns += [ path("api/", include(api_urlpatterns)), + path("api/schema/", SpectacularAPIView.as_view(), name="schema"), + path("api/docs/", SpectacularSwaggerView.as_view(url_name="schema"), name="swagger-ui"), + path("api/redoc/", SpectacularRedocView.as_view(url_name="schema"), name="redoc"), ] import os diff 
--git a/server/requirements.txt b/server/requirements.txt index bbaf7bc9..880500c6 100644 --- a/server/requirements.txt +++ b/server/requirements.txt @@ -18,4 +18,5 @@ sentence_transformers PyMuPDF==1.24.0 Pillow pytesseract -anthropic \ No newline at end of file +anthropic +drf-spectacular \ No newline at end of file