diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..9ba0ea3 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,96 @@ +name: CI +on: + push: + branches: + - "main" + pull_request: + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + lint-python: + name: Lint Python + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + cache: "pip" + + - name: Run flake8 + uses: py-actions/flake8@v2 + + validate-compute-block: + name: Validate Compute Block Config + runs-on: ubuntu-latest + needs: lint-python + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + + - name: Install dependencies + run: | + pip install -r requirements.txt + + - name: Check cbcs + run: | + python3 - <<'EOF' + import main + + from scystream.sdk.config import load_config, get_compute_block + from scystream.sdk.config.config_loader import _compare_configs + from pathlib import Path + + CBC_PATH = Path("cbc.yaml") + + if not CBC_PATH.exists(): + raise FileNotFoundError("cbc.yaml not found in repo root.") + + block_from_code = get_compute_block() + block_from_yaml = load_config(str(CBC_PATH)) + + _compare_configs(block_from_code, block_from_yaml) + + print("cbc.yaml matches python code definition") + EOF + + build: + name: Build docker image + runs-on: ubuntu-latest + needs: validate-compute-block + permissions: + contents: read + packages: write + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata for docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/database-interactions + tags: | + type=ref, event=pr + type=raw, value=latest, 
enable=${{ (github.ref == format('refs/heads/{0}', 'main')) }} + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + diff --git a/Dockerfile b/Dockerfile index fa47a3e..aab174d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,19 @@ -FROM python:3.10 +FROM python:3.13-bookworm -COPY requirements.txt ./ +WORKDIR /app -RUN pip install --trusted-host pypi.python.org -r requirements.txt +COPY requirements.txt . -RUN apt-get update && apt-get install -y openjdk-17-jdk +RUN apt-get update && apt-get install -y \ + build-essential \ + gcc \ + python3-dev \ + openjdk-17-jdk \ + && rm -rf /var/lib/apt/lists/* -COPY . ./ +RUN pip install --upgrade pip +RUN pip install -r requirements.txt -# run the project -CMD ["python3", "-m", "main"] +COPY . . +CMD ["python", "-m", "main"] diff --git a/cbc.yaml b/cbc.yaml index b7fbded..009a345 100644 --- a/cbc.yaml +++ b/cbc.yaml @@ -1,64 +1,58 @@ author: Anna Beckers -description: Query the PatstatDB to a CSV -docker_image: registry.git.rwth-aachen.de/tim-institute/literature-analysis-pipeline/patstat-compute-block/patstatcb -name: Patstat Compute Block +description: Query DB to a CSV +docker_image: ghcr.io/rwth-time/database-interactions/database-interactions +name: Database Interactions entrypoints: - run_query: - description: Run the query by directly passing an SQL Query + run_query_from_file: + description: Run the query using a file containing the query envs: - PATSTAT_DB: patstat - PATSTAT_HOST: 134.130.176.139 - PATSTAT_PORT: 5432 - PATSTAT_USER: guest + DB_DSN: null inputs: - query_str: + query_file: config: - QUERY: '' - description: Pass your SQL-Query here - type: custom + query_file_BUCKET_NAME: null + query_file_FILE_EXT: null + query_file_FILE_NAME: null + query_file_FILE_PATH: null + query_file_S3_ACCESS_KEY: null + query_file_S3_HOST: null + query_file_S3_PORT: null + 
query_file_S3_SECRET_KEY: null + description: txt file containing the SQL query + type: file outputs: csv_output: config: csv_output_BUCKET_NAME: null - csv_output_FILE_EXT: null + csv_output_FILE_EXT: csv csv_output_FILE_NAME: null csv_output_FILE_PATH: null csv_output_S3_ACCESS_KEY: null csv_output_S3_HOST: null csv_output_S3_PORT: null csv_output_S3_SECRET_KEY: null - description: The CSV-File of the queries result + description: CSV of the queries result type: file - run_query_from_file: - description: Run the query using a txt file + run_query_from_string: + description: Run the query directly by passing an SQL Query envs: - PATSTAT_DB: patstat - PATSTAT_HOST: 134.130.176.139 - PATSTAT_PORT: 5432 - PATSTAT_USER: guest + DB_DSN: null inputs: - query_file: + query_str: config: - query_file_BUCKET_NAME: null - query_file_FILE_EXT: null - query_file_FILE_NAME: null - query_file_FILE_PATH: null - query_file_S3_ACCESS_KEY: null - query_file_S3_HOST: null - query_file_S3_PORT: null - query_file_S3_SECRET_KEY: null - description: Pass the txt file here including your SQL Query - type: file + QUERY: '' + description: Pass your SQL-Query here + type: custom outputs: csv_output: config: csv_output_BUCKET_NAME: null - csv_output_FILE_EXT: null + csv_output_FILE_EXT: csv csv_output_FILE_NAME: null csv_output_FILE_PATH: null csv_output_S3_ACCESS_KEY: null csv_output_S3_HOST: null csv_output_S3_PORT: null csv_output_S3_SECRET_KEY: null - description: The CSV-File of the queries result + description: CSV of the queries result type: file diff --git a/interactions/query.py b/interactions/query.py index 18d7773..25587c5 100644 --- a/interactions/query.py +++ b/interactions/query.py @@ -8,7 +8,11 @@ ) -def query_db(query: str, db_settings: EnvSettings, output_file_name: str) -> None: +def query_db( + query: str, + db_settings: EnvSettings, + output_file_name: str +) -> None: try: engine = create_engine(db_settings.DB_DSN) with engine.connect() as conn: diff --git 
a/requirements.txt b/requirements.txt index b74d3be..8c65b43 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ scystream-sdk==1.2.1 SQLAlchemy==2.0.43 psycopg2-binary==2.9.10 -PYMSQL==1.1.2 +PyMySQL==1.1.2 duckdb==1.4.1 -cx-Oracle==8.3.0 pyodbc==5.2.0 snowflake-sqlalchemy==1.7.7 +oracledb==3.4.2