Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
name: CI
on:
push:
branches:
- "main"
pull_request:

env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}

jobs:
lint-python:
name: Lint Python
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
cache: "pip"

- name: Run flake8
uses: py-actions/flake8@v2

validate-compute-block:
name: Validate Compute Block Config
runs-on: ubuntu-latest
needs: lint-python
steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5

- name: Install dependencies
run: |
pip install -r requirements.txt

- name: Check cbc.yaml matches code definition
run: |
python3 - <<'EOF'
import main

from scystream.sdk.config import load_config, get_compute_block
from scystream.sdk.config.config_loader import _compare_configs
from pathlib import Path

CBC_PATH = Path("cbc.yaml")

if not CBC_PATH.exists():
raise FileNotFoundError("cbc.yaml not found in repo root.")

block_from_code = get_compute_block()
block_from_yaml = load_config(str(CBC_PATH))

_compare_configs(block_from_code, block_from_yaml)

print("cbc.yaml matches python code definition")
EOF

build:
name: Build docker image
runs-on: ubuntu-latest
needs: validate-compute-block
permissions:
contents: read
packages: write
steps:
- name: Checkout Repository
uses: actions/checkout@v4

- name: Log in to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Extract metadata for docker
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/database-interactions
tags: |
type=ref, event=pr
type=raw, value=latest, enable=${{ (github.ref == format('refs/heads/{0}', 'main')) }}

- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

20 changes: 13 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
FROM python:3.10
FROM python:3.13-bookworm

COPY requirements.txt ./
WORKDIR /app

RUN pip install --trusted-host pypi.python.org -r requirements.txt
COPY requirements.txt .

RUN apt-get update && apt-get install -y openjdk-17-jdk
RUN apt-get update && apt-get install -y \
build-essential \
gcc \
python3-dev \
openjdk-17-jdk \
&& rm -rf /var/lib/apt/lists/*

COPY . ./
RUN pip install --upgrade pip
RUN pip install -r requirements.txt

# run the project
CMD ["python3", "-m", "main"]
COPY . .

CMD ["python", "-m", "main"]
62 changes: 28 additions & 34 deletions cbc.yaml
Original file line number Diff line number Diff line change
@@ -1,64 +1,58 @@
author: Anna Beckers
description: Query the PatstatDB to a CSV
docker_image: registry.git.rwth-aachen.de/tim-institute/literature-analysis-pipeline/patstat-compute-block/patstatcb
name: Patstat Compute Block
description: Query DB to a CSV
docker_image: ghcr.io/rwth-time/database-interactions/database-interactions
name: Database Interactions
entrypoints:
run_query:
description: Run the query by directly passing an SQL Query
run_query_from_file:
description: Run the query using a file containing the query
envs:
PATSTAT_DB: patstat
PATSTAT_HOST: 134.130.176.139
PATSTAT_PORT: 5432
PATSTAT_USER: guest
DB_DSN: null
inputs:
query_str:
query_file:
config:
QUERY: ''
description: Pass your SQL-Query here
type: custom
query_file_BUCKET_NAME: null
query_file_FILE_EXT: null
query_file_FILE_NAME: null
query_file_FILE_PATH: null
query_file_S3_ACCESS_KEY: null
query_file_S3_HOST: null
query_file_S3_PORT: null
query_file_S3_SECRET_KEY: null
description: txt file containing the SQL query
type: file
outputs:
csv_output:
config:
csv_output_BUCKET_NAME: null
csv_output_FILE_EXT: null
csv_output_FILE_EXT: csv
csv_output_FILE_NAME: null
csv_output_FILE_PATH: null
csv_output_S3_ACCESS_KEY: null
csv_output_S3_HOST: null
csv_output_S3_PORT: null
csv_output_S3_SECRET_KEY: null
description: The CSV-File of the queries result
description: CSV of the queries result
type: file
run_query_from_file:
description: Run the query using a txt file
run_query_from_string:
description: Run the query directly by passing an SQL Query
envs:
PATSTAT_DB: patstat
PATSTAT_HOST: 134.130.176.139
PATSTAT_PORT: 5432
PATSTAT_USER: guest
DB_DSN: null
inputs:
query_file:
query_str:
config:
query_file_BUCKET_NAME: null
query_file_FILE_EXT: null
query_file_FILE_NAME: null
query_file_FILE_PATH: null
query_file_S3_ACCESS_KEY: null
query_file_S3_HOST: null
query_file_S3_PORT: null
query_file_S3_SECRET_KEY: null
description: Pass the txt file here including your SQL Query
type: file
QUERY: ''
description: Pass your SQL-Query here
type: custom
outputs:
csv_output:
config:
csv_output_BUCKET_NAME: null
csv_output_FILE_EXT: null
csv_output_FILE_EXT: csv
csv_output_FILE_NAME: null
csv_output_FILE_PATH: null
csv_output_S3_ACCESS_KEY: null
csv_output_S3_HOST: null
csv_output_S3_PORT: null
csv_output_S3_SECRET_KEY: null
description: The CSV-File of the queries result
description: CSV of the queries result
type: file
6 changes: 5 additions & 1 deletion interactions/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@
)


def query_db(query: str, db_settings: EnvSettings, output_file_name: str) -> None:
def query_db(
query: str,
db_settings: EnvSettings,
output_file_name: str
) -> None:
try:
engine = create_engine(db_settings.DB_DSN)
with engine.connect() as conn:
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
scystream-sdk==1.2.1
SQLAlchemy==2.0.43
psycopg2-binary==2.9.10
PYMSQL==1.1.2
PyMySQL==1.1.2
duckdb==1.4.1
cx-Oracle==8.3.0
pyodbc==5.2.0
snowflake-sqlalchemy==1.7.7
oracledb==3.4.2
Loading