SoftwareUnderstanding · dgarijo · Feb 13, 2026 · Feb 6, 2026 · Feb 6, 2026 · Feb 6, 2026
diff --git a/.github/actions/README.md b/.github/actions/README.md
@@ -0,0 +1,166 @@
+# SW Metadata Bot - GitHub Action
+
+A GitHub Action to automatically analyze repository metadata quality and create issues with improvement suggestions.
+
+Part of the [CodeMetaSoft](https://w3id.org/codemetasoft) project to improve research software metadata quality.
+
+## Features
+
+- **Automated Metadata Analysis**: Detects metadata pitfalls using the metacheck tool
+- **Issue Generation**: Creates detailed issues with improvement suggestions
+- **Dry-run Support**: Review generated issues before they're posted
+- **Flexible Input**: Analyze single repositories or process batch files
+- **Artifact Support**: Save and review analysis results
+
+## Available Actions
+
+### `metacheck-analysis`
+
+Run metadata analysis on repositories to detect pitfalls.
+
+```yaml
+uses: codemetasoft/sw-metadata-bot/.github/actions/metacheck-analysis@v1
+with:
+  input: 'path/to/repos.json'  # Required: URL or JSON file
+  pitfalls-output: 'pitfalls'   # Optional: Output directory (default: pitfalls_outputs)
+  analysis-output: 'results.json' # Optional: Output file (default: analysis_results.json)
+  skip-somef: 'false'           # Optional: Skip SoMEF execution (default: false)
+  threshold: '0.8'              # Optional: SoMEF confidence threshold (default: 0.8)
+```
+
+**Outputs:**
+- `pitfalls-output-dir`: Path to directory with pitfalls JSON-LD files
+- `analysis-output-file`: Path to analysis summary JSON file
+
+### `create-issues`
+
+Create GitHub/GitLab issues based on analysis results.
+
+```yaml
+uses: codemetasoft/sw-metadata-bot/.github/actions/create-issues@v1
+with:
+  pitfalls-output-dir: 'pitfalls'  # Required: Directory with pitfalls files
+  issues-dir: 'issues'             # Optional: Output directory (default: issues_output)
+  dry-run: 'true'                  # Optional: Dry-run mode (default: true)
+  log-level: 'INFO'                # Optional: Logging level (default: INFO)
+```
+
+**Outputs:**
+- `issues-dir`: Path to directory with generated issue files
+
+## Usage Examples
+
+### Example 1: Analyze and Review Issues (Dry-run)
+
+```yaml
+name: Metadata Analysis
+on:
+  schedule:
+    - cron: '0 0 * * 1'  # Weekly on Monday
+  workflow_dispatch:
+
+jobs:
+  analyze:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Analyze metadata
+        uses: codemetasoft/sw-metadata-bot/.github/actions/metacheck-analysis@v1
+        id: analysis
+        with:
+          input: 'repos.json'
+
+      - name: Generate issues
+        uses: codemetasoft/sw-metadata-bot/.github/actions/create-issues@v1
+        with:
+          pitfalls-output-dir: ${{ steps.analysis.outputs.pitfalls-output-dir }}
+          dry-run: 'true'
+
+      - name: Upload results
+        uses: actions/upload-artifact@v4
+        with:
+          name: analysis-results
+          path: |
+            issues_output/
+            analysis_results.json
+```
+
+### Example 2: Analyze Single Repository
+
+```yaml
+- name: Analyze single repo
+  uses: codemetasoft/sw-metadata-bot/.github/actions/metacheck-analysis@v1
+  with:
+    input: 'https://github.com/my-org/my-repo'
+    pitfalls-output: 'my-repo-analysis'
+```
+
+### Example 3: Create Issues in Production
+
+```yaml
+- name: Create issues
+  uses: codemetasoft/sw-metadata-bot/.github/actions/create-issues@v1
+  with:
+    pitfalls-output-dir: 'pitfalls'
+    dry-run: 'false'  # Actually create issues
+    log-level: 'DEBUG'
+```
+
+## Input File Format
+
+For batch analysis, provide a JSON file with repository URLs:
+
+```json
+[
+  "https://github.com/owner/repo1",
+  "https://github.com/owner/repo2",
+  "https://gitlab.com/group/repo3"
+]
+```
+
+## Generated Output
+
+### Analysis Results (`analysis_results.json`)
+
+Summary statistics and metadata quality metrics.
+
+### Pitfalls Files (`pitfalls_outputs/`)
+
+JSON-LD files containing detailed metadata issues detected for each repository.
+
+### Issues Files (`issues_output/`)
+
+Generated issue bodies ready to be posted to repositories. Each file corresponds to a repository that needs improvements.
+
+## Authentication
+
+When creating actual issues (`dry-run: 'false'`), provide a token for the target platform:
+- **GitHub**: Set `GITHUB_TOKEN` environment variable
+- **GitLab**: Set `GITLAB_TOKEN` environment variable
+
+```yaml
+- name: Create issues
+  uses: codemetasoft/sw-metadata-bot/.github/actions/create-issues@v1
+  env:
+    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+    GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }}
+  with:
+    pitfalls-output-dir: 'pitfalls'
+    dry-run: 'false'
+```
+
+## Requirements
+
+- Python 3.10+ (the actions set up Python 3.11 themselves)
+- The action installs all dependencies via `uv`
+
+## License
+
+MIT
+
+## Questions?
+
+For issues, suggestions, or questions about the bot:
+- GitHub: [codemetasoft/sw-metadata-bot](https://github.com/codemetasoft/sw-metadata-bot)
+- Documentation: [CodeMetaSoft](https://w3id.org/codemetasoft)
diff --git a/.github/actions/create-issues/action.yml b/.github/actions/create-issues/action.yml
@@ -0,0 +1,86 @@
+# Composite action that runs the sw-metadata-bot `create-issues` command on the
+# pitfalls JSON-LD files published by the metacheck-analysis action.
+name: 'SW Metadata Bot - Create Issues'
+description: 'Create GitHub/GitLab issues based on metadata analysis results'
+author: 'Tom François'
+
+inputs:
+  pitfalls-output-dir:
+    description: 'Directory containing pitfalls JSON-LD files from metacheck analysis'
+    required: true
+  issues-dir:
+    description: 'Directory to save issue bodies and reports'
+    required: false
+    default: 'issues_output'
+  # Defaults to dry-run; `--dry-run` is passed only when this equals 'true'.
+  dry-run:
+    description: 'Simulate issue creation without actually posting to repositories'
+    required: false
+    default: 'true'
+  log-level:
+    description: 'Logging level (DEBUG, INFO, WARNING, ERROR)'
+    required: false
+    default: 'INFO'
+
+outputs:
+  # Echoes the input back so later steps can reference the directory by output.
+  issues-dir:
+    description: 'Directory containing the generated issue files'
+    value: ${{ inputs.issues-dir }}
+
+runs:
+  using: 'composite'
+  steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+      with:
+        # Keep untracked files: by default checkout runs `git clean -ffdx`,
+        # which would delete outputs written earlier in the same job.
+        clean: false
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.11'
+
+    - name: Install uv
+      uses: astral-sh/setup-uv@v3
+      with:
+        version: "latest"
+
+    - name: Install dependencies
+      shell: bash
+      run: uv sync
+
+    # Fetch the `pitfalls-outputs` artifact into the directory the CLI reads
+    # from, so a non-default `pitfalls-output-dir` is honoured.
+    - name: Download pitfalls artifacts
+      uses: actions/download-artifact@v4
+      with:
+        name: pitfalls-outputs
+        path: ${{ inputs.pitfalls-output-dir }}
+
+    # Inputs reach the shell through env vars instead of being templated into
+    # the script text, so their content cannot be interpreted as shell syntax.
+    - name: Create output directory
+      shell: bash
+      env:
+        SWMB_ISSUES_DIR: ${{ inputs.issues-dir }}
+      run: mkdir -p "$SWMB_ISSUES_DIR"
+
+    - name: Create issues
+      shell: bash
+      env:
+        SWMB_PITFALLS_DIR: ${{ inputs.pitfalls-output-dir }}
+        SWMB_ISSUES_DIR: ${{ inputs.issues-dir }}
+        SWMB_LOG_LEVEL: ${{ inputs.log-level }}
+        # Resolves to '--dry-run' or to an empty string.
+        SWMB_DRY_RUN_FLAG: ${{ inputs.dry-run == 'true' && '--dry-run' || '' }}
+      run: |
+        # SWMB_DRY_RUN_FLAG is left unquoted on purpose: when empty it must
+        # expand to no argument at all rather than to an empty-string argument.
+        uv run sw-metadata-bot create-issues \
+          --pitfalls-output-dir "$SWMB_PITFALLS_DIR" \
+          --issues-dir "$SWMB_ISSUES_DIR" \
+          --log-level "$SWMB_LOG_LEVEL" \
+          ${SWMB_DRY_RUN_FLAG}
diff --git a/.github/actions/metacheck-analysis/action.yml b/.github/actions/metacheck-analysis/action.yml
@@ -0,0 +1,115 @@
+# Composite action that runs the sw-metadata-bot `metacheck` command and
+# publishes the resulting pitfalls directory as a workflow artifact.
+name: 'SW Metadata Bot - Metacheck Analysis'
+description: 'Run metacheck analysis to detect metadata pitfalls in repositories'
+author: 'Tom François'
+
+inputs:
+  input:
+    description: 'Repository URL or JSON file path containing repositories to analyze'
+    required: true
+  skip-somef:
+    description: 'Skip SoMEF execution and analyze existing SoMEF output files directly'
+    required: false
+    default: 'false'
+  pitfalls-output:
+    description: 'Directory to store pitfall JSON-LD files'
+    required: false
+    default: 'pitfalls_outputs'
+  analysis-output:
+    description: 'File path for summary results'
+    required: false
+    default: 'analysis_results.json'
+  threshold:
+    description: 'SoMEF confidence threshold (default: 0.8)'
+    required: false
+    default: '0.8'
+
+outputs:
+  # Both outputs echo the corresponding input so later steps can chain on them.
+  pitfalls-output-dir:
+    description: 'Directory containing the pitfalls JSON-LD files'
+    value: ${{ inputs.pitfalls-output }}
+  analysis-output-file:
+    description: 'Path to the analysis results JSON file'
+    value: ${{ inputs.analysis-output }}
+
+runs:
+  using: 'composite'
+  steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.11'
+
+    - name: Install uv
+      uses: astral-sh/setup-uv@v3
+      with:
+        version: "latest"
+
+    - name: Install dependencies
+      shell: bash
+      run: uv sync
+
+    # Inputs reach the shell through env vars instead of being templated into
+    # the script text, so their content cannot be interpreted as shell syntax.
+    - name: Create output directory
+      shell: bash
+      env:
+        SWMB_PITFALLS_OUTPUT: ${{ inputs.pitfalls-output }}
+      run: mkdir -p "$SWMB_PITFALLS_OUTPUT"
+
+    - name: Create NLTK data directory
+      shell: bash
+      run: mkdir -p "${{ github.workspace }}/.nltk_data"
+
+    - name: Configure SoMEF before running analysis
+      shell: bash
+      env:
+        NLTK_DATA: "${{ github.workspace }}/.nltk_data"
+        SOMEF_CONFIGURATION_FILE: ".somef_config.json"
+      run: |
+        # Download the wordnet and omw-1.4 NLTK packages, then configure SoMEF.
+        uv run python -m nltk.downloader -d "$NLTK_DATA" wordnet
+        uv run python -m nltk.downloader -d "$NLTK_DATA" omw-1.4
+        uv run somef configure -a
+
+    - name: Run metacheck analysis
+      shell: bash
+      env:
+        NLTK_DATA: "${{ github.workspace }}/.nltk_data"
+        SOMEF_CONFIGURATION_FILE: ".somef_config.json"
+        SWMB_INPUT: ${{ inputs.input }}
+        SWMB_SKIP_SOMEF: ${{ inputs.skip-somef }}
+        SWMB_PITFALLS_OUTPUT: ${{ inputs.pitfalls-output }}
+        SWMB_ANALYSIS_OUTPUT: ${{ inputs.analysis-output }}
+        SWMB_THRESHOLD: ${{ inputs.threshold }}
+      run: |
+        set -euo pipefail
+        if [[ "$SWMB_SKIP_SOMEF" == "true" ]]; then
+          SKIP_SOMEF="--skip-somef"
+        else
+          SKIP_SOMEF=""
+        fi
+
+        uv run sw-metadata-bot metacheck \
+          --input "$SWMB_INPUT" \
+          --pitfalls-output "$SWMB_PITFALLS_OUTPUT" \
+          --analysis-output "$SWMB_ANALYSIS_OUTPUT" \
+          --threshold "$SWMB_THRESHOLD" \
+          ${SKIP_SOMEF}
+
+        echo "Pitfalls output dir: $SWMB_PITFALLS_OUTPUT"
+        ls -la "$SWMB_PITFALLS_OUTPUT" || true
+
+    # Publish the directory the analysis actually wrote to, so a non-default
+    # `pitfalls-output` is uploaded as well.
+    - name: Upload pitfalls artifacts
+      uses: actions/upload-artifact@v4
+      with:
+        name: pitfalls-outputs
+        path: ${{ inputs.pitfalls-output }}
+        retention-days: 1  # Clean up after 1 day
diff --git a/.github/workflows/example-usage.yml b/.github/workflows/example-usage.yml
@@ -0,0 +1,46 @@
+# Manually triggered demo: analyse this repository with the two composite
+# actions, generate issue bodies in dry-run mode, and keep every output as an
+# artifact.
+name: 'Example - Run SW Metadata Bot on This Repo'
+
+on:
+  workflow_dispatch:
+
+permissions:
+  issues: write
+  contents: read
+
+jobs:
+  analyze-and-create-issues:
+    runs-on: 'ubuntu-latest'
+    steps:
+      - name: 'Checkout code'
+        uses: actions/checkout@v4
+
+      # Step id `analysis` lets the next step read this action's outputs.
+      - id: analysis
+        name: 'Run metacheck analysis (current repo)'
+        uses: SoftwareUnderstanding/sw-metadata-bot/.github/actions/metacheck-analysis@v1
+        with:
+          input: 'https://github.com/${{ github.repository }}'
+          analysis-output: 'analysis_results.json'
+          pitfalls-output: 'pitfalls_outputs'
+
+      - name: 'Create issues (dry-run)'
+        uses: SoftwareUnderstanding/sw-metadata-bot/.github/actions/create-issues@v1
+        with:
+          dry-run: 'true'
+          log-level: 'INFO'
+          issues-dir: 'issues_output'
+          pitfalls-output-dir: '${{ steps.analysis.outputs.pitfalls-output-dir }}'
+
+      # `always()` keeps the upload running even when an earlier step failed.
+      - name: 'Upload analysis artifacts'
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: 'sw-metadata-bot-results'
+          path: |
+            analysis_results.json
+            pitfalls_outputs/
+            issues_output/