diff --git a/.github/workflows/verify_data_Integrity.yml b/.github/workflows/verify_data_Integrity.yml index d4435509..25bb32e5 100644 --- a/.github/workflows/verify_data_Integrity.yml +++ b/.github/workflows/verify_data_Integrity.yml @@ -9,10 +9,51 @@ on: name: Verify station data integrity +permissions: + contents: read + pull-requests: write + issues: write + jobs: verify_migration_data: name: Verify pushed migration data runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - run: cargo run --bin data_validator + + - name: Run data validator + id: validate + run: | + if cargo run --bin data_validator; then + echo "result=success" >> "$GITHUB_OUTPUT" + else + echo "result=failure" >> "$GITHUB_OUTPUT" + fi + + - name: Find existing comment + if: github.event_name == 'pull_request' + uses: peter-evans/find-comment@v3 + id: find_comment + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: "github-actions[bot]" + body-includes: "" + + - name: Post or update validation failure comment + if: github.event_name == 'pull_request' && steps.validate.outputs.result == 'failure' + uses: peter-evans/create-or-update-comment@v4 + with: + issue-number: ${{ github.event.pull_request.number }} + comment-id: ${{ steps.find_comment.outputs.comment-id }} + body-path: /tmp/validation_report.md + edit-mode: replace + + - name: Delete comment if validation passed + if: github.event_name == 'pull_request' && steps.validate.outputs.result == 'success' && steps.find_comment.outputs.comment-id != '' + run: gh api repos/${{ github.repository }}/issues/comments/${{ steps.find_comment.outputs.comment-id }} -X DELETE + env: + GH_TOKEN: ${{ github.token }} + + - name: Fail job if validation failed + if: steps.validate.outputs.result == 'failure' + run: exit 1 diff --git a/data_validator/src/main.rs b/data_validator/src/main.rs index e29f9672..a9bc1b7a 100644 --- a/data_validator/src/main.rs +++ b/data_validator/src/main.rs @@ -1,55 +1,114 @@ use core::panic; +use std::collections::HashSet; use std::path::Path; use csv::{ReaderBuilder, StringRecord}; fn main() -> Result<(), Box> { - let mut has_err = false; + let mut invalid_station_ids: Vec = Vec::new(); + let mut invalid_type_ids: Vec = Vec::new(); let data_path: &Path = Path::new("data"); let mut rdr = ReaderBuilder::new().from_path(data_path.join("3!stations.csv"))?; - let records: Vec = rdr.records().filter_map(|row| row.ok()).collect(); - let station_ids: Vec = records + let records: Vec = rdr.records().collect::, _>>()?; + let station_ids: HashSet = records .iter() .map(|row| row.get(0).unwrap().parse::().unwrap()) .collect(); let mut rdr = ReaderBuilder::new().from_path(data_path.join("4!types.csv"))?; - let records: Vec = rdr.records().filter_map(|row| row.ok()).collect(); - let type_ids: Vec = records + let records: Vec = rdr.records().collect::, _>>()?; + let type_ids: HashSet = records .iter() .map(|row| row.get(1).unwrap().parse::().unwrap()) .collect(); let mut rdr = ReaderBuilder::new().from_path(data_path.join("5!station_station_types.csv"))?; - let records: Vec = rdr.records().filter_map(|row| row.ok()).collect(); + let records: Vec = rdr.records().collect::, _>>()?; - if let Some(invalid_record) = records - .iter() - .find(|row| !station_ids.contains(&row.get(1).unwrap().parse::().unwrap())) - { - println!( - "[INVALID] Unrecognized Station ID {:?} Found!", - invalid_record.get(1).unwrap() - ); - has_err = true; - } + for record in &records { + let line = || record.iter().collect::>().join(","); - if let Some(invalid_record) = records - .iter() - .find(|row| !type_ids.contains(&row.get(2).unwrap().parse::().unwrap())) - { - println!( - "[INVALID] Unrecognized Type ID {:?} Found!", - invalid_record.get(2).unwrap() - ); - has_err = true; + let station_cd: u32 = match record.get(1).and_then(|v| v.parse().ok()) { + Some(id) => id, + None => { + println!("[INVALID] Failed to parse station_cd from row: {}", line()); + invalid_station_ids.push(line()); + continue; + } + }; + let type_cd: u32 = match record.get(2).and_then(|v| v.parse().ok()) { + Some(id) => id, + None => { + println!("[INVALID] Failed to parse type_cd from row: {}", line()); + invalid_type_ids.push(line()); + continue; + } + }; + + if !station_ids.contains(&station_cd) { + println!("[INVALID] Unrecognized Station ID {:?} Found!", station_cd); + invalid_station_ids.push(line()); + } + if !type_ids.contains(&type_cd) { + println!("[INVALID] Unrecognized Type ID {:?} Found!", type_cd); + invalid_type_ids.push(line()); + } } + let has_err = !invalid_station_ids.is_empty() || !invalid_type_ids.is_empty(); + if has_err { + let report = build_markdown_report(&invalid_station_ids, &invalid_type_ids); + let report_path = + std::env::var("VALIDATION_REPORT_PATH").unwrap_or("/tmp/validation_report.md".into()); + std::fs::write(&report_path, &report)?; + eprintln!("Validation report written to {}", report_path); panic!("[FATAL] Verification hasn't been passed!"); } println!("[VALID] No errors reported."); Ok(()) } + +fn build_markdown_report(invalid_station_ids: &[String], invalid_type_ids: &[String]) -> String { + let mut md = String::new(); + + md.push_str("\n"); + md.push_str("## :x: データ整合性チェックに失敗しました\n\n"); + md.push_str("`5!station_station_types.csv` に存在しない外部キーへの参照が含まれています。\n\n"); + + if !invalid_station_ids.is_empty() { + md.push_str(&format!( + "### 不正な Station ID ({} 件)\n\n", + invalid_station_ids.len() + )); + md.push_str("`3!stations.csv` に存在しない `station_cd` が参照されています。\n\n"); + md.push_str("
\n該当レコード一覧\n\n"); + md.push_str("| 行データ |\n|---|\n"); + for line in invalid_station_ids { + md.push_str(&format!("| `{}` |\n", escape_markdown_cell(line))); + } + md.push_str("\n
\n\n"); + } + + if !invalid_type_ids.is_empty() { + md.push_str(&format!( + "### 不正な Type ID ({} 件)\n\n", + invalid_type_ids.len() + )); + md.push_str("`4!types.csv` に存在しない `type_cd` が参照されています。\n\n"); + md.push_str("
\n該当レコード一覧\n\n"); + md.push_str("| 行データ |\n|---|\n"); + for line in invalid_type_ids { + md.push_str(&format!("| `{}` |\n", escape_markdown_cell(line))); + } + md.push_str("\n
\n\n"); + } + + md +} + +fn escape_markdown_cell(s: &str) -> String { + s.replace('`', "`").replace('|', "|") +}