Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 42 additions & 1 deletion .github/workflows/verify_data_Integrity.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,51 @@ on:

# Workflow that runs the Rust data validator and mirrors its outcome onto the pull request.
name: Verify station data integrity

# Token scopes: read-only repository contents, plus write access for pull requests and issues
# (presumably what the comment create/update/delete steps below rely on — confirm).
permissions:
contents: read
pull-requests: write
issues: write

jobs:
verify_migration_data:
name: Verify pushed migration data
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
# NOTE(review): bare invocation of the same command that the "Run data validator" step wraps;
# likely the pre-change line kept visible by the diff view — confirm it is gone from the real file.
- run: cargo run --bin data_validator

# Records the validator's exit status in the `result` step output instead of failing here,
# so the comment steps can still run; the job is failed explicitly by the last step.
- name: Run data validator
id: validate
run: |
if cargo run --bin data_validator; then
echo "result=success" >> "$GITHUB_OUTPUT"
else
echo "result=failure" >> "$GITHUB_OUTPUT"
fi

# Looks for an earlier bot comment carrying the `<!-- data-validator -->` marker, which is the
# first line the validator's Markdown report starts with.
- name: Find existing comment
if: github.event_name == 'pull_request'
uses: peter-evans/find-comment@v3
id: find_comment
with:
issue-number: ${{ github.event.pull_request.number }}
comment-author: "github-actions[bot]"
body-includes: "<!-- data-validator -->"

# On failure, publishes the report file as the comment body; `comment-id` comes from the
# lookup above and `edit-mode: replace` overwrites the previous body.
# NOTE(review): the path matches the validator's default report location; if the validator
# exits non-zero before writing the report, this file may be missing — confirm how the
# action behaves in that case.
- name: Post or update validation failure comment
if: github.event_name == 'pull_request' && steps.validate.outputs.result == 'failure'
uses: peter-evans/create-or-update-comment@v4
with:
issue-number: ${{ github.event.pull_request.number }}
comment-id: ${{ steps.find_comment.outputs.comment-id }}
body-path: /tmp/validation_report.md
edit-mode: replace

# On success, removes a stale failure comment (only when the lookup found one).
- name: Delete comment if validation passed
if: github.event_name == 'pull_request' && steps.validate.outputs.result == 'success' && steps.find_comment.outputs.comment-id != ''
run: gh api repos/${{ github.repository }}/issues/comments/${{ steps.find_comment.outputs.comment-id }} -X DELETE
env:
GH_TOKEN: ${{ github.token }}

# Surfaces the recorded failure as the job result once the comment handling is done.
- name: Fail job if validation failed
if: steps.validate.outputs.result == 'failure'
run: exit 1
109 changes: 84 additions & 25 deletions data_validator/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,55 +1,114 @@
use core::panic;
use std::collections::HashSet;
use std::path::Path;

use csv::{ReaderBuilder, StringRecord};

/// Validates the foreign-key references in `data/5!station_station_types.csv`.
///
/// Collects the IDs parsed from column 0 of `3!stations.csv` and from column 1 of
/// `4!types.csv`, then checks column 1 (`station_cd`) and column 2 (`type_cd`) of every
/// `5!station_station_types.csv` record against them. Offending rows are printed and, when
/// any exist, a Markdown report is written before the process panics.
///
/// NOTE(review): this listing comes from a diff view (`@@ -1,55 +1,114 @@`), so superseded and
/// replacement lines sit next to each other (e.g. the `Vec<u32>` and `HashSet<u32>` bindings);
/// it does not compile as shown — confirm against the actual file.
fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut has_err = false;
// Full text of each offending row, grouped by which reference failed.
let mut invalid_station_ids: Vec<String> = Vec::new();
let mut invalid_type_ids: Vec<String> = Vec::new();

let data_path: &Path = Path::new("data");
// Reference set 1: station IDs, taken from the first column of every record.
let mut rdr = ReaderBuilder::new().from_path(data_path.join("3!stations.csv"))?;
let records: Vec<StringRecord> = rdr.records().filter_map(|row| row.ok()).collect();
let station_ids: Vec<u32> = records
// Collecting into `Result` lets `?` abort on a record that fails to read, whereas the
// `filter_map(|row| row.ok())` variant drops such records silently.
let records: Vec<StringRecord> = rdr.records().collect::<Result<Vec<_>, _>>()?;
let station_ids: HashSet<u32> = records
.iter()
// Panics if the column is missing or does not parse as `u32`.
.map(|row| row.get(0).unwrap().parse::<u32>().unwrap())
.collect();

// Reference set 2: type IDs, taken from the second column of every record.
let mut rdr = ReaderBuilder::new().from_path(data_path.join("4!types.csv"))?;
let records: Vec<StringRecord> = rdr.records().filter_map(|row| row.ok()).collect();
let type_ids: Vec<u32> = records
let records: Vec<StringRecord> = rdr.records().collect::<Result<Vec<_>, _>>()?;
let type_ids: HashSet<u32> = records
.iter()
// Panics if the column is missing or does not parse as `u32`.
.map(|row| row.get(1).unwrap().parse::<u32>().unwrap())
.collect();

// The records under test: each one references a station (column 1) and a type (column 2).
let mut rdr = ReaderBuilder::new().from_path(data_path.join("5!station_station_types.csv"))?;
let records: Vec<StringRecord> = rdr.records().filter_map(|row| row.ok()).collect();
let records: Vec<StringRecord> = rdr.records().collect::<Result<Vec<_>, _>>()?;

// `find`-based check: reports only the first record with an unknown station ID.
if let Some(invalid_record) = records
.iter()
.find(|row| !station_ids.contains(&row.get(1).unwrap().parse::<u32>().unwrap()))
{
println!(
"[INVALID] Unrecognized Station ID {:?} Found!",
invalid_record.get(1).unwrap()
);
has_err = true;
}
// Loop-based check: visits every record so that all offending rows are collected.
for record in &records {
// Rebuilds the row as comma-joined text on demand, for messages and the report.
let line = || record.iter().collect::<Vec<&str>>().join(",");

// `find`-based check: reports only the first record with an unknown type ID.
if let Some(invalid_record) = records
.iter()
.find(|row| !type_ids.contains(&row.get(2).unwrap().parse::<u32>().unwrap()))
{
println!(
"[INVALID] Unrecognized Type ID {:?} Found!",
invalid_record.get(2).unwrap()
);
has_err = true;
// A missing or non-numeric `station_cd` is recorded as an invalid station reference.
let station_cd: u32 = match record.get(1).and_then(|v| v.parse().ok()) {
Some(id) => id,
None => {
println!("[INVALID] Failed to parse station_cd from row: {}", line());
invalid_station_ids.push(line());
// Skips the remaining checks for this row.
continue;
}
};
// A missing or non-numeric `type_cd` is recorded as an invalid type reference.
let type_cd: u32 = match record.get(2).and_then(|v| v.parse().ok()) {
Some(id) => id,
None => {
println!("[INVALID] Failed to parse type_cd from row: {}", line());
invalid_type_ids.push(line());
continue;
}
};

// Both lookups run for a parsed row, so one row can land in both lists.
if !station_ids.contains(&station_cd) {
println!("[INVALID] Unrecognized Station ID {:?} Found!", station_cd);
invalid_station_ids.push(line());
}
if !type_ids.contains(&type_cd) {
println!("[INVALID] Unrecognized Type ID {:?} Found!", type_cd);
invalid_type_ids.push(line());
}
}

// The failure flag is derived from the collected rows.
let has_err = !invalid_station_ids.is_empty() || !invalid_type_ids.is_empty();

if has_err {
let report = build_markdown_report(&invalid_station_ids, &invalid_type_ids);
// The report location can be overridden through `VALIDATION_REPORT_PATH`.
let report_path =
std::env::var("VALIDATION_REPORT_PATH").unwrap_or("/tmp/validation_report.md".into());
std::fs::write(&report_path, &report)?;
eprintln!("Validation report written to {}", report_path);
// Aborts only after the report has been written to disk.
panic!("[FATAL] Verification hasn't been passed!");
}

println!("[VALID] No errors reported.");
Ok(())
}

fn build_markdown_report(invalid_station_ids: &[String], invalid_type_ids: &[String]) -> String {
let mut md = String::new();

md.push_str("<!-- data-validator -->\n");
md.push_str("## :x: データ整合性チェックに失敗しました\n\n");
md.push_str("`5!station_station_types.csv` に存在しない外部キーへの参照が含まれています。\n\n");

if !invalid_station_ids.is_empty() {
md.push_str(&format!(
"### 不正な Station ID ({} 件)\n\n",
invalid_station_ids.len()
));
md.push_str("`3!stations.csv` に存在しない `station_cd` が参照されています。\n\n");
md.push_str("<details>\n<summary>該当レコード一覧</summary>\n\n");
md.push_str("| 行データ |\n|---|\n");
for line in invalid_station_ids {
md.push_str(&format!("| `{}` |\n", escape_markdown_cell(line)));
}
md.push_str("\n</details>\n\n");
}

if !invalid_type_ids.is_empty() {
md.push_str(&format!(
"### 不正な Type ID ({} 件)\n\n",
invalid_type_ids.len()
));
md.push_str("`4!types.csv` に存在しない `type_cd` が参照されています。\n\n");
md.push_str("<details>\n<summary>該当レコード一覧</summary>\n\n");
md.push_str("| 行データ |\n|---|\n");
for line in invalid_type_ids {
md.push_str(&format!("| `{}` |\n", escape_markdown_cell(line)));
}
md.push_str("\n</details>\n\n");
}

md
}

/// Escapes a raw row so it can be placed inside a backtick code span within a GFM table cell,
/// which is how `build_markdown_report` embeds it (`` | `{}` | ``).
///
/// * `|` becomes `\|`. The table extension recognises this escape even inside code spans and
///   renders a plain `|`. A character reference such as `&#124;` would not work here, because
///   references are literal text inside code spans and would show up verbatim.
/// * `` ` `` becomes `&#96;` so that it cannot terminate the surrounding code span.
///   NOTE(review): for the same reason this renders as the literal text `&#96;`; showing a real
///   backtick would require the caller to stop using a single-backtick code span.
///
/// Returns the escaped copy; input without either character is returned unchanged.
fn escape_markdown_cell(s: &str) -> String {
    s.replace('`', "&#96;").replace('|', "\\|")
}