From 36a763a7825e17a4f39d6b410d53cba4978c2930 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 08:08:36 +0000 Subject: [PATCH 1/5] =?UTF-8?q?data=5Fvalidator=E3=81=AE=E4=B8=8D=E6=95=B4?= =?UTF-8?q?=E5=90=88=E6=A4=9C=E5=87=BA=E6=99=82=E3=81=ABPR=E3=82=B3?= =?UTF-8?q?=E3=83=A1=E3=83=B3=E3=83=88=E3=81=B8=E5=86=85=E8=A8=B3=E3=82=92?= =?UTF-8?q?=E6=8A=95=E7=A8=BF=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - data_validatorを改善し、最初の1件だけでなく全ての不整合レコードを収集するように変更 - 不整合検出時にMarkdownレポートファイルを生成 - verify_data_Integrity.ymlワークフローにPRコメント投稿機能を追加 - 失敗時: 不整合の内訳をPRコメントとして投稿 - 成功時: 過去の不整合コメントがあれば削除 - visualize_stopping_patternsと同様のパターン(peter-evans/find-comment, create-or-update-comment)を採用 https://claude.ai/code/session_01Lr3k5y8UYcH26a8hndo8ek --- .github/workflows/verify_data_Integrity.yml | 43 ++++++++++- data_validator/src/main.rs | 80 ++++++++++++++++----- 2 files changed, 103 insertions(+), 20 deletions(-) diff --git a/.github/workflows/verify_data_Integrity.yml b/.github/workflows/verify_data_Integrity.yml index d4435509..25bb32e5 100644 --- a/.github/workflows/verify_data_Integrity.yml +++ b/.github/workflows/verify_data_Integrity.yml @@ -9,10 +9,51 @@ on: name: Verify station data integrity +permissions: + contents: read + pull-requests: write + issues: write + jobs: verify_migration_data: name: Verify pushed migration data runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - run: cargo run --bin data_validator + + - name: Run data validator + id: validate + run: | + if cargo run --bin data_validator; then + echo "result=success" >> "$GITHUB_OUTPUT" + else + echo "result=failure" >> "$GITHUB_OUTPUT" + fi + + - name: Find existing comment + if: github.event_name == 'pull_request' + uses: peter-evans/find-comment@v3 + id: find_comment + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: "github-actions[bot]" + body-includes: "" + + - name: Post or update validation 
failure comment + if: github.event_name == 'pull_request' && steps.validate.outputs.result == 'failure' + uses: peter-evans/create-or-update-comment@v4 + with: + issue-number: ${{ github.event.pull_request.number }} + comment-id: ${{ steps.find_comment.outputs.comment-id }} + body-path: /tmp/validation_report.md + edit-mode: replace + + - name: Delete comment if validation passed + if: github.event_name == 'pull_request' && steps.validate.outputs.result == 'success' && steps.find_comment.outputs.comment-id != '' + run: gh api repos/${{ github.repository }}/issues/comments/${{ steps.find_comment.outputs.comment-id }} -X DELETE + env: + GH_TOKEN: ${{ github.token }} + + - name: Fail job if validation failed + if: steps.validate.outputs.result == 'failure' + run: exit 1 diff --git a/data_validator/src/main.rs b/data_validator/src/main.rs index e29f9672..8fb4f706 100644 --- a/data_validator/src/main.rs +++ b/data_validator/src/main.rs @@ -4,7 +4,8 @@ use std::path::Path; use csv::{ReaderBuilder, StringRecord}; fn main() -> Result<(), Box> { - let mut has_err = false; + let mut invalid_station_ids: Vec = Vec::new(); + let mut invalid_type_ids: Vec = Vec::new(); let data_path: &Path = Path::new("data"); let mut rdr = ReaderBuilder::new().from_path(data_path.join("3!stations.csv"))?; @@ -24,32 +25,73 @@ fn main() -> Result<(), Box> { let mut rdr = ReaderBuilder::new().from_path(data_path.join("5!station_station_types.csv"))?; let records: Vec = rdr.records().filter_map(|row| row.ok()).collect(); - if let Some(invalid_record) = records - .iter() - .find(|row| !station_ids.contains(&row.get(1).unwrap().parse::().unwrap())) - { - println!( - "[INVALID] Unrecognized Station ID {:?} Found!", - invalid_record.get(1).unwrap() - ); - has_err = true; + for record in &records { + let station_cd = record.get(1).unwrap(); + if !station_ids.contains(&station_cd.parse::().unwrap()) { + let line = record.iter().collect::>().join(","); + println!("[INVALID] Unrecognized Station ID {:?} 
Found!", station_cd); + invalid_station_ids.push(line); + } } - if let Some(invalid_record) = records - .iter() - .find(|row| !type_ids.contains(&row.get(2).unwrap().parse::().unwrap())) - { - println!( - "[INVALID] Unrecognized Type ID {:?} Found!", - invalid_record.get(2).unwrap() - ); - has_err = true; + for record in &records { + let type_cd = record.get(2).unwrap(); + if !type_ids.contains(&type_cd.parse::().unwrap()) { + let line = record.iter().collect::>().join(","); + println!("[INVALID] Unrecognized Type ID {:?} Found!", type_cd); + invalid_type_ids.push(line); + } } + let has_err = !invalid_station_ids.is_empty() || !invalid_type_ids.is_empty(); + if has_err { + let report = build_markdown_report(&invalid_station_ids, &invalid_type_ids); + let report_path = + std::env::var("VALIDATION_REPORT_PATH").unwrap_or("/tmp/validation_report.md".into()); + std::fs::write(&report_path, &report)?; + eprintln!("Validation report written to {}", report_path); panic!("[FATAL] Verification hasn't been passed!"); } println!("[VALID] No errors reported."); Ok(()) } + +fn build_markdown_report(invalid_station_ids: &[String], invalid_type_ids: &[String]) -> String { + let mut md = String::new(); + + md.push_str("\n"); + md.push_str("## :x: データ整合性チェックに失敗しました\n\n"); + md.push_str("`5!station_station_types.csv` に存在しない外部キーへの参照が含まれています。\n\n"); + + if !invalid_station_ids.is_empty() { + md.push_str(&format!( + "### 不正な Station ID ({} 件)\n\n", + invalid_station_ids.len() + )); + md.push_str("`3!stations.csv` に存在しない `station_cd` が参照されています。\n\n"); + md.push_str("
\n該当レコード一覧\n\n"); + md.push_str("| 行データ |\n|---|\n"); + for line in invalid_station_ids { + md.push_str(&format!("| `{}` |\n", line)); + } + md.push_str("\n
\n\n"); + } + + if !invalid_type_ids.is_empty() { + md.push_str(&format!( + "### 不正な Type ID ({} 件)\n\n", + invalid_type_ids.len() + )); + md.push_str("`4!types.csv` に存在しない `type_cd` が参照されています。\n\n"); + md.push_str("
\n該当レコード一覧\n\n"); + md.push_str("| 行データ |\n|---|\n"); + for line in invalid_type_ids { + md.push_str(&format!("| `{}` |\n", line)); + } + md.push_str("\n
\n\n"); + } + + md +} From 6c31f022ddf79b3e57df8d2446869e6d284a7f19 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 09:18:21 +0000 Subject: [PATCH 2/5] =?UTF-8?q?data=5Fvalidator:=20Vec=E2=86=92HashSet?= =?UTF-8?q?=E5=8C=96=E3=81=A8=E3=83=AB=E3=83=BC=E3=83=97=E7=B5=B1=E5=90=88?= =?UTF-8?q?=E3=81=A7=E6=A4=9C=E8=A8=BC=E3=82=92=E5=8A=B9=E7=8E=87=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - station_ids/type_idsをVecからHashSetに変更しO(1)ルックアップに - 2つの別々のループを1つに統合し、レコード毎にstation_cd/type_cdを一度だけパース https://claude.ai/code/session_01Lr3k5y8UYcH26a8hndo8ek --- data_validator/src/main.rs | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/data_validator/src/main.rs b/data_validator/src/main.rs index 8fb4f706..f67d3fa1 100644 --- a/data_validator/src/main.rs +++ b/data_validator/src/main.rs @@ -1,4 +1,5 @@ use core::panic; +use std::collections::HashSet; use std::path::Path; use csv::{ReaderBuilder, StringRecord}; @@ -10,14 +11,14 @@ fn main() -> Result<(), Box> { let data_path: &Path = Path::new("data"); let mut rdr = ReaderBuilder::new().from_path(data_path.join("3!stations.csv"))?; let records: Vec = rdr.records().filter_map(|row| row.ok()).collect(); - let station_ids: Vec = records + let station_ids: HashSet = records .iter() .map(|row| row.get(0).unwrap().parse::().unwrap()) .collect(); let mut rdr = ReaderBuilder::new().from_path(data_path.join("4!types.csv"))?; let records: Vec = rdr.records().filter_map(|row| row.ok()).collect(); - let type_ids: Vec = records + let type_ids: HashSet = records .iter() .map(|row| row.get(1).unwrap().parse::().unwrap()) .collect(); @@ -26,20 +27,17 @@ fn main() -> Result<(), Box> { let records: Vec = rdr.records().filter_map(|row| row.ok()).collect(); for record in &records { - let station_cd = record.get(1).unwrap(); - if !station_ids.contains(&station_cd.parse::().unwrap()) { - let line = record.iter().collect::>().join(","); + let 
station_cd: u32 = record.get(1).unwrap().parse().unwrap(); + let type_cd: u32 = record.get(2).unwrap().parse().unwrap(); + let line = || record.iter().collect::>().join(","); + + if !station_ids.contains(&station_cd) { println!("[INVALID] Unrecognized Station ID {:?} Found!", station_cd); - invalid_station_ids.push(line); + invalid_station_ids.push(line()); } - } - - for record in &records { - let type_cd = record.get(2).unwrap(); - if !type_ids.contains(&type_cd.parse::().unwrap()) { - let line = record.iter().collect::>().join(","); + if !type_ids.contains(&type_cd) { println!("[INVALID] Unrecognized Type ID {:?} Found!", type_cd); - invalid_type_ids.push(line); + invalid_type_ids.push(line()); } } From 587f58bdae28751c27951f6d1bafcd0ac1cb8b56 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 09:27:35 +0000 Subject: [PATCH 3/5] data_validator: escape backticks and pipes in Markdown table cells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Raw CSV values containing backticks or pipe characters would break the Markdown table layout in the validation report. Add an escape_markdown_cell helper that replaces ` → &#96; and | → &#124;. https://claude.ai/code/session_01Lr3k5y8UYcH26a8hndo8ek --- data_validator/src/main.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/data_validator/src/main.rs b/data_validator/src/main.rs index f67d3fa1..41d21e9d 100644 --- a/data_validator/src/main.rs +++ b/data_validator/src/main.rs @@ -72,7 +72,7 @@ fn build_markdown_report(invalid_station_ids: &[String], invalid_type_ids: &[Str md.push_str("
\n該当レコード一覧\n\n"); md.push_str("| 行データ |\n|---|\n"); for line in invalid_station_ids { - md.push_str(&format!("| `{}` |\n", line)); + md.push_str(&format!("| `{}` |\n", escape_markdown_cell(line))); } md.push_str("\n
\n\n"); } @@ -86,10 +86,14 @@ fn build_markdown_report(invalid_station_ids: &[String], invalid_type_ids: &[Str md.push_str("
\n該当レコード一覧\n\n"); md.push_str("| 行データ |\n|---|\n"); for line in invalid_type_ids { - md.push_str(&format!("| `{}` |\n", line)); + md.push_str(&format!("| `{}` |\n", escape_markdown_cell(line))); } md.push_str("\n
\n\n"); } md } + +fn escape_markdown_cell(s: &str) -> String { + s.replace('`', "`").replace('|', "|") +} From 1f357598b808105a3910ef293244e98d9ae58fb4 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 09:35:10 +0000 Subject: [PATCH 4/5] data_validator: propagate CSV parse errors instead of silently dropping them Replace filter_map(|row| row.ok()) with collect::, _>>()? at all three CSV-reading sites so malformed rows surface as errors rather than being silently skipped. https://claude.ai/code/session_01Lr3k5y8UYcH26a8hndo8ek --- data_validator/src/main.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data_validator/src/main.rs b/data_validator/src/main.rs index 41d21e9d..f4f33860 100644 --- a/data_validator/src/main.rs +++ b/data_validator/src/main.rs @@ -10,21 +10,21 @@ fn main() -> Result<(), Box> { let data_path: &Path = Path::new("data"); let mut rdr = ReaderBuilder::new().from_path(data_path.join("3!stations.csv"))?; - let records: Vec = rdr.records().filter_map(|row| row.ok()).collect(); + let records: Vec = rdr.records().collect::, _>>()?; let station_ids: HashSet = records .iter() .map(|row| row.get(0).unwrap().parse::().unwrap()) .collect(); let mut rdr = ReaderBuilder::new().from_path(data_path.join("4!types.csv"))?; - let records: Vec = rdr.records().filter_map(|row| row.ok()).collect(); + let records: Vec = rdr.records().collect::, _>>()?; let type_ids: HashSet = records .iter() .map(|row| row.get(1).unwrap().parse::().unwrap()) .collect(); let mut rdr = ReaderBuilder::new().from_path(data_path.join("5!station_station_types.csv"))?; - let records: Vec = rdr.records().filter_map(|row| row.ok()).collect(); + let records: Vec = rdr.records().collect::, _>>()?; for record in &records { let station_cd: u32 = record.get(1).unwrap().parse().unwrap(); From 5a99b322bbbfe7a604708651b29efb07342064b5 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 09:55:37 +0000 Subject: [PATCH 5/5] data_validator: replace unwrap() 
with safe parsing in validation loop Use match with record.get().and_then(parse) instead of chained unwrap() calls for station_cd and type_cd. Missing or unparseable values now log the row and push it into the appropriate invalid list instead of panicking. https://claude.ai/code/session_01Lr3k5y8UYcH26a8hndo8ek --- data_validator/src/main.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/data_validator/src/main.rs b/data_validator/src/main.rs index f4f33860..a9bc1b7a 100644 --- a/data_validator/src/main.rs +++ b/data_validator/src/main.rs @@ -27,10 +27,25 @@ fn main() -> Result<(), Box> { let records: Vec = rdr.records().collect::, _>>()?; for record in &records { - let station_cd: u32 = record.get(1).unwrap().parse().unwrap(); - let type_cd: u32 = record.get(2).unwrap().parse().unwrap(); let line = || record.iter().collect::>().join(","); + let station_cd: u32 = match record.get(1).and_then(|v| v.parse().ok()) { + Some(id) => id, + None => { + println!("[INVALID] Failed to parse station_cd from row: {}", line()); + invalid_station_ids.push(line()); + continue; + } + }; + let type_cd: u32 = match record.get(2).and_then(|v| v.parse().ok()) { + Some(id) => id, + None => { + println!("[INVALID] Failed to parse type_cd from row: {}", line()); + invalid_type_ids.push(line()); + continue; + } + }; + if !station_ids.contains(&station_cd) { println!("[INVALID] Unrecognized Station ID {:?} Found!", station_cd); invalid_station_ids.push(line());