forked from kacparas/bioinformatic_tools
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrename_encode.R
More file actions
30 lines (24 loc) · 1.16 KB
/
rename_encode.R
File metadata and controls
30 lines (24 loc) · 1.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
library(tidyverse)
library(glue)
# Prefix every ENCODE FASTQ download in the data directory with the accession
# of the metadata row that lists it:
#   ENCFFxxxxxx.fastq.gz -> {Accession}_ENCFFxxxxxx.fastq.gz
# The file -> accession mapping is taken from the "Accession" and "Files"
# columns of report.tsv, which must sit in the same directory.

# set working dir
# NOTE(review): setwd() with a machine-specific path is fragile; it is kept so
# the relative paths below resolve exactly as before.
data_dir <- "/Volumes/T9/vilius/human/ipsc_comparison"
setwd(data_dir)

# read ENCODE metadata
# skip = 1 drops the first line of the report so the second line becomes the
# header. quote / comment.char are tightened so that apostrophes and '#'
# inside free-text fields cannot swallow or truncate rows.
metadata <- read.table(
  "./report.tsv",
  sep = "\t",
  skip = 1,
  header = TRUE,
  quote = "\"",
  comment.char = "",
  stringsAsFactors = FALSE
)
# filter for relevant columns
metadata <- metadata[, c("Accession", "Files")]

# list .fastq.gz in the directory
files <- list.files()
# The pattern is anchored at both ends so that files already renamed to
# {accession}_{file}.fastq.gz are not picked up again when the script is
# re-run.
fastq_files <- str_subset(files, "^ENCFF.*\\.fastq\\.gz$")
fastq_files_no_ext <- str_replace(fastq_files, "\\.fastq\\.gz$", "")

# One row per FASTQ file; Accession is filled in by the loop below. rep()
# keeps the construction valid when no FASTQ file is present.
file_df <- data.frame(
  fastq_files_no_ext = fastq_files_no_ext,
  Accession = rep(NA_character_, length(fastq_files_no_ext)),
  stringsAsFactors = FALSE
)

# rename .fastq.gz to {accession}_{original_file_name}.fastq.gz
for (i in seq_along(fastq_files_no_ext)) {
  file_id <- fastq_files_no_ext[[i]]

  # fixed() matches the file name literally rather than as a regex; which()
  # drops NA results produced by missing "Files" entries.
  hits <- which(str_detect(metadata$Files, fixed(file_id)))
  accession <- unique(metadata$Accession[hits])
  accession <- accession[!is.na(accession)]

  # Skip (instead of aborting half-way or guessing) when the metadata does
  # not identify exactly one accession for this file.
  if (length(accession) != 1) {
    warning(
      "Skipping ", file_id, ": expected exactly one matching accession, ",
      "found ", length(accession), ".",
      call. = FALSE
    )
    next
  }
  file_df$Accession[i] <- accession

  old_file <- fastq_files[[i]]
  new_file <- glue("{accession}_{file_id}.fastq.gz")

  # file.rename() may silently replace an existing target; refuse instead.
  if (file.exists(new_file)) {
    warning(
      "Skipping ", old_file, ": target ", new_file, " already exists.",
      call. = FALSE
    )
    next
  }
  if (!file.rename(old_file, new_file)) {
    warning(
      "Failed to rename ", old_file, " to ", new_file, ".",
      call. = FALSE
    )
  }
}