Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions seqBackupLib/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,13 @@ def build_fp_to_archive(fp: Path, has_index: bool, lane: str) -> list[Path]:
if has_index:
label.extend(["I1", "I2"])

rexp = "".join(["(L00", lane, "_)(R1)(_001.fastq.gz)$"])
modified_fp = [re.sub(rexp, "".join(["\\1", lab, "\\3"]), fp.name) for lab in label]
if "_L" in fp.name:
rexp = "".join(["(L00", lane, "_)(R1)(_001.fastq.gz)$"])
modified_fp = [
re.sub(rexp, "".join(["\\1", lab, "\\3"]), fp.name) for lab in label
]
else:
modified_fp = [fp.name.replace("R1", lab) for lab in label]
return [fp] + [fp.parent / n for n in modified_fp]


Expand Down Expand Up @@ -87,16 +92,21 @@ def backup_fastq(

# move the files to the archive location and remove permission
permission = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
md5s = []
for fp in RI_fps:
output_fp = write_dir / fp.name
if "_L" in fp.name:
dest_name = fp.name
else:
dest_name = fp.name.replace("_S0_", f"_S0_L{r1.lane.zfill(3)}_")
output_fp = write_dir / dest_name
shutil.copyfile(fp, output_fp)
output_fp.chmod(permission)
md5s.append((dest_name, return_md5(fp)))

# copy the sample sheet to destination folder
shutil.copyfile(sample_sheet_fp, write_dir / sample_sheet_fp.name)

# write md5sums to a file
md5s = [(fp.name, return_md5(fp)) for fp in RI_fps]
md5_out_fp = write_dir / ".".join([r1.build_archive_dir(), "md5"])
with open(md5_out_fp, "w") as md5_out:
[md5_out.write("\t".join(md5) + "\n") for md5 in md5s]
Expand Down
22 changes: 15 additions & 7 deletions seqBackupLib/illumina.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,21 @@ def _parse_header(self) -> dict[str, str]:

def _parse_fastq_file(self) -> dict[str, str]:
    """Extract lane, read/index type and read number from the FASTQ file name.

    Two file-name layouts are recognised:

    * ``Undetermined_S0_L00<lane>_<R|I><1|2>_001.fastq.gz`` -- every field
      is taken from the name itself.
    * ``Undetermined_S0_<R|I><1|2>_001.fastq.gz`` -- the name carries no
      lane token, so the lane is read from ``self.fastq_info["lane"]``.

    Returns:
        A dict with the keys ``"lane"``, ``"read_or_index"`` and ``"read"``.

    Raises:
        ValueError: If the file name matches neither layout.
    """
    filename = self.filepath.name

    # Raw strings with escaped dots: "." must only match a literal dot.
    if matches := re.match(
        r"Undetermined_S0_L00([1-8])_([RI])([12])_001\.fastq\.gz", filename
    ):
        keys2 = ("lane", "read_or_index", "read")
        return dict(zip(keys2, matches.groups()))

    # No lane token in the name: fall back to the lane stored in fastq_info.
    if matches := re.match(r"Undetermined_S0_([RI])([12])_001\.fastq\.gz", filename):
        return {
            "lane": self.fastq_info["lane"],
            "read_or_index": matches.group(1),
            "read": matches.group(2),
        }

    # Fail loudly instead of dereferencing a failed (None) match.
    raise ValueError(f"Unexpected FASTQ file name: {filename}")

@property
def lane(self) -> str:
Expand Down
31 changes: 31 additions & 0 deletions test/test_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ def test_build_fp_to_archive():
with pytest.raises(IOError):
build_fp_to_archive(Path("Undetermined_S0_L001_R2_001.fastq.gz"), True, "1")

archive = build_fp_to_archive(Path("Undetermined_S0_R1_001.fastq.gz"), True, "1")
assert archive == [
Path("Undetermined_S0_R1_001.fastq.gz"),
Path("Undetermined_S0_R2_001.fastq.gz"),
Path("Undetermined_S0_I1_001.fastq.gz"),
Path("Undetermined_S0_I2_001.fastq.gz"),
]


def test_return_md5(tmp_path):
test_file = tmp_path / "test.txt"
Expand Down Expand Up @@ -73,3 +81,26 @@ def test_backup_fastq(tmp_path, full_miseq_dir):
True,
100,
)


def test_backup_fastq_without_lane(tmp_path, full_miseq_dir):
    """Back up FASTQ files whose names lack the lane token.

    The fixture files are renamed to drop ``L001`` before calling
    ``backup_fastq``; the archived R1/R2 copies are then expected to carry
    the ``L001`` token in their names again.
    """
    raw = tmp_path / "raw_reads"
    raw.mkdir(parents=True, exist_ok=True)
    sample_sheet_fp = full_miseq_dir / "sample_sheet.csv"

    # Strip the lane token from every read/index file in the run directory.
    for lab in ("R1", "R2", "I1", "I2"):
        with_lane = full_miseq_dir / f"Undetermined_S0_L001_{lab}_001.fastq.gz"
        without_lane = full_miseq_dir / f"Undetermined_S0_{lab}_001.fastq.gz"
        with_lane.rename(without_lane)

    forward_read = full_miseq_dir / "Undetermined_S0_R1_001.fastq.gz"
    backup_fastq(forward_read, raw, sample_sheet_fp, True, 100)

    # The archive directory name is fixed by the fixture's run metadata.
    out_dir = raw / "250407_M03543_0443_000000000-DTHBL_L001"
    archived_r1 = out_dir / "Undetermined_S0_L001_R1_001.fastq.gz"
    archived_r2 = out_dir / "Undetermined_S0_L001_R2_001.fastq.gz"
    assert archived_r1.is_file()
    assert archived_r2.is_file()
10 changes: 10 additions & 0 deletions test/test_illumina.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,13 @@ def test_illumina_dir(machine_type, request):
fp = request.getfixturevalue(fixture_name)

d = IlluminaDir(fp.name)


def test_illumina_fastq_without_lane(novaseq_dir):
    """Parse a FASTQ whose file name has no lane token.

    The fixture's R1 file is renamed to drop ``L001``; the file-name/content
    check must still pass and the archive directory name must end in ``L001``.
    """
    lane_free = novaseq_dir / "Undetermined_S0_R1_001.fastq.gz"
    (novaseq_dir / "Undetermined_S0_L001_R1_001.fastq.gz").rename(lane_free)

    # Keep the handle open while the assertions run against the parsed object.
    with gzip.open(lane_free, "rt") as handle:
        fastq = IlluminaFastq(handle)
        name_matches_content = fastq.check_fp_vs_content()[0]
        assert name_matches_content
        archive_dir = fastq.build_archive_dir()
        assert archive_dir.endswith("L001")