From 5f70cccfcb4d1a90857e3690ed828133f8c7552f Mon Sep 17 00:00:00 2001 From: David Bosschaert Date: Fri, 28 Feb 2025 14:33:44 -0800 Subject: [PATCH 1/5] Initial changes for Windows --- src/file_util.rs | 33 +++++++++++++++++++++++---------- src/xml_util.rs | 12 +++++++----- src/zip_util.rs | 25 +++++++++++++++++++------ 3 files changed, 49 insertions(+), 21 deletions(-) diff --git a/src/file_util.rs b/src/file_util.rs index bb42435..08adee4 100644 --- a/src/file_util.rs +++ b/src/file_util.rs @@ -1,24 +1,37 @@ -use std::path::Path; +use std::path::{MAIN_SEPARATOR, MAIN_SEPARATOR_STR, Path}; pub struct FileUtil { } impl FileUtil { + pub fn normalize_path(s: &str) -> String { + let src_char = if MAIN_SEPARATOR == '/' { + "\\" + } else { + "/" + }; + + s.replace(src_char, MAIN_SEPARATOR_STR) + } + pub fn get_sub_path(path: &Path, base_dir: &str) -> String { + let nbase_dir = FileUtil::normalize_path(base_dir); + let base; - if base_dir.ends_with("/") { - base = base_dir.to_owned(); + if nbase_dir.ends_with(MAIN_SEPARATOR_STR) { + base = nbase_dir; } else { - base = base_dir.to_owned() + "/"; + base = nbase_dir + MAIN_SEPARATOR_STR; } let sub_path; let full_path = path.to_string_lossy(); - if full_path.starts_with(&base) { - sub_path = &full_path[base.len()..]; + let nfull_path = FileUtil::normalize_path(&full_path); + if nfull_path.starts_with(&base) { + sub_path = &nfull_path[base.len()..]; } else { - sub_path = &full_path; + sub_path = &nfull_path; } sub_path.to_owned() @@ -34,20 +47,20 @@ mod tests { fn test_get_sub_path() { let p = Path::new("/some/where/on/the/rainbow.docx"); let b = "/some/where/on/"; - assert_eq!("the/rainbow.docx", FileUtil::get_sub_path(p, b)); + assert_eq!(FileUtil::normalize_path("the/rainbow.docx"), FileUtil::get_sub_path(p, b)); } #[test] fn test_get_sub_path1() { let p = Path::new("/some/where/on/the/rainbow.docx"); let b = "/some/where/on"; - assert_eq!("the/rainbow.docx", FileUtil::get_sub_path(p, b)); + assert_eq!(FileUtil::normalize_path("the/rainbow.docx"), FileUtil::get_sub_path(p, b)); } #[test] fn test_get_sub_path2() { let b = "/some/where/on/"; let p = Path::new("/elsewhere/cloud.docx"); - assert_eq!("/elsewhere/cloud.docx", FileUtil::get_sub_path(p, b)); + assert_eq!(FileUtil::normalize_path("/elsewhere/cloud.docx"), FileUtil::get_sub_path(p, b)); } } \ No newline at end of file diff --git a/src/xml_util.rs b/src/xml_util.rs index 1620a75..6f358c0 100644 --- a/src/xml_util.rs +++ b/src/xml_util.rs @@ -7,7 +7,7 @@ use regex::Regex; use std::collections::{BTreeMap, HashMap}; use std::fs::{File, self}; use std::io::{BufReader, BufWriter}; -use std::path::{Path, PathBuf}; +use std::path::{Path, PathBuf, MAIN_SEPARATOR, MAIN_SEPARATOR_STR}; use std::str; use uuid::Uuid; use unicase::UniCase; @@ -162,7 +162,7 @@ impl XMLUtil { new_fn.push_str(&f[last_slash..]); new_fn.push('.'); new_fn.push_str(rels_extension); - rels_files.push(new_fn); + rels_files.push(FileUtil::normalize_path(&new_fn)); } rels_files @@ -175,9 +175,9 @@ impl XMLUtil { /// `pattern` and `replacement` are used to search/replace operations. /// `output_file` optionally specifies a different output file for replacement operations. fn snr_xml(mode: Mode, dir: &str, src_file: &str, files: Option>, output_file: Option<&str>) { - let mut base_dir = dir.to_owned(); - if !dir.ends_with("/") { - base_dir.push('/'); + let mut base_dir = FileUtil::normalize_path(dir); + if !base_dir.ends_with(MAIN_SEPARATOR_STR) { + base_dir.push(MAIN_SEPARATOR); } for entry in WalkDir::new(dir).into_iter() @@ -991,6 +991,7 @@ mod tests { #[serial] // This test has to run serially to avoid multiple tests to capture stdout fn test_grep() { let out = capture_stdout!(XMLUtil::grep_xml("./src/test/test_tree2", "doc123.docx", "[oe]re")); + println!("out: {}", out); assert!(out.contains("doc123.docx: And some some some more text")); assert!(out.contains("doc123.docx: Something here")); assert!(out.contains("doc123.docx: Here’s a hyperlink:")); @@ -1239,6 +1240,7 @@ mod tests { // Check that the replacement worked as expected let after = fs::read_to_string(testdir.join("word/document2.xml"))?; + println!("After: {}", after); assert!(after.contains("And zzz zzz more text")); assert!(after.contains("and then zzz")); assert!(after.contains("zzzthing here")); diff --git a/src/zip_util.rs b/src/zip_util.rs index 39e079a..dc0aef7 100644 --- a/src/zip_util.rs +++ b/src/zip_util.rs @@ -10,7 +10,7 @@ pub struct ZipUtil { } impl ZipUtil { - pub fn read_zip( + pub fn read_zip( zip_file: &str, dest_dir: &str ) -> zip::result::ZipResult<()> { @@ -151,10 +151,20 @@ impl ZipUtil { mod tests { use crate::file_util::FileUtil; use super::ZipUtil; - use std::{path::Path, fs, io}; + use std::{path::MAIN_SEPARATOR, path::MAIN_SEPARATOR_STR, path::Path, fs, io}; use walkdir::WalkDir; use testdir::testdir; + fn normalize_path(s: &str) -> String { + let src_char = if MAIN_SEPARATOR == '/' { + "\\" + } else { + "/" + }; + + s.replace(src_char, MAIN_SEPARATOR_STR) + } + #[test] fn test_unzip() -> io::Result<()> { let zipfile = "./src/test/test_zip.zip"; @@ -165,13 +175,16 @@ mod tests { let wd = WalkDir::new(&outdir); let extracts: Vec = wd.into_iter() .map(|e| FileUtil::get_sub_path(&e.unwrap().path(), &outdir.to_string_lossy())) - .filter(|e| !e.starts_with("/")) + .filter(|e| !e.starts_with(MAIN_SEPARATOR_STR)) .filter(|e| e.contains('.')) .collect(); + println!("Extracts: {:?}", extracts); + println!("Separator: {}", MAIN_SEPARATOR_STR); + assert!(extracts.contains(&"foo.test.txt".into())); assert!(extracts.contains(&"empty.file".into())); - assert!(extracts.contains(&"sub/sub/[Content_Types].xml".into())); + assert!(extracts.contains(&normalize_path("sub/sub/[Content_Types].xml"))); assert_eq!(3, extracts.len(), "Should be only 3 files"); let empty_file = Path::new(&outdir).join("empty.file"); @@ -209,14 +222,14 @@ mod tests { let extracts: Vec = WalkDir::new(&expldir).into_iter() .map(|e| FileUtil::get_sub_path(&e.unwrap().path(), &expldir.to_string_lossy())) - .filter(|e| !e.starts_with("/")) + .filter(|e| !e.starts_with(MAIN_SEPARATOR_STR)) .filter(|e| e.contains('.')) .collect(); assert_eq!(3, extracts.len()); assert!(extracts.contains(&"foo.test.txt".into())); assert!(extracts.contains(&"empty.file".into())); - assert!(extracts.contains(&"sub/sub/[Content_Types].xml".into())); + assert!(extracts.contains(&normalize_path("sub/sub/[Content_Types].xml"))); let empty_file = Path::new(&expldir).join("empty.file"); assert!(empty_file.is_file()); From 31bb7dd194b46e07ed7c64ff5ebe8d8ed7df65df Mon Sep 17 00:00:00 2001 From: David Bosschaert Date: Fri, 28 Feb 2025 15:51:43 -0800 Subject: [PATCH 2/5] More windows support changes --- src/xml_util.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xml_util.rs b/src/xml_util.rs index 6f358c0..0cbbf5a 100644 --- a/src/xml_util.rs +++ b/src/xml_util.rs @@ -154,7 +154,7 @@ impl XMLUtil { let mut rels_files = vec!(); for f in files { - let last_slash = f.rfind('/').expect(&f); + let last_slash = f.rfind(MAIN_SEPARATOR).expect(&f); let mut new_fn = String::new(); new_fn.push_str(&f[..last_slash]); new_fn.push_str("/_"); @@ -838,7 +838,7 @@ impl XMLUtil { } else { rel_pn = pn; } - mappings.insert(rel_pn.to_string(), + mappings.insert(FileUtil::normalize_path(rel_pn), str::from_utf8(cv.value.as_ref()).unwrap().to_string()); } } From 147e2489b2847655338767df9f9b455f7da2a697 Mon Sep 17 00:00:00 2001 From: David Bosschaert Date: Fri, 28 Feb 2025 15:55:45 +0000 Subject: [PATCH 3/5] Cleanup --- src/xml_util.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/xml_util.rs b/src/xml_util.rs index 0cbbf5a..fc705ed 100644 --- a/src/xml_util.rs +++ b/src/xml_util.rs @@ -991,7 +991,6 @@ mod tests { #[serial] // This test has to run serially to avoid multiple tests to capture stdout fn test_grep() { let out = capture_stdout!(XMLUtil::grep_xml("./src/test/test_tree2", "doc123.docx", "[oe]re")); - println!("out: {}", out); assert!(out.contains("doc123.docx: And some some some more text")); assert!(out.contains("doc123.docx: Something here")); assert!(out.contains("doc123.docx: Here’s a hyperlink:")); @@ -1240,7 +1239,6 @@ mod tests { // Check that the replacement worked as expected let after = fs::read_to_string(testdir.join("word/document2.xml"))?; - println!("After: {}", after); assert!(after.contains("And zzz zzz more text")); assert!(after.contains("and then zzz")); assert!(after.contains("zzzthing here")); From fc3a5536f035246fc257d96ec541db41d42821a9 Mon Sep 17 00:00:00 2001 From: David Bosschaert Date: Fri, 28 Feb 2025 16:03:52 +0000 Subject: [PATCH 4/5] More cleanup --- Cargo.toml | 2 +- src/zip_util.rs | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0ef678f..b89279f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "docxtools" -version = "0.9.1-SNAPSHOT" +version = "0.10.0-SNAPSHOT" edition = "2021" authors = ["David Bosschaert "] license = "Apache-2.0" diff --git a/src/zip_util.rs b/src/zip_util.rs index dc0aef7..ab3afbc 100644 --- a/src/zip_util.rs +++ b/src/zip_util.rs @@ -10,7 +10,7 @@ pub struct ZipUtil { } impl ZipUtil { - pub fn read_zip( + pub fn read_zip( zip_file: &str, dest_dir: &str ) -> zip::result::ZipResult<()> { @@ -157,9 +157,9 @@ mod tests { fn normalize_path(s: &str) -> String { let src_char = if MAIN_SEPARATOR == '/' { - "\\" - } else { - "/" + "\\" + } else { + "/" }; s.replace(src_char, MAIN_SEPARATOR_STR) @@ -179,9 +179,6 @@ mod tests { .filter(|e| e.contains('.')) .collect(); - println!("Extracts: {:?}", extracts); - println!("Separator: {}", MAIN_SEPARATOR_STR); - assert!(extracts.contains(&"foo.test.txt".into())); assert!(extracts.contains(&"empty.file".into())); assert!(extracts.contains(&normalize_path("sub/sub/[Content_Types].xml"))); From b07907a2e89063e75aa7e3d564efd31fb05fe6fb Mon Sep 17 00:00:00 2001 From: David Bosschaert Date: Fri, 28 Feb 2025 16:07:39 +0000 Subject: [PATCH 5/5] Reduce duplication --- src/zip_util.rs | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/src/zip_util.rs b/src/zip_util.rs index ab3afbc..bf285e4 100644 --- a/src/zip_util.rs +++ b/src/zip_util.rs @@ -151,20 +151,10 @@ impl ZipUtil { mod tests { use crate::file_util::FileUtil; use super::ZipUtil; - use std::{path::MAIN_SEPARATOR, path::MAIN_SEPARATOR_STR, path::Path, fs, io}; + use std::{path::MAIN_SEPARATOR_STR, path::Path, fs, io}; use walkdir::WalkDir; use testdir::testdir; - fn normalize_path(s: &str) -> String { - let src_char = if MAIN_SEPARATOR == '/' { - "\\" - } else { - "/" - }; - - s.replace(src_char, MAIN_SEPARATOR_STR) - } - #[test] fn test_unzip() -> io::Result<()> { let zipfile = "./src/test/test_zip.zip"; @@ -181,7 +171,7 @@ mod tests { assert!(extracts.contains(&"foo.test.txt".into())); assert!(extracts.contains(&"empty.file".into())); - assert!(extracts.contains(&normalize_path("sub/sub/[Content_Types].xml"))); + assert!(extracts.contains(&FileUtil::normalize_path("sub/sub/[Content_Types].xml"))); assert_eq!(3, extracts.len(), "Should be only 3 files"); let empty_file = Path::new(&outdir).join("empty.file"); @@ -226,7 +216,7 @@ mod tests { assert_eq!(3, extracts.len()); assert!(extracts.contains(&"foo.test.txt".into())); assert!(extracts.contains(&"empty.file".into())); - assert!(extracts.contains(&normalize_path("sub/sub/[Content_Types].xml"))); + assert!(extracts.contains(&FileUtil::normalize_path("sub/sub/[Content_Types].xml"))); let empty_file = Path::new(&expldir).join("empty.file"); assert!(empty_file.is_file());