Commit cb7280e

Filter the directories/files hashed when using commands with patterns
1 parent b739df3 commit cb7280e

File tree

1 file changed

+60 -33 lines changed

package.py

Lines changed: 60 additions & 33 deletions
@@ -243,32 +243,50 @@ def generate_content_hash(source_paths, hash_func=hashlib.sha256, log=None):

     if log:
         log = log.getChild("hash")
+        _log = log if log.isEnabledFor(DEBUG3) else None

     hash_obj = hash_func()

-    for source_path in source_paths:
-        if os.path.isdir(source_path):
-            source_dir = source_path
-            _log = log if log.isEnabledFor(DEBUG3) else None
-            for source_file in list_files(source_dir, log=_log):
+    for source_path, pf, prefix in source_paths:
+        if pf is not None:
+            for path_from_pattern in pf.filter(source_path, prefix):
+                if os.path.isdir(path_from_pattern):
+                    # Hash only the path of the directory
+                    source_dir = path_from_pattern
+                    source_file = None
+                else:
+                    source_dir = os.path.dirname(path_from_pattern)
+                    source_file = os.path.relpath(path_from_pattern, source_dir)
                 update_hash(hash_obj, source_dir, source_file)
                 if log:
-                    log.debug(os.path.join(source_dir, source_file))
+                    log.debug(path_from_pattern)
         else:
-            source_dir = os.path.dirname(source_path)
-            source_file = os.path.relpath(source_path, source_dir)
-            update_hash(hash_obj, source_dir, source_file)
-            if log:
-                log.debug(source_path)
+            if os.path.isdir(source_path):
+                source_dir = source_path
+                for source_file in list_files(source_dir, log=_log):
+                    update_hash(hash_obj, source_dir, source_file)
+                    if log:
+                        log.debug(os.path.join(source_dir, source_file))
+            else:
+                source_dir = os.path.dirname(source_path)
+                source_file = os.path.relpath(source_path, source_dir)
+                update_hash(hash_obj, source_dir, source_file)
+                if log:
+                    log.debug(source_path)

     return hash_obj


-def update_hash(hash_obj, file_root, file_path):
+def update_hash(hash_obj, file_root, file_path=None):
     """
-    Update a hashlib object with the relative path and contents of a file.
+    Update a hashlib object with the relative path and, if the given
+    file_path is not None, its content.
     """

+    if file_path is None:
+        hash_obj.update(file_root.encode())
+        return
+
     relative_path = os.path.join(file_root, file_path)
     hash_obj.update(relative_path.encode())

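For context, the reworked contract above means a directory entry contributes only its path to the digest, while a file entry contributes its relative path plus contents. A standalone sketch of that idea using plain hashlib (not the project's own update_hash; the paths are hypothetical):

import hashlib
import os


def sketch_update_hash(hash_obj, file_root, file_path=None):
    # Directory-style entry: mix in the path only.
    if file_path is None:
        hash_obj.update(file_root.encode())
        return
    # File entry: mix in the relative path, then the file contents.
    relative_path = os.path.join(file_root, file_path)
    hash_obj.update(relative_path.encode())
    with open(relative_path, "rb") as f:
        hash_obj.update(f.read())


h = hashlib.sha256()
sketch_update_hash(h, "src/vendor")            # directory: path only
# sketch_update_hash(h, "src", "handler.py")   # file: path + contents
print(h.hexdigest())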
@@ -562,7 +580,6 @@ class ZipContentFilter:
     def __init__(self, args):
         self._args = args
         self._rules = None
-        self._excludes = set()
         self._log = logging.getLogger("zip")

     def compile(self, patterns):
@@ -668,7 +685,7 @@ def hash(self, extra_paths):
         if not self._source_paths:
             raise ValueError("BuildPlanManager.plan() should be called first")

-        content_hash_paths = self._source_paths + extra_paths
+        content_hash_paths = self._source_paths + [(p, None, None) for p in extra_paths]

         # Generate a hash based on file names and content. Also use the
         # runtime value, build command, and content of the build paths
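Since self._source_paths now holds (path, filter, prefix) tuples, plain extra paths are wrapped with empty filter and prefix slots so every entry shares one shape. A quick standalone illustration (the paths are hypothetical):

extra_paths = ["builds/build_plan.json", "runtime.txt"]
source_paths = [("src/lambda", None, None)]
content_hash_paths = source_paths + [(p, None, None) for p in extra_paths]
assert all(len(entry) == 3 for entry in content_hash_paths)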
@@ -677,7 +694,7 @@ def hash(self, extra_paths):
         content_hash = generate_content_hash(content_hash_paths, log=self._log)
         return content_hash

-    def plan(self, source_path, query):
+    def plan(self, source_path, query, log=None):
         claims = source_path
         if not isinstance(source_path, list):
             claims = [source_path]
@@ -686,11 +703,14 @@ def plan(self, source_path, query):
         build_plan = []
         build_step = []

+        if log:
+            log = log.getChild("plan")
+
         def step(*x):
             build_step.append(x)

-        def hash(path):
-            source_paths.append(path)
+        def hash(path, patterns=None, prefix=None):
+            source_paths.append((path, patterns, prefix))

         def pip_requirements_step(path, prefix=None, required=False, tmp_dir=None):
             command = runtime
@@ -759,7 +779,7 @@ def npm_requirements_step(path, prefix=None, required=False, tmp_dir=None):
             step("npm", runtime, requirements, prefix, tmp_dir)
             hash(requirements)

-        def commands_step(path, commands):
+        def commands_step(path, commands, patterns):
             if not commands:
                 return

@@ -773,8 +793,6 @@ def commands_step(path, commands):
             for c in commands:
                 if isinstance(c, str):
                     if c.startswith(":zip"):
-                        if path:
-                            hash(path)
                         if batch:
                             step("sh", "\n".join(batch))
                             batch.clear()
@@ -785,12 +803,18 @@ def commands_step(path, commands):
                             prefix = prefix.strip()
                             _path = os.path.normpath(_path)
                             step("zip:embedded", _path, prefix)
+                            if path:
+                                hash(path, patterns, prefix)
                         elif n == 2:
                             _, _path = c
                             _path = os.path.normpath(_path)
                             step("zip:embedded", _path)
+                            if path:
+                                hash(path, patterns=patterns)
                         elif n == 1:
                             step("zip:embedded")
+                            if path:
+                                hash(path, patterns=patterns)
                         else:
                             raise ValueError(
                                 ":zip invalid call signature, use: "
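For reference, the three :zip call shapes handled above (n == 3, 2, 1) correspond to command entries like the following; the paths and prefix are hypothetical, and each branch now re-hashes the claim path together with the claim's patterns:

commands = [
    "npm run build",
    ":zip dist lambda/",  # n == 3: embed dist under the lambda/ prefix
    ":zip dist",          # n == 2: embed dist with no prefix
    ":zip",               # n == 1: embed with no explicit path
]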
@@ -829,7 +853,7 @@ def commands_step(path, commands):
                 if patterns:
                     step("set:filter", patterns_list(self._args, patterns))
                 if commands:
-                    commands_step(path, commands)
+                    commands_step(path, commands, patterns)
                 else:
                     prefix = claim.get("prefix_in_zip")
                     pip_requirements = claim.get("pip_requirements")
@@ -883,23 +907,26 @@ def commands_step(path, commands):
                         )
                     if path:
                         step("zip", path, prefix)
-                        if patterns:
-                            # Take patterns into account when computing hash
-                            pf = ZipContentFilter(args=self._args)
-                            pf.compile(patterns)
-
-                            for path_from_pattern in pf.filter(path, prefix):
-                                hash(path_from_pattern)
-                        else:
-                            hash(path)
+                        hash(path, patterns, prefix)
             else:
                 raise ValueError("Unsupported source_path item: {}".format(claim))

             if build_step:
                 build_plan.append(build_step)
                 build_step = []

-        self._source_paths = source_paths
+        if log.isEnabledFor(DEBUG3):
+            log.debug("source_paths: %s", json.dumps(source_paths, indent=2))
+
+        for p, patterns, prefix in source_paths:
+            if self._source_paths is None:
+                self._source_paths = []
+            pf = None
+            if patterns is not None:
+                pf = ZipContentFilter(args=self._args)
+                pf.compile(patterns)
+            self._source_paths.append((p, pf, prefix))
+
         return build_plan

     def execute(self, build_plan, zip_stream, query):
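At the end of plan(), each recorded (path, patterns, prefix) tuple is turned into (path, compiled filter, prefix), which is the shape generate_content_hash consumes. A standalone sketch of that contract with a stand-in filter class (FnmatchFilter is not the project's ZipContentFilter, and the paths and pattern are hypothetical):

import fnmatch
import os


class FnmatchFilter:
    """Stand-in exposing the same filter(path, prefix) duck type."""

    def __init__(self, pattern):
        self.pattern = pattern

    def filter(self, path, prefix=None):
        # Yield only the files under `path` that match the pattern.
        for root, _dirs, files in os.walk(path):
            for name in files:
                full_path = os.path.join(root, name)
                if fnmatch.fnmatch(full_path, self.pattern):
                    yield full_path


source_paths = [
    ("src/lambda", FnmatchFilter("*.py"), None),  # hashed through the filter
    ("requirements.txt", None, None),             # hashed as a plain path
]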
@@ -1713,7 +1740,7 @@ def prepare_command(args):
     docker = query.docker

     bpm = BuildPlanManager(args, log=log)
-    build_plan = bpm.plan(source_path, query)
+    build_plan = bpm.plan(source_path, query, log)

     if log.isEnabledFor(DEBUG2):
         log.debug("BUILD_PLAN: %s", json.dumps(build_plan, indent=2))
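The logger handed to plan() follows the same pattern as the existing DEBUG2 check here: derive a child logger and gate expensive json.dumps calls behind isEnabledFor. A standalone sketch of that pattern (the numeric values of DEBUG2 and DEBUG3 below are assumptions for illustration; package.py defines its own levels):

import json
import logging

DEBUG2 = logging.DEBUG - 1  # assumed value
DEBUG3 = logging.DEBUG - 2  # assumed value

logging.basicConfig(level=DEBUG3)
log = logging.getLogger("prepare").getChild("plan")

source_paths = [("src/lambda", ["!**/*.pyc"], None)]
if log.isEnabledFor(DEBUG3):
    log.debug("source_paths: %s", json.dumps(source_paths, indent=2))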
