#!/usr/bin/env python
#
# Copyright (C) 2020 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24
25import argparse, hashlib, os, sys
26from collections import defaultdict
27
# Number of bytes read per chunk, so large files are never loaded whole.
BLOCK_SIZE = 65536


def digest_of_file(path, block_size=BLOCK_SIZE):
    """Return the raw MD5 digest of the contents of the file at `path`.

    The file is opened in binary mode and read `block_size` bytes at a time.
    MD5 is used here only to group identical contents, not for security.

    Raises whatever `open` / `read` raise (e.g. for unreadable files).
    """
    hasher = hashlib.md5()
    with open(path, "rb") as handle:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: handle.read(block_size), b""):
            hasher.update(chunk)
    return hasher.digest()


def find_duplicates(root):
    """Return groups of files under `root` whose contents are identical.

    Walks `root` recursively, hashes every file, and returns a sorted list
    of groups; each group is a sorted list of paths relative to `root` that
    share one digest. Files whose digest is unique are omitted.
    """
    files_by_digest = defaultdict(list)
    for subroot, _directories, files in os.walk(root):
        for name in files:
            full_path = os.path.join(subroot, name)
            # relpath rather than slicing off len(root) + 1 characters: the
            # slice drops a real character when root ends in a separator
            # (for example when the working directory is "/").
            relative_path = os.path.relpath(full_path, root)
            files_by_digest[digest_of_file(full_path)].append(relative_path)

    return sorted(
        sorted(paths) for paths in files_by_digest.values() if len(paths) != 1
    )


def main():
    """Print each group of identical files under the current directory."""
    parser = argparse.ArgumentParser(description='Find any files that have the same contents.')
    # No options are defined; parsing still provides --help and rejects
    # unexpected arguments.
    parser.parse_args()

    for file_list in find_duplicates(os.getcwd()):
        print(file_list)


if __name__ == "__main__":
    main()