Dive into Code

Discover code snippets, tutorials, and programming insights

Python

remove duplicate files in python

Remove duplicate files in Python

python
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import os
import hashlib
# Root folder that the os.walk() scan below starts from.
# The value is a placeholder path; point it at the real folder before running.
path = u'/path/to/folder'
def file_as_bytes(file):
    """Return everything readable from *file*, closing it afterwards.

    *file* is an already-open file object that supports the context-manager
    protocol. It is entered, read to the end in a single call, and exited
    (which closes it) before the data is returned.
    """
    with file:
        contents = file.read()
    return contents

# Walk `path` once, recording every file's full path and the MD5 digest of its
# contents, and print any base name that was already seen earlier in the walk.
hashes = []  # MD5 hex digests, index-aligned with `fname`
fname = []  # full path of every file visited
files = []  # base names in visit order
seen_names = set()  # same names as `files`, kept for O(1) membership tests
for root, d_names, f_names in os.walk(path):
    for f in f_names:
        # Check each name exactly once. Re-scanning the whole directory
        # listing for every file would report each name as its own duplicate
        # as soon as a directory holds more than one file.
        if f in seen_names:
            print(f)
        seen_names.add(f)
        files.append(f)
        file_path = os.path.join(root, f)
        fname.append(file_path)
        hash_string = hashlib.md5(file_as_bytes(open(file_path, 'rb'))).hexdigest()
        hashes.append(hash_string)

# Tally the digests in a single pass. Content is duplicated as soon as its
# digest occurs more than once (i.e. at least two files share it).
hash_counts = {}
for hash_string in hashes:
    hash_counts[hash_string] = hash_counts.get(hash_string, 0) + 1
multiple_item = [digest for digest, count in hash_counts.items() if count > 1]
duplicate_hashes = set(multiple_item)

# Reuse the digests computed during the walk instead of re-reading every file.
for file_path, hash_string in zip(fname, hashes):
    if hash_string in duplicate_hashes:
        print('hash_string: {}  exists for file {}'.format(hash_string, file_path))
        # NOTE: enabling the removal below deletes EVERY file whose content is
        # duplicated, including the first copy, so no copy would be left.
        # os.remove(file_path)
Python

clean directory contents

<p>The function <code>delete_direcory_contents</code> below deletes all the contents (files and subdirectories) of a specified directory. It uses Python's <code>os</code> and <code>shutil</code> modules to perform …</p>

python
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
import os,shutil
def delete_direcory_contents(directory_name):
    """Delete everything inside *directory_name*, keeping the directory itself.

    Real subdirectories are removed recursively with ``shutil.rmtree``. Every
    other entry (regular files, symbolic links including links to
    directories, FIFOs, sockets, ...) is removed with ``os.unlink``, so a
    symlink is removed without touching its target.

    Deletion is best-effort: an ``OSError`` raised while removing one entry is
    printed and the remaining entries are still processed. Errors from
    listing *directory_name* itself (for example, it does not exist) are
    not caught and propagate to the caller.
    """
    for filename in os.listdir(directory_name):
        file_path = os.path.join(directory_name, filename)
        try:
            # A symlink to a directory must be unlinked, not walked into, so
            # only genuine directories go to rmtree.
            if os.path.isdir(file_path) and not os.path.islink(file_path):
                shutil.rmtree(file_path)
            else:
                os.unlink(file_path)
        except OSError as e:
            print('Failed to delete %s. Reason: %s' % (file_path, e))


# Correctly spelled alias; the original (misspelled) name is kept for callers.
delete_directory_contents = delete_direcory_contents