summaryrefslogtreecommitdiff
path: root/replace_same_file_to_hard_link.py
blob: 12829f0d827c7f3b4cdac0158de7ffee4b31b738 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
function: This script resolves locales's BEP inconsistence,
          it scans a specific path and replaces the same file 
          in that path with a hard link.Avoid different language 
          packs each time due to concurrent compilation.
"""
import os
import sys
import time

all_file = {}

def cmp_file(f1, f2):
    """compare two files in bytes"""
    st1 = os.stat(f1)
    st2 = os.stat(f2)

    bufsize = 8 * 1024
    with open(f1, 'rb') as fp1, open(f2, 'rb') as fp2:
        while True:
            b1 = fp1.read(bufsize)
            b2 = fp2.read(bufsize)
            if b1 != b2:
                return False
            if not b1:
                return True


def search_all_inode(dir_path):
    """recursively traverse the directory to group all"""
    files = os.listdir(dir_path)
    
    for fi in files:
        fi_d = os.path.join(dir_path, fi)
        if os.path.isdir(fi_d):
            search_all_inode(fi_d)
        else:
            size = os.stat(fi_d).st_size
            if size in all_file:
                all_file[size].append(fi_d)
            else:
                all_file[size] = [fi_d]


def deal_one(file_paths):
    """traverse the file array, delete the same file and create a hard link"""
    file_count = len(file_paths)
    inode_files = {}

    for i in range(0, file_count):
        for j in range(i + 1, file_count):
            file1 = file_paths[i]
            file2 = file_paths[j]
            
            file1_inode = os.stat(file1).st_ino
            file2_inode = os.stat(file2).st_ino

            if file1_inode not in inode_files:
                inode_files[file1_inode] = file1
                
            if file1_inode == file2_inode:
                continue

            if cmp_file(file1, file2):
                print('deal same file:', file1, '==', file2)
                os.remove(file2)
                os.link(file1, file2)
            else:
                if file2_inode not in inode_files:
                    inode_files[file2_inode] = file2


def deal_files():
    """get file array and processed one by one"""
    for size in all_file:
        file_paths = all_file[size]
        if len(file_paths) > 1:
            deal_one(file_paths)


def usage():
    """print usage"""
    print("""
rm_same_file: Replace the same file with a hard link.

rm_same_file.py [target path]

    """)

if __name__ == "__main__":
    if len(sys.argv) == 2:
        search_all_inode(sys.argv[1])
        deal_files()
    else:
        usage()
        sys.exit()