1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
function: This script resolves the BEP inconsistency of locales.
It scans a specific path and replaces identical files
in that path with hard links, so that concurrent compilation
does not produce different language packs on each build.
"""
import os
import sys
import time
# Maps a file size in bytes to the list of paths found with that size;
# filled in by search_all_inode() and read by deal_files().
all_file = {}
def cmp_file(f1, f2):
    """Return True when two files hold exactly the same bytes.

    Args:
        f1: path of the first file.
        f2: path of the second file.

    Returns:
        True if both files have identical content, False otherwise.

    Raises:
        OSError: if either path cannot be stat'ed or opened for reading.
    """
    # Files of different sizes can never be equal, so the cheap stat
    # results settle that case without reading any data.
    if os.stat(f1).st_size != os.stat(f2).st_size:
        return False
    bufsize = 8 * 1024
    with open(f1, 'rb') as fp1, open(f2, 'rb') as fp2:
        while True:
            b1 = fp1.read(bufsize)
            b2 = fp2.read(bufsize)
            if b1 != b2:
                return False
            if not b1:
                # Both streams reached EOF together without a mismatch.
                return True
def search_all_inode(dir_path):
    """Recursively group every regular file under dir_path by its size.

    Results are accumulated in the module-level ``all_file`` dict, which
    maps a size in bytes to the list of paths that have that size.

    Args:
        dir_path: directory to scan.

    Raises:
        OSError: if a directory cannot be listed or a file cannot be
            stat'ed.
    """
    for name in os.listdir(dir_path):
        path = os.path.join(dir_path, name)
        # Symbolic links are left alone: a dangling link makes os.stat
        # raise, a link to a directory can lead the recursion outside the
        # tree or into a cycle, and swapping a link for a hard link would
        # change what the tree contains.
        if os.path.islink(path):
            continue
        if os.path.isdir(path):
            search_all_inode(path)
        elif os.path.isfile(path):
            # Only regular files are candidates; reading a FIFO or a
            # device node during the later comparison could block.
            all_file.setdefault(os.stat(path).st_size, []).append(path)
def _replace_with_link(src, dst):
    """Atomically turn dst into a hard link to src."""
    tmp = '{}.{}.tmp'.format(dst, os.getpid())
    # Create the new link under a temporary name first: if linking fails
    # (permissions, link-count limit, ...) dst is still intact.
    os.link(src, tmp)
    # os.replace overwrites dst in a single step; os.rename is the
    # fallback for interpreters that predate it.
    replace = getattr(os, 'replace', os.rename)
    try:
        replace(tmp, dst)
    except OSError:
        os.remove(tmp)
        raise


def deal_one(file_paths):
    """Replace duplicate files in file_paths with hard links.

    Every file is compared with each file that follows it in the list.
    When a later file has the same content but a different inode, it is
    replaced by a hard link to the earlier file. The caller in this file
    passes groups of paths that share the same size.

    Args:
        file_paths: list of file paths to check against each other.

    Raises:
        OSError: if a file cannot be stat'ed, read, or linked.
    """
    # The last path has nothing after it to be compared with.
    for i, keeper in enumerate(file_paths[:-1]):
        keeper_inode = os.stat(keeper).st_ino
        for candidate in file_paths[i + 1:]:
            if os.stat(candidate).st_ino == keeper_inode:
                # Same inode number: treated as already hard-linked.
                continue
            if cmp_file(keeper, candidate):
                print('deal same file：', keeper, '==', candidate)
                _replace_with_link(keeper, candidate)
def deal_files():
    """Run deal_one on every size group that holds more than one file."""
    for same_size_paths in all_file.values():
        # A lone file of a given size cannot have a duplicate.
        if len(same_size_paths) < 2:
            continue
        deal_one(same_size_paths)
def usage():
    """Show the command-line help text on standard output."""
    help_text = """
rm_same_file: Replace the same file with a hard link.
rm_same_file.py [target path]
"""
    print(help_text)
if __name__ == "__main__":
    # Exactly one argument (the directory to scan) is expected; anything
    # else prints the help text and stops.
    if len(sys.argv) != 2:
        usage()
        sys.exit()
    search_all_inode(sys.argv[1])
    deal_files()
|