summaryrefslogtreecommitdiff
path: root/replace_same_file_to_hard_link.py
diff options
context:
space:
mode:
Diffstat (limited to 'replace_same_file_to_hard_link.py')
-rw-r--r--replace_same_file_to_hard_link.py98
1 files changed, 98 insertions, 0 deletions
diff --git a/replace_same_file_to_hard_link.py b/replace_same_file_to_hard_link.py
new file mode 100644
index 0000000..12829f0
--- /dev/null
+++ b/replace_same_file_to_hard_link.py
@@ -0,0 +1,98 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+function: This script resolves the locales' BEP inconsistency:
+ it scans a specific path and replaces identical files
+ in that path with hard links. This avoids producing different
+ language packs on each build due to concurrent compilation.
+"""
+import os
+import sys
+import time
+
# Maps a file size in bytes to the list of paths found with that size.
# search_all_inode() fills it and deal_files() consumes it.
all_file = {}
+
def cmp_file(f1, f2):
    """Return True when two files hold exactly the same bytes.

    Args:
        f1: Path of the first file.
        f2: Path of the second file.

    Returns:
        True if both files have identical content, False otherwise.

    Raises:
        OSError: If either path cannot be stat'ed or opened for reading.
    """
    st1 = os.stat(f1)
    st2 = os.stat(f2)

    # Files of different length can never match, so avoid reading them.
    if st1.st_size != st2.st_size:
        return False

    bufsize = 8 * 1024
    with open(f1, 'rb') as fp1, open(f2, 'rb') as fp2:
        while True:
            b1 = fp1.read(bufsize)
            b2 = fp2.read(bufsize)
            if b1 != b2:
                return False
            # Both chunks are equal here, so an empty one means both
            # files reached end-of-file without any difference.
            if not b1:
                return True
+
+
def search_all_inode(dir_path):
    """Recursively index every regular file under dir_path by its size.

    Each file path is appended to the module-level ``all_file`` mapping
    under the key ``st_size``, so that only files of equal size later
    need to be compared byte by byte.

    Args:
        dir_path: Directory to scan.

    Raises:
        OSError: If a directory cannot be listed or a file cannot be
            stat'ed.
    """
    for name in os.listdir(dir_path):
        path = os.path.join(dir_path, name)

        # Never follow symbolic links: a link to a directory can form a
        # cycle, a dangling link makes os.stat() fail, and a link must not
        # be turned into a hard link of its target.
        if os.path.islink(path):
            continue

        if os.path.isdir(path):
            search_all_inode(path)
        elif os.path.isfile(path):
            # FIFOs, sockets and device nodes fail isfile() and are left
            # alone; they cannot be meaningfully byte-compared.
            size = os.stat(path).st_size
            all_file.setdefault(size, []).append(path)
+
+
def deal_one(file_paths):
    """Replace byte-identical files in file_paths with hard links.

    The first path holding a given content is kept; every later path with
    the same content (as decided by cmp_file) is replaced by a hard link
    to it, and one line is printed per replacement.

    Args:
        file_paths: Paths of files that all have the same size.

    Raises:
        OSError: If a file cannot be stat'ed, read, linked or replaced.
    """
    # Indexes of paths already known to share an inode with an earlier
    # path; they need no further stat calls or content comparisons.
    merged = set()

    # The last path has nothing after it to compare with, so skip it as
    # an outer candidate.
    for i, keeper in enumerate(file_paths[:-1]):
        if i in merged:
            continue
        keeper_stat = os.stat(keeper)

        for j in range(i + 1, len(file_paths)):
            if j in merged:
                continue
            candidate = file_paths[j]
            candidate_stat = os.stat(candidate)

            # Hard links cannot cross devices, and inode numbers are only
            # unique within one device.
            if candidate_stat.st_dev != keeper_stat.st_dev:
                continue

            if candidate_stat.st_ino == keeper_stat.st_ino:
                # Already the same file under another name.
                merged.add(j)
                continue

            if cmp_file(keeper, candidate):
                print('deal same file:', keeper, '==', candidate)
                _replace_with_hard_link(keeper, candidate)
                merged.add(j)


def _replace_with_hard_link(source, target):
    """Make target a hard link to source without ever losing target."""
    # Link under a temporary name first: if linking fails, target is still
    # intact, unlike with a remove-then-link sequence.
    temp_path = '%s.%d.lnk' % (target, os.getpid())
    os.link(source, temp_path)
    try:
        os.replace(temp_path, target)
    except OSError:
        os.remove(temp_path)
        raise
+
+
def deal_files():
    """Hard-link duplicates inside every size group recorded in all_file.

    Each value of the module-level ``all_file`` mapping is a list of paths
    sharing one file size; every list with at least two paths is handed
    to deal_one().
    """
    for file_paths in all_file.values():
        # A group with a single path has nothing to be compared against.
        if len(file_paths) > 1:
            deal_one(file_paths)
+
+
def usage():
    """Print the command-line help text to standard output.

    The program name is taken from sys.argv[0], so the help text always
    matches the name the script was actually invoked under.
    """
    invoked_as = sys.argv[0] if sys.argv else ''
    script = os.path.basename(invoked_as) or 'replace_same_file_to_hard_link.py'
    name = os.path.splitext(script)[0]
    print("""
%s: Replace the same file with a hard link.

%s [target path]
""" % (name, script))
+
if __name__ == "__main__":
    # Exactly one argument is expected: the directory to deduplicate.
    if len(sys.argv) != 2:
        usage()
        # Exit non-zero so calling build scripts can detect the misuse.
        sys.exit(2)

    search_all_inode(sys.argv[1])
    deal_files()
    sys.exit(0)