summaryrefslogtreecommitdiff
path: root/check-debug-symbols.py
blob: b873c779fbf68bed4c695e3d26339404f785af14 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/python3

"""
Check that debug symbols are present in shared objects and can identify
code.

It starts scanning from a directory and recursively scans all ELF
files found in it for various symbols to ensure all debuginfo is
present and nothing has been stripped.

Usage:

./check-debug-symbols.py [-v|--verbose] /path/of/dir/to/scan/


Example:

./check-debug-symbols.py /usr/lib64
"""

# This technique was explained to me by Mark Wielaard (mjw).

import collections
import os
import re
import subprocess
import sys

# Outcome of scanning one file: the file's name plus one flag per artefact
# that scan_file() looks for.
ScanResult = collections.namedtuple(
    'ScanResult',
    ['file_name', 'debug_info', 'debug_abbrev', 'file_symbols', 'gnu_debuglink'],
)


def scan_file(file):
    """Scan the provided file and return a ScanResult containing results of the scan.

    Runs ``eu-readelf -S`` (section headers) and ``eu-readelf -s`` (symbol
    tables) on *file* and records:

    * ``debug_info`` / ``debug_abbrev``: a ``.debug_info`` / ``.debug_abbrev``
      section header is listed,
    * ``file_symbols``: the symbol table contains at least one FILE symbol,
    * ``gnu_debuglink``: a ``.gnu_debuglink`` section header is listed.

    Raises subprocess.CalledProcessError when eu-readelf exits with a
    non-zero status (both invocations use check=True).
    """

    def readelf_lines(option):
        # check=True so that a failing eu-readelf is reported as an error
        # instead of being mistaken for "nothing found".
        completed = subprocess.run(['eu-readelf', option, file],
                                   stdout=subprocess.PIPE, encoding='utf-8', check=True)
        return completed.stdout.split('\n')

    def has_section(section_lines, name):
        # Section header rows carry the index in brackets right before the
        # section name, hence the '] ' prefix.
        marker = '] ' + name
        return any(marker in line for line in section_lines)

    # Test FILE symbols. These will most likely be removed by anything that
    # manipulates symbol tables because it's generally useless. So a nice test
    # that nothing has messed with symbols.
    def contains_file_symbols(line):
        parts = line.split()
        if len(parts) < 8:
            return False
        # NOTE(review): columns are presumably Num, Value, Size, Type, Bind,
        # Vis, Ndx, Name -- confirm against eu-readelf output.
        # NOTE(review): the whole name pattern is optional and re.match only
        # anchors at the start, so this regex accepts any name; kept as-is to
        # avoid changing which files pass.
        return \
            parts[2] == '0' and parts[3] == 'FILE' and parts[4] == 'LOCAL' and parts[5] == 'DEFAULT' and \
            parts[6] == 'ABS' and re.match(r'((.*/)?[-_a-zA-Z0-9]+\.(c|cc|cpp|cxx))?', parts[7])

    # Test for .debug_* sections in the shared object. This is the main test.
    # Stripped objects will not contain these.
    section_lines = readelf_lines('-S')
    has_debug_info = has_section(section_lines, '.debug_info')
    has_debug_abbrev = has_section(section_lines, '.debug_abbrev')

    symbol_lines = readelf_lines('-s')
    has_file_symbols = any(contains_file_symbols(line) for line in symbol_lines)

    # Test that there are no .gnu_debuglink sections pointing to another
    # debuginfo file. There shouldn't be any debuginfo files, so the link makes
    # no sense either. This must look at the section headers (-S): the symbol
    # table listing (-s) never contains section header rows.
    has_gnu_debuglink = has_section(section_lines, '.gnu_debuglink')

    return ScanResult(file, has_debug_info, has_debug_abbrev, has_file_symbols, has_gnu_debuglink)

def is_elf(file):
    """Tell whether file(1) describes *file* as a 64-bit LSB ELF executable or shared object.

    Returns the (truthy) regex match object when the description matches and
    None otherwise, so callers should only rely on truthiness.

    Raises subprocess.CalledProcessError when ``file`` exits with a non-zero
    status (check=True).
    """
    result = subprocess.run(['file', file], stdout=subprocess.PIPE, encoding='utf-8', check=True)
    # "pie " has to be optional: file(1) reports ordinary shared libraries as
    # "ELF 64-bit LSB shared object" and non-PIE programs as
    # "ELF 64-bit LSB executable"; only PIE binaries carry the extra word.
    return re.search(r'ELF 64-bit LSB (?:pie )?(?:executable|shared object)', result.stdout)

def scan_file_if_sensible(file):
    """Return scan_file(file) when is_elf() accepts the file, otherwise None."""
    return scan_file(file) if is_elf(file) else None

def scan_dir(dir):
    """Walk *dir* recursively and collect the scan result of every file in it.

    Each file found by os.walk() is handed to scan_file_if_sensible(); falsy
    outcomes (files that were not scanned) are left out of the returned list.
    """
    outcomes = (
        scan_file_if_sensible(os.path.join(parent, entry))
        for parent, _, entries in os.walk(dir)
        for entry in entries
    )
    return [outcome for outcome in outcomes if outcome]

def scan(file):
    """Scan *file*, which may name either a directory or a single file.

    A directory is scanned recursively with scan_dir(). A single file is
    passed to scan_file_if_sensible(); when that returns None (the file was
    not scanned) the result is an empty list rather than ``[None]``, so the
    returned list only ever holds real scan results.

    Raises:
        FileNotFoundError: the path does not exist.
        ValueError: the path exists but is neither a regular file nor a
            directory.
    """
    file = os.path.abspath(file)
    if os.path.isdir(file):
        return scan_dir(file)
    if os.path.isfile(file):
        result = scan_file_if_sensible(file)
        return [] if result is None else [result]
    # Fail loudly: silently returning nothing here would let a mistyped path
    # pass the whole check with exit status 0.
    if not os.path.exists(file):
        raise FileNotFoundError(f'{file}: no such file or directory')
    raise ValueError(f'{file}: not a regular file or directory')

def is_bad_result(result):
    """Tell whether a scan result describes a file that fails the check.

    A result is bad when any of debug_info, debug_abbrev or file_symbols is
    falsy, or when gnu_debuglink is set.
    """
    required = (result.debug_info, result.debug_abbrev, result.file_symbols)
    if not all(required):
        return True
    # Everything required is present; only an unexpected debuglink can still
    # make the result bad.
    return result.gnu_debuglink

def print_scan_results(results, verbose):
    """Print one error line per problem found in *results*.

    For every result, a line is written to stdout for each missing
    .debug_info section, missing .debug_abbrev section, missing FILE symbols
    and unexpected .gnu_debuglink section. When *verbose* is true, files
    without any problem are reported with an "OK" line as well.
    """
    for entry in results:
        path = entry.file_name
        # (problem detected, message) pairs, in the order they are reported.
        checks = (
            (not entry.debug_info, 'error: missing .debug_info section in'),
            (not entry.debug_abbrev, 'error: missing .debug_abbrev section in'),
            (not entry.file_symbols, 'error: missing FILE symbols in'),
            (entry.gnu_debuglink, 'error: unexpected .gnu_debuglink section in'),
        )
        messages = [message for failed, message in checks if failed]
        for message in messages:
            print(message, path)
        if verbose and not messages:
            print('OK: ', path)

def main(args):
    """Scan every path given in *args* and return the process exit status.

    The arguments '--verbose' and '-v' switch on verbose reporting; every
    other argument is treated as a path to scan. Returns 1 when at least one
    scan result is bad and 0 otherwise.
    """
    verbose_flags = ('--verbose', '-v')
    verbose = any(arg in verbose_flags for arg in args)
    paths = [arg for arg in args if arg not in verbose_flags]

    results = []
    for path in paths:
        results.extend(scan(path))

    print_scan_results(results, verbose)

    return 1 if any(is_bad_result(entry) for entry in results) else 0


if __name__ == '__main__':
    # Use main()'s return value as the exit status of the script.
    raise SystemExit(main(sys.argv[1:]))