import bisect

import gdb
2
3
def parse_address_to_int(address):
    """
    Converts an address into a plain Python int.

    Parameters
    ---------
    address : gdb.Value, or anything whose string form is an expression that
              gdb can evaluate (for eg the text "0x602010" taken from `info args`)

    Returns
    ---------
    The address as int.

    gdb.Value objects are converted directly. Their string form is not always
    a valid expression (a char* is rendered as the address followed by the
    quoted string it points to), so feeding it back to `p/d` is unreliable.
    """
    if isinstance(address, gdb.Value):
        return int(address)

    # gdb answers with "$N = <decimal>", keep what follows the first '='
    int_address_string = gdb.execute(
        'p/d {}'.format(address), to_string=True)
    return int(int_address_string.split('=', 1)[1].strip())
9
10
def parse_gdb_equals(str):
    """
    str is "$1 = value" (or "name = value"), so it returns value.

    Only the first '=' is treated as the separator, therefore values that
    contain '=' themselves (for eg "{x = 1, y = 2}") are returned whole.
    Surrounding whitespace, including the trailing newline, is stripped.
    Raises IndexError when str has no '=' at all.

    NOTE: the parameter name shadows the builtin `str`; it is kept as is so
    that existing callers are not affected.
    """
    return str.split("=", 1)[1].strip()
16
17
class HeapMapping:
    """
    Wrapper class for dictionary to have customization for the dictionary
    and one entry point. All state lives on the class itself, so every
    caller shares the same mapping.
    """

    # base address of a tracked allocation -> its size in bytes
    address_length_mapping = {}
    # base addresses only; kept in sync with the keys of address_length_mapping
    address_set = set()

    @staticmethod
    def put(address, length):
        """
        Records (or overwrites) an allocation of `length` bytes starting at
        `address`.
        """
        HeapMapping.address_length_mapping[address] = length
        HeapMapping.address_set.add(address)

    @staticmethod
    def get(address):
        """
        Gets the length of the dynamic array corresponding to address. Suppose dynamic
        array is {1,2,3,4,5} and starting address is 400 which is passed as address to this
        method, then method would return 20(i.e. 5 * sizeof(int)). When this address
        is offsetted for eg 408 is passed to this method, then it will return remainder
        number of bytes allocated, here it would be 12 (i.e. 420 - 408)
        Algorithm tries to find address in address_length_mapping, if it doesn't find it
        then it tries to find the range that can fit the address. If it fails to find such
        mapping then it would return None.
        """

        mapping = HeapMapping.address_length_mapping
        exact_length = mapping.get(address)
        # Compare against None rather than truthiness: a block recorded with
        # length 0 is still a known base address and must not fall through
        # to the range search below.
        if exact_length is not None:
            return exact_length

        # Closest recorded base address that is not above `address`
        sorted_bases = sorted(mapping)
        index = bisect.bisect_right(sorted_bases, address) - 1
        if index < 0:
            # address lies below every recorded allocation
            return None

        base_address = sorted_bases[index]
        remaining = base_address + mapping[base_address] - address
        # One past the last byte of the block (remaining == 0) is outside it
        return remaining if remaining > 0 else None

    @staticmethod
    def remove(address):
        """
        Forgets the allocation starting at `address`. Unknown addresses are
        ignored.
        """
        HeapMapping.address_length_mapping.pop(address, None)
        HeapMapping.address_set.discard(address)
76
77
class AllocationFinishedBreakpoint(gdb.FinishBreakpoint):
    """
    Sets temporary breakpoints on returns (specifically returns of memory allocations)
    to record address allocated.
    It gets instantiated from AllocationBreakpoint and ReallocationBreakpoint. When it is
    instantiated from ReallocationBreakpoint, it carries prev_address.
    """

    def __init__(self, length, prev_address=None):
        """
        Parameters
        ---------
        length : number of bytes requested from the allocator
        prev_address : int address of the block being reallocated, None for
                       a fresh allocation
        """
        super().__init__(internal=True)
        self.length = length
        self.prev_address = prev_address

    def stop(self):
        """
        Called when the return address in the current frame is hit. It parses the
        returned address into an int address. If the returned address is not null
        then it stores address and length into HeapMapping, after dropping the
        entry of prev_address (if any).
        Always returns False so that the inferior keeps running.
        """

        return_address = self.return_value
        if return_address is None:
            # gdb could not compute the return value, nothing to record
            return False

        int_address = parse_address_to_int(return_address)
        if int_address == 0:
            # The allocation failed. Nothing new to record, and the entry of
            # prev_address is left alone because nothing replaced it.
            return False

        if self.prev_address is not None:
            HeapMapping.remove(self.prev_address)
        HeapMapping.put(int_address, self.length)
        return False
107
108
class AllocationBreakpoint(gdb.Breakpoint):
    """
    Handler class when malloc and operator new[] gets hit
    """

    def __init__(self, spec):
        super().__init__(spec, internal=True)

    def stop(self):
        """
        Reads the requested size from `info args` and arms an
        AllocationFinishedBreakpoint that records the returned address.
        Always returns False so that the inferior keeps running.
        """
        # handle malloc and new
        func_args_string = gdb.execute('info args', to_string=True)

        # `info args` prints one "name = value" per line and the size is the
        # first argument. Overloads taking extra arguments produce more lines,
        # so only the first one is parsed.
        first_arg = func_args_string.split("\n")[0]
        try:
            length = int(parse_gdb_equals(first_arg))
        except (IndexError, ValueError):
            # No "name = value" on the first line, or the value is not a
            # plain integer: this allocation cannot be tracked.
            return False

        AllocationFinishedBreakpoint(length)
        return False
125
126
class ReallocationBreakpoint(gdb.Breakpoint):
    """
    Breakpoint placed on realloc. On every hit it notes the block being
    resized and the new size, then leaves the bookkeeping to an
    AllocationFinishedBreakpoint.
    """

    def __init__(self, spec):
        super().__init__(spec, internal=True)

    def stop(self):
        info_args_output = gdb.execute('info args', to_string=True)
        if "=" not in info_args_output:
            # no "name = value" pairs available, nothing can be tracked
            return None

        # first line holds the old pointer, second line the new size
        arg_lines = info_args_output.split("\n")
        old_block = parse_address_to_int(parse_gdb_equals(arg_lines[0]))
        new_size = int(parse_gdb_equals(arg_lines[1]))
        AllocationFinishedBreakpoint(new_size, old_block)
        return False
145
146
class DeallocationBreakpoint(gdb.Breakpoint):
    """
    Handler class when free and operator delete[] gets hit
    """

    def __init__(self, spec):
        super().__init__(spec, internal=True)

    def stop(self):
        """
        Reads the pointer being released from `info args` and removes it from
        HeapMapping. Always returns False so that the inferior keeps running.
        """
        func_args_string = gdb.execute('info args', to_string=True)

        # `info args` prints one "name = value" per line and the pointer is
        # the first argument. Parsing only that line keeps overloads with
        # extra arguments from leaking into the address.
        first_arg = func_args_string.split("\n")[0]
        if "=" in first_arg:
            int_address = parse_address_to_int(parse_gdb_equals(first_arg))
            HeapMapping.remove(int_address)
        return False
162
163
class WatchHeap(gdb.Command):
    """
    `watch_heap` command: starts tracking of Heap Memory Allocation.
    Installs internal breakpoints on malloc, realloc, free,
    operator new[] and operator delete[], whose handlers keep the
    allocation records up to date.
    """

    def __init__(self):
        super().__init__("watch_heap", gdb.COMMAND_USER)

    def complete(self, text, word):
        return gdb.COMPLETE_COMMAND

    def invoke(self, args, from_tty):
        # TODO : Check whether break location methods are defined
        # (handler class, break location), installed in this order
        hooks = (
            (AllocationBreakpoint, "malloc"),
            (AllocationBreakpoint, "operator new[]"),
            (ReallocationBreakpoint, "realloc"),
            (DeallocationBreakpoint, "free"),
            (DeallocationBreakpoint, "operator delete[]"),
        )
        for handler_class, location in hooks:
            handler_class(location)
184
185
class PrintHeapPointer(gdb.Command):
    """
    Custom command to print memory allocated at dynamic time.

    Usage: print_ptr <expression>
    The expression has to evaluate to a pointer. The command prints its type,
    the address it holds and, when that address lies inside an allocation
    recorded in HeapMapping, the bytes from the address up to the end of the
    allocation.
    """

    def __init__(self):
        super(PrintHeapPointer, self).__init__("print_ptr", gdb.COMMAND_USER)

    def complete(self, text, word):
        # The argument is an expression, so offer symbol names
        return gdb.COMPLETE_SYMBOL

    def invoke(self, args, from_tty=True):
        """
        Evaluates `args` and prints the heap block it points to. Expressions
        that are not pointers are ignored silently.
        """
        try:
            value = gdb.parse_and_eval(args)
            # strip_typedefs lets pointers hidden behind a typedef through
            if value.type.strip_typedefs().code != gdb.TYPE_CODE_PTR:
                return
            # Converted directly: the string form of a pointer value is not
            # always a bare address, so it is not pasted into a gdb command
            int_address = int(value)
        except gdb.error:
            # Only evaluation failures are reported here, any other
            # exception is a bug and is left to surface
            print('No symbol found!')
            return

        print("Type : ", value.type)
        print("Address: ", hex(int_address))
        # print memory
        self.print_heap(int_address)

    def print_heap(self, address):
        """
        Prints the memory that is being pointed by address in hex format

        Parameters
        ---------
        address : raw pointer (int)
        """

        memory_size = HeapMapping.get(address)
        if not memory_size:
            print("No address mapping found!")
            return

        print('Length :', memory_size)
        try:
            # One read for the whole block instead of one gdb command per byte
            memory = gdb.selected_inferior().read_memory(address, memory_size)
        except gdb.MemoryError:
            print("Cannot read memory at", hex(address))
            return
        print(''.join('0x{:02x} '.format(byte) for byte in memory.tobytes()))
235
236
if __name__ == '__main__':
    # Instantiating a gdb.Command subclass is what registers the command
    for command_class in (WatchHeap, PrintHeapPointer):
        command_class()
240