Fangrui Song | b02fa51 | 2018-02-02 16:41:07 +0000 | [diff] [blame] | 1 | from __future__ import print_function |
Fangrui Song | 6ceb7d8 | 2018-01-30 00:40:05 +0000 | [diff] [blame] | 2 | import re |
Fangrui Song | 19c9b1c | 2018-02-10 05:01:33 +0000 | [diff] [blame] | 3 | import string |
Fangrui Song | 6ceb7d8 | 2018-01-30 00:40:05 +0000 | [diff] [blame] | 4 | import subprocess |
Fangrui Song | b02fa51 | 2018-02-02 16:41:07 +0000 | [diff] [blame] | 5 | import sys |
Simon Pilgrim | d0ebabd | 2018-06-01 13:37:01 +0000 | [diff] [blame] | 6 | import copy |
Fangrui Song | 6ceb7d8 | 2018-01-30 00:40:05 +0000 | [diff] [blame] | 7 | |
# Python 3 removed the module-level string.expandtabs() function. Provide a
# minimal stand-in with the same call shape, string.expandtabs(s, tabsize), so
# callers can use one spelling on both major versions.
if sys.version_info[0] > 2:
    class string:
        expandtabs = str.expandtabs
else:
    import string
Fangrui Song | 6ceb7d8 | 2018-01-30 00:40:05 +0000 | [diff] [blame] | 13 | |
Fangrui Song | 19c9b1c | 2018-02-10 05:01:33 +0000 | [diff] [blame] | 14 | ##### Common utilities for update_*test_checks.py |
Fangrui Song | 6ceb7d8 | 2018-01-30 00:40:05 +0000 | [diff] [blame] | 15 | |
def should_add_line_to_output(input_line, prefix_set):
    """Return True if *input_line* should be kept when rewriting a test file.

    Args:
      input_line: one line of the existing test file.
      prefix_set: container of check prefixes that are being regenerated.

    Returns:
      False for a blank comment line (a lone ';') and for an existing check
      line whose prefix is in *prefix_set*; True for everything else.
    """
    # Skip any blank comment lines in the IR.
    if input_line.strip() == ';':
        return False
    # Completely blank lines are deliberately kept.
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        return False

    return True
| 29 | |
def invoke_tool(exe, cmd_args, ir):
    """Invoke the tool that is being tested and return its standard output.

    Args:
      exe: name or path of the executable to run.
      cmd_args: either a list of arguments (the tool is run directly, without
        a shell) or a single string that is appended to *exe* and run through
        the shell.
      ir: path of the file that is fed to the tool's standard input.

    Returns:
      The tool's standard output with CRLF line endings converted to LF. On
      Python 3 the output is decoded to text first.

    Raises:
      subprocess.CalledProcessError: if the tool exits with a non-zero status.
    """
    with open(ir) as ir_file:
        # TODO Remove the str form which is used by update_test_checks.py and
        # update_llc_test_checks.py
        # The safer list form is used by update_cc_test_checks.py
        if isinstance(cmd_args, list):
            stdout = subprocess.check_output([exe] + cmd_args, stdin=ir_file)
        else:
            # NOTE: the command string is interpreted by the shell; only pass
            # trusted text here.
            stdout = subprocess.check_output(exe + ' ' + cmd_args,
                                             shell=True, stdin=ir_file)
    if sys.version_info[0] > 2:
        # check_output() returns bytes on Python 3.
        stdout = stdout.decode()
    # Fix line endings to unix LF style.
    return stdout.replace('\r\n', '\n')
Fangrui Song | 6ceb7d8 | 2018-01-30 00:40:05 +0000 | [diff] [blame] | 45 | |
Fangrui Song | 19c9b1c | 2018-02-10 05:01:33 +0000 | [diff] [blame] | 46 | ##### LLVM IR parser |
| 47 | |
# All patterns are raw strings so that regex escapes such as \s are never
# interpreted (or warned about) as Python string escapes.

# A RUN line in a test file; group 1 is the command that follows 'RUN:'.
RUN_LINE_RE = re.compile(r'^\s*[;#]\s*RUN:\s*(.*)$')
# A -check-prefix / --check-prefixes option; group 1 is the option's value.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# An existing check line; group 1 is the prefix with any -NEXT / -NOT / -DAG /
# -LABEL suffix removed.
CHECK_RE = re.compile(r'^\s*[;#]\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')

# A complete 'define ... { ... }' function; named groups 'func' and 'body'.
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

# An "'<analysis>' for function '<func>':" header followed by its text; named
# groups 'analysis', 'func' and 'body'.
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# The start of a function definition; group 1 is the function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
# A 'target triple = "..."' line; group 1 is the triple.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# -mtriple / -march command-line options; group 1 is the option's value.
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
Fangrui Song | 19c9b1c | 2018-02-10 05:01:33 +0000 | [diff] [blame] | 66 | |
# Whitespace at the very start of the text.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# A run of spaces/tabs that does not begin at the start of a line. The empty
# alternative inside the lookahead always matches, so the lookahead reduces to
# "not at a line start" and the ' \w' alternative never changes the result.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# A '# kill:' comment line, including its newline. No re.M flag, so it only
# matches at the start of the text.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# Loop annotation comments, up to the end of their line.
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
| 73 | |
def scrub_body(body):
    """Normalise the whitespace of a function body.

    Runs of spaces/tabs that do not start a line are collapsed to one space,
    any remaining tabs are expanded with a tab size of 2, and trailing
    whitespace is removed from every line.

    Args:
      body: text of the function body.

    Returns:
      The scrubbed text.
    """
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_WHITESPACE_RE.sub(r' ', body)
    # Expand the tabs used for indentation. The str method is used directly; it
    # behaves the same on Python 2 and 3.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
    return body
| 83 | |
def do_scrub(body, scrubber, scrubber_args, extra):
    """Run *scrubber* on *body*, telling it whether to apply extra scrubbing.

    Args:
      body: text to scrub.
      scrubber: callable invoked as scrubber(body, *args).
      scrubber_args: sequence of extra positional arguments for *scrubber*.
        When non-empty, its first element must accept an ``extra_scrub``
        attribute.
      extra: value stored in ``extra_scrub`` for this call.

    Returns:
      Whatever *scrubber* returns.
    """
    if scrubber_args:
        # Work on a deep copy so the caller's argument objects are not changed.
        local_args = copy.deepcopy(scrubber_args)
        local_args[0].extra_scrub = extra
        return scrubber(body, *local_args)
    # No arguments to carry the flag: the scrubber is called with the body only.
    return scrubber(body, *scrubber_args)
| 90 | |
Fangrui Song | 6ceb7d8 | 2018-01-30 00:40:05 +0000 | [diff] [blame] | 91 | # Build up a dictionary of all the function bodies. |
class function_body(object):
    """Scrubbed text of one function, stored in two variants.

    Attributes:
      scrub: the scrubbed body; this is what str() returns.
      extrascrub: the second variant passed to the constructor, kept alongside
        the first so the two can be compared later.
    """

    def __init__(self, string, extra):
        self.scrub = string
        self.extrascrub = extra

    def __str__(self):
        return self.scrub
def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose):
    """Record the scrubbed body of every function found in *raw_tool_output*.

    Args:
      function_re: compiled regex with named groups 'func' and 'body', and
        optionally 'analysis'.
      scrubber: scrubbing callable, forwarded to do_scrub().
      scrubber_args: extra scrubber arguments, forwarded to do_scrub().
      raw_tool_output: text produced by the tool under test.
      prefixes: list of check prefixes this output applies to.
      func_dict: dict mapping each prefix to a dict of function name ->
        function_body (or None after a conflict). It is updated in place and
        must already contain an entry for every prefix.
      verbose: when true, print each processed function to stderr.
    """
    for m in function_re.finditer(raw_tool_output):
        func = m.group('func')
        body = m.group('body')
        scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra=False)
        scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra=True)
        if 'analysis' in m.groupdict():
            analysis = m.group('analysis')
            if analysis.lower() != 'cost model analysis':
                print('WARNING: Unsupported analysis mode: %r!' % (analysis,), file=sys.stderr)
        if func.startswith('stress'):
            # We only use the last line of the function body for stress tests.
            scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
        if verbose:
            print('Processing function: ' + func, file=sys.stderr)
            for line in scrubbed_body.splitlines():
                print(' ' + line, file=sys.stderr)
        for prefix in prefixes:
            prefix_funcs = func_dict[prefix]
            if func in prefix_funcs:
                existing = prefix_funcs[func]
                # None marks an earlier conflict. Test for it explicitly rather
                # than through str(None), so a body whose text is literally
                # 'None' cannot be mistaken for a match.
                if existing is None or str(existing) != scrubbed_body:
                    if existing and existing.extrascrub == scrubbed_extra:
                        # The bodies agree once extra scrubbing is applied, so
                        # keep the extra-scrubbed text for this prefix.
                        existing.scrub = scrubbed_extra
                        continue
                    if prefix != prefixes[-1]:
                        # Conflicting bodies under a shared prefix: mark the
                        # entry unusable and leave it to a later prefix.
                        prefix_funcs[func] = None
                        continue
                    # For the last prefix the conflict is only reported; the
                    # new body then replaces the old one below.
                    print('WARNING: Found conflicting asm under the '
                          'same prefix: %r!' % (prefix,), file=sys.stderr)

            prefix_funcs[func] = function_body(scrubbed_body, scrubbed_extra)
Fangrui Song | 19c9b1c | 2018-02-10 05:01:33 +0000 | [diff] [blame] | 132 | |
| 133 | ##### Generator of LLVM IR CHECK lines |
| 134 | |
# An IR comment together with the whitespace in front of it.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Match things that look like identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string.
# Group 1 is the leading whitespace, group 2 the name without '%', and group 3
# the terminating character (empty at end of string).
IR_VALUE_RE = re.compile(r'(\s+)%([\w\.\-]+?)([,\s\(\)]|\Z)')
Fangrui Song | 19c9b1c | 2018-02-10 05:01:33 +0000 | [diff] [blame] | 140 | |
def get_value_name(var):
    """Create a FileCheck variable name based on an IR name.

    Args:
      var: the IR value name without its leading '%'.

    Returns:
      The name upper-cased with '.' and '-' replaced by '_'. A purely numeric
      name is prefixed with 'TMP' first.
    """
    if var.isdigit():
        var = 'TMP' + var
    var = var.replace('.', '_')
    var = var.replace('-', '_')
    return var.upper()
| 148 | |
| 149 | |
def get_value_definition(var):
    """Return FileCheck text that defines a variable for IR value *var*.

    The result has the form '[[NAME:%.*]]', where NAME comes from
    get_value_name().
    """
    return '[[' + get_value_name(var) + ':%.*]]'
| 153 | |
| 154 | |
def get_value_use(var):
    """Return FileCheck text that uses the variable for IR value *var*.

    The result has the form '[[NAME]]', where NAME comes from get_value_name().
    """
    return '[[' + get_value_name(var) + ']]'
| 158 | |
def genericize_check_lines(lines, is_analyze):
    """Replace IR value defs and uses with FileCheck variables.

    Args:
      lines: list of body lines. It is modified in place.
      is_analyze: when true, comments are stripped but IR values are left
        untouched.

    Returns:
      The same *lines* list, after modification.
    """
    vars_seen = set()

    # This gets called for each match that occurs in
    # a line. We transform variables we haven't seen
    # into defs, and variables we have seen into uses.
    def transform_line_vars(match):
        var = match.group(2)
        if var in vars_seen:
            rv = get_value_use(var)
        else:
            vars_seen.add(var)
            rv = get_value_definition(var)
        # re.sub replaces the entire regex match
        # with whatever you return, so we have
        # to make sure to hand it back everything
        # including the commas and spaces.
        return match.group(1) + rv + match.group(3)

    for i, line in enumerate(lines):
        # An IR variable named '%.' matches the FileCheck regex string.
        line = line.replace('%.', '%dot')
        # Ignore any comments, since the check lines will too.
        scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
        if is_analyze:
            lines[i] = scrubbed_line
        else:
            lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
    return lines
| 190 | |
| 191 | |
def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
    """Append the check lines for *func_name* to *output_lines*.

    For every entry of *prefix_list*, the first prefix that has a usable body
    for *func_name* and has not been printed yet is emitted. An entry whose
    candidate prefix was already printed is skipped.

    Args:
      output_lines: list of output lines; appended to in place.
      comment_marker: comment leader used for the generated lines.
      prefix_list: sequence of entries whose first element is a list of check
        prefixes.
      func_dict: dict mapping prefix -> dict of function name -> body object
        (converted with str()); falsy bodies are treated as missing.
      func_name: name of the function to emit checks for.
      check_label_format: '%'-style format taking (prefix, function name).
      is_asm: when true, emit the body lines verbatim.
      is_analyze: forwarded to genericize_check_lines() for non-asm output.
    """
    printed_prefixes = []
    for p in prefix_list:
        checkprefixes = p[0]
        for checkprefix in checkprefixes:
            if checkprefix in printed_prefixes:
                break
            # TODO func_dict[checkprefix] may be None, '' or not exist.
            # Fix the call sites.
            if func_name not in func_dict[checkprefix] or not func_dict[checkprefix][func_name]:
                continue

            # Add some space between different check prefixes, but not after
            # the last check line (before the test code).
            if is_asm and printed_prefixes:
                output_lines.append(comment_marker)

            printed_prefixes.append(checkprefix)
            output_lines.append(check_label_format % (checkprefix, func_name))
            func_body = str(func_dict[checkprefix][func_name]).splitlines()

            # NOTE(review): the text this block was recovered from had runs of
            # spaces collapsed; confirm whether the format strings below
            # originally padded the text after ':' for column alignment.

            # For ASM output, just emit the check lines.
            if is_asm:
                # An empty body yields only the label line instead of raising
                # IndexError on func_body[0].
                if func_body:
                    output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
                    for func_line in func_body[1:]:
                        output_lines.append('%s %s-NEXT: %s' % (comment_marker, checkprefix, func_line))
                break

            # For IR output, change all defs to FileCheck variables, so we're
            # immune to variable naming fashions.
            func_body = genericize_check_lines(func_body, is_analyze)

            # The first body line is checked like every other line; it is not
            # special-cased. Better to check everything.
            is_blank_line = False

            for func_line in func_body:
                if func_line.strip() == '':
                    is_blank_line = True
                    continue
                # Do not waste time checking IR comments.
                func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

                # Blank lines are not checked themselves; the line after one
                # gets a plain check instead of a -NEXT check.
                if is_blank_line:
                    output_lines.append('{} {}: {}'.format(
                        comment_marker, checkprefix, func_line))
                else:
                    output_lines.append('{} {}-NEXT: {}'.format(
                        comment_marker, checkprefix, func_line))
                is_blank_line = False

            # Add space between different check prefixes and also before the
            # first line of code in the test function.
            output_lines.append(comment_marker)
            break
Simon Pilgrim | 90e8633 | 2018-04-05 10:26:13 +0000 | [diff] [blame] | 258 | |
def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
    """Append IR check lines for *func_name* to *output_lines*.

    Thin wrapper around add_checks() with is_asm=False and is_analyze=False.
    """
    # Label format is based on IR string.
    check_label_format = '{} %s-LABEL: @%s('.format(comment_marker)
    add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, False)
| 263 | |
def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
    """Append analysis check lines for *func_name* to *output_lines*.

    Thin wrapper around add_checks() with is_asm=False and is_analyze=True.
    The label line quotes the function name in single quotes.
    """
    check_label_format = '{} %s-LABEL: \'%s\''.format(comment_marker)
    add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, True)