#!/usr/bin/env python3
#
# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Checker is a testing tool which compiles a given test file and compares the
# state of the control-flow graph before and after each optimization pass
# against a set of assertions specified alongside the tests.
#
# Tests are written in Java, turned into DEX and compiled with the Optimizing
# compiler. "Check lines" are comments in the Java file which begin with prefix
# 'CHECK' followed by a pattern that the engine attempts to match in the
# compiler-generated output.
#
# Assertions are tested in groups which correspond to the individual compiler
# passes. Each group of check lines therefore must start with a 'CHECK-START'
# header which specifies the output group it should be tested against. The group
# name must exactly match one of the groups recognized in the output (they can
# be listed with the '--list-groups' command-line flag).
#
# Check line patterns are treated as plain text rather than regular expressions
# but are whitespace agnostic.
#
# Actual regex patterns can be inserted enclosed in '{{' and '}}' brackets. If
# curly brackets need to be used inside the body of the regex, they need to be
# enclosed in round brackets. For example, the pattern '{{foo{2}}}' will parse
# the invalid regex 'foo{2', but '{{(fo{2})}}' will match 'foo'.
#
# Regex patterns can be named and referenced later. A new variable is defined
# with '[[name:regex]]' and can be referenced with '[[name]]'. Variables are
# only valid within the scope of the defining group. Within a group they cannot
# be redefined or used undefined.
#
# Example:
#   The following assertions can be placed in a Java source file:
#
#   // CHECK-START: int MyClass.MyMethod() constant_folding (after)
#   // CHECK:         [[ID:i[0-9]+]] IntConstant {{11|22}}
#   // CHECK:                        Return [ [[ID]] ]
#
#   The engine will attempt to match the check lines against the output of the
#   group named on the first line. Together they verify that the CFG after
#   constant folding returns an integer constant with value either 11 or 22.
#

58import argparse
59import os
60import re
61import shutil
62import sys
63import tempfile
64from subprocess import check_call
65
66class CommonEqualityMixin:
67 """Mixin for class equality as equality of the fields."""
68 def __eq__(self, other):
69 return (isinstance(other, self.__class__)
70 and self.__dict__ == other.__dict__)
71
72 def __ne__(self, other):
73 return not self.__eq__(other)
74
75 def __repr__(self):
76 return "<%s: %s>" % (type(self).__name__, str(self.__dict__))
77
78
class CheckElement(CommonEqualityMixin):
  """One parsed piece of a check line.

  An element carries its kind (`variant`), an optional variable `name` and an
  optional regex `pattern`. Instances are normally created through the
  `parse*` factory methods below.
  """

  class Variant(object):
    """Enumeration of the element kinds a check line can contain."""
    Text, Pattern, VarRef, VarDef = range(4)

  def __init__(self, variant, name, pattern):
    self.variant = variant
    self.name = name
    self.pattern = pattern

  @staticmethod
  def parseText(text):
    """Builds a Text element whose pattern matches `text` literally."""
    escaped = re.escape(text)
    return CheckElement(CheckElement.Variant.Text, None, escaped)

  @staticmethod
  def parsePattern(patternElem):
    """Builds a Pattern element from a '{{regex}}' string."""
    # Drop the two-character opening and closing delimiters.
    body = patternElem[2:-2]
    return CheckElement(CheckElement.Variant.Pattern, None, body)

  @staticmethod
  def parseVariable(varElem):
    """Builds a VarRef from '[[name]]' or a VarDef from '[[name:regex]]'."""
    head, separator, tail = varElem.partition(":")
    if not separator:
      # No colon: the whole bracket content is the referenced variable name.
      return CheckElement(CheckElement.Variant.VarRef, varElem[2:-2], None)
    # Name sits between '[[' and the first colon; the regex body runs from
    # after that colon up to the closing ']]'.
    return CheckElement(CheckElement.Variant.VarDef, head[2:], tail[:-2])
111
112
class CheckLine(CommonEqualityMixin):
  """Representation of a single assertion in the check file formed of one or
     more regex elements. Matching against an output line is successful only
     if all regex elements can be matched in the given order."""

  def __init__(self, lineContent, lineNo=-1):
    """Parses `lineContent` (surrounding whitespace is stripped) into elements.

    `lineNo` is only used in error messages. Raises Exception if the line
    contains nothing to match.
    """
    lineContent = lineContent.strip()

    self.lineNo = lineNo
    self.content = lineContent

    self.lineParts = self.__parse(lineContent)
    if not self.lineParts:
      raise Exception("Empty check line")

  # Returns True if the given Match object was at the beginning of the line.
  def __isMatchAtStart(self, match):
    return (match is not None) and (match.start() == 0)

  # Takes in a list of Match objects and returns the minimal start point among
  # them. If there aren't any successful matches it returns the length of
  # the searched string.
  def __firstMatch(self, matches, string):
    return min(len(string) if m is None else m.start() for m in matches)

  # Returns the regex for finding a regex pattern in the check line.
  def __getPatternRegex(self):
    rStartSym = r"\{\{"
    rEndSym = r"\}\}"
    rBody = r".+?"
    return rStartSym + rBody + rEndSym

  # Returns the regex for finding a variable use in the check line.
  def __getVariableRegex(self):
    rStartSym = r"\[\["
    rEndSym = r"\]\]"
    rStartOptional = "("
    rEndOptional = ")?"
    rName = r"[a-zA-Z][a-zA-Z0-9]*"
    rSeparator = ":"
    rBody = r".+?"
    return rStartSym + rName + rStartOptional + rSeparator + rBody + rEndOptional + rEndSym

  # This method parses the content of a check line stripped of the initial
  # comment symbol and the CHECK keyword.
  def __parse(self, line):
    lineParts = []
    # The marker regexes do not depend on the line, build them only once.
    patternRegex = self.__getPatternRegex()
    variableRegex = self.__getVariableRegex()

    # Loop as long as there is something to parse.
    while line:
      # Search for the nearest occurrence of the special markers.
      matchWhitespace = re.search(r"\s+", line)
      matchPattern = re.search(patternRegex, line)
      matchVariable = re.search(variableRegex, line)

      # If one of the above was identified at the current position, extract them
      # from the line, parse them and add to the list of line parts.
      if self.__isMatchAtStart(matchWhitespace):
        # We want to be whitespace-agnostic so whenever a check line contains
        # a whitespace, we add a regex pattern for an arbitrary non-zero number
        # of whitespaces.
        line = line[matchWhitespace.end():]
        lineParts.append(CheckElement.parsePattern(r"{{\s+}}"))
      elif self.__isMatchAtStart(matchPattern):
        pattern = line[0:matchPattern.end()]
        line = line[matchPattern.end():]
        lineParts.append(CheckElement.parsePattern(pattern))
      elif self.__isMatchAtStart(matchVariable):
        var = line[0:matchVariable.end()]
        line = line[matchVariable.end():]
        lineParts.append(CheckElement.parseVariable(var))
      else:
        # If we're not currently looking at a special marker, this is a plain
        # text match all the way until the first special marker (or the end
        # of the line).
        firstMatch = self.__firstMatch([ matchWhitespace, matchPattern, matchVariable ], line)
        text = line[0:firstMatch]
        line = line[firstMatch:]
        lineParts.append(CheckElement.parseText(text))
    return lineParts

  # Returns the regex pattern to be matched in the output line. Variable
  # references are substituted with their current values provided in the
  # 'varState' argument.
  # An exception is raised if a referenced variable is undefined.
  def __generatePattern(self, linePart, varState):
    if linePart.variant == CheckElement.Variant.VarRef:
      try:
        return re.escape(varState[linePart.name])
      except KeyError:
        raise Exception("Use of undefined variable '" + linePart.name + "' " +
                        "(line " + str(self.lineNo) + ")")
    else:
      return linePart.pattern

  # Attempts to match the check line against a line from the output file with
  # the given initial variable values. It returns the new variable state if
  # successful and None otherwise.
  # An exception is raised if the check line references an undefined variable
  # or redefines an existing one.
  def match(self, outputLine, initialVarState):
    initialSearchFrom = 0
    initialPattern = self.__generatePattern(self.lineParts[0], initialVarState)

    # The search start is bounded by the length of the output line. An initial
    # pattern which can match the empty string would otherwise keep succeeding
    # on the exhausted remainder and the loop would never terminate.
    while initialSearchFrom <= len(outputLine):
      # Search for the first element on the regex parts list. This will mark
      # the point on the line from which we will attempt to match the rest of
      # the check pattern. If this iteration produces only a partial match,
      # the next iteration will start searching further in the output.
      firstMatch = re.search(initialPattern, outputLine[initialSearchFrom:])
      if firstMatch is None:
        return None
      matchStart = initialSearchFrom + firstMatch.start()
      initialSearchFrom += firstMatch.start() + 1

      # Do the full matching on a shadow copy of the variable state. If the
      # matching fails half-way, we will not need to revert the state.
      varState = dict(initialVarState)

      # Now try to parse all of the parts of the check line in the right order.
      # Variable values are updated on-the-fly, meaning that a variable can
      # be referenced immediately after its definition.
      fullyMatched = True
      for part in self.lineParts:
        pattern = self.__generatePattern(part, varState)
        match = re.match(pattern, outputLine[matchStart:])
        if match is None:
          fullyMatched = False
          break
        matchEnd = matchStart + match.end()
        if part.variant == CheckElement.Variant.VarDef:
          if part.name in varState:
            raise Exception("Redefinition of variable '" + part.name + "'" +
                            " (line " + str(self.lineNo) + ")")
          varState[part.name] = outputLine[matchStart:matchEnd]
        matchStart = matchEnd

      # Return the new variable state if all parts were successfully matched.
      # Otherwise loop and try to find another start point on the same line.
      if fullyMatched:
        return varState

    # Every possible start point on the line has been tried.
    return None
251
252
class CheckGroup(CommonEqualityMixin):
  """Represents a named collection of check lines which are to be matched
     against an output group of the same name."""

  def __init__(self, name, lines):
    """Stores `name` and `lines`; raises Exception if either one is empty."""
    if name:
      self.name = name
    else:
      raise Exception("Check group does not have a name")
    if lines:
      self.lines = lines
    else:
      raise Exception("Check group " + self.name + " does not have a body")

  # The driver of matching inside a group. It simultaneously reads lines from
  # the output and check groups and attempts to match them against each other
  # in the correct order.
  # Returns nothing on success and raises an exception naming the first check
  # line that could not be matched.
  def match(self, outputGroup):
    readOutputLines = 0
    lastMatch = 0
    varState = {}

    # A single iterator is shared by all check lines: every output line is
    # consumed at most once, so the next check line resumes right after the
    # output line matched by the previous one. This avoids copying the
    # remaining lines on every step.
    outputLines = iter(outputGroup.body)

    for checkLine in self.lines:
      for outputLine in outputLines:
        readOutputLines += 1

        # Try to match the current lines against each other. If successful,
        # save the new state of variables and continue to the next check line.
        newVarState = checkLine.match(outputLine, varState)
        if newVarState is not None:
          varState = newVarState
          lastMatch = readOutputLines
          break
      else:
        # The output was exhausted without finding a match for this check line.
        raise Exception("Could not match check line \"" + checkLine.content + "\" from line " +
                        str(lastMatch+1) + " of the output. [vars=" + str(varState) + "]")

  @staticmethod
  def parse(name, lines):
    """Builds a CheckGroup by parsing each string in `lines` as a CheckLine."""
    return CheckGroup(name, [CheckLine(line) for line in lines])
307
308
class OutputGroup(CommonEqualityMixin):
  """A named slice of the test output; the check group carrying the same name
     is matched against its body."""

  def __init__(self, name, body):
    # Both the name and the body are mandatory and must be non-empty.
    if not name:
      raise Exception("Output group does not have a name")
    self.name = name

    if not body:
      raise Exception("Output group " + self.name + " does not have a body")
    self.body = body
322
323
class FileSplitMixin(object):
  """Mixin for representing text files which need to be split into smaller
     chunks before being parsed.

     Child classes must provide:
       _processLine(line, lineNo)  -> (processedLine or None, newGroupName or None)
       _processGroup(name, lines)  -> the final representation of one group
     and may override _exceptionLineOutsideGroup to customize the error raised
     for a content line that precedes every group header."""

  # Default handler for a content line found before any group was started.
  # Child classes can override it to raise a more specific error.
  def _exceptionLineOutsideGroup(self, line, lineNo):
    raise Exception("Line lies outside a group (line " + str(lineNo) + ")")

  # Splits the stream into named groups of lines and returns the list of
  # groups as produced by the child's _processGroup, in order of appearance.
  def _parseStream(self, stream):
    lineNo = 0
    allGroups = []
    currentGroup = None

    for line in stream:
      lineNo += 1
      line = line.strip()
      if not line:
        continue

      # Let the child class process the line and return information about it.
      # The _processLine method can modify the content of the line (or delete it
      # entirely) and specify whether it starts a new group.
      processedLine, newGroupName = self._processLine(line, lineNo)
      if newGroupName is not None:
        currentGroup = (newGroupName, [])
        allGroups.append(currentGroup)
      if processedLine is not None:
        if currentGroup is None:
          # There is no group to attach the line to. Report it through the
          # hook instead of failing on the subscript of None below.
          self._exceptionLineOutsideGroup(line, lineNo)
        else:
          currentGroup[1].append(processedLine)

    # Finally, take the generated line groups and let the child class process
    # each one before storing the final outcome.
    return [self._processGroup(name, lines) for name, lines in allGroups]
352
353
class CheckFile(FileSplitMixin):
  """Collection of check groups extracted from the input test file."""

  def __init__(self, prefix, checkStream):
    """Parses `checkStream` into check groups using `prefix` as the keyword."""
    self.prefix = prefix
    self.groups = self._parseStream(checkStream)

  # Attempts to parse a check line. The regex searches for a comment symbol
  # followed by the CHECK keyword, given attribute and a colon at the very
  # beginning of the line. Whitespaces are ignored.
  # Returns the stripped remainder of the line, or None if it is not a check
  # line with the given prefix.
  def _extractLine(self, prefix, line):
    ignoreWhitespace = r"\s*"
    commentSymbols = ["//", "#"]
    # The prefix is user-provided plain text, so it is escaped to prevent any
    # regex metacharacters in it from changing the meaning of the pattern.
    prefixRegex = ignoreWhitespace + \
                  "(" + "|".join(commentSymbols) + ")" + \
                  ignoreWhitespace + \
                  re.escape(prefix) + ":"

    # The 'match' function succeeds only if the pattern is matched at the
    # beginning of the line.
    match = re.match(prefixRegex, line)
    if match is None:
      return None
    return line[match.end():].strip()

  # Classifies a line as a group header, a check line or neither. Returns a
  # (processedLine, newGroupName) pair as expected by _parseStream.
  def _processLine(self, line, lineNo):
    startLine = self._extractLine(self.prefix + "-START", line)
    if startLine is not None:
      # Line starts with the CHECK-START keyword, start a new group
      return (None, startLine)
    else:
      # Otherwise try to parse it as a standard CHECK line. If unsuccessful,
      # _extractLine will return None and the line will be ignored.
      return (self._extractLine(self.prefix, line), None)

  def _exceptionLineOutsideGroup(self, line, lineNo):
    raise Exception("Check file line lies outside a group (line " + str(lineNo) + ")")

  def _processGroup(self, name, lines):
    return CheckGroup.parse(name, lines)

  # Matches every check group against the output group of the same name.
  # Raises an exception on the first missing group or failed check group.
  # With 'printInfo' set, progress is printed for each tested group.
  def match(self, outputFile, printInfo=False):
    for checkGroup in self.groups:
      # TODO: Currently does not handle multiple occurrences of the same group
      # name, e.g. when a pass is run multiple times. It will always try to
      # match a check group against the first output group of the same name.
      outputGroup = outputFile.findGroup(checkGroup.name)
      if outputGroup is None:
        raise Exception("Group " + checkGroup.name + " not found in the output")
      if printInfo:
        print("TEST " + checkGroup.name + "... ", end="", flush=True)
      try:
        checkGroup.match(outputGroup)
        if printInfo:
          print("PASSED")
      except Exception:
        if printInfo:
          print("FAILED!")
        # Re-raise the active exception unchanged.
        raise
414
415
class OutputFile(FileSplitMixin):
  """Representation of the output generated by the test and split into groups
     within which the checks are performed.

     C1visualizer format is parsed with a state machine which differentiates
     between the 'compilation' and 'cfg' blocks. The former marks the beginning
     of a method. It is parsed for the method's name but otherwise ignored. Each
     subsequent CFG block represents one stage of the compilation pipeline and
     is parsed into an output group named "<method name> <pass name>".
     """

  class ParsingState:
    OutsideBlock, InsideCompilationBlock, StartingCfgBlock, InsideCfgBlock = range(4)

  def __init__(self, outputStream):
    # Initialize the state machine
    self.lastMethodName = None
    self.state = OutputFile.ParsingState.OutsideBlock
    self.groups = self._parseStream(outputStream)

  # Advances the state machine by one line. Returns a
  # (processedLine, newGroupName) pair as expected by _parseStream and raises
  # an exception if the line is not valid in the current state.
  def _processLine(self, line, lineNo):
    if self.state == OutputFile.ParsingState.StartingCfgBlock:
      # Previous line started a new 'cfg' block which means that this one must
      # contain the name of the pass (this is enforced by C1visualizer).
      if re.match(r'name\s+"[^"]+"', line):
        # Extract the pass name, prepend it with the name of the method and
        # return as the beginning of a new group.
        self.state = OutputFile.ParsingState.InsideCfgBlock
        return (None, self.lastMethodName + " " + line.split("\"")[1])
      else:
        raise Exception("Expected group name in output file (line " + str(lineNo) + ")")

    elif self.state == OutputFile.ParsingState.InsideCfgBlock:
      if line == "end_cfg":
        self.state = OutputFile.ParsingState.OutsideBlock
        return (None, None)
      else:
        return (line, None)

    elif self.state == OutputFile.ParsingState.InsideCompilationBlock:
      # Search for the method's name. Format: method "<name>"
      if re.match(r'method\s+"[^"]+"', line):
        self.lastMethodName = line.split("\"")[1]
      elif line == "end_compilation":
        self.state = OutputFile.ParsingState.OutsideBlock
      # All other lines of the compilation block are ignored.
      return (None, None)

    else:  # self.state == OutputFile.ParsingState.OutsideBlock:
      if line == "begin_cfg":
        # The line starts a new group but we'll wait until the next line from
        # which we can extract the name of the pass.
        if self.lastMethodName is None:
          raise Exception("Output contains a pass without a method header" +
                          " (line " + str(lineNo) + ")")
        self.state = OutputFile.ParsingState.StartingCfgBlock
        return (None, None)
      elif line == "begin_compilation":
        self.state = OutputFile.ParsingState.InsideCompilationBlock
        return (None, None)
      else:
        raise Exception("Output line lies outside a group (line " + str(lineNo) + ")")

  def _processGroup(self, name, lines):
    return OutputGroup(name, lines)

  # Returns the first group with the given name or None if there is none.
  def findGroup(self, name):
    for group in self.groups:
      if group.name == name:
        return group
    return None
485 return None
486
487
def ParseArguments():
  """Defines the command-line interface and returns the parsed arguments."""
  argParser = argparse.ArgumentParser()

  # Positional: the annotated test source.
  argParser.add_argument(
      "test_file",
      help="the source of the test with checking annotations")

  # Optional switches.
  argParser.add_argument(
      "--check-prefix",
      dest="check_prefix",
      default="CHECK",
      metavar="PREFIX",
      help="prefix of checks in the test file (default: CHECK)")
  argParser.add_argument(
      "--list-groups",
      dest="list_groups",
      action="store_true",
      help="print a list of all groups found in the test output")
  argParser.add_argument(
      "--dump-group",
      dest="dump_group",
      metavar="GROUP",
      help="print the contents of an output group")

  parsedArgs = argParser.parse_args()
  return parsedArgs
498
499
class cd:
  """Context manager which switches the working directory to `newPath` for
     the duration of the 'with' body and switches back on exit."""

  def __init__(self, newPath):
    self.newPath = newPath

  def __enter__(self):
    # Remember where we came from before moving.
    previous = os.getcwd()
    self.savedPath = previous
    os.chdir(self.newPath)

  def __exit__(self, excType, excValue, excTraceback):
    # Restore the directory recorded on entry; exceptions raised inside the
    # 'with' body are not suppressed.
    os.chdir(self.savedPath)
512
513
def CompileTest(inputFile, tempFolder):
  """Compiles `inputFile` with javac, dx and dex2oat inside `tempFolder`.

  Returns the path `tempFolder + "/art.cfg"`. Raises KeyError if the
  ANDROID_HOST_OUT environment variable is not set; failures of the invoked
  tools are reported by check_call.
  """
  classFolder = tempFolder + "/classes"
  dexFile = tempFolder + "/test.dex"
  oatFile = tempFolder + "/test.oat"
  outputFile = tempFolder + "/art.cfg"
  os.makedirs(classFolder)

  # Build a DEX from the source file. We pass "--no-optimize" to dx to avoid
  # interference with its optimizations.
  javacCommand = ["javac", "-d", classFolder, inputFile]
  dxCommand = ["dx", "--dex", "--no-optimize", "--output=" + dexFile, classFolder]
  check_call(javacCommand)
  check_call(dxCommand)

  # Run dex2oat and export the HGraph. The output is stored into ${PWD}/art.cfg,
  # hence the temporary change of the working directory.
  with cd(tempFolder):
    dex2oatCommand = [
        "dex2oat", "-j1", "--dump-passes", "--compiler-backend=Optimizing",
        "--android-root=" + os.environ["ANDROID_HOST_OUT"],
        "--boot-image=" + os.environ["ANDROID_HOST_OUT"] + "/framework/core-optimizing.art",
        "--runtime-arg", "-Xnorelocate",
        "--dex-file=" + dexFile,
        "--oat-file=" + oatFile,
    ]
    check_call(dex2oatCommand)

  return outputFile
534
535
def ListGroups(outputFilename):
  """Prints the name of every group found in the given output file."""
  # The file is fully parsed by the OutputFile constructor, so it can be
  # closed as soon as the object has been built.
  with open(outputFilename, "r") as outputStream:
    outputFile = OutputFile(outputStream)
  for group in outputFile.groups:
    print(group.name)
540
541
def DumpGroup(outputFilename, groupName):
  """Prints the body of the output group called `groupName`.

  Raises Exception if the output file contains no group with that name.
  """
  # The file is fully parsed by the OutputFile constructor, so it can be
  # closed as soon as the object has been built.
  with open(outputFilename, "r") as outputStream:
    outputFile = OutputFile(outputStream)
  group = outputFile.findGroup(groupName)
  if group is None:
    raise Exception("Check group " + groupName + " not found in the output")
  print("\n".join(group.body))
549
550
def RunChecks(checkPrefix, checkFilename, outputFilename):
  """Matches the check lines of `checkFilename` against `outputFilename`.

  Progress is printed for every tested group. The exception raised by the
  first failing check is propagated to the caller.
  """
  # Both files are fully parsed by the constructors, so they can be closed
  # before the matching starts.
  with open(checkFilename, "r") as checkStream:
    checkFile = CheckFile(checkPrefix, checkStream)
  with open(outputFilename, "r") as outputStream:
    outputFile = OutputFile(outputStream)
  checkFile.match(outputFile, True)
555
556
if __name__ == "__main__":
  args = ParseArguments()

  # All intermediate build products live in a scratch directory which is
  # removed again no matter how the run ends.
  tempFolder = tempfile.mkdtemp()
  try:
    outputFile = CompileTest(args.test_file, tempFolder)
    if not (args.list_groups or args.dump_group):
      # Default mode: run the checks of the test file against the output.
      RunChecks(args.check_prefix, args.test_file, outputFile)
    elif args.list_groups:
      ListGroups(outputFile)
    else:
      DumpGroup(outputFile, args.dump_group)
  finally:
    shutil.rmtree(tempFolder)
570 shutil.rmtree(tempFolder)