blob: 84beff59c74ce271f3c0f90985e6f50b91edfc66 [file] [log] [blame]
Igor Murashkinaaebaa02015-01-26 10:55:53 -08001/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
18#define ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
19
20#include <type_traits>
21#include <assert.h>
22#include <functional>
23#include <vector>
24#include <algorithm>
25#include <numeric>
26#include <memory>
27
Vladimir Marko88b2b802015-12-04 14:19:04 +000028#include "cmdline_parse_result.h"
29#include "cmdline_types.h"
30#include "token_range.h"
31#include "unit.h"
Igor Murashkinaaebaa02015-01-26 10:55:53 -080032
33namespace art {
34 // Implementation details for the parser. Do not look inside if you hate templates.
35 namespace detail {
36 // A non-templated base class for argument parsers. Used by the general parser
37 // to parse arguments, without needing to know the argument type at compile time.
38 //
39 // This is an application of the type erasure idiom.
40 struct CmdlineParseArgumentAny {
41 virtual ~CmdlineParseArgumentAny() {}
42
43 // Attempt to parse this argument starting at arguments[position].
44 // If the parsing succeeds, the parsed value will be saved as a side-effect.
45 //
46 // In most situations, the parsing will not match by returning kUnknown. In this case,
47 // no tokens were consumed and the position variable will not be updated.
48 //
49 // At other times, parsing may fail due to validation but the initial token was still matched
50 // (for example an out of range value, or passing in a string where an int was expected).
51 // In this case the tokens are still consumed, and the position variable will get incremented
52 // by all the consumed tokens.
53 //
54 // The # of tokens consumed by the parse attempt will be set as an out-parameter into
55 // consumed_tokens. The parser should skip this many tokens before parsing the next
56 // argument.
57 virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) = 0;
58 // How many tokens should be taken off argv for parsing this argument.
59 // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space).
60 //
61 // A [min,max] range is returned to represent argument definitions with multiple
62 // value tokens. (e.g. {"-h", "-h " } would return [1,2]).
63 virtual std::pair<size_t, size_t> GetNumTokens() const = 0;
64 // Get the run-time typename of the argument type.
65 virtual const char* GetTypeName() const = 0;
66 // Try to do a close match, returning how many tokens were matched against this argument
67 // definition. More tokens is better.
68 //
69 // Do a quick match token-by-token, and see if they match.
70 // Any tokens with a wildcard in them are only matched up until the wildcard.
71 // If this is true, then the wildcard matching later on can still fail, so this is not
72 // a guarantee that the argument is correct, it's more of a strong hint that the
73 // user-provided input *probably* was trying to match this argument.
74 //
75 // Returns how many tokens were either matched (or ignored because there was a
76 // wildcard present). 0 means no match. If the Size() tokens are returned.
77 virtual size_t MaybeMatches(const TokenRange& tokens) = 0;
78 };
79
// SFINAE helper: EnableIfNumeric<T>::type exists (and is void) only when T is an
// arithmetic type.
template <typename T>
using EnableIfNumeric = std::enable_if<std::is_arithmetic<T>::value, void>;
82
// SFINAE helper: DisableIfNumeric<T>::type exists (and is void) only when T is NOT an
// arithmetic type.
template <typename T>
using DisableIfNumeric = std::enable_if<!std::is_arithmetic<T>::value, void>;
85
86 // Argument definition information, created by an ArgumentBuilder and an UntypedArgumentBuilder.
87 template <typename TArg>
88 struct CmdlineParserArgumentInfo {
89 // This version will only be used if TArg is arithmetic and thus has the <= operators.
90 template <typename T = TArg> // Necessary to get SFINAE to kick in.
91 bool CheckRange(const TArg& value, typename EnableIfNumeric<T>::type* = 0) {
92 if (has_range_) {
93 return min_ <= value && value <= max_;
94 }
95 return true;
96 }
97
98 // This version will be used at other times when TArg is not arithmetic.
99 template <typename T = TArg>
100 bool CheckRange(const TArg&, typename DisableIfNumeric<T>::type* = 0) {
101 assert(!has_range_);
102 return true;
103 }
104
105 // Do a quick match token-by-token, and see if they match.
106 // Any tokens with a wildcard in them only match the prefix up until the wildcard.
107 //
108 // If this is true, then the wildcard matching later on can still fail, so this is not
109 // a guarantee that the argument is correct, it's more of a strong hint that the
110 // user-provided input *probably* was trying to match this argument.
111 size_t MaybeMatches(TokenRange token_list) const {
112 auto best_match = FindClosestMatch(token_list);
113
114 return best_match.second;
115 }
116
117 // Attempt to find the closest match (see MaybeMatches).
118 //
119 // Returns the token range that was the closest match and the # of tokens that
120 // this range was matched up until.
121 std::pair<const TokenRange*, size_t> FindClosestMatch(TokenRange token_list) const {
122 const TokenRange* best_match_ptr = nullptr;
123
124 size_t best_match = 0;
125 for (auto&& token_range : tokenized_names_) {
126 size_t this_match = token_range.MaybeMatches(token_list, std::string("_"));
127
128 if (this_match > best_match) {
129 best_match_ptr = &token_range;
130 best_match = this_match;
131 }
132 }
133
134 return std::make_pair(best_match_ptr, best_match);
135 }
136
137 // Mark the argument definition as completed, do not mutate the object anymore after this
138 // call is done.
139 //
140 // Performs several sanity checks and token calculations.
141 void CompleteArgument() {
142 assert(names_.size() >= 1);
143 assert(!is_completed_);
144
145 is_completed_ = true;
146
147 size_t blank_count = 0;
148 size_t token_count = 0;
149
150 size_t global_blank_count = 0;
151 size_t global_token_count = 0;
152 for (auto&& name : names_) {
153 std::string s(name);
154
155 size_t local_blank_count = std::count(s.begin(), s.end(), '_');
156 size_t local_token_count = std::count(s.begin(), s.end(), ' ');
157
158 if (global_blank_count != 0) {
159 assert(local_blank_count == global_blank_count
160 && "Every argument descriptor string must have same amount of blanks (_)");
161 }
162
163 if (local_blank_count != 0) {
164 global_blank_count = local_blank_count;
165 blank_count++;
166
167 assert(local_blank_count == 1 && "More than one blank is not supported");
168 assert(s.back() == '_' && "The blank character must only be at the end of the string");
169 }
170
171 if (global_token_count != 0) {
172 assert(local_token_count == global_token_count
173 && "Every argument descriptor string must have same amount of tokens (spaces)");
174 }
175
176 if (local_token_count != 0) {
177 global_token_count = local_token_count;
178 token_count++;
179 }
180
181 // Tokenize every name, turning it from a string to a token list.
182 tokenized_names_.clear();
183 for (auto&& name1 : names_) {
184 // Split along ' ' only, removing any duplicated spaces.
185 tokenized_names_.push_back(
186 TokenRange::Split(name1, {' '}).RemoveToken(" "));
187 }
188
189 // remove the _ character from each of the token ranges
190 // we will often end up with an empty token (i.e. ["-XX", "_"] -> ["-XX", ""]
191 // and this is OK because we still need an empty token to simplify
192 // range comparisons
193 simple_names_.clear();
194
195 for (auto&& tokenized_name : tokenized_names_) {
196 simple_names_.push_back(tokenized_name.RemoveCharacter('_'));
197 }
198 }
199
200 if (token_count != 0) {
201 assert(("Every argument descriptor string must have equal amount of tokens (spaces)" &&
202 token_count == names_.size()));
203 }
204
205 if (blank_count != 0) {
206 assert(("Every argument descriptor string must have an equal amount of blanks (_)" &&
207 blank_count == names_.size()));
208 }
209
210 using_blanks_ = blank_count > 0;
211 {
212 size_t smallest_name_token_range_size =
213 std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), ~(0u),
214 [](size_t min, const TokenRange& cur) {
215 return std::min(min, cur.Size());
216 });
217 size_t largest_name_token_range_size =
218 std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), 0u,
219 [](size_t max, const TokenRange& cur) {
220 return std::max(max, cur.Size());
221 });
222
223 token_range_size_ = std::make_pair(smallest_name_token_range_size,
224 largest_name_token_range_size);
225 }
226
227 if (has_value_list_) {
228 assert(names_.size() == value_list_.size()
229 && "Number of arg descriptors must match number of values");
230 assert(!has_value_map_);
231 }
232 if (has_value_map_) {
233 if (!using_blanks_) {
234 assert(names_.size() == value_map_.size() &&
235 "Since no blanks were specified, each arg is mapped directly into a mapped "
236 "value without parsing; sizes must match");
237 }
238
239 assert(!has_value_list_);
240 }
241
242 if (!using_blanks_ && !CmdlineType<TArg>::kCanParseBlankless) {
243 assert((has_value_map_ || has_value_list_) &&
244 "Arguments without a blank (_) must provide either a value map or a value list");
245 }
246
247 TypedCheck();
248 }
249
250 // List of aliases for a single argument definition, e.g. {"-Xdex2oat", "-Xnodex2oat"}.
251 std::vector<const char*> names_;
252 // Is there at least 1 wildcard '_' in the argument definition?
253 bool using_blanks_ = false;
254 // [min, max] token counts in each arg def
255 std::pair<size_t, size_t> token_range_size_;
256
257 // contains all the names in a tokenized form, i.e. as a space-delimited list
258 std::vector<TokenRange> tokenized_names_;
259
260 // contains the tokenized names, but with the _ character stripped
261 std::vector<TokenRange> simple_names_;
262
263 // For argument definitions created with '.AppendValues()'
264 // Meaning that parsing should mutate the existing value in-place if possible.
265 bool appending_values_ = false;
266
267 // For argument definitions created with '.WithRange(min, max)'
268 bool has_range_ = false;
269 TArg min_;
270 TArg max_;
271
272 // For argument definitions created with '.WithValueMap'
273 bool has_value_map_ = false;
274 std::vector<std::pair<const char*, TArg>> value_map_;
275
276 // For argument definitions created with '.WithValues'
277 bool has_value_list_ = false;
278 std::vector<TArg> value_list_;
279
280 // Make sure there's a default constructor.
281 CmdlineParserArgumentInfo() = default;
282
283 // Ensure there's a default move constructor.
284 CmdlineParserArgumentInfo(CmdlineParserArgumentInfo&&) = default;
285
286 private:
287 // Perform type-specific checks at runtime.
288 template <typename T = TArg>
289 void TypedCheck(typename std::enable_if<std::is_same<Unit, T>::value>::type* = 0) {
290 assert(!using_blanks_ &&
291 "Blanks are not supported in Unit arguments; since a Unit has no parse-able value");
292 }
293
294 void TypedCheck() {}
295
296 bool is_completed_ = false;
297 };
298
299 // A virtual-implementation of the necessary argument information in order to
300 // be able to parse arguments.
301 template <typename TArg>
302 struct CmdlineParseArgument : CmdlineParseArgumentAny {
Roland Levillain3887c462015-08-12 18:15:42 +0100303 CmdlineParseArgument(CmdlineParserArgumentInfo<TArg>&& argument_info,
304 std::function<void(TArg&)>&& save_argument,
305 std::function<TArg&(void)>&& load_argument)
Igor Murashkinaaebaa02015-01-26 10:55:53 -0800306 : argument_info_(std::forward<decltype(argument_info)>(argument_info)),
307 save_argument_(std::forward<decltype(save_argument)>(save_argument)),
308 load_argument_(std::forward<decltype(load_argument)>(load_argument)) {
309 }
310
311 using UserTypeInfo = CmdlineType<TArg>;
312
313 virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) {
314 assert(arguments.Size() > 0);
315 assert(consumed_tokens != nullptr);
316
317 auto closest_match_res = argument_info_.FindClosestMatch(arguments);
318 size_t best_match_size = closest_match_res.second;
319 const TokenRange* best_match_arg_def = closest_match_res.first;
320
321 if (best_match_size > arguments.Size()) {
322 // The best match has more tokens than were provided.
323 // Shouldn't happen in practice since the outer parser does this check.
324 return CmdlineResult(CmdlineResult::kUnknown, "Size mismatch");
325 }
326
327 assert(best_match_arg_def != nullptr);
328 *consumed_tokens = best_match_arg_def->Size();
329
330 if (!argument_info_.using_blanks_) {
331 return ParseArgumentSingle(arguments.Join(' '));
332 }
333
334 // Extract out the blank value from arguments
335 // e.g. for a def of "foo:_" and input "foo:bar", blank_value == "bar"
336 std::string blank_value = "";
337 size_t idx = 0;
338 for (auto&& def_token : *best_match_arg_def) {
339 auto&& arg_token = arguments[idx];
340
341 // Does this definition-token have a wildcard in it?
342 if (def_token.find('_') == std::string::npos) {
343 // No, regular token. Match 1:1 against the argument token.
344 bool token_match = def_token == arg_token;
345
346 if (!token_match) {
347 return CmdlineResult(CmdlineResult::kFailure,
348 std::string("Failed to parse ") + best_match_arg_def->GetToken(0)
349 + " at token " + std::to_string(idx));
350 }
351 } else {
352 // This is a wild-carded token.
353 TokenRange def_split_wildcards = TokenRange::Split(def_token, {'_'});
354
355 // Extract the wildcard contents out of the user-provided arg_token.
356 std::unique_ptr<TokenRange> arg_matches =
357 def_split_wildcards.MatchSubstrings(arg_token, "_");
358 if (arg_matches == nullptr) {
359 return CmdlineResult(CmdlineResult::kFailure,
360 std::string("Failed to parse ") + best_match_arg_def->GetToken(0)
361 + ", with a wildcard pattern " + def_token
362 + " at token " + std::to_string(idx));
363 }
364
365 // Get the corresponding wildcard tokens from arg_matches,
366 // and concatenate it to blank_value.
367 for (size_t sub_idx = 0;
368 sub_idx < def_split_wildcards.Size() && sub_idx < arg_matches->Size(); ++sub_idx) {
369 if (def_split_wildcards[sub_idx] == "_") {
370 blank_value += arg_matches->GetToken(sub_idx);
371 }
372 }
373 }
374
375 ++idx;
376 }
377
378 return ParseArgumentSingle(blank_value);
379 }
380
381 private:
382 virtual CmdlineResult ParseArgumentSingle(const std::string& argument) {
383 // TODO: refactor to use LookupValue for the value lists/maps
384
385 // Handle the 'WithValueMap(...)' argument definition
386 if (argument_info_.has_value_map_) {
387 for (auto&& value_pair : argument_info_.value_map_) {
388 const char* name = value_pair.first;
389
390 if (argument == name) {
391 return SaveArgument(value_pair.second);
392 }
393 }
394
395 // Error case: Fail, telling the user what the allowed values were.
396 std::vector<std::string> allowed_values;
397 for (auto&& value_pair : argument_info_.value_map_) {
398 const char* name = value_pair.first;
399 allowed_values.push_back(name);
400 }
401
402 std::string allowed_values_flat = Join(allowed_values, ',');
403 return CmdlineResult(CmdlineResult::kFailure,
404 "Argument value '" + argument + "' does not match any of known valid"
405 "values: {" + allowed_values_flat + "}");
406 }
407
408 // Handle the 'WithValues(...)' argument definition
409 if (argument_info_.has_value_list_) {
410 size_t arg_def_idx = 0;
411 for (auto&& value : argument_info_.value_list_) {
412 auto&& arg_def_token = argument_info_.names_[arg_def_idx];
413
414 if (arg_def_token == argument) {
415 return SaveArgument(value);
416 }
417 ++arg_def_idx;
418 }
419
420 assert(arg_def_idx + 1 == argument_info_.value_list_.size() &&
421 "Number of named argument definitions must match number of values defined");
422
423 // Error case: Fail, telling the user what the allowed values were.
424 std::vector<std::string> allowed_values;
425 for (auto&& arg_name : argument_info_.names_) {
426 allowed_values.push_back(arg_name);
427 }
428
429 std::string allowed_values_flat = Join(allowed_values, ',');
430 return CmdlineResult(CmdlineResult::kFailure,
431 "Argument value '" + argument + "' does not match any of known valid"
432 "values: {" + allowed_values_flat + "}");
433 }
434
435 // Handle the regular case where we parsed an unknown value from a blank.
436 UserTypeInfo type_parser;
437
438 if (argument_info_.appending_values_) {
439 TArg& existing = load_argument_();
440 CmdlineParseResult<TArg> result = type_parser.ParseAndAppend(argument, existing);
441
442 assert(!argument_info_.has_range_);
443
444 return result;
445 }
446
447 CmdlineParseResult<TArg> result = type_parser.Parse(argument);
448
449 if (result.IsSuccess()) {
450 TArg& value = result.GetValue();
451
452 // Do a range check for 'WithRange(min,max)' argument definition.
453 if (!argument_info_.CheckRange(value)) {
454 return CmdlineParseResult<TArg>::OutOfRange(
455 value, argument_info_.min_, argument_info_.max_);
456 }
457
458 return SaveArgument(value);
459 }
460
461 // Some kind of type-specific parse error. Pass the result as-is.
462 CmdlineResult raw_result = std::move(result);
463 return raw_result;
464 }
465
466 public:
467 virtual const char* GetTypeName() const {
468 // TODO: Obviate the need for each type specialization to hardcode the type name
469 return UserTypeInfo::Name();
470 }
471
472 // How many tokens should be taken off argv for parsing this argument.
473 // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space).
474 //
475 // A [min,max] range is returned to represent argument definitions with multiple
476 // value tokens. (e.g. {"-h", "-h " } would return [1,2]).
477 virtual std::pair<size_t, size_t> GetNumTokens() const {
478 return argument_info_.token_range_size_;
479 }
480
481 // See if this token range might begin the same as the argument definition.
482 virtual size_t MaybeMatches(const TokenRange& tokens) {
483 return argument_info_.MaybeMatches(tokens);
484 }
485
486 private:
487 CmdlineResult SaveArgument(const TArg& value) {
488 assert(!argument_info_.appending_values_
489 && "If the values are being appended, then the updated parse value is "
490 "updated by-ref as a side effect and shouldn't be stored directly");
491 TArg val = value;
492 save_argument_(val);
493 return CmdlineResult(CmdlineResult::kSuccess);
494 }
495
496 CmdlineParserArgumentInfo<TArg> argument_info_;
497 std::function<void(TArg&)> save_argument_;
498 std::function<TArg&(void)> load_argument_;
499 };
Roland Levillain7cbd27f2016-08-11 23:53:33 +0100500 } // namespace detail // NOLINT [readability/namespace] [5]
Igor Murashkinaaebaa02015-01-26 10:55:53 -0800501} // namespace art
502
503#endif // ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_