From 286ae28e90fb8a8c65ce5361f04f9fbfd597bdd4 Mon Sep 17 00:00:00 2001 From: Ryan Davis Date: Mon, 22 May 2017 15:29:41 -0700 Subject: [PATCH 01/13] Use and modify defaults from flay instead of redundantly specifying everything. --- lib/cc/engine/analyzers/reporter.rb | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/lib/cc/engine/analyzers/reporter.rb b/lib/cc/engine/analyzers/reporter.rb index 2c755e4c..3e0f3fcf 100644 --- a/lib/cc/engine/analyzers/reporter.rb +++ b/lib/cc/engine/analyzers/reporter.rb @@ -10,8 +10,6 @@ module CC module Engine module Analyzers class Reporter - TIMEOUT = 10 - def initialize(engine_config, language_strategy, io) @engine_config = engine_config @language_strategy = language_strategy @@ -87,17 +85,11 @@ def new_violations(issue) end def flay_options - { - diff: false, + changes = { mass: language_strategy.mass_threshold, - summary: false, - verbose: false, - number: true, - timeout: TIMEOUT, - liberal: false, - fuzzy: false, - only: nil, } + + CCFlay.default_options.merge changes end def debug(message) From dd7866a78b409b2d7c29a3c2e5f6475948abe882 Mon Sep 17 00:00:00 2001 From: Ryan Davis Date: Mon, 22 May 2017 15:31:57 -0700 Subject: [PATCH 02/13] Update flay and remove code pushed upstream. This code also adds a bunch of node names to Sexp::NODE_NAMES for JS and PHP. It is probably not exhaustive. --- Gemfile | 2 +- Gemfile.lock | 8 ++-- lib/ccflay.rb | 123 +++++++++++++++++++------------------------------- 3 files changed, 51 insertions(+), 82 deletions(-) diff --git a/Gemfile b/Gemfile index 50173fc8..88336385 100644 --- a/Gemfile +++ b/Gemfile @@ -2,7 +2,7 @@ source "https://rubygems.org" gem "concurrent-ruby", "~> 1.0.0" -gem "flay" +gem "flay", "~> 2.9" gem "json" group :test do diff --git a/Gemfile.lock b/Gemfile.lock index 1ce5a7d4..dd35e9ac 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -5,7 +5,7 @@ GEM concurrent-ruby (1.0.0) diff-lcs (1.2.5) erubis (2.7.0) - flay (2.8.1) + flay (2.9.0) erubis (~> 2.7.0) path_expander (~> 1.0) ruby_parser (~> 3.0) @@ -31,9 +31,9 @@ GEM diff-lcs (>= 1.2.0, < 2.0) rspec-support (~> 3.3.0) rspec-support (3.3.0) - ruby_parser (3.8.4) + ruby_parser (3.9.0) sexp_processor (~> 4.1) - sexp_processor (4.8.0) + sexp_processor (4.9.0) slop (3.6.0) PLATFORMS @@ -41,7 +41,7 @@ PLATFORMS DEPENDENCIES concurrent-ruby (~> 1.0.0) - flay + flay (~> 2.9) json pry rake diff --git a/lib/ccflay.rb b/lib/ccflay.rb index bdd62416..02a414d0 100644 --- a/lib/ccflay.rb +++ b/lib/ccflay.rb @@ -18,71 +18,54 @@ def initialize(option = nil) self.identical = Concurrent::Hash.new self.masses = Concurrent::Hash.new end - - ## - # Calculate the structural hash for this sexp. Cached, so don't - # modify the sexp afterwards and expect it to be correct. - - if ENV["PURE_HASH"] - def structural_hash - @structural_hash ||= pure_ruby_hash - end - else - def structural_hash - @structural_hash ||= Digest::MD5.hexdigest(structure.to_s) - end - end end -# TODO: move this to flay proper... it's benchmarking faster -class Sexp # straight from flay-persistent - names = %w[alias and arglist args array attrasgn attrset back_ref - begin block block_pass break call case cdecl class colon2 - colon3 const cvar cvasgn cvdecl defined defn defs dot2 - dot3 dregx dregx_once dstr dsym dxstr ensure evstr false - flip2 flip3 for gasgn gvar hash iasgn if iter ivar lasgn - lit lvar masgn match match2 match3 module next nil not - nth_ref op_asgn op_asgn1 op_asgn2 op_asgn_and op_asgn_or or - postexe redo resbody rescue retry return sclass self - splat str super svalue to_ary true undef until valias - when while xstr yield zsuper kwarg kwsplat safe_call] - - ## - # All ruby_parser nodes in an index hash. Used by jenkins algorithm. - - NODE_NAMES = Hash[names.each_with_index.map { |n, i| [n.to_sym, i] }] - - NODE_NAMES.default_proc = lambda { |h, k| - $stderr.puts "ERROR: couldn't find node type #{k} in Sexp::NODE_NAMES." - h[k] = NODE_NAMES.size - } - - MAX_INT32 = 2**32 - 1 # :nodoc: - - def pure_ruby_hash # :nodoc: see above - hash = 0 - - n = NODE_NAMES.fetch first - - raise "Bad lookup: #{first} in #{sexp.inspect}" unless n - - hash += n & MAX_INT32 - hash += hash << 10 & MAX_INT32 - hash ^= hash >> 6 & MAX_INT32 - - each do |o| - next unless o.is_a? Sexp - hash = hash + o.pure_ruby_hash & MAX_INT32 - hash = (hash + (hash << 10)) & MAX_INT32 - hash = (hash ^ (hash >> 6)) & MAX_INT32 - end - - hash = (hash + (hash << 3)) & MAX_INT32 - hash = (hash ^ (hash >> 11)) & MAX_INT32 - hash = (hash + (hash << 15)) & MAX_INT32 - - hash - end +new_nodes = [ + :And, :ArrayExpression, :ArrowFunctionExpression, + :Assign, :AssignmentExpression, :AssignmentPattern, + :Attribute, :BinaryExpression, :BlockStatement, :BoolOp, + :BooleanLiteral, :Break, :BreakStatement, :Call, + :CallExpression, :CatchClause, :ClassBody, + :ClassDeclaration, :ClassMethod, :Compare, + :ConditionalExpression, :Continue, :ContinueStatement, + :Dict, :Directive, :DirectiveLiteral, :DirectiveLiteral, + :DoWhileStatement, :EmptyStatement, :Eq, :ExceptHandler, + :ExportDefaultDeclaration, :ExportNamedDeclaration, + :ExportSpecifier, :Expr, :ExpressionStatement, :For, + :ForInStatement, :ForStatement, :FunctionDeclaration, + :FunctionDef, :FunctionExpression, :Gt, :Identifier, :If, + :IfExp, :IfStatement, :Import, :ImportDeclaration, + :ImportDefaultSpecifier, :ImportFrom, :ImportSpecifier, + :Index, :LabeledStatement, :LogicalExpression, :LtE, + :MemberExpression, :Name, :NewExpression, :NotIn, + :NullLiteral, :Num, :NumericLiteral, :ObjectExpression, + :ObjectMethod, :ObjectPattern, :ObjectProperty, :Or, + :Print, :RegExpLiteral, :ReturnStatement, + :SequenceExpression, :Slice, :Str, :StringLiteral, + :Subscript, :Super, :SwitchCase, :SwitchStatement, + :TaggedTemplateExpression, :TemplateElement, + :TemplateLiteral, :ThisExpression, :ThrowStatement, + :TryExcept, :TryStatement, :Tuple, :UnaryExpression, + :UpdateExpression, :VariableDeclaration, + :VariableDeclarator, :WhileStatement, :Yield, :alternate, + :argument, :arguments, :array_dim_fetch, :assign, + :assign_op_minus, :binary_op_bitwise_and, + :binary_op_bitwise_or, :binary_op_concat, + :binary_op_shift_right, :binary_op_smaller_or_equal, + :body, :callee, :cases, :comparators, :consequent, + :declaration, :declarations, :directives, :elements, + :elts, :exp, :expression, :expressions, :extra, + :finalizer, :foreach, :func_call, :function, :id, :init, + :init, :key, :keyword, :left, :list, :lnumber, :name, + :object, :param, :params, :properties, :property, + :quasis, :right, :specifiers, :string, :superClass, + :target, :test, :update, :value, :values, + :variable + ] + +# Add known javascript and php nodes to the hash registry. +new_nodes.each do |name| + Sexp::NODE_NAMES[name] = Sexp::NODE_NAMES.size end class Sexp @@ -108,17 +91,3 @@ def flatter end end end - -class Sexp # TODO: push this back to flay - alias old_mass mass - - def mass - @mass ||= inject(1) do |t, s| - if s.is_a?(Sexp) - t + s.mass - else - t - end - end - end -end From 97d98fde91db7573f82e9ebc77e74ac27b2d5dee Mon Sep 17 00:00:00 2001 From: Ryan Davis Date: Mon, 22 May 2017 15:59:51 -0700 Subject: [PATCH 03/13] Fixed in latest ruby_parser: * Fixed bug setting line number for hash literals to line of opening brace. Which changes line numbers in the tests here. --- spec/cc/engine/analyzers/sexp_lines_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/cc/engine/analyzers/sexp_lines_spec.rb b/spec/cc/engine/analyzers/sexp_lines_spec.rb index 45091c81..75d8a4df 100644 --- a/spec/cc/engine/analyzers/sexp_lines_spec.rb +++ b/spec/cc/engine/analyzers/sexp_lines_spec.rb @@ -49,8 +49,8 @@ module CC::Engine::Analyzers expect(locations.count).to eq 2 - expect([locations[0].begin_line, locations[0].end_line]).to eq([2, 7]) - expect([locations[1].begin_line, locations[1].end_line]).to eq([11, 16]) + expect([locations[0].begin_line, locations[0].end_line]).to eq([1, 7]) + expect([locations[1].begin_line, locations[1].end_line]).to eq([10, 16]) end end From 3df53174cdb9b94e63130bce69101aef1855cf53 Mon Sep 17 00:00:00 2001 From: Ryan Davis Date: Mon, 22 May 2017 16:16:36 -0700 Subject: [PATCH 04/13] Minor refactoring: extract `self.class::LANGUAGE` to `language` method. --- lib/cc/engine/analyzers/analyzer_base.rb | 10 +++++++--- spec/cc/engine/analyzers/analyzer_base_spec.rb | 4 ++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/lib/cc/engine/analyzers/analyzer_base.rb b/lib/cc/engine/analyzers/analyzer_base.rb index 0340c756..784e06d9 100644 --- a/lib/cc/engine/analyzers/analyzer_base.rb +++ b/lib/cc/engine/analyzers/analyzer_base.rb @@ -40,12 +40,16 @@ def files file_list.files end + def language + self.class::LANGUAGE + end + def mass_threshold - engine_config.mass_threshold_for(self.class::LANGUAGE) || self.class::DEFAULT_MASS_THRESHOLD + engine_config.mass_threshold_for(language) || self.class::DEFAULT_MASS_THRESHOLD end def count_threshold - engine_config.count_threshold_for(self.class::LANGUAGE) + engine_config.count_threshold_for(language) end def calculate_points(mass) @@ -77,7 +81,7 @@ def file_list @_file_list ||= ::CC::Engine::Analyzers::FileList.new( engine_config: engine_config, patterns: engine_config.patterns_for( - self.class::LANGUAGE, + language, self.class::PATTERNS, ), ) diff --git a/spec/cc/engine/analyzers/analyzer_base_spec.rb b/spec/cc/engine/analyzers/analyzer_base_spec.rb index 8a6344ca..b32f615f 100644 --- a/spec/cc/engine/analyzers/analyzer_base_spec.rb +++ b/spec/cc/engine/analyzers/analyzer_base_spec.rb @@ -28,6 +28,10 @@ class DummyAnalyzer < Base expect(analyzer.files).to match_array(['./foo.a', './foo.b']) end + it "knows what language it is analyzing" do + expect(analyzer.language).to eq("dummy") + end + context "with custom patterns" do let(:engine_config) do EngineConfig.new({ From d037e5df529e6944a21d490d1eeac5c3cf31bbd8 Mon Sep 17 00:00:00 2001 From: Ryan Davis Date: Fri, 26 May 2017 13:49:32 -0700 Subject: [PATCH 05/13] Changed strategy wrt parsing and timeouts. By observation, either an external parser is going to finish quickly or it probably isn't going to finish at all (at least within the total run timeout thresholds). Currently the timeout is 300s (5 minutes) out of a total run timeout of 900s (15m). That means 3 large files will kill off an entire project run. I've found that even rather javascript large files all parse within 2 seconds and those that don't don't parse within 5 minutes. Dropping the thresholds to 10s seems safe and will allow more projects to finish analyzing to report SOMETHING. Besides, all the files I've seen time out were vendored versions of react/ember and the like. They're huge and they're NOT actually part of the project. Losing them quickly will not reduce the quality of analysis. --- lib/cc/engine/analyzers/javascript/main.rb | 3 ++- lib/cc/engine/analyzers/javascript/parser.rb | 6 +++++- lib/cc/engine/analyzers/php/main.rb | 6 ++---- lib/cc/engine/analyzers/php/parser.rb | 7 ++++++- lib/cc/engine/analyzers/ruby/main.rb | 2 +- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/lib/cc/engine/analyzers/javascript/main.rb b/lib/cc/engine/analyzers/javascript/main.rb index 30505d35..c69b3fa4 100644 --- a/lib/cc/engine/analyzers/javascript/main.rb +++ b/lib/cc/engine/analyzers/javascript/main.rb @@ -28,7 +28,8 @@ def transform_sexp(sexp) private def process_file(path) - Node.new(js_parser.new(File.read(path), path).parse.syntax_tree, path).format + ast = js_parser.new(File.read(path), path).parse + Node.new(ast.syntax_tree, path).format if ast end def js_parser diff --git a/lib/cc/engine/analyzers/javascript/parser.rb b/lib/cc/engine/analyzers/javascript/parser.rb index 7ab71664..f544041f 100644 --- a/lib/cc/engine/analyzers/javascript/parser.rb +++ b/lib/cc/engine/analyzers/javascript/parser.rb @@ -7,6 +7,8 @@ module Engine module Analyzers module Javascript class Parser < ParserBase + TIMEOUT = 10 + attr_reader :code, :filename, :syntax_tree def initialize(code, filename) @@ -15,12 +17,14 @@ def initialize(code, filename) end def parse - runner = CommandLineRunner.new(js_command) + runner = CommandLineRunner.new(js_command, TIMEOUT) runner.run(strip_shebang(code)) do |ast| @syntax_tree = parse_json(ast) end self + rescue Timeout::Error + warn "TIMEOUT parsing #{filename}. Skipping." end private diff --git a/lib/cc/engine/analyzers/php/main.rb b/lib/cc/engine/analyzers/php/main.rb index 3ed01886..f4c73509 100644 --- a/lib/cc/engine/analyzers/php/main.rb +++ b/lib/cc/engine/analyzers/php/main.rb @@ -27,10 +27,8 @@ def transform_sexp(sexp) def process_file(path) code = File.binread(path) - parser = php_parser.new(code, path).parse - syntax_tree = parser.syntax_tree - - syntax_tree&.to_sexp + ast = php_parser.new(code, path).parse + ast.syntax_tree&.to_sexp if ast end def php_parser diff --git a/lib/cc/engine/analyzers/php/parser.rb b/lib/cc/engine/analyzers/php/parser.rb index 2d6b9f87..c9fdc3e3 100644 --- a/lib/cc/engine/analyzers/php/parser.rb +++ b/lib/cc/engine/analyzers/php/parser.rb @@ -10,6 +10,8 @@ module Engine module Analyzers module Php class Parser < ParserBase + TIMEOUT = 10 + attr_reader :code, :filename, :syntax_tree def initialize(code, filename) @@ -18,7 +20,8 @@ def initialize(code, filename) end def parse - runner = CommandLineRunner.new("php -d 'display_errors = Off' #{parser_path}") + cmd = "php -d 'display_errors = Off' #{parser_path}" + runner = CommandLineRunner.new(cmd, TIMEOUT) runner.run(code) do |output| json = parse_json(output) @@ -29,6 +32,8 @@ def parse end self + rescue Timeout::Error + warn "TIMEOUT parsing #{filename}. Skipping." end private diff --git a/lib/cc/engine/analyzers/ruby/main.rb b/lib/cc/engine/analyzers/ruby/main.rb index ce6951ef..c8d67695 100644 --- a/lib/cc/engine/analyzers/ruby/main.rb +++ b/lib/cc/engine/analyzers/ruby/main.rb @@ -20,7 +20,7 @@ class Main < CC::Engine::Analyzers::Base ].freeze DEFAULT_MASS_THRESHOLD = 18 POINTS_PER_OVERAGE = 100_000 - TIMEOUT = 300 + TIMEOUT = 30 private From 12cf1be7885abfb514103534c478a4d88b927e19 Mon Sep 17 00:00:00 2001 From: Ryan Davis Date: Fri, 26 May 2017 16:33:39 -0700 Subject: [PATCH 06/13] Updated node additions list. I should probably make these 1 column and put it at the end of the file for maintainability. --- lib/ccflay.rb | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/lib/ccflay.rb b/lib/ccflay.rb index 02a414d0..ecad0f3b 100644 --- a/lib/ccflay.rb +++ b/lib/ccflay.rb @@ -26,17 +26,21 @@ def initialize(option = nil) :Attribute, :BinaryExpression, :BlockStatement, :BoolOp, :BooleanLiteral, :Break, :BreakStatement, :Call, :CallExpression, :CatchClause, :ClassBody, - :ClassDeclaration, :ClassMethod, :Compare, - :ConditionalExpression, :Continue, :ContinueStatement, - :Dict, :Directive, :DirectiveLiteral, :DirectiveLiteral, - :DoWhileStatement, :EmptyStatement, :Eq, :ExceptHandler, - :ExportDefaultDeclaration, :ExportNamedDeclaration, - :ExportSpecifier, :Expr, :ExpressionStatement, :For, - :ForInStatement, :ForStatement, :FunctionDeclaration, - :FunctionDef, :FunctionExpression, :Gt, :Identifier, :If, - :IfExp, :IfStatement, :Import, :ImportDeclaration, + :ClassDeclaration, :ClassMethod, :ClassProperty, + :Compare, :ConditionalExpression, :Continue, + :ContinueStatement, :Dict, :Directive, :DirectiveLiteral, + :DirectiveLiteral, :DoWhileStatement, :EmptyStatement, + :Eq, :ExceptHandler, :ExportDefaultDeclaration, + :ExportNamedDeclaration, :ExportSpecifier, :Expr, + :ExpressionStatement, :For, :ForInStatement, + :ForStatement, :FunctionDeclaration, :FunctionDef, + :FunctionExpression, :Gt, :Identifier, :If, :IfExp, + :IfStatement, :Import, :ImportDeclaration, :ImportDefaultSpecifier, :ImportFrom, :ImportSpecifier, - :Index, :LabeledStatement, :LogicalExpression, :LtE, + :Index, :JSXAttribute, :JSXClosingElement, :JSXElement, + :JSXExpressionContainer, :JSXIdentifier, + :JSXOpeningElement, :JSXSpreadAttribute, :JSXText, + :LabeledStatement, :LogicalExpression, :LtE, :MemberExpression, :Name, :NewExpression, :NotIn, :NullLiteral, :Num, :NumericLiteral, :ObjectExpression, :ObjectMethod, :ObjectPattern, :ObjectProperty, :Or, @@ -49,18 +53,17 @@ def initialize(option = nil) :UpdateExpression, :VariableDeclaration, :VariableDeclarator, :WhileStatement, :Yield, :alternate, :argument, :arguments, :array_dim_fetch, :assign, - :assign_op_minus, :binary_op_bitwise_and, + :assign_op_minus, :attributes, :binary_op_bitwise_and, :binary_op_bitwise_or, :binary_op_concat, :binary_op_shift_right, :binary_op_smaller_or_equal, - :body, :callee, :cases, :comparators, :consequent, - :declaration, :declarations, :directives, :elements, - :elts, :exp, :expression, :expressions, :extra, - :finalizer, :foreach, :func_call, :function, :id, :init, - :init, :key, :keyword, :left, :list, :lnumber, :name, - :object, :param, :params, :properties, :property, + :body, :callee, :cases, :children, :comparators, + :consequent, :declaration, :declarations, :directives, + :elements, :elts, :exp, :expression, :expressions, + :extra, :finalizer, :foreach, :func_call, :function, :id, + :init, :init, :key, :keyword, :left, :list, :lnumber, + :name, :object, :param, :params, :properties, :property, :quasis, :right, :specifiers, :string, :superClass, - :target, :test, :update, :value, :values, - :variable + :target, :test, :update, :value, :values, :variable, ] # Add known javascript and php nodes to the hash registry. From 69247ef53fb7d830069dff598de0f3544c3185a8 Mon Sep 17 00:00:00 2001 From: Ryan Davis Date: Fri, 26 May 2017 16:36:57 -0700 Subject: [PATCH 07/13] Added filtering to config & analysis. So now a .codeclimate.yml file can have an entry like: - ruby: filters: - "(defn, [m /^test_/], _, ___)" and that will filter out all methods whose names start with "test_". This requires releases of flay and sexp_processor to pass tests. Pushing now to get eyeballs on. --- lib/cc/engine/analyzers/analyzer_base.rb | 4 ++ lib/cc/engine/analyzers/engine_config.rb | 6 +++ lib/cc/engine/analyzers/reporter.rb | 1 + .../cc/engine/analyzers/engine_config_spec.rb | 41 +++++++++++++++++++ 4 files changed, 52 insertions(+) diff --git a/lib/cc/engine/analyzers/analyzer_base.rb b/lib/cc/engine/analyzers/analyzer_base.rb index 784e06d9..1f38ed1d 100644 --- a/lib/cc/engine/analyzers/analyzer_base.rb +++ b/lib/cc/engine/analyzers/analyzer_base.rb @@ -40,6 +40,10 @@ def files file_list.files end + def filters + engine_config.filters_for(language) + end + def language self.class::LANGUAGE end diff --git a/lib/cc/engine/analyzers/engine_config.rb b/lib/cc/engine/analyzers/engine_config.rb index d46f864c..81ac03f6 100644 --- a/lib/cc/engine/analyzers/engine_config.rb +++ b/lib/cc/engine/analyzers/engine_config.rb @@ -27,6 +27,12 @@ def concurrency config.fetch("config", {}).fetch("concurrency", 2).to_i end + def filters_for(language) + fetch_language(language).fetch("filters", []).map { |s| + Sexp::Matcher.parse s + } + end + def mass_threshold_for(language) threshold = fetch_language(language).fetch("mass_threshold", nil) diff --git a/lib/cc/engine/analyzers/reporter.rb b/lib/cc/engine/analyzers/reporter.rb index 3e0f3fcf..53d83eb3 100644 --- a/lib/cc/engine/analyzers/reporter.rb +++ b/lib/cc/engine/analyzers/reporter.rb @@ -87,6 +87,7 @@ def new_violations(issue) def flay_options changes = { mass: language_strategy.mass_threshold, + filters: language_strategy.filters, } CCFlay.default_options.merge changes diff --git a/spec/cc/engine/analyzers/engine_config_spec.rb b/spec/cc/engine/analyzers/engine_config_spec.rb index e94cefd9..305764e1 100644 --- a/spec/cc/engine/analyzers/engine_config_spec.rb +++ b/spec/cc/engine/analyzers/engine_config_spec.rb @@ -1,6 +1,7 @@ require "spec_helper" require "cc/engine/analyzers/engine_config" require "cc/engine/analyzers/ruby/main" +require "sexp" # to build matchers for filtering RSpec.describe CC::Engine::Analyzers::EngineConfig do describe "#config" do @@ -106,6 +107,16 @@ expect(engine_config.mass_threshold_for("ruby")).to be_nil end + + it "returns nil when language is empty via array" do + engine_config = CC::Engine::Analyzers::EngineConfig.new({ + "config" => { + "languages" => %w[ruby], + }, + }) + + expect(engine_config.mass_threshold_for("ruby")).to be_nil + end end describe "count_threshold_for" do @@ -149,6 +160,36 @@ end end + describe "filters_for" do + it "returns configured filter for language" do + engine_config = CC::Engine::Analyzers::EngineConfig.new({ + "config" => { + "languages" => { + "ruby" => { + "filters" => ["(defn [m /^test_/] _ ___)"], + }, + }, + }, + }) + + exp = [s{ s(:defn, m(/^test_/), _, ___) }] + + expect(engine_config.filters_for("ruby")).to eq(exp) + end + + it "returns default value when language value is empty" do + engine_config = CC::Engine::Analyzers::EngineConfig.new({ + "config" => { + "languages" => { + "ruby" => "", + }, + }, + }) + + expect(engine_config.filters_for("ruby")).to eq([]) + end + end + describe "include_paths" do it "returns given include paths" do engine_config = CC::Engine::Analyzers::EngineConfig.new({ From d11e37fca8add47a8b31611b3ee29deb2ed9e5d1 Mon Sep 17 00:00:00 2001 From: Ryan Davis Date: Wed, 14 Jun 2017 15:10:36 -0700 Subject: [PATCH 08/13] Added a default timeout of 10 seconds to python parsing. AFAIK, I have NO projects to test against from the testing group. So I don't know if that value is too small. I suspect it'll be on par with my data from javascript and PHP tho (ie, if it can't parse in 10s, it can't parse in 5m). --- lib/cc/engine/analyzers/python/parser.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/cc/engine/analyzers/python/parser.rb b/lib/cc/engine/analyzers/python/parser.rb index f9033ccd..81efd407 100644 --- a/lib/cc/engine/analyzers/python/parser.rb +++ b/lib/cc/engine/analyzers/python/parser.rb @@ -9,6 +9,7 @@ module Engine module Analyzers module Python class Parser < ParserBase + TIMEOUT = 10 attr_reader :code, :filename, :syntax_tree def initialize(python_version, code, filename) @@ -18,7 +19,7 @@ def initialize(python_version, code, filename) end def parse - runner = CommandLineRunner.new(python_command) + runner = CommandLineRunner.new(python_command, TIMEOUT) runner.run(code) do |ast| @syntax_tree = parse_json(ast) end From 3f2ae1f0a1238e9fd0cbe8cb6b74b5cc7aed3d8e Mon Sep 17 00:00:00 2001 From: Ryan Davis Date: Wed, 14 Jun 2017 15:12:49 -0700 Subject: [PATCH 09/13] Added pattern-based filtering patch for Flay. Includes a failing spec from: https://github.com/codeclimate/codeclimate-duplication/issues/188 made to pass by filtering "(hash ___)" (for now?). --- lib/ccflay.rb | 17 ++++++++ spec/cc/engine/analyzers/ruby/main_spec.rb | 51 ++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/lib/ccflay.rb b/lib/ccflay.rb index ecad0f3b..a50ca9e5 100644 --- a/lib/ccflay.rb +++ b/lib/ccflay.rb @@ -18,6 +18,23 @@ def initialize(option = nil) self.identical = Concurrent::Hash.new self.masses = Concurrent::Hash.new end + + def filter *patterns + return if patterns.empty? + + self.hashes.delete_if { |_, sexps| + sexps.any? { |sexp| + patterns.any? { |pattern| + pattern =~ sexp + } + } + } + end + + def prune + super + self.filter(*option[:filters]) + end end new_nodes = [ diff --git a/spec/cc/engine/analyzers/ruby/main_spec.rb b/spec/cc/engine/analyzers/ruby/main_spec.rb index 62ed1cd3..9eeb3dc4 100644 --- a/spec/cc/engine/analyzers/ruby/main_spec.rb +++ b/spec/cc/engine/analyzers/ruby/main_spec.rb @@ -149,6 +149,44 @@ def self.from_remediation_amount(amount) expect(run_engine(engine_conf)).to eq("") }.to output(/Skipping file/).to_stderr end + + it "does not see hashes as similar" do + create_source_file("foo.rb", <<-EORUBY) + ANIMALS = { + bat: "Bat", + bee: "Bee", + cat: "Cat", + cow: "Cow", + dog: "Dog", + fly: "Fly", + human: "Human", + lizard: "Lizard", + owl: "Owl", + ringworm: "Ringworm", + salmon: "Salmon", + whale: "Whale", + }.freeze + + TRANSPORT = { + airplane: "Airplane", + bicycle: "Bicycle", + bus: "Bus", + car: "Car", + escalator: "Escalator", + helicopter: "Helicopter", + lift: "Lift", + motorcycle: "Motorcycle", + rocket: "Rocket", + scooter: "Scooter", + skateboard: "Skateboard", + truck: "Truck", + }.freeze + EORUBY + + issues = run_engine(filtered_engine_conf("(hash ___)")).strip.split("\0") + + expect(issues.length).to eq(0) + end end describe "#calculate_points(mass)" do @@ -197,5 +235,18 @@ def self.from_remediation_amount(amount) def engine_conf EngineConfig.new({}) end + + def filtered_engine_conf *patterns + EngineConfig.new({ + "config" => { + "languages" => { + "ruby" => { + "filters" => patterns + } + } + } + } + ) + end end end From b8ba0d8872f44e9a2eb7ce89fd97a74090558c79 Mon Sep 17 00:00:00 2001 From: Ryan Davis Date: Wed, 14 Jun 2017 15:45:05 -0700 Subject: [PATCH 10/13] Updated Gemfile to use the beta version of sexp_processor. --- Gemfile | 1 + Gemfile.lock | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Gemfile b/Gemfile index 88336385..c50ce18f 100644 --- a/Gemfile +++ b/Gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "concurrent-ruby", "~> 1.0.0" gem "flay", "~> 2.9" +gem "sexp_processor", "~> 4.10including-betas0" # TODO: Remove when flay >= 2.10 gem "json" group :test do diff --git a/Gemfile.lock b/Gemfile.lock index dd35e9ac..729b170e 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -33,7 +33,7 @@ GEM rspec-support (3.3.0) ruby_parser (3.9.0) sexp_processor (~> 4.1) - sexp_processor (4.9.0) + sexp_processor (4.10.0b1) slop (3.6.0) PLATFORMS @@ -46,6 +46,4 @@ DEPENDENCIES pry rake rspec - -BUNDLED WITH - 1.14.4 + sexp_processor (~> 4.10including.pre.betas0) From 72f673e789e5c2125e70043c32eaf2d59ff09123 Mon Sep 17 00:00:00 2001 From: Ryan Davis Date: Wed, 14 Jun 2017 15:45:23 -0700 Subject: [PATCH 11/13] Added top-level doco for filtering --- README.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/README.md b/README.md index 006311b5..472ec384 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,33 @@ engines: - "**/*.ruby" ``` +### Node Filtering + +Sometimes structural similarities are reported that you just don't +care about. For example, the contents of arrays or hashes might have +similar structures and there's little you can do to refactor them. You +can specify language specific filters to ignore any issues that match +the pattern. Here is an example that filters simple hashes and arrays: + +```yaml +engines: + duplication: + enabled: true + config: + languages: + ruby: + filters: + - "(hash (lit _) (str _) ___)" + - "(array (str _) ___)" +``` + +The syntax for patterns are pretty simple. In the first pattern: +`"(hash (lit _) (str _) ___)"` specifies "A hash with a literal key, a +string value, followed by anything else (including nothing)". You +could also specify `"(hash ___)"` to ignore all hashes altogether. + +For more information on pattern matching, +see [sexp_processor][sexp_processor], especially [sexp.rb][sexp.rb] [codeclimate]: https://codeclimate.com/dashboard @@ -134,3 +161,5 @@ engines: [cli]: https://github.com/codeclimate/codeclimate [rule-of-three]: https://en.wikipedia.org/wiki/Rule_of_three_(computer_programming) [exclude-files-engine]: https://docs.codeclimate.com/docs/excluding-files-and-folders#section-exclude-paths-for-specific-engines +[sexp_processor]: https://github.com/seattlerb/sexp_processor/ +[sexp.rb]: https://github.com/seattlerb/sexp_processor/blob/master/lib/sexp.rb From b43ec5d976e518dd317011ceec79aca3f39e14c1 Mon Sep 17 00:00:00 2001 From: Ryan Davis Date: Wed, 14 Jun 2017 15:52:26 -0700 Subject: [PATCH 12/13] oops. Need to catch the timeout. Should probably be refactored up to parser_base. --- lib/cc/engine/analyzers/python/parser.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/cc/engine/analyzers/python/parser.rb b/lib/cc/engine/analyzers/python/parser.rb index 81efd407..519c446f 100644 --- a/lib/cc/engine/analyzers/python/parser.rb +++ b/lib/cc/engine/analyzers/python/parser.rb @@ -25,6 +25,8 @@ def parse end self + rescue Timeout::Error + warn "TIMEOUT parsing #{filename}. Skipping." end private From 03dbb56c836c11ae24d33f6096c4d050c31fc23e Mon Sep 17 00:00:00 2001 From: Ryan Davis Date: Thu, 15 Jun 2017 12:53:10 -0700 Subject: [PATCH 13/13] minor changes based on reviews --- lib/cc/engine/analyzers/engine_config.rb | 4 ++-- spec/cc/engine/analyzers/ruby/main_spec.rb | 18 ++++++++---------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/lib/cc/engine/analyzers/engine_config.rb b/lib/cc/engine/analyzers/engine_config.rb index 81ac03f6..1adbeaa4 100644 --- a/lib/cc/engine/analyzers/engine_config.rb +++ b/lib/cc/engine/analyzers/engine_config.rb @@ -28,8 +28,8 @@ def concurrency end def filters_for(language) - fetch_language(language).fetch("filters", []).map { |s| - Sexp::Matcher.parse s + fetch_language(language).fetch("filters", []).map { |filter| + Sexp::Matcher.parse filter } end diff --git a/spec/cc/engine/analyzers/ruby/main_spec.rb b/spec/cc/engine/analyzers/ruby/main_spec.rb index 9eeb3dc4..dc80db02 100644 --- a/spec/cc/engine/analyzers/ruby/main_spec.rb +++ b/spec/cc/engine/analyzers/ruby/main_spec.rb @@ -237,16 +237,14 @@ def engine_conf end def filtered_engine_conf *patterns - EngineConfig.new({ - "config" => { - "languages" => { - "ruby" => { - "filters" => patterns - } - } - } - } - ) + EngineConfig.new({ "config" => { + "languages" => { + "ruby" => { + "filters" => patterns + } + } + } + }) end end end