Codebase list ruby-cms-scanner / adf9314
New upstream version 0.5.0 Sophie Brun 5 years ago
13 changed file(s) with 208 addition(s) and 133 deletion(s). Raw diff Collapse all Expand all
1919 s.require_paths = ['lib']
2020
2121 s.add_dependency 'nokogiri', '~> 1.10.0'
22 s.add_dependency 'opt_parse_validator', '~> 0.0.17.1'
22 s.add_dependency 'opt_parse_validator', '~> 1.7.2'
2323 s.add_dependency 'public_suffix', '~> 3.0.0'
2424 s.add_dependency 'ruby-progressbar', '~> 1.10.0'
2525 s.add_dependency 'typhoeus', '~> 1.3.0'
3030 s.add_development_dependency 'coveralls', '~> 0.8.0'
3131 s.add_development_dependency 'rake', '~> 12.3'
3232 s.add_development_dependency 'rspec', '~> 3.8.0'
33 s.add_development_dependency 'rspec-its', '~> 1.2.0'
34 s.add_development_dependency 'rubocop', '~> 0.67.1'
33 s.add_development_dependency 'rspec-its', '~> 1.3.0'
34 s.add_development_dependency 'rubocop', '~> 0.67.2'
3535 s.add_development_dependency 'simplecov', '~> 0.16.1'
3636 s.add_development_dependency 'webmock', '~> 3.5.1'
3737 end
6363
6464 def to_s
6565 "The URL supplied redirects to #{redirect_uri}. Use the --ignore-main-redirect "\
66 'option to ignore the redirection and scan the target.'
66 'option to ignore the redirection and scan the target, or change the --url option value to the redirected URL.'
6767 end
6868 end
6969 end
2828 #
2929 # @return [ Array<String> ]
3030 def passive_urls(_opts = {})
31 target.in_scope_urls(target.homepage_res, passive_urls_xpath)
31 target.in_scope_uris(target.homepage_res, passive_urls_xpath).map(&:to_s)
3232 end
3333
3434 # @return [ String ]
77 @scope ||= Scope.new
88 end
99
10 # @param [ String ] url An absolute URL
10 # @param [ String, Addressable::URI ] url An absolute URL or URI
1111 #
1212 # @return [ Boolean ] true if the url given is in scope
13 def in_scope?(url)
14 scope.include?(Addressable::URI.parse(url.strip).host)
13 def in_scope?(url_or_uri)
14 url_or_uri = Addressable::URI.parse(url_or_uri.strip) unless url_or_uri.is_a?(Addressable::URI)
15
16 scope.include?(url_or_uri.host)
1517 rescue StandardError
1618 false
1719 end
1921 # @param [ Typhoeus::Response ] res
2022 # @param [ String ] xpath
2123 #
22 # @yield [ String, Nokogiri::XML::Element ] The in scope url and its associated tag
24 # @yield [ Addressable::URI, Nokogiri::XML::Element ] The in scope URI and its associated tag
2325 #
24 # @return [ Array<String> ] The in scope absolute URLs detected in the response's body
25 def in_scope_urls(res, xpath = '//@href|//@src|//@data-src')
26 # @return [ Array<Addressable::URI> ] The in scope absolute URIs detected in the response's body
27 def in_scope_uris(res, xpath = '//@href|//@src|//@data-src')
2628 found = []
2729
28 urls_from_page(res, xpath) do |url, tag|
29 next unless in_scope?(url)
30 uris_from_page(res, xpath) do |uri, tag|
31 next unless in_scope?(uri)
3032
31 yield url, tag if block_given?
33 yield uri, tag if block_given?
3234
33 found << url
35 found << uri
3436 end
3537
3638 found
3739 end
3840
41 # Similar to Target#url_pattern but considering the in scope domains as well
42 #
43 # @return [ Regexp ] The pattern related to the target url and in scope domains,
44 # it also matches escaped /, such as in JSON JS data: http:\/\/t.com\/
45 def scope_url_pattern
46 return @scope_url_pattern if @scope_url_pattern
47
48 domains = [uri.host + uri.path] + scope.domains[1..-1]&.map(&:to_s) + scope.invalid_domains
49
50 domains.map! { |d| Regexp.escape(d.gsub(%r{/$}, '')).gsub('\*', '.*').gsub('/', '\\\\\?/') }
51
52 @scope_url_pattern = %r{https?:\\?/\\?/(?:#{domains.join('|')})\\?/?}i
53 end
54
3955 # Scope Implementation
4056 class Scope
41 # @return [ Array<PublicSuffix::Domain ] The valid domains in scope
57 # @return [ Array<PublicSuffix::Domain> ] The valid domains in scope
4258 def domains
4359 @domains ||= []
4460 end
4965 end
5066
5167 def <<(element)
52 if PublicSuffix.valid?(element)
53 domains << PublicSuffix.parse(element)
68 if PublicSuffix.valid?(element, ignore_private: true)
69 domains << PublicSuffix.parse(element, ignore_private: true)
5470 else
5571 invalid_domains << element
5672 end
5874
5975 # @return [ Boolean ] Whether or not the host is in the scope
6076 def include?(host)
61 if PublicSuffix.valid?(host)
62 domain = PublicSuffix.parse(host)
77 if PublicSuffix.valid?(host, ignore_private: true)
78 domain = PublicSuffix.parse(host, ignore_private: true)
6379
6480 domains.each { |d| return true if domain.match(d) }
6581 else
3434 # @return [ Boolean ]
3535 def vulnerable?
3636 raise NotImplementedError
37 end
38
39 # @return [ Regexp ] The pattern related to the target url, also matches escaped /, such as
40 # in JSON JS data: http:\/\/t.com\/
41 def url_pattern
42 @url_pattern ||= Regexp.new(Regexp.escape(url).gsub(/https?/i, 'https?').gsub('/', '\\\\\?/'), Regexp::IGNORECASE)
3743 end
3844
3945 # @param [ String ] xpath
8288 # @param [ Typhoeus::Response, String ] page
8389 # @param [ String ] xpath
8490 #
85 # @yield [ String, Nokogiri::XML::Element ] The url and its associated tag
91 # @yield [ Addressable::URI, Nokogiri::XML::Element ] The URI and its associated tag
8692 #
87 # @return [ Array<String> ] The absolute URLs detected in the response's body from the HTML tags
88 def urls_from_page(page = nil, xpath = '//@href|//@src|//@data-src')
93 # @return [ Array<Addressable::URI> ] The absolute URIs detected in the response's body from the HTML tags
94 def uris_from_page(page = nil, xpath = '//@href|//@src|//@data-src')
8995 page = NS::Browser.get(url(page)) unless page.is_a?(Typhoeus::Response)
9096 found = []
9197
101107 next
102108 end
103109
104 node_uri_string = node_uri.to_s
105
106110 next unless node_uri.host
107111
108 yield node_uri_string, node.parent if block_given? && !found.include?(node_uri_string)
112 yield node_uri, node.parent if block_given? && !found.include?(node_uri)
109113
110 found << node_uri_string
114 found << node_uri
111115 end
112116
113117 found.uniq
11
22 # Version
33 module CMSScanner
4 VERSION = '0.0.44.1'
4 VERSION = '0.5.0'
55 end
152152 expect { core.before_scan }.to raise_error(
153153 CMSScanner::Error::HTTPRedirect,
154154 "The URL supplied redirects to #{redirection}." \
155 ' Use the --ignore-main-redirect option to ignore the redirection and scan the target.'
155 ' Use the --ignore-main-redirect option to ignore the redirection and scan the target,' \
156 ' or change the --url option value to the redirected URL.'
156157 )
157158 end
158159 end
33 <a href="mailto:[email protected]">eMail me!</a>
44 <a href="jaVaScript:alert(2)">Click me Fool !</a>
55
6 <script src=" https://cdn.e.org/f2.js "></script> <!-- head & tail spaces should be removed -->
6 <script src=" https://a.cdn.com/f2.js "></script> <!-- head & tail spaces should be removed -->
77
88 <script src="/script/s.js"></script>
99
0 <a href="http://e.org/f.txt">Link</a>
1 Duplicates should be ignored
2 <a href="http://e.org/f.txt">Link</a>
3 <img src="http://e.org/f.txt" />
4
5 <a href="mailto:[email protected]">eMail me!</a>
6 <a href="jaVaScript:alert(2)">Click me Fool !</a>
7
8 Head and tail spaces should be removed
9 <script src=" https://cdn.e.org/f2.js "></script>
10
11 <script src="/script/s.js"></script>
12
13 <link rel="alternate" type="application/rss+xml" title="Spec" href="http://wp-lamp/feed.xml" />
14
15 <a href="">Empty Link should be ignored</a>
16
17 <img src="//g.com/img.jpg" width="" height="" alt="" />
18
19 <a href="http://">no host, should be ignored</a>
20
21 Don't parse that either
22 <img src="" />
23
24 <img class="fl-photo-img wp-image-608 size-full" src="" alt="XXX" itemprop="image" height="10" width="100" data-src="//g.org/logo.png"
+0
-24
spec/fixtures/target/urls_from_page.html less more
0 <a href="http://e.org/f.txt">Link</a>
1 Duplicates should be ignored
2 <a href="http://e.org/f.txt">Link</a>
3
4 <a href="mailto:[email protected]">eMail me!</a>
5 <a href="jaVaScript:alert(2)">Click me Fool !</a>
6
7 Head and tail spaces should be removed
8 <script src=" https://cdn.e.org/f2.js "></script>
9
10 <script src="/script/s.js"></script>
11
12 <link rel="alternate" type="application/rss+xml" title="Spec" href="http://wp-lamp/feed.xml" />
13
14 <a href="">Empty Link should be ignored</a>
15
16 <img src="//g.com/img.jpg" width="" height="" alt="" />
17
18 <a href="http://">no host, should be ignored</a>
19
20 Don't parse that either
21 <img src="" />
22
23 <img class="fl-photo-img wp-image-608 size-full" src="" alt="XXX" itemprop="image" height="10" width="100" data-src="//g.org/logo.png"
1919 def new_method
2020 'working'
2121 end
22 end
23
24 class ParsedCli < CMSScanner::ParsedCli
2225 end
2326
2427 # Testing the override of the register_options_files
5558 let(:formatter_class) { SubScanner::Formatter }
5659 let(:target_url) { 'http://ex.lo/' }
5760
58 before do
59 SubScanner::ParsedCli.options = { url: target_url }
60 end
61 context 'when no CLI options given' do
62 it 'runs the controlllers and calls the formatter in the correct order' do
63 expect(scanner.controllers).to receive(:run).ordered.and_call_original
6164
62 describe '#app_name' do
63 it 'returns the correct app_name' do
64 expect(SubScanner.app_name).to eql 'subscanner'
65 expect(scanner.formatter).to receive(:output)
66 .ordered
67 .with('@usage', msg: 'One of the following options is required: url, help, hh, version')
68
69 expect(scanner.formatter).to receive(:beautify).ordered
70
71 scanner.run
6572 end
6673 end
6774
68 describe 'Browser#default_user_agent' do
69 it 'returns the correct user_agent' do
70 expect(SubScanner::Browser.instance.default_user_agent).to eql 'SubScanner v1.0-Spec'
75 context 'when CLI options provided' do
76 before do
77 SubScanner::ParsedCli.options = { url: target_url }
7178 end
72 end
7379
74 describe 'Controllers' do
75 describe '#target' do
76 it 'loads the overrided Target class' do
77 target = scanner.controllers.first.target
78
79 expect(target).to be_a SubScanner::Target
80 expect(target).to respond_to(:new_method)
81 expect(target.new_method).to eq 'working'
82 expect(target.url).to eql target_url
80 describe '#app_name' do
81 it 'returns the correct app_name' do
82 expect(SubScanner.app_name).to eql 'subscanner'
8383 end
8484 end
8585
86 describe '#register_options_files' do
87 let(:options_file_path) { '.subscanner/rspec.yml' }
88
89 it 'register the correct file' do
90 allow(File).to receive(:exist?).and_call_original
91 allow(File).to receive(:exist?).with(options_file_path).and_return(true)
92
93 option_parser = SubScanner::Scan.new.controllers.option_parser
94
95 expect(option_parser.options_files.map(&:path)).to eql [options_file_path]
86 describe 'Browser#default_user_agent' do
87 it 'returns the correct user_agent' do
88 expect(SubScanner::Browser.instance.default_user_agent).to eql 'SubScanner v1.0-Spec'
9689 end
9790 end
98 end
9991
100 describe 'Controller::Base#tmp_directory' do
101 it 'returns the expected value' do
102 expect(scanner.controllers.first.tmp_directory).to eql '/tmp/subscanner'
103 end
104 end
92 describe 'Controllers' do
93 describe '#target' do
94 it 'loads the overrided Target class' do
95 target = scanner.controllers.first.target
10596
106 describe 'Formatter' do
107 it_behaves_like CMSScanner::Formatter::ClassMethods do
108 subject(:formatter) { formatter_class }
109 end
97 expect(target).to be_a SubScanner::Target
98 expect(target).to respond_to(:new_method)
99 expect(target.new_method).to eq 'working'
100 expect(target.url).to eql target_url
101 end
102 end
110103
111 describe '.load' do
112 it 'adds the #custom method for all formatters' do
113 formatter_class.availables.each do |format|
114 expect(formatter_class.load(format).custom).to eql 'It Works!'
104 describe '#register_options_files' do
105 let(:options_file_path) { '.subscanner/rspec.yml' }
106
107 it 'register the correct file' do
108 allow(File).to receive(:exist?).and_call_original
109 allow(File).to receive(:exist?).with(options_file_path).and_return(true)
110
111 option_parser = SubScanner::Scan.new.controllers.option_parser
112
113 expect(option_parser.options_files.map(&:path)).to eql [options_file_path]
115114 end
116115 end
117116 end
118117
119 describe '#views_directories' do
120 it 'returns the expected paths' do
121 expect(scanner.formatter.views_directories).to eql(
122 [
123 CMSScanner::APP_DIR, SubScanner::APP_DIR,
124 File.join(Dir.home, '.subscanner'), File.join(Dir.pwd, '.subscanner')
125 ].reduce([]) do |a, e|
126 a << File.join(e, 'views')
118 describe 'Controller::Base#tmp_directory' do
119 it 'returns the expected value' do
120 expect(scanner.controllers.first.tmp_directory).to eql '/tmp/subscanner'
121 end
122 end
123
124 describe 'Formatter' do
125 it_behaves_like CMSScanner::Formatter::ClassMethods do
126 subject(:formatter) { formatter_class }
127 end
128
129 describe '.load' do
130 it 'adds the #custom method for all formatters' do
131 formatter_class.availables.each do |format|
132 expect(formatter_class.load(format).custom).to eql 'It Works!'
127133 end
128 )
134 end
135 end
136
137 describe '#views_directories' do
138 it 'returns the expected paths' do
139 expect(scanner.formatter.views_directories).to eql(
140 [
141 CMSScanner::APP_DIR, SubScanner::APP_DIR,
142 File.join(Dir.home, '.subscanner'), File.join(Dir.pwd, '.subscanner')
143 ].reduce([]) do |a, e|
144 a << File.join(e, 'views')
145 end
146 )
147 end
129148 end
130149 end
131150 end
3131 describe '#in_scope?' do
3232 context 'when default scope (target domain)' do
3333 [nil, '', 'http://out-of-scope.com', '//jquery.com/j.js',
34 'javascript:alert(3)', 'mailto:[email protected]'].each do |url|
34 'javascript:alert(3)', 'mailto:[email protected]',
35 Addressable::URI.parse('https://out.cloudfront.net')].each do |url|
3536 it "returns false for #{url}" do
3637 expect(target.in_scope?(url)).to eql false
3738 end
3839 end
3940
40 %w[https://e.org/file.txt http://e.org/ //e.org].each do |url|
41 ['https://e.org/file.txt', 'http://e.org/', '//e.org', Addressable::URI.parse('http://e.org')].each do |url|
4142 it "returns true for #{url}" do
4243 expect(target.in_scope?(url)).to eql true
4344 end
4546 end
4647
4748 context 'when custom scope' do
48 let(:opts) { { scope: ['*.e.org', '192.168.1.12'] } }
49 let(:opts) { { scope: ['*.cdn.com', '192.168.1.12', '*.cloudfront.net'] } }
4950
5051 [nil, '', 'http://out-of-scope.com', '//jquery.com/j.js', 'http://192.168.1.2/'].each do |url|
5152 it "returns false for #{url}" do
5354 end
5455 end
5556
56 %w[http://e.org //cdn.e.org/f.txt http://s.e.org/ https://192.168.1.12/h].each do |url|
57 %w[
58 https://e.org //aa.cdn.com/f.txt http://s.cdn.com/
59 https://192.168.1.12/h https://aa.cloudfront.net/
60 ].each do |url|
5761 it "returns true for #{url}" do
5862 expect(target.in_scope?(url)).to eql true
5963 end
6165 end
6266 end
6367
64 describe '#in_scope_urls' do
68 describe '#in_scope_uris' do
6569 let(:res) { Typhoeus::Response.new(body: File.read(fixtures.join('index.html'))) }
6670
6771 context 'when block given' do
6872 it 'yield the url' do
69 expect { |b| target.in_scope_urls(res, &b) }
73 expect { |b| target.in_scope_uris(res, &b) }
7074 .to yield_successive_args(
71 ['http://e.org/f.txt', Nokogiri::XML::Element],
72 ['http://e.org/script/s.js', Nokogiri::XML::Element],
73 ['http://e.org/feed', Nokogiri::XML::Element]
75 [Addressable::URI.parse('http://e.org/f.txt'), Nokogiri::XML::Element],
76 [Addressable::URI.parse('http://e.org/script/s.js'), Nokogiri::XML::Element],
77 [Addressable::URI.parse('http://e.org/feed'), Nokogiri::XML::Element]
7478 )
7579 end
7680 end
7983 it 'returns the expected array' do
8084 xpath = '//link[@rel="alternate" and @type="application/rss+xml"]/@href'
8185
82 expect(target.in_scope_urls(res, xpath)).to eql(%w[http://e.org/feed])
86 expect(target.in_scope_uris(res, xpath)).to eql([Addressable::URI.parse('http://e.org/feed')])
8387 end
8488 end
8589
8690 context 'when no block given' do
87 after { expect(target.in_scope_urls(res)).to eql @expected }
91 after { expect(target.in_scope_uris(res)).to eql @expected }
8892
8993 context 'when default scope' do
9094 it 'returns the expected array' do
91 @expected = %w[http://e.org/f.txt http://e.org/script/s.js http://e.org/feed]
95 @expected = %w[http://e.org/f.txt http://e.org/script/s.js
96 http://e.org/feed].map { |url| Addressable::URI.parse(url) }
9297 end
9398 end
9499
95100 context 'when supplied scope' do
96 let(:opts) { super().merge(scope: ['*.e.org', 'wp-lamp']) }
101 let(:opts) { super().merge(scope: ['*.cdn.com', 'wp-lamp']) }
97102
98103 it 'returns the expected array' do
99 @expected = %w[http://e.org/f.txt https://cdn.e.org/f2.js http://e.org/script/s.js
100 http://wp-lamp/robots.txt http://e.org/feed]
104 @expected = %w[http://e.org/f.txt https://a.cdn.com/f2.js http://e.org/script/s.js
105 http://wp-lamp/robots.txt http://e.org/feed].map { |url| Addressable::URI.parse(url) }
101106 end
102107 end
103108 end
104109 end
110
111 describe '#scope_url_pattern' do
112 context 'when no scope given' do
113 its(:scope_url_pattern) { should eql %r{https?:\\?/\\?/(?:e\.org)\\?/?}i }
114 end
115
116 context 'when scope given' do
117 let(:opts) { super().merge(scope: ['*.cdn.org', 'wp-lamp']) }
118
119 its(:scope_url_pattern) { should eql %r{https?:\\?/\\?/(?:e\.org|.*\.cdn\.org|wp\-lamp)\\?/?}i }
120
121 context 'when target URL has a subdir' do
122 let(:url) { 'https://e.org/blog/test' }
123
124 its(:scope_url_pattern) { should eql %r{https?:\\?/\\?/(?:e\.org\\?/blog\\?/test|.*\.cdn\.org|wp\-lamp)\\?/?}i }
125 end
126 end
127 end
105128 end
3131 describe '#vulnerable' do
3232 it 'raises an error' do
3333 expect { target.vulnerable? }.to raise_error(NotImplementedError)
34 end
35 end
36
37 describe '#url_pattern' do
38 its(:url_pattern) { should eql %r{https?:\\?/\\?/e\.org\\?/}i }
39 its(:url_pattern) { should match 'https:\/\/e.org\/' }
40
41 context 'when already https protocol' do
42 let(:url) { 'htTpS://ex.com/' }
43
44 its(:url_pattern) { should eql %r{https?:\\?/\\?/ex\.com\\?/}i }
3445 end
3546 end
3647
122133 end
123134 end
124135
125 describe '#urls_from_page' do
126 let(:page) { Typhoeus::Response.new(body: File.read(fixtures.join('urls_from_page.html'))) }
136 describe '#uris_from_page' do
137 let(:page) { Typhoeus::Response.new(body: File.read(fixtures.join('uris_from_page.html'))) }
127138
128139 context 'when block given' do
129140 it 'yield the url' do
130 expect { |b| target.urls_from_page(page, &b) }
141 expect { |b| target.uris_from_page(page, &b) }
131142 .to yield_successive_args(
132 ['http://e.org/f.txt', Nokogiri::XML::Element],
133 ['https://cdn.e.org/f2.js', Nokogiri::XML::Element],
134 ['http://e.org/script/s.js', Nokogiri::XML::Element],
135 ['http://wp-lamp/feed.xml', Nokogiri::XML::Element],
136 ['http://g.com/img.jpg', Nokogiri::XML::Element],
137 ['http://g.org/logo.png', Nokogiri::XML::Element]
143 [Addressable::URI.parse('http://e.org/f.txt'), Nokogiri::XML::Element],
144 [Addressable::URI.parse('https://cdn.e.org/f2.js'), Nokogiri::XML::Element],
145 [Addressable::URI.parse('http://e.org/script/s.js'), Nokogiri::XML::Element],
146 [Addressable::URI.parse('http://wp-lamp/feed.xml'), Nokogiri::XML::Element],
147 [Addressable::URI.parse('http://g.com/img.jpg'), Nokogiri::XML::Element],
148 [Addressable::URI.parse('http://g.org/logo.png'), Nokogiri::XML::Element]
138149 )
139150 end
140151 end
141152
142153 context 'when no block given' do
143154 it 'returns the expected array' do
144 expect(target.urls_from_page(page)).to eql(
155 expect(target.uris_from_page(page)).to eql(
145156 %w[
146157 http://e.org/f.txt https://cdn.e.org/f2.js http://e.org/script/s.js
147158 http://wp-lamp/feed.xml http://g.com/img.jpg http://g.org/logo.png
148 ]
159 ].map { |url| Addressable::URI.parse(url) }
149160 )
150161 end
151162
153164 it 'returns the expected array' do
154165 xpath = '//link[@rel="alternate" and @type="application/rss+xml"]/@href'
155166
156 expect(target.urls_from_page(page, xpath)).to eql(%w[http://wp-lamp/feed.xml])
167 expect(target.uris_from_page(page, xpath)).to eql([Addressable::URI.parse('http://wp-lamp/feed.xml')])
157168 end
158169 end
159170 end