New upstream version 0.5.0
Sophie Brun
5 years ago
19 | 19 | s.require_paths = ['lib'] |
20 | 20 | |
21 | 21 | s.add_dependency 'nokogiri', '~> 1.10.0' |
22 | s.add_dependency 'opt_parse_validator', '~> 0.0.17.1' | |
22 | s.add_dependency 'opt_parse_validator', '~> 1.7.2' | |
23 | 23 | s.add_dependency 'public_suffix', '~> 3.0.0' |
24 | 24 | s.add_dependency 'ruby-progressbar', '~> 1.10.0' |
25 | 25 | s.add_dependency 'typhoeus', '~> 1.3.0' |
30 | 30 | s.add_development_dependency 'coveralls', '~> 0.8.0' |
31 | 31 | s.add_development_dependency 'rake', '~> 12.3' |
32 | 32 | s.add_development_dependency 'rspec', '~> 3.8.0' |
33 | s.add_development_dependency 'rspec-its', '~> 1.2.0' | |
34 | s.add_development_dependency 'rubocop', '~> 0.67.1' | |
33 | s.add_development_dependency 'rspec-its', '~> 1.3.0' | |
34 | s.add_development_dependency 'rubocop', '~> 0.67.2' | |
35 | 35 | s.add_development_dependency 'simplecov', '~> 0.16.1' |
36 | 36 | s.add_development_dependency 'webmock', '~> 3.5.1' |
37 | 37 | end |
63 | 63 | |
64 | 64 | def to_s |
65 | 65 | "The URL supplied redirects to #{redirect_uri}. Use the --ignore-main-redirect "\ |
66 | 'option to ignore the redirection and scan the target.' | |
66 | 'option to ignore the redirection and scan the target, or change the --url option value to the redirected URL.' | |
67 | 67 | end |
68 | 68 | end |
69 | 69 | end |
28 | 28 | # |
29 | 29 | # @return [ Array<String> ] |
30 | 30 | def passive_urls(_opts = {}) |
31 | target.in_scope_urls(target.homepage_res, passive_urls_xpath) | |
31 | target.in_scope_uris(target.homepage_res, passive_urls_xpath).map(&:to_s) | |
32 | 32 | end |
33 | 33 | |
34 | 34 | # @return [ String ] |
7 | 7 | @scope ||= Scope.new |
8 | 8 | end |
9 | 9 | |
10 | # @param [ String ] url An absolute URL | |
10 | # @param [ String, Addressable::URI ] url An absolute URL or URI | |
11 | 11 | # |
12 | 12 | # @return [ Boolean ] true if the url given is in scope |
13 | def in_scope?(url) | |
14 | scope.include?(Addressable::URI.parse(url.strip).host) | |
13 | def in_scope?(url_or_uri) | |
14 | url_or_uri = Addressable::URI.parse(url_or_uri.strip) unless url_or_uri.is_a?(Addressable::URI) | |
15 | ||
16 | scope.include?(url_or_uri.host) | |
15 | 17 | rescue StandardError |
16 | 18 | false |
17 | 19 | end |
19 | 21 | # @param [ Typhoeus::Response ] res |
20 | 22 | # @param [ String ] xpath |
21 | 23 | # |
22 | # @yield [ String, Nokogiri::XML::Element ] The in scope url and its associated tag | |
24 | # @yield [ Addressable::URI, Nokogiri::XML::Element ] The in scope url and its associated tag | |
23 | 25 | # |
24 | # @return [ Array<String> ] The in scope absolute URLs detected in the response's body | |
25 | def in_scope_urls(res, xpath = '//@href|//@src|//@data-src') | |
26 | # @return [ Array<Addressable::URI> ] The in scope absolute URIs detected in the response's body | |
27 | def in_scope_uris(res, xpath = '//@href|//@src|//@data-src') | |
26 | 28 | found = [] |
27 | 29 | |
28 | urls_from_page(res, xpath) do |url, tag| | |
29 | next unless in_scope?(url) | |
30 | uris_from_page(res, xpath) do |uri, tag| | |
31 | next unless in_scope?(uri) | |
30 | 32 | |
31 | yield url, tag if block_given? | |
33 | yield uri, tag if block_given? | |
32 | 34 | |
33 | found << url | |
35 | found << uri | |
34 | 36 | end |
35 | 37 | |
36 | 38 | found |
37 | 39 | end |
38 | 40 | |
41 | # Similar to Target#url_pattern but considering the in scope domains as well | |
42 | # | |
43 | # @return [ Regexp ] The pattern related to the target url and in scope domains, | |
44 | # it also matches escaped /, such as in JSON JS data: http:\/\/t.com\/ | |
45 | def scope_url_pattern | |
46 | return @scope_url_pattern if @scope_url_pattern | |
47 | ||
48 | domains = [uri.host + uri.path] + scope.domains[1..-1]&.map(&:to_s) + scope.invalid_domains | |
49 | ||
50 | domains.map! { |d| Regexp.escape(d.gsub(%r{/$}, '')).gsub('\*', '.*').gsub('/', '\\\\\?/') } | |
51 | ||
52 | @scope_url_pattern = %r{https?:\\?/\\?/(?:#{domains.join('|')})\\?/?}i | |
53 | end | |
54 | ||
39 | 55 | # Scope Implementation |
40 | 56 | class Scope |
41 | # @return [ Array<PublicSuffix::Domain ] The valid domains in scope | |
57 | # @return [ Array<PublicSuffix::Domain> ] The valid domains in scope | |
42 | 58 | def domains |
43 | 59 | @domains ||= [] |
44 | 60 | end |
49 | 65 | end |
50 | 66 | |
51 | 67 | def <<(element) |
52 | if PublicSuffix.valid?(element) | |
53 | domains << PublicSuffix.parse(element) | |
68 | if PublicSuffix.valid?(element, ignore_private: true) | |
69 | domains << PublicSuffix.parse(element, ignore_private: true) | |
54 | 70 | else |
55 | 71 | invalid_domains << element |
56 | 72 | end |
58 | 74 | |
59 | 75 | # @return [ Boolean ] Whether or not the host is in the scope |
60 | 76 | def include?(host) |
61 | if PublicSuffix.valid?(host) | |
62 | domain = PublicSuffix.parse(host) | |
77 | if PublicSuffix.valid?(host, ignore_private: true) | |
78 | domain = PublicSuffix.parse(host, ignore_private: true) | |
63 | 79 | |
64 | 80 | domains.each { |d| return true if domain.match(d) } |
65 | 81 | else |
34 | 34 | # @return [ Boolean ] |
35 | 35 | def vulnerable? |
36 | 36 | raise NotImplementedError |
37 | end | |
38 | ||
39 | # @return [ Regexp ] The pattern related to the target url, also matches escaped /, such as | |
40 | # in JSON JS data: http:\/\/t.com\/ | |
41 | def url_pattern | |
42 | @url_pattern ||= Regexp.new(Regexp.escape(url).gsub(/https?/i, 'https?').gsub('/', '\\\\\?/'), Regexp::IGNORECASE) | |
37 | 43 | end |
38 | 44 | |
39 | 45 | # @param [ String ] xpath |
82 | 88 | # @param [ Typhoeus::Response, String ] page |
83 | 89 | # @param [ String ] xpath |
84 | 90 | # |
85 | # @yield [ String, Nokogiri::XML::Element ] The url and its associated tag | |
91 | # @yield [ Addressable::URI, Nokogiri::XML::Element ] The url and its associated tag | |
86 | 92 | # |
87 | # @return [ Array<String> ] The absolute URLs detected in the response's body from the HTML tags | |
88 | def urls_from_page(page = nil, xpath = '//@href|//@src|//@data-src') | |
93 | # @return [ Array<Addressable::URI> ] The absolute URIs detected in the response's body from the HTML tags | |
94 | def uris_from_page(page = nil, xpath = '//@href|//@src|//@data-src') | |
89 | 95 | page = NS::Browser.get(url(page)) unless page.is_a?(Typhoeus::Response) |
90 | 96 | found = [] |
91 | 97 | |
101 | 107 | next |
102 | 108 | end |
103 | 109 | |
104 | node_uri_string = node_uri.to_s | |
105 | ||
106 | 110 | next unless node_uri.host |
107 | 111 | |
108 | yield node_uri_string, node.parent if block_given? && !found.include?(node_uri_string) | |
112 | yield node_uri, node.parent if block_given? && !found.include?(node_uri) | |
109 | 113 | |
110 | found << node_uri_string | |
114 | found << node_uri | |
111 | 115 | end |
112 | 116 | |
113 | 117 | found.uniq |
152 | 152 | expect { core.before_scan }.to raise_error( |
153 | 153 | CMSScanner::Error::HTTPRedirect, |
154 | 154 | "The URL supplied redirects to #{redirection}." \ |
155 | ' Use the --ignore-main-redirect option to ignore the redirection and scan the target.' | |
155 | ' Use the --ignore-main-redirect option to ignore the redirection and scan the target,' \ | |
156 | ' or change the --url option value to the redirected URL.' | |
156 | 157 | ) |
157 | 158 | end |
158 | 159 | end |
3 | 3 | <a href="mailto:[email protected]">eMail me!</a> |
4 | 4 | <a href="jaVaScript:alert(2)">Click me Fool !</a> |
5 | 5 | |
6 | <script src=" https://cdn.e.org/f2.js "></script> <!-- head & tail spaces should be removed --> | |
6 | <script src=" https://a.cdn.com/f2.js "></script> <!-- head & tail spaces should be removed --> | |
7 | 7 | |
8 | 8 | <script src="/script/s.js"></script> |
9 | 9 |
0 | <a href="http://e.org/f.txt">Link</a> | |
1 | Duplicates should be ignored | |
2 | <a href="http://e.org/f.txt">Link</a> | |
3 | <img src="http://e.org/f.txt" /> | |
4 | ||
5 | <a href="mailto:[email protected]">eMail me!</a> | |
6 | <a href="jaVaScript:alert(2)">Click me Fool !</a> | |
7 | ||
8 | Head and tail spaces should be removed | |
9 | <script src=" https://cdn.e.org/f2.js "></script> | |
10 | ||
11 | <script src="/script/s.js"></script> | |
12 | ||
13 | <link rel="alternate" type="application/rss+xml" title="Spec" href="http://wp-lamp/feed.xml" /> | |
14 | ||
15 | <a href="">Empty Link should be ignored</a> | |
16 | ||
17 | <img src="//g.com/img.jpg" width="" height="" alt="" /> | |
18 | ||
19 | <a href="http://">no host, should be ignored</a> | |
20 | ||
21 | Don't parse that either | |
22 | <img src="data:image/jpeg;base64,/9j/4AAQ/" /> | |
23 | ||
24 | <img class="fl-photo-img wp-image-608 size-full" src="data:image/png;base64,SNIPPED" alt="XXX" itemprop="image" height="10" width="100" data-src="//g.org/logo.png" |
0 | <a href="http://e.org/f.txt">Link</a> | |
1 | Duplicates should be ignored | |
2 | <a href="http://e.org/f.txt">Link</a> | |
3 | ||
4 | <a href="mailto:[email protected]">eMail me!</a> | |
5 | <a href="jaVaScript:alert(2)">Click me Fool !</a> | |
6 | ||
7 | Head and tail spaces should be removed | |
8 | <script src=" https://cdn.e.org/f2.js "></script> | |
9 | ||
10 | <script src="/script/s.js"></script> | |
11 | ||
12 | <link rel="alternate" type="application/rss+xml" title="Spec" href="http://wp-lamp/feed.xml" /> | |
13 | ||
14 | <a href="">Empty Link should be ignored</a> | |
15 | ||
16 | <img src="//g.com/img.jpg" width="" height="" alt="" /> | |
17 | ||
18 | <a href="http://">no host, should be ignored</a> | |
19 | ||
20 | Don't parse that either | |
21 | <img src="data:image/jpeg;base64,/9j/4AAQ/" /> | |
22 | ||
23 | <img class="fl-photo-img wp-image-608 size-full" src="data:image/png;base64,SNIPPED" alt="XXX" itemprop="image" height="10" width="100" data-src="//g.org/logo.png" |
19 | 19 | def new_method |
20 | 20 | 'working' |
21 | 21 | end |
22 | end | |
23 | ||
24 | class ParsedCli < CMSScanner::ParsedCli | |
22 | 25 | end |
23 | 26 | |
24 | 27 | # Testing the override of the register_options_files |
55 | 58 | let(:formatter_class) { SubScanner::Formatter } |
56 | 59 | let(:target_url) { 'http://ex.lo/' } |
57 | 60 | |
58 | before do | |
59 | SubScanner::ParsedCli.options = { url: target_url } | |
60 | end | |
61 | context 'when no CLI options given' do | |
62 | it 'runs the controlllers and calls the formatter in the correct order' do | |
63 | expect(scanner.controllers).to receive(:run).ordered.and_call_original | |
61 | 64 | |
62 | describe '#app_name' do | |
63 | it 'returns the correct app_name' do | |
64 | expect(SubScanner.app_name).to eql 'subscanner' | |
65 | expect(scanner.formatter).to receive(:output) | |
66 | .ordered | |
67 | .with('@usage', msg: 'One of the following options is required: url, help, hh, version') | |
68 | ||
69 | expect(scanner.formatter).to receive(:beautify).ordered | |
70 | ||
71 | scanner.run | |
65 | 72 | end |
66 | 73 | end |
67 | 74 | |
68 | describe 'Browser#default_user_agent' do | |
69 | it 'returns the correct user_agent' do | |
70 | expect(SubScanner::Browser.instance.default_user_agent).to eql 'SubScanner v1.0-Spec' | |
75 | context 'when CLI options provided' do | |
76 | before do | |
77 | SubScanner::ParsedCli.options = { url: target_url } | |
71 | 78 | end |
72 | end | |
73 | 79 | |
74 | describe 'Controllers' do | |
75 | describe '#target' do | |
76 | it 'loads the overrided Target class' do | |
77 | target = scanner.controllers.first.target | |
78 | ||
79 | expect(target).to be_a SubScanner::Target | |
80 | expect(target).to respond_to(:new_method) | |
81 | expect(target.new_method).to eq 'working' | |
82 | expect(target.url).to eql target_url | |
80 | describe '#app_name' do | |
81 | it 'returns the correct app_name' do | |
82 | expect(SubScanner.app_name).to eql 'subscanner' | |
83 | 83 | end |
84 | 84 | end |
85 | 85 | |
86 | describe '#register_options_files' do | |
87 | let(:options_file_path) { '.subscanner/rspec.yml' } | |
88 | ||
89 | it 'register the correct file' do | |
90 | allow(File).to receive(:exist?).and_call_original | |
91 | allow(File).to receive(:exist?).with(options_file_path).and_return(true) | |
92 | ||
93 | option_parser = SubScanner::Scan.new.controllers.option_parser | |
94 | ||
95 | expect(option_parser.options_files.map(&:path)).to eql [options_file_path] | |
86 | describe 'Browser#default_user_agent' do | |
87 | it 'returns the correct user_agent' do | |
88 | expect(SubScanner::Browser.instance.default_user_agent).to eql 'SubScanner v1.0-Spec' | |
96 | 89 | end |
97 | 90 | end |
98 | end | |
99 | 91 | |
100 | describe 'Controller::Base#tmp_directory' do | |
101 | it 'returns the expected value' do | |
102 | expect(scanner.controllers.first.tmp_directory).to eql '/tmp/subscanner' | |
103 | end | |
104 | end | |
92 | describe 'Controllers' do | |
93 | describe '#target' do | |
94 | it 'loads the overrided Target class' do | |
95 | target = scanner.controllers.first.target | |
105 | 96 | |
106 | describe 'Formatter' do | |
107 | it_behaves_like CMSScanner::Formatter::ClassMethods do | |
108 | subject(:formatter) { formatter_class } | |
109 | end | |
97 | expect(target).to be_a SubScanner::Target | |
98 | expect(target).to respond_to(:new_method) | |
99 | expect(target.new_method).to eq 'working' | |
100 | expect(target.url).to eql target_url | |
101 | end | |
102 | end | |
110 | 103 | |
111 | describe '.load' do | |
112 | it 'adds the #custom method for all formatters' do | |
113 | formatter_class.availables.each do |format| | |
114 | expect(formatter_class.load(format).custom).to eql 'It Works!' | |
104 | describe '#register_options_files' do | |
105 | let(:options_file_path) { '.subscanner/rspec.yml' } | |
106 | ||
107 | it 'register the correct file' do | |
108 | allow(File).to receive(:exist?).and_call_original | |
109 | allow(File).to receive(:exist?).with(options_file_path).and_return(true) | |
110 | ||
111 | option_parser = SubScanner::Scan.new.controllers.option_parser | |
112 | ||
113 | expect(option_parser.options_files.map(&:path)).to eql [options_file_path] | |
115 | 114 | end |
116 | 115 | end |
117 | 116 | end |
118 | 117 | |
119 | describe '#views_directories' do | |
120 | it 'returns the expected paths' do | |
121 | expect(scanner.formatter.views_directories).to eql( | |
122 | [ | |
123 | CMSScanner::APP_DIR, SubScanner::APP_DIR, | |
124 | File.join(Dir.home, '.subscanner'), File.join(Dir.pwd, '.subscanner') | |
125 | ].reduce([]) do |a, e| | |
126 | a << File.join(e, 'views') | |
118 | describe 'Controller::Base#tmp_directory' do | |
119 | it 'returns the expected value' do | |
120 | expect(scanner.controllers.first.tmp_directory).to eql '/tmp/subscanner' | |
121 | end | |
122 | end | |
123 | ||
124 | describe 'Formatter' do | |
125 | it_behaves_like CMSScanner::Formatter::ClassMethods do | |
126 | subject(:formatter) { formatter_class } | |
127 | end | |
128 | ||
129 | describe '.load' do | |
130 | it 'adds the #custom method for all formatters' do | |
131 | formatter_class.availables.each do |format| | |
132 | expect(formatter_class.load(format).custom).to eql 'It Works!' | |
127 | 133 | end |
128 | ) | |
134 | end | |
135 | end | |
136 | ||
137 | describe '#views_directories' do | |
138 | it 'returns the expected paths' do | |
139 | expect(scanner.formatter.views_directories).to eql( | |
140 | [ | |
141 | CMSScanner::APP_DIR, SubScanner::APP_DIR, | |
142 | File.join(Dir.home, '.subscanner'), File.join(Dir.pwd, '.subscanner') | |
143 | ].reduce([]) do |a, e| | |
144 | a << File.join(e, 'views') | |
145 | end | |
146 | ) | |
147 | end | |
129 | 148 | end |
130 | 149 | end |
131 | 150 | end |
31 | 31 | describe '#in_scope?' do |
32 | 32 | context 'when default scope (target domain)' do |
33 | 33 | [nil, '', 'http://out-of-scope.com', '//jquery.com/j.js', |
34 | 'javascript:alert(3)', 'mailto:[email protected]'].each do |url| | |
34 | 'javascript:alert(3)', 'mailto:[email protected]', | |
35 | Addressable::URI.parse('https://out.cloudfront.net')].each do |url| | |
35 | 36 | it "returns false for #{url}" do |
36 | 37 | expect(target.in_scope?(url)).to eql false |
37 | 38 | end |
38 | 39 | end |
39 | 40 | |
40 | %w[https://e.org/file.txt http://e.org/ //e.org].each do |url| | |
41 | ['https://e.org/file.txt', 'http://e.org/', '//e.org', Addressable::URI.parse('http://e.org')].each do |url| | |
41 | 42 | it "returns true for #{url}" do |
42 | 43 | expect(target.in_scope?(url)).to eql true |
43 | 44 | end |
45 | 46 | end |
46 | 47 | |
47 | 48 | context 'when custom scope' do |
48 | let(:opts) { { scope: ['*.e.org', '192.168.1.12'] } } | |
49 | let(:opts) { { scope: ['*.cdn.com', '192.168.1.12', '*.cloudfront.net'] } } | |
49 | 50 | |
50 | 51 | [nil, '', 'http://out-of-scope.com', '//jquery.com/j.js', 'http://192.168.1.2/'].each do |url| |
51 | 52 | it "returns false for #{url}" do |
53 | 54 | end |
54 | 55 | end |
55 | 56 | |
56 | %w[http://e.org //cdn.e.org/f.txt http://s.e.org/ https://192.168.1.12/h].each do |url| | |
57 | %w[ | |
58 | https://e.org //aa.cdn.com/f.txt http://s.cdn.com/ | |
59 | https://192.168.1.12/h https://aa.cloudfront.net/ | |
60 | ].each do |url| | |
57 | 61 | it "returns true for #{url}" do |
58 | 62 | expect(target.in_scope?(url)).to eql true |
59 | 63 | end |
61 | 65 | end |
62 | 66 | end |
63 | 67 | |
64 | describe '#in_scope_urls' do | |
68 | describe '#in_scope_uris' do | |
65 | 69 | let(:res) { Typhoeus::Response.new(body: File.read(fixtures.join('index.html'))) } |
66 | 70 | |
67 | 71 | context 'when block given' do |
68 | 72 | it 'yield the url' do |
69 | expect { |b| target.in_scope_urls(res, &b) } | |
73 | expect { |b| target.in_scope_uris(res, &b) } | |
70 | 74 | .to yield_successive_args( |
71 | ['http://e.org/f.txt', Nokogiri::XML::Element], | |
72 | ['http://e.org/script/s.js', Nokogiri::XML::Element], | |
73 | ['http://e.org/feed', Nokogiri::XML::Element] | |
75 | [Addressable::URI.parse('http://e.org/f.txt'), Nokogiri::XML::Element], | |
76 | [Addressable::URI.parse('http://e.org/script/s.js'), Nokogiri::XML::Element], | |
77 | [Addressable::URI.parse('http://e.org/feed'), Nokogiri::XML::Element] | |
74 | 78 | ) |
75 | 79 | end |
76 | 80 | end |
79 | 83 | it 'returns the expected array' do |
80 | 84 | xpath = '//link[@rel="alternate" and @type="application/rss+xml"]/@href' |
81 | 85 | |
82 | expect(target.in_scope_urls(res, xpath)).to eql(%w[http://e.org/feed]) | |
86 | expect(target.in_scope_uris(res, xpath)).to eql([Addressable::URI.parse('http://e.org/feed')]) | |
83 | 87 | end |
84 | 88 | end |
85 | 89 | |
86 | 90 | context 'when no block given' do |
87 | after { expect(target.in_scope_urls(res)).to eql @expected } | |
91 | after { expect(target.in_scope_uris(res)).to eql @expected } | |
88 | 92 | |
89 | 93 | context 'when default scope' do |
90 | 94 | it 'returns the expected array' do |
91 | @expected = %w[http://e.org/f.txt http://e.org/script/s.js http://e.org/feed] | |
95 | @expected = %w[http://e.org/f.txt http://e.org/script/s.js | |
96 | http://e.org/feed].map { |url| Addressable::URI.parse(url) } | |
92 | 97 | end |
93 | 98 | end |
94 | 99 | |
95 | 100 | context 'when supplied scope' do |
96 | let(:opts) { super().merge(scope: ['*.e.org', 'wp-lamp']) } | |
101 | let(:opts) { super().merge(scope: ['*.cdn.com', 'wp-lamp']) } | |
97 | 102 | |
98 | 103 | it 'returns the expected array' do |
99 | @expected = %w[http://e.org/f.txt https://cdn.e.org/f2.js http://e.org/script/s.js | |
100 | http://wp-lamp/robots.txt http://e.org/feed] | |
104 | @expected = %w[http://e.org/f.txt https://a.cdn.com/f2.js http://e.org/script/s.js | |
105 | http://wp-lamp/robots.txt http://e.org/feed].map { |url| Addressable::URI.parse(url) } | |
101 | 106 | end |
102 | 107 | end |
103 | 108 | end |
104 | 109 | end |
110 | ||
111 | describe '#scope_url_pattern' do | |
112 | context 'when no scope given' do | |
113 | its(:scope_url_pattern) { should eql %r{https?:\\?/\\?/(?:e\.org)\\?/?}i } | |
114 | end | |
115 | ||
116 | context 'when scope given' do | |
117 | let(:opts) { super().merge(scope: ['*.cdn.org', 'wp-lamp']) } | |
118 | ||
119 | its(:scope_url_pattern) { should eql %r{https?:\\?/\\?/(?:e\.org|.*\.cdn\.org|wp\-lamp)\\?/?}i } | |
120 | ||
121 | context 'when target URL has a subdir' do | |
122 | let(:url) { 'https://e.org/blog/test' } | |
123 | ||
124 | its(:scope_url_pattern) { should eql %r{https?:\\?/\\?/(?:e\.org\\?/blog\\?/test|.*\.cdn\.org|wp\-lamp)\\?/?}i } | |
125 | end | |
126 | end | |
127 | end | |
105 | 128 | end |
31 | 31 | describe '#vulnerable' do |
32 | 32 | it 'raises an error' do |
33 | 33 | expect { target.vulnerable? }.to raise_error(NotImplementedError) |
34 | end | |
35 | end | |
36 | ||
37 | describe '#url_pattern' do | |
38 | its(:url_pattern) { should eql %r{https?:\\?/\\?/e\.org\\?/}i } | |
39 | its(:url_pattern) { should match 'https:\/\/e.org\/' } | |
40 | ||
41 | context 'when already https protocol' do | |
42 | let(:url) { 'htTpS://ex.com/' } | |
43 | ||
44 | its(:url_pattern) { should eql %r{https?:\\?/\\?/ex\.com\\?/}i } | |
34 | 45 | end |
35 | 46 | end |
36 | 47 | |
122 | 133 | end |
123 | 134 | end |
124 | 135 | |
125 | describe '#urls_from_page' do | |
126 | let(:page) { Typhoeus::Response.new(body: File.read(fixtures.join('urls_from_page.html'))) } | |
136 | describe '#uris_from_page' do | |
137 | let(:page) { Typhoeus::Response.new(body: File.read(fixtures.join('uris_from_page.html'))) } | |
127 | 138 | |
128 | 139 | context 'when block given' do |
129 | 140 | it 'yield the url' do |
130 | expect { |b| target.urls_from_page(page, &b) } | |
141 | expect { |b| target.uris_from_page(page, &b) } | |
131 | 142 | .to yield_successive_args( |
132 | ['http://e.org/f.txt', Nokogiri::XML::Element], | |
133 | ['https://cdn.e.org/f2.js', Nokogiri::XML::Element], | |
134 | ['http://e.org/script/s.js', Nokogiri::XML::Element], | |
135 | ['http://wp-lamp/feed.xml', Nokogiri::XML::Element], | |
136 | ['http://g.com/img.jpg', Nokogiri::XML::Element], | |
137 | ['http://g.org/logo.png', Nokogiri::XML::Element] | |
143 | [Addressable::URI.parse('http://e.org/f.txt'), Nokogiri::XML::Element], | |
144 | [Addressable::URI.parse('https://cdn.e.org/f2.js'), Nokogiri::XML::Element], | |
145 | [Addressable::URI.parse('http://e.org/script/s.js'), Nokogiri::XML::Element], | |
146 | [Addressable::URI.parse('http://wp-lamp/feed.xml'), Nokogiri::XML::Element], | |
147 | [Addressable::URI.parse('http://g.com/img.jpg'), Nokogiri::XML::Element], | |
148 | [Addressable::URI.parse('http://g.org/logo.png'), Nokogiri::XML::Element] | |
138 | 149 | ) |
139 | 150 | end |
140 | 151 | end |
141 | 152 | |
142 | 153 | context 'when no block given' do |
143 | 154 | it 'returns the expected array' do |
144 | expect(target.urls_from_page(page)).to eql( | |
155 | expect(target.uris_from_page(page)).to eql( | |
145 | 156 | %w[ |
146 | 157 | http://e.org/f.txt https://cdn.e.org/f2.js http://e.org/script/s.js |
147 | 158 | http://wp-lamp/feed.xml http://g.com/img.jpg http://g.org/logo.png |
148 | ] | |
159 | ].map { |url| Addressable::URI.parse(url) } | |
149 | 160 | ) |
150 | 161 | end |
151 | 162 | |
153 | 164 | it 'returns the expected array' do |
154 | 165 | xpath = '//link[@rel="alternate" and @type="application/rss+xml"]/@href' |
155 | 166 | |
156 | expect(target.urls_from_page(page, xpath)).to eql(%w[http://wp-lamp/feed.xml]) | |
167 | expect(target.uris_from_page(page, xpath)).to eql([Addressable::URI.parse('http://wp-lamp/feed.xml')]) | |
157 | 168 | end |
158 | 169 | end |
159 | 170 | end |