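// Package waybackarchive implements a subfinder passive source that extracts
// subdomains from URLs archived by the Wayback Machine (web.archive.org).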
package waybackarchive
import (
"bufio"
"context"
"fmt"
"net/url"
"strings"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping"
)
// Source is the passive scraping agent for the Wayback Machine
// (web.archive.org). It is an empty struct and carries no state or
// configuration of its own.
type Source struct{}
// Run queries the Wayback Machine CDX index for archived URLs matching
// *.domain/* and streams every subdomain extracted from them on the returned
// channel.
//
// The work is done in a background goroutine. The channel is closed once the
// response body has been consumed or a failure has been reported. Failures —
// the HTTP request itself, or a read error while scanning the body — are
// delivered as results of type subscraping.Error.
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
	results := make(chan subscraping.Result)

	go func() {
		defer close(results)

		resp, err := session.SimpleGet(ctx, fmt.Sprintf("http://web.archive.org/cdx/search/cdx?url=*.%s/*&output=txt&fl=original&collapse=urlkey", domain))
		if err != nil {
			results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
			session.DiscardHTTPResponse(resp)
			return
		}
		defer resp.Body.Close()

		// The response is plain text with one archived URL per line.
		scanner := bufio.NewScanner(resp.Body)
		for scanner.Scan() {
			line := scanner.Text()
			if line == "" {
				continue
			}

			// url.QueryUnescape returns an empty string when the line contains
			// a malformed escape sequence. Fall back to the raw line in that
			// case so the URL is still searched instead of silently dropped.
			if decoded, unescapeErr := url.QueryUnescape(line); unescapeErr == nil {
				line = decoded
			}

			subdomain := session.Extractor.FindString(line)
			if subdomain == "" {
				continue
			}

			// Fix for triple encoded URLs: after a single decode pass, the
			// remnants of "%25" / "%2F" escapes can end up glued to the front
			// of the match, so strip them.
			// NOTE(review): this also trims a genuine leading "25" or "2f"
			// from a hostname label — confirm that trade-off is acceptable.
			subdomain = strings.ToLower(subdomain)
			subdomain = strings.TrimPrefix(subdomain, "25")
			subdomain = strings.TrimPrefix(subdomain, "2f")

			results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
		}

		// Scan returns false both at EOF and on failure (read error, or a
		// line longer than the scanner's buffer). Report the latter so a
		// truncated result set is not mistaken for a complete one.
		if err := scanner.Err(); err != nil {
			results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
		}
	}()

	return results
}
// Name reports the identifier of this source, "waybackarchive".
func (s *Source) Name() string {
	const sourceName = "waybackarchive"
	return sourceName
}
// IsDefault always returns false for this source.
// NOTE(review): presumably tells the caller whether the source belongs to the
// default source set — confirm against the consuming interface.
func (s *Source) IsDefault() bool {
	const enabledByDefault = false
	return enabledByDefault
}
// HasRecursiveSupport always returns false for this source.
// NOTE(review): presumably signals whether the source can be used for
// recursive subdomain enumeration — confirm against the consuming interface.
func (s *Source) HasRecursiveSupport() bool {
	const recursive = false
	return recursive
}
// NeedsKey always returns false: this source is queried without an API key.
func (s *Source) NeedsKey() bool {
	const keyRequired = false
	return keyRequired
}
// AddApiKeys is a no-op: the supplied keys are discarded because this source
// needs no key.
func (s *Source) AddApiKeys(_ []string) {}