//go:generate go run ../cmd/gen/gen.go
// Package publicsuffix provides a domain name parser
// based on data from the public suffix list http://publicsuffix.org/.
// A public suffix is one under which Internet users can directly register names.
package publicsuffix
import (
"bufio"
"fmt"
"io"
"net/http/cookiejar"
"os"
"strings"
"golang.org/x/net/idna"
)
const (
// Version identifies the current library version.
// This is a pro forma convention given that Go dependencies
// tends to be fetched directly from the repo.
Version = "0.13.0"
// NormalType represents a normal rule such as "com"
NormalType = 1
// WildcardType represents a wildcard rule such as "*.com"
WildcardType = 2
// ExceptionType represents an exception to a wildard rule
ExceptionType = 3
listTokenPrivateDomains = "===BEGIN PRIVATE DOMAINS==="
listTokenComment = "//"
)
// DefaultList is the default List and it is used by Parse and Domain.
var DefaultList = NewList()
// DefaultRule is the default Rule that represents "*".
var DefaultRule = MustNewRule("*")
// DefaultParserOptions are the default options used to parse a Public Suffix list.
var DefaultParserOptions = &ParserOption{PrivateDomains: true, ASCIIEncoded: false}
// DefaultFindOptions are the default options used to perform the lookup of rules in the list.
var DefaultFindOptions = &FindOptions{IgnorePrivate: false, DefaultRule: DefaultRule}
// Rule represents a single rule in a Public Suffix List.
type Rule struct {
Type int
Value string
Length int
Private bool
}
// ParserOption are the options you can use to customize the way a List
// is parsed from a file or a string.
type ParserOption struct {
// Set to false to skip the private domains when parsing.
// Default to true, which means the private domains are included.
PrivateDomains bool
// Set to false if the input is encoded in U-labels (Unicode)
// as opposite to A-labels.
// Default to false, which means the list is containing Unicode domains.
// This is the default because the original PSL currently contains Unicode.
ASCIIEncoded bool
}
// FindOptions are the options you can use to customize the way a Rule
// is searched within the list.
type FindOptions struct {
// Set to true to ignore the rules within the "Private" section of the Public Suffix List.
IgnorePrivate bool
// The default rule to use when no rule matches the input.
// The format Public Suffix algorithm states that the rule "*" should be used when no other rule matches,
// but some consumers may have different needs.
DefaultRule *Rule
}
// List represents a Public Suffix List.
type List struct {
// rules is kept private because you should not access rules directly
rules map[string]*Rule
}
// NewList creates a new empty list.
func NewList() *List {
return &List{
rules: map[string]*Rule{},
}
}
// NewListFromString parses a string that represents a Public Suffix source
// and returns a List initialized with the rules in the source.
func NewListFromString(src string, options *ParserOption) (*List, error) {
l := NewList()
_, err := l.LoadString(src, options)
return l, err
}
// NewListFromFile parses a string that represents a Public Suffix source
// and returns a List initialized with the rules in the source.
func NewListFromFile(path string, options *ParserOption) (*List, error) {
l := NewList()
_, err := l.LoadFile(path, options)
return l, err
}
// Load parses and loads a set of rules from an io.Reader into the current list.
func (l *List) Load(r io.Reader, options *ParserOption) ([]Rule, error) {
return l.parse(r, options)
}
// LoadString parses and loads a set of rules from a String into the current list.
func (l *List) LoadString(src string, options *ParserOption) ([]Rule, error) {
r := strings.NewReader(src)
return l.parse(r, options)
}
// LoadFile parses and loads a set of rules from a File into the current list.
func (l *List) LoadFile(path string, options *ParserOption) ([]Rule, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
return l.parse(f, options)
}
// AddRule adds a new rule to the list.
//
// The exact position of the rule into the list is unpredictable.
// The list may be optimized internally for lookups, therefore the algorithm
// will decide the best position for the new rule.
func (l *List) AddRule(r *Rule) error {
l.rules[r.Value] = r
return nil
}
// Size returns the size of the list, which is the number of rules.
func (l *List) Size() int {
return len(l.rules)
}
func (l *List) parse(r io.Reader, options *ParserOption) ([]Rule, error) {
if options == nil {
options = DefaultParserOptions
}
var rules []Rule
scanner := bufio.NewScanner(r)
var section int // 1 == ICANN, 2 == PRIVATE
Scanning:
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
switch {
// skip blank lines
case line == "":
break
// include private domains or stop scanner
case strings.Contains(line, listTokenPrivateDomains):
if !options.PrivateDomains {
break Scanning
}
section = 2
// skip comments
case strings.HasPrefix(line, listTokenComment):
break
default:
var rule *Rule
var err error
if options.ASCIIEncoded {
rule, err = NewRule(line)
} else {
rule, err = NewRuleUnicode(line)
}
if err != nil {
return []Rule{}, err
}
rule.Private = (section == 2)
l.AddRule(rule)
rules = append(rules, *rule)
}
}
return rules, scanner.Err()
}
// Find and returns the most appropriate rule for the domain name.
func (l *List) Find(name string, options *FindOptions) *Rule {
if options == nil {
options = DefaultFindOptions
}
part := name
for {
rule, ok := l.rules[part]
if ok && rule.Match(name) && !(options.IgnorePrivate && rule.Private) {
return rule
}
i := strings.IndexRune(part, '.')
if i < 0 {
return options.DefaultRule
}
part = part[i+1:]
}
}
// NewRule parses the rule content, creates and returns a Rule.
//
// The content of the rule MUST be encoded in ASCII (A-labels).
func NewRule(content string) (*Rule, error) {
var rule *Rule
var value string
switch content[0:1] {
case "*": // wildcard
if content == "*" {
value = ""
} else {
value = content[2:]
}
rule = &Rule{Type: WildcardType, Value: value, Length: len(Labels(value)) + 1}
case "!": // exception
value = content[1:]
rule = &Rule{Type: ExceptionType, Value: value, Length: len(Labels(value))}
default: // normal
value = content
rule = &Rule{Type: NormalType, Value: value, Length: len(Labels(value))}
}
return rule, nil
}
// NewRuleUnicode is like NewRule, but expects the content to be encoded in Unicode (U-labels).
func NewRuleUnicode(content string) (*Rule, error) {
var err error
content, err = ToASCII(content)
if err != nil {
return nil, err
}
return NewRule(content)
}
// MustNewRule is like NewRule, but panics if the content cannot be parsed.
func MustNewRule(content string) *Rule {
rule, err := NewRule(content)
if err != nil {
panic(err)
}
return rule
}
// Match checks if the rule matches the name.
//
// A domain name is said to match a rule if and only if all of the following conditions are met:
// - When the domain and rule are split into corresponding labels,
// that the domain contains as many or more labels than the rule.
// - Beginning with the right-most labels of both the domain and the rule,
// and continuing for all labels in the rule, one finds that for every pair,
// either they are identical, or that the label from the rule is "*".
//
// See https://publicsuffix.org/list/
func (r *Rule) Match(name string) bool {
left := strings.TrimSuffix(name, r.Value)
// the name contains as many labels than the rule
// this is a match, unless it's a wildcard
// because the wildcard requires one more label
if left == "" {
return r.Type != WildcardType
}
// if there is one more label, the rule match
// because either the rule is shorter than the domain
// or the rule is a wildcard and there is one more label
return left[len(left)-1:] == "."
}
// Decompose takes a name as input and decomposes it into a tuple of <TRD+SLD, TLD>,
// according to the rule definition and type.
func (r *Rule) Decompose(name string) (result [2]string) {
if r == DefaultRule {
i := strings.LastIndex(name, ".")
if i < 0 {
return
}
result[0], result[1] = name[:i], name[i+1:]
return
}
switch r.Type {
case NormalType:
name = strings.TrimSuffix(name, r.Value)
if len(name) == 0 {
return
}
result[0], result[1] = name[:len(name)-1], r.Value
case WildcardType:
name := strings.TrimSuffix(name, r.Value)
if len(name) == 0 {
return
}
name = name[:len(name)-1]
i := strings.LastIndex(name, ".")
if i < 0 {
return
}
result[0], result[1] = name[:i], name[i+1:]+"."+r.Value
case ExceptionType:
i := strings.IndexRune(r.Value, '.')
if i < 0 {
return
}
suffix := r.Value[i+1:]
name = strings.TrimSuffix(name, suffix)
if len(name) == 0 {
return
}
result[0], result[1] = name[:len(name)-1], suffix
}
return
}
// Labels decomposes given domain name into labels,
// corresponding to the dot-separated tokens.
func Labels(name string) []string {
return strings.Split(name, ".")
}
// DomainName represents a domain name.
type DomainName struct {
TLD string
SLD string
TRD string
Rule *Rule
}
// String joins the components of the domain name into a single string.
// Empty labels are skipped.
//
// Examples:
//
// DomainName{"com", "example"}.String()
// // example.com
// DomainName{"com", "example", "www"}.String()
// // www.example.com
//
func (d *DomainName) String() string {
switch {
case d.TLD == "":
return ""
case d.SLD == "":
return d.TLD
case d.TRD == "":
return d.SLD + "." + d.TLD
default:
return d.TRD + "." + d.SLD + "." + d.TLD
}
}
// Domain extract and return the domain name from the input
// using the default (Public Suffix) List.
//
// Examples:
//
// publicsuffix.Domain("example.com")
// // example.com
// publicsuffix.Domain("www.example.com")
// // example.com
// publicsuffix.Domain("www.example.co.uk")
// // example.co.uk
//
func Domain(name string) (string, error) {
return DomainFromListWithOptions(DefaultList, name, DefaultFindOptions)
}
// Parse decomposes the name into TLD, SLD, TRD
// using the default (Public Suffix) List,
// and returns the result as a DomainName
//
// Examples:
//
// list := NewList()
//
// publicsuffix.Parse("example.com")
// // &DomainName{"com", "example"}
// publicsuffix.Parse("www.example.com")
// // &DomainName{"com", "example", "www"}
// publicsuffix.Parse("www.example.co.uk")
// // &DomainName{"co.uk", "example"}
//
func Parse(name string) (*DomainName, error) {
return ParseFromListWithOptions(DefaultList, name, DefaultFindOptions)
}
// DomainFromListWithOptions extract and return the domain name from the input
// using the (Public Suffix) list passed as argument.
//
// Examples:
//
// list := NewList()
//
// publicsuffix.DomainFromListWithOptions(list, "example.com")
// // example.com
// publicsuffix.DomainFromListWithOptions(list, "www.example.com")
// // example.com
// publicsuffix.DomainFromListWithOptions(list, "www.example.co.uk")
// // example.co.uk
//
func DomainFromListWithOptions(l *List, name string, options *FindOptions) (string, error) {
dn, err := ParseFromListWithOptions(l, name, options)
if err != nil {
return "", err
}
return dn.SLD + "." + dn.TLD, nil
}
// ParseFromListWithOptions decomposes the name into TLD, SLD, TRD
// using the (Public Suffix) list passed as argument,
// and returns the result as a DomainName
//
// Examples:
//
// list := NewList()
//
// publicsuffix.ParseFromListWithOptions(list, "example.com")
// // &DomainName{"com", "example"}
// publicsuffix.ParseFromListWithOptions(list, "www.example.com")
// // &DomainName{"com", "example", "www"}
// publicsuffix.ParseFromListWithOptions(list, "www.example.co.uk")
// // &DomainName{"co.uk", "example"}
//
func ParseFromListWithOptions(l *List, name string, options *FindOptions) (*DomainName, error) {
n, err := normalize(name)
if err != nil {
return nil, err
}
r := l.Find(n, options)
if r == nil {
return nil, fmt.Errorf("no rule matching name %s", name)
}
parts := r.Decompose(n)
left, tld := parts[0], parts[1]
if tld == "" {
return nil, fmt.Errorf("%s is a suffix", n)
}
dn := &DomainName{
Rule: r,
TLD: tld,
}
if i := strings.LastIndex(left, "."); i < 0 {
dn.SLD = left
} else {
dn.TRD = left[:i]
dn.SLD = left[i+1:]
}
return dn, nil
}
func normalize(name string) (string, error) {
ret := strings.ToLower(name)
if ret == "" {
return "", fmt.Errorf("name is blank")
}
if ret[0] == '.' {
return "", fmt.Errorf("name %s starts with a dot", ret)
}
return ret, nil
}
// ToASCII is a wrapper for idna.ToASCII.
//
// This wrapper exists because idna.ToASCII backward-compatibility was broken twice in few months
// and I can't call this package directly anymore. The wrapper performs some terrible-but-necessary
// before-after replacements to make sure an already ASCII input always results in the same output
// even if passed through ToASCII.
//
// See golang/net@67957fd0b1, golang/net@f2499483f9, golang/net@78ebe5c8b6,
// and weppos/publicsuffix-go#66.
func ToASCII(s string) (string, error) {
// .example.com should be .example.com
// ..example.com should be ..example.com
if strings.HasPrefix(s, ".") {
dotIndex := 0
for i := 0; i < len(s); i++ {
if s[i] == '.' {
dotIndex = i
} else {
break
}
}
out, err := idna.ToASCII(s[dotIndex+1:])
out = s[:dotIndex+1] + out
return out, err
}
return idna.ToASCII(s)
}
// ToUnicode is a wrapper for idna.ToUnicode.
//
// See ToASCII for more details about why this wrapper exists.
func ToUnicode(s string) (string, error) {
return idna.ToUnicode(s)
}
// CookieJarList implements the cookiejar.PublicSuffixList interface.
var CookieJarList cookiejar.PublicSuffixList = cookiejarList{DefaultList}
type cookiejarList struct {
List *List
}
// PublicSuffix implements cookiejar.PublicSuffixList.
func (l cookiejarList) PublicSuffix(domain string) string {
rule := l.List.Find(domain, nil)
return rule.Decompose(domain)[1]
}
// PublicSuffix implements cookiejar.String.
func (cookiejarList) String() string {
return defaultListVersion
}