#!/bin/bash
#
# bing-ip2hosts - Enumerate hostnames from Bing.com for an IP address.
# Bing.com is Microsoft's search engine which has an IP: search parameter.
#
# Copyright (C) 2009 - 2020 Andrew Horton aka urbanadventurer
# Homepage: http://www.morningstarsecurity.com/research/bing-ip2hosts
#
# License: GPLv3
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# ---------------------------------------------------------------------------
# Global configuration. Most values can be changed with command line options.
# ---------------------------------------------------------------------------
VERSION=1.0.5
TMP_DIR="${TMPDIR:-/tmp}" # where temporary files are created (-t DIR)
# Initialise the file options explicitly so that values exported in the
# caller's environment cannot silently redirect input or output.
INPUT_FILE=          # list of targets to scrape (-i FILE)
OUTPUT_FILE=         # file that results are appended to (-o FILE)
QUIET=0              # 1 = only print the final results (-q)
OUTPUT_IP=0          # 1 = prefix every result with "IP," (-c)
DISPLAY_URL_PREFIX=1 # 0 = print bare hostnames without the URL prefix (-u)
IP=
PREFIX=
# 0 = delete temporary files once they are no longer needed. Any other value
# keeps the scraped pages and result lists on disk for troubleshooting.
DEBUG=0
STOP_AFTER_PAGES=5   # stop after this many pages without new results (-n NUM)
BING_SETLANG=en-us   # value of the setlang URL parameter (-l)
BING_SETMKT=         # value of the setmkt URL parameter (-m)
UNAME=$(uname)
USER_AGENT="Wget/1.20 (linux-gnu)" # Bing responds differently to wget version 1.20+

# ANSI escape sequences used for coloured output (rendered by echo -e)
N="\033[0;0m" #NORMAL
W="\033[0;97m" #WHITE
R="\033[0;31m" #RED
DR="\033[1;31m" #DARKRED
G="\033[0;32m" #GREEN
Y="\033[1;33m" #YELLOW
B="\033[0;34m" #BLUE
DG="\033[1;30m" #DARKGRAY
REVERSED="\033[1;7m" #REVERSED

# The handler name is looked up when the signal arrives, so it is fine that
# ctrl_c is defined further down in the file.
trap ctrl_c INT
# SIGINT handler: stop scraping, tidy up and report what was found so far.
#
# Globals read: DEBUG, f_scraped_html, f_results.
# Exits the script with status 1.
function ctrl_c() {
  echo -e "\n* Stopping after user pressed CTRL-C."
  # clean up tmp files
  if [[ $DEBUG == 0 && -n "$f_scraped_html" ]]; then
    rm -f "$f_scraped_html"
  fi
  # The trap is armed before any scrape has started (for example while the
  # usage screen waits for Enter). Only print results when a result file
  # actually exists, otherwise show_hosts would fail on an empty file name.
  if [[ -n "$f_results" && -r "$f_results" ]]; then
    show_hosts
  fi
  exit 1
}
# Abort with status 1 unless wget can be found on the PATH.
#
# Globals read: UNAME (adds a Homebrew hint on macOS).
function dependency_check() {
  # "command -v" is a shell builtin, so the check keeps working on systems
  # that do not ship the external "which" utility.
  if ! command -v wget >/dev/null 2>&1; then
    echo "This script requires wget, please resolve and try again." >&2
    if [[ $UNAME == "Darwin" ]]; then
      echo "Try: brew install wget" >&2
    fi
    exit 1
  fi
}
# Print the ASCII-art start-up banner followed by the tool name, version and
# project URLs.
# Globals read: G, N, W (colour escape sequences) and VERSION.
# Globals written: BANNER (the rendered banner text).
function banner() {
# The art lives in one double-quoted string, so the double quotes and
# backticks that are part of the drawing are backslash-escaped. The colour
# variables and $VERSION are expanded when BANNER is assigned.
BANNER="
$G m, $N .,recon:, ,,
$G ##### $N ]##\"\"^^\"%##m %##b
$G ####b $N ]## \`##b
$G ####b $N ]## ## i## @#b,######m ,######m ##b
$G ####b 1mw, $N ]##MMM#### i## ]###\` %## ###\` \`@##
$G ####b 1#####Nw, $N ]##\`\` @#b i## ]## ### ### j##
$G ####i %########[$N ]## @## i## ]## ### ##g j##
$G ####n 2#####[$N ]## @## i## ]## ### @## {##
$G ####g ,#########b$N ]## ,,e### j## ]## ### 7##m,,,s#M##
$G #############M^ $N 'WWWWWW%b^ ii 'nn nn* \`1337\` g##
$G ##########\" $N G##
$G \"%##\" $N @#Gmmem###G
,i $G ,s2e, $N ## \`\`\`\`
\` $G \"\` %#$N ## T#
]# ]#,#M5@#p $G #b $N #H#H%@# s#M5O#o ,#MSSM W@##W= s#SSW
]# j#p ^#p$G ,#M $N ## @# ##' 'O# S#, ]# #b
]# j# #M$G ,#M $N ## @# #o O# \"SXm ]# ^\"@#
]# j##, ,## $G ,#2 $N ## @# 7#. .#O , ]# ]#Q ,#s
]# j######' $G #######x $N ## @# s#####o ####^ #Tt ####^
j#
$N j# $W bing-ip2hosts ($VERSION) by Andrew Horton @urbanadventurer
$N j# $W https://morningstarsecurity.com/research/bing-ip2hosts
$N $W https://github.com/urbanadventurer/bing-ip2hosts$N"
# -e makes echo render the \033 colour escape sequences held in the variables
echo -e "$BANNER"
}
# Redraw the full-screen progress panel for the scrape that is in progress.
#
# Globals read: IP, page, uniq_hosts, page_title, results_count,
#   paginated_links, paginated_current_link, new_results,
#   extra_status_update, urlpath, vhosts, VERSION and the colour variables.
# Outputs: clears the terminal, then writes the panel and the hosts found on
#   the current page to stdout.
function display_progress() {
  local tput_cols tput_lines maxwidth_url max_vhosts x
  local urlpath_truncated="" vhosts_truncated highlight_paginated_links
  local progress_animation PROGRESS

  # tput prints nothing usable when TERM is unset or unknown; fall back to a
  # classic 80x24 terminal instead of doing arithmetic on an empty value.
  tput_cols=$(tput cols 2>/dev/null)
  [[ $tput_cols =~ ^[0-9]+$ ]] || tput_cols=80
  tput_lines=$(tput lines 2>/dev/null)
  [[ $tput_lines =~ ^[0-9]+$ ]] || tput_lines=24

  # keep the URL on one line: 6 columns are taken by the "[ ] /" prefix
  maxwidth_url=$((tput_cols - 6))
  if ((maxwidth_url < 1)); then
    maxwidth_url=1
  fi
  if [[ ${#urlpath} -gt $maxwidth_url ]]; then
    urlpath_truncated="${urlpath:0:$maxwidth_url}"
  fi

  # 15 lines are reserved for the panel; on shorter terminals show no hosts
  # rather than passing a negative count to head.
  max_vhosts=$((tput_lines - 15))
  if ((max_vhosts < 0)); then
    max_vhosts=0
  fi
  vhosts_truncated=$(printf '%s\n' "$vhosts" | head -n "$max_vhosts")

  # highlight the current page number; the pattern is quoted so it is matched
  # literally and not as a glob
  highlight_paginated_links="${paginated_links/"$paginated_current_link"/${G}${REVERSED}$paginated_current_link${N}}"
  PROGRESS="$G m, $DG -----.--[$N bing-ip2hosts v$VERSION $DG]-------------------------
$G ##### $DG | $N Searching : $IP
$G ####b $DG | $N$Y Found : $uniq_hosts
$G ####b $DG | $N Scraped pages: $page
$G ####b 1mw, $DG | $N
$G ####b 1#####Nw, $DG | $N Page Title : $page_title
$G ####i %########[$DG | $N Results : $results_count
$G ####n 2#####[$DG | $N Pagination : $highlight_paginated_links
$G ####g ,#########b$DG | $N New : $new_results new
$G #############M^ $DG | $N
$G ##########\" $DG | $N $extra_status_update
$G \"%##\" $DG | $N CTRL-C to stop
${N}[ ] /${urlpath_truncated:-$urlpath}"
  #progress_animation="⣾⣽⣻⢿⡿⣟⣯⣷"
  progress_animation="▏▎▍▌▋▊▉█▊▋▌▍▎"
  clear
  echo -e "$PROGRESS"
  echo -e "${B}$vhosts_truncated${N}"
  # pick one of the 13 animation frames, advancing with the page number,
  # and draw it inside the "[ ]" on the last panel line
  x=$((page % 13))
  tput cup 13 1
  echo -e "$DR${progress_animation:$x:1}"
  tput cup 24 0
}
# Print the de-duplicated hosts collected in "$f_results", optionally append
# them to "$OUTPUT_FILE", and print a summary line unless running quietly.
#
# Globals read: f_results, DISPLAY_URL_PREFIX, DEBUG, OUTPUT_IP, IP,
#   OUTPUT_FILE, QUIET, page and the colour variables.
# Globals written: uniq_hosts (newline separated list), count_uniq_hosts,
#   PREFIX.
function show_hosts() {
  local suffix="" h
  local -a host_list=()

  uniq_hosts=""
  if [[ -n "$f_results" && -r "$f_results" ]]; then
    # Blank lines are dropped first so they are neither printed nor counted.
    if [[ $DISPLAY_URL_PREFIX == 1 ]]; then
      # sort by hostname. ignore URL prefix
      uniq_hosts=$(grep -v '^[[:space:]]*$' "$f_results" | sort -u -t '/' -k 3)
      suffix="/"
    else
      uniq_hosts=$(grep -v '^[[:space:]]*$' "$f_results" | sort -u)
    fi
  fi

  # Read line by line instead of word-splitting, so that no globbing happens
  # and the reported count is exactly the number of hosts printed below.
  while IFS= read -r h; do
    if [[ -n "$h" ]]; then
      host_list+=("$h")
    fi
  done <<< "$uniq_hosts"
  count_uniq_hosts=${#host_list[@]}

  if [[ $DEBUG == 0 && -n "$f_results" ]]; then
    rm -f "$f_results"
  fi
  if [[ $OUTPUT_IP == 1 ]]; then
    PREFIX="$IP,"
  fi

  echo -en "$N"
  for h in "${host_list[@]}"; do
    printf '%s\n' "$PREFIX$h$suffix"
  done
  if [[ -n "$OUTPUT_FILE" ]] && ((count_uniq_hosts > 0)); then
    for h in "${host_list[@]}"; do
      printf '%s\n' "$PREFIX$h$suffix"
    done >> "$OUTPUT_FILE"
  fi

  if [[ $QUIET == 0 ]]; then
    echo -e "\n${G}✓${N} Found ${Y}$count_uniq_hosts ${N}results after scraping ${Y}$page ${N}pages."
    echo
  fi
}
# Turn the scrape target into an IPv4 address.
#
# Arguments: $1 - dotted-quad address or hostname.
# Globals written: ret - the address, or an empty string when the hostname
#   cannot be resolved (a message is then written to stderr).
# Returns: always 0; callers check whether "ret" is empty.
function validate_target(){
  local candidate="$1"

  # something that already looks like an IP address is used verbatim
  if [[ "$candidate" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
    ret="$candidate"
    return 0
  fi

  # everything else is resolved with nslookup; the last "Address: a.b.c.d"
  # line of its output is taken as the answer
  ret=$(
    nslookup "$candidate" \
      | grep -E "^Address: \w+\.\w+\.\w+\.\w+$" \
      | tail -1 \
      | awk '{ print $2 }'
  )
  if [[ -z "$ret" ]]; then
    echo "Cannot resolve $candidate to an IP address." >&2
  fi
  return 0
}
# Scrape Bing's "ip:" search results for one target, page by page, and
# collect the hosts that are found in a temporary result file.
#
# Arguments: $1 - IPv4 address or hostname (resolved by validate_target).
# Globals read: TMP_DIR, QUIET, DEBUG, UNAME, USER_AGENT, DISPLAY_URL_PREFIX,
#   BING_SETLANG, BING_SETMKT, STOP_AFTER_PAGES, stop_sequence.
# Globals written: target, IP, page, f_results, f_scraped_html, uniq_hosts,
#   new_results, vhosts, url, urlpath and the page_title / results_count /
#   pagination fields shown by display_progress.
# Returns: 0 when scraping stopped on one of the end conditions, 1 when the
#   target cannot be resolved or a temporary file cannot be created.
#   Exits the script with status 1 when wget fails.
function scrape_ip(){
  target="$1"
  page=1
  last_page_check=
  results_count=
  uniq_hosts=0
  single_page=
  results_removed=
  extra_status_update=
  # reset the per-page display fields so that the first progress screen does
  # not show values left over from a previously scraped target
  page_title=
  paginated_current_link=
  paginated_links=
  vhosts=
  new_results=0
  # number of new hosts found on each page, in page order
  local -a scrape_results=()

  # A full path template works with both GNU and BSD mktemp. (With GNU
  # "mktemp -p DIR -t", $TMPDIR takes precedence over DIR, which would make
  # the -t command line option ineffective.)
  if ! f_results=$(mktemp "$TMP_DIR/bing-ip2hosts.tmp.XXXXXX"); then
    echo "Cannot create a temporary file in $TMP_DIR." >&2
    return 1
  fi

  validate_target "$target"
  IP="$ret"
  if [[ -z "$IP" ]]; then
    return 1
  fi

  while true; do
    # Bing paginates in steps of ten through the "first" parameter
    url="https://www.bing.com/search?q=ip%3A$IP+.&qs=n&first=$((page-1))0&FORM=PERE&setlang=$BING_SETLANG&setmkt=$BING_SETMKT"
    urlpath=$(echo "$url" | cut -d '/' -f 4-)
    if [[ $page == 1 ]] && [[ $QUIET == 0 ]]; then
      display_progress
    fi

    if ! f_scraped_html=$(mktemp "$TMP_DIR/bing-ip2hosts.tmp.XXXXXX"); then
      echo "Cannot create a temporary file in $TMP_DIR." >&2
      return 1
    fi
    # NOTE(review): --no-check-certificate disables TLS verification.
    if ! wget --quiet --no-check-certificate --user-agent "$USER_AGENT" --output-document "$f_scraped_html" "$url"; then
      echo -e "\n${R}wget failed to scrape $url" >&2
      if [[ $DEBUG == 0 ]]; then
        rm -f "$f_scraped_html"
      fi
      exit 1
    fi

    # pull the fields of interest out of the scraped page
    last_page_check=$(grep -E -o '<span class="sb_count">[0-9]+-([0-9]+) of (\1) results' "$f_scraped_html")
    results_count=$(grep -E -o '<span class="sb_count">[^<]+' "$f_scraped_html" |cut -d '>' -f 2|cut -d ' ' -f 1-3)
    single_page=$(grep -E -o '<span class="sb_count">[0-9] results' "$f_scraped_html")
    results_removed=$(grep -E -o '>Some results have been removed<' "$f_scraped_html")
    page_title=$(grep -E -o '<title>([^<]+)' "$f_scraped_html" | cut -d '>' -f 2)
    paginated_current_link=$(grep -E -o "class=\"sb_pagS[^>]+sb_bp\">([0-9]+)" "$f_scraped_html" | cut -d '>' -f 2)
    paginated_links=$(grep -E -o "<h4 class=\"b_hide\">Pagination</h4>.*?/nav" "$f_scraped_html" | grep -E -o ">([0-9\.]+)</a" | grep -E -o "[0-9\.]+" | tr '\n' ' ')
    results=$(grep -E -o "<h2><a href=\"[^\"]+" "$f_scraped_html" | cut -d '"' -f 2)

    if [[ $DISPLAY_URL_PREFIX == 1 ]]; then
      vhosts=$(echo "$results" | cut -d '/' -f 1-3)
    else
      vhosts=$(echo "$results" | cut -d '/' -f 3)
    fi
    if [[ $UNAME == "Darwin" ]]; then
      vhosts=$(echo "$vhosts" | tr '[:upper:]' '[:lower:]' ) # macOS has an old version of Bash
    else
      vhosts="${vhosts,,}" # convert to lowercase in pure Bash
    fi

    # printf keeps scraped text literal (echo -e would interpret backslash
    # escapes). Pages without results add nothing, so that no blank line is
    # later counted as a host.
    if [[ -n "$vhosts" ]]; then
      printf '%s\n' "$vhosts" >> "$f_results"
    fi
    old_uniq_hosts=$uniq_hosts
    uniq_hosts=$(grep -v '^$' "$f_results" | sort -u | wc -l | tr -d ' ')
    # how many new results did we get
    new_results=$(( uniq_hosts - old_uniq_hosts ))
    scrape_results+=("$new_results")

    if [[ $QUIET == 0 ]]; then
      extra_status_update=""
      if [[ -n "$results_removed" ]]; then
        extra_status_update="${R}Some results have been removed"
      fi
      display_progress
    fi

    # clean up tmp files
    if [[ $DEBUG == 0 ]]; then
      rm -f "$f_scraped_html"
    fi

    # check end conditions
    if [[ -n "$last_page_check" ]]; then
      if [[ $QUIET == 0 ]]; then
        echo -e "\n${R}Stopping. This is the last page of results." >&2
      fi
      break
    fi
    if [[ -z "$results_count" ]]; then
      if [[ $QUIET == 0 ]]; then
        echo -e "\n${R}Stopping. The search results count is missing." >&2
      fi
      break
    fi
    if [[ -n "$single_page" ]]; then
      if [[ $QUIET == 0 ]]; then
        echo -e "\n${R}Stopping. Returned only one page of results." >&2
      fi
      break
    fi
    # Compare whole, space-delimited numbers. A plain substring match would
    # treat the trailing zero of "10" as a page without new results and stop
    # one page too early.
    if [[ " ${scrape_results[*]} " == *" ${stop_sequence:-0} "* ]]; then
      if [[ $QUIET == 0 ]]; then
        echo -e "\n${R}Stopping after scraping $STOP_AFTER_PAGES pages without any new results.$N" >&2
      fi
      break
    fi
    (( page += 1 ))
  done
  return 0
}
# Print a space separated sequence of zeros, e.g. "0 0 0" for 3. This is the
# pattern used to detect consecutive pages without new results.
#
# Arguments: $1 - how many zeros to print. At least one zero is always
#   printed; values that are not plain decimal integers count as 1.
# Outputs: the sequence on stdout.
function repeat_zero() {
  # Everything is local so that callers' variables (notably a loop index
  # named "i") are left untouched.
  local how_many="$1"
  local str="0"
  local i

  # Only digits may reach the arithmetic loop below: other strings would be
  # evaluated as shell arithmetic expressions.
  [[ $how_many =~ ^[0-9]+$ ]] || how_many=1
  # 10# forces base 10 so that a leading zero is not read as octal
  for ((i = 1; i < 10#$how_many; i++)); do
    str+=" 0"
  done
  echo "$str"
}
# Show the banner and the help text, check that wget is installed and exit
# with status 1.
#
# Globals read: STOP_AFTER_PAGES and BING_SETLANG (shown as defaults).
function usage()
{
  local term_lines

  clear
  banner
  term_lines=$(tput lines)
  if [[ $term_lines -ge 24 ]] && [[ $term_lines -le 26 ]]; then
    # The banner fills a default 80x24 terminal, so pause there to let the
    # user see it before the help text scrolls it away.
    echo -n "[Press Enter]"
    read -r
  else
    echo
  fi
  # -l and -m require a value, so the help names their arguments
  echo -e "bing-ip2hosts is a Bing.com web scraper that discovers websites by IP address.
Use for OSINT and discovering attack-surface of penetration test targets.
Usage: $0 [OPTIONS] IP|hostname
OPTIONS are:
-o FILE\tOutput hostnames to FILE.
-i FILE\tInput list of IP addresses or hostnames from FILE.
-n NUM\tStop after NUM scraped pages return no new results (Default: $STOP_AFTER_PAGES).
-l LANG\tSelect the language for use in the setlang parameter (Default: $BING_SETLANG).
-m MKT\tSelect the market for use in the setmkt parameter (Default is unset).
-u\tOnly display hostnames. Default is to include URL prefixes.
-c\tCSV output. Outputs the IP and hostname on each line, separated by a comma.
-q\tQuiet. Disable output except for final results.
-t DIR\tUse this directory instead of /tmp.
-V\tDisplay the version number of bing-ip2hosts and exit.
"
  dependency_check
  exit 1
}
# ---------------------------------------------------------------------------
# Main program: parse the command line, validate the options, scrape every
# target and print the hosts that were found.
# ---------------------------------------------------------------------------
if [[ -z "$1" ]] || [[ "$1" == "-h" ]] || [[ "$1" == "--help" ]]; then
  usage
fi
dependency_check

while getopts "i:o:n:l:m:ucqt:V" optionName; do
  case "$optionName" in
    i) INPUT_FILE=$OPTARG;;
    o) OUTPUT_FILE=$OPTARG;;
    n) STOP_AFTER_PAGES=$OPTARG;;
    l) BING_SETLANG="$OPTARG";;
    m) BING_SETMKT="$OPTARG";;
    u) DISPLAY_URL_PREFIX=0;;
    c) OUTPUT_IP=1;;
    q) QUIET=1;;
    t) TMP_DIR="$OPTARG";;
    V) echo "bing-ip2hosts v$VERSION"; exit;;
    [?]) echo "Error"; exit 1;;
  esac
done
shift $((OPTIND -1))

# The page limit ends up in shell arithmetic, so accept plain numbers only.
if [[ ! "$STOP_AFTER_PAGES" =~ ^[0-9]+$ ]]; then
  echo "Invalid number of pages: $STOP_AFTER_PAGES." >&2
  exit 1
fi

if [[ -z "$1" ]] && [[ -z "$INPUT_FILE" ]]; then
  echo "Missing IP address or hostname." >&2
  exit 1
fi
if [[ -n "$INPUT_FILE" ]]; then
  if [[ ! -r "$INPUT_FILE" ]]; then
    echo "Cannot read from $INPUT_FILE." >&2
    exit 1
  fi
fi

# initialize outputfile
if [[ -n "$OUTPUT_FILE" ]]; then
  touch "$OUTPUT_FILE"
  if [[ ! -w "$OUTPUT_FILE" ]]; then
    echo "Cannot write to $OUTPUT_FILE." >&2
    exit 1
  fi
fi

# Create TMP_DIR if necessary
if [[ ! -d "$TMP_DIR" ]]; then
  if [[ "$QUIET" == 0 ]]; then
    mkdir -v -p "$TMP_DIR"
  else
    mkdir -p "$TMP_DIR"
  fi
  # $? still holds the status of whichever mkdir ran in the if/else above
  if [[ ! "$?" == 0 ]]; then
    echo "Invalid TMP Directory: $TMP_DIR." >&2
    exit 1
  fi
fi

if [[ $QUIET == 0 ]]; then
  clear
fi
stop_sequence=$(repeat_zero "$STOP_AFTER_PAGES")

if [[ -n "$INPUT_FILE" ]]; then
  declare -a result_files
  declare -a result_ips
  declare -a result_pages
  # The list is read through file descriptor 3 so that commands started while
  # scraping cannot consume it via stdin. The "|| [[ -n ... ]]" part keeps a
  # final line that has no trailing newline.
  while read -r -u 3 target || [[ -n "$target" ]]; do
    # tolerate Windows line endings and trailing blanks
    target="${target%$'\r'}"
    target="${target%"${target##*[![:space:]]}"}"
    # skip blank lines instead of trying to resolve an empty name
    if [[ -z "$target" ]]; then
      continue
    fi
    scrape_ip "$target"
    # copy $f_results
    result_files+=("$f_results")
    result_ips+=("$IP")
    result_pages+=("$page")
  done 3< "$INPUT_FILE"
  # report on each result
  for (( i=0; i < ${#result_files[@]}; i++)); do
    f_results=${result_files[i]}
    IP=${result_ips[i]}
    page=${result_pages[i]}
    show_hosts
  done
else
  target="$1"
  scrape_ip "$target"
  show_hosts
fi