#!/usr/bin/env perl
use strict;
use warnings;
use autodie;
use utf8;
use Cwd qw( abs_path );
use File::Basename qw( dirname );
use File::Slurper qw( read_binary write_binary );
use Cpanel::JSON::XS 4.16 qw( decode_json );
use Math::Int128 qw( MAX_UINT128 string_to_uint128 uint128 );
use MaxMind::DB::Writer::Serializer 0.100004;
use MaxMind::DB::Writer::Tree 0.100004;
use MaxMind::DB::Writer::Util qw( key_for_data );
use Net::Works::Network ();
use Test::MaxMind::DB::Common::Util qw( standard_test_metadata );
my $Dir = dirname( abs_path($0) );
sub main {
my @sizes = ( 24, 28, 32 );
my @ipv4_range = ( '1.1.1.1', '1.1.1.32' );
my @ipv4_subnets = Net::Works::Network->range_as_subnets(@ipv4_range);
for my $record_size (@sizes) {
write_test_db(
$record_size,
\@ipv4_subnets,
{ ip_version => 4 },
'ipv4',
);
}
write_broken_pointers_test_db(
24,
\@ipv4_subnets,
{ ip_version => 4 },
'broken-pointers',
);
write_broken_search_tree_db(
24,
\@ipv4_subnets,
{ ip_version => 4 },
'broken-search-tree',
);
my @ipv6_subnets = Net::Works::Network->range_as_subnets(
'::1:ffff:ffff',
'::2:0000:0059'
);
for my $record_size (@sizes) {
write_test_db(
$record_size,
\@ipv6_subnets,
{ ip_version => 6 },
'ipv6',
);
write_test_db(
$record_size,
[
@ipv6_subnets,
Net::Works::Network->range_as_subnets( @ipv4_range, 6 ),
],
{ ip_version => 6 },
'mixed',
);
}
write_decoder_test_db();
write_deeply_nested_structures_db();
write_geoip2_dbs();
write_broken_geoip2_city_db();
write_invalid_node_count();
write_no_ipv4_tree_db();
write_no_map_db( \@ipv4_subnets );
write_test_serialization_data();
write_db_with_metadata_pointers();
}
sub write_broken_pointers_test_db {
no warnings 'redefine';
my $orig_store_data = MaxMind::DB::Writer::Serializer->can('store_data');
# This breaks the value of the record for the 1.1.1.32 network, causing it
# to point outside the database.
local *MaxMind::DB::Writer::Serializer::store_data = sub {
my $data_pointer = shift->$orig_store_data(@_);
my $value = $_[1];
if ( ref($value) eq 'HASH'
&& exists $value->{ip}
&& $value->{ip} eq '1.1.1.32' ) {
$data_pointer += 100_000;
}
return $data_pointer;
};
# The next hack will poison the data section for the 1.1.16/28 subnet
# value. It's value will be a pointer that resolves to an offset outside
# the database.
my $key_to_poison = key_for_data( { ip => '1.1.1.16' } );
my $orig_position_for_data
= MaxMind::DB::Writer::Serializer->can('_position_for_data');
local *MaxMind::DB::Writer::Serializer::_position_for_data = sub {
my $key = $_[1];
if ( $key eq $key_to_poison ) {
return 1_000_000;
}
else {
return shift->$orig_position_for_data(@_);
}
};
write_test_db(@_);
return;
}
sub write_broken_search_tree_db {
my $filename = ( write_test_db(@_) )[1];
my $content = read_binary($filename);
# This causes the right record of the first node to be 0, meaning it
# points back to the top of the tree. This should never happen in a
# database that follows the spec.
substr( $content, 5, 1 ) = "\0";
write_binary( $filename, $content );
return;
}
sub write_test_db {
my $record_size = shift;
my $subnets = shift;
my $metadata = shift;
my $ip_version_name = shift;
my $writer = MaxMind::DB::Writer::Tree->new(
ip_version => $subnets->[0]->version(),
record_size => $record_size,
alias_ipv6_to_ipv4 => ( $subnets->[0]->version() == 6 ? 1 : 0 ),
map_key_type_callback => sub { 'utf8_string' },
standard_test_metadata(),
%{$metadata},
);
for my $subnet ( @{$subnets} ) {
$writer->insert_network(
$subnet,
{ ip => $subnet->first()->as_string() }
);
}
my $filename = sprintf(
"$Dir/MaxMind-DB-test-%s-%i.mmdb",
$ip_version_name,
$record_size,
);
open my $fh, '>', $filename;
$writer->write_tree($fh);
close $fh;
return ( $writer, $filename );
}
{
# We will store this once for each subnet so we will also be testing
# pointers, since the serializer will generate a pointer to this
# structure.
my %all_types = (
utf8_string => 'unicode! ☯ - ♫',
double => 42.123456,
bytes => pack( 'N', 42 ),
uint16 => 100,
uint32 => 2**28,
int32 => -1 * ( 2**28 ),
uint64 => uint128(1) << 60,
uint128 => uint128(1) << 120,
array => [ 1, 2, 3, ],
map => {
mapX => {
utf8_stringX => 'hello',
arrayX => [ 7, 8, 9 ],
},
},
boolean => 1,
float => 1.1,
);
my %all_types_0 = (
utf8_string => q{},
double => 0,
bytes => q{},
uint16 => 0,
uint32 => 0,
int32 => 0,
uint64 => uint128(0),
uint128 => uint128(0),
array => [],
map => {},
boolean => 0,
float => 0,
);
# We limit this to numeric types as the other types would generate
# very large databases
my %numeric_types_max = (
double => 'Inf',
float => 'Inf',
int32 => 0x7fffffff,
uint16 => 0xffff,
uint32 => string_to_uint128('0xffff_ffff'),
uint64 => string_to_uint128('0xffff_ffff_ffff_ffff'),
uint128 => MAX_UINT128,
);
sub write_decoder_test_db {
my $writer = MaxMind::DB::Writer::Tree->new(
ip_version => 6,
record_size => 24,
database_type => 'MaxMind DB Decoder Test',
languages => ['en'],
description => {
en =>
'MaxMind DB Decoder Test database - contains every MaxMind DB data type',
},
alias_ipv6_to_ipv4 => 1,
remove_reserved_networks => 0,
map_key_type_callback => sub {
my $key = $_[0];
$key =~ s/X$//;
return $key eq 'array' ? [ 'array', 'uint32' ] : $key;
},
);
my @subnets
= map { Net::Works::Network->new_from_string( string => $_ ) }
qw(
::1.1.1.0/120
::2.2.0.0/112
::3.0.0.0/104
::4.5.6.7/128
abcd::/64
1000::1234:0000/112
);
for my $subnet (@subnets) {
$writer->insert_network(
$subnet,
\%all_types,
);
}
$writer->insert_network(
Net::Works::Network->new_from_string( string => '::0.0.0.0/128' ),
\%all_types_0,
);
$writer->insert_network(
Net::Works::Network->new_from_string(
string => '::255.255.255.255/128'
),
\%numeric_types_max,
);
open my $fh, '>', "$Dir/MaxMind-DB-test-decoder.mmdb";
$writer->write_tree($fh);
close $fh;
return;
}
}
{
my %nested = (
map1 => {
map2 => {
array => [
{
map3 => { a => 1, b => 2, c => 3 },
},
],
},
},
);
sub write_deeply_nested_structures_db {
my $writer = MaxMind::DB::Writer::Tree->new(
ip_version => 6,
record_size => 24,
ip_version => 6,
database_type => 'MaxMind DB Nested Data Structures',
languages => ['en'],
description => {
en =>
'MaxMind DB Nested Data Structures Test database - contains deeply nested map/array structures',
},
alias_ipv6_to_ipv4 => 1,
map_key_type_callback => sub {
my $key = shift;
return
$key =~ /^map/ ? 'map'
: $key eq 'array' ? [ 'array', 'map' ]
: 'uint32';
}
);
my @subnets
= map { Net::Works::Network->new_from_string( string => $_ ) }
qw(
::1.1.1.0/120
::2.2.0.0/112
::3.0.0.0/104
::4.5.6.7/128
abcd::/64
1000::1234:0000/112
);
for my $subnet (@subnets) {
$writer->insert_network(
$subnet,
\%nested,
);
}
open my $fh, '>', "$Dir/MaxMind-DB-test-nested.mmdb";
$writer->write_tree($fh);
close $fh;
return;
}
}
sub write_geoip2_dbs {
_write_geoip2_db( @{$_}[ 0, 1 ], 'Test' )
for (
[ 'GeoIP2-Anonymous-IP', {} ],
['GeoIP2-City'],
['GeoIP2-Connection-Type'],
['GeoIP2-Country'],
['GeoIP2-DensityIncome'],
['GeoIP2-Domain'],
['GeoIP2-Enterprise'],
['GeoIP2-ISP'],
['GeoIP2-Precision-Enterprise'],
['GeoIP2-Static-IP-Score'],
['GeoIP2-User-Count'],
['GeoLite2-ASN'],
);
}
sub write_broken_geoip2_city_db {
no warnings 'redefine';
# This is how we _used_ to encode doubles. Storing them this way with the
# current reader tools can lead to weird errors. This broken database is a
# good way to test the robustness of reader code in the face of broken
# databases.
local *MaxMind::DB::Writer::Serializer::_encode_double = sub {
my $self = shift;
my $value = shift;
$self->_simple_encode( double => $value );
};
_write_geoip2_db( 'GeoIP2-City', 0, 'Test Broken Double Format' );
}
sub write_invalid_node_count {
no warnings 'redefine';
local *MaxMind::DB::Writer::Tree::node_count = sub { 100000 };
_write_geoip2_db( 'GeoIP2-City', 0, 'Test Invalid Node Count' );
}
sub _universal_map_key_type_callback {
my $map = {
# languages
de => 'utf8_string',
en => 'utf8_string',
es => 'utf8_string',
fr => 'utf8_string',
ja => 'utf8_string',
'pt-BR' => 'utf8_string',
ru => 'utf8_string',
'zh-CN' => 'utf8_string',
# production
accuracy_radius => 'uint16',
autonomous_system_number => 'uint32',
autonomous_system_organization => 'utf8_string',
average_income => 'uint32',
city => 'map',
code => 'utf8_string',
confidence => 'uint16',
connection_type => 'utf8_string',
continent => 'map',
country => 'map',
domain => 'utf8_string',
geoname_id => 'uint32',
ipv4_24 => 'uint32',
ipv4_32 => 'uint32',
ipv6_32 => 'uint32',
ipv6_48 => 'uint32',
ipv6_64 => 'uint32',
is_anonymous => 'boolean',
is_anonymous_proxy => 'boolean',
is_anonymous_vpn => 'boolean',
is_hosting_provider => 'boolean',
is_in_european_union => 'boolean',
is_legitimate_proxy => 'boolean',
is_public_proxy => 'boolean',
is_satellite_provider => 'boolean',
is_tor_exit_node => 'boolean',
iso_code => 'utf8_string',
isp => 'utf8_string',
latitude => 'double',
location => 'map',
longitude => 'double',
metro_code => 'uint16',
names => 'map',
organization => 'utf8_string',
population_density => 'uint32',
postal => 'map',
registered_country => 'map',
represented_country => 'map',
score => 'double',
subdivisions => [ 'array', 'map' ],
time_zone => 'utf8_string',
traits => 'map',
traits => 'map',
type => 'utf8_string',
user_type => 'utf8_string',
# for testing only
foo => 'utf8_string',
bar => 'utf8_string',
buzz => 'utf8_string',
our_value => 'utf8_string',
};
my $callback = sub {
my $key = shift;
return $map->{$key} || die <<"ERROR";
Unknown tree key '$key'.
The universal_map_key_type_callback doesn't know what type to use for the passed
key. If you are adding a new key that will be used in a frozen tree / mmdb then
you should update the mapping in both our internal code and here.
ERROR
};
return $callback;
}
sub _write_geoip2_db {
my $type = shift;
my $populate_all_networks_with_data = shift;
my $description = shift;
my $writer = MaxMind::DB::Writer::Tree->new(
ip_version => 6,
record_size => 28,
ip_version => 6,
database_type => $type,
languages => [ 'en', $type eq 'GeoIP2-City' ? ('zh') : () ],
description => {
en => ( $type =~ s/-/ /gr )
. " $description Database (fake GeoIP2 data, for example purposes only)",
$type eq 'GeoIP2-City' ? ( zh => '小型数据库' ) : (),
},
alias_ipv6_to_ipv4 => 1,
map_key_type_callback => _universal_map_key_type_callback(),
);
_populate_all_networks( $writer, $populate_all_networks_with_data )
if $populate_all_networks_with_data;
my $value = shift;
my $nodes
= decode_json( read_binary("$Dir/../source-data/$type-Test.json") );
for my $node (@$nodes) {
for my $network ( keys %$node ) {
$writer->insert_network(
Net::Works::Network->new_from_string( string => $network ),
$node->{$network}
);
}
}
my $suffix = $description =~ s/ /-/gr;
open my $output_fh, '>', "$Dir/$type-$suffix.mmdb";
$writer->write_tree($output_fh);
close $output_fh;
return;
}
sub _populate_all_networks {
my $writer = shift;
my $data = shift;
my $max_uint128 = uint128(0) - 1;
my @networks = Net::Works::Network->range_as_subnets(
Net::Works::Address->new_from_integer(
integer => 0,
version => 6,
),
Net::Works::Address->new_from_integer(
integer => $max_uint128,
version => 6,
),
);
for my $network (@networks) {
$writer->insert_network( $network => $data );
}
}
sub write_no_ipv4_tree_db {
my $subnets = shift;
my $writer = MaxMind::DB::Writer::Tree->new(
ip_version => 6,
record_size => 24,
ip_version => 6,
database_type => 'MaxMind DB No IPv4 Search Tree',
languages => ['en'],
description => {
en => 'MaxMind DB No IPv4 Search Tree',
},
remove_reserved_networks => 0,
root_data_type => 'utf8_string',
map_key_type_callback => sub { {} },
);
my $subnet = Net::Works::Network->new_from_string( string => '::/64' );
$writer->insert_network( $subnet, $subnet->as_string() );
open my $output_fh, '>', "$Dir/MaxMind-DB-no-ipv4-search-tree.mmdb";
$writer->write_tree($output_fh);
close $output_fh;
return;
}
# The point of this database is to provide something where we can test looking
# up a single value. In other words, each IP address points to a non-compound
# value, a string rather than a map or array.
sub write_no_map_db {
my $subnets = shift;
my $writer = MaxMind::DB::Writer::Tree->new(
ip_version => 4,
record_size => 24,
database_type => 'MaxMind DB String Value Entries',
languages => ['en'],
description => {
en =>
'MaxMind DB String Value Entries (no maps or arrays as values)',
},
root_data_type => 'utf8_string',
map_key_type_callback => sub { {} },
);
for my $subnet ( @{$subnets} ) {
$writer->insert_network( $subnet, $subnet->as_string() );
}
open my $output_fh, '>', "$Dir/MaxMind-DB-string-value-entries.mmdb";
$writer->write_tree($output_fh);
close $output_fh;
return;
}
sub write_test_serialization_data {
my $serializer = MaxMind::DB::Writer::Serializer->new(
map_key_type_callback => sub { 'utf8_string' } );
$serializer->store_data( map => { long_key => 'long_value1' } );
$serializer->store_data( map => { long_key => 'long_value2' } );
$serializer->store_data( map => { long_key2 => 'long_value1' } );
$serializer->store_data( map => { long_key2 => 'long_value2' } );
$serializer->store_data( map => { long_key => 'long_value1' } );
$serializer->store_data( map => { long_key2 => 'long_value2' } );
open my $fh, '>', 'maps-with-pointers.raw';
print {$fh} ${ $serializer->buffer() }
or die "Cannot write to maps-with-pointers.raw: $!";
close $fh;
return;
}
sub write_db_with_metadata_pointers {
my $repeated_string = 'Lots of pointers in metadata';
my $writer = MaxMind::DB::Writer::Tree->new(
ip_version => 6,
record_size => 24,
map_key_type_callback => sub { 'utf8_string' },
database_type => $repeated_string,
languages => [ 'en', 'es', 'zh' ],
description => {
en => $repeated_string,
es => $repeated_string,
zh => $repeated_string,
},
);
_populate_all_networks( $writer, {} );
open my $fh, '>', 'MaxMind-DB-test-metadata-pointers.mmdb';
$writer->write_tree($fh);
close $fh;
}
main();