Date post: | 15-Apr-2017 |
Category: |
Engineering |
Upload: | ireneusz-skrobis |
View: | 161 times |
Download: | 7 times |
Geolocation Databases in Ruby on Rails
Ireneusz Skrobiś, Lead Developer @ Selleo
Challenge description - what we have?
Challenge description - what we want?
Challenge description - why we want to do that?
Research
Research
GeoLite
ip2nation
ip2location
GeoNames
Research
GeoLite
ip2nation
ip2location
GeoNames
and the winner is:
GeoNames
Problem
countryInfo.txt
allCountries.txt (locations, states, cities)
Problem
countryInfo.txt
252 entries
allCountries.txt (locations, states, cities)
Problem
countryInfo.txt
252 entries
allCountries.txt (locations, states, cities)
11,157,064 entries
Problem
countryInfo.txt
252 entries
allCountries.txt (locations, states, cities)
11,157,064 entries
1.7 GB (!)
Initial implementation
-- Table receiving the allCountries.txt dump.
-- It declares the same set of columns as the column list of the
-- "COPY geo_names" statement; only the declaration order differs, which is
-- why that COPY has to name its columns explicitly.
create table geo_names (
    geonameid      integer,
    name           varchar(200),
    fclass         char(1),
    fcode          varchar(10),
    population     bigint,
    country        varchar(2),
    admin1         varchar(20),
    admin2         varchar(80),
    admin3         varchar(20),
    admin4         varchar(20),
    asciiname      varchar(200),
    alternatenames text,
    latitude       float,
    longitude      float,
    cc2            varchar(100),
    elevation      integer,
    gtopo30        integer,
    timezone       varchar(40),
    moddate        date
);
Initial implementation
-- Table receiving the countryInfo.txt dump.
-- It declares the same set of columns as the column list of the
-- "COPY countries" statement; the declaration order differs from that list,
-- so the COPY has to name its columns explicitly.
-- Identifiers are written in lowercase: PostgreSQL folds unquoted names to
-- lowercase anyway, and the COPY statement refers to the column as geonameid.
create table countries (
    iso_alpha2      char(2),
    name            varchar(200),
    geonameid       integer,
    iso_alpha3      char(3),
    iso_numeric     integer,
    fips_code       varchar(3),
    capital         varchar(200),
    areainsqkm      double precision,
    population      integer,
    continent       varchar(2),
    tld             varchar(10),
    currencycode    varchar(3),
    currencyname    varchar(20),
    phone           varchar(20),
    postalcode      varchar(100),
    postalcoderegex varchar(200),
    languages       varchar(200),
    neighbors       varchar(50),
    equivfipscode   varchar(3)
);
Initial implementation
-- Bulk-load db/files/countryInfo.txt into countries as tab-delimited CSV,
-- storing empty fields as NULL. The explicit column list gives the order of
-- the fields in the file, which differs from the table's declaration order.
-- NOTE(review): the #{...} interpolation suggests this statement lives in a
-- Ruby double-quoted string/heredoc, where '\t' becomes a real tab before
-- PostgreSQL sees it; confirm, otherwise the delimiter needs E'\t'.
-- NOTE(review): HEADER makes COPY skip only the first line of the file;
-- confirm the file as loaded starts with exactly one header line.
COPY countries (
    iso_alpha2, iso_alpha3, iso_numeric, fips_code, name, capital, areainsqkm,
    population, continent, tld, currencycode, currencyname, phone, postalcode,
    postalcoderegex, languages, geonameid, neighbors, equivfipscode
)
FROM '#{Rails.root.join('db', 'files', 'countryInfo.txt').to_s}'
null as '' CSV DELIMITER '\t' HEADER;
Initial implementation
-- Bulk-load db/files/allCountries.txt into geo_names as tab-delimited CSV,
-- storing empty fields as NULL. The explicit column list gives the order of
-- the fields in the file, which differs from the table's declaration order.
-- NOTE(review): the #{...} interpolation suggests this statement lives in a
-- Ruby double-quoted string/heredoc, where '\t' becomes a real tab before
-- PostgreSQL sees it; confirm, otherwise the delimiter needs E'\t'.
-- NOTE(review): HEADER makes COPY skip the first line of the file; confirm
-- the file really starts with a header line, otherwise one record is lost.
COPY geo_names (
    geonameid, name, asciiname, alternatenames, latitude, longitude, fclass,
    fcode, country, cc2, admin1, admin2, admin3, admin4, population, elevation,
    gtopo30, timezone, moddate
)
FROM '#{Rails.root.join('db', 'files', 'allCountries.txt').to_s}'
null as '' CSV DELIMITER '\t' HEADER;
Initial implementation
# get all administrative regions (fcode 'ADM1') for a country code, sorted by name
where(country: code, fcode: 'ADM1').order(:name)
# get all cities/villages (fclass 'P') for a country code and administrative
# region, skipping entries whose population is 0, sorted by name
where(country: code, admin1: adm.admin1, fclass: 'P')
  .where.not(population: 0)
  .order(:name)
Findings
we don’t need all fields
we don’t need all entries
Database adjustment
START
COUNT: 11,157,064 DB: 1,748,198,652 TIME: 2206ms
Database adjustment
START
COUNT: 11,157,064 DB: 1,748,198,652 TIME: 2206ms
REMOVE COLUMNS
Database adjustment
START
COUNT: 11,157,064 DB: 1,748,198,652 TIME: 2206ms
REMOVE COLUMNS
geo_names: admin2, admin3, admin4, asciiname, alternatenames, latitude, longitude, cc2, elevation, gtopo30, timezone, moddate
countries: iso_alpha3, iso_numeric, fips_code, capital, areainsqkm, population, continent, tld, currencycode, currencyname, phone, postalcode, postalcoderegex, languages, neighbors, equivfipscode
Database adjustment
START
COUNT: 11,157,064 DB: 1,748,198,652 TIME: 2206ms
REMOVE COLUMNS
COUNT: 11,157,064 DB: 409,655,812 TIME: 2197ms
Database adjustment
START
COUNT: 11,157,064 DB: 1,748,198,652 TIME: 2206ms
REMOVE COLUMNS
COUNT: 11,157,064 DB: 409,655,812 TIME: 2197ms
GeoName.where.not(fclass: %w(A P)).delete_all
Database adjustment
START
COUNT: 11,157,064 DB: 1,748,198,652 TIME: 2206ms
REMOVE COLUMNS
COUNT: 11,157,064 DB: 409,655,812 TIME: 2197ms
GeoName.where.not(fclass: %w(A P)).delete_all
COUNT: 4,729,998 DB: 160,005,106 TIME: 1310ms
Database adjustment
START
COUNT: 11,157,064 DB: 1,748,198,652 TIME: 2206ms
REMOVE COLUMNS
COUNT: 11,157,064 DB: 409,655,812 TIME: 2197ms
GeoName.where.not(fclass: %w(A P)).delete_all
COUNT: 4,729,998 DB: 160,005,106 TIME: 1310ms
GeoName.where(fclass: 'P', population: 0).delete_all
Database adjustment
START
COUNT: 11,157,064 DB: 1,748,198,652 TIME: 2206ms
REMOVE COLUMNS
COUNT: 11,157,064 DB: 409,655,812 TIME: 2197ms
GeoName.where.not(fclass: %w(A P)).delete_all
COUNT: 4,729,998 DB: 160,005,106 TIME: 1310ms
GeoName.where(fclass: 'P', population: 0).delete_all
COUNT: 723,681 DB: 27,268,183 TIME: 854ms
Database adjustment
START
COUNT: 11,157,064 DB: 1,748,198,652 TIME: 2206ms
REMOVE COLUMNS
COUNT: 11,157,064 DB: 409,655,812 TIME: 2197ms
GeoName.where.not(fclass: %w(A P)).delete_all
COUNT: 4,729,998 DB: 160,005,106 TIME: 1310ms
GeoName.where(fclass: 'P', population: 0).delete_all
COUNT: 723,681 DB: 27,268,183 TIME: 854ms
GeoName.where(fclass: 'A').where.not(fcode: 'ADM1').delete_all
Database adjustment
START
COUNT: 11,157,064 DB: 1,748,198,652 TIME: 2206ms
REMOVE COLUMNS
COUNT: 11,157,064 DB: 409,655,812 TIME: 2197ms
GeoName.where.not(fclass: %w(A P)).delete_all
COUNT: 4,729,998 DB: 160,005,106 TIME: 1310ms
GeoName.where(fclass: 'P', population: 0).delete_all
COUNT: 723,681 DB: 27,268,183 TIME: 854ms
GeoName.where(fclass: 'A').where.not(fcode: 'ADM1').delete_all
COUNT: 367,782 DB: 13,644,454 TIME: 770ms
Database adjustment
START
COUNT: 11,157,064 DB: 1,748,198,652 TIME: 2206ms
REMOVE COLUMNS
COUNT: 11,157,064 DB: 409,655,812 TIME: 2197ms
GeoName.where.not(fclass: %w(A P)).delete_all
COUNT: 4,729,998 DB: 160,005,106 TIME: 1310ms
GeoName.where(fclass: 'P', population: 0).delete_all
COUNT: 723,681 DB: 27,268,183 TIME: 854ms
GeoName.where(fclass: 'A').where.not(fcode: 'ADM1').delete_all
COUNT: 367,782 DB: 13,644,454 TIME: 61 ms (after restart)
Final implementation
psql project_dev_db
-- Export the trimmed tables as tab-delimited CSV files with a header row.
-- The columns are listed explicitly, in the same order as the column lists
-- of the "copy ... from" statements in the geo_names:setup_all rake task, so
-- the exported files do not depend on the tables' physical column order.
COPY countries (iso_alpha2, name, geonameid)
TO '/Users/irek/rails_workspace/battleriff/db/files/countries.csv'
DELIMITER E'\t' CSV HEADER;

COPY geo_names (geonameid, name, fclass, fcode, population, country, admin1)
TO '/Users/irek/rails_workspace/battleriff/db/files/geo_names.csv'
DELIMITER E'\t' CSV HEADER;
Final implementation
# Migration that creates the two trimmed lookup tables, geo_names and
# countries, with raw SQL. Only the columns kept after the
# "REMOVE COLUMNS" step are declared.
class AddGeoNamesTables < ActiveRecord::Migration
  # Creates both tables in a single execute call.
  def up
    execute <<-SQL
      create table geo_names (
        geonameid int,
        name varchar(200),
        fclass char(1),
        fcode varchar(10),
        population bigint,
        country varchar(2),
        admin1 varchar(20)
      );
      create table countries (
        iso_alpha2 char(2),
        name varchar(200),
        geonameid int
      );
    SQL
  end

  # Drops both tables again.
  def down
    drop_table :countries
    drop_table :geo_names
  end
end
Final implementation
namespace :geo_names do
  desc "Setup ALL data needed for countries/states/cities selection"
  task setup_all: :environment do
    # Loads db/files/countries.csv and db/files/geo_names.csv into the
    # countries and geo_names tables with two COPY statements sent in one
    # execute call. Empty fields are stored as NULL and the first line of
    # each file is skipped as a header.
    # The heredoc is interpolated like a double-quoted string, so '\t' reaches
    # PostgreSQL as a real tab character.
    # NOTE(review): COPY ... FROM '<path>' is read by the database server
    # process, so the files must be readable from the server's side; confirm
    # for every environment where this task runs.
    ActiveRecord::Base.connection.execute <<-SQL
      copy countries (iso_alpha2,name,geonameid)
      from '#{Rails.root.join('db', 'files', 'countries.csv').to_s}'
      null as '' CSV DELIMITER '\t' HEADER;

      copy geo_names (geonameid,name,fclass,fcode,population,country, admin1)
      from '#{Rails.root.join('db', 'files', 'geo_names.csv').to_s}'
      null as '' CSV DELIMITER '\t' HEADER;
    SQL
  end
end
Thank you! Live long and prosper :)
Ireneusz Skrobiś, Lead Developer @ Selleo