SlideShare ist ein Scribd-Unternehmen logo
1 von 28
Downloaden Sie, um offline zu lesen
!
Crunching data with go:
Tips, tricks, use-cases
Sergii Khomenko, Data Scientist, STYLIGHT
sergii.khomenko@stylight.com @lc0d3r
MUNICH GOPHERS - APR 24 2014, MUNICH
Agenda
Relational databases
Google Analytics and BigQuery
Geolocation
Useful things from Go-world
WHAT IT’S ABOUT
Relational databases
• github.com/jmoiron/sqlx
// Clickout is one row of the click-out query result; rows are decoded into it
// with StructScan and then sent through the lookup pipeline.
//
// Count is the number of clicks the row stands for (it is what the aggregator
// sums), and Ip is the address that gets resolved to a subnet.
//
// NOTE(review): field names, including the spelling of Commision and the
// underscore in Eu_commission, are left untouched because renaming them could
// change how result columns are matched to fields — confirm against the
// query's column names before cleaning them up.
type Clickout struct {
	Id, Count                int
	Ip                       string
	Type                     int
	Commision, Eu_commission float32
}
! db, err := sqlx.Connect(config.Database.Driver, fmt.Sprintf("%s:%s@%s(%s)/%s?
parseTime=true", config.Database.Username,!
! ! config.Database.Password, config.Database.Protocol, config.Database.Server,
config.Database.Database))!
!! fmt.Printf("Connect to %s:(%s)... n", config.Database.Protocol,
config.Database.Server)!
! if err != nil {!
! ! log.Fatalf("Can not connect to the mysql server - %s", err)!
! ! return!
! }!
! defer db.Close()!
!!
! dbParams := paramStruct{"start": arguments["<from>"].(string) + " 00:00:00", "end":
arguments["<to>"].(string) + " 23:59:59"}!
! geoParams := paramStruct{}!
!! siteStr, _ := arguments["--site"].(string)!
! if siteInt, err2 := strconv.Atoi(siteStr); err2 == nil {!
! ! dbParams["site"] = siteInt!
! }!
!! query := getClickoutsQuery(dbParams)!
! rows, err := db.Queryx(query)
! if err == nil {!
! ! for rows.Next() {!
! ! ! click := Clickout{}!
!! ! ! err2 := rows.StructScan(&click)!
! ! ! if err2 == nil {!
! ! ! ! task <- click!
!! ! ! } else {!
! ! ! ! fmt.Println(err2)!
! ! ! }!
! ! }!
! ! close(task)!
! } else {!
! ! log.Fatalf("SQL Error - %s", err)!
! }!
Geolocation
WHERE MY IPS ARE FROM
! task := make(chan Clickout)!
! result := make(chan IpResult)!
! done = make(chan interface{})!
!! go processChannel(task, result)!
! go aggregateResults(result, &results)!
!! if err == nil {!
! ! for rows.Next() {!
! ! ! click := Clickout{}!
!! ! ! err2 := rows.StructScan(&click)!
! ! ! if err2 == nil {!
! ! ! ! task <- click!
!! ! ! } else {!
! ! ! ! fmt.Println(err2)!
! ! ! }!
! ! }!
! ! close(task)!
! } else {!
! ! log.Fatalf("SQL Error - %s", err)!
! }
func processChannel(tc chan Clickout, rc chan IpResult) {!
! for click := range tc {!
! ! if subnet, err := findNetwork(click.Ip); err == nil {!
! ! ! rc <- IpResult{click, subnet}!
! ! } else {!
! ! ! rc <- IpResult{click, new(IpSubnet)}!
! ! }!
! }!
! close(rc)!
}!
func aggregateResults(rc chan IpResult, rs *map[string]*AggrResults) {!
! results := *rs!
! found, notFound := 0, 0!
!! for result := range rc {!
! ! if result.Subnet.startInt == 0 {!
! ! ! notFound += result.click.Count!
! ! ! log.Printf("Can not find ip %sn", result.click.Ip)!
! ! } else {!
! ! ! found += result.click.Count!
! ! ! log.Printf("%s is {%s - %s} n", result.click.Ip,!
! ! ! ! result.Subnet.startIp, result.Subnet.endIp)!
!! ! ! AddResult(&results, result)!
! ! }!
! }!
! fmt.Printf("%f (%d) IPs in GeoIP db and %f (%d) not found out of %dn",
float32(found)/float32(found+notFound),!
! ! found, float32(notFound)/float32(found+notFound), notFound, found+notFound)!
!! close(done)!
}!
!
package main

import (
	"fmt"
	"runtime"
)
// main prints GOMAXPROCS, the CPU count and the goroutine count, raises
// GOMAXPROCS to the CPU count, and prints the same three values again.
//
// Since Go 1.5 the runtime already defaults GOMAXPROCS to the number of
// available CPUs, so on current toolchains the explicit call is normally
// redundant; it is kept because the before/after comparison is the point of
// the example.
func main() {
	report := func() {
		// GOMAXPROCS(0) only queries the current setting.
		fmt.Printf("GOMAXPROCS is %d %d %d\n",
			runtime.GOMAXPROCS(0), runtime.NumCPU(), runtime.NumGoroutine())
	}

	report()
	runtime.GOMAXPROCS(runtime.NumCPU())
	report()
}
! db, err := geoip2.Open("data/GeoLite2-City.mmdb")!
! if err != nil {!
! ! panic(err)!
! }!
! !
! ip := net.ParseIP("81.2.69.142")!
! record, err := db.City(ip)!
! if err != nil {!
! ! panic(err)!
! }!
!! fmt.Printf("Portuguese (BR) city name: %vn", record.City.Names["pt-BR"])!
! fmt.Printf("English subdivision name: %vn", record.Subdivisions[0].Names["en"])!
! fmt.Printf("Russian country name: %vn", record.Country.Names["ru"])!
! fmt.Printf("ISO country code: %vn", record.Country.IsoCode)!
! fmt.Printf("Time zone: %vn", record.Location.TimeZone)!
! fmt.Printf("Coordinates: %v, %vn", record.Location.Latitude,
record.Location.Longitude)!
!! db.Close()
Google Analytics and BigQuery
var config = &oauth.Config{!
! ClientId: “client-id-here.apps.googleusercontent.com",!
! ClientSecret: “client-secret-here“,!
! Scope: "https://www.googleapis.com/auth/analytics.readonly",!
! AuthURL: "https://accounts.google.com/o/oauth2/auth",!
! TokenURL: "https://accounts.google.com/o/oauth2/token",!
}
! oauthHttpClient := getOAuthClient(config)!
! analyticsService, err := analytics.New(oauthHttpClient)!
! if err != nil {!
! ! log.Fatal("Failed to create GA service")!
! }!
!! dataService := analytics.NewDataGaService(analyticsService)!
! dataGaGetCall := dataService.Get(gaId, start, end, metrics)
! data, err := dataGaGetCall.Do()!
! if err != nil {!
! ! log.Fatal("Failed fetch data from GA")!
! }!
!! return data.Rows
func main() {!
! gaOptions := map[string]string{!
! ! "dimensions": "ga:region,ga:city",!
! ! "sort": "-ga:visits",!
! ! "limit": "10",!
! }!
! rows := fetchGAData(config, "ga:11781168", "2014-04-06", "2014-04-06", !
"ga:visits", gaOptions)!
!! for row := 0; row <= len(rows)-1; row++ {!
! ! fmt.Printf("row=%d %vn", row, rows[row])!
! }!
}
! config := &oauth.Config{!
! ! ClientId: "client-id-here.apps.googleusercontent.com",!
! ! ClientSecret: "client-secret-here",!
! ! Scope: bigquery.BigqueryScope,!
! ! AuthURL: "https://accounts.google.com/o/oauth2/auth",!
! ! TokenURL: "https://accounts.google.com/o/oauth2/token",!
! }!
!
! transport := &oauth.Transport{!
! ! Token: token,!
! ! Config: config,!
! }!
! client := transport.Client()
! service, err := bigquery.New(client)!
! if err != nil {!
! ! panic(err)!
! }!
!
! datasetList, err := service.Datasets.List(“testing-project").Do()!
! if err != nil {!
! ! panic(err)!
! }!
!
! for _, d := range datasetList.Datasets {!
! ! fmt.Println(d.FriendlyName)!
! }!
Useful and interesting Gophers
Interesting Gophers
• Golang machine learning lib 

https://github.com/xlvector/hector
• Logistic Regression
• Factorized Machine
• CART, Random Forest, Random Decision Tree,
Gradient Boosting Decision Tree
• Neural Network
Interesting Gophers
• library for numeric operation

https://github.com/gonum - fairly new, but they are working
to bring some useful packages
• matrix - Scientific math package for the Go
language.
• graph - Discrete math structures and functions
Reference list
• Why are ‘Cool Kids’ at Github Moving to GO
Language? - http://www.homolog.us/blogs/blog/2014/01/16/golang/
• How suitable Go will be for scientific computing? -
https://groups.google.com/forum/#!topic/golang-nuts/_VoZfniBTZE
Thank you!
MUNICH GOPHERS - APR 24 2014, MUNICH
MUNICH GOPHERS - APR 24 2014, MUNICH
Sergii Khomenko,
Data Scientist
STYLIGHT GmbH
sergii.khomenko@stylight.com
@lc0d3r
STYLIGHT.COM
DAHO.AM — Developer Conference 06-06-14
SAVE THE DATE

Weitere ähnliche Inhalte

Andere mochten auch

See This, Do That Analytics presentation from Superweek 2014
See This, Do That Analytics presentation from Superweek 2014See This, Do That Analytics presentation from Superweek 2014
See This, Do That Analytics presentation from Superweek 2014Peter O'Neill
 
Google Analytics Crash Course
Google Analytics Crash CourseGoogle Analytics Crash Course
Google Analytics Crash CoursePeter O'Neill
 
Impacting Business Performance with Analytics
Impacting Business Performance with AnalyticsImpacting Business Performance with Analytics
Impacting Business Performance with AnalyticsPeter O'Neill
 
From Data Science to Production - deploy, scale, enjoy! / PyData Amsterdam - ...
From Data Science to Production - deploy, scale, enjoy! / PyData Amsterdam - ...From Data Science to Production - deploy, scale, enjoy! / PyData Amsterdam - ...
From Data Science to Production - deploy, scale, enjoy! / PyData Amsterdam - ...Sergii Khomenko
 
Get more from Analytics 360 with BigQuery and the Google Cloud Platform
Get more from Analytics 360 with BigQuery and the Google Cloud PlatformGet more from Analytics 360 with BigQuery and the Google Cloud Platform
Get more from Analytics 360 with BigQuery and the Google Cloud Platformjavier ramirez
 
Breaking down the barriers to the use of digital analytics
Breaking down the barriers to the use of digital analyticsBreaking down the barriers to the use of digital analytics
Breaking down the barriers to the use of digital analyticsPeter O'Neill
 
User-Centric Analytics (MeasureCamp Talk)
User-Centric Analytics (MeasureCamp Talk)User-Centric Analytics (MeasureCamp Talk)
User-Centric Analytics (MeasureCamp Talk)Taste Medio
 
Superweek 2015 traffic attribution
Superweek 2015 traffic attributionSuperweek 2015 traffic attribution
Superweek 2015 traffic attributionJacob Kildebogaard
 
A/B Testing Pitfalls - MeasureCamp London 2015
A/B Testing Pitfalls - MeasureCamp London 2015A/B Testing Pitfalls - MeasureCamp London 2015
A/B Testing Pitfalls - MeasureCamp London 2015Michal Parizek
 
Google BigQuery 101 & What’s New
Google BigQuery 101 & What’s NewGoogle BigQuery 101 & What’s New
Google BigQuery 101 & What’s NewDoiT International
 
31 Ways To Destroy Your Google Analytics Implementation
31 Ways To Destroy Your Google Analytics Implementation31 Ways To Destroy Your Google Analytics Implementation
31 Ways To Destroy Your Google Analytics ImplementationCharles Meaden
 
Google Analytics Premium for Better Data-Driven Decisions With Swapnil Sinha
Google Analytics Premium for Better Data-Driven Decisions With Swapnil SinhaGoogle Analytics Premium for Better Data-Driven Decisions With Swapnil Sinha
Google Analytics Premium for Better Data-Driven Decisions With Swapnil SinhaTatvic Analytics
 

Andere mochten auch (13)

See This, Do That Analytics presentation from Superweek 2014
See This, Do That Analytics presentation from Superweek 2014See This, Do That Analytics presentation from Superweek 2014
See This, Do That Analytics presentation from Superweek 2014
 
Google Analytics Crash Course
Google Analytics Crash CourseGoogle Analytics Crash Course
Google Analytics Crash Course
 
Impacting Business Performance with Analytics
Impacting Business Performance with AnalyticsImpacting Business Performance with Analytics
Impacting Business Performance with Analytics
 
From Data Science to Production - deploy, scale, enjoy! / PyData Amsterdam - ...
From Data Science to Production - deploy, scale, enjoy! / PyData Amsterdam - ...From Data Science to Production - deploy, scale, enjoy! / PyData Amsterdam - ...
From Data Science to Production - deploy, scale, enjoy! / PyData Amsterdam - ...
 
Measure camp pres 5 cro myths
Measure camp pres   5 cro mythsMeasure camp pres   5 cro myths
Measure camp pres 5 cro myths
 
Get more from Analytics 360 with BigQuery and the Google Cloud Platform
Get more from Analytics 360 with BigQuery and the Google Cloud PlatformGet more from Analytics 360 with BigQuery and the Google Cloud Platform
Get more from Analytics 360 with BigQuery and the Google Cloud Platform
 
Breaking down the barriers to the use of digital analytics
Breaking down the barriers to the use of digital analyticsBreaking down the barriers to the use of digital analytics
Breaking down the barriers to the use of digital analytics
 
User-Centric Analytics (MeasureCamp Talk)
User-Centric Analytics (MeasureCamp Talk)User-Centric Analytics (MeasureCamp Talk)
User-Centric Analytics (MeasureCamp Talk)
 
Superweek 2015 traffic attribution
Superweek 2015 traffic attributionSuperweek 2015 traffic attribution
Superweek 2015 traffic attribution
 
A/B Testing Pitfalls - MeasureCamp London 2015
A/B Testing Pitfalls - MeasureCamp London 2015A/B Testing Pitfalls - MeasureCamp London 2015
A/B Testing Pitfalls - MeasureCamp London 2015
 
Google BigQuery 101 & What’s New
Google BigQuery 101 & What’s NewGoogle BigQuery 101 & What’s New
Google BigQuery 101 & What’s New
 
31 Ways To Destroy Your Google Analytics Implementation
31 Ways To Destroy Your Google Analytics Implementation31 Ways To Destroy Your Google Analytics Implementation
31 Ways To Destroy Your Google Analytics Implementation
 
Google Analytics Premium for Better Data-Driven Decisions With Swapnil Sinha
Google Analytics Premium for Better Data-Driven Decisions With Swapnil SinhaGoogle Analytics Premium for Better Data-Driven Decisions With Swapnil Sinha
Google Analytics Premium for Better Data-Driven Decisions With Swapnil Sinha
 

Ähnlich wie Crunching data with go: Tips, tricks, use-cases

An Introduction to Go
An Introduction to GoAn Introduction to Go
An Introduction to GoCloudflare
 
20th.陈晓鸣 百度海量日志分析架构及处理经验分享
20th.陈晓鸣 百度海量日志分析架构及处理经验分享20th.陈晓鸣 百度海量日志分析架构及处理经验分享
20th.陈晓鸣 百度海量日志分析架构及处理经验分享elevenma
 
Machine learning on Go Code
Machine learning on Go CodeMachine learning on Go Code
Machine learning on Go Codesource{d}
 
Go, the one language to learn in 2014
Go, the one language to learn in 2014Go, the one language to learn in 2014
Go, the one language to learn in 2014Andrzej Grzesik
 
JDD2014: GO! The one language you have to try in 2014 - Andrzej Grzesik
JDD2014: GO! The one language you have to try in 2014 - Andrzej GrzesikJDD2014: GO! The one language you have to try in 2014 - Andrzej Grzesik
JDD2014: GO! The one language you have to try in 2014 - Andrzej GrzesikPROIDEA
 
Postgres is easier
Postgres is easierPostgres is easier
Postgres is easiergisborne
 
Programming Paradigms Which One Is The Best?
Programming Paradigms Which One Is The Best?Programming Paradigms Which One Is The Best?
Programming Paradigms Which One Is The Best?Netguru
 
Machine Learning on Code - SF meetup
Machine Learning on Code - SF meetupMachine Learning on Code - SF meetup
Machine Learning on Code - SF meetupsource{d}
 
EuroPython 2015 - Big Data with Python and Hadoop
EuroPython 2015 - Big Data with Python and HadoopEuroPython 2015 - Big Data with Python and Hadoop
EuroPython 2015 - Big Data with Python and HadoopMax Tepkeev
 
PyDX Presentation about Python, GeoData and Maps
PyDX Presentation about Python, GeoData and MapsPyDX Presentation about Python, GeoData and Maps
PyDX Presentation about Python, GeoData and MapsHannes Hapke
 
Introduction to source{d} Engine and source{d} Lookout
Introduction to source{d} Engine and source{d} Lookout Introduction to source{d} Engine and source{d} Lookout
Introduction to source{d} Engine and source{d} Lookout source{d}
 
An Empirical Study on the Risks of Using Off-the-Shelf Techniques for Process...
An Empirical Study on the Risks of Using Off-the-Shelf Techniques for Process...An Empirical Study on the Risks of Using Off-the-Shelf Techniques for Process...
An Empirical Study on the Risks of Using Off-the-Shelf Techniques for Process...Nicolas Bettenburg
 
RubyConf Portugal 2014 - Why ruby must go!
RubyConf Portugal 2014 - Why ruby must go!RubyConf Portugal 2014 - Why ruby must go!
RubyConf Portugal 2014 - Why ruby must go!Gautam Rege
 
To GO or not to GO
To GO or not to GOTo GO or not to GO
To GO or not to GOsuperstas88
 
PySpark with Juypter
PySpark with JuypterPySpark with Juypter
PySpark with JuypterLi Ming Tsai
 
Hadoop Streaming: Programming Hadoop without Java
Hadoop Streaming: Programming Hadoop without JavaHadoop Streaming: Programming Hadoop without Java
Hadoop Streaming: Programming Hadoop without JavaGlenn K. Lockwood
 
FrontInBahia 2014: 10 dicas de desempenho para apps mobile híbridas
FrontInBahia 2014: 10 dicas de desempenho para apps mobile híbridasFrontInBahia 2014: 10 dicas de desempenho para apps mobile híbridas
FrontInBahia 2014: 10 dicas de desempenho para apps mobile híbridasLoiane Groner
 

Ähnlich wie Crunching data with go: Tips, tricks, use-cases (20)

An Introduction to Go
An Introduction to GoAn Introduction to Go
An Introduction to Go
 
20th.陈晓鸣 百度海量日志分析架构及处理经验分享
20th.陈晓鸣 百度海量日志分析架构及处理经验分享20th.陈晓鸣 百度海量日志分析架构及处理经验分享
20th.陈晓鸣 百度海量日志分析架构及处理经验分享
 
Machine learning on Go Code
Machine learning on Go CodeMachine learning on Go Code
Machine learning on Go Code
 
Go, the one language to learn in 2014
Go, the one language to learn in 2014Go, the one language to learn in 2014
Go, the one language to learn in 2014
 
JDD2014: GO! The one language you have to try in 2014 - Andrzej Grzesik
JDD2014: GO! The one language you have to try in 2014 - Andrzej GrzesikJDD2014: GO! The one language you have to try in 2014 - Andrzej Grzesik
JDD2014: GO! The one language you have to try in 2014 - Andrzej Grzesik
 
Postgres is easier
Postgres is easierPostgres is easier
Postgres is easier
 
Hadoop I/O Analysis
Hadoop I/O AnalysisHadoop I/O Analysis
Hadoop I/O Analysis
 
Programming Paradigms Which One Is The Best?
Programming Paradigms Which One Is The Best?Programming Paradigms Which One Is The Best?
Programming Paradigms Which One Is The Best?
 
Machine Learning on Code - SF meetup
Machine Learning on Code - SF meetupMachine Learning on Code - SF meetup
Machine Learning on Code - SF meetup
 
EuroPython 2015 - Big Data with Python and Hadoop
EuroPython 2015 - Big Data with Python and HadoopEuroPython 2015 - Big Data with Python and Hadoop
EuroPython 2015 - Big Data with Python and Hadoop
 
PyDX Presentation about Python, GeoData and Maps
PyDX Presentation about Python, GeoData and MapsPyDX Presentation about Python, GeoData and Maps
PyDX Presentation about Python, GeoData and Maps
 
Introduction to source{d} Engine and source{d} Lookout
Introduction to source{d} Engine and source{d} Lookout Introduction to source{d} Engine and source{d} Lookout
Introduction to source{d} Engine and source{d} Lookout
 
An Empirical Study on the Risks of Using Off-the-Shelf Techniques for Process...
An Empirical Study on the Risks of Using Off-the-Shelf Techniques for Process...An Empirical Study on the Risks of Using Off-the-Shelf Techniques for Process...
An Empirical Study on the Risks of Using Off-the-Shelf Techniques for Process...
 
Hadoop london
Hadoop londonHadoop london
Hadoop london
 
RubyConf Portugal 2014 - Why ruby must go!
RubyConf Portugal 2014 - Why ruby must go!RubyConf Portugal 2014 - Why ruby must go!
RubyConf Portugal 2014 - Why ruby must go!
 
To GO or not to GO
To GO or not to GOTo GO or not to GO
To GO or not to GO
 
PySpark with Juypter
PySpark with JuypterPySpark with Juypter
PySpark with Juypter
 
Elk stack
Elk stackElk stack
Elk stack
 
Hadoop Streaming: Programming Hadoop without Java
Hadoop Streaming: Programming Hadoop without JavaHadoop Streaming: Programming Hadoop without Java
Hadoop Streaming: Programming Hadoop without Java
 
FrontInBahia 2014: 10 dicas de desempenho para apps mobile híbridas
FrontInBahia 2014: 10 dicas de desempenho para apps mobile híbridasFrontInBahia 2014: 10 dicas de desempenho para apps mobile híbridas
FrontInBahia 2014: 10 dicas de desempenho para apps mobile híbridas
 

Mehr von Sergii Khomenko

Handle your Lambdas - From event-based processing to Continuous Integration /...
Handle your Lambdas - From event-based processing to Continuous Integration /...Handle your Lambdas - From event-based processing to Continuous Integration /...
Handle your Lambdas - From event-based processing to Continuous Integration /...Sergii Khomenko
 
Building data pipelines: from simple to more advanced - hands-on experience /...
Building data pipelines: from simple to more advanced - hands-on experience /...Building data pipelines: from simple to more advanced - hands-on experience /...
Building data pipelines: from simple to more advanced - hands-on experience /...Sergii Khomenko
 
Scaling up Business Intelligence from the scratch and to 15 countries worldwi...
Scaling up Business Intelligence from the scratch and to 15 countries worldwi...Scaling up Business Intelligence from the scratch and to 15 countries worldwi...
Scaling up Business Intelligence from the scratch and to 15 countries worldwi...Sergii Khomenko
 
Secure Data Scalability at Stylight with Tableau Online and Amazon Redshift /...
Secure Data Scalability at Stylight with Tableau Online and Amazon Redshift /...Secure Data Scalability at Stylight with Tableau Online and Amazon Redshift /...
Secure Data Scalability at Stylight with Tableau Online and Amazon Redshift /...Sergii Khomenko
 
Helping Data Teams with Puppet / Puppet Camp London - Apr 13, 2015
Helping Data Teams with Puppet / Puppet Camp London - Apr 13, 2015Helping Data Teams with Puppet / Puppet Camp London - Apr 13, 2015
Helping Data Teams with Puppet / Puppet Camp London - Apr 13, 2015Sergii Khomenko
 
Scaling your Tableau - Migrating from Tableau Online to a proper DWH solution...
Scaling your Tableau - Migrating from Tableau Online to a proper DWH solution...Scaling your Tableau - Migrating from Tableau Online to a proper DWH solution...
Scaling your Tableau - Migrating from Tableau Online to a proper DWH solution...Sergii Khomenko
 
Building Ranking Infrastructure: Data-Driven, Lean, Flexible - Sergii Khomenk...
Building Ranking Infrastructure: Data-Driven, Lean, Flexible - Sergii Khomenk...Building Ranking Infrastructure: Data-Driven, Lean, Flexible - Sergii Khomenk...
Building Ranking Infrastructure: Data-Driven, Lean, Flexible - Sergii Khomenk...Sergii Khomenko
 
From simple to more advanced: Lessons learned in 13 months with Tableau
From simple to more advanced: Lessons learned in 13 months with TableauFrom simple to more advanced: Lessons learned in 13 months with Tableau
From simple to more advanced: Lessons learned in 13 months with TableauSergii Khomenko
 
Lean Ranking infrastructure with Solr
Lean Ranking infrastructure with SolrLean Ranking infrastructure with Solr
Lean Ranking infrastructure with SolrSergii Khomenko
 
Data Visualization with R
Data Visualization with RData Visualization with R
Data Visualization with RSergii Khomenko
 

Mehr von Sergii Khomenko (10)

Handle your Lambdas - From event-based processing to Continuous Integration /...
Handle your Lambdas - From event-based processing to Continuous Integration /...Handle your Lambdas - From event-based processing to Continuous Integration /...
Handle your Lambdas - From event-based processing to Continuous Integration /...
 
Building data pipelines: from simple to more advanced - hands-on experience /...
Building data pipelines: from simple to more advanced - hands-on experience /...Building data pipelines: from simple to more advanced - hands-on experience /...
Building data pipelines: from simple to more advanced - hands-on experience /...
 
Scaling up Business Intelligence from the scratch and to 15 countries worldwi...
Scaling up Business Intelligence from the scratch and to 15 countries worldwi...Scaling up Business Intelligence from the scratch and to 15 countries worldwi...
Scaling up Business Intelligence from the scratch and to 15 countries worldwi...
 
Secure Data Scalability at Stylight with Tableau Online and Amazon Redshift /...
Secure Data Scalability at Stylight with Tableau Online and Amazon Redshift /...Secure Data Scalability at Stylight with Tableau Online and Amazon Redshift /...
Secure Data Scalability at Stylight with Tableau Online and Amazon Redshift /...
 
Helping Data Teams with Puppet / Puppet Camp London - Apr 13, 2015
Helping Data Teams with Puppet / Puppet Camp London - Apr 13, 2015Helping Data Teams with Puppet / Puppet Camp London - Apr 13, 2015
Helping Data Teams with Puppet / Puppet Camp London - Apr 13, 2015
 
Scaling your Tableau - Migrating from Tableau Online to a proper DWH solution...
Scaling your Tableau - Migrating from Tableau Online to a proper DWH solution...Scaling your Tableau - Migrating from Tableau Online to a proper DWH solution...
Scaling your Tableau - Migrating from Tableau Online to a proper DWH solution...
 
Building Ranking Infrastructure: Data-Driven, Lean, Flexible - Sergii Khomenk...
Building Ranking Infrastructure: Data-Driven, Lean, Flexible - Sergii Khomenk...Building Ranking Infrastructure: Data-Driven, Lean, Flexible - Sergii Khomenk...
Building Ranking Infrastructure: Data-Driven, Lean, Flexible - Sergii Khomenk...
 
From simple to more advanced: Lessons learned in 13 months with Tableau
From simple to more advanced: Lessons learned in 13 months with TableauFrom simple to more advanced: Lessons learned in 13 months with Tableau
From simple to more advanced: Lessons learned in 13 months with Tableau
 
Lean Ranking infrastructure with Solr
Lean Ranking infrastructure with SolrLean Ranking infrastructure with Solr
Lean Ranking infrastructure with Solr
 
Data Visualization with R
Data Visualization with RData Visualization with R
Data Visualization with R
 

Kürzlich hochgeladen

Zeshan Sattar- Assessing the skill requirements and industry expectations for...
Zeshan Sattar- Assessing the skill requirements and industry expectations for...Zeshan Sattar- Assessing the skill requirements and industry expectations for...
Zeshan Sattar- Assessing the skill requirements and industry expectations for...itnewsafrica
 
Generative Artificial Intelligence: How generative AI works.pdf
Generative Artificial Intelligence: How generative AI works.pdfGenerative Artificial Intelligence: How generative AI works.pdf
Generative Artificial Intelligence: How generative AI works.pdfIngrid Airi González
 
New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024
New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024
New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024BookNet Canada
 
Moving Beyond Passwords: FIDO Paris Seminar.pdf
Moving Beyond Passwords: FIDO Paris Seminar.pdfMoving Beyond Passwords: FIDO Paris Seminar.pdf
Moving Beyond Passwords: FIDO Paris Seminar.pdfLoriGlavin3
 
Varsha Sewlal- Cyber Attacks on Critical Critical Infrastructure
Varsha Sewlal- Cyber Attacks on Critical Critical InfrastructureVarsha Sewlal- Cyber Attacks on Critical Critical Infrastructure
Varsha Sewlal- Cyber Attacks on Critical Critical Infrastructureitnewsafrica
 
QCon London: Mastering long-running processes in modern architectures
QCon London: Mastering long-running processes in modern architecturesQCon London: Mastering long-running processes in modern architectures
QCon London: Mastering long-running processes in modern architecturesBernd Ruecker
 
Microsoft 365 Copilot: How to boost your productivity with AI – Part one: Ado...
Microsoft 365 Copilot: How to boost your productivity with AI – Part one: Ado...Microsoft 365 Copilot: How to boost your productivity with AI – Part one: Ado...
Microsoft 365 Copilot: How to boost your productivity with AI – Part one: Ado...Nikki Chapple
 
The Ultimate Guide to Choosing WordPress Pros and Cons
The Ultimate Guide to Choosing WordPress Pros and ConsThe Ultimate Guide to Choosing WordPress Pros and Cons
The Ultimate Guide to Choosing WordPress Pros and ConsPixlogix Infotech
 
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptx
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptxUse of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptx
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptxLoriGlavin3
 
So einfach geht modernes Roaming fuer Notes und Nomad.pdf
So einfach geht modernes Roaming fuer Notes und Nomad.pdfSo einfach geht modernes Roaming fuer Notes und Nomad.pdf
So einfach geht modernes Roaming fuer Notes und Nomad.pdfpanagenda
 
How to write a Business Continuity Plan
How to write a Business Continuity PlanHow to write a Business Continuity Plan
How to write a Business Continuity PlanDatabarracks
 
Bridging Between CAD & GIS: 6 Ways to Automate Your Data Integration
Bridging Between CAD & GIS:  6 Ways to Automate Your Data IntegrationBridging Between CAD & GIS:  6 Ways to Automate Your Data Integration
Bridging Between CAD & GIS: 6 Ways to Automate Your Data Integrationmarketing932765
 
Potential of AI (Generative AI) in Business: Learnings and Insights
Potential of AI (Generative AI) in Business: Learnings and InsightsPotential of AI (Generative AI) in Business: Learnings and Insights
Potential of AI (Generative AI) in Business: Learnings and InsightsRavi Sanghani
 
TeamStation AI System Report LATAM IT Salaries 2024
TeamStation AI System Report LATAM IT Salaries 2024TeamStation AI System Report LATAM IT Salaries 2024
TeamStation AI System Report LATAM IT Salaries 2024Lonnie McRorey
 
Time Series Foundation Models - current state and future directions
Time Series Foundation Models - current state and future directionsTime Series Foundation Models - current state and future directions
Time Series Foundation Models - current state and future directionsNathaniel Shimoni
 
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptx
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptxThe Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptx
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptxLoriGlavin3
 
Abdul Kader Baba- Managing Cybersecurity Risks and Compliance Requirements i...
Abdul Kader Baba- Managing Cybersecurity Risks  and Compliance Requirements i...Abdul Kader Baba- Managing Cybersecurity Risks  and Compliance Requirements i...
Abdul Kader Baba- Managing Cybersecurity Risks and Compliance Requirements i...itnewsafrica
 
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptxThe Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptxLoriGlavin3
 
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptx
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptxMerck Moving Beyond Passwords: FIDO Paris Seminar.pptx
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptxLoriGlavin3
 
Scale your database traffic with Read & Write split using MySQL Router
Scale your database traffic with Read & Write split using MySQL RouterScale your database traffic with Read & Write split using MySQL Router
Scale your database traffic with Read & Write split using MySQL RouterMydbops
 

Kürzlich hochgeladen (20)

Zeshan Sattar- Assessing the skill requirements and industry expectations for...
Zeshan Sattar- Assessing the skill requirements and industry expectations for...Zeshan Sattar- Assessing the skill requirements and industry expectations for...
Zeshan Sattar- Assessing the skill requirements and industry expectations for...
 
Generative Artificial Intelligence: How generative AI works.pdf
Generative Artificial Intelligence: How generative AI works.pdfGenerative Artificial Intelligence: How generative AI works.pdf
Generative Artificial Intelligence: How generative AI works.pdf
 
New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024
New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024
New from BookNet Canada for 2024: Loan Stars - Tech Forum 2024
 
Moving Beyond Passwords: FIDO Paris Seminar.pdf
Moving Beyond Passwords: FIDO Paris Seminar.pdfMoving Beyond Passwords: FIDO Paris Seminar.pdf
Moving Beyond Passwords: FIDO Paris Seminar.pdf
 
Varsha Sewlal- Cyber Attacks on Critical Critical Infrastructure
Varsha Sewlal- Cyber Attacks on Critical Critical InfrastructureVarsha Sewlal- Cyber Attacks on Critical Critical Infrastructure
Varsha Sewlal- Cyber Attacks on Critical Critical Infrastructure
 
QCon London: Mastering long-running processes in modern architectures
QCon London: Mastering long-running processes in modern architecturesQCon London: Mastering long-running processes in modern architectures
QCon London: Mastering long-running processes in modern architectures
 
Microsoft 365 Copilot: How to boost your productivity with AI – Part one: Ado...
Microsoft 365 Copilot: How to boost your productivity with AI – Part one: Ado...Microsoft 365 Copilot: How to boost your productivity with AI – Part one: Ado...
Microsoft 365 Copilot: How to boost your productivity with AI – Part one: Ado...
 
The Ultimate Guide to Choosing WordPress Pros and Cons
The Ultimate Guide to Choosing WordPress Pros and ConsThe Ultimate Guide to Choosing WordPress Pros and Cons
The Ultimate Guide to Choosing WordPress Pros and Cons
 
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptx
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptxUse of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptx
Use of FIDO in the Payments and Identity Landscape: FIDO Paris Seminar.pptx
 
So einfach geht modernes Roaming fuer Notes und Nomad.pdf
So einfach geht modernes Roaming fuer Notes und Nomad.pdfSo einfach geht modernes Roaming fuer Notes und Nomad.pdf
So einfach geht modernes Roaming fuer Notes und Nomad.pdf
 
How to write a Business Continuity Plan
How to write a Business Continuity PlanHow to write a Business Continuity Plan
How to write a Business Continuity Plan
 
Bridging Between CAD & GIS: 6 Ways to Automate Your Data Integration
Bridging Between CAD & GIS:  6 Ways to Automate Your Data IntegrationBridging Between CAD & GIS:  6 Ways to Automate Your Data Integration
Bridging Between CAD & GIS: 6 Ways to Automate Your Data Integration
 
Potential of AI (Generative AI) in Business: Learnings and Insights
Potential of AI (Generative AI) in Business: Learnings and InsightsPotential of AI (Generative AI) in Business: Learnings and Insights
Potential of AI (Generative AI) in Business: Learnings and Insights
 
TeamStation AI System Report LATAM IT Salaries 2024
TeamStation AI System Report LATAM IT Salaries 2024TeamStation AI System Report LATAM IT Salaries 2024
TeamStation AI System Report LATAM IT Salaries 2024
 
Time Series Foundation Models - current state and future directions
Time Series Foundation Models - current state and future directionsTime Series Foundation Models - current state and future directions
Time Series Foundation Models - current state and future directions
 
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptx
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptxThe Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptx
The Role of FIDO in a Cyber Secure Netherlands: FIDO Paris Seminar.pptx
 
Abdul Kader Baba- Managing Cybersecurity Risks and Compliance Requirements i...
Abdul Kader Baba- Managing Cybersecurity Risks  and Compliance Requirements i...Abdul Kader Baba- Managing Cybersecurity Risks  and Compliance Requirements i...
Abdul Kader Baba- Managing Cybersecurity Risks and Compliance Requirements i...
 
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptxThe Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
The Fit for Passkeys for Employee and Consumer Sign-ins: FIDO Paris Seminar.pptx
 
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptx
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptxMerck Moving Beyond Passwords: FIDO Paris Seminar.pptx
Merck Moving Beyond Passwords: FIDO Paris Seminar.pptx
 
Scale your database traffic with Read & Write split using MySQL Router
Scale your database traffic with Read & Write split using MySQL RouterScale your database traffic with Read & Write split using MySQL Router
Scale your database traffic with Read & Write split using MySQL Router
 

Crunching data with go: Tips, tricks, use-cases

  • 1. Crunching data with go: Tips, tricks, use-cases Sergii Khomenko, Data Scientist, STYLIGHT sergii.khomenko@stylight.com @lc0d3r MUNICH GOPHERS - APR 24 2014, MUNICH
  • 2. Agenda: Relational databases, Google Analytics and BigQuery, Geolocation, Useful things from Go-world. WHAT IT’S ABOUT
  • 4. • github.com/jmoiron/sqlx type Clickout struct {! ! Id, Count int! ! Ip string! ! Type int! ! Commision, Eu_commission float32! }
  • 5. ! db, err := sqlx.Connect(config.Database.Driver, fmt.Sprintf("%s:%s@%s(%s)/%s? parseTime=true", config.Database.Username,! ! ! config.Database.Password, config.Database.Protocol, config.Database.Server, config.Database.Database))! !! fmt.Printf("Connect to %s:(%s)... n", config.Database.Protocol, config.Database.Server)! ! if err != nil {! ! ! log.Fatalf("Can not connect to the mysql server - %s", err)! ! ! return! ! }! ! defer db.Close()! !!
  • 6. ! dbParams := paramStruct{"start": arguments["<from>"].(string) + " 00:00:00", "end": arguments["<to>"].(string) + " 23:59:59"}! ! geoParams := paramStruct{}! !! siteStr, _ := arguments["--site"].(string)! ! if siteInt, err2 := strconv.Atoi(siteStr); err2 == nil {! ! ! dbParams["site"] = siteInt! ! }! !! query := getClickoutsQuery(dbParams)! ! rows, err := db.Queryx(query)
  • 7. ! if err == nil {! ! ! for rows.Next() {! ! ! ! click := Clickout{}! !! ! ! err2 := rows.StructScan(&click)! ! ! ! if err2 == nil {! ! ! ! ! task <- click! !! ! ! } else {! ! ! ! ! fmt.Println(err2)! ! ! ! }! ! ! }! ! ! close(task)! ! } else {! ! ! log.Fatalf("SQL Error - %s", err)! ! }!
  • 8. Geolocation WHERE MY IPS ARE FROM
  • 9. ! task := make(chan Clickout)! ! result := make(chan IpResult)! ! done = make(chan interface{})! !! go processChannel(task, result)! ! go aggregateResults(result, &results)! !! if err == nil {! ! ! for rows.Next() {! ! ! ! click := Clickout{}! !! ! ! err2 := rows.StructScan(&click)! ! ! ! if err2 == nil {! ! ! ! ! task <- click! !! ! ! } else {! ! ! ! ! fmt.Println(err2)! ! ! ! }! ! ! }! ! ! close(task)! ! } else {! ! ! log.Fatalf("SQL Error - %s", err)! ! }
  • 10. func processChannel(tc chan Clickout, rc chan IpResult) {! ! for click := range tc {! ! ! if subnet, err := findNetwork(click.Ip); err == nil {! ! ! ! rc <- IpResult{click, subnet}! ! ! } else {! ! ! ! rc <- IpResult{click, new(IpSubnet)}! ! ! }! ! }! ! close(rc)! }!
  • 11. func aggregateResults(rc chan IpResult, rs *map[string]*AggrResults) {! ! results := *rs! ! found, notFound := 0, 0! !! for result := range rc {! ! ! if result.Subnet.startInt == 0 {! ! ! ! notFound += result.click.Count! ! ! ! log.Printf("Can not find ip %sn", result.click.Ip)! ! ! } else {! ! ! ! found += result.click.Count! ! ! ! log.Printf("%s is {%s - %s} n", result.click.Ip,! ! ! ! ! result.Subnet.startIp, result.Subnet.endIp)! !! ! ! AddResult(&results, result)! ! ! }! ! }! ! fmt.Printf("%f (%d) IPs in GeoIP db and %f (%d) not found out of %dn", float32(found)/float32(found+notFound),! ! ! found, float32(notFound)/float32(found+notFound), notFound, found+notFound)! !! close(done)! }! !
  • 12. package main! !import (! ! "fmt"! ! "runtime"! )! !func main() {! !! fmt.Printf("GOMAXPROCS is %d %d %dn", runtime.GOMAXPROCS(0), runtime.NumCPU(), runtime.NumGoroutine())! !! runtime.GOMAXPROCS(runtime.NumCPU())! ! fmt.Printf("GOMAXPROCS is %d %d %dn", runtime.GOMAXPROCS(0), runtime.NumCPU(), runtime.NumGoroutine())! !}!
  • 13. ! db, err := geoip2.Open("data/GeoLite2-City.mmdb")! ! if err != nil {! ! ! panic(err)! ! }! ! ! ! ip := net.ParseIP("81.2.69.142")! ! record, err := db.City(ip)! ! if err != nil {! ! ! panic(err)! ! }! !! fmt.Printf("Portuguese (BR) city name: %vn", record.City.Names["pt-BR"])! ! fmt.Printf("English subdivision name: %vn", record.Subdivisions[0].Names["en"])! ! fmt.Printf("Russian country name: %vn", record.Country.Names["ru"])! ! fmt.Printf("ISO country code: %vn", record.Country.IsoCode)! ! fmt.Printf("Time zone: %vn", record.Location.TimeZone)! ! fmt.Printf("Coordinates: %v, %vn", record.Location.Latitude, record.Location.Longitude)! !! db.Close()
  • 15. var config = &oauth.Config{! ! ClientId: “client-id-here.apps.googleusercontent.com",! ! ClientSecret: “client-secret-here“,! ! Scope: "https://www.googleapis.com/auth/analytics.readonly",! ! AuthURL: "https://accounts.google.com/o/oauth2/auth",! ! TokenURL: "https://accounts.google.com/o/oauth2/token",! }
  • 16. ! oauthHttpClient := getOAuthClient(config)! ! analyticsService, err := analytics.New(oauthHttpClient)! ! if err != nil {! ! ! log.Fatal("Failed to create GA service")! ! }! !! dataService := analytics.NewDataGaService(analyticsService)! ! dataGaGetCall := dataService.Get(gaId, start, end, metrics)
  • 17. ! data, err := dataGaGetCall.Do()! ! if err != nil {! ! ! log.Fatal("Failed fetch data from GA")! ! }! !! return data.Rows
  • 18. func main() {! ! gaOptions := map[string]string{! ! ! "dimensions": "ga:region,ga:city",! ! ! "sort": "-ga:visits",! ! ! "limit": "10",! ! }! ! rows := fetchGAData(config, "ga:11781168", "2014-04-06", "2014-04-06", ! "ga:visits", gaOptions)! !! for row := 0; row <= len(rows)-1; row++ {! ! ! fmt.Printf("row=%d %vn", row, rows[row])! ! }! }
  • 19.
  • 20. ! config := &oauth.Config{! ! ! ClientId: "client-id-here.apps.googleusercontent.com",! ! ! ClientSecret: "client-secret-here",! ! ! Scope: bigquery.BigqueryScope,! ! ! AuthURL: "https://accounts.google.com/o/oauth2/auth",! ! ! TokenURL: "https://accounts.google.com/o/oauth2/token",! ! }! ! ! transport := &oauth.Transport{! ! ! Token: token,! ! ! Config: config,! ! }! ! client := transport.Client()
  • 21. ! service, err := bigquery.New(client)! ! if err != nil {! ! ! panic(err)! ! }! ! ! datasetList, err := service.Datasets.List(“testing-project").Do()! ! if err != nil {! ! ! panic(err)! ! }! ! ! for _, d := range datasetList.Datasets {! ! ! fmt.Println(d.FriendlyName)! ! }!
  • 23. Interesting Gophers • Golang machine learning lib 
 https://github.com/xlvector/hector • Logistic Regression • Factorized Machine • CART, Random Forest, Random Decision Tree, Gradient Boosting Decision Tree • Neural Network
  • 24. Interesting Gophers • library for numeric operation
 https://github.com/gonum - fairly new, but they are working to bring some useful packages • matrix - Scientific math package for the Go language. • graph - Discrete math structures and functions
  • 25. Reference list • Why are ‘Cool Kids’ at Github Moving to GO Language? - http://www.homolog.us/blogs/blog/2014/01/16/golang/ • How suitable Go will be for scientific computing? - https://groups.google.com/forum/#!topic/golang-nuts/_VoZfniBTZE
  • 26. Thank you! MUNICH GOPHERS - APR 24 2014, MUNICH
  • 27. MUNICH GOPHERS - APR 24 2014, MUNICH Sergii Khomenko, Data Scientist STYLIGHT GmbH sergii.khomenko@stylight.com @lc0d3r STYLIGHT.COM
  • 28. DAHO.AM — Developer Conference 06-06-14 SAVE THE DATE