SlideShare ist ein Scribd-Unternehmen logo
1 von 28
Downloaden Sie, um offline zu lesen
!
Crunching data with go:
Tips, tricks, use-cases
Sergii Khomenko, Data Scientist, STYLIGHT
sergii.khomenko@stylight.com @lc0d3r
MUNICH GOPHERS - APR 24 2014, MUNICH
Agenda
Relational databases
!
Google Analytics and BigQuery
!
Geolocation
!
Useful things from Go-world
WHAT IT'S ABOUT
Relational databases
‱ github.com/jmoiron/sqlx
// Clickout is one row of click-out data. Values of this type are filled in
// by rows.StructScan and then passed along the task channel.
//
// NOTE(review): the field names (including the "Commision" spelling and the
// underscore in Eu_commission) are deliberately left untouched; struct
// scanning presumably maps them to column names — confirm before renaming.
type Clickout struct {
	Id, Count                int
	Ip                       string
	Type                     int
	Commision, Eu_commission float32
}
! db, err := sqlx.Connect(config.Database.Driver, fmt.Sprintf("%s:%s@%s(%s)/%s?
parseTime=true", config.Database.Username,!
! ! config.Database.Password, config.Database.Protocol, config.Database.Server,
config.Database.Database))!
!! fmt.Printf("Connect to %s:(%s)... n", config.Database.Protocol,
config.Database.Server)!
! if err != nil {!
! ! log.Fatalf("Can not connect to the mysql server - %s", err)!
! ! return!
! }!
! defer db.Close()!
!!
! dbParams := paramStruct{"start": arguments["<from>"].(string) + " 00:00:00", "end":
arguments["<to>"].(string) + " 23:59:59"}!
! geoParams := paramStruct{}!
!! siteStr, _ := arguments["--site"].(string)!
! if siteInt, err2 := strconv.Atoi(siteStr); err2 == nil {!
! ! dbParams["site"] = siteInt!
! }!
!! query := getClickoutsQuery(dbParams)!
! rows, err := db.Queryx(query)
! if err == nil {!
! ! for rows.Next() {!
! ! ! click := Clickout{}!
!! ! ! err2 := rows.StructScan(&click)!
! ! ! if err2 == nil {!
! ! ! ! task <- click!
!! ! ! } else {!
! ! ! ! fmt.Println(err2)!
! ! ! }!
! ! }!
! ! close(task)!
! } else {!
! ! log.Fatalf("SQL Error - %s", err)!
! }!
Geolocation
WHERE MY IPS ARE FROM
! task := make(chan Clickout)!
! result := make(chan IpResult)!
! done = make(chan interface{})!
!! go processChannel(task, result)!
! go aggregateResults(result, &results)!
!! if err == nil {!
! ! for rows.Next() {!
! ! ! click := Clickout{}!
!! ! ! err2 := rows.StructScan(&click)!
! ! ! if err2 == nil {!
! ! ! ! task <- click!
!! ! ! } else {!
! ! ! ! fmt.Println(err2)!
! ! ! }!
! ! }!
! ! close(task)!
! } else {!
! ! log.Fatalf("SQL Error - %s", err)!
! }
func processChannel(tc chan Clickout, rc chan IpResult) {!
! for click := range tc {!
! ! if subnet, err := findNetwork(click.Ip); err == nil {!
! ! ! rc <- IpResult{click, subnet}!
! ! } else {!
! ! ! rc <- IpResult{click, new(IpSubnet)}!
! ! }!
! }!
! close(rc)!
}!
func aggregateResults(rc chan IpResult, rs *map[string]*AggrResults) {!
! results := *rs!
! found, notFound := 0, 0!
!! for result := range rc {!
! ! if result.Subnet.startInt == 0 {!
! ! ! notFound += result.click.Count!
! ! ! log.Printf("Can not find ip %sn", result.click.Ip)!
! ! } else {!
! ! ! found += result.click.Count!
! ! ! log.Printf("%s is {%s - %s} n", result.click.Ip,!
! ! ! ! result.Subnet.startIp, result.Subnet.endIp)!
!! ! ! AddResult(&results, result)!
! ! }!
! }!
! fmt.Printf("%f (%d) IPs in GeoIP db and %f (%d) not found out of %dn",
float32(found)/float32(found+notFound),!
! ! found, float32(notFound)/float32(found+notFound), notFound, found+notFound)!
!! close(done)!
}!
!
package main!
!import (!
! "fmt"!
! "runtime"!
)!
// main prints the current GOMAXPROCS setting, the CPU count and the number of
// goroutines, sets GOMAXPROCS to the CPU count, and prints the three values
// again so the effect of the call is visible.
func main() {
	// GOMAXPROCS(0) only queries the current setting; it does not change it.
	report := func() {
		fmt.Printf("GOMAXPROCS is %d %d %d\n",
			runtime.GOMAXPROCS(0), runtime.NumCPU(), runtime.NumGoroutine())
	}

	report()
	runtime.GOMAXPROCS(runtime.NumCPU())
	report()
}
! db, err := geoip2.Open("data/GeoLite2-City.mmdb")!
! if err != nil {!
! ! panic(err)!
! }!
! !
! ip := net.ParseIP("81.2.69.142")!
! record, err := db.City(ip)!
! if err != nil {!
! ! panic(err)!
! }!
!! fmt.Printf("Portuguese (BR) city name: %vn", record.City.Names["pt-BR"])!
! fmt.Printf("English subdivision name: %vn", record.Subdivisions[0].Names["en"])!
! fmt.Printf("Russian country name: %vn", record.Country.Names["ru"])!
! fmt.Printf("ISO country code: %vn", record.Country.IsoCode)!
! fmt.Printf("Time zone: %vn", record.Location.TimeZone)!
! fmt.Printf("Coordinates: %v, %vn", record.Location.Latitude,
record.Location.Longitude)!
!! db.Close()
Google Analytics and BigQuery
var config = &oauth.Config{!
! ClientId: “client-id-here.apps.googleusercontent.com",!
! ClientSecret: “client-secret-here“,!
! Scope: "https://www.googleapis.com/auth/analytics.readonly",!
! AuthURL: "https://accounts.google.com/o/oauth2/auth",!
! TokenURL: "https://accounts.google.com/o/oauth2/token",!
}
! oauthHttpClient := getOAuthClient(config)!
! analyticsService, err := analytics.New(oauthHttpClient)!
! if err != nil {!
! ! log.Fatal("Failed to create GA service")!
! }!
!! dataService := analytics.NewDataGaService(analyticsService)!
! dataGaGetCall := dataService.Get(gaId, start, end, metrics)
! data, err := dataGaGetCall.Do()!
! if err != nil {!
! ! log.Fatal("Failed fetch data from GA")!
! }!
!! return data.Rows
func main() {!
! gaOptions := map[string]string{!
! ! "dimensions": "ga:region,ga:city",!
! ! "sort": "-ga:visits",!
! ! "limit": "10",!
! }!
! rows := fetchGAData(config, "ga:11781168", "2014-04-06", "2014-04-06", !
"ga:visits", gaOptions)!
!! for row := 0; row <= len(rows)-1; row++ {!
! ! fmt.Printf("row=%d %vn", row, rows[row])!
! }!
}
! config := &oauth.Config{!
! ! ClientId: "client-id-here.apps.googleusercontent.com",!
! ! ClientSecret: "client-secret-here",!
! ! Scope: bigquery.BigqueryScope,!
! ! AuthURL: "https://accounts.google.com/o/oauth2/auth",!
! ! TokenURL: "https://accounts.google.com/o/oauth2/token",!
! }!
!
! transport := &oauth.Transport{!
! ! Token: token,!
! ! Config: config,!
! }!
! client := transport.Client()
! service, err := bigquery.New(client)!
! if err != nil {!
! ! panic(err)!
! }!
!
! datasetList, err := service.Datasets.List(“testing-project").Do()!
! if err != nil {!
! ! panic(err)!
! }!
!
! for _, d := range datasetList.Datasets {!
! ! fmt.Println(d.FriendlyName)!
! }!
Useful and interesting Gophers
Interesting Gophers
‱ Golang machine learning lib
https://github.com/xlvector/hector
‱ Logistic Regression
‱ Factorized Machine
‱ CART, Random Forest, Random Decision Tree,
Gradient Boosting Decision Tree
‱ Neural Network
Interesting Gophers
‱ Library for numeric operations
https://github.com/gonum - fairly, but they are working
to bring some useful packages
‱ matrix - Scientific math package for the Go
language.
‱ graph - Discrete math structures and functions
Reference list
‱ Why are ‘Cool Kids’ at Github Moving to GO
Language? - http://www.homolog.us/blogs/blog/2014/01/16/golang/
‱ How suitable Go will be for scientific computing? -
https://groups.google.com/forum/#!topic/golang-nuts/_VoZfniBTZE
Thank you!
MUNICH GOPHERS - APR 24 2014, MUNICH
MUNICH GOPHERS - APR 24 2014, MUNICH
Sergii Khomenko,
Data Scientist
STYLIGHT GmbH
sergii.khomenko@stylight.com
@lc0d3r
!
STYLIGHT.COM
DAHO.AM — Developer Conference 06-06-14
SAVE THE DATE

Weitere Àhnliche Inhalte

Andere mochten auch

Andere mochten auch (13)

See This, Do That Analytics presentation from Superweek 2014
See This, Do That Analytics presentation from Superweek 2014See This, Do That Analytics presentation from Superweek 2014
See This, Do That Analytics presentation from Superweek 2014
 
Google Analytics Crash Course
Google Analytics Crash CourseGoogle Analytics Crash Course
Google Analytics Crash Course
 
Impacting Business Performance with Analytics
Impacting Business Performance with AnalyticsImpacting Business Performance with Analytics
Impacting Business Performance with Analytics
 
From Data Science to Production - deploy, scale, enjoy! / PyData Amsterdam - ...
From Data Science to Production - deploy, scale, enjoy! / PyData Amsterdam - ...From Data Science to Production - deploy, scale, enjoy! / PyData Amsterdam - ...
From Data Science to Production - deploy, scale, enjoy! / PyData Amsterdam - ...
 
Measure camp pres 5 cro myths
Measure camp pres   5 cro mythsMeasure camp pres   5 cro myths
Measure camp pres 5 cro myths
 
Get more from Analytics 360 with BigQuery and the Google Cloud Platform
Get more from Analytics 360 with BigQuery and the Google Cloud PlatformGet more from Analytics 360 with BigQuery and the Google Cloud Platform
Get more from Analytics 360 with BigQuery and the Google Cloud Platform
 
Breaking down the barriers to the use of digital analytics
Breaking down the barriers to the use of digital analyticsBreaking down the barriers to the use of digital analytics
Breaking down the barriers to the use of digital analytics
 
User-Centric Analytics (MeasureCamp Talk)
User-Centric Analytics (MeasureCamp Talk)User-Centric Analytics (MeasureCamp Talk)
User-Centric Analytics (MeasureCamp Talk)
 
Superweek 2015 traffic attribution
Superweek 2015 traffic attributionSuperweek 2015 traffic attribution
Superweek 2015 traffic attribution
 
A/B Testing Pitfalls - MeasureCamp London 2015
A/B Testing Pitfalls - MeasureCamp London 2015A/B Testing Pitfalls - MeasureCamp London 2015
A/B Testing Pitfalls - MeasureCamp London 2015
 
Google BigQuery 101 & What’s New
Google BigQuery 101 & What’s NewGoogle BigQuery 101 & What’s New
Google BigQuery 101 & What’s New
 
31 Ways To Destroy Your Google Analytics Implementation
31 Ways To Destroy Your Google Analytics Implementation31 Ways To Destroy Your Google Analytics Implementation
31 Ways To Destroy Your Google Analytics Implementation
 
Google Analytics Premium for Better Data-Driven Decisions With Swapnil Sinha
Google Analytics Premium for Better Data-Driven Decisions With Swapnil SinhaGoogle Analytics Premium for Better Data-Driven Decisions With Swapnil Sinha
Google Analytics Premium for Better Data-Driven Decisions With Swapnil Sinha
 

Ähnlich wie Crunching data with go: Tips, tricks, use-cases

An Introduction to Go
An Introduction to GoAn Introduction to Go
An Introduction to Go
Cloudflare
 
20th.é™ˆæ™“éžŁ 癟ćșŠæ”·é‡æ—„ćż—ćˆ†æžæž¶æž„ćŠć€„ç†ç»éȘŒćˆ†äș«
20th.é™ˆæ™“éžŁ 癟ćșŠæ”·é‡æ—„ćż—ćˆ†æžæž¶æž„ćŠć€„ç†ç»éȘŒćˆ†äș«20th.é™ˆæ™“éžŁ 癟ćșŠæ”·é‡æ—„ćż—ćˆ†æžæž¶æž„ćŠć€„ç†ç»éȘŒćˆ†äș«
20th.é™ˆæ™“éžŁ 癟ćșŠæ”·é‡æ—„ćż—ćˆ†æžæž¶æž„ćŠć€„ç†ç»éȘŒćˆ†äș«
elevenma
 

Ähnlich wie Crunching data with go: Tips, tricks, use-cases (20)

An Introduction to Go
An Introduction to GoAn Introduction to Go
An Introduction to Go
 
20th.é™ˆæ™“éžŁ 癟ćșŠæ”·é‡æ—„ćż—ćˆ†æžæž¶æž„ćŠć€„ç†ç»éȘŒćˆ†äș«
20th.é™ˆæ™“éžŁ 癟ćșŠæ”·é‡æ—„ćż—ćˆ†æžæž¶æž„ćŠć€„ç†ç»éȘŒćˆ†äș«20th.é™ˆæ™“éžŁ 癟ćșŠæ”·é‡æ—„ćż—ćˆ†æžæž¶æž„ćŠć€„ç†ç»éȘŒćˆ†äș«
20th.é™ˆæ™“éžŁ 癟ćșŠæ”·é‡æ—„ćż—ćˆ†æžæž¶æž„ćŠć€„ç†ç»éȘŒćˆ†äș«
 
Machine learning on Go Code
Machine learning on Go CodeMachine learning on Go Code
Machine learning on Go Code
 
Go, the one language to learn in 2014
Go, the one language to learn in 2014Go, the one language to learn in 2014
Go, the one language to learn in 2014
 
JDD2014: GO! The one language you have to try in 2014 - Andrzej Grzesik
JDD2014: GO! The one language you have to try in 2014 - Andrzej GrzesikJDD2014: GO! The one language you have to try in 2014 - Andrzej Grzesik
JDD2014: GO! The one language you have to try in 2014 - Andrzej Grzesik
 
Postgres is easier
Postgres is easierPostgres is easier
Postgres is easier
 
Hadoop I/O Analysis
Hadoop I/O AnalysisHadoop I/O Analysis
Hadoop I/O Analysis
 
Programming Paradigms Which One Is The Best?
Programming Paradigms Which One Is The Best?Programming Paradigms Which One Is The Best?
Programming Paradigms Which One Is The Best?
 
Machine Learning on Code - SF meetup
Machine Learning on Code - SF meetupMachine Learning on Code - SF meetup
Machine Learning on Code - SF meetup
 
EuroPython 2015 - Big Data with Python and Hadoop
EuroPython 2015 - Big Data with Python and HadoopEuroPython 2015 - Big Data with Python and Hadoop
EuroPython 2015 - Big Data with Python and Hadoop
 
PyDX Presentation about Python, GeoData and Maps
PyDX Presentation about Python, GeoData and MapsPyDX Presentation about Python, GeoData and Maps
PyDX Presentation about Python, GeoData and Maps
 
Introduction to source{d} Engine and source{d} Lookout
Introduction to source{d} Engine and source{d} Lookout Introduction to source{d} Engine and source{d} Lookout
Introduction to source{d} Engine and source{d} Lookout
 
An Empirical Study on the Risks of Using Off-the-Shelf Techniques for Process...
An Empirical Study on the Risks of Using Off-the-Shelf Techniques for Process...An Empirical Study on the Risks of Using Off-the-Shelf Techniques for Process...
An Empirical Study on the Risks of Using Off-the-Shelf Techniques for Process...
 
Hadoop london
Hadoop londonHadoop london
Hadoop london
 
RubyConf Portugal 2014 - Why ruby must go!
RubyConf Portugal 2014 - Why ruby must go!RubyConf Portugal 2014 - Why ruby must go!
RubyConf Portugal 2014 - Why ruby must go!
 
To GO or not to GO
To GO or not to GOTo GO or not to GO
To GO or not to GO
 
PySpark with Juypter
PySpark with JuypterPySpark with Juypter
PySpark with Juypter
 
Elk stack
Elk stackElk stack
Elk stack
 
Hadoop Streaming: Programming Hadoop without Java
Hadoop Streaming: Programming Hadoop without JavaHadoop Streaming: Programming Hadoop without Java
Hadoop Streaming: Programming Hadoop without Java
 
FrontInBahia 2014: 10 dicas de desempenho para apps mobile hĂ­bridas
FrontInBahia 2014: 10 dicas de desempenho para apps mobile hĂ­bridasFrontInBahia 2014: 10 dicas de desempenho para apps mobile hĂ­bridas
FrontInBahia 2014: 10 dicas de desempenho para apps mobile hĂ­bridas
 

Mehr von Sergii Khomenko

Mehr von Sergii Khomenko (10)

Handle your Lambdas - From event-based processing to Continuous Integration /...
Handle your Lambdas - From event-based processing to Continuous Integration /...Handle your Lambdas - From event-based processing to Continuous Integration /...
Handle your Lambdas - From event-based processing to Continuous Integration /...
 
Building data pipelines: from simple to more advanced - hands-on experience /...
Building data pipelines: from simple to more advanced - hands-on experience /...Building data pipelines: from simple to more advanced - hands-on experience /...
Building data pipelines: from simple to more advanced - hands-on experience /...
 
Scaling up Business Intelligence from the scratch and to 15 countries worldwi...
Scaling up Business Intelligence from the scratch and to 15 countries worldwi...Scaling up Business Intelligence from the scratch and to 15 countries worldwi...
Scaling up Business Intelligence from the scratch and to 15 countries worldwi...
 
Secure Data Scalability at Stylight with Tableau Online and Amazon Redshift /...
Secure Data Scalability at Stylight with Tableau Online and Amazon Redshift /...Secure Data Scalability at Stylight with Tableau Online and Amazon Redshift /...
Secure Data Scalability at Stylight with Tableau Online and Amazon Redshift /...
 
Helping Data Teams with Puppet / Puppet Camp London - Apr 13, 2015
Helping Data Teams with Puppet / Puppet Camp London - Apr 13, 2015Helping Data Teams with Puppet / Puppet Camp London - Apr 13, 2015
Helping Data Teams with Puppet / Puppet Camp London - Apr 13, 2015
 
Scaling your Tableau - Migrating from Tableau Online to a proper DWH solution...
Scaling your Tableau - Migrating from Tableau Online to a proper DWH solution...Scaling your Tableau - Migrating from Tableau Online to a proper DWH solution...
Scaling your Tableau - Migrating from Tableau Online to a proper DWH solution...
 
Building Ranking Infrastructure: Data-Driven, Lean, Flexible - Sergii Khomenk...
Building Ranking Infrastructure: Data-Driven, Lean, Flexible - Sergii Khomenk...Building Ranking Infrastructure: Data-Driven, Lean, Flexible - Sergii Khomenk...
Building Ranking Infrastructure: Data-Driven, Lean, Flexible - Sergii Khomenk...
 
From simple to more advanced: Lessons learned in 13 months with Tableau
From simple to more advanced: Lessons learned in 13 months with TableauFrom simple to more advanced: Lessons learned in 13 months with Tableau
From simple to more advanced: Lessons learned in 13 months with Tableau
 
Lean Ranking infrastructure with Solr
Lean Ranking infrastructure with SolrLean Ranking infrastructure with Solr
Lean Ranking infrastructure with Solr
 
Data Visualization with R
Data Visualization with RData Visualization with R
Data Visualization with R
 

KĂŒrzlich hochgeladen

+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
?#DUbAI#??##{{(☎+971_581248768%)**%*]'#abortion pills for sale in dubai@
 

KĂŒrzlich hochgeladen (20)

Boost Fertility New Invention Ups Success Rates.pdf
Boost Fertility New Invention Ups Success Rates.pdfBoost Fertility New Invention Ups Success Rates.pdf
Boost Fertility New Invention Ups Success Rates.pdf
 
Workshop - Best of Both Worlds_ Combine KG and Vector search for enhanced R...
Workshop - Best of Both Worlds_ Combine  KG and Vector search for  enhanced R...Workshop - Best of Both Worlds_ Combine  KG and Vector search for  enhanced R...
Workshop - Best of Both Worlds_ Combine KG and Vector search for enhanced R...
 
Boost PC performance: How more available memory can improve productivity
Boost PC performance: How more available memory can improve productivityBoost PC performance: How more available memory can improve productivity
Boost PC performance: How more available memory can improve productivity
 
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
 
Powerful Google developer tools for immediate impact! (2023-24 C)
Powerful Google developer tools for immediate impact! (2023-24 C)Powerful Google developer tools for immediate impact! (2023-24 C)
Powerful Google developer tools for immediate impact! (2023-24 C)
 
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, AdobeApidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
 
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost SavingRepurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
 
Deploy with confidence: VMware Cloud Foundation 5.1 on next gen Dell PowerEdg...
Deploy with confidence: VMware Cloud Foundation 5.1 on next gen Dell PowerEdg...Deploy with confidence: VMware Cloud Foundation 5.1 on next gen Dell PowerEdg...
Deploy with confidence: VMware Cloud Foundation 5.1 on next gen Dell PowerEdg...
 
HTML Injection Attacks: Impact and Mitigation Strategies
HTML Injection Attacks: Impact and Mitigation StrategiesHTML Injection Attacks: Impact and Mitigation Strategies
HTML Injection Attacks: Impact and Mitigation Strategies
 
A Domino Admins Adventures (Engage 2024)
A Domino Admins Adventures (Engage 2024)A Domino Admins Adventures (Engage 2024)
A Domino Admins Adventures (Engage 2024)
 
Tata AIG General Insurance Company - Insurer Innovation Award 2024
Tata AIG General Insurance Company - Insurer Innovation Award 2024Tata AIG General Insurance Company - Insurer Innovation Award 2024
Tata AIG General Insurance Company - Insurer Innovation Award 2024
 
Apidays New York 2024 - The value of a flexible API Management solution for O...
Apidays New York 2024 - The value of a flexible API Management solution for O...Apidays New York 2024 - The value of a flexible API Management solution for O...
Apidays New York 2024 - The value of a flexible API Management solution for O...
 
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot TakeoffStrategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
 
Polkadot JAM Slides - Token2049 - By Dr. Gavin Wood
Polkadot JAM Slides - Token2049 - By Dr. Gavin WoodPolkadot JAM Slides - Token2049 - By Dr. Gavin Wood
Polkadot JAM Slides - Token2049 - By Dr. Gavin Wood
 
Bajaj Allianz Life Insurance Company - Insurer Innovation Award 2024
Bajaj Allianz Life Insurance Company - Insurer Innovation Award 2024Bajaj Allianz Life Insurance Company - Insurer Innovation Award 2024
Bajaj Allianz Life Insurance Company - Insurer Innovation Award 2024
 
AWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of TerraformAWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of Terraform
 
Exploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone ProcessorsExploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone Processors
 
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
 
Artificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : UncertaintyArtificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : Uncertainty
 
Understanding Discord NSFW Servers A Guide for Responsible Users.pdf
Understanding Discord NSFW Servers A Guide for Responsible Users.pdfUnderstanding Discord NSFW Servers A Guide for Responsible Users.pdf
Understanding Discord NSFW Servers A Guide for Responsible Users.pdf
 

Crunching data with go: Tips, tricks, use-cases

  • 1. ! Crunching data with go: Tips, tricks, use-cases S e r g i i K h o m e n k o , D a t a S c i e n t i s t , S T Y L I G H T s e r g i i . k h o m e n k o @ s t y l i g h t . c o m @ l c 0 d 3 r M U N I C H G O P H E R S - A P R 2 4 2 0 1 4 , M U N I C H
  • 2. Agenda Relational databases ! Google Analytics and BigQuery ! Geolocation ! Useful things from Go-world W H A T I T ’ S A B O U T
  • 4. ‱ github.com/jmoiron/sqlx type Clickout struct {! ! Id, Count int! ! Ip string! ! Type int! ! Commision, Eu_commission float32! }
  • 5. ! db, err := sqlx.Connect(config.Database.Driver, fmt.Sprintf("%s:%s@%s(%s)/%s? parseTime=true", config.Database.Username,! ! ! config.Database.Password, config.Database.Protocol, config.Database.Server, config.Database.Database))! !! fmt.Printf("Connect to %s:(%s)... n", config.Database.Protocol, config.Database.Server)! ! if err != nil {! ! ! log.Fatalf("Can not connect to the mysql server - %s", err)! ! ! return! ! }! ! defer db.Close()! !!
  • 6. ! dbParams := paramStruct{"start": arguments["<from>"].(string) + " 00:00:00", "end": arguments["<to>"].(string) + " 23:59:59"}! ! geoParams := paramStruct{}! !! siteStr, _ := arguments["--site"].(string)! ! if siteInt, err2 := strconv.Atoi(siteStr); err2 == nil {! ! ! dbParams["site"] = siteInt! ! }! !! query := getClickoutsQuery(dbParams)! ! rows, err := db.Queryx(query)
  • 7. ! if err == nil {! ! ! for rows.Next() {! ! ! ! click := Clickout{}! !! ! ! err2 := rows.StructScan(&click)! ! ! ! if err2 == nil {! ! ! ! ! task <- click! !! ! ! } else {! ! ! ! ! fmt.Println(err2)! ! ! ! }! ! ! }! ! ! close(task)! ! } else {! ! ! log.Fatalf("SQL Error - %s", err)! ! }!
  • 8. Geolocation W H E R E M Y I P S A R E F R O M
  • 9. ! task := make(chan Clickout)! ! result := make(chan IpResult)! ! done = make(chan interface{})! !! go processChannel(task, result)! ! go aggregateResults(result, &results)! !! if err == nil {! ! ! for rows.Next() {! ! ! ! click := Clickout{}! !! ! ! err2 := rows.StructScan(&click)! ! ! ! if err2 == nil {! ! ! ! ! task <- click! !! ! ! } else {! ! ! ! ! fmt.Println(err2)! ! ! ! }! ! ! }! ! ! close(task)! ! } else {! ! ! log.Fatalf("SQL Error - %s", err)! ! }
  • 10. func processChannel(tc chan Clickout, rc chan IpResult) {! ! for click := range tc {! ! ! if subnet, err := findNetwork(click.Ip); err == nil {! ! ! ! rc <- IpResult{click, subnet}! ! ! } else {! ! ! ! rc <- IpResult{click, new(IpSubnet)}! ! ! }! ! }! ! close(rc)! }!
  • 11. func aggregateResults(rc chan IpResult, rs *map[string]*AggrResults) {! ! results := *rs! ! found, notFound := 0, 0! !! for result := range rc {! ! ! if result.Subnet.startInt == 0 {! ! ! ! notFound += result.click.Count! ! ! ! log.Printf("Can not find ip %sn", result.click.Ip)! ! ! } else {! ! ! ! found += result.click.Count! ! ! ! log.Printf("%s is {%s - %s} n", result.click.Ip,! ! ! ! ! result.Subnet.startIp, result.Subnet.endIp)! !! ! ! AddResult(&results, result)! ! ! }! ! }! ! fmt.Printf("%f (%d) IPs in GeoIP db and %f (%d) not found out of %dn", float32(found)/float32(found+notFound),! ! ! found, float32(notFound)/float32(found+notFound), notFound, found+notFound)! !! close(done)! }! !
  • 12. package main! !import (! ! "fmt"! ! "runtime"! )! !func main() {! !! fmt.Printf("GOMAXPROCS is %d %d %dn", runtime.GOMAXPROCS(0), runtime.NumCPU(), runtime.NumGoroutine())! !! runtime.GOMAXPROCS(runtime.NumCPU())! ! fmt.Printf("GOMAXPROCS is %d %d %dn", runtime.GOMAXPROCS(0), runtime.NumCPU(), runtime.NumGoroutine())! !}!
  • 13. ! db, err := geoip2.Open("data/GeoLite2-City.mmdb")! ! if err != nil {! ! ! panic(err)! ! }! ! ! ! ip := net.ParseIP("81.2.69.142")! ! record, err := db.City(ip)! ! if err != nil {! ! ! panic(err)! ! }! !! fmt.Printf("Portuguese (BR) city name: %vn", record.City.Names["pt-BR"])! ! fmt.Printf("English subdivision name: %vn", record.Subdivisions[0].Names["en"])! ! fmt.Printf("Russian country name: %vn", record.Country.Names["ru"])! ! fmt.Printf("ISO country code: %vn", record.Country.IsoCode)! ! fmt.Printf("Time zone: %vn", record.Location.TimeZone)! ! fmt.Printf("Coordinates: %v, %vn", record.Location.Latitude, record.Location.Longitude)! !! db.Close()
  • 15. var config = &oauth.Config{! ! ClientId: “client-id-here.apps.googleusercontent.com",! ! ClientSecret: “client-secret-here“,! ! Scope: "https://www.googleapis.com/auth/analytics.readonly",! ! AuthURL: "https://accounts.google.com/o/oauth2/auth",! ! TokenURL: "https://accounts.google.com/o/oauth2/token",! }
  • 16. ! oauthHttpClient := getOAuthClient(config)! ! analyticsService, err := analytics.New(oauthHttpClient)! ! if err != nil {! ! ! log.Fatal("Failed to create GA service")! ! }! !! dataService := analytics.NewDataGaService(analyticsService)! ! dataGaGetCall := dataService.Get(gaId, start, end, metrics)
  • 17. ! data, err := dataGaGetCall.Do()! ! if err != nil {! ! ! log.Fatal("Failed fetch data from GA")! ! }! !! return data.Rows
  • 18. func main() {! ! gaOptions := map[string]string{! ! ! "dimensions": "ga:region,ga:city",! ! ! "sort": "-ga:visits",! ! ! "limit": "10",! ! }! ! rows := fetchGAData(config, "ga:11781168", "2014-04-06", "2014-04-06", ! "ga:visits", gaOptions)! !! for row := 0; row <= len(rows)-1; row++ {! ! ! fmt.Printf("row=%d %vn", row, rows[row])! ! }! }
  • 19.
  • 20. ! config := &oauth.Config{! ! ! ClientId: "client-id-here.apps.googleusercontent.com",! ! ! ClientSecret: "client-secret-here",! ! ! Scope: bigquery.BigqueryScope,! ! ! AuthURL: "https://accounts.google.com/o/oauth2/auth",! ! ! TokenURL: "https://accounts.google.com/o/oauth2/token",! ! }! ! ! transport := &oauth.Transport{! ! ! Token: token,! ! ! Config: config,! ! }! ! client := transport.Client()
  • 21. ! service, err := bigquery.New(client)! ! if err != nil {! ! ! panic(err)! ! }! ! ! datasetList, err := service.Datasets.List(“testing-project").Do()! ! if err != nil {! ! ! panic(err)! ! }! ! ! for _, d := range datasetList.Datasets {! ! ! fmt.Println(d.FriendlyName)! ! }!
  • 23. Interesting Gophers ‱ Golang machine learning lib ‹ https://github.com/xlvector/hector ‱ Logistic Regression ‱ Factorized Machine ‱ CART, Random Forest, Random Decision Tree, Gradient Boosting Decision Tree ‱ Neural Network
  • 24. Interesting Gophers ‱ library for numeric operation‹ https://github.com/gonum - fairly, but they are working to bring some useful packages ‱ matrix - ScientiïŹc math package for the Go language. ‱ graph - Discrete math structures and functions
  • 25. Reference list ‱ Why are ‘Cool Kids’ at Github Moving to GO Language? - http://www.homolog.us/blogs/blog/ 2014/01/16/golang/ ‱ How suitable Go will be for scientiïŹc computing? - https://groups.google.com/forum/#!topic/golang- nuts/_VoZfniBTZE
  • 26. Thank you! M U N I C H G O P H E R S - A P R 2 4 2 0 1 4 , M U N I C H
  • 27. M U N I C H G O P H E R S - A P R 2 4 2 0 1 4 , M U N I C H S e r g i i K h o m e n k o , D a t a S c i e n t i s t S T Y L I G H T G m b H s e r g i i . k h o m e n k o @ s t y l i g h t . c o m @ l c 0 d 3 r ! S T Y L I G H T . C O M
  • 28. DAHO.AM — Developer Conference 06-06-14 S A F E T H E D A T E