SlideShare ist ein Scribd-Unternehmen logo
1 von 32
Downloaden Sie, um offline zu lesen
Realtime Computation
      with Storm
                    Brad Anderson
          banderson@maprtech.com
                         @boorad
Definition & Overview
   Interoperability
     Use Cases
Stream Processing
       CEP
 Distributed RPC
Source Data
• Social Media Feeds
• Network Sensors
• App/Web Logs
• Stock Tick Data
• Weather Data
• Auctions of Ad Impressions
• Payment Transactions
Before Storm



Queues        Workers
Example




 (simplified)
Storm
Guaranteed data processing
Horizontal scalability
Fault-tolerance
No intermediate message brokers!
Higher level abstraction than message passing
“Just works”
Concepts
streams

Tuple   Tuple      Tuple    Tuple    Tuple     Tuple   Tuple




                Unbounded sequence of tuples
spouts



Source of streams
spouts
public	
  interface	
  ISpout	
  extends	
  Serializable	
  {
	
  	
  	
  	
  void	
  open(Map	
  conf,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  TopologyContext	
  context,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  SpoutOutputCollector	
  collector);
	
  	
  	
  	
  void	
  close();
	
  	
  	
  	
  void	
  nextTuple();
	
  	
  	
  	
  void	
  ack(Object	
  msgId);
	
  	
  	
  	
  void	
  fail(Object	
  msgId);
}
bolts



Processes input streams and produces new streams
bolts
public	
  class	
  DoubleAndTripleBolt	
  extends	
  BaseRichBolt	
  {
	
  	
  	
  	
  private	
  OutputCollectorBase	
  _collector;

	
  	
  	
  	
  public	
  void	
  prepare(Map	
  conf,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  TopologyContext	
  context,
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  OutputCollectorBase	
  collector)	
  {
	
  	
  	
  	
  	
  	
  	
  	
  _collector	
  =	
  collector;
	
  	
  	
  	
  }

	
  	
  	
  	
  public	
  void	
  execute(Tuple	
  input)	
  {
	
  	
  	
  	
  	
  	
  	
  	
  int	
  val	
  =	
  input.getInteger(0);	
  	
  	
  	
  	
  	
  	
  	
  
	
  	
  	
  	
  	
  	
  	
  	
  _collector.emit(input,	
  new	
  Values(val*2,	
  val*3));
	
  	
  	
  	
  	
  	
  	
  	
  _collector.ack(input);
	
  	
  	
  	
  }

	
  	
  	
  	
  public	
  void	
  declareOutputFields(OutputFieldsDeclarer	
  declarer)	
  {
	
  	
  	
  	
  	
  	
  	
  	
  declarer.declare(new	
  Fields("double",	
  "triple"));
	
  	
  	
  	
  }	
  	
  	
  	
  
}
topologies



Network of spouts and bolts
topologies
        // Wires a three-stage topology: "spout" -> "split" -> "count".
        TopologyBuilder builder = new TopologyBuilder();
        
        // Register the sentence source under the id "spout".
        // NOTE(review): the trailing integer (5, 8, 12 below) is presumably a
        // parallelism hint — confirm against the TopologyBuilder docs.
        builder.setSpout("spout", new RandomSentenceSpout(), 5);
        
        // "split" subscribes to "spout" using a shuffle grouping.
        builder.setBolt("split", new SplitSentence(), 8)
                 .shuffleGrouping("spout");
        // "count" subscribes to "split", grouped on the "word" field.
        builder.setBolt("count", new WordCount(), 12)
                 .fieldsGrouping("split", new Fields("word"));
Trident
Cascading for Storm
Trident Facilities
•   Joins

•   Aggregations

•   Grouping

•   Functions

•   Filters

•   Consistent, Exactly-Once Semantics
TridentTopology	
  topology	
  =	
  new	
  TridentTopology();	
  	
  	
  	
  	
  	
  	
  	
  
TridentState	
  wordCounts	
  =
	
  	
  	
  	
  	
  topology.newStream("spout1",	
  spout)
	
  	
  	
  	
  	
  	
  	
  .each(new	
  Fields("sentence"),	
  new	
  Split(),	
  new	
  Fields("word"))
	
  	
  	
  	
  	
  	
  	
  .groupBy(new	
  Fields("word"))
	
  	
  	
  	
  	
  	
  	
  .persistentAggregate(new	
  MemoryMapState.Factory(),
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  new	
  Count(),
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  new	
  Fields("count"))	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  
	
  	
  	
  	
  	
  	
  	
  .parallelismHint(6);
Interoperability
spouts
• Kafka (with transactions)
• Kestrel
• JMS
• AMQP
• Beanstalkd
bolts
• Functions
• Filters
• Aggregation
• Joins
• Talk to databases, Hadoop write-behind
Storm

                realtime
               processes

       Queue                               Apps
Raw
Data                                      Business
                                           Value
                               Hadoop




                                batch
                              processes
Storm

                       realtime
                      processes

              Queue                               Apps
Raw
Data                                             Business
                                                  Value
                                      Hadoop
       Parallel Cluster Ingest


                                       batch
                                     processes
Storm

                                    realtime
                                   processes
                                                Apps
       Queue           TailSpout
Raw
Data                                           Business
               Franz                            Value
                                   Hadoop




                                     batch
                                   processes
Storm

                            realtime
                           processes
                                        Apps
               TailSpout
Raw
Data                                   Business
       Franz                            Value
                           Hadoop




                             batch
                           processes
Use Cases
Twitter
                  Follower

                             Distinct
        Tweeter   Follower   follower



                  Follower
                             Distinct
  URL   Tweeter              follower   Reach
                  Follower


                  Follower
                             Distinct
        Tweeter              follower

                  Follower
Heartbyte
Fleet Logistics
http://github.com/{tdunning | boorad}/mapr-spout


                                    Brad Anderson
                          banderson@maprtech.com
                                         @boorad
Thank you.
http://github.com/{tdunning | boorad}/mapr-spout


                                    Brad Anderson
                          banderson@maprtech.com
                                         @boorad

Weitere ähnliche Inhalte

Was ist angesagt?

Hadoop Summit San Jose 2014: Costing Your Big Data Operations
Hadoop Summit San Jose 2014: Costing Your Big Data Operations Hadoop Summit San Jose 2014: Costing Your Big Data Operations
Hadoop Summit San Jose 2014: Costing Your Big Data Operations Sumeet Singh
 
データ解析技術入門(Hadoop編)
データ解析技術入門(Hadoop編)データ解析技術入門(Hadoop編)
データ解析技術入門(Hadoop編)Takumi Asai
 
Hadoop Summit Amsterdam 2014: Capacity Planning In Multi-tenant Hadoop Deploy...
Hadoop Summit Amsterdam 2014: Capacity Planning In Multi-tenant Hadoop Deploy...Hadoop Summit Amsterdam 2014: Capacity Planning In Multi-tenant Hadoop Deploy...
Hadoop Summit Amsterdam 2014: Capacity Planning In Multi-tenant Hadoop Deploy...Sumeet Singh
 
Introduction to Spark on Hadoop
Introduction to Spark on HadoopIntroduction to Spark on Hadoop
Introduction to Spark on HadoopCarol McDonald
 
Real-World Machine Learning - Leverage the Features of MapR Converged Data Pl...
Real-World Machine Learning - Leverage the Features of MapR Converged Data Pl...Real-World Machine Learning - Leverage the Features of MapR Converged Data Pl...
Real-World Machine Learning - Leverage the Features of MapR Converged Data Pl...Mathieu Dumoulin
 
Goto amsterdam-2013-skinned
Goto amsterdam-2013-skinnedGoto amsterdam-2013-skinned
Goto amsterdam-2013-skinnedTed Dunning
 
Dealing with an Upside Down Internet
Dealing with an Upside Down InternetDealing with an Upside Down Internet
Dealing with an Upside Down InternetMapR Technologies
 
Boston hug-2012-07
Boston hug-2012-07Boston hug-2012-07
Boston hug-2012-07Ted Dunning
 
Large Scale Data With Hadoop
Large Scale Data With HadoopLarge Scale Data With Hadoop
Large Scale Data With Hadoopguest27e6764
 
Hadoop on Azure, Blue elephants
Hadoop on Azure,  Blue elephantsHadoop on Azure,  Blue elephants
Hadoop on Azure, Blue elephantsOvidiu Dimulescu
 
TWDI Accelerate Seattle, Oct 16, 2017: Distributed and In-Database Analytics ...
TWDI Accelerate Seattle, Oct 16, 2017: Distributed and In-Database Analytics ...TWDI Accelerate Seattle, Oct 16, 2017: Distributed and In-Database Analytics ...
TWDI Accelerate Seattle, Oct 16, 2017: Distributed and In-Database Analytics ...Debraj GuhaThakurta
 
Hive at Yahoo: Letters from the trenches
Hive at Yahoo: Letters from the trenchesHive at Yahoo: Letters from the trenches
Hive at Yahoo: Letters from the trenchesDataWorks Summit
 
BKK16-408B Data Analytics and Machine Learning From Node to Cluster
BKK16-408B Data Analytics and Machine Learning From Node to ClusterBKK16-408B Data Analytics and Machine Learning From Node to Cluster
BKK16-408B Data Analytics and Machine Learning From Node to ClusterLinaro
 
Hadoop Summit Dublin 2016: Hadoop Platform at Yahoo - A Year in Review
Hadoop Summit Dublin 2016: Hadoop Platform at Yahoo - A Year in Review Hadoop Summit Dublin 2016: Hadoop Platform at Yahoo - A Year in Review
Hadoop Summit Dublin 2016: Hadoop Platform at Yahoo - A Year in Review Sumeet Singh
 

Was ist angesagt? (20)

HUG slides on NFS and ODBC
HUG slides on NFS and ODBCHUG slides on NFS and ODBC
HUG slides on NFS and ODBC
 
London hug
London hugLondon hug
London hug
 
Apache Spark Overview
Apache Spark OverviewApache Spark Overview
Apache Spark Overview
 
MapReduce and NoSQL
MapReduce and NoSQLMapReduce and NoSQL
MapReduce and NoSQL
 
Hadoop Summit San Jose 2014: Costing Your Big Data Operations
Hadoop Summit San Jose 2014: Costing Your Big Data Operations Hadoop Summit San Jose 2014: Costing Your Big Data Operations
Hadoop Summit San Jose 2014: Costing Your Big Data Operations
 
データ解析技術入門(Hadoop編)
データ解析技術入門(Hadoop編)データ解析技術入門(Hadoop編)
データ解析技術入門(Hadoop編)
 
May 2013 HUG: HCatalog/Hive Data Out
May 2013 HUG: HCatalog/Hive Data OutMay 2013 HUG: HCatalog/Hive Data Out
May 2013 HUG: HCatalog/Hive Data Out
 
Hadoop Summit Amsterdam 2014: Capacity Planning In Multi-tenant Hadoop Deploy...
Hadoop Summit Amsterdam 2014: Capacity Planning In Multi-tenant Hadoop Deploy...Hadoop Summit Amsterdam 2014: Capacity Planning In Multi-tenant Hadoop Deploy...
Hadoop Summit Amsterdam 2014: Capacity Planning In Multi-tenant Hadoop Deploy...
 
Introduction to Spark on Hadoop
Introduction to Spark on HadoopIntroduction to Spark on Hadoop
Introduction to Spark on Hadoop
 
Real-World Machine Learning - Leverage the Features of MapR Converged Data Pl...
Real-World Machine Learning - Leverage the Features of MapR Converged Data Pl...Real-World Machine Learning - Leverage the Features of MapR Converged Data Pl...
Real-World Machine Learning - Leverage the Features of MapR Converged Data Pl...
 
Goto amsterdam-2013-skinned
Goto amsterdam-2013-skinnedGoto amsterdam-2013-skinned
Goto amsterdam-2013-skinned
 
Dealing with an Upside Down Internet
Dealing with an Upside Down InternetDealing with an Upside Down Internet
Dealing with an Upside Down Internet
 
Enabling R on Hadoop
Enabling R on HadoopEnabling R on Hadoop
Enabling R on Hadoop
 
Boston hug-2012-07
Boston hug-2012-07Boston hug-2012-07
Boston hug-2012-07
 
Large Scale Data With Hadoop
Large Scale Data With HadoopLarge Scale Data With Hadoop
Large Scale Data With Hadoop
 
Hadoop on Azure, Blue elephants
Hadoop on Azure,  Blue elephantsHadoop on Azure,  Blue elephants
Hadoop on Azure, Blue elephants
 
TWDI Accelerate Seattle, Oct 16, 2017: Distributed and In-Database Analytics ...
TWDI Accelerate Seattle, Oct 16, 2017: Distributed and In-Database Analytics ...TWDI Accelerate Seattle, Oct 16, 2017: Distributed and In-Database Analytics ...
TWDI Accelerate Seattle, Oct 16, 2017: Distributed and In-Database Analytics ...
 
Hive at Yahoo: Letters from the trenches
Hive at Yahoo: Letters from the trenchesHive at Yahoo: Letters from the trenches
Hive at Yahoo: Letters from the trenches
 
BKK16-408B Data Analytics and Machine Learning From Node to Cluster
BKK16-408B Data Analytics and Machine Learning From Node to ClusterBKK16-408B Data Analytics and Machine Learning From Node to Cluster
BKK16-408B Data Analytics and Machine Learning From Node to Cluster
 
Hadoop Summit Dublin 2016: Hadoop Platform at Yahoo - A Year in Review
Hadoop Summit Dublin 2016: Hadoop Platform at Yahoo - A Year in Review Hadoop Summit Dublin 2016: Hadoop Platform at Yahoo - A Year in Review
Hadoop Summit Dublin 2016: Hadoop Platform at Yahoo - A Year in Review
 

Ähnlich wie Realtime Computation with Storm

Realtime Computation with Storm
Realtime Computation with StormRealtime Computation with Storm
Realtime Computation with Stormboorad
 
Large Scale Data Analysis Tools
Large Scale Data Analysis ToolsLarge Scale Data Analysis Tools
Large Scale Data Analysis Toolsboorad
 
Apachecon Euro 2012: Elastic, Multi-tenant Hadoop on Demand
Apachecon Euro 2012: Elastic, Multi-tenant Hadoop on DemandApachecon Euro 2012: Elastic, Multi-tenant Hadoop on Demand
Apachecon Euro 2012: Elastic, Multi-tenant Hadoop on DemandRichard McDougall
 
Big Data/Hadoop Infrastructure Considerations
Big Data/Hadoop Infrastructure ConsiderationsBig Data/Hadoop Infrastructure Considerations
Big Data/Hadoop Infrastructure ConsiderationsRichard McDougall
 
Scaling Big Data Mining Infrastructure Twitter Experience
Scaling Big Data Mining Infrastructure Twitter ExperienceScaling Big Data Mining Infrastructure Twitter Experience
Scaling Big Data Mining Infrastructure Twitter ExperienceDataWorks Summit
 
Dataiku pig - hive - cascading
Dataiku   pig - hive - cascadingDataiku   pig - hive - cascading
Dataiku pig - hive - cascadingDataiku
 
Sri Ambati – CEO, 0xdata at MLconf ATL
Sri Ambati – CEO, 0xdata at MLconf ATLSri Ambati – CEO, 0xdata at MLconf ATL
Sri Ambati – CEO, 0xdata at MLconf ATLMLconf
 
Distributed and Fault Tolerant Realtime Computation with Apache Storm, Apache...
Distributed and Fault Tolerant Realtime Computation with Apache Storm, Apache...Distributed and Fault Tolerant Realtime Computation with Apache Storm, Apache...
Distributed and Fault Tolerant Realtime Computation with Apache Storm, Apache...Folio3 Software
 
Don't be Hadooped when looking for Big Data ROI
Don't be Hadooped when looking for Big Data ROIDon't be Hadooped when looking for Big Data ROI
Don't be Hadooped when looking for Big Data ROIDataWorks Summit
 
Big-data-analysis-training-in-mumbai
Big-data-analysis-training-in-mumbaiBig-data-analysis-training-in-mumbai
Big-data-analysis-training-in-mumbaiUnmesh Baile
 
Tech4Africa - Opportunities around Big Data
Tech4Africa - Opportunities around Big DataTech4Africa - Opportunities around Big Data
Tech4Africa - Opportunities around Big DataSteve Watt
 
Hadoop trainingin bangalore
Hadoop trainingin bangaloreHadoop trainingin bangalore
Hadoop trainingin bangaloreappaji intelhunt
 
Riding the Elephant - Hadoop 2.0
Riding the Elephant - Hadoop 2.0Riding the Elephant - Hadoop 2.0
Riding the Elephant - Hadoop 2.0Simon Elliston Ball
 
H2O 0xdata MLconf
H2O 0xdata MLconfH2O 0xdata MLconf
H2O 0xdata MLconfSri Ambati
 

Ähnlich wie Realtime Computation with Storm (20)

Realtime Computation with Storm
Realtime Computation with StormRealtime Computation with Storm
Realtime Computation with Storm
 
Large Scale Data Analysis Tools
Large Scale Data Analysis ToolsLarge Scale Data Analysis Tools
Large Scale Data Analysis Tools
 
Apachecon Euro 2012: Elastic, Multi-tenant Hadoop on Demand
Apachecon Euro 2012: Elastic, Multi-tenant Hadoop on DemandApachecon Euro 2012: Elastic, Multi-tenant Hadoop on Demand
Apachecon Euro 2012: Elastic, Multi-tenant Hadoop on Demand
 
Big Data/Hadoop Infrastructure Considerations
Big Data/Hadoop Infrastructure ConsiderationsBig Data/Hadoop Infrastructure Considerations
Big Data/Hadoop Infrastructure Considerations
 
Scaling Big Data Mining Infrastructure Twitter Experience
Scaling Big Data Mining Infrastructure Twitter ExperienceScaling Big Data Mining Infrastructure Twitter Experience
Scaling Big Data Mining Infrastructure Twitter Experience
 
Dataiku pig - hive - cascading
Dataiku   pig - hive - cascadingDataiku   pig - hive - cascading
Dataiku pig - hive - cascading
 
Steve Watt Presentation
Steve Watt PresentationSteve Watt Presentation
Steve Watt Presentation
 
Dancing with the Elephant
Dancing with the ElephantDancing with the Elephant
Dancing with the Elephant
 
Galaxy of bits
Galaxy of bitsGalaxy of bits
Galaxy of bits
 
Sri Ambati – CEO, 0xdata at MLconf ATL
Sri Ambati – CEO, 0xdata at MLconf ATLSri Ambati – CEO, 0xdata at MLconf ATL
Sri Ambati – CEO, 0xdata at MLconf ATL
 
Distributed and Fault Tolerant Realtime Computation with Apache Storm, Apache...
Distributed and Fault Tolerant Realtime Computation with Apache Storm, Apache...Distributed and Fault Tolerant Realtime Computation with Apache Storm, Apache...
Distributed and Fault Tolerant Realtime Computation with Apache Storm, Apache...
 
Don't be Hadooped when looking for Big Data ROI
Don't be Hadooped when looking for Big Data ROIDon't be Hadooped when looking for Big Data ROI
Don't be Hadooped when looking for Big Data ROI
 
Big-data-analysis-training-in-mumbai
Big-data-analysis-training-in-mumbaiBig-data-analysis-training-in-mumbai
Big-data-analysis-training-in-mumbai
 
Hackathon bonn
Hackathon bonnHackathon bonn
Hackathon bonn
 
Tech4Africa - Opportunities around Big Data
Tech4Africa - Opportunities around Big DataTech4Africa - Opportunities around Big Data
Tech4Africa - Opportunities around Big Data
 
Hadoop trainingin bangalore
Hadoop trainingin bangaloreHadoop trainingin bangalore
Hadoop trainingin bangalore
 
Yahoo compares Storm and Spark
Yahoo compares Storm and SparkYahoo compares Storm and Spark
Yahoo compares Storm and Spark
 
Riding the Elephant - Hadoop 2.0
Riding the Elephant - Hadoop 2.0Riding the Elephant - Hadoop 2.0
Riding the Elephant - Hadoop 2.0
 
H2O 0xdata MLconf
H2O 0xdata MLconfH2O 0xdata MLconf
H2O 0xdata MLconf
 
Hadoop
HadoopHadoop
Hadoop
 

Mehr von boorad

Big Data Analysis Patterns with Hadoop, Mahout and Solr
Big Data Analysis Patterns with Hadoop, Mahout and SolrBig Data Analysis Patterns with Hadoop, Mahout and Solr
Big Data Analysis Patterns with Hadoop, Mahout and Solrboorad
 
Big Data Analysis Patterns - TriHUG 6/27/2013
Big Data Analysis Patterns - TriHUG 6/27/2013Big Data Analysis Patterns - TriHUG 6/27/2013
Big Data Analysis Patterns - TriHUG 6/27/2013boorad
 
Hadoop and Storm - AJUG talk
Hadoop and Storm - AJUG talkHadoop and Storm - AJUG talk
Hadoop and Storm - AJUG talkboorad
 
Big Data Use Cases
Big Data Use CasesBig Data Use Cases
Big Data Use Casesboorad
 
TriHUG - Beyond Batch
TriHUG - Beyond BatchTriHUG - Beyond Batch
TriHUG - Beyond Batchboorad
 
DevNexus 2011
DevNexus 2011DevNexus 2011
DevNexus 2011boorad
 
DevNation Atlanta
DevNation AtlantaDevNation Atlanta
DevNation Atlantaboorad
 
NOSQL, CouchDB, and the Cloud
NOSQL, CouchDB, and the CloudNOSQL, CouchDB, and the Cloud
NOSQL, CouchDB, and the Cloudboorad
 
Why Erlang? - Bar Camp Atlanta 2008
Why Erlang?  - Bar Camp Atlanta 2008Why Erlang?  - Bar Camp Atlanta 2008
Why Erlang? - Bar Camp Atlanta 2008boorad
 

Mehr von boorad (9)

Big Data Analysis Patterns with Hadoop, Mahout and Solr
Big Data Analysis Patterns with Hadoop, Mahout and SolrBig Data Analysis Patterns with Hadoop, Mahout and Solr
Big Data Analysis Patterns with Hadoop, Mahout and Solr
 
Big Data Analysis Patterns - TriHUG 6/27/2013
Big Data Analysis Patterns - TriHUG 6/27/2013Big Data Analysis Patterns - TriHUG 6/27/2013
Big Data Analysis Patterns - TriHUG 6/27/2013
 
Hadoop and Storm - AJUG talk
Hadoop and Storm - AJUG talkHadoop and Storm - AJUG talk
Hadoop and Storm - AJUG talk
 
Big Data Use Cases
Big Data Use CasesBig Data Use Cases
Big Data Use Cases
 
TriHUG - Beyond Batch
TriHUG - Beyond BatchTriHUG - Beyond Batch
TriHUG - Beyond Batch
 
DevNexus 2011
DevNexus 2011DevNexus 2011
DevNexus 2011
 
DevNation Atlanta
DevNation AtlantaDevNation Atlanta
DevNation Atlanta
 
NOSQL, CouchDB, and the Cloud
NOSQL, CouchDB, and the CloudNOSQL, CouchDB, and the Cloud
NOSQL, CouchDB, and the Cloud
 
Why Erlang? - Bar Camp Atlanta 2008
Why Erlang?  - Bar Camp Atlanta 2008Why Erlang?  - Bar Camp Atlanta 2008
Why Erlang? - Bar Camp Atlanta 2008
 

Realtime Computation with Storm

  • 1. Realtime Computation with Storm Brad Anderson banderson@maprtech.com @boorad
  • 2.
  • 3. Definition & Overview Interoperability Use Cases
  • 4. Stream Processing CEP Distributed RPC
  • 5. Source Data • Social Media Feeds • Network Sensors • App/Web Logs • Stock Tick Data • Weather Data • Auctions of Ad Impressions • Payment Transactions
  • 8. Storm Guaranteed data processing Horizontal scalability Fault-tolerance No intermediate message brokers! Higher level abstraction than message passing “Just works”
  • 10. streams Tuple Tuple Tuple Tuple Tuple Tuple Tuple Unbounded sequence of tuples
  • 12. spouts public  interface  ISpout  extends  Serializable  {        void  open(Map  conf,                            TopologyContext  context,                            SpoutOutputCollector  collector);        void  close();        void  nextTuple();        void  ack(Object  msgId);        void  fail(Object  msgId); }
  • 13. bolts Processes input streams and produces new streams
  • 14. bolts public  class  DoubleAndTripleBolt  extends  BaseRichBolt  {        private  OutputCollectorBase  _collector;        public  void  prepare(Map  conf,                                                TopologyContext  context,                                                OutputCollectorBase  collector)  {                _collector  =  collector;        }        public  void  execute(Tuple  input)  {                int  val  =  input.getInteger(0);                                _collector.emit(input,  new  Values(val*2,  val*3));                _collector.ack(input);        }        public  void  declareOutputFields(OutputFieldsDeclarer  declarer)  {                declarer.declare(new  Fields("double",  "triple"));        }         }
  • 16. topologies         TopologyBuilder builder = new TopologyBuilder();                  builder.setSpout("spout", new RandomSentenceSpout(), 5);                  builder.setBolt("split", new SplitSentence(), 8)                  .shuffleGrouping("spout");         builder.setBolt("count", new WordCount(), 12)                  .fieldsGrouping("split", new Fields("word"));
  • 18. Trident Facilities • Joins • Aggregations • Grouping • Functions • Filters • Consistent, Exactly-Once Semantics
  • 19. TridentTopology  topology  =  new  TridentTopology();                 TridentState  wordCounts  =          topology.newStream("spout1",  spout)              .each(new  Fields("sentence"),  new  Split(),  new  Fields("word"))              .groupBy(new  Fields("word"))              .persistentAggregate(new  MemoryMapState.Factory(),                                                        new  Count(),                                                        new  Fields("count"))                                              .parallelismHint(6);
  • 21. spouts •Kafka (with transactions) • Kestrel • JMS • AMQP • Beanstalkd
  • 22. bolts • Functions • Filters • Aggregation • Joins • Talk to databases, Hadoop write-behind
  • 23. Storm realtime processes Queue Apps Raw Data Business Value Hadoop batch processes
  • 24. Storm realtime processes Queue Apps Raw Data Business Value Hadoop Parallel Cluster Ingest batch processes
  • 25. Storm realtime processes Apps Queue TailSpout Raw Data Business Franz Value Hadoop batch processes
  • 26. Storm realtime processes Apps TailSpout Raw Data Business Franz Value Hadoop batch processes
  • 28. Twitter Follower Distinct Tweeter Follower follower Follower Distinct URL Tweeter follower Reach Follower Follower Distinct Tweeter follower Follower
  • 31. http://github.com/{tdunning | boorad}/mapr-spout Brad Anderson banderson@maprtech.com @boorad
  • 32. Thank you. http://github.com/{tdunning | boorad}/mapr-spout Brad Anderson banderson@maprtech.com @boorad