SlideShare a Scribd company logo
1 of 3
Download to read offline
package org.ajug;

import   org.apache.hadoop.fs.Path;
import   org.apache.hadoop.conf.*;
import   org.apache.hadoop.io.*;
import   org.apache.hadoop.mapreduce.*;
import   org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import   org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import   org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import   org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;


public class MnM {

    public static void main(String[] args) throws Exception {
       Configuration conf = new Configuration();

            Job job = new Job(conf, "ajug");

         job.setOutputKeyClass(Text.class);
         job.setOutputValueClass(Data.class);

         job.setMapperClass(MnMMapper.class);
         job.setReducerClass(MnMReducer.class);

         job.setInputFormatClass(TextInputFormat.class);
         job.setOutputFormatClass(TextOutputFormat.class);

         FileInputFormat.addInputPath(job, new Path(args[0]));
         FileOutputFormat.setOutputPath(job, new Path(args[1]));

         job.waitForCompletion(true);
    }

}

================================================

package org.ajug;

import org.apache.hadoop.io.DoubleWritable;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class Data implements WritableComparable<Data> {
    private DoubleWritable width = new DoubleWritable();
    private DoubleWritable weight = new DoubleWritable();

    void set(double a_width, double a_weight) {
        width.set(a_width);
        weight.set(a_weight);
    }

    public double getWidth() {
return width.get();
}
     public double getWeight() {
         return weight.get();
     }

    public void write(DataOutput out) throws IOException {
      width. write(out);
      weight. write(out);
    }

    public void readFields(DataInput in) throws IOException {
      width. readFields(in);
      weight. readFields(in);
    }


    public int hashCode() {
      return width.hashCode() * 163 + weight.hashCode();
    }

    public int compareTo(Data tp) {
      int cmp = width.compareTo(tp.width);
      if (cmp != 0) {
        return cmp;
      }
      return weight.compareTo(tp.weight);
    }

     public String toString() {
         return "" + width + "t" + weight;
     }

}
========================================
package org.ajug;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;

import java.io.IOException;

public class MnMMapper extends
     Mapper<LongWritable, Text, Text, Data> {

     private Text color = new Text();
     private Data data = new Data();

    public void map(LongWritable key, Text value, Context context) throws
IOException, InterruptedException {
        String line = value.toString();
        String[] fields = line.split(",");
        if (fields.length > 2) {
           color.set(fields[0]);
            double weight = Double.parseDouble(fields[1]);
            double width = Double.parseDouble(fields[2]);
            data.set(width, weight);
            context.write(color, data);
        }
}
}
======================================
package org.ajug;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;

import java.io.IOException;
import java.util.Iterator;

public class MnMReducer extends   Reducer <Text, Data, Text, Data> {

    private Data data = new Data();


    public void reduce(Text key, Iterable<Data> values, Context context)
throws IOException, InterruptedException {
        double weights = 0;
        double widths=0;
        int count = 0;

        Iterator iter = values.iterator();
        while (iter.hasNext()) {
            Data value = (Data)iter.next();
            count++;
            weights += value.getWeight();
            widths += value.getWidth();
        }
        data.set(widths/count, weights/count);
        context.write(key, data);
    }
}

More Related Content

What's hot

Weather of the Century: Design and Performance
Weather of the Century: Design and PerformanceWeather of the Century: Design and Performance
Weather of the Century: Design and PerformanceMongoDB
 
The Weather of the Century
The Weather of the CenturyThe Weather of the Century
The Weather of the CenturyMongoDB
 
Image magick++
Image magick++Image magick++
Image magick++Yubin Lim
 
Logic Equations Resolver J Script
Logic Equations Resolver   J ScriptLogic Equations Resolver   J Script
Logic Equations Resolver J ScriptRoman Agaev
 
名古屋SGGAE/J勉強会 Grails、Gaelykでハンズオン
名古屋SGGAE/J勉強会 Grails、Gaelykでハンズオン名古屋SGGAE/J勉強会 Grails、Gaelykでハンズオン
名古屋SGGAE/J勉強会 Grails、GaelykでハンズオンTsuyoshi Yamamoto
 
Google App Engine Developer - Day3
Google App Engine Developer - Day3Google App Engine Developer - Day3
Google App Engine Developer - Day3Simon Su
 
ggplot2 extensions-ggtree.
ggplot2 extensions-ggtree.ggplot2 extensions-ggtree.
ggplot2 extensions-ggtree.Dr. Volkan OBAN
 
Programming with Python and PostgreSQL
Programming with Python and PostgreSQLProgramming with Python and PostgreSQL
Programming with Python and PostgreSQLPeter Eisentraut
 
Building Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeBuilding Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeMongoDB
 
Python in the database
Python in the databasePython in the database
Python in the databasepybcn
 
Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...
Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...
Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...Dr. Volkan OBAN
 
Herding types with Scala macros
Herding types with Scala macrosHerding types with Scala macros
Herding types with Scala macrosMarina Sigaeva
 
Engineering a robust(ish) data pipeline with Luigi and AWS Elastic Map Reduce
Engineering a robust(ish) data pipeline with Luigi and AWS Elastic Map ReduceEngineering a robust(ish) data pipeline with Luigi and AWS Elastic Map Reduce
Engineering a robust(ish) data pipeline with Luigi and AWS Elastic Map ReduceAaron Knight
 
ClickHouse Features for Advanced Users, by Aleksei Milovidov
ClickHouse Features for Advanced Users, by Aleksei MilovidovClickHouse Features for Advanced Users, by Aleksei Milovidov
ClickHouse Features for Advanced Users, by Aleksei MilovidovAltinity Ltd
 
GPars For Beginners
GPars For BeginnersGPars For Beginners
GPars For BeginnersMatt Passell
 
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-Tsuyoshi Yamamoto
 
2019-01-29 - Demystifying Kotlin Coroutines
2019-01-29 - Demystifying Kotlin Coroutines2019-01-29 - Demystifying Kotlin Coroutines
2019-01-29 - Demystifying Kotlin CoroutinesEamonn Boyle
 

What's hot (20)

Weather of the Century: Design and Performance
Weather of the Century: Design and PerformanceWeather of the Century: Design and Performance
Weather of the Century: Design and Performance
 
The Weather of the Century
The Weather of the CenturyThe Weather of the Century
The Weather of the Century
 
Image magick++
Image magick++Image magick++
Image magick++
 
Logic Equations Resolver J Script
Logic Equations Resolver   J ScriptLogic Equations Resolver   J Script
Logic Equations Resolver J Script
 
名古屋SGGAE/J勉強会 Grails、Gaelykでハンズオン
名古屋SGGAE/J勉強会 Grails、Gaelykでハンズオン名古屋SGGAE/J勉強会 Grails、Gaelykでハンズオン
名古屋SGGAE/J勉強会 Grails、Gaelykでハンズオン
 
Google App Engine Developer - Day3
Google App Engine Developer - Day3Google App Engine Developer - Day3
Google App Engine Developer - Day3
 
ggplot2 extensions-ggtree.
ggplot2 extensions-ggtree.ggplot2 extensions-ggtree.
ggplot2 extensions-ggtree.
 
Programming with Python and PostgreSQL
Programming with Python and PostgreSQLProgramming with Python and PostgreSQL
Programming with Python and PostgreSQL
 
Building Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeBuilding Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at Stripe
 
Python in the database
Python in the databasePython in the database
Python in the database
 
Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...
Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...
Some R Examples[R table and Graphics] -Advanced Data Visualization in R (Some...
 
Herding types with Scala macros
Herding types with Scala macrosHerding types with Scala macros
Herding types with Scala macros
 
Typelevel summit
Typelevel summitTypelevel summit
Typelevel summit
 
Engineering a robust(ish) data pipeline with Luigi and AWS Elastic Map Reduce
Engineering a robust(ish) data pipeline with Luigi and AWS Elastic Map ReduceEngineering a robust(ish) data pipeline with Luigi and AWS Elastic Map Reduce
Engineering a robust(ish) data pipeline with Luigi and AWS Elastic Map Reduce
 
ClickHouse Features for Advanced Users, by Aleksei Milovidov
ClickHouse Features for Advanced Users, by Aleksei MilovidovClickHouse Features for Advanced Users, by Aleksei Milovidov
ClickHouse Features for Advanced Users, by Aleksei Milovidov
 
Clojure functions midje
Clojure functions midjeClojure functions midje
Clojure functions midje
 
GPars For Beginners
GPars For BeginnersGPars For Beginners
GPars For Beginners
 
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
Grails 1.2 探検隊 -新たな聖杯をもとめて・・・-
 
Tricks
TricksTricks
Tricks
 
2019-01-29 - Demystifying Kotlin Coroutines
2019-01-29 - Demystifying Kotlin Coroutines2019-01-29 - Demystifying Kotlin Coroutines
2019-01-29 - Demystifying Kotlin Coroutines
 

Viewers also liked

Hadoop and Cascading At AJUG July 2009
Hadoop and Cascading At AJUG July 2009Hadoop and Cascading At AJUG July 2009
Hadoop and Cascading At AJUG July 2009Christopher Curtin
 
Jenny, Katerina And Arynda
Jenny, Katerina And AryndaJenny, Katerina And Arynda
Jenny, Katerina And Aryndakaterinawsy
 
2011 march cloud computing atlanta
2011 march cloud computing atlanta2011 march cloud computing atlanta
2011 march cloud computing atlantaChristopher Curtin
 
Atlanta hadoop users group july 2013
Atlanta hadoop users group july 2013Atlanta hadoop users group july 2013
Atlanta hadoop users group july 2013Christopher Curtin
 
AJUG April 2011 Cascading example
AJUG April 2011 Cascading exampleAJUG April 2011 Cascading example
AJUG April 2011 Cascading exampleChristopher Curtin
 
Redis and Bloom Filters - Atlanta Java Users Group 9/2014
Redis and Bloom Filters - Atlanta Java Users Group 9/2014Redis and Bloom Filters - Atlanta Java Users Group 9/2014
Redis and Bloom Filters - Atlanta Java Users Group 9/2014Christopher Curtin
 

Viewers also liked (9)

Hadoop and Cascading At AJUG July 2009
Hadoop and Cascading At AJUG July 2009Hadoop and Cascading At AJUG July 2009
Hadoop and Cascading At AJUG July 2009
 
Jenny, Katerina And Arynda
Jenny, Katerina And AryndaJenny, Katerina And Arynda
Jenny, Katerina And Arynda
 
2011 march cloud computing atlanta
2011 march cloud computing atlanta2011 march cloud computing atlanta
2011 march cloud computing atlanta
 
Ajug april 2011
Ajug april 2011Ajug april 2011
Ajug april 2011
 
Atlanta hadoop users group july 2013
Atlanta hadoop users group july 2013Atlanta hadoop users group july 2013
Atlanta hadoop users group july 2013
 
IASA Atlanta September 2009
IASA Atlanta September 2009IASA Atlanta September 2009
IASA Atlanta September 2009
 
Nosql East October 2009
Nosql East October 2009Nosql East October 2009
Nosql East October 2009
 
AJUG April 2011 Cascading example
AJUG April 2011 Cascading exampleAJUG April 2011 Cascading example
AJUG April 2011 Cascading example
 
Redis and Bloom Filters - Atlanta Java Users Group 9/2014
Redis and Bloom Filters - Atlanta Java Users Group 9/2014Redis and Bloom Filters - Atlanta Java Users Group 9/2014
Redis and Bloom Filters - Atlanta Java Users Group 9/2014
 

Similar to AJUG April 2011 Raw hadoop example

Hadoop Installation_13_09_2022(1).docx
Hadoop Installation_13_09_2022(1).docxHadoop Installation_13_09_2022(1).docx
Hadoop Installation_13_09_2022(1).docx1MS20CS406
 
Create & Execute First Hadoop MapReduce Project in.pptx
Create & Execute First Hadoop MapReduce Project in.pptxCreate & Execute First Hadoop MapReduce Project in.pptx
Create & Execute First Hadoop MapReduce Project in.pptxvishal choudhary
 
JRubyKaigi2010 Hadoop Papyrus
JRubyKaigi2010 Hadoop PapyrusJRubyKaigi2010 Hadoop Papyrus
JRubyKaigi2010 Hadoop PapyrusKoichi Fujikawa
 
Scalable and Flexible Machine Learning With Scala @ LinkedIn
Scalable and Flexible Machine Learning With Scala @ LinkedInScalable and Flexible Machine Learning With Scala @ LinkedIn
Scalable and Flexible Machine Learning With Scala @ LinkedInVitaly Gordon
 
The current program only run one iteration of the KMeans algorithm. .docx
The current program only run one iteration of the KMeans algorithm. .docxThe current program only run one iteration of the KMeans algorithm. .docx
The current program only run one iteration of the KMeans algorithm. .docxtodd241
 
05 pig user defined functions (udfs)
05 pig user defined functions (udfs)05 pig user defined functions (udfs)
05 pig user defined functions (udfs)Subhas Kumar Ghosh
 
Introduction to Scalding and Monoids
Introduction to Scalding and MonoidsIntroduction to Scalding and Monoids
Introduction to Scalding and MonoidsHugo Gävert
 
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash courseCodepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash courseSages
 
Please fix the java code (using eclipse)package hw4p1;import jav.pdf
Please fix the java code (using eclipse)package hw4p1;import jav.pdfPlease fix the java code (using eclipse)package hw4p1;import jav.pdf
Please fix the java code (using eclipse)package hw4p1;import jav.pdfinfo961251
 
EuroPython 2015 - Big Data with Python and Hadoop
EuroPython 2015 - Big Data with Python and HadoopEuroPython 2015 - Big Data with Python and Hadoop
EuroPython 2015 - Big Data with Python and HadoopMax Tepkeev
 
Having a problem figuring out where my errors are- The code is not run.pdf
Having a problem figuring out where my errors are- The code is not run.pdfHaving a problem figuring out where my errors are- The code is not run.pdf
Having a problem figuring out where my errors are- The code is not run.pdfNicholasflqStewartl
 
Cascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUGCascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUGMatthew McCullough
 
Testing multi outputformat based mapreduce
Testing multi outputformat based mapreduceTesting multi outputformat based mapreduce
Testing multi outputformat based mapreduceAshok Agarwal
 
Writing Hadoop Jobs in Scala using Scalding
Writing Hadoop Jobs in Scala using ScaldingWriting Hadoop Jobs in Scala using Scalding
Writing Hadoop Jobs in Scala using ScaldingToni Cebrián
 
Open XKE - Big Data, Big Mess par Bertrand Dechoux
Open XKE - Big Data, Big Mess par Bertrand DechouxOpen XKE - Big Data, Big Mess par Bertrand Dechoux
Open XKE - Big Data, Big Mess par Bertrand DechouxPublicis Sapient Engineering
 
Spark Day 2017- Spark 의 과거, 현재, 미래
Spark Day 2017- Spark 의 과거, 현재, 미래Spark Day 2017- Spark 의 과거, 현재, 미래
Spark Day 2017- Spark 의 과거, 현재, 미래Moon Soo Lee
 
TypeScript Introduction
TypeScript IntroductionTypeScript Introduction
TypeScript IntroductionDmitry Sheiko
 

Similar to AJUG April 2011 Raw hadoop example (20)

Hadoop Installation_13_09_2022(1).docx
Hadoop Installation_13_09_2022(1).docxHadoop Installation_13_09_2022(1).docx
Hadoop Installation_13_09_2022(1).docx
 
Create & Execute First Hadoop MapReduce Project in.pptx
Create & Execute First Hadoop MapReduce Project in.pptxCreate & Execute First Hadoop MapReduce Project in.pptx
Create & Execute First Hadoop MapReduce Project in.pptx
 
JRubyKaigi2010 Hadoop Papyrus
JRubyKaigi2010 Hadoop PapyrusJRubyKaigi2010 Hadoop Papyrus
JRubyKaigi2010 Hadoop Papyrus
 
Scalable and Flexible Machine Learning With Scala @ LinkedIn
Scalable and Flexible Machine Learning With Scala @ LinkedInScalable and Flexible Machine Learning With Scala @ LinkedIn
Scalable and Flexible Machine Learning With Scala @ LinkedIn
 
The current program only run one iteration of the KMeans algorithm. .docx
The current program only run one iteration of the KMeans algorithm. .docxThe current program only run one iteration of the KMeans algorithm. .docx
The current program only run one iteration of the KMeans algorithm. .docx
 
05 pig user defined functions (udfs)
05 pig user defined functions (udfs)05 pig user defined functions (udfs)
05 pig user defined functions (udfs)
 
Introduction to Scalding and Monoids
Introduction to Scalding and MonoidsIntroduction to Scalding and Monoids
Introduction to Scalding and Monoids
 
Hadoop
HadoopHadoop
Hadoop
 
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash courseCodepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
Codepot - Pig i Hive: szybkie wprowadzenie / Pig and Hive crash course
 
Please fix the java code (using eclipse)package hw4p1;import jav.pdf
Please fix the java code (using eclipse)package hw4p1;import jav.pdfPlease fix the java code (using eclipse)package hw4p1;import jav.pdf
Please fix the java code (using eclipse)package hw4p1;import jav.pdf
 
EuroPython 2015 - Big Data with Python and Hadoop
EuroPython 2015 - Big Data with Python and HadoopEuroPython 2015 - Big Data with Python and Hadoop
EuroPython 2015 - Big Data with Python and Hadoop
 
Having a problem figuring out where my errors are- The code is not run.pdf
Having a problem figuring out where my errors are- The code is not run.pdfHaving a problem figuring out where my errors are- The code is not run.pdf
Having a problem figuring out where my errors are- The code is not run.pdf
 
Cascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUGCascading Through Hadoop for the Boulder JUG
Cascading Through Hadoop for the Boulder JUG
 
Testing multi outputformat based mapreduce
Testing multi outputformat based mapreduceTesting multi outputformat based mapreduce
Testing multi outputformat based mapreduce
 
Amazon elastic map reduce
Amazon elastic map reduceAmazon elastic map reduce
Amazon elastic map reduce
 
Writing Hadoop Jobs in Scala using Scalding
Writing Hadoop Jobs in Scala using ScaldingWriting Hadoop Jobs in Scala using Scalding
Writing Hadoop Jobs in Scala using Scalding
 
Open XKE - Big Data, Big Mess par Bertrand Dechoux
Open XKE - Big Data, Big Mess par Bertrand DechouxOpen XKE - Big Data, Big Mess par Bertrand Dechoux
Open XKE - Big Data, Big Mess par Bertrand Dechoux
 
Spark Day 2017- Spark 의 과거, 현재, 미래
Spark Day 2017- Spark 의 과거, 현재, 미래Spark Day 2017- Spark 의 과거, 현재, 미래
Spark Day 2017- Spark 의 과거, 현재, 미래
 
TypeScript Introduction
TypeScript IntroductionTypeScript Introduction
TypeScript Introduction
 
MaxTemp PPT.pptx
MaxTemp PPT.pptxMaxTemp PPT.pptx
MaxTemp PPT.pptx
 

Recently uploaded

Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...
Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...
Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...apidays
 
Architecting Cloud Native Applications
Architecting Cloud Native ApplicationsArchitecting Cloud Native Applications
Architecting Cloud Native ApplicationsWSO2
 
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ..."I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...Zilliz
 
MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MIND CTI
 
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024Victor Rentea
 
Exploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone ProcessorsExploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone Processorsdebabhi2
 
presentation ICT roal in 21st century education
presentation ICT roal in 21st century educationpresentation ICT roal in 21st century education
presentation ICT roal in 21st century educationjfdjdjcjdnsjd
 
How to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerHow to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerThousandEyes
 
Apidays New York 2024 - Passkeys: Developing APIs to enable passwordless auth...
Apidays New York 2024 - Passkeys: Developing APIs to enable passwordless auth...Apidays New York 2024 - Passkeys: Developing APIs to enable passwordless auth...
Apidays New York 2024 - Passkeys: Developing APIs to enable passwordless auth...apidays
 
[BuildWithAI] Introduction to Gemini.pdf
[BuildWithAI] Introduction to Gemini.pdf[BuildWithAI] Introduction to Gemini.pdf
[BuildWithAI] Introduction to Gemini.pdfSandro Moreira
 
AWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of TerraformAWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of TerraformAndrey Devyatkin
 
FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024The Digital Insurer
 
AXA XL - Insurer Innovation Award Americas 2024
AXA XL - Insurer Innovation Award Americas 2024AXA XL - Insurer Innovation Award Americas 2024
AXA XL - Insurer Innovation Award Americas 2024The Digital Insurer
 
Spring Boot vs Quarkus the ultimate battle - DevoxxUK
Spring Boot vs Quarkus the ultimate battle - DevoxxUKSpring Boot vs Quarkus the ultimate battle - DevoxxUK
Spring Boot vs Quarkus the ultimate battle - DevoxxUKJago de Vreede
 
Manulife - Insurer Transformation Award 2024
Manulife - Insurer Transformation Award 2024Manulife - Insurer Transformation Award 2024
Manulife - Insurer Transformation Award 2024The Digital Insurer
 
Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024The Digital Insurer
 
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data DiscoveryTrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data DiscoveryTrustArc
 
DBX First Quarter 2024 Investor Presentation
DBX First Quarter 2024 Investor PresentationDBX First Quarter 2024 Investor Presentation
DBX First Quarter 2024 Investor PresentationDropbox
 
Ransomware_Q4_2023. The report. [EN].pdf
Ransomware_Q4_2023. The report. [EN].pdfRansomware_Q4_2023. The report. [EN].pdf
Ransomware_Q4_2023. The report. [EN].pdfOverkill Security
 

Recently uploaded (20)

Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...
Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...
Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...
 
Architecting Cloud Native Applications
Architecting Cloud Native ApplicationsArchitecting Cloud Native Applications
Architecting Cloud Native Applications
 
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ..."I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
 
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
 
MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024
 
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
 
Exploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone ProcessorsExploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone Processors
 
presentation ICT roal in 21st century education
presentation ICT roal in 21st century educationpresentation ICT roal in 21st century education
presentation ICT roal in 21st century education
 
How to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerHow to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected Worker
 
Apidays New York 2024 - Passkeys: Developing APIs to enable passwordless auth...
Apidays New York 2024 - Passkeys: Developing APIs to enable passwordless auth...Apidays New York 2024 - Passkeys: Developing APIs to enable passwordless auth...
Apidays New York 2024 - Passkeys: Developing APIs to enable passwordless auth...
 
[BuildWithAI] Introduction to Gemini.pdf
[BuildWithAI] Introduction to Gemini.pdf[BuildWithAI] Introduction to Gemini.pdf
[BuildWithAI] Introduction to Gemini.pdf
 
AWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of TerraformAWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of Terraform
 
FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024
 
AXA XL - Insurer Innovation Award Americas 2024
AXA XL - Insurer Innovation Award Americas 2024AXA XL - Insurer Innovation Award Americas 2024
AXA XL - Insurer Innovation Award Americas 2024
 
Spring Boot vs Quarkus the ultimate battle - DevoxxUK
Spring Boot vs Quarkus the ultimate battle - DevoxxUKSpring Boot vs Quarkus the ultimate battle - DevoxxUK
Spring Boot vs Quarkus the ultimate battle - DevoxxUK
 
Manulife - Insurer Transformation Award 2024
Manulife - Insurer Transformation Award 2024Manulife - Insurer Transformation Award 2024
Manulife - Insurer Transformation Award 2024
 
Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024
 
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data DiscoveryTrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
 
DBX First Quarter 2024 Investor Presentation
DBX First Quarter 2024 Investor PresentationDBX First Quarter 2024 Investor Presentation
DBX First Quarter 2024 Investor Presentation
 
Ransomware_Q4_2023. The report. [EN].pdf
Ransomware_Q4_2023. The report. [EN].pdfRansomware_Q4_2023. The report. [EN].pdf
Ransomware_Q4_2023. The report. [EN].pdf
 

AJUG April 2011 Raw hadoop example

  • 1. package org.ajug; import org.apache.hadoop.fs.Path; import org.apache.hadoop.conf.*; import org.apache.hadoop.io.*; import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; public class MnM { public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "ajug"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Data.class); job.setMapperClass(MnMMapper.class); job.setReducerClass(MnMReducer.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); } } ================================================ package org.ajug; import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.WritableComparable; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; public class Data implements WritableComparable<Data> { private DoubleWritable width = new DoubleWritable(); private DoubleWritable weight = new DoubleWritable(); void set(double a_width, double a_weight) { width.set(a_width); weight.set(a_weight); } public double getWidth() {
  • 2. return width.get(); } public double getWeight() { return weight.get(); } public void write(DataOutput out) throws IOException { width. write(out); weight. write(out); } public void readFields(DataInput in) throws IOException { width. readFields(in); weight. readFields(in); } public int hashCode() { return width.hashCode() * 163 + weight.hashCode(); } public int compareTo(Data tp) { int cmp = width.compareTo(tp.width); if (cmp != 0) { return cmp; } return weight.compareTo(tp.weight); } public String toString() { return "" + width + "t" + weight; } } ======================================== package org.ajug; import org.apache.hadoop.io.*; import org.apache.hadoop.mapreduce.*; import java.io.IOException; public class MnMMapper extends Mapper<LongWritable, Text, Text, Data> { private Text color = new Text(); private Data data = new Data(); public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String[] fields = line.split(","); if (fields.length > 2) { color.set(fields[0]); double weight = Double.parseDouble(fields[1]); double width = Double.parseDouble(fields[2]); data.set(width, weight); context.write(color, data); }
  • 3. } } ====================================== package org.ajug; import org.apache.hadoop.io.*; import org.apache.hadoop.mapreduce.*; import java.io.IOException; import java.util.Iterator; public class MnMReducer extends Reducer <Text, Data, Text, Data> { private Data data = new Data(); public void reduce(Text key, Iterable<Data> values, Context context) throws IOException, InterruptedException { double weights = 0; double widths=0; int count = 0; Iterator iter = values.iterator(); while (iter.hasNext()) { Data value = (Data)iter.next(); count++; weights += value.getWeight(); widths += value.getWidth(); } data.set(widths/count, weights/count); context.write(key, data); } }