Open source indexing and search engine
Web scale
Lucene builds an inverted index; queries against the index return results.
Lucene runs inside a servlet container or a J2EE application server.
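An inverted index maps each term to the documents that contain it, rather than each document to its terms. A toy in-memory sketch of the idea in Java (illustrative only; Lucene's actual on-disk index is far more elaborate):

import java.util.*;

// Toy inverted index: term -> IDs of documents containing that term.
class ToyInvertedIndex {
    private final Map<String, Set<Integer>> postings = new HashMap<String, Set<Integer>>();

    // Record every whitespace-separated, lowercased term of a document.
    void add(int docId, String text) {
        for (String term : text.toLowerCase().split("\\s+")) {
            Set<Integer> docs = postings.get(term);
            if (docs == null) {
                docs = new TreeSet<Integer>();
                postings.put(term, docs);
            }
            docs.add(docId);
        }
    }

    // Look up a term directly: no scan over the documents is needed.
    Set<Integer> search(String term) {
        Set<Integer> docs = postings.get(term.toLowerCase());
        return docs == null ? Collections.<Integer>emptySet() : docs;
    }
}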
WARNING: Java approaching!
Java is strongly object oriented
Perl:

  my @gene_names = ();
  push(@gene_names, $gene);
  print @gene_names;

Java:

  List<String> geneNames = new ArrayList<String>();
  geneNames.add(gene);
  System.out.println(geneNames.toString());
Perl:

  my $gene = Gene->new('ENS12345');
  $gene->set_name('BRCA2');

Java:

  Gene gene = new Gene("ENS12345");
  gene.setName("BRCA2");
Java is strongly typed

Perl:

  my $number = "100";
  $number = $number + 400;
  print $number;

Java:

  Integer number = new Integer(100);
  number = number + 400;
  System.out.println(number);
Java is good at error handling

Perl:

  eval { $gene->transform };
  warn $@ if $@;

Java:

  try {
      gene.transform();
  } catch (IOException e) {
      e.printStackTrace();
  }
Java is surprisingly easy to learn

Perl:

  Conditionals and loops
  Variables have scope
  Extras from CPAN
  Performance is important

Java:

  Conditionals and loops
  Variables have scope
  Extras available as JAR files
  Performance is important
Recipe 1:
Indexing a collection of documents
org.ensembl.lucene.Writer
public static void main(String[] args) {
    HashMap<String, String> arguments = new HashMap<String, String>();
    String key = null;

    for (String s : args) {
        if (key == null) {
            key = s;
        } else {
            arguments.put(key, s);
            key = null;
        }
    }

    Writer writer = new Writer();

    writer.setIndexLocation(arguments.get("-index"));
    writer.setInputLocation(arguments.get("-input"));

    if (arguments.get("-mergefactor") != null) {
        writer.setMergeFactor(Integer.valueOf(arguments.get("-mergefactor")));
    }

    if (arguments.get("-maxmergedocs") != null) {
        writer.setMaxMergeDocs(Integer.valueOf(arguments.get("-maxmergedocs")));
    }

    try {
        writer.index();
    } catch (IOException e) {
        e.printStackTrace();
    }

    System.out.println("Indexing complete");
}
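A hypothetical invocation (the flag names come from the code above; the paths are invented for illustration):

  java org.ensembl.lucene.Writer -index /path/to/index -input /path/to/documents -mergefactor 100 -maxmergedocs 10000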
Max-merge-docs:
how many documents are added to a segment

Merge-factor:
how often Lucene merges index segments when adding documents
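Both knobs are plain setters on the writer. A hedged example of batch-indexing settings (the values are illustrative, not recommendations from this deck): a larger merge factor speeds up bulk indexing but leaves more segments behind, which slows searching until the index is optimized.

  // Illustrative batch-indexing settings (Lucene 2.x setters, as used above).
  writer.setMergeFactor(100);                     // merge less often while indexing
  writer.setMaxMergeDocs(Integer.MAX_VALUE);      // no cap on documents per segment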
public void index() throws IOException {
    File index = new File(getIndexLocation());
    File location = new File(getInputLocation());
    IndexWriter writer = new IndexWriter(index, new StandardAnalyzer(), true);

    writer.setMergeFactor(getMergeFactor());
    writer.setMaxMergeDocs(getMaxMergeDocs());

    indexDocuments(writer, location);

    writer.optimize();
    writer.close();
}

private static void indexDocuments(IndexWriter writer, File location) throws IOException {

    if (location.canRead()) {
        if (location.isDirectory()) {
            // Recurse into directories, indexing every readable file.
            String[] files = location.list();
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocuments(writer, new File(location, files[i]));
                }
            }
        } else {
            System.out.println("Indexing " + location);
            try {
                GeneFileDocument.index(writer, location);
            } catch (FileNotFoundException e) {
                System.out.println("Caught exception: " + e);
            }
        }
    }
}
org.ensembl.lucene.GeneFileDocument
public static void index(IndexWriter writer, File f) throws IOException {

    String[] fields = {"subtype", "id", "url", "keywords", "description"};

    FileReader input = new FileReader(f);
    BufferedReader bufRead = new BufferedReader(input);
    String line = bufRead.readLine();

    while (line != null) {

        Document doc = new Document();

        int count = 0;
        String[] terms = line.split("\t");

        while (count < terms.length) {
            String field = fields[count];
            String item = terms[count];
            doc.add(new Field(field, item, Field.Store.YES, Field.Index.TOKENIZED));
            count++;
        }

        writer.addDocument(doc);

        line = bufRead.readLine();
    }

    bufRead.close();
}
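Each input line is one document, with the five fields above separated by tabs. A hypothetical example line (the ID and subtype echo the search examples later in this deck; the url, keywords, and description values are invented):

  Vega_havana processed_pseudogene Gene<TAB>OTTHUMG00000000423<TAB>/geneview?gene=OTTHUMG00000000423<TAB>pseudogene havana<TAB>Processed pseudogene (hypothetical description)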
Quite a lot of memory: ~1.5 GB
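Indexing at this scale needs a correspondingly large JVM heap; the default is much smaller. A typical way to grant it (standard JVM flag; the exact value is up to you):

  java -Xmx1500m org.ensembl.lucene.Writer -index /path/to/index -input /path/to/documents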
Creates index
Merge indices to form a master search index
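Lucene can merge separately built indices into one. A minimal sketch using the Lucene 2.x-era API that matches the constructors above (the directory paths are hypothetical):

  // Merge several partial indices into a single master index.
  IndexWriter master = new IndexWriter(new File("master-index"), new StandardAnalyzer(), true);

  Directory[] parts = {
      FSDirectory.getDirectory("index-part-1"),
      FSDirectory.getDirectory("index-part-2")
  };

  master.addIndexes(parts);  // copies and merges the segments
  master.optimize();
  master.close();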
Recipe 2:
Finding documents containing a search term
Easy
org.ensembl.lucene.Search
public static void main(String[] args) {

    // Timer is a simple stopwatch helper from the example project
    // (not java.util.Timer).
    Timer timer = new Timer();

    String index = "index";
    try {

        timer.start();
        Searcher searcher = new IndexSearcher(index);
        timer.stop();

        System.out.println("Loaded " + searcher.maxDoc() + " documents in " + timer.elapsed() + "ms");

        search(searcher, "subtype", "Vega_havana processed_pseudogene Gene");
        search(searcher, "id", "OTTHUMG00000000423");

        searcher.close();

    } catch (Exception e) {
        e.printStackTrace();
    }
}
private static void search(Searcher searcher, String field, String queryString)
        throws ParseException, IOException {

    Timer timer = new Timer();
    timer.start();

    System.out.println("Search (" + field + "): " + queryString);

    QueryParser parser = new QueryParser(field, new StandardAnalyzer());
    Query query = parser.parse(queryString);

    Hits hits = searcher.search(query);

    int count = 1;
    Iterator<Hit> hiterator = hits.iterator();
    while (hiterator.hasNext()) {
        Hit hit = hiterator.next();
        Document document = hit.getDocument();
        System.out.println(count + ": ID: " + document.get("id"));
        System.out.println(count + ": Subtype: " + document.get("subtype"));
        count++;
    }

    int hitCount = hits.length();
    timer.stop();

    System.out.println("Hits: " + hitCount);
    System.out.println("Completed in " + timer.elapsed() + "ms");
}
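One behaviour worth knowing (standard QueryParser semantics, not specific to this deck): whitespace-separated terms are combined with OR by default, and StandardAnalyzer lowercases them, so the subtype query above matches documents containing any of the three terms. To require all terms instead:

  parser.setDefaultOperator(QueryParser.AND_OPERATOR);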
Recipe 3:
Querying a remote document index
Wrap everything into a single file
Copy that file to an application server
Restart the application server
Voilà!
(almost never that easy)
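The "single file" here is a standard J2EE web archive (WAR). A sketch of the expected layout (contents are illustrative; the actual archive depends on the project):

  search.war
    WEB-INF/web.xml     (servlet configuration)
    WEB-INF/classes/    (compiled org.ensembl.lucene classes)
    WEB-INF/lib/        (Lucene core JAR and other dependencies)
  (the index itself can be shipped alongside or mounted separately)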
You will need...
Bonus recipe!
Automate tasks with Ant

XML based configuration
Automated compiles
Automated test runner
Automated deployment
Platform independent
Flexible (but complex)
ant deploy

clean code → clean index → compile → build index → build jar → build war → deploy

(a skeletal build file sketch follows)
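A skeletal build.xml wiring that chain together (target, path, and property names are invented for illustration; the Ant tasks shown are standard core tasks):

  <project name="ensembl-lucene" default="deploy">

    <target name="clean" description="clean code and index">
      <delete dir="build"/>
      <delete dir="index"/>
    </target>

    <target name="compile" depends="clean">
      <mkdir dir="build"/>
      <javac srcdir="src" destdir="build" classpath="lib/lucene-core.jar"/>
    </target>

    <target name="build-index" depends="compile">
      <java classname="org.ensembl.lucene.Writer" fork="true" maxmemory="1500m">
        <classpath path="build:lib/lucene-core.jar"/>
        <arg line="-index index -input data"/>
      </java>
    </target>

    <target name="build-war" depends="build-index">
      <mkdir dir="dist"/>
      <war destfile="dist/search.war" webxml="web.xml">
        <classes dir="build"/>
        <lib dir="lib"/>
      </war>
    </target>

    <target name="deploy" depends="build-war">
      <copy file="dist/search.war" todir="${appserver.deploy.dir}"/>
    </target>

  </project>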
Could this work for Ensembl?
lucene.apache.org
Java IDEs rock: get stuck in
Thank you
