SlideShare ist ein Scribd-Unternehmen logo
1 von 24
Lazy Multigram Learning Environment for ACRS SISY 2008 Zoran Popović shoom013[at] gmail.com Institute for Multidisciplinary Research Belgrade University
Automated Content Recommendation Systems (ACRS) and Information Retrieval ,[object Object],[object Object],[object Object],[object Object],[object Object]
k-Nearest Neighbour Method (kNN) ,[object Object]
k-Nearest Neighbour Method (kNN) ,[object Object],[object Object],[object Object],[object Object],[object Object]
SVM classification ,[object Object]
SVM classification ,[object Object],[object Object],[object Object],[object Object],[object Object]
N-grams ,[object Object],[object Object],[object Object],[object Object]
N-grams ,[object Object],[object Object],[object Object],[object Object]
ngram.jar – generating N-grams java ngram.generator.Arff inDir outfile.arff [options] Options: -l <Lmin>  =  lower rank bound (default=1) -m <Lmax>  =  upper rank bound (default=10) -i <invf>  =  inverse frequency threshold (default=0.34) -N <N>  =  N-gram order (default=3) -D <depth> =  biggest number of N-grams (default=4294967295) -w <url> =  use database with jdbc url to write data -r <url> =  use database with jdbc url to read arff -u =  do not use normalized vectors for output EXAMPLE:   ./Arff.sh . ./out.arff -l 1 -m 500 -N 4 -i 0.5 -D 1048576     (subfolders as category names)
ngram.jar – JDBC storage ,[object Object],[object Object],[object Object],[object Object]
Results with N-grams ,[object Object],[object Object]
Some good indications about performance with multigrams ,[object Object],[object Object],[object Object]
Weka, Data Mining Tool – ARFF (Attribute-Relation File Format)
Example of multi-instance ARFF file with sparse data ,[object Object]
Weka's SVM MI SMO classifier
Weka – JDBC Horizontal form of data is needed - all attributes in each row ARFF supports data given by sparse vectors (zero values omitted – this also speeds up SVM)
<WEKA_HOME>/DatabaseUtils.props: ... jdbcDriver=...org.gjt.mm.mysql.Driver,oracle.jdbc.driver.OracleDriver ... CHAR=0 ... VARCHAR=0 VARCHAR2=0 ... NUMBER=7 .... Weka – JDBC
[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],Weka command line
SQL and vertical N-gram storage PROFILES:  NSHARED NGRAMS:  TSHARED
SQL and data transformation ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
SQL and data transformation Data2.sql – cursor function for a query returning horizontal form of data:
create or replace function fewcols(
    p_lmin  number,
    p_lmax  number,
    p_invf  number,
    p_bagid number,
    p_norm  boolean default true
) return sys_refcursor
is
    -- Builds one horizontal row for bag p_bagid as the dynamic statement
    --   select <bag_id> bag_id, <value> A<rank>, ..., '<category>' category from dual
    -- and returns it as an open ref cursor (the caller is responsible for closing it).
    --
    --   p_lmin, p_lmax : bounds applied to the rank0 value computed in cursors C and CS
    --   p_invf         : upper bound on (tshared rows with the same ngram and N)
    --                    / (distinct bag_ids in tshared)
    --   p_bagid        : bag to export
    --   p_norm         : when true, every count is divided by the value fetched from CS
    --
    -- The category lookup is a SELECT ... INTO, so it raises NO_DATA_FOUND or
    -- TOO_MANY_ROWS when tshared holds no, or more than one, category for p_bagid.
    str  varchar2(32000);
    cat  varchar2(64) := null;
    opt  sys_refcursor;
    j    number;            -- next column index that still has to be emitted
    norm number := 1;

    -- N-grams of one bag that pass the rank0 and p_invf filters, in column order.
    cursor C(lmin number, lmax number, invf number, bagid number) is
        select T.rank-1 rank, TS.count count, TS.bag_id, TS.category, TS.N
        from tshared TS,
             (select rownum - (select count(bag_id) from tshared ts0 where ts0.N<T0.N) rank0,
                     rownum rank, N, ngram
              from (select N, ngram, count from nshared order by N asc, count desc) T0) T
        where TS.ngram=T.ngram
          and TS.N=T.N
          and T.rank0 between lmin and lmax
          and (select count(bag_id) from tshared TS2 where TS2.ngram=TS.ngram and TS2.N=TS.N)
              /(select count(distinct bag_id) from tshared) <= invf
          and TS.bag_id=bagid
        order by TS.bag_id asc, TS.N asc, T.rank asc;

    -- Divisor used for normalization, taken over the same row set as C.
    -- NOTE(review): this is the Euclidean norm of the rank positions, while the
    -- values being divided are counts - confirm whether sqrt(sum(count*count))
    -- was intended.
    cursor CS(lmin number, lmax number, invf number, bagid number) is
        select sqrt(sum((T.rank-1)*(T.rank-1))) norm
        from tshared TS,
             (select rownum - (select count(bag_id) from tshared ts0 where ts0.N<T0.N) rank0,
                     rownum rank, N, ngram
              from (select N, ngram, count from nshared order by N asc, count desc) T0) T
        where TS.ngram=T.ngram
          and TS.N=T.N
          and T.rank0 between lmin and lmax
          and (select count(bag_id) from tshared TS2 where TS2.ngram=TS.ngram and TS2.N=TS.N)
              /(select count(distinct bag_id) from tshared) <= invf
          and TS.bag_id=bagid;
begin
    str := to_char(p_bagid) || ' bag_id';
    j := p_lmin;

    if p_norm then
        open CS(p_lmin, p_lmax, p_invf, p_bagid);
        fetch CS into norm;
        close CS;
        -- SUM over zero rows is NULL; treat it like 0 so the division stays defined.
        if norm is null or norm = 0 then
            norm := 1;
        end if;
    end if;

    for i in C(p_lmin, p_lmax, p_invf, p_bagid) loop
        if cat is null then
            cat := i.category;
        end if;
        -- Zero-fill the columns skipped between the previous n-gram and this one.
        if j < i.rank then
            for n in j .. i.rank-1 loop
                str := str || ', 0 A' || to_char(n);
            end loop;
        end if;
        str := str || ', ' || to_char(i.count/norm, '9999.99999999') || ' A' || to_char(i.rank);
        j := i.rank + 1;
    end loop;

    -- Only look the category up when the cursor did not already supply it.
    if cat is null then
        select distinct category into cat from tshared where bag_id = p_bagid;
    end if;

    -- Zero-fill the trailing columns up to p_lmax.
    if j <= p_lmax then
        for n in j .. p_lmax loop
            str := str || ', 0 A' || to_char(n);
        end loop;
    end if;

    -- The category is embedded as a string literal, so double any single quote
    -- in it; otherwise a name such as O'Brien produces invalid (or injected) SQL.
    str := str || ', ''' || replace(cat, '''', '''''') || ''' category';

    open opt for 'select ' || str || ' from dual';
    return opt;
end;
/
SQL and data transformation Data3.sql – procedure generating table DATA3 in horizontal form:
create or replace procedure data(
    p_lmin number,
    p_lmax number,
    p_invf number,
    p_norm boolean default true
)
is
    -- Rebuilds table DATA3 with one horizontal row per distinct bag_id of tshared:
    --   <bag_id> bag_id, <value> A<rank>, ..., '<category>' category
    -- The first bag creates the table (after dropping any previous DATA3), every
    -- further bag is inserted into it. The DDL statements commit implicitly.
    --
    --   p_lmin, p_lmax : bounds applied to the rank0 value computed in cursors C and CS
    --   p_invf         : upper bound on (tshared rows with the same ngram and N)
    --                    / (distinct bag_ids in tshared)
    --   p_norm         : when true, every count is divided by the value fetched from CS
    str     varchar2(32000);
    cat     varchar2(64) := null;
    cat_lit varchar2(200);      -- category as a quoted, escaped SQL string literal
    first   boolean := true;
    p_bagid number;             -- local loop variable despite the p_ prefix
    j       number;             -- next column index that still has to be emitted
    norm    number := 1;

    -- N-grams of one bag that pass the rank0 and p_invf filters, in column order.
    cursor C(lmin number, lmax number, invf number, bagid number) is
        select T.rank-1 rank, TS.count count, TS.bag_id, TS.category, TS.N
        from tshared TS,
             (select rownum - (select count(bag_id) from tshared ts0 where ts0.N<T0.N) rank0,
                     rownum rank, N, ngram
              from (select N, ngram, count from nshared order by N asc, count desc) T0) T
        where TS.ngram=T.ngram
          and TS.N=T.N
          and T.rank0 between lmin and lmax
          and (select count(bag_id) from tshared TS2 where TS2.ngram=TS.ngram and TS2.N=TS.N)
              /(select count(distinct bag_id) from tshared) <= invf
          and TS.bag_id=bagid
        order by TS.bag_id asc, TS.N asc, T.rank asc;

    -- Divisor used for normalization, taken over the same row set as C.
    -- NOTE(review): this is the Euclidean norm of the rank positions, while the
    -- values being divided are counts - confirm whether sqrt(sum(count*count))
    -- was intended.
    cursor CS(lmin number, lmax number, invf number, bagid number) is
        select sqrt(sum((T.rank-1)*(T.rank-1))) norm
        from tshared TS,
             (select rownum - (select count(bag_id) from tshared ts0 where ts0.N<T0.N) rank0,
                     rownum rank, N, ngram
              from (select N, ngram, count from nshared order by N asc, count desc) T0) T
        where TS.ngram=T.ngram
          and TS.N=T.N
          and T.rank0 between lmin and lmax
          and (select count(bag_id) from tshared TS2 where TS2.ngram=TS.ngram and TS2.N=TS.N)
              /(select count(distinct bag_id) from tshared) <= invf
          and TS.bag_id=bagid;

    cursor B is
        select distinct bag_id from tshared;
begin
    open B;
    loop
        fetch B into p_bagid;
        exit when B%NOTFOUND;

        -- Per-bag state must start clean; without this reset a bag could inherit
        -- the category (and divisor) of the bag processed before it.
        cat  := null;
        norm := 1;
        str  := to_char(p_bagid) || ' bag_id';
        j    := p_lmin;

        if p_norm then
            open CS(p_lmin, p_lmax, p_invf, p_bagid);
            fetch CS into norm;
            close CS;
            -- SUM over zero rows is NULL; treat it like 0 so the division stays defined.
            if norm is null or norm = 0 then
                norm := 1;
            end if;
        end if;

        for i in C(p_lmin, p_lmax, p_invf, p_bagid) loop
            if cat is null then
                cat := i.category;
            end if;
            -- Zero-fill the columns skipped between the previous n-gram and this one.
            if j < i.rank then
                for n in j .. i.rank-1 loop
                    str := str || ', 0 A' || to_char(n);
                end loop;
            end if;
            str := str || ', ' || to_char(i.count/norm, '9999.99999999') || ' A' || to_char(i.rank);
            j := i.rank + 1;
        end loop;

        -- Only look the category up when the cursor did not already supply it.
        if cat is null then
            select distinct category into cat from tshared where bag_id = p_bagid;
        end if;

        -- Zero-fill the trailing columns up to p_lmax.
        if j <= p_lmax then
            for n in j .. p_lmax loop
                str := str || ', 0 A' || to_char(n);
            end loop;
        end if;

        -- Double embedded single quotes so the category is a valid string literal.
        cat_lit := '''' || replace(cat, '''', '''''') || '''';

        if first then
            first := false;
            begin
                execute immediate 'drop table data3';
            exception
                when others then
                    -- A missing table (ORA-00942) is expected on the first run;
                    -- anything else is a real failure and must not be hidden.
                    if sqlcode != -942 then
                        raise;
                    end if;
            end;
            -- A bare string literal would make CATEGORY a CHAR column sized to the
            -- first bag's category; cast it to the declared width of cat so longer
            -- categories of later bags still fit.
            execute immediate 'create table data3 as select ' || str
                || ', cast(' || cat_lit || ' as varchar2(64)) category from dual';
        else
            execute immediate 'insert into data3 select ' || str
                || ', ' || cat_lit || ' category from dual';
        end if;
    end loop;
    close B;

    commit;
end;
/
Conclusions so far ... ,[object Object],[object Object],[object Object]
? Questions ...

Weitere ähnliche Inhalte

Ähnlich wie SISY 2008

MLlib sparkmeetup_8_6_13_final_reduced
MLlib sparkmeetup_8_6_13_final_reducedMLlib sparkmeetup_8_6_13_final_reduced
MLlib sparkmeetup_8_6_13_final_reducedChao Chen
 
Lab 2: Classification and Regression Prediction Models, training and testing ...
Lab 2: Classification and Regression Prediction Models, training and testing ...Lab 2: Classification and Regression Prediction Models, training and testing ...
Lab 2: Classification and Regression Prediction Models, training and testing ...Yao Yao
 
Machine learning using spark
Machine learning using sparkMachine learning using spark
Machine learning using sparkRan Silberman
 
Workshop NGS data analysis - 2
Workshop NGS data analysis - 2Workshop NGS data analysis - 2
Workshop NGS data analysis - 2Maté Ongenaert
 
Automate ml workflow_transmogrif_ai-_chetan_khatri_berlin-scala
Automate ml workflow_transmogrif_ai-_chetan_khatri_berlin-scalaAutomate ml workflow_transmogrif_ai-_chetan_khatri_berlin-scala
Automate ml workflow_transmogrif_ai-_chetan_khatri_berlin-scalaChetan Khatri
 
Mini-lab 1: Stochastic Gradient Descent classifier, Optimizing Logistic Regre...
Mini-lab 1: Stochastic Gradient Descent classifier, Optimizing Logistic Regre...Mini-lab 1: Stochastic Gradient Descent classifier, Optimizing Logistic Regre...
Mini-lab 1: Stochastic Gradient Descent classifier, Optimizing Logistic Regre...Yao Yao
 
Machinelearning Spark Hadoop User Group Munich Meetup 2016
Machinelearning Spark Hadoop User Group Munich Meetup 2016Machinelearning Spark Hadoop User Group Munich Meetup 2016
Machinelearning Spark Hadoop User Group Munich Meetup 2016Comsysto Reply GmbH
 
Spark ml streaming
Spark ml streamingSpark ml streaming
Spark ml streamingAdam Doyle
 
No more struggles with Apache Spark workloads in production
No more struggles with Apache Spark workloads in productionNo more struggles with Apache Spark workloads in production
No more struggles with Apache Spark workloads in productionChetan Khatri
 
PPT on Data Science Using Python
PPT on Data Science Using PythonPPT on Data Science Using Python
PPT on Data Science Using PythonNishantKumar1179
 
Python-for-Data-Analysis.pptx
Python-for-Data-Analysis.pptxPython-for-Data-Analysis.pptx
Python-for-Data-Analysis.pptxParveenShaik21
 
2014.06.24.what is ubix
2014.06.24.what is ubix2014.06.24.what is ubix
2014.06.24.what is ubixJim Cooley
 
Snmp class
Snmp classSnmp class
Snmp classaduitsis
 
Fosdem2017 Scientific computing on Jruby
Fosdem2017  Scientific computing on JrubyFosdem2017  Scientific computing on Jruby
Fosdem2017 Scientific computing on JrubyPrasun Anand
 
Device status anomaly detection
Device status anomaly detectionDevice status anomaly detection
Device status anomaly detectionDavid Tung
 
Spark 4th Meetup Londond - Building a Product with Spark
Spark 4th Meetup Londond - Building a Product with SparkSpark 4th Meetup Londond - Building a Product with Spark
Spark 4th Meetup Londond - Building a Product with Sparksamthemonad
 
AI&BigData Lab.Руденко Петр. Automation and optimisation of machine learning ...
AI&BigData Lab.Руденко Петр. Automation and optimisation of machine learning ...AI&BigData Lab.Руденко Петр. Automation and optimisation of machine learning ...
AI&BigData Lab.Руденко Петр. Automation and optimisation of machine learning ...GeeksLab Odessa
 

Ähnlich wie SISY 2008 (20)

Lk module3
Lk module3Lk module3
Lk module3
 
MLlib sparkmeetup_8_6_13_final_reduced
MLlib sparkmeetup_8_6_13_final_reducedMLlib sparkmeetup_8_6_13_final_reduced
MLlib sparkmeetup_8_6_13_final_reduced
 
interenship.pptx
interenship.pptxinterenship.pptx
interenship.pptx
 
Lab 2: Classification and Regression Prediction Models, training and testing ...
Lab 2: Classification and Regression Prediction Models, training and testing ...Lab 2: Classification and Regression Prediction Models, training and testing ...
Lab 2: Classification and Regression Prediction Models, training and testing ...
 
Machine learning using spark
Machine learning using sparkMachine learning using spark
Machine learning using spark
 
Workshop NGS data analysis - 2
Workshop NGS data analysis - 2Workshop NGS data analysis - 2
Workshop NGS data analysis - 2
 
Automate ml workflow_transmogrif_ai-_chetan_khatri_berlin-scala
Automate ml workflow_transmogrif_ai-_chetan_khatri_berlin-scalaAutomate ml workflow_transmogrif_ai-_chetan_khatri_berlin-scala
Automate ml workflow_transmogrif_ai-_chetan_khatri_berlin-scala
 
Mini-lab 1: Stochastic Gradient Descent classifier, Optimizing Logistic Regre...
Mini-lab 1: Stochastic Gradient Descent classifier, Optimizing Logistic Regre...Mini-lab 1: Stochastic Gradient Descent classifier, Optimizing Logistic Regre...
Mini-lab 1: Stochastic Gradient Descent classifier, Optimizing Logistic Regre...
 
Machinelearning Spark Hadoop User Group Munich Meetup 2016
Machinelearning Spark Hadoop User Group Munich Meetup 2016Machinelearning Spark Hadoop User Group Munich Meetup 2016
Machinelearning Spark Hadoop User Group Munich Meetup 2016
 
Scala in Places API
Scala in Places APIScala in Places API
Scala in Places API
 
Spark ml streaming
Spark ml streamingSpark ml streaming
Spark ml streaming
 
No more struggles with Apache Spark workloads in production
No more struggles with Apache Spark workloads in productionNo more struggles with Apache Spark workloads in production
No more struggles with Apache Spark workloads in production
 
PPT on Data Science Using Python
PPT on Data Science Using PythonPPT on Data Science Using Python
PPT on Data Science Using Python
 
Python-for-Data-Analysis.pptx
Python-for-Data-Analysis.pptxPython-for-Data-Analysis.pptx
Python-for-Data-Analysis.pptx
 
2014.06.24.what is ubix
2014.06.24.what is ubix2014.06.24.what is ubix
2014.06.24.what is ubix
 
Snmp class
Snmp classSnmp class
Snmp class
 
Fosdem2017 Scientific computing on Jruby
Fosdem2017  Scientific computing on JrubyFosdem2017  Scientific computing on Jruby
Fosdem2017 Scientific computing on Jruby
 
Device status anomaly detection
Device status anomaly detectionDevice status anomaly detection
Device status anomaly detection
 
Spark 4th Meetup Londond - Building a Product with Spark
Spark 4th Meetup Londond - Building a Product with SparkSpark 4th Meetup Londond - Building a Product with Spark
Spark 4th Meetup Londond - Building a Product with Spark
 
AI&BigData Lab.Руденко Петр. Automation and optimisation of machine learning ...
AI&BigData Lab.Руденко Петр. Automation and optimisation of machine learning ...AI&BigData Lab.Руденко Петр. Automation and optimisation of machine learning ...
AI&BigData Lab.Руденко Петр. Automation and optimisation of machine learning ...
 

Mehr von Zoran Popovic

Evaluacija programa za obeležavanje (etiketiranje) teksta na srpskom jeziku
Evaluacija programa za obeležavanje (etiketiranje) teksta na srpskom jezikuEvaluacija programa za obeležavanje (etiketiranje) teksta na srpskom jeziku
Evaluacija programa za obeležavanje (etiketiranje) teksta na srpskom jezikuZoran Popovic
 
Veštačka inteligencija 2
Veštačka inteligencija 2Veštačka inteligencija 2
Veštačka inteligencija 2Zoran Popovic
 
Veštačka inteligencija 1
Veštačka inteligencija 1Veštačka inteligencija 1
Veštačka inteligencija 1Zoran Popovic
 
Magistarska teza - prezentacija
Magistarska teza - prezentacijaMagistarska teza - prezentacija
Magistarska teza - prezentacijaZoran Popovic
 
SAP, Linux, Virtualization and ... Itanium
SAP, Linux, Virtualization and ... ItaniumSAP, Linux, Virtualization and ... Itanium
SAP, Linux, Virtualization and ... ItaniumZoran Popovic
 
SSO secure communication flow for web Oracle login
SSO secure communication flow for web Oracle loginSSO secure communication flow for web Oracle login
SSO secure communication flow for web Oracle loginZoran Popovic
 

Mehr von Zoran Popovic (13)

Evaluacija programa za obeležavanje (etiketiranje) teksta na srpskom jeziku
Evaluacija programa za obeležavanje (etiketiranje) teksta na srpskom jezikuEvaluacija programa za obeležavanje (etiketiranje) teksta na srpskom jeziku
Evaluacija programa za obeležavanje (etiketiranje) teksta na srpskom jeziku
 
Veštačka inteligencija 2
Veštačka inteligencija 2Veštačka inteligencija 2
Veštačka inteligencija 2
 
Veštačka inteligencija 1
Veštačka inteligencija 1Veštačka inteligencija 1
Veštačka inteligencija 1
 
Machine Learning
Machine LearningMachine Learning
Machine Learning
 
Ekspertni sistemi
Ekspertni sistemiEkspertni sistemi
Ekspertni sistemi
 
Soft Computing
Soft ComputingSoft Computing
Soft Computing
 
Magistarska teza
Magistarska tezaMagistarska teza
Magistarska teza
 
Magistarska teza - prezentacija
Magistarska teza - prezentacijaMagistarska teza - prezentacija
Magistarska teza - prezentacija
 
Tag
TagTag
Tag
 
SAP, Linux, Virtualization and ... Itanium
SAP, Linux, Virtualization and ... ItaniumSAP, Linux, Virtualization and ... Itanium
SAP, Linux, Virtualization and ... Itanium
 
SSO secure communication flow for web Oracle login
SSO secure communication flow for web Oracle loginSSO secure communication flow for web Oracle login
SSO secure communication flow for web Oracle login
 
Migration to 9i
Migration to 9iMigration to 9i
Migration to 9i
 
ETRAN 2008
ETRAN 2008ETRAN 2008
ETRAN 2008
 

Kürzlich hochgeladen

Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...
Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...
Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...Zilliz
 
A Beginners Guide to Building a RAG App Using Open Source Milvus
A Beginners Guide to Building a RAG App Using Open Source MilvusA Beginners Guide to Building a RAG App Using Open Source Milvus
A Beginners Guide to Building a RAG App Using Open Source MilvusZilliz
 
Exploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone ProcessorsExploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone Processorsdebabhi2
 
AWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of TerraformAWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of TerraformAndrey Devyatkin
 
Ransomware_Q4_2023. The report. [EN].pdf
Ransomware_Q4_2023. The report. [EN].pdfRansomware_Q4_2023. The report. [EN].pdf
Ransomware_Q4_2023. The report. [EN].pdfOverkill Security
 
A Year of the Servo Reboot: Where Are We Now?
A Year of the Servo Reboot: Where Are We Now?A Year of the Servo Reboot: Where Are We Now?
A Year of the Servo Reboot: Where Are We Now?Igalia
 
MS Copilot expands with MS Graph connectors
MS Copilot expands with MS Graph connectorsMS Copilot expands with MS Graph connectors
MS Copilot expands with MS Graph connectorsNanddeep Nachan
 
How to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerHow to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerThousandEyes
 
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...apidays
 
MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MIND CTI
 
Navi Mumbai Call Girls 🥰 8617370543 Service Offer VIP Hot Model
Navi Mumbai Call Girls 🥰 8617370543 Service Offer VIP Hot ModelNavi Mumbai Call Girls 🥰 8617370543 Service Offer VIP Hot Model
Navi Mumbai Call Girls 🥰 8617370543 Service Offer VIP Hot ModelDeepika Singh
 
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost SavingRepurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost SavingEdi Saputra
 
presentation ICT roal in 21st century education
presentation ICT roal in 21st century educationpresentation ICT roal in 21st century education
presentation ICT roal in 21st century educationjfdjdjcjdnsjd
 
Apidays Singapore 2024 - Scalable LLM APIs for AI and Generative AI Applicati...
Apidays Singapore 2024 - Scalable LLM APIs for AI and Generative AI Applicati...Apidays Singapore 2024 - Scalable LLM APIs for AI and Generative AI Applicati...
Apidays Singapore 2024 - Scalable LLM APIs for AI and Generative AI Applicati...apidays
 
Apidays Singapore 2024 - Modernizing Securities Finance by Madhu Subbu
Apidays Singapore 2024 - Modernizing Securities Finance by Madhu SubbuApidays Singapore 2024 - Modernizing Securities Finance by Madhu Subbu
Apidays Singapore 2024 - Modernizing Securities Finance by Madhu Subbuapidays
 
2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...Martijn de Jong
 
Data Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt RobisonData Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt RobisonAnna Loughnan Colquhoun
 
Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024The Digital Insurer
 
FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024The Digital Insurer
 

Kürzlich hochgeladen (20)

Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...
Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...
Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...
 
A Beginners Guide to Building a RAG App Using Open Source Milvus
A Beginners Guide to Building a RAG App Using Open Source MilvusA Beginners Guide to Building a RAG App Using Open Source Milvus
A Beginners Guide to Building a RAG App Using Open Source Milvus
 
Exploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone ProcessorsExploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone Processors
 
AWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of TerraformAWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of Terraform
 
Ransomware_Q4_2023. The report. [EN].pdf
Ransomware_Q4_2023. The report. [EN].pdfRansomware_Q4_2023. The report. [EN].pdf
Ransomware_Q4_2023. The report. [EN].pdf
 
A Year of the Servo Reboot: Where Are We Now?
A Year of the Servo Reboot: Where Are We Now?A Year of the Servo Reboot: Where Are We Now?
A Year of the Servo Reboot: Where Are We Now?
 
MS Copilot expands with MS Graph connectors
MS Copilot expands with MS Graph connectorsMS Copilot expands with MS Graph connectors
MS Copilot expands with MS Graph connectors
 
How to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerHow to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected Worker
 
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
 
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
 
MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024
 
Navi Mumbai Call Girls 🥰 8617370543 Service Offer VIP Hot Model
Navi Mumbai Call Girls 🥰 8617370543 Service Offer VIP Hot ModelNavi Mumbai Call Girls 🥰 8617370543 Service Offer VIP Hot Model
Navi Mumbai Call Girls 🥰 8617370543 Service Offer VIP Hot Model
 
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost SavingRepurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
 
presentation ICT roal in 21st century education
presentation ICT roal in 21st century educationpresentation ICT roal in 21st century education
presentation ICT roal in 21st century education
 
Apidays Singapore 2024 - Scalable LLM APIs for AI and Generative AI Applicati...
Apidays Singapore 2024 - Scalable LLM APIs for AI and Generative AI Applicati...Apidays Singapore 2024 - Scalable LLM APIs for AI and Generative AI Applicati...
Apidays Singapore 2024 - Scalable LLM APIs for AI and Generative AI Applicati...
 
Apidays Singapore 2024 - Modernizing Securities Finance by Madhu Subbu
Apidays Singapore 2024 - Modernizing Securities Finance by Madhu SubbuApidays Singapore 2024 - Modernizing Securities Finance by Madhu Subbu
Apidays Singapore 2024 - Modernizing Securities Finance by Madhu Subbu
 
2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...
 
Data Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt RobisonData Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt Robison
 
Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024
 
FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024
 

SISY 2008

  • 1. Lazy Multigram Learning Environment for ACRS SISY 2008 Zoran Popović shoom013[at] gmail.com Institute for Multidisciplinary Research Belgrade University
  • 2.
  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 9. ngram.jar – generating N-grams java ngram.generator.Arff inDir outfile.arff [options] Options: -l <Lmin> = lower rank bound (default=1) -m <Lmax> = upper rank bound (default=10) -i <invf> = inverse frequency threshold (default=0.34) -N <N> = N-gram order (default=3) -D <depth> = biggest number of N-grams (default=4294967295) -w <url> = use database with jdbc url to write data -r <url> = use database with jdbc url to read arff -u = do not use normalized vectors for output EXAMPLE: ./Arff.sh . ./out.arff -l 1 -m 500 -N 4 -i 0.5 -D 1048576 (subfolders as category names)
  • 10.
  • 11.
  • 12.
  • 13. Weka, Data Mining Tool – ARFF (Attribute-Relation File Format)
  • 14.
  • 15. Weka's SVM MI SMO classifier
  • 16. Weka – JDBC Horizontal form of data is needed - all attributes in each row ARFF supports data given by sparse vectors (zero values omitted – this also speeds up SVM)
  • 17. <WEKA_HOME>/DatabaseUtils.props: ... jdbcDriver=...org.gjt.mm.mysql.Driver,oracle.jdbc.driver.OracleDriver ... CHAR=0 ... VARCHAR=0 VARCHAR2=0 ... NUMBER=7 .... Weka – JDBC
  • 18.
  • 19. SQL and vertical N-gram storage PROFILES: NSHARED NGRAMS: TSHARED
  • 20.
  • 21. SQL and data transformation Data2.sql – cursor function for a query returning horizontal form of data:
    create or replace function fewcols(
        p_lmin  number,
        p_lmax  number,
        p_invf  number,
        p_bagid number,
        p_norm  boolean default true
    ) return sys_refcursor
    is
        -- Builds one horizontal row for bag p_bagid as the dynamic statement
        --   select <bag_id> bag_id, <value> A<rank>, ..., '<category>' category from dual
        -- and returns it as an open ref cursor (the caller is responsible for closing it).
        --
        --   p_lmin, p_lmax : bounds applied to the rank0 value computed in cursors C and CS
        --   p_invf         : upper bound on (tshared rows with the same ngram and N)
        --                    / (distinct bag_ids in tshared)
        --   p_bagid        : bag to export
        --   p_norm         : when true, every count is divided by the value fetched from CS
        --
        -- The category lookup is a SELECT ... INTO, so it raises NO_DATA_FOUND or
        -- TOO_MANY_ROWS when tshared holds no, or more than one, category for p_bagid.
        str  varchar2(32000);
        cat  varchar2(64) := null;
        opt  sys_refcursor;
        j    number;            -- next column index that still has to be emitted
        norm number := 1;

        -- N-grams of one bag that pass the rank0 and p_invf filters, in column order.
        cursor C(lmin number, lmax number, invf number, bagid number) is
            select T.rank-1 rank, TS.count count, TS.bag_id, TS.category, TS.N
            from tshared TS,
                 (select rownum - (select count(bag_id) from tshared ts0 where ts0.N<T0.N) rank0,
                         rownum rank, N, ngram
                  from (select N, ngram, count from nshared order by N asc, count desc) T0) T
            where TS.ngram=T.ngram
              and TS.N=T.N
              and T.rank0 between lmin and lmax
              and (select count(bag_id) from tshared TS2 where TS2.ngram=TS.ngram and TS2.N=TS.N)
                  /(select count(distinct bag_id) from tshared) <= invf
              and TS.bag_id=bagid
            order by TS.bag_id asc, TS.N asc, T.rank asc;

        -- Divisor used for normalization, taken over the same row set as C.
        -- NOTE(review): this is the Euclidean norm of the rank positions, while the
        -- values being divided are counts - confirm whether sqrt(sum(count*count))
        -- was intended.
        cursor CS(lmin number, lmax number, invf number, bagid number) is
            select sqrt(sum((T.rank-1)*(T.rank-1))) norm
            from tshared TS,
                 (select rownum - (select count(bag_id) from tshared ts0 where ts0.N<T0.N) rank0,
                         rownum rank, N, ngram
                  from (select N, ngram, count from nshared order by N asc, count desc) T0) T
            where TS.ngram=T.ngram
              and TS.N=T.N
              and T.rank0 between lmin and lmax
              and (select count(bag_id) from tshared TS2 where TS2.ngram=TS.ngram and TS2.N=TS.N)
                  /(select count(distinct bag_id) from tshared) <= invf
              and TS.bag_id=bagid;
    begin
        str := to_char(p_bagid) || ' bag_id';
        j := p_lmin;

        if p_norm then
            open CS(p_lmin, p_lmax, p_invf, p_bagid);
            fetch CS into norm;
            close CS;
            -- SUM over zero rows is NULL; treat it like 0 so the division stays defined.
            if norm is null or norm = 0 then
                norm := 1;
            end if;
        end if;

        for i in C(p_lmin, p_lmax, p_invf, p_bagid) loop
            if cat is null then
                cat := i.category;
            end if;
            -- Zero-fill the columns skipped between the previous n-gram and this one.
            if j < i.rank then
                for n in j .. i.rank-1 loop
                    str := str || ', 0 A' || to_char(n);
                end loop;
            end if;
            str := str || ', ' || to_char(i.count/norm, '9999.99999999') || ' A' || to_char(i.rank);
            j := i.rank + 1;
        end loop;

        -- Only look the category up when the cursor did not already supply it.
        if cat is null then
            select distinct category into cat from tshared where bag_id = p_bagid;
        end if;

        -- Zero-fill the trailing columns up to p_lmax.
        if j <= p_lmax then
            for n in j .. p_lmax loop
                str := str || ', 0 A' || to_char(n);
            end loop;
        end if;

        -- The category is embedded as a string literal, so double any single quote
        -- in it; otherwise a name such as O'Brien produces invalid (or injected) SQL.
        str := str || ', ''' || replace(cat, '''', '''''') || ''' category';

        open opt for 'select ' || str || ' from dual';
        return opt;
    end;
    /
  • 22. SQL and data transformation Data3.sql – procedure generating table DATA3 in horizontal form:
    create or replace procedure data(
        p_lmin number,
        p_lmax number,
        p_invf number,
        p_norm boolean default true
    )
    is
        -- Rebuilds table DATA3 with one horizontal row per distinct bag_id of tshared:
        --   <bag_id> bag_id, <value> A<rank>, ..., '<category>' category
        -- The first bag creates the table (after dropping any previous DATA3), every
        -- further bag is inserted into it. The DDL statements commit implicitly.
        --
        --   p_lmin, p_lmax : bounds applied to the rank0 value computed in cursors C and CS
        --   p_invf         : upper bound on (tshared rows with the same ngram and N)
        --                    / (distinct bag_ids in tshared)
        --   p_norm         : when true, every count is divided by the value fetched from CS
        str     varchar2(32000);
        cat     varchar2(64) := null;
        cat_lit varchar2(200);      -- category as a quoted, escaped SQL string literal
        first   boolean := true;
        p_bagid number;             -- local loop variable despite the p_ prefix
        j       number;             -- next column index that still has to be emitted
        norm    number := 1;

        -- N-grams of one bag that pass the rank0 and p_invf filters, in column order.
        cursor C(lmin number, lmax number, invf number, bagid number) is
            select T.rank-1 rank, TS.count count, TS.bag_id, TS.category, TS.N
            from tshared TS,
                 (select rownum - (select count(bag_id) from tshared ts0 where ts0.N<T0.N) rank0,
                         rownum rank, N, ngram
                  from (select N, ngram, count from nshared order by N asc, count desc) T0) T
            where TS.ngram=T.ngram
              and TS.N=T.N
              and T.rank0 between lmin and lmax
              and (select count(bag_id) from tshared TS2 where TS2.ngram=TS.ngram and TS2.N=TS.N)
                  /(select count(distinct bag_id) from tshared) <= invf
              and TS.bag_id=bagid
            order by TS.bag_id asc, TS.N asc, T.rank asc;

        -- Divisor used for normalization, taken over the same row set as C.
        -- NOTE(review): this is the Euclidean norm of the rank positions, while the
        -- values being divided are counts - confirm whether sqrt(sum(count*count))
        -- was intended.
        cursor CS(lmin number, lmax number, invf number, bagid number) is
            select sqrt(sum((T.rank-1)*(T.rank-1))) norm
            from tshared TS,
                 (select rownum - (select count(bag_id) from tshared ts0 where ts0.N<T0.N) rank0,
                         rownum rank, N, ngram
                  from (select N, ngram, count from nshared order by N asc, count desc) T0) T
            where TS.ngram=T.ngram
              and TS.N=T.N
              and T.rank0 between lmin and lmax
              and (select count(bag_id) from tshared TS2 where TS2.ngram=TS.ngram and TS2.N=TS.N)
                  /(select count(distinct bag_id) from tshared) <= invf
              and TS.bag_id=bagid;

        cursor B is
            select distinct bag_id from tshared;
    begin
        open B;
        loop
            fetch B into p_bagid;
            exit when B%NOTFOUND;

            -- Per-bag state must start clean; without this reset a bag could inherit
            -- the category (and divisor) of the bag processed before it.
            cat  := null;
            norm := 1;
            str  := to_char(p_bagid) || ' bag_id';
            j    := p_lmin;

            if p_norm then
                open CS(p_lmin, p_lmax, p_invf, p_bagid);
                fetch CS into norm;
                close CS;
                -- SUM over zero rows is NULL; treat it like 0 so the division stays defined.
                if norm is null or norm = 0 then
                    norm := 1;
                end if;
            end if;

            for i in C(p_lmin, p_lmax, p_invf, p_bagid) loop
                if cat is null then
                    cat := i.category;
                end if;
                -- Zero-fill the columns skipped between the previous n-gram and this one.
                if j < i.rank then
                    for n in j .. i.rank-1 loop
                        str := str || ', 0 A' || to_char(n);
                    end loop;
                end if;
                str := str || ', ' || to_char(i.count/norm, '9999.99999999') || ' A' || to_char(i.rank);
                j := i.rank + 1;
            end loop;

            -- Only look the category up when the cursor did not already supply it.
            if cat is null then
                select distinct category into cat from tshared where bag_id = p_bagid;
            end if;

            -- Zero-fill the trailing columns up to p_lmax.
            if j <= p_lmax then
                for n in j .. p_lmax loop
                    str := str || ', 0 A' || to_char(n);
                end loop;
            end if;

            -- Double embedded single quotes so the category is a valid string literal.
            cat_lit := '''' || replace(cat, '''', '''''') || '''';

            if first then
                first := false;
                begin
                    execute immediate 'drop table data3';
                exception
                    when others then
                        -- A missing table (ORA-00942) is expected on the first run;
                        -- anything else is a real failure and must not be hidden.
                        if sqlcode != -942 then
                            raise;
                        end if;
                end;
                -- A bare string literal would make CATEGORY a CHAR column sized to the
                -- first bag's category; cast it to the declared width of cat so longer
                -- categories of later bags still fit.
                execute immediate 'create table data3 as select ' || str
                    || ', cast(' || cat_lit || ' as varchar2(64)) category from dual';
            else
                execute immediate 'insert into data3 select ' || str
                    || ', ' || cat_lit || ' category from dual';
            end if;
        end loop;
        close B;

        commit;
    end;
    /
  • 23.