SlideShare ist ein Scribd-Unternehmen logo
1 von 13
Downloaden Sie, um offline zu lesen
Erlang for Data Ops




                a db-centric design with Erlang




Warning: trendy people may be offended... (contains refs to relational databases, perl)

                                                                                 @mnacos
specs
●   multiple relational dbs   why Erlang?
●   feed processing           ●   read the book(s), saw the
                                  movie, been to meetups
●   multiple locations
                              ●   good for concurrency,
●   multiple schemas              services, scalability, etc.
●   automated                 ●   can't stand middleware
●   soft realtime                 (esp. java “frameworks”)
●   interoperable
●   extensible
approach
●   good old-fashioned
    relational modelling
●   model escapes the
    RDBMS
●   Erlang as 'glue'
    (erl is perl for systems)
●   Erlang thin-clients enforce
    and maintain the model
●   global state resides in a
    traditional database (logic)
                                   controlling db schema snippet
●   ACID is good – eventual
    consistency across sites
workflow
    Hub discovers / allocates work

    Agents do feed processing

    Agents submit messages to hub

    Relational-friendly message fmt

    Messages routed via rabbitmq

    Each site has its own mailbox

    Erl consumer applies operations




  Principles:

  http for synchronous
  amqp for asynchronous

  agents, hubs and consumers
  have types...
message format
"t":[                                                              single transaction
        {    "d":{"virtualdb1":"tracking data"},
             "r":{"public":"mycases"},
             "k":[
                   {"company":"6678928"}              Transactions such as these
             ],                                       are packaged in amqp
             "z":null                                 messages with appropriate
                                                      routing keys
        },
        {    "d":{"virtualdb1":"tracking data"},
             "r":{"public":"mycases"},
             "k":[
                   {"company":"6678928"},                          key part
                   {"casenumber":"9513"}
             ],
             "z":[
                   {"dateregistered":"2010-09-10"},
                   {"location":"LONDON"},                           payload
                   {"statuscode":"ABCD"},
                   {"sum":"3983.00"}
             ]                                        Each element of this transaction is
        }                                             declarative i.e. a logical assertion
]                                                     ... we use UPSERTs
Nonterminals


                                                            json_to_erl.yrl
transaction items item
target attpairs attpair
pair key value bytearray bytes byte.

Terminals ':' atom string integer '[' ']' '{' '}' ','.
                                                            for yecc (LALR-1 Parser Generator)
% Test this code with: f(File), f(Scan), f(Status), {ok,
File} = file:read_file("new-format.json"),
yecc:yecc("json_to_erl.yrl","json_to_erl.erl"),
c(json_to_erl), {ok,Scan,Status} =
erl_scan:string(binary_to_list(File)),                      key -> string : unwrap('$1').
json_to_erl:parse(Scan).                                    value -> atom : unwrap('$1').
                                                            value -> string : unwrap('$1').
Rootsymbol transaction.                                     value -> integer : unwrap('$1').

transaction -> key ':' '[' items ']' : pack('$1','$4').     bytearray -> '[' ']' : [].
                                                            bytearray -> '[' bytes ']' : '$2'.
items -> item : ['$1'].                                     bytes -> byte : ['$1'].
items -> item ',' items : ['$1'|'$3'].                      bytes -> byte ',' bytes : ['$1'|'$3'].
                                                            byte -> integer : unwrap('$1').
item -> '{' target ',' target ',' target ',' target '}' :
pack(item,['$2','$4','$6','$8']).                           Erlang code.

target -> key ':' '{' pair '}' : pack('$1','$4').           unwrap({_,_,V}) when is_integer(V) -> V;
target -> key ':' atom : pack('$1',null).                   unwrap({_,_,V}) when is_list(V) -> V;
target -> key ':' '[' ']' : pack('$1',null).                unwrap({_,_,V}) -> V.
target -> key ':' '[' attpairs ']' : pack('$1','$4').
                                                            pack(Key,List) when Key == "t" -> {transaction,List};
attpairs -> attpair : ['$1'].                               pack(item,List) -> {item,List};
attpairs -> attpair ',' attpairs : ['$1'|'$3'].             pack(Key,Tuple) when Key == "d" -> {dbvar,Tuple};
attpair -> '{' pair '}': '$2'.                              pack(Key,Tuple) when Key == "r" -> {relvar,Tuple};
                                                            pack(Key,List) when Key == "k" -> {key,List};
pair -> key ':' bytearray : {'$1', '$3'}.                   pack(Key,List) when Key == "z" -> {relation,List};
pair -> key ':' value : {'$1', '$3'}.                       pack(Key,List) -> {Key,List}.
sql from our json
erl_to_sql({transaction, Items}) ->                        dict_to_sql(Dict) ->
   items_to_sql(Items).                                       {Targetdb, Dbver} = read_from_dict(Dict, dbvar),
                                                              {Targetsch, Targetrel} = read_from_dict(Dict, relvar),
items_to_sql(Items) ->                                        case read_from_dict(Dict, key) of
                                                                {Sk, Sv} -> Keydefs = [Sk], Keyvals = [Sv];
   items_to_sql([], Items).                                     [{Sk, Sv}] -> Keydefs = [Sk], Keyvals = [Sv];
                                                                [] -> Keydefs = ["null"], Keyvals = ["null"];
items_to_sql(Statements, []) -> lists:reverse(Statements);      null -> Keydefs = ["null"], Keyvals = ["null"];
items_to_sql(Statements, [H | T]) ->                            List -> {Keydefs, Keyvals} = split_keydefs_keyvals(List)
   {item, Fields} = H,                                        end,
   Sql = sql_from_item(Fields),                               case read_from_dict(Dict, relation) of
                                                                % empty relation means DELETE
   items_to_sql([Sql|Statements], T).
                                                                null ->
                                                                    {Targetdb, lists:concat(
% storing all elements into a dict                                      ["DELETE FROM "] ++ [Targetsch] ++ ["."] ++ [Targetrel] ++
%      so that their order is not important                             [" WHERE "] ++ where_clause(Keydefs,Keyvals) ++ [";"]
% ----------------------------------------------------              )};
sql_from_item(Fields) ->                                        % empty key means INSERT
   Dict = dict:new(),                                           Tuples when Keyvals == ["null"] ->
                                                                    {Keys, Values} = split_pairs(Tuples),
   sql_from_item(Dict, Fields).                                     {Targetdb, "INSERT INTO " ++ Targetsch ++ "." ++ Targetrel ++ "
                                                           " ++
sql_from_item(Dict, []) -> dict_to_sql(Dict);                           commas_and_parentheses(Keys) ++ " VALUES " ++
sql_from_item(Dict, [H | T]) ->                                         commas_and_parentheses(sql_quote(Values)) ++ ";"};
   {Key,Val} = H,                                               % if both key and relation are supplied we UPDATE
   sql_from_item(dict:store(Key,Val,Dict), T).                  Tuples ->
                                                                    {Targetdb, lists:concat(
                                                                        ["UPDATE "] ++ [Targetsch] ++ ["."] ++ [Targetrel] ++
                                                                        [" SET "] ++ [pairs_to_sql(Tuples)] ++ [" WHERE "] ++
                                                                        where_clause(Keydefs,Keyvals) ++ [";"]
                                                                    )}
                                                              end.
(simple http fileserver example)

                                                mochiweb
 -module(abstract_files,[Class]).
 -behaviour(gen_server).                               init(Port) ->
                                                          code:add_path("deps"),
                                                          mochiweb_http:start([
                                                                         {port, Port},
                                                                         {loop, fun(Req) -> dispatch_requests(Req) end}
                                                                     ]),
                                                          {ok, []}.

%% CONTROLLER -------------------------------------------------------------------------
%% Routes an incoming request: the request path is normalised with
%% clean_path/1 and handed, together with the request itself, to the
%% handle/2 function of the module bound to Class.
dispatch_requests(Req) ->
   Action = clean_path(Req:get(path)),
   Class:handle(Action, Req).

                                                      handle(Action, Req) when Action == "/" ->
                                                        case file:read_file("lib/start.sh") of
                                                          {ok, Binary} -> ?SUPER:send_text(Req, 200, "text/plain", Binary);
                                                          {error, _} -> not_found(Req)
                                                        end;
%% Replies to Req with status 200, sending Binary as the body together with
%% headers that mark it as a downloadable attachment named Filename.
send_file(Req, Filename, Binary) ->
  Disposition = lists:concat(["attachment; filename=", Filename]),
  Headers = [
     {"Content-Type", "application/octet-stream"},
     {"Content-Transfer-Encoding", "base64"},
     {"Content-disposition", Disposition}
  ],
  Req:respond({200, Headers, Binary}).
(simple consumer example)
                                              rabbitmq
% handle call for subscribe
handle_call({subscribe, MsgCallback, QoS}, _From, State) ->
  [{Conn}] = get_term(State, connection),                                  still using 1.7 version of the client
  [{Queue}] = get_term(State, queue),                                      version 2.1 is out!
  case get_term(State, consumer) of
     [{_Consumer}] -> {reply, already_subscribed, State};
     _ ->
        try
           #rabbit_queue{q=Q,passive=_P,durable=_D,exclusive=_E,auto_delete=_A} = Queue,
           process_flag(trap_exit, true),
           Consumer = spawn_link(                                 own wrapper
                           fun() ->
                 process_flag(trap_exit, true),                   % configuration and startup example:
                 % opening a connection, channel and auth         %
                 {Connection, Channel} = connect(Conn),           % R = hubz_rabbit:new(abstract_rabbit, "token").
                 % asserting queue                                % C = R:connection("localhost","/","guest","guest").
                 assert_queue(Queue,Channel),                     % E = R:exchange("ONE","direct",false,true,false).
                 % setting QoS parameter                          % Q = R:queue("TWO",false,true,false,false).
                 set_prefetch_count(Channel,QoS),                 % B = R:binding("TWO","ONE","#").
                 % basic consume                                  % R:start_link({C,E,Q,B}).
                 #'basic.consume_ok'{ consumer_tag = Tag } =
                      amqp_channel:subscribe(Channel, #'basic.consume'{ queue = Q }, self()),
                 consumer_loop(Connection, Channel, Tag, MsgCallback) end ),
           ets:insert(State, {consumer, {Consumer}}),
           ets:insert(State, {consumer_settings, {MsgCallback, QoS}}),
           {reply, ok, State}
        catch
           _:_ -> {reply, error, State}
        end                                                 R:subscribe(fun(_Key,Data)->io:format("~p~n",[Data]) end).
  end;
%% Spawns a process that registers itself under the name Id, traps exits,
%% opens a port to the external command "epg <Id>" and then enters loop/1
%% with that port. Returns the pid of the spawned process.
start(Id) ->
   spawn(fun() ->
       register(Id, self()),
       process_flag(trap_exit, true),
       % {line, 4096}: the port delivers output as {eol, _} / {noeol, _} chunks
       Port = open_port({spawn, "epg "++atom_to_list(Id)}, [use_stdio, {line, 4096}]),
                                                                                            epg
       loop(Port)
    end).

                                                                         uses epg.c, libpq-based
%% Slide note: epg uses epg.c, a libpq-based program, for connecting to postgres.
%%
%% Collects line-oriented output from Port until a terminator line arrives.
%%
%%   Port    - the port (or any term) that tags the incoming data messages
%%   RespAcc - completed lines gathered so far, newest first
%%   LineAcc - chunks of the line currently being assembled, newest first
%%
%% Returns {data, Resp}, {error, Resp}, {info, Resp} or {bye, Resp} when the
%% terminator "!eod!", "!error!", "!connected!" or "!bye!" is received, where
%% Resp is the concatenation of the collected lines in arrival order, or the
%% bare atom 'timeout' if no matching message arrives within the timeout.
collect_response(Port, RespAcc, LineAcc) ->
  receive
     {Port, {data, {eol, "!eod!"}}} ->
        {data, lists:concat(lists:reverse(RespAcc))};

     {Port, {data, {eol, "!error!"}}} ->
       {error, lists:concat(lists:reverse(RespAcc))};

     {Port, {data, {eol, "!connected!"}}} ->
       {info, lists:concat(lists:reverse(RespAcc))};

     {Port, {data, {eol, "!bye!"}}} ->
       {bye, lists:concat(lists:reverse(RespAcc))};

     {Port, {data, {eol, Result}}} ->
       % end of a line: join the buffered chunks with this final one and
       % restore the newline that the port's line mode strips off
       Line = lists:reverse([Result | LineAcc]),
       collect_response(Port, [ [Line | "\n"] | RespAcc], []);

     {Port, {data, {noeol, Result}}} ->
       % partial line: buffer the chunk until its eol arrives
       collect_response(Port, RespAcc, [Result | LineAcc])

  %% Prevent the gen_server from hanging indefinitely in case the
  %% spawned process is taking too long processing the request.
  %% The timer restarts on every receive, i.e. after each chunk consumed.
    after 72000000 ->
       timeout
  end.
gen_event + gen_fsm + log4erl
  %% DISPATCH(Type, Data) expands to an immediate call of ?EVENT:normal/3
  %% with Type, the pair {Data, self()} and ?LINE as arguments; the call is
  %% wrapped in a zero-arity fun that is applied on the spot.
  %% NOTE(review): ?EVENT is defined elsewhere -- presumably the event
  %% manager module; confirm.
  -define(DISPATCH(Type, Data),
     erlang:apply(
        fun() -> ?EVENT:normal(Type, {Data, self()}, ?LINE) end, []
     )
  ).                                                ?DISPATCH(agent, {{Ip, SPort}, MyTag, unregistered_agent, MyEvent})
               %% Event-handler callback. The handler state is the name this
               %% handler answers to. Event is expected to have the shape
               %% {{OriginServer, _, _, _}, _, _}; when OriginServer equals the
               %% handler's name the event is forwarded with gen_fsm:send_event/2
               %% to the process named by adapt(MyName, "fsm"), otherwise it is
               %% ignored. Always returns {ok, State} with the state unchanged.
               handle_event(Event, State) ->
                 MyName = State,                         gen_event:add_handler(mybus, {my_listener, Server}, Server)
                 % should we handle this event?
                 {{OriginServer,_P,_M,_C}, _E, _T} = Event,
                 case OriginServer of
                    % MyName is already bound, so this clause matches only our own name
                    MyName ->
                       % e.g. if MyName is 'my' we send it to the my_fsm handler module             (gen_event)
                       gen_fsm:send_event(adapt(MyName,"fsm"), Event);
                    _ -> ok
                 end,
                 {ok, State}.

% handling messages/errors from *** agents ***
normal({{_S, _P, {_M, _L}, _C}, {_I, normal, agent, Data}, _Now}, State) ->                          (gen_fsm)
  {EventData, _Pid} = Data,
  % what differentiates agent events is the {data_validation_error, ...} part at the end
  case EventData of
      {_Agent, _Workfile, _Line, {data_validation_error, _Datatype, _Value}} ->
        log4erl:warn(feed, "data validation error: ~p", [EventData]);
      {_Agent, _Workfile, _Line, {data_validation_pkey_error, _Datatype, _Value}} ->
        log4erl:error(feed, "data validation (in pkey!) error: ~p", [EventData]);
      {_Agent, Workfile, Line, {no_rule_event, Code}} ->
        log4erl:warn(feed, "no mapping rules for record type ~p (~s:~s)", [Code, Workfile, Line]);
      {_Agent, Workfile, Line, {missing_data_event, Key}} ->
        log4erl:info(feed, "missing data for mapping ~p (~s:~s)", [Key, Workfile, Line]);
% ... etc etc
somehow, it works
thoughts                                                                 ideas
●   not easy                                 ●   dynamic tuple introspection
●   it's crazy                               ●   java/scala client libraries
●   i'd do it again
●   erlang messes with your mind




            links
                            http://www.rabbitmq.com/erlang-client-user-guide.html

             http://github.com/mochi/mochiweb

                            http://code.google.com/p/log4erl/

             http://github.com/mnacos/epg
                                                                              @mnacos

Weitere ähnliche Inhalte

Was ist angesagt?

Symfony Day 2010 Doctrine MongoDB ODM
Symfony Day 2010 Doctrine MongoDB ODMSymfony Day 2010 Doctrine MongoDB ODM
Symfony Day 2010 Doctrine MongoDB ODM
Jonathan Wage
 
From mysql to MongoDB(MongoDB2011北京交流会)
From mysql to MongoDB(MongoDB2011北京交流会)From mysql to MongoDB(MongoDB2011北京交流会)
From mysql to MongoDB(MongoDB2011北京交流会)
Night Sailer
 
MTDDC 2010.2.5 Tokyo - Brand new API
MTDDC 2010.2.5 Tokyo - Brand new APIMTDDC 2010.2.5 Tokyo - Brand new API
MTDDC 2010.2.5 Tokyo - Brand new API
Six Apart KK
 
CodeCamp Iasi 10 march 2012 - Practical Groovy
CodeCamp Iasi 10 march 2012 - Practical GroovyCodeCamp Iasi 10 march 2012 - Practical Groovy
CodeCamp Iasi 10 march 2012 - Practical Groovy
Codecamp Romania
 

Was ist angesagt? (20)

Revisiting SOLID Principles
Revisiting  SOLID Principles Revisiting  SOLID Principles
Revisiting SOLID Principles
 
Symfony Day 2010 Doctrine MongoDB ODM
Symfony Day 2010 Doctrine MongoDB ODMSymfony Day 2010 Doctrine MongoDB ODM
Symfony Day 2010 Doctrine MongoDB ODM
 
Spine JS
Spine JSSpine JS
Spine JS
 
Open Source Search: An Analysis
Open Source Search: An AnalysisOpen Source Search: An Analysis
Open Source Search: An Analysis
 
UKOUG Tech14 - Getting Started With JSON in the Database
UKOUG Tech14 - Getting Started With JSON in the DatabaseUKOUG Tech14 - Getting Started With JSON in the Database
UKOUG Tech14 - Getting Started With JSON in the Database
 
From mysql to MongoDB(MongoDB2011北京交流会)
From mysql to MongoDB(MongoDB2011北京交流会)From mysql to MongoDB(MongoDB2011北京交流会)
From mysql to MongoDB(MongoDB2011北京交流会)
 
Starting with JSON Path Expressions in Oracle 12.1.0.2
Starting with JSON Path Expressions in Oracle 12.1.0.2Starting with JSON Path Expressions in Oracle 12.1.0.2
Starting with JSON Path Expressions in Oracle 12.1.0.2
 
Dependency Injection in Laravel
Dependency Injection in LaravelDependency Injection in Laravel
Dependency Injection in Laravel
 
Spring Data for KSDG 2012/09
Spring Data for KSDG 2012/09Spring Data for KSDG 2012/09
Spring Data for KSDG 2012/09
 
Modern Application Foundations: Underscore and Twitter Bootstrap
Modern Application Foundations: Underscore and Twitter BootstrapModern Application Foundations: Underscore and Twitter Bootstrap
Modern Application Foundations: Underscore and Twitter Bootstrap
 
Dartprogramming
DartprogrammingDartprogramming
Dartprogramming
 
初めてのQuickで初めてのテスト
初めてのQuickで初めてのテスト初めてのQuickで初めてのテスト
初めてのQuickで初めてのテスト
 
MTDDC 2010.2.5 Tokyo - Brand new API
MTDDC 2010.2.5 Tokyo - Brand new APIMTDDC 2010.2.5 Tokyo - Brand new API
MTDDC 2010.2.5 Tokyo - Brand new API
 
OSDC.fr 2012 :: Cascalog : progammation logique pour Hadoop
OSDC.fr 2012 :: Cascalog : progammation logique pour HadoopOSDC.fr 2012 :: Cascalog : progammation logique pour Hadoop
OSDC.fr 2012 :: Cascalog : progammation logique pour Hadoop
 
Oracle Database - JSON and the In-Memory Database
Oracle Database - JSON and the In-Memory DatabaseOracle Database - JSON and the In-Memory Database
Oracle Database - JSON and the In-Memory Database
 
Drupal II: The SQL
Drupal II: The SQLDrupal II: The SQL
Drupal II: The SQL
 
Perl object ?
Perl object ?Perl object ?
Perl object ?
 
Quebec pdo
Quebec pdoQuebec pdo
Quebec pdo
 
Backbone.js: Run your Application Inside The Browser
Backbone.js: Run your Application Inside The BrowserBackbone.js: Run your Application Inside The Browser
Backbone.js: Run your Application Inside The Browser
 
CodeCamp Iasi 10 march 2012 - Practical Groovy
CodeCamp Iasi 10 march 2012 - Practical GroovyCodeCamp Iasi 10 march 2012 - Practical Groovy
CodeCamp Iasi 10 march 2012 - Practical Groovy
 

Andere mochten auch (12)

Diplomas
DiplomasDiplomas
Diplomas
 
Internal training - Eda
Internal training - EdaInternal training - Eda
Internal training - Eda
 
Javan Owino Diploma certificate
Javan Owino Diploma certificateJavan Owino Diploma certificate
Javan Owino Diploma certificate
 
Data processing with celery and rabbit mq
Data processing with celery and rabbit mqData processing with celery and rabbit mq
Data processing with celery and rabbit mq
 
Dilplomas Certificaciones
Dilplomas CertificacionesDilplomas Certificaciones
Dilplomas Certificaciones
 
Attachment report IAT
Attachment report IATAttachment report IAT
Attachment report IAT
 
Attachment report Victor
Attachment report VictorAttachment report Victor
Attachment report Victor
 
INTERNSHIP REPORT
INTERNSHIP REPORTINTERNSHIP REPORT
INTERNSHIP REPORT
 
Attachment report
Attachment report Attachment report
Attachment report
 
Field attachment report (alie chibwe)
Field attachment report (alie chibwe)Field attachment report (alie chibwe)
Field attachment report (alie chibwe)
 
Industrial Training Report-1
Industrial Training Report-1Industrial Training Report-1
Industrial Training Report-1
 
Summer internship project report
Summer internship project reportSummer internship project report
Summer internship project report
 

Ähnlich wie Erlang for data ops

Refactoring to Macros with Clojure
Refactoring to Macros with ClojureRefactoring to Macros with Clojure
Refactoring to Macros with Clojure
Dmitry Buzdin
 
Working With JQuery Part1
Working With JQuery Part1Working With JQuery Part1
Working With JQuery Part1
saydin_soft
 

Ähnlich wie Erlang for data ops (20)

Apache Spark - Key-Value RDD | Big Data Hadoop Spark Tutorial | CloudxLab
Apache Spark - Key-Value RDD | Big Data Hadoop Spark Tutorial | CloudxLabApache Spark - Key-Value RDD | Big Data Hadoop Spark Tutorial | CloudxLab
Apache Spark - Key-Value RDD | Big Data Hadoop Spark Tutorial | CloudxLab
 
Swift Sequences & Collections
Swift Sequences & CollectionsSwift Sequences & Collections
Swift Sequences & Collections
 
Scala for Java Developers
Scala for Java DevelopersScala for Java Developers
Scala for Java Developers
 
Refactoring to Macros with Clojure
Refactoring to Macros with ClojureRefactoring to Macros with Clojure
Refactoring to Macros with Clojure
 
学生向けScalaハンズオンテキスト
学生向けScalaハンズオンテキスト学生向けScalaハンズオンテキスト
学生向けScalaハンズオンテキスト
 
Model-Driven Software Development - Pretty-Printing, Editor Services, Term Re...
Model-Driven Software Development - Pretty-Printing, Editor Services, Term Re...Model-Driven Software Development - Pretty-Printing, Editor Services, Term Re...
Model-Driven Software Development - Pretty-Printing, Editor Services, Term Re...
 
Internationalizing CakePHP Applications
Internationalizing CakePHP ApplicationsInternationalizing CakePHP Applications
Internationalizing CakePHP Applications
 
An Introduction to Higher Order Functions in Spark SQL with Herman van Hovell
An Introduction to Higher Order Functions in Spark SQL with Herman van HovellAn Introduction to Higher Order Functions in Spark SQL with Herman van Hovell
An Introduction to Higher Order Functions in Spark SQL with Herman van Hovell
 
(first '(Clojure.))
(first '(Clojure.))(first '(Clojure.))
(first '(Clojure.))
 
Clean code
Clean codeClean code
Clean code
 
Apache Spark - Key Value RDD - Transformations | Big Data Hadoop Spark Tutori...
Apache Spark - Key Value RDD - Transformations | Big Data Hadoop Spark Tutori...Apache Spark - Key Value RDD - Transformations | Big Data Hadoop Spark Tutori...
Apache Spark - Key Value RDD - Transformations | Big Data Hadoop Spark Tutori...
 
Stratosphere Intro (Java and Scala Interface)
Stratosphere Intro (Java and Scala Interface)Stratosphere Intro (Java and Scala Interface)
Stratosphere Intro (Java and Scala Interface)
 
Spark workshop
Spark workshopSpark workshop
Spark workshop
 
User Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryUser Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love Story
 
User Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryUser Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love Story
 
Working With JQuery Part1
Working With JQuery Part1Working With JQuery Part1
Working With JQuery Part1
 
Scala @ TechMeetup Edinburgh
Scala @ TechMeetup EdinburghScala @ TechMeetup Edinburgh
Scala @ TechMeetup Edinburgh
 
CS101- Introduction to Computing- Lecture 29
CS101- Introduction to Computing- Lecture 29CS101- Introduction to Computing- Lecture 29
CS101- Introduction to Computing- Lecture 29
 
CoffeeScript - A Rubyist's Love Affair
CoffeeScript - A Rubyist's Love AffairCoffeeScript - A Rubyist's Love Affair
CoffeeScript - A Rubyist's Love Affair
 
Python lecture 05
Python lecture 05Python lecture 05
Python lecture 05
 

Kürzlich hochgeladen

Finding Java's Hidden Performance Traps @ DevoxxUK 2024
Finding Java's Hidden Performance Traps @ DevoxxUK 2024Finding Java's Hidden Performance Traps @ DevoxxUK 2024
Finding Java's Hidden Performance Traps @ DevoxxUK 2024
Victor Rentea
 
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
Victor Rentea
 
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers:  A Deep Dive into Serverless Spatial Data and FMECloud Frontiers:  A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Safe Software
 

Kürzlich hochgeladen (20)

TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data DiscoveryTrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
 
Polkadot JAM Slides - Token2049 - By Dr. Gavin Wood
Polkadot JAM Slides - Token2049 - By Dr. Gavin WoodPolkadot JAM Slides - Token2049 - By Dr. Gavin Wood
Polkadot JAM Slides - Token2049 - By Dr. Gavin Wood
 
Finding Java's Hidden Performance Traps @ DevoxxUK 2024
Finding Java's Hidden Performance Traps @ DevoxxUK 2024Finding Java's Hidden Performance Traps @ DevoxxUK 2024
Finding Java's Hidden Performance Traps @ DevoxxUK 2024
 
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
 
Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...
Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...
Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...
 
Understanding the FAA Part 107 License ..
Understanding the FAA Part 107 License ..Understanding the FAA Part 107 License ..
Understanding the FAA Part 107 License ..
 
Mcleodganj Call Girls 🥰 8617370543 Service Offer VIP Hot Model
Mcleodganj Call Girls 🥰 8617370543 Service Offer VIP Hot ModelMcleodganj Call Girls 🥰 8617370543 Service Offer VIP Hot Model
Mcleodganj Call Girls 🥰 8617370543 Service Offer VIP Hot Model
 
MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024
 
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
 
[BuildWithAI] Introduction to Gemini.pdf
[BuildWithAI] Introduction to Gemini.pdf[BuildWithAI] Introduction to Gemini.pdf
[BuildWithAI] Introduction to Gemini.pdf
 
Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...
Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...
Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...
 
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
Modular Monolith - a Practical Alternative to Microservices @ Devoxx UK 2024
 
presentation ICT roal in 21st century education
presentation ICT roal in 21st century educationpresentation ICT roal in 21st century education
presentation ICT roal in 21st century education
 
Biography Of Angeliki Cooney | Senior Vice President Life Sciences | Albany, ...
Biography Of Angeliki Cooney | Senior Vice President Life Sciences | Albany, ...Biography Of Angeliki Cooney | Senior Vice President Life Sciences | Albany, ...
Biography Of Angeliki Cooney | Senior Vice President Life Sciences | Albany, ...
 
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, AdobeApidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
 
Introduction to Multilingual Retrieval Augmented Generation (RAG)
Introduction to Multilingual Retrieval Augmented Generation (RAG)Introduction to Multilingual Retrieval Augmented Generation (RAG)
Introduction to Multilingual Retrieval Augmented Generation (RAG)
 
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers:  A Deep Dive into Serverless Spatial Data and FMECloud Frontiers:  A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
 
MS Copilot expands with MS Graph connectors
MS Copilot expands with MS Graph connectorsMS Copilot expands with MS Graph connectors
MS Copilot expands with MS Graph connectors
 
Platformless Horizons for Digital Adaptability
Platformless Horizons for Digital AdaptabilityPlatformless Horizons for Digital Adaptability
Platformless Horizons for Digital Adaptability
 
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ..."I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
 

Erlang for data ops

  • 1. Erlang for Data Ops a db-centric design with Erlang Warning: trendy people may be offended... (contains refs to relational databases, perl) @mnacos
  • 2. specs ● multiple relational dbs why Erlang? ● feed processing ● read the book(s), saw the movie, been to meetups ● multiple locations ● good for concurrency, ● multiple schemas services, scalability, etc. ● automated ● can't stand middleware ● soft realtime (esp. java “frameworks”) ● interoperable ● extensible
  • 3. approach ● good old-fashioned relational modelling ● model escapes the RDBMS ● Erlang as 'glue' (erl is perl for systems) ● Erlang thin-clients enforce and maintain the model ● global state resides in a traditional database (logic) controlling db schema snippet ● ACID is good – eventual consistency across sites
  • 4. workflow Hub discovers / allocates work Agents do feed processing Agents submit messages to hub Relational-friendly message fmt Messages routed via rabbitmq Each site has its own mailbox Erl consumer applies operations Principles: http for synchronous amqp for asynchronous agents, hubs and consumers have types...
  • 5. message format "t":[ single transaction { "d":{"virtualdb1":"tracking data"}, "r":{"public":"mycases"}, "k":[ {"company":"6678928"} Transactions such as these ], are packaged in amqp "z":null messages with appropriate routing keys }, { "d":{"virtualdb1":"tracking data"}, "r":{"public":"mycases"}, "k":[ {"company":"6678928"}, key part {"casenumber":"9513"} ], "z":[ {"dateregistered":"2010-09-10"}, {"location":"LONDON"}, payload {"statuscode":"ABCD"}, {"sum":"3983.00"} ] Each element of this transaction is } declarative i.e. a logical assertion ] ... we use UPSERTs
  • 6. Nonterminals json_to_erl.yrl transaction items item target attpairs attpair pair key value bytearray bytes byte. Terminals ':' atom string integer '[' ']' '{' '}' ','. for yecc (LALR-1 Parser Generator) % Test this code with: f(File), f(Scan), f(Status), {ok, File} = file:read_file("new-format.json"), yecc:yecc("json_to_erl.yrl","json_to_erl.erl"), c(json_to_erl), {ok,Scan,Status} = erl_scan:string(binary_to_list(File)), key -> string : unwrap('$1'). json_to_erl:parse(Scan). value -> atom : unwrap('$1'). value -> string : unwrap('$1'). Rootsymbol transaction. value -> integer : unwrap('$1'). transaction -> key ':' '[' items ']' : pack('$1','$4'). bytearray -> '[' ']' : []. bytearray -> '[' bytes ']' : '$2'. items -> item : ['$1']. bytes -> byte : ['$1']. items -> item ',' items : ['$1'|'$3']. bytes -> byte ',' bytes : ['$1'|'$3']. byte -> integer : unwrap('$1'). item -> '{' target ',' target ',' target ',' target '}' : pack(item,['$2','$4','$6','$8']). Erlang code. target -> key ':' '{' pair '}' : pack('$1','$4'). unwrap({_,_,V}) when is_integer(V) -> V; target -> key ':' atom : pack('$1',null). unwrap({_,_,V}) when is_list(V) -> V; target -> key ':' '[' ']' : pack('$1',null). unwrap({_,_,V}) -> V. target -> key ':' '[' attpairs ']' : pack('$1','$4'). pack(Key,List) when Key == "t" -> {transaction,List}; attpairs -> attpair : ['$1']. pack(item,List) -> {item,List}; attpairs -> attpair ',' attpairs : ['$1'|'$3']. pack(Key,Tuple) when Key == "d" -> {dbvar,Tuple}; attpair -> '{' pair '}': '$2'. pack(Key,Tuple) when Key == "r" -> {relvar,Tuple}; pack(Key,List) when Key == "k" -> {key,List}; pair -> key ':' bytearray : {'$1', '$3'}. pack(Key,List) when Key == "z" -> {relation,List}; pair -> key ':' value : {'$1', '$3'}. pack(Key,List) -> {Key,List}.
  • 7. sql from our json erl_to_sql({transaction, Items}) -> dict_to_sql(Dict) -> items_to_sql(Items). {Targetdb, Dbver} = read_from_dict(Dict, dbvar), {Targetsch, Targetrel} = read_from_dict(Dict, relvar), items_to_sql(Items) -> case read_from_dict(Dict, key) of {Sk, Sv} -> Keydefs = [Sk], Keyvals = [Sv]; items_to_sql([], Items). [{Sk, Sv}] -> Keydefs = [Sk], Keyvals = [Sv]; [] -> Keydefs = ["null"], Keyvals = ["null"]; items_to_sql(Statements, []) -> lists:reverse(Statements); null -> Keydefs = ["null"], Keyvals = ["null"]; items_to_sql(Statements, [H | T]) -> List -> {Keydefs, Keyvals} = split_keydefs_keyvals(List) {item, Fields} = H, end, Sql = sql_from_item(Fields), case read_from_dict(Dict, relation) of % empty relation means DELETE items_to_sql([Sql|Statements], T). null -> {Targetdb, lists:concat( % storing all elements into a dict ["DELETE FROM "] ++ [Targetsch] ++ ["."] ++ [Targetrel] ++ % so that their order is not important [" WHERE "] ++ where_clause(Keydefs,Keyvals) ++ [";"] % ---------------------------------------------------- )}; sql_from_item(Fields) -> % emtpy key means INSERT Dict = dict:new(), Tuples when Keyvals == ["null"] -> {Keys, Values} = split_pairs(Tuples), sql_from_item(Dict, Fields). {Targetdb, "INSERT INTO " ++ Targetsch ++ "." ++ Targetrel ++ " " ++ sql_from_item(Dict, []) -> dict_to_sql(Dict); commas_and_parentheses(Keys) ++ " VALUES " ++ sql_from_item(Dict, [H | T]) -> commas_and_parentheses(sql_quote(Values)) ++ ";"}; {Key,Val} = H, % if both key and relation are supplied we UPDATE sql_from_item(dict:store(Key,Val,Dict), T). Tuples -> {Targetdb, lists:concat( ["UPDATE "] ++ [Targetsch] ++ ["."] ++ [Targetrel] ++ [" SET "] ++ [pairs_to_sql(Tuples)] ++ [" WHERE "] ++ where_clause(Keydefs,Keyvals) ++ [";"] )} end.
  • 8. (simple http fileserver example) mochiweb -module(abstract_files,[Class]). -behaviour(gen_server). init(Port) -> code:add_path("deps"), mochiweb_http:start([ {port, Port}, {loop, fun(Req) -> dispatch_requests(Req) end} ]), {ok, []}. %% CONTROLLER ------------------------------------------------------------------------- dispatch_requests(Req) -> Path = Req:get(path), Action = clean_path(Path), erlang:apply(Class, handle, [Action, Req]). handle(Action, Req) when Action == "/" -> case file:read_file("lib/start.sh") of {ok, Binary} -> ?SUPER:send_text(Req, 200, "text/plain", Binary); {error, _} -> not_found(Req) end; send_file(Req, Filename, Binary) -> Req:respond({ 200, [ {"Content-Type", "application/octet-stream"}, {"Content-Transfer-Encoding", "base64"}, {"Content-disposition", lists:concat(["attachment; filename=",Filename])} ], Binary }).
  • 9. (simple consumer example) rabbitmq % handle call for subscribe handle_call({subscribe, MsgCallback, QoS}, _From, State) -> [{Conn}] = get_term(State, connection), still using 1.7 version of the client [{Queue}] = get_term(State, queue), version 2.1 is out! case get_term(State, consumer) of [{_Consumer}] -> {reply, already_subscribed, State}; _ -> try #rabbit_queue{q=Q,passive=_P,durable=_D,exclusive=_E,auto_delete=_A} = Queue, process_flag(trap_exit, true), Consumer = spawn_link( own wrapper fun() -> process_flag(trap_exit, true), % configuration and startup example: % opening a connection, channel and auth % {Connection, Channel} = connect(Conn), % R = hubz_rabbit:new(abstract_rabbit, "token"). % asserting queue % C = R:connection("localhost","/","guest","guest"). assert_queue(Queue,Channel), % E = R:exchange("ONE","direct",false,true,false). % setting QoS parameter % Q = R:queue("TWO",false,true,false,false). set_prefetch_count(Channel,QoS), % B = R:binding("TWO","ONE","#"). % basic consume % R:start_link({C,E,Q,B}). #'basic.consume_ok'{ consumer_tag = Tag } = amqp_channel:subscribe(Channel, #'basic.consume'{ queue = Q }, self()), consumer_loop(Connection, Channel, Tag, MsgCallback) end ), ets:insert(State, {consumer, {Consumer}}), ets:insert(State, {consumer_settings, {MsgCallback, QoS}}), {reply, ok, State} catch _:_ -> {reply, error, State} end R:subscribe(fun(_Key,Data)->io:format("~p~n",[Data]) end). end;
  • 10. start(Id) -> spawn(fun() -> register(Id, self()), process_flag(trap_exit, true), Port = open_port({spawn, "epg "++atom_to_list(Id)}, [use_stdio, {line, 4096}]), epg loop(Port) end). uses epg.c, libpq-based collect_response(Port, RespAcc, LineAcc) -> for connecting to postgres receive {Port, {data, {eol, "!eod!"}}} -> {data, lists:concat(lists:reverse(RespAcc))}; {Port, {data, {eol, "!error!"}}} -> {error, lists:concat(lists:reverse(RespAcc))}; {Port, {data, {eol, "!connected!"}}} -> {info, lists:concat(lists:reverse(RespAcc))}; {Port, {data, {eol, "!bye!"}}} -> {bye, lists:concat(lists:reverse(RespAcc))}; {Port, {data, {eol, Result}}} -> Line = lists:reverse([Result | LineAcc]), collect_response(Port, [ [Line | "n"] | RespAcc], []); {Port, {data, {noeol, Result}}} -> collect_response(Port, RespAcc, [Result | LineAcc]) %% Prevent the gen_server from hanging indefinitely in case the %% spawned process is taking too long processing the request. after 72000000 -> timeout end.
  • 11. gen_event + gen_fsm + log4erl -define(DISPATCH(Type, Data), erlang:apply( fun() -> ?EVENT:normal(Type, {Data, self()}, ?LINE) end, [] ) ). ?DISPATCH(agent, {{Ip, SPort}, MyTag, unregistered_agent, MyEvent}) handle_event(Event, State) -> MyName = State, gen_event:add_handler(mybus, {my_listener, Server}, Server) % should we handle this event? {{OriginServer,_P,_M,_C}, _E, _T} = Event, case OriginServer of MyName -> % e.g. if MyName is 'my' we send it to the my_fsm handler module (gen_event) gen_fsm:send_event(adapt(MyName,"fsm"), Event); _ -> ok end, {ok, State}. % handling messages/errors from *** agents *** normal({{_S, _P, {_M, _L}, _C}, {_I, normal, agent, Data}, _Now}, State) -> (gen_fsm) {EventData, _Pid} = Data, % what differentiates agent events is the {data_validation_error, ...} part at the end case EventData of {_Agent, _Workfile, _Line, {data_validation_error, _Datatype, _Value}} -> log4erl:warn(feed, "data validation error: ~p", [EventData]); {_Agent, _Workfile, _Line, {data_validation_pkey_error, _Datatype, _Value}} -> log4erl:error(feed, "data validation (in pkey!) error: ~p", [EventData]); {_Agent, Workfile, Line, {no_rule_event, Code}} -> log4erl:warn(feed, "no mapping rules for record type ~p (~s:~s)", [Code, Workfile, Line]); {_Agent, Workfile, Line, {missing_data_event, Key}} -> log4erl:info(feed, "missing data for mapping ~p (~s:~s)", [Key, Workfile, Line]); % ... etc etc
  • 13. thoughts ● not easy ● it's crazy ● i'd do it again ● erlang messes with your mind — ideas ● dynamic tuple introspection ● java/scala client libraries — links http://www.rabbitmq.com/erlang-client-user-guide.html http://github.com/mochi/mochiweb http://code.google.com/p/log4erl/ http://github.com/mnacos/epg @mnacos