To Infinity & Beyond: Protocols & sequences in Node - Part 2
1. To Infinity & Beyond!
Protocols & Lazy Sequences in Node
Part Deux – Sh*t Just Got Real
Bahul Neel Upadhyaya (@bahulneel)
BraveNewTalent
http://github.com/bahulneel
http://www.bravenewtalent.com
3. An Example (TF-IDF)
“Tf–idf, term frequency–inverse document frequency,
is a numerical statistic which reflects how important a word is to a
document in a collection or corpus. It is often used as a weighting
factor in information retrieval and text mining.”
- Wikipedia
4. Words
/**
 * Normalises a raw token into a candidate word.
 *
 * Removes every character that is not a letter, digit, underscore or hyphen
 * and lower-cases whatever is left.
 *
 * @param {string} word - Raw token, possibly carrying punctuation.
 * @returns {string} The cleaned, lower-cased token (may be empty).
 */
function stripWord(word) {
  // The `g` flag matters: without it only the FIRST run of unwanted
  // characters is removed, so "(hello)" would come back as "hello)".
  return word.replace(/[^-a-zA-Z_0-9]+/g, '').toLowerCase();
}
/**
 * Tests whether a token consists solely of letters, digits, underscores
 * and hyphens (at least one character).
 *
 * @param {string} word - Token to test.
 * @returns {?Array} The RegExp match array when the whole token matches,
 *     otherwise null. Callers can treat the result as a truthy/falsy flag.
 */
function isWord(word) {
  var wordPattern = /^[-a-zA-Z_0-9]+$/;
  var match = wordPattern.exec(word);
  return match;
}
/**
 * Splits a piece of text into normalised words.
 *
 * Each whitespace-separated token is passed through stripWord, and only
 * tokens accepted by isWord are kept.
 *
 * @param {string} string - Text to tokenise.
 * @returns {*} The result of vec() over the surviving tokens
 *     (presumably a cosy-lang vector — confirm against the library).
 */
function words(string) {
  // Split on any run of whitespace, not just a single space: otherwise
  // "foo\nbar" stays one token and stripWord glues it into "foobar".
  // Leading/trailing whitespace yields empty tokens, which fail isWord.
  var tokens = string.split(/\s+/);
  return vec(filter(isWord, map(stripWord, tokens)));
}
5. Term Frequencies
/**
 * Computes max-normalised term frequencies for one document.
 *
 * Every word is counted, then each count is divided by the count of the
 * most frequent word, so the most frequent word scores 1.
 *
 * @param {*} words - Sequence of words; consumed through reduce().
 * @returns {Object} Map from word to its normalised frequency. When no word
 *     was counted the reduce result is returned untouched.
 */
function tf(words) {
  var hasOwn = Object.prototype.hasOwnProperty;
  var max = 0, counts, word;

  // Reducer: returns a fresh counts object with `word` incremented and
  // tracks the highest count seen so far in `max`.
  function countFeq(counts, word) {
    var newCounts = clone(counts);
    // Test for "not a number yet" rather than "undefined": names inherited
    // from Object.prototype (e.g. "constructor") are defined but are not
    // counters, and `+= 1` on them would produce a string.
    if ('number' !== typeof newCounts[word]) newCounts[word] = 0;
    newCounts[word] += 1;
    if (newCounts[word] > max) max = newCounts[word];
    return newCounts;
  }

  counts = reduce(countFeq, {}, words);

  // max stays 0 only when nothing was counted; skip to avoid dividing by 0.
  if (max) {
    for (word in counts) {
      // Borrow hasOwnProperty from the prototype: `counts` is keyed by
      // arbitrary words, one of which could be "hasOwnProperty" itself.
      if (hasOwn.call(counts, word)) counts[word] /= max;
    }
  }
  return counts;
}
6. Inverse Document Frequency
/**
 * Builds a sequence of inverse-document-frequency maps, one per entry of
 * `terms`, each computed from the frequencies merged so far.
 *
 * The object handed to fn$ is keyed by 1 and 3; presumably fn$ dispatches on
 * the number of arguments — confirm against the library.
 *
 *   idf(terms)                 -> idf({}, 1, terms)
 *   idf(freq, docCount, terms) -> null when first(terms) is null, otherwise
 *                                 the result of lazy(terms, calcIdf)
 */
idf = fn$({
  // Entry point: empty frequency table, document counter starting at 1.
  1: function (terms) {
    return idf({}, 1, terms);
  },
  3: function (freq, docCount, terms) {
    // Produces the weights for the head of `remaining` and recurses on the
    // tail with the merged table and an incremented document counter.
    function calcIdf(remaining) {
      var merged, weights = {}, term;
      merged = merge(freq, first(remaining));
      for (term in merged) {
        if (!merged.hasOwnProperty(term)) continue;
        weights[term] = docCount / (1 + merged[term]);
      }
      return cons(weights, idf(merged, docCount + 1, rest(remaining)));
    }

    // Nothing left to process.
    if (first(terms) === null) return null;

    return lazy(terms, calcIdf);
  }
});
7. TF-IDF
/**
 * Computes TF-IDF weights for a sequence of documents.
 *
 * Each document is tokenised with words(), turned into term frequencies
 * with tf(), and paired with the matching entry of idf() over all documents.
 *
 * @param {*} documents - Sequence of document strings; consumed through map().
 * @returns {*} The result of map() over per-document objects mapping each
 *     word to (term frequency * inverse document frequency).
 */
function tfIdf(documents) {
  var hasOwn = Object.prototype.hasOwnProperty;
  var terms, freq;

  terms = map(tf, map(words, documents));
  freq = idf(terms);

  // Multiplies one document's term frequencies by the matching idf weights.
  // Locals are named so they do not shadow the outer tf / idf / tfIdf.
  function calcTfIdf(termFreq, invDocFreq) {
    var word, weights = {};
    for (word in termFreq) {
      // Borrowed hasOwnProperty: termFreq is keyed by arbitrary words.
      if (hasOwn.call(termFreq, word)) weights[word] = termFreq[word] * invDocFreq[word];
    }
    return weights;
  }

  return map(calcTfIdf, terms, freq);
}
8. Making Sequences Asynchronous
Source
● Takes an ISeq & ISync as its argument
● Extends IStream
● Registers a tick callback using the ISync interface
● Emits first element when callback is called
Sink
● Takes an IStream as its argument
● Extends ISeq & ISync
● First returns stream.skip until stream emits
● Calls tick callback when stream emits
9. Socket IO - Server
// Implements the IStream protocol for server-side sockets: "tap" subscribes
// to incoming messages, "emit" sends a value out. Values travel as JSON text.
// NOTE(review): JSON.parse throws on malformed input and nothing here
// catches it — confirm that is acceptable for data arriving from clients.
lang.protocol.extend(lang.stream.IStream, socketServer.Socket,
  ["tap", function (socket, listener) {
    socket.on("message", function (raw) {
      var decoded = JSON.parse(raw);
      listener(decoded);
    });
  }],
  ["emit", function (socket, value) {
    var encoded = JSON.stringify(value);
    socket.send(encoded);
  }]
);
/**
 * Starts listening on `port` and registers `callback` for the
 * 'connection' event of the listener's `sockets` object.
 *
 * @param {number} port - Port handed to socketServer.listen().
 * @param {Function} callback - Handler for the 'connection' event.
 */
function server(port, callback) {
  socketServer.listen(port).sockets.on('connection', callback);
}
10. Socket IO - Server
// Server entry point: listens on port 1234 and, for every connection, pipes
// the result of running tf-idf over the socket back into that same socket.
(function (cosy, computeTfIdf, listen) {
  var PORT = 1234;

  function onConnection(socket) {
    cosy.stream.pipe(computeTfIdf(socket), socket);
  }

  listen(PORT, onConnection);
})(
  require('cosy-lang'),
  require('./lib/tf-idf'),
  require('./lib/socket-server').server
);
11. SocketIO - Client
// Implements the IStream protocol for client-side socket namespaces: "tap"
// subscribes to incoming messages, "emit" sends a value out. Values travel
// as JSON text.
// NOTE(review): JSON.parse throws on malformed input and nothing here
// catches it.
lang.protocol.extend(lang.stream.IStream, socketClient.SocketNamespace,
  ["tap", function (socket, listener) {
    socket.on("message", function (raw) {
      var decoded = JSON.parse(raw);
      listener(decoded);
    });
  }],
  ["emit", function (socket, value) {
    var encoded = JSON.stringify(value);
    socket.send(encoded);
  }]
);
/**
 * Connects to `addr` and hands the connection object to `callback` each
 * time its 'connect' event fires.
 *
 * @param {string} addr - Address passed to socketClient.connect().
 * @param {Function} callback - Receives the connection object.
 */
function client(addr, callback) {
  var connection = socketClient.connect(addr);

  function onConnect() {
    callback(connection);
  }

  connection.on('connect', onConnect);
}
12. Socket IO - Client
// Client entry point: connects to the local server, logs every value that
// arrives on the socket, and pipes `documents` into the socket.
client("http://localhost:1234", function (socket) {
  lang.stream.tap(socket, function (val) {
    // Label fixed: it previously read 'td-idf'.
    console.log('tf-idf', val);
  });
  lang.stream.pipe(documents, socket);
});