Word Cloud Bookmarklet

Inspired by Wordle.net, I wrote a bookmarklet that generates a word cloud based on the text of the page you are on. Here it is: WordCloud <- (drag to your toolbar to install). The uncompressed source is as follows:

/**
 * Concatenate the text found under a DOM node, in document order.
 *
 * @param {Node} elm - Node to read. A text node (nodeType 3) yields its
 *     nodeValue directly; any other node yields the joined text of its
 *     childNodes, visited recursively.
 * @param {string[]} [excludeTags] - Lower-case tag names whose whole
 *     subtree contributes no text. Omit to include everything.
 * @returns {string} The collected text; no separator is inserted between
 *     the pieces.
 */
function getText(elm, excludeTags){
	var TEXT_NODE = 3;
	if (elm.nodeType === TEXT_NODE) {
		return elm.nodeValue;
	}

	// The node's tag name is lower-cased before the lookup, so entries in
	// excludeTags only match when they are lower-case themselves.
	var tagName = elm.tagName;
	if (excludeTags && tagName) {
		if (excludeTags.indexOf(tagName.toLowerCase()) !== -1) {
			return '';
		}
	}

	// childNodes may be an array-like list rather than a real array, so
	// borrow reduce from Array.prototype instead of calling it directly.
	return Array.prototype.reduce.call(elm.childNodes, function(collected, child){
		return collected + getText(child, excludeTags);
	}, '');
}

/**
 * List the own enumerable property names of an object.
 *
 * Inherited properties are skipped on purpose: a host page may have added
 * enumerable members to Object.prototype, and those must not show up as
 * keys of every object.
 *
 * @param {Object} obj - Object to inspect; null/undefined yield [].
 * @returns {string[]} The object's own enumerable property names.
 */
function keys(obj){
    var ret = [];
    // Borrow hasOwnProperty from Object.prototype so the check also works
    // for objects that have no prototype or that shadow hasOwnProperty.
    var hasOwn = Object.prototype.hasOwnProperty;
    for (var key in obj){
        if (hasOwn.call(obj, key)) ret.push(key);
    }
    return ret;
}

// Build a word cloud from the current page and replace the page with it.

// Gather the page text. <style> and <noscript> are skipped along with
// <script>: they hold CSS and fallback markup rather than page prose.
var text = getText(document.body, ['script', 'style', 'noscript']);

// Stop words that are left out of the cloud (all lower-case).
var commonWords = ['a', 'the', 'and', 'of', 'is', 'in', 'this', 'it', 'to'];

// A word is a run of ASCII letters. Each word is lower-cased so that
// "The" is recognised as a stop word and "Word"/"word" are counted together.
// match() returns null when nothing matches, hence the [] fallback.
var words = (text.match(/[a-zA-Z]+/g) || []).map(function(word){
	return word.toLowerCase();
});
words = words.filter(function(word){
	return commonWords.indexOf(word) === -1;
});

// Count occurrences. The table has no prototype, so words such as
// "constructor" or "toString" start from 0 instead of colliding with
// members inherited from Object.prototype.
var freq = Object.create(null);
words.forEach(function(word){
	freq[word] = (freq[word] || 0) + 1;
});

// One <span> per distinct word, sized at a quarter of an em per occurrence.
// Words contain only ASCII letters, so they need no HTML escaping.
var markup = keys(freq).map(function(word){
	return '<span style="font-size: ' + (freq[word] / 4) + 'em">' + word + '</span>';
}).join(' ');

// Replace the whole page body with the generated cloud.
document.body.innerHTML = markup;

Enjoy! As a next step, I plan to use Canvas and implement more advanced layouts like the ones on Wordle.

blog comments powered by Disqus