Inspired by Wordle.net, I wrote a bookmarklet that generates a word cloud from the text of the page you are on. Here it is: WordCloud (drag it to your bookmarks toolbar to install). The uncompressed source is as follows:
/**
 * Recursively concatenates the text found beneath a DOM node.
 *
 * @param {Node} elm - Node to read; a text node (nodeType 3) yields its own
 *     nodeValue, any other node yields the joined text of its childNodes.
 * @param {string[]} [excludeTags] - Lowercase tag names whose whole subtree
 *     contributes nothing. Omit it to include every element.
 * @returns {string} The collected text, in document order.
 */
function getText(elm, excludeTags){
  // Text nodes are the leaves of the walk: hand back their content directly.
  if (elm.nodeType == 3) {
    return elm.nodeValue;
  }

  // Nodes without a tagName can never match the exclusion list.
  if (excludeTags && elm.tagName) {
    var tag = elm.tagName.toLowerCase();
    if (excludeTags.indexOf(tag) != -1) {
      return '';
    }
  }

  // childNodes is array-like, so borrow Array's reduce to fold the children
  // into one string, left to right.
  return Array.prototype.reduce.call(elm.childNodes, function(soFar, child){
    return soFar + getText(child, excludeTags);
  }, '');
}
/**
 * Lists the enumerable property names that belong to the object itself.
 *
 * @param {Object} obj - Object to inspect; it may have a null prototype.
 * @returns {string[]} Own enumerable property names, in for...in order.
 */
function keys(obj){
  var ret = [];
  for (var key in obj) {
    // for...in also walks the prototype chain; keep only own properties so
    // that anything added to a prototype is not reported as a key. Calling
    // hasOwnProperty through Object.prototype works even when obj itself has
    // no prototype.
    if (Object.prototype.hasOwnProperty.call(obj, key)) {
      ret.push(key);
    }
  }
  return ret;
}
// Collect the page text. The contents of <script> and <style> elements are
// code rather than prose, so both subtrees are skipped.
var text = getText(document.body, ['script', 'style']);

// Stop words that are left out of the cloud (compared in lowercase).
var commonWords = ['a', 'the', 'and', 'of', 'is', 'in', 'this', 'it', 'to'];

// Turn every non-letter into a space, then lowercase, so that "The" matches
// the stop word "the" and "Word"/"word" are counted as one entry. Splitting
// on single spaces leaves empty strings between runs of spaces; drop them.
var words = text.replace(/[^a-zA-Z]/g, ' ').toLowerCase().split(' ').filter(function(p){
  return p !== '';
});
words = words.filter(function(word){
  return commonWords.indexOf(word) === -1;
});

// A prototype-less object is used as the counter: with a plain {} a word such
// as "constructor" or "valueOf" would read an inherited function as its
// starting count and produce a non-numeric total.
var freq = Object.create(null);
words.forEach(function(word){
  freq[word] = (freq[word] || 0) + 1;
});

// One <span> per distinct word, sized at a quarter em per occurrence. Words
// consist of ASCII letters only at this point, so they need no HTML escaping.
var markup = keys(freq).map(function(word){
  return '<span style="font-size: ' + (freq[word] / 4) + 'em">' + word + '</span>';
}).join(' ');

// Replace the whole page body with the generated cloud.
document.body.innerHTML = markup;
Enjoy! As a next step, I plan to use Canvas and implement more advanced layouts like the ones on Wordle.