// i was wondering if something like this would act as compression for text.
// answer: not really. to be more space efficient than the original text, it
// would require that the original text be very redundant with long words.
// example of beneficial string: "asdf asdf asdf asdf asdf"
// in most other cases, the output length would be equal to or greater than
// the length of the original string.
// change variable 'string' to compress the value it stores.
// Character codes of the three punctuation marks that are written to the
// output as their own byte value; getwi() keeps these values out of the
// dictionary index range.
var [dotChar, qmarkChar, expChar] = ['.', '?', '!'].map(function (mark) {
  return mark.charCodeAt(0);
});
// Dictionary of unique words/symbols; a word's position is its index.
var words = [];
// Output stream: one number per word, space or punctuation mark.
var text = [];
/**
 * Get the dictionary index of a word, adding the word to the global `words`
 * array when it has not been seen before.
 *
 * The values 32 (space), '.', '?' and '!' are never handed out as a word
 * index, because doit() pushes those values into the output directly for the
 * matching characters. The corresponding slots of `words` are filled with
 * empty-string placeholders instead.
 *
 * @param {string} str - word or single symbol to look up.
 * @returns {number} position of `str` in `words`.
 */
function getwi(str) { // get word index
  const known = words.indexOf(str);
  if (known !== -1) return known;
  // Skip every reserved slot the next index would land on. This must be a
  // loop rather than a single check: 32 (space) and 33 ('!') are adjacent,
  // so two placeholders in a row are needed there.
  while (
    words.length === 32 ||
    words.length === dotChar ||
    words.length === qmarkChar ||
    words.length === expChar
  ) {
    words.push('');
  }
  words.push(str);
  return words.length - 1;
}
// Scratch buffer holding the characters of the word currently being read.
var tmp = '';
// Input text to encode; edit this literal to try a different text.
var string = `hi there
friend, what is up there is up there?`;
/**
 * Encode the global `string` into the global `text` array and log a size
 * comparison to the console.
 *
 * Runs of ASCII letters/digits are treated as words and stored as their
 * getwi() dictionary index. A space is stored as 32, and '.', '?' and '!' as
 * their own character code. Every other character is stored as a one-character
 * dictionary entry. Results are appended to `text`; the global `tmp` is used
 * as the word buffer and is left empty on return.
 *
 * Nothing in the visible part of this file calls this function; invoke
 * doit() to run it.
 */
function doit() {
  const wordChar = /[A-Za-z0-9]/;

  // Emit the buffered word, if any, as a single dictionary index.
  function flushWord() {
    if (tmp !== "") {
      text.push(getwi(tmp)); // add to dictionary & text output
      tmp = "";
    }
  }

  for (let pos = 0; pos < string.length; pos++) {
    const c = string.charAt(pos);
    if (wordChar.test(c)) {
      // is part of alphanumeric string that makes up word
      tmp += c;
      continue;
    }
    // a non-word character ends the word collected so far
    flushWord();
    if (c === ' ') text.push(32);
    else if (c === '.' || c === '?' || c === '!') text.push(c.charCodeAt(0)); // common symbols are space-optimized
    else text.push(getwi(c)); // other symbols are not
  }
  // The input may end in the middle of a word; without this flush the last
  // word would be dropped from the output.
  flushWord();

  // report findings
  console.log("\n=========");
  // words.length also counts any empty placeholder entries added by getwi()
  console.log("Found " + words.length + " unique words/punctuation");
  let size = 0;
  // size of all words in the dictionary
  for (const word of words) size += word.length;
  size += words.length; // +1 byte per word for the word length
  console.log("Dictionary size: " + size);
  console.log("Text data size: " + text.length);
  size += text.length;
  console.log("Total size: " + size);
  console.log("vs Original text size: " + string.length);
}
Wednesday, March 29, 2023
Coding Challenge #36 Useless text compression
Subscribe to:
Post Comments (Atom)
Coding Challenge #54 C++ int to std::string (no stringstream or to_string())
Gets a string from an integer (ejemplo gratis: 123 -> "123") Wanted to come up with my own function for this like 10 years ago ...
-
I NEEDS MY JAVASCRIPT PLEASE :) Open txtcode .txt Save as .gct Please click the blue cheat code title to activate it, a...
-
Naruto Personality Quiz Please enable javascript ☯ Which Naruto Character Are You?
-
Useful for extensions that want to prevent event listeners from being attached or modify prototypes or functions, generally to defeat click...
No comments:
Post a Comment