Wednesday, March 29, 2023

Coding Challenge #36 Useless text compression

// i was wondering if something like this would act as compression for text.
// answer: not really. to be more space efficient than the original text, it
// would require that the original text be very redundant, with long words.
// example of a beneficial string: "asdf asdf asdf asdf asdf"
// in most other cases, the output length would be equal to or greater than
// the length of the original string.

// change variable 'string' to compress the value it stores.

// Character codes that the encoder writes into the output verbatim instead of
// a dictionary index. No dictionary word may ever be given one of these.
const spaceChar = ' '.charCodeAt(0);
const dotChar = '.'.charCodeAt(0);
const qmarkChar = '?'.charCodeAt(0);
const expChar = '!'.charCodeAt(0);

const words = []; // unique words; '' fills a slot whose index is a reserved code
const text = []; // the byte array output

/**
 * Get the dictionary index of a word, adding the word if it is not there yet.
 *
 * Indices equal to the codes of ' ', '.', '?' and '!' are reserved, because
 * those codes appear in the output as literal characters. Before a new word
 * is appended, every reserved slot at the end of the dictionary is filled with
 * an empty-string placeholder so the word lands on a free index.
 *
 * @param {string} str - the word (or single symbol) to look up; expected to be
 *     non-empty, since '' is what the placeholders are made of.
 * @returns {number} index of `str` inside `words`.
 */
function getwi(str){ // get word index
    const known = words.indexOf(str);
    if(known !== -1) return known;

    // A loop rather than a single check: 32 (' ') and 33 ('!') are adjacent,
    // so skipping one reserved slot can land directly on another.
    const reserved = [spaceChar, dotChar, qmarkChar, expChar];
    while(reserved.includes(words.length)) words.push('');

    words.push(str);
    return words.length - 1;
}

// The input text to encode, one array entry per line of text.
var string = [
    'hi there',
    'friend, what is up there is up there?',
].join('\n');

let tmp = ""; // buffer holding the alphanumeric word currently being read

// Emit the buffered word (if any) into the output and clear the buffer.
function flushTmp(){
    if(tmp === "") return;
    text.push( getwi(tmp) ); // add to dictionary & text output
    tmp = "";
}

/**
 * Encode the global `string` into the global `text` array and log a size
 * report comparing the encoded form with the original.
 *
 * Runs of ASCII letters and digits become one dictionary index each (via
 * `getwi`). A space is written as 32 and '.', '?', '!' as their own character
 * codes. Every other character is stored in the dictionary as a one-character
 * word.
 *
 * Appends to `text` and `words`; does not clear them first.
 */
function doit(){
    for(let i = 0, len = string.length;i < len;i++){
        const c = string.charAt(i);
        const isAlnum = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
        if(isAlnum){
            // is part of alphanumeric string that makes up word
            tmp += c;
            continue;
        }

        // first, use previous tmp str buffer as a word
        flushTmp();

        // then decide what to do with this new char
        if(c === ' ') text.push(32);
        else if(c === '.' || c === '?' || c === '!') text.push(c.charCodeAt(0)); // common symbols are space-optimized
        else text.push( getwi(c) ); // other symbols are not
    }

    // The input may end in the middle of a word; without this final flush the
    // last word would never reach the output.
    flushTmp();

    // report findings
    console.log("\n=========");
    console.log("Found " + words.length + " unique words/punctuation");
    // size of all words in the dictionary, +1 byte per word for the word length
    let size = words.reduce((sum, word) => sum + word.length + 1, 0);
    console.log("Dictionary size: " + size);
    console.log("Text data size: " + text.length);
    size += text.length;
    console.log("Total size: " + size);
    console.log("vs Original text size: " + string.length);
}

No comments:

Post a Comment

Coding Challenge #54 C++ int to std::string (no stringstream or to_string())

Gets a string from an integer (e.g., 123 -> "123"). Wanted to come up with my own function for this like 10 years ago ...