Liang’s hyphenation algorithm implementation in node.js

I am currently involved in a project that requires some string hyphenation. Initially I tried Liang’s hyphenation algorithm (ported from this Python implementation of Liang hyphenation).
However, it did not meet my needs, so I had to switch to the algorithm that OpenOffice uses.

Anyway, here is the JavaScript implementation as a Node.js module.


/**
 * Hyphenator built from a list of Liang-style hyphenation patterns.
 *
 * Keeps a reference to the pattern list on `this.patterns` and feeds every
 * entry to `__insertPattern`, which populates the trie rooted at `this.tree`.
 *
 * @param {string[]} patterns Patterns such as "a1b" (letters interleaved
 *     with digits); each one is inserted in list order.
 */
function LiangHyphenator(patterns) {
    this.patterns = patterns
    // Root node of the pattern trie; starts empty and is filled below.
    this.tree = {}

    var index = 0
    while (index < patterns.length) {
        this.__insertPattern(patterns[index])
        index += 1
    }
}

/**
 * Registers a single pattern in the trie.
 *
 * The pattern is separated into its letters (via `__clearPattern`) and its
 * numeric scores (via `__createPoints`); both are then handed to
 * `__addPatternToTree`.
 *
 * @param {string} pattern Raw pattern, e.g. "a1b".
 */
LiangHyphenator.prototype.__insertPattern = function(pattern) {
    var letters = this.__clearPattern(pattern)
    var scores = this.__createPoints(pattern)

    this.__addPatternToTree(scores, letters)
}

/**
 * Removes every ASCII digit from a pattern, leaving only the characters
 * that are matched against words.
 *
 * @param {string} pattern Raw pattern, e.g. ".ach4".
 * @returns {string} The pattern without digits, e.g. ".ach".
 */
LiangHyphenator.prototype.__clearPattern = function(pattern) {
    return pattern.replace(/[0-9]/g, '')
}

/**
 * Extracts the numeric scores of a pattern.
 *
 * The pattern is split at every non-digit character, so the result has one
 * entry for each gap around the pattern's characters (before the first,
 * between neighbours, after the last). A gap without a digit scores 0.
 *
 * Splitting on "anything that is not a digit" mirrors `__clearPattern`,
 * which strips only digits. Splitting on `[.a-z]` alone would leave other
 * characters (accented letters, apostrophes, uppercase) inside the pieces
 * and yield NaN scores and a misaligned array.
 *
 * @param {string} pattern Raw pattern, e.g. ".ach4".
 * @returns {number[]} Scores per gap, e.g. [0, 0, 0, 0, 4].
 */
LiangHyphenator.prototype.__createPoints = function(pattern) {
    var pieces = pattern.split(/[^0-9]/)

    return pieces.map(function(piece) {
        // An empty piece means no digit was written at that gap.
        return piece === '' ? 0 : parseInt(piece, 10)
    })
}

/**
 * Stores a pattern's scores in the trie.
 *
 * Walks `this.tree` one character at a time, creating child nodes where
 * none exist yet, and attaches `points` to the final node under the
 * 'None' key.
 *
 * @param {number[]} points Scores belonging to the pattern.
 * @param {string} chars The pattern's characters with digits removed.
 */
LiangHyphenator.prototype.__addPatternToTree = function(points,chars) {
    var node = this.tree

    for (var pos = 0; pos < chars.length; pos += 1) {
        var letter = chars[pos]
        // Descend into the child for this character, creating it on demand.
        node = node[letter] || (node[letter] = {})
    }

    // 'None' cannot clash with a child key: children are single characters.
    node['None'] = points
}

/**
 * Splits a word into pieces at its permitted hyphenation points.
 *
 * Words of four characters or fewer are returned unsplit. Otherwise the
 * lower-cased word is wrapped in '.' markers, and every pattern in the trie
 * that matches at any position contributes its scores; the highest score
 * per gap wins. A gap with an odd final score becomes a break. Breaks that
 * would leave fewer than two characters at either end are suppressed.
 *
 * @param {string} word The word to hyphenate.
 * @returns {string[]} The pieces of `word` (original casing), in order.
 */
LiangHyphenator.prototype.hyphenateWord = function(word) {
    if (word.length <= 4) {
        return [word]
    }

    var padded = '.' + word.toLowerCase() + '.'
    var scores = this.__createZeroArray(padded.length + 1)

    // Try to match patterns starting at every position of the padded word.
    for (var start = 0; start < padded.length; start += 1) {
        var node = this.tree

        for (var at = start; at < padded.length; at += 1) {
            node = node[padded[at]]
            if (!node) {
                // No pattern continues with this character.
                break
            }

            var weights = node['None']
            if (!weights) {
                continue
            }
            // A pattern ends here: keep the larger score for each gap.
            for (var k = 0; k < weights.length; k += 1) {
                scores[start + k] = Math.max(scores[start + k], weights[k])
            }
        }
    }

    // Forbid breaks too close to the start or the end of the word.
    scores[1] = 0
    scores[2] = 0
    scores[scores.length - 2] = 0
    scores[scores.length - 3] = 0

    // breaks[i] is the score of the gap right after word[i].
    var breaks = scores.slice(2)
    var pieces = ['']

    word.split('').forEach(function(letter, position) {
        pieces[pieces.length - 1] += letter
        if (breaks[position] % 2 !== 0) {
            pieces.push('')
        }
    })

    return pieces
}

/**
 * Creates an array filled with zeros.
 *
 * @param {number} size Number of entries to create.
 * @returns {number[]} A new array containing `size` zeros.
 */
LiangHyphenator.prototype.__createZeroArray = function(size) {

    // Declared locally: an undeclared assignment would create a global in
    // sloppy mode and throw a ReferenceError in strict mode / ES modules.
    var zeroArray = []

    for(var i=0;i<size;i++) {
        zeroArray.push(0)
    }

    return zeroArray
}

/**
 * Combines several sequences element-wise.
 *
 * The result has one row per element of the first sequence; row `i` holds
 * the `i`-th element of every sequence, in the order the sequences were
 * given. Sequences shorter than the first contribute `undefined`.
 *
 * @param {Array[]} arrays The sequences to combine.
 * @returns {Array[]} Rows of the form [arrays[0][i], arrays[1][i], ...].
 */
LiangHyphenator.prototype.__zip = function (arrays) {
    var rows = []

    for (var row = 0; row < arrays[0].length; row += 1) {
        rows.push(arrays.map(function(column) {
            return column[row]
        }))
    }

    return rows
}


// Expose the LiangHyphenator constructor as this module's (CommonJS) export.
module.exports = LiangHyphenator

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s