Neural Networks L1 Regularization

Overview


L1 regularization penalizes the L1 vector norm of the parameters:
{% Reg(\vec{\theta}) = \lambda ||\vec{\theta}||_1 %}
Differentiating with respect to a single parameter gives
{% \frac{\partial Reg}{\partial \theta_i} = \pm \lambda %}
where {% \lambda %} is added if {% \theta_i %} is positive and subtracted if it is negative; a parameter that is exactly zero contributes nothing, since we take the subgradient there to be zero.
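
To make the penalty and its derivative concrete, here is a minimal, self-contained sketch in plain JavaScript; the names l1Penalty and l1Subgradient are invented for this example and are not part of the implementation below.

//L1 penalty: lambda times the sum of the absolute parameter values
function l1Penalty(theta, lambda){
    return lambda * theta.reduce((sum, t) => sum + Math.abs(t), 0);
}

//elementwise derivative of the penalty: +lambda, -lambda, or 0 at exactly zero
function l1Subgradient(theta, lambda){
    return theta.map(t => t > 0 ? lambda : (t < 0 ? -lambda : 0));
}

console.log(l1Penalty([0.5, -2, 0], 0.1));     //0.25
console.log(l1Subgradient([0.5, -2, 0], 0.1)); //[0.1, -0.1, 0]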

Implementation


Instead of adding the regularization term to the error function itself, our implementation adds the corresponding terms directly to the gradient during the update. Because the derivative of a sum is the sum of the derivatives, the resulting parameter updates are the same.


update:function(parameterGradient, step){
    let newBias = [];
    for(let i = 0; i < this.bias.length; i++){
        newBias.push([parameterGradient[parameterGradient.length - i - 1][0]]);
        //add the gradient of the regularization term: +lambda for a positive
        //bias, -lambda for a negative one, nothing for a bias of exactly zero
        if(this.bias[i][0] > 0) newBias[i][0] += this.lambda;
        else if(this.bias[i][0] < 0) newBias[i][0] -= this.lambda;
    }
    let weightGradient = la.unvec(parameterGradient, this.weights.length, this.weights[0].length);
    //add the gradient of the regularization term for each weight in the same way
    for(let i = 0; i < weightGradient.length; i++){
        for(let j = 0; j < weightGradient[0].length; j++){
            if(this.weights[i][j] > 0) weightGradient[i][j] += this.lambda;
            else if(this.weights[i][j] < 0) weightGradient[i][j] -= this.lambda;
        }
    }
    addTo(this.bias, la.multiply(step, newBias));
    addTo(this.weights, la.multiply(step, weightGradient));
},
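
The sign logic above can also be factored out and exercised on its own. The following is only an illustrative sketch, not part of the layer code; addL1ToGradient and the plain nested-array shapes are assumptions made for the example.

//adds the L1 term to a weight-gradient matrix in place,
//skipping entries whose weight is exactly zero (subgradient 0)
function addL1ToGradient(gradient, weights, lambda){
    for(let i = 0; i < gradient.length; i++){
        for(let j = 0; j < gradient[0].length; j++){
            if(weights[i][j] > 0) gradient[i][j] += lambda;
            else if(weights[i][j] < 0) gradient[i][j] -= lambda;
        }
    }
    return gradient;
}

//example: a 2x2 gradient adjusted against the signs of the weights
let grad = [[0.3, -0.1], [0.0, 0.5]];
let weights = [[1.5, -0.4], [0.0, 2.1]];
console.log(addL1ToGradient(grad, weights, 0.1));
//[[0.4, -0.2], [0, 0.6]]

Treating the subgradient at zero as zero means a weight sitting at exactly zero feels no regularization push, although with plain gradient steps weights rarely land exactly on zero in the first place.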