File contents
#! /usr/bin/env python
# -*- coding: iso-8859-15 -*-
"""Artificial neural networks for Orange.
Orange module to add artificial neural networks as learning
algorithms using calls to SNNS software.
Version: 1.09 (working but some more testing and refinements
can improve it to version 1.10)
SNNS randomness agrees with Orange behaviour on randomness:
http://www.ailab.si/orange/doc/reference/random.htm
Although communication with SNNS goes through files, this code
is supposed to be reentrant. However, because some of the temporary
files are named by this module but created by SNNS, there is an
extremely small chance of a file being corrupted and the run
breaking. Don't worry: you would probably win the lotto and hang a
Windows program a billion times before this happens.
TO DO:
see marked XXX in code,
error handling in system calls,
error handling when SNNS fails,
Copyright (C) 2005-2006 Antonio Arauzo Azofra
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""
# From std python
import os, sys, string, re
import tempfile, math, random
from itertools import izip
# From orange
import orange, statc
# Should be set to the path where binaries of SNNS tools are
# located, if they are not in system path
pathSNNS = ""
# example:
#pathSNNS = "~/SNNSv4.2/tools/bin/i686-pc-linux-gnu/"
# If messages should be printed
verbose = False
def _argmax(array):
"""
_argmax returns the position of the maximun value of an array
"""
return max(izip(array,xrange(len(array))))[1]
class Transform:
    """
    Transformation between Orange examples and neural network vectors.

    Each discrete variable becomes N values in {alpha, beta} (one per
    possible value) and each continuous variable is mapped linearly so
    that the range seen in the table falls into [alpha, beta].

    Attributes:
      transform     -- one entry per domain variable:
                       (True, slope, pos) for continuous (y = slope*x + pos),
                       (False, nValues) for discrete
      alpha, beta   -- low / high values of the transformed range
      domain        -- domain of the table used to build the transformation
      nnAntecedents -- number of network inputs
      nnTargets     -- number of network outputs
    """

    def __init__(self, table, alpha=0.1, beta=0.9):
        """
        Prepares transformation of data for neural network
          * discrete to N features in {alpha, beta}
          * continuous to [alpha, beta]

        Notes:
          Destination is not an orange.Example because we can not use
          more than one class feature in Orange

          Destination domain is formed by nnAntecedent values followed by
          nnTargets (following original order in each subgroup)
        """
        self.transform = []
        self.alpha = alpha
        self.beta = beta
        self.domain = table.domain   # Previous domain
        self.nnAntecedents = 0       # Transformed domain
        self.nnTargets = 0

        # The class position does not change while looping: look it up once
        classIndex = self.domain.variables.index(self.domain.classVar)

        basicAttrStat = orange.DomainBasicAttrStat(table)
        for i in range(len(table.domain.variables)):
            if self.domain[i].varType == orange.VarTypes.Continuous:
                stat = basicAttrStat[i]
                varRange = stat.max - stat.min
                if varRange == 0.0:
                    slope = 1.0  # Unique value
                else:
                    slope = float(beta - alpha) / varRange
                pos = alpha - (slope * stat.min)
                self.transform.append((True, slope, pos))
                width = 1
            else:
                nValues = len(self.domain[i].values)
                self.transform.append((False, nValues))
                width = nValues

            if i != classIndex:
                self.nnAntecedents += width
            else:
                self.nnTargets += width

    def _encode(self, i, value, out):
        """
        Appends to `out` the transformed form of `value`, the value of
        the variable at position `i` of the domain.
        """
        spec = self.transform[i]
        if spec[0]:
            # Continuous
            if value.value in ['?', '~', '.']:
                # NULL values: use the middle of [alpha, beta] (0.5 with
                # the default alpha and beta) XXX
                out.append((self.alpha + self.beta) / 2.0)
            else:
                out.append(value * spec[1] + spec[2])
        else:
            # Discrete: beta for the matching value, alpha for the others
            values = self.domain[i].values
            for j in range(spec[1]):
                if values[j] == value:
                    out.append(self.beta)
                else:
                    out.append(self.alpha)

    def apply(self, example):
        """
        Applies the defined transformation over an example

        Returns: a list with the transformed antecedents (in domain
        order) followed by the transformed class value
        """
        classIndex = self.domain.variables.index(self.domain.classVar)
        rtn = []
        # Antecedents
        for i in range(len(example)):
            if i != classIndex:
                self._encode(i, example[i], rtn)
        # Target (Class or goal attribute)
        self._encode(classIndex, example[classIndex], rtn)
        return rtn

    def applyInverseToTarget(self, target):
        """
        From a NN output get the class by: majority criterion, or
        denormalizing in continuous cases.

        Returns: orange.Value with the class
        """
        i = self.domain.variables.index(self.domain.classVar)
        if self.transform[i][0]:  # continuous
            trTarget = (target[0] - self.transform[i][2]) / self.transform[i][1]
            rtn = orange.Value(self.domain.classVar, trTarget)
        else:  # discrete
            rtn = orange.Value(self.domain.classVar, _argmax(target))
            #XXX would not be nice if this worked in Orange:
            #    domain[i].values[_argmax(out)]
        return rtn

    def __str__(self):
        """
        Returns one line per domain variable with its name followed by
        its transformation entry.
        """
        lines = [self.domain[i].name + str(self.transform[i]) + '\n'
                 for i in range(len(self.domain.variables))]
        return '<Transform:\n' + ''.join(lines) + '>\n'
def savePatFile(table):
    """
    Given an orange example table create an SNNS pattern file.

    Transform data (using Transform with alpha=0.1 and beta=0.9):
      Continuous data is scaled to [0.1, 0.9].
      Discrete values become N inputs/outputs in {0.1, 0.9}

    Caller is responsible for deleting pat file. If writing the file
    fails, the partially written file is removed here before the error
    propagates, because the caller never learns its name.

    Returns: (patternFileName, transform)
    """
    # Prepares transformation of data
    transform = Transform(table, 0.1, 0.9)

    # Header
    header = "".join([
        "SNNS pattern definition file V1.4\n",
        "generated at Tue Jan 21 18:02:24 1997\n\n",
        "No. of patterns : " + str(len(table)) + '\n',
        "No. of input units : " + str(transform.nnAntecedents) + '\n',
        "No. of output units : " + str(transform.nnTargets) + '\n',
    ])

    fd, patFileName = tempfile.mkstemp(suffix=".pat")
    complete = False
    try:
        patFile = os.fdopen(fd, "w")
        try:
            patFile.write(header)
            # Examples: one line each, every value followed by a space
            for example in table:
                values = transform.apply(example)
                patFile.write("".join([str(v) + ' ' for v in values]) + "\n")
        finally:
            patFile.close()
        complete = True
    finally:
        if not complete:
            os.remove(patFileName)
    return (patFileName, transform)
def createNN(nInputs, hiddenLayers, nOutputs):
    """
    Builds, by calling ff_bignet, an SNNS file describing a multilayer
    network in which each layer is linked to the next one.

    nInputs      -- number of input units
    hiddenLayers -- sequence with the number of units of each hidden layer
    nOutputs     -- number of output units

    Caller is responsible for deleting network file

    Returns: name of the file
    """
    # The file name documents the topology, e.g. mlp4_3_2-XXXXXX.net
    hiddenTag = "".join([str(size) + "_" for size in hiddenLayers])
    prefix = "mlp" + str(nInputs) + "_" + hiddenTag + str(nOutputs) + "-"
    # NOTE(review): mktemp only picks a name; the file itself is expected
    # to be created by ff_bignet, so another process could take the name
    # in between.
    nnFileName = tempfile.mktemp(prefix=prefix, suffix=".net")

    # One "-p <units> 1" group per layer: input, hidden ones, output
    layerSizes = [nInputs] + list(hiddenLayers) + [nOutputs]
    planes = "".join([" -p " + str(size) + " 1" for size in layerSizes])

    # One "-l <k> + <k+1> +" group per pair of consecutive layers
    links = "".join([" -l " + str(k) + " + " + str(k + 1) + " +"
                     for k in range(1, len(hiddenLayers) + 2)])

    orden = pathSNNS + "ff_bignet" + planes + links + " " + nnFileName
    os.system(orden)
    return nnFileName
def trainNN(nnFileName, patternFileName, MSE, cycles, algorithm, learningParams):
    """
    Trains a neural network using batchman

    A temporary batchman script is written, run through os.system and
    removed afterwards (also when writing or running it raises). The
    trained network is saved by the script over nnFileName.

    nnFileName      -- SNNS network file, read and overwritten
    patternFileName -- SNNS pattern file used for training
    MSE             -- training stops once MSE is not greater than this
    cycles          -- training stops once this number of cycles is reached
    algorithm       -- name of the SNNS learning function
    learningParams  -- list of strings with the learning function parameters
    """
    # Open tmp file for the script
    try:
        fd, batchmanScriptFileName = tempfile.mkstemp()
        batchmanScriptFile = os.fdopen(fd, 'w')
    except IOError:
        print('Error: Couldn\'t create temp file.')
        sys.exit(1)  # non-zero status: this is a failure

    try:
        # Create script batchman
        params = "".join(["," + p for p in learningParams])
        nu = "".join([
            'loadNet("' + nnFileName + '")\n',
            'loadPattern("' + patternFileName + '")\n',
            'setInitFunc("Randomize_Weights", 1.0, -1.0)\n',
            'setLearnFunc("' + algorithm + '"' + params + ')\n',
            'setShuffle(TRUE)\n',
            'initNet()\n',
            'while CYCLES < ' + str(cycles) + ' and MSE > ' + str(MSE) + ' and SIGNAL == 0 do\n',
            # To trace progress, a line such as the following can be added:
            #   'if CYCLES mod 10 == 0 then\n'
            #   'print ("cycles = ", CYCLES, " SSE = ", SSE, " MSE = ",MSE) endif\n'
            'trainNet()\nendwhile\n',
            'if SIGNAL !=0 then print("Stopped due to signal reception: signal " + SIGNAL)\nendif',
            '\nsaveNet("' + nnFileName + '")\n',
        ])
        try:
            batchmanScriptFile.write(nu)
        finally:
            batchmanScriptFile.close()

        # Train the NN
        if verbose:
            orden = pathSNNS + "batchman -f " + batchmanScriptFileName
        else:
            orden = pathSNNS + "batchman -q -f " + batchmanScriptFileName
        os.system(orden)
    finally:
        # Remove tmp file
        os.remove(batchmanScriptFileName)
def trainAutoNN(nnFileName, trainFileName, testFileName, MSE, cycles, nRepeat, step, algorithm, learningParams):
    """
    Trains a neural network using batchman. Uses test data to evaluate
    the training state and select the best neural network.

    The generated script trains nRepeat times; in each repetition the
    net is tested every `step` training cycles and saved over
    nnFileName whenever the test MSE improves on the best seen so far.
    The temporary script is removed afterwards (also when writing or
    running it raises).

    Bad accuracy (not used)
    """
    # Open tmp file for the script
    try:
        fd, batchmanScriptFileName = tempfile.mkstemp()
        batchmanScriptFile = os.fdopen(fd, 'w')
    except IOError:
        print('Error: Couldn\'t create temp file.')
        sys.exit(1)  # non-zero status: this is a failure

    try:
        # Create script batchman
        params = "".join(["," + p for p in learningParams])
        script = [
            'net = "' + nnFileName + '"\n',
            'loadNet(net)\n',
            'trainPat = "' + trainFileName + '"\n',
            'testPat = "' + testFileName + '"\n',
            'loadPattern(trainPat)\n',
            'loadPattern(testPat)\n',
            'setInitFunc("Randomize_Weights", 1.0, -1.0)\n',
            'setLearnFunc("' + algorithm + '"' + params + ')\n',
            'setShuffle(TRUE)\n',
            # "mejor" (best) starts as a large value standing for +infinity
            'mejor = 100000000 #Valor grande para representar +infinito\n',
            'for i:=1 to ' + str(nRepeat) + ' do\n',
        ]
        if verbose:
            script.append(' print(" --- ", i)\n')
        script += [
            ' initNet()\n',
            ' while CYCLES < ' + str(cycles) + ' and MSE > ' + str(MSE) + ' and SIGNAL == 0 do\n',
            ' setPattern(trainPat)\n',
            ' for k:= 1 to ' + str(step) + ' do\n',
            ' trainNet()\n',
            ' endfor\n',
            ' setPattern(testPat)\n',
            ' testNet()\n',
        ]
        if verbose:
            script.append(' print("MSE =", MSE, "ciclos:", CYCLES)\n')
        script += [
            ' if MSE < mejor then\n',
            ' mejor = MSE\n',
            ' saveNet(net)\n',
        ]
        if verbose:
            script.append(' print(CYCLES, ": ", MSE, "(mejor MSE)")\n')
        script += [
            ' endif\n',
            ' endwhile\n',
            'endfor\n',
        ]
        if verbose:
            script.append('print("Mejor MSE(", net, ")= ", mejor)\n')
        script.append('if SIGNAL !=0 then print("Stopped due to signal reception: signal " + SIGNAL)\nendif')

        try:
            batchmanScriptFile.write("".join(script))
        finally:
            batchmanScriptFile.close()

        # Train the NN
        if verbose:
            orden = pathSNNS + "batchman -f " + batchmanScriptFileName
        else:
            orden = pathSNNS + "batchman -q -f " + batchmanScriptFileName
        os.system(orden)
    finally:
        # Remove tmp file
        os.remove(batchmanScriptFileName)
def guessTrainParameters(nnFileName, trainFileName, testFileName, MSE, cycles, nRepeat, step, algorithm, learningParams):
    """
    By a series of tests choose the number of cycles to train a neural
    network.

    The generated batchman script trains nRepeat times, testing the net
    every `step` cycles, and prints a "SetCycles=<n>" line per
    repetition with the cycle count at which the test MSE was lowest.
    Those lines are read from the output of batchman.

    Returns: the mean of the reported cycle counts, truncated to int

    Raises: RuntimeError if batchman reports no "SetCycles=" line
    """
    # Open tmp file for the script
    try:
        fd, batchmanScriptFileName = tempfile.mkstemp()
        batchmanScriptFile = os.fdopen(fd, 'w')
    except IOError:
        print('Error: Couldn\'t create temp file.')
        sys.exit(1)  # non-zero status: this is a failure

    bestCycles = []
    try:
        # Create script batchman
        params = "".join(["," + p for p in learningParams])
        nu = "".join([
            'net = "' + nnFileName + '"\n',
            'loadNet(net)\n',
            'trainPat = "' + trainFileName + '"\n',
            'testPat = "' + testFileName + '"\n',
            'loadPattern(trainPat)\n',
            'loadPattern(testPat)\n',
            'setInitFunc("Randomize_Weights", 1.0, -1.0)\n',
            'setLearnFunc("' + algorithm + '"' + params + ')\n',
            'setShuffle(TRUE)\n',
            'for i:=1 to ' + str(nRepeat) + ' do\n',
            # "mejor" (best) starts as a large value standing for +infinity
            ' mejor = 100000000 #Valor grande para representar +infinito\n',
            ' mejorCycles = 0\n',
            ' print(" --- ", i)\n',
            ' initNet()\n',
            ' while CYCLES < ' + str(cycles) + ' and MSE > ' + str(MSE) + ' and SIGNAL == 0 do\n',
            ' setPattern(trainPat)\n',
            ' for k:= 1 to ' + str(step) + ' do\n',
            ' trainNet()\n',
            ' endfor\n',
            ' setPattern(testPat)\n',
            ' testNet()\n',
            ' print("MSE =", MSE, "ciclos:", CYCLES)\n',
            ' if MSE < mejor then\n',
            ' mejor = MSE\n',
            ' mejorCycles = CYCLES\n',
            ' print(CYCLES, ": ", MSE, "(mejor MSE)")\n',
            ' endif\n',
            ' endwhile\n',
            ' print("SetCycles=", mejorCycles)\n',
            'endfor\n',
            'print("Mejor MSE(", net, ")= ", mejor)\n',
            'if SIGNAL !=0 then print("Stopped due to signal reception: signal " + SIGNAL)\nendif',
        ])
        try:
            batchmanScriptFile.write(nu)
        finally:
            batchmanScriptFile.close()

        # Train the NN
        if verbose:
            orden = pathSNNS + "batchman -f " + batchmanScriptFileName
        else:
            orden = pathSNNS + "batchman -q -f " + batchmanScriptFileName

        # popen2 returns (child stdin, child stdout); results come on stdout
        childIn, childOut = os.popen2(orden)
        try:
            line = childOut.readline()
            while line:
                if line.startswith("SetCycles="):
                    bestCycles.append(int(line[10:]))
                line = childOut.readline()
        finally:
            childIn.close()
            childOut.close()
    finally:
        # Remove tmp file
        os.remove(batchmanScriptFileName)

    if verbose:
        print("cycles= " + str(bestCycles))

    if not bestCycles:
        raise RuntimeError("batchman did not report any SetCycles value "
                           "(command: " + orden + ")")
    return int(statc.mean(bestCycles))
def extractWeights(nnFN):
    """
    Extract weights and bias from a neural network trained file

    The bias is read from the fifth '|' separated column of each row
    of the "unit definition section". The weights are read from the
    third column of each row of the "connection definition section";
    a row whose weight list ends with ',' continues on the next line.

    Returns: (weightVectors, bias), where weightVectors holds one list
    of floats per connection row and bias one float per unit row

    Raises:
      Exception  -- the first line does not contain "SNNS network"
      ValueError -- the file ends before an expected section, table row
                    or continuation line is found
    """
    hasDigit = re.compile(r"\d").search

    def nextLine():
        # readline() returns '' only at end of file; failing here keeps
        # the search loops below from spinning forever on a short file
        line = f.readline()
        if not line:
            raise ValueError(nnFN + " is truncated or is not a complete SNNS network")
        return line

    def seekRows(line, sectionTitle):
        # Advance to the section title, then to its first row with a digit
        while line.find(sectionTitle) == -1:
            line = nextLine()
        while not hasDigit(line):
            line = nextLine()
        return line

    f = open(nnFN, 'r')
    try:
        aLine = f.readline()
        if aLine.find("SNNS network") == -1:
            raise Exception(nnFN + " is not an SNNS network")

        # Extract bias
        aLine = seekRows(aLine, "unit definition section")
        bias = []
        while hasDigit(aLine):
            bias.append(float(aLine.split('|')[4]))
            aLine = f.readline()

        # Extract weights
        aLine = seekRows(aLine, "connection definition section")
        weightVectors = []
        while hasDigit(aLine):
            tokens = aLine.split('|')[2].strip()
            while tokens.endswith(','):
                tokens += nextLine().strip()
            # Drop every "source:" label, keeping only the weights
            parts = re.split(r'^[^:]*:|,[^:]*:', tokens)
            weightVectors.append([float(w) for w in parts if w != ''])
            aLine = f.readline()
    finally:
        f.close()
    return (weightVectors, bias)
# --------------Learner classes (orange integration)------------------
def SNNSLearner(examples=None, **kwds):
    """
    Builds an SNNSLearner_Class from the keyword arguments.

    Returns: the learner itself when `examples` is missing or
    evaluates to false; otherwise the result of calling the learner
    on `examples`.
    """
    learner = SNNSLearner_Class(**kwds)
    if not examples:
        return learner
    return learner(examples)
class SNNSLearner_Class:
    """
    Artificial Neural Network(ANN) learner class that uses SNNS to
    create and train the ANN.
    """

    def __init__(self, name='SNNS neural network', hiddenLayers=None,
                 MSE=0, cycles=200, auto=False, nRepeat=3, step=50,
                 percentTrain=0.90,
                 algorithm=None, learningParams=None):
        """
        Initializes a new neural network learner, defining the
        structure of the networks and training parameters.

        By now the structure is a multilayered perceptron

        name = learner name
        hiddenLayers = a list with the number of nodes of each hidden layer;
                       when empty or None, each call uses one hidden layer
                       with (inputs + outputs) / 2 nodes
        MSE = stop training if mse is smaller than this value
        cycles = stop training after this number of cycles
        auto = if True, the number of cycles is chosen with
               guessTrainParameters (with `cycles` as upper limit)
               before the final training
        nRepeat = if auto, the number of times the net is trained
        step = if auto, the number of cycles between one test and the next one
        percentTrain = if auto, the proportion of patterns used for training
        algorithm = name of training algorithm as identified in SNNS
                    ("Std_Backpropagation" when not given)
        learningParams = list of strings with the parameters as in SNNS
        """
        self.name = name
        self.hiddenLayers = hiddenLayers
        self.MSE = MSE
        self.cycles = cycles
        self.auto = auto
        self.nRepeat = nRepeat
        self.step = step
        self.percentTrain = percentTrain
        self.algorithm = algorithm or "Std_Backpropagation"
        self.learningParams = learningParams or []

    def __call__(self, t, weight=None):
        """
        Trains a network with the examples of table `t`.

        `weight` is accepted but not used.

        Returns: an SNNSClassifier, or the result of
        orange.MajorityLearner(t) when the data has no input feature.
        Every temporary file created on the way is removed, also when
        a step raises.
        """
        patFileName, transform = savePatFile(t)
        tmpFiles = [patFileName]
        try:
            # If input has no feature with values return a Majority classifier
            if transform.nnAntecedents < 1:
                return orange.MajorityLearner(t)

            # The default topology depends on the data of this call, so it
            # is kept local instead of being stored in the learner
            hiddenLayers = self.hiddenLayers
            if not hiddenLayers:
                hiddenLayers = [(transform.nnAntecedents + transform.nnTargets) // 2]

            nnFN = createNN(transform.nnAntecedents,
                            hiddenLayers,
                            transform.nnTargets)
            tmpFiles.append(nnFN)

            if self.auto:
                selection = orange.MakeRandomIndices2(t, self.percentTrain)
                # The transforms of the two subsets are discarded: the final
                # training below uses the patterns of the whole table, so
                # the classifier must keep the transform that produced them
                trnPatFileName, ignore = savePatFile(t.select(selection, 0))
                tmpFiles.append(trnPatFileName)
                testPatFileName, ignore = savePatFile(t.select(selection, 1))
                tmpFiles.append(testPatFileName)
                cycles = guessTrainParameters(nnFN, trnPatFileName, testPatFileName,
                                              self.MSE, self.cycles, self.nRepeat, self.step,
                                              self.algorithm, self.learningParams)
            else:
                cycles = self.cycles

            trainNN(nnFN, patFileName, self.MSE, cycles,
                    self.algorithm, self.learningParams)

            # Extract info from nnFile
            weights, bias = extractWeights(nnFN)
        finally:
            for fileName in tmpFiles:
                # A file may be missing if the tool that creates it failed
                if os.path.exists(fileName):
                    os.remove(fileName)

        nn = {'in': transform.nnAntecedents,
              'hidden': hiddenLayers,
              'out': transform.nnTargets,
              'weights': weights,
              'bias': bias}

        # This self.domain seems needed by orgnFSS.FilteredClassifier
        # orange bug or misfeature?? XXX report
        # Note: -this is used to know which atts are being used by the learner
        #       -Could be used to check that every example agrees on domain
        #        (maybe not efficient)
        return SNNSClassifier(nn=nn, transform=transform, domain=t.domain)
class SNNSClassifier:
    """
    Classifier that evaluates in Python a network trained with SNNS.

    Attributes:
      transform -- Transform used to code examples and decode outputs
      name      -- classifier name ("snns" when none is given)
      nn        -- expected among the keyword arguments: dictionary with
                   keys 'in', 'hidden', 'out', 'weights' and 'bias'
      Any other keyword argument is stored as an attribute too.
    """

    def __init__(self, transform, name=None, **kwds):
        self.__dict__.update(kwds)
        # Keep the given name; fall back to "snns" only when it is missing
        self.name = name or "snns"
        self.transform = transform

    def __call__(self, exampleOfAnySize, resultType = orange.GetValue):
        """
        Classifies an example.

        Returns: the class value for orange.GetValue, the list of
        network outputs for orange.GetProbabilities, and the tuple
        (value, outputs) for any other resultType.
        """
        # Need to perform feature filtering because
        # IMHO this should be the duty of orngFSS.FilteredClassifier.__call__
        # to achieve transparency of FS in learning methods XXX report

        # Workaround to avoid the problem that appears in examples
        # with less features:
        workaround_domain = orange.Domain([a.name for a in self.transform.domain],
                                          exampleOfAnySize.domain)
        example = orange.Example(workaround_domain, exampleOfAnySize)

        exTr = self.transform.apply(example)
        output = self.simulateNN(exTr[:self.transform.nnAntecedents])
        v = self.transform.applyInverseToTarget(output)

        if resultType == orange.GetValue:
            return v
        elif resultType == orange.GetProbabilities:
            return output
        else:
            return (v, output)

    def __str__(self):
        t = '<orangeSNNS:\n'
        t += str(self.transform) + '\n'
        t += str(self.nn)
        t += '>'
        return t

    def simulateNN(self, inputs):
        """
        Evaluates feed-forward neural network with Logistic
        activation function

        inputs -- list with the activation of each input unit

        Returns: list with the activation of each output unit
        """
        layersSize = self.nn['hidden'] + [self.nn['out']]
        wRow = 0
        # The first len(inputs) bias entries are skipped: they are not
        # used by any evaluated layer
        bPos = len(inputs)
        act = inputs
        for n in layersSize:
            act = self.layer(act, self.nn['weights'][wRow:wRow + n],
                             self.nn['bias'][bPos:bPos + n])
            wRow += n
            bPos += n
        return act

    def layer(self, inputs, weights, bias):
        """
        Evaluates a layer (used by simulateNN)

        inputs  -- activations of the previous layer
        weights -- one list of incoming weights per unit of this layer
        bias    -- one bias per unit of this layer

        Returns: list with the logistic activation of each unit
        """
        out = []
        for i in range(len(weights)):
            net = 0
            for j in range(len(inputs)):
                net += inputs[j] * weights[i][j]
            net += bias[i]
            try:
                out.append(1 / (1 + math.exp(-net)))
            except OverflowError:
                # exp() overflows for very negative net input, where the
                # logistic function is 0 to machine precision
                out.append(0.0)
        return out
# --- main - test ----------------------------------------------
# (Unsorted tests used for development)
if __name__ == "__main__":
import orngTest, orngStat
import gc, os, re, fileinput
f = sys.argv[1]
data = orange.ExampleTable(f)
snns0 = SNNSLearner(name="snns", hiddenLayers=[4,2], cycles=100)
regresor=snns0(data)
sse = 0.0
print "Results (test)"
for e in data:
print e, "->", regresor(e)
## snns1 = SNNSLearner(name="snns0.2", auto=True, cycles=100, step=10, learningParams=["0.2"])
## snns2 = SNNSLearner(cycles=2000, learningParams=["0.2"])
## snns3 = SNNSLearner(cycles=2500, learningParams=["0.2"])
## learners = [snns1]#, snns1,snns2,snns3]
## # compute accuracies on data
## results = orngTest.crossValidation(learners, data, folds=5)
## # Print results
## if data.domain.classVar.varType == orange.VarTypes.Continuous:
## print "\nLearner MSE SE #Atts SE"
## for i in range(len(learners)):
## mse, se = complete.MSE_se(results, reportSE=1)[i]
## print "%-15s %6.2f %5.3f" % (learners[i].name, mse, se)
## else:
## print "\nLearner Accuracy SE #Atts SE"
## for i in range(len(learners)):
## ca, se = orngStat.CA_se(results)[i]
## ca, se = ca * 100, se * 100
## print "%-15s %6.2f %5.3f" % (learners[i].name, ca, se)