Symbolic Regression

From jecoliwiki
Jump to: navigation, search

Contents

Symbolic Regression

Problem definition

Given an array of n points, find a n-order finite difference function that approximates the given set of numbers. This problem is solved using Genetic Programming and Linear Genetic Programming.

Resolution steps

Linear Genetic Programming

To solve this particular problem is used a linear representation where each gene represents an instruction. Linear Genetic Programming requires the use of a CPU that is defined in the class CPU. This class represents a generic processing unit with k output registers and n input/output registers. In box bellow is shown, how to create an evaluation function for this specific problem (see How To: creating evaluation functions). This function provides the decoding of the genome into a solution to the problem and also the process of assigning this solution an appropriate fitness value (numerical).

/**
 * The Class ArraySymbolicRegressionEvaluationFunction.
 */
public class ArraySymbolicRegressionEvaluationFunction extends EvaluationFunction<ILinearRepresentation<IInstruction>>{
	
	/** The function values array. */
	protected double[] functionValuesArray;
	
	/** The cpu. */
	protected CPU cpu;
	
	/** The order of the finite difference function. */
	protected int k;
	
	/**
	 * Instantiates a new array symbolic regression evaluation function.
	 * 
	 * @param k the the order of the finite difference function
	 * @param numberOfReadWriteRegisters the number of read write registers
	 * @param functionValuesArray the function values array
	 */
	public ArraySymbolicRegressionEvaluationFunction(int k,int numberOfReadWriteRegisters,double[] functionValuesArray){
		super(false);
		cpu = new CPU(numberOfReadWriteRegisters,k,1);
		this.functionValuesArray = functionValuesArray;
		this.k = k;
	}

	@Override
	public List<Double> evaluate(ILinearRepresentation<IInstruction> solutionRepresentation) throws Exception {
		List<Double> objectiveList = new ArrayList<Double>();
		double leastSquareValue = 0;
		int totalNumberOfValues = functionValuesArray.length;
			
		cpu.clearAllRegisters();
		
		for(int i = k; i < totalNumberOfValues ;i++){
			for(int j = 0; j < k;j++)
				setCPUInputRegisterInput(i,j);
			leastSquareValue += calculateLeastSquareFunctionPoint(i,solutionRepresentation);
		}
		
		objectiveList.add(leastSquareValue);
		
		return objectiveList;
	}
	
	
	/**
	 * Calculate least square function point.
	 * 
	 * @param functionPointIndex the function point index
	 * @param solutionRepresentation the solution representation
	 * 
	 * @return the double
	 */
	protected double calculateLeastSquareFunctionPoint(int functionPointIndex,ILinearRepresentation<IInstruction> solutionRepresentation){
		double functionValue = functionValuesArray[functionPointIndex];
		List<Double> outputRegisters = cpu.evaluateProgram(solutionRepresentation);
		double outputValue = outputRegisters.get(0);
		if(Double.isNaN(outputValue))
			return Double.MAX_VALUE;
		
		double difference = functionValue - outputValue;
		
		return Math.pow(difference,2);
	}

	/**
	 * Sets the cpu input register input.
	 * 
	 * @param currentFunctionIndex the current function index
	 * @param currentRegisterIndex the current register index
	 */
	protected void setCPUInputRegisterInput(int currentFunctionIndex,int currentRegisterIndex){
		int inputRegisterIndex = currentFunctionIndex - (k - currentRegisterIndex);
		double functionValue = functionValuesArray[inputRegisterIndex];
		cpu.setInputValue(currentRegisterIndex,functionValue);
	}


The next step will be to choose one of the available algorithms. Let us start by illustrating the use of Cellular Genetic Algorithms. The next segment of code addresses the tasks of configuring and running a Cellular Genetic Algorithm (the detailed explanation on the configuration of the several components is addressed in the How To: configuring CAGA).


/**
	 * The main method.
	 * 
	 * @param args the arguments
	 * 
	 * @throws Exception the exception
	 * @throws InvalidConfigurationException the invalid configuration exception
	 */
	public static void main(String[] args) throws Exception, InvalidConfigurationException{
		String baseDirectoryPath = "TestRun";
		String jobId = "LGP";
		int numberOfRuns = 10;
		int k = 1;//Integer.valueOf(args[0]);
		int numberOfReadWriteRegisters = 5;
		double[] functionValuesArray = {0,2,4,6,8,10,12,14,16.3,18.4,20,22,24.5,26,28,30,32,34,36,38,40};
		
		IAlgorithm algorithm = createCellularAutomataGeneticAlgorithm(k, numberOfReadWriteRegisters, functionValuesArray);
		List<IAlgorithmResult> algorithmResultList = new ArrayList<IAlgorithmResult>(numberOfRuns);
 				
 		for(int i = 0; i < numberOfRuns ; i++){
 			IAlgorithmResult result = algorithm.run();
 			algorithmResultList.add(result);
 		}
 		
 		AlgorithmOverallRunsStatistics statistics = new AlgorithmOverallRunsStatistics(algorithm,algorithmResultList);
 		IStatisticWriter statisticWriter = new CSVStatisticsWriter("\t");
 		statisticWriter.writeOverallAllRunStatistics(baseDirectoryPath, jobId,statistics);
	}
	
	
	/**
	 * Creates the cellular automata genetic algorithm.
	 * 
	 * @param k the order of the finite difference function
	 * @param numberOfReadWriteRegisters the number of read write registers
	 * @param functionValuesArray the function values array
	 * 
	 * @return the cellular genetic algorithm
	 * 
	 * @throws Exception the exception
	 * @throws InvalidConfigurationException the invalid configuration exception
	 */
	protected static IAlgorithm createCellularAutomataGeneticAlgorithm(int k,int numberOfReadWriteRegisters, double[] functionValuesArray) throws Exception, InvalidConfigurationException{
		
		CellularGeneticAlgorithmConfiguration configuration =  new CellularGeneticAlgorithmConfiguration();
		
                //configuration of the statistics to show on screen
		StatisticsConfiguration statisticsConfiguration = new StatisticsConfiguration();
		StatisticTypeMask screenStatisticsMaks = statisticsConfiguration.getScreenStatisticsMask();
		screenStatisticsMaks.setMaxFitnessValue(false);
		screenStatisticsMaks.setIterationExecutionTime(false);
		screenStatisticsMaks.setNumberOfFunctionEvaluations(false);
		configuration.setStatisticsConfiguration(statisticsConfiguration);
		
		EvaluationFunction<ILinearRepresentation<IInstruction>>  evaluationFunction = new ArraySymbolicRegressionEvaluationFunction(k,numberOfReadWriteRegisters,functionValuesArray);
		configuration.setEvaluationFunction(evaluationFunction);
				
		ILinearRepresentationFactory<IInstruction> solutionFactory = createSolutionFactory((ArraySymbolicRegressionEvaluationFunction) evaluationFunction);
		configuration.setSolutionFactory(solutionFactory);
		
		configuration.setWidth(100);// The grid width
		configuration.setHeight(100);//The grid height
		configuration.setRadius(1);// The radius of the neighborhood
		

		configuration.setNeighborhoodType(NeighborhoodType.COMPACT); // Select the neighborhood type
		configuration.setCellularAutomataType(CellularAutomataType.SYNCHRONOUS); //Select one of the available update policies

                /*The termination criteria - In this case the number of iterations*/
		ITerminationCriteria terminationCriteria = new IterationTerminationCriteria(1000);
		configuration.setTerminationCriteria(terminationCriteria);
		
                //Set the selection operator to use
		configuration.setSelectionOperator(new TournamentSelection(1,2));
		
                //Creation of the operator container and the operators used in this problem
		IOperatorContainer<IReproductionOperator> reproductionOperatorContainer = new OperatorContainer<IReproductionOperator>();
		
		reproductionOperatorContainer.addOperator(0.25,new MicroInstructionMutation(solutionFactory));
		reproductionOperatorContainer.addOperator(0.25,new UniformCrossover<IInstruction>(solutionFactory));	
		reproductionOperatorContainer.addOperator(0.25,new LinearGenomeShrinkMutation<IInstruction>(solutionFactory));
		reproductionOperatorContainer.addOperator(0.25,new LinearGenomeGrowMutation<IInstruction>(solutionFactory));
		
		configuration.setReproductionOperatorContainer(reproductionOperatorContainer);
	
		return new CellularGeneticAlgorithm(configuration); 
	}


Genetic Programming

To address this particular problem using genetic programming a tree based representation will be used. In box bellow is how to create an evaluation function for this specific problem (see How To: creating evaluation functions). This function provides the decoding of the genome into a solution to the problem and also the process of assigning this solution an appropriate fitness value (numerical).



/**
 * The Class ArraySymbolicRegressionEvaluationGP.
 */
public class ArraySymbolicRegressionEvaluationGP extends EvaluationFunction<ITreeRepresentation<DataTypeEnum,Double>>{
	
	/** The function values array. */
	protected double[] functionValuesArray;
	
	/** The order of the finite difference function */
	protected int k;
	
	/** The environment used in the evaluation of an abstract syntax tree*/
	protected IEnvironment<Double> environment;
	
	/**
	 * Instantiates a new array symbolic regression evaluation gp.
	 * 
	 * @param k the order of the finite difference function 
	 * @param functionValuesArray the function values array
	 */
	public ArraySymbolicRegressionEvaluationGP(int k,double[] functionValuesArray){
		super(false);
		environment = new Environment<Double>();
		this.k = k;
		this.functionValuesArray = functionValuesArray;
	}
	
	
	@Override
	public List<Double> evaluate(ITreeRepresentation<DataTypeEnum, Double> solutionRepresentation) throws Exception {
		List<Double> objectiveList = new ArrayList<Double>();
		double leastSquareValue = 0;
		int totalNumberOfValues = functionValuesArray.length;
		
                //Represents an abstract syntax tree with nodes o type DataTypeEnum that return Double values.
		AbstractSyntaxTree<DataTypeEnum,Double> tree = solutionRepresentation.getTree();
		
		for(int i = k; i < totalNumberOfValues ;i++){
			for(int j = 0; j < k;j++)
				setVaribleNodeInEnvironment(i,j);
			leastSquareValue += calculateLeastSquareFunctionPoint(i,tree);
		}
		
		objectiveList.add(leastSquareValue);
		return objectiveList;
	}


	/**
	 * Calculate least square function point.
	 * 
	 * @param currentFunctionPoint the current function point
	 * @param tree the tree
	 * 
	 * @return the double
	 */
	protected double calculateLeastSquareFunctionPoint(int currentFunctionPoint,AbstractSyntaxTree<DataTypeEnum,Double> tree){
		double functionValue = functionValuesArray[currentFunctionPoint];
		double outputValue = tree.evaluate(environment);
		
		if(Double.isNaN(outputValue))
			return Double.MAX_VALUE;
		
		double difference = functionValue - outputValue;
		return Math.pow(difference,2);
	}

	/**
	 * Sets the varible node in environment.
	 * 
	 * @param currentFunctionPoint the current function point
	 * @param currentNode the current node
	 */
	protected void setVaribleNodeInEnvironment(int currentFunctionPoint, int currentNode) {
		int functionValueIndex = currentFunctionPoint-(currentNode+1);
		double functionValue = functionValuesArray[functionValueIndex];
		String variableId = "X"+currentNode;
		environment.associate(variableId,functionValue);
	}

}



The next step will be to choose one of the available algorithms. Let us start by illustrating the use of Cellular Genetic Algorithms. The next segment of code addresses the tasks of configuring and running a Cellular Genetic Algorithm (the detailed explanation on the configuration of the several components is addressed in the How To: configuring CAGA).

/**
 * The Class ArraySymbolicRegressionGP.
 */
public class ArraySymbolicRegressionGP {
	
	
	/**
	 * The main method.
	 * 
	 * @param args the arguments
	 * 
	 * @throws Exception the exception
	 * @throws InvalidConfigurationException the invalid configuration exception
	 */
	public static void main(String[] args) throws Exception, InvalidConfigurationException{
		String baseDirectoryPath = "TestRun";
		String jobId = "GPSymbolicRegression";
		int numberOfRuns = 10;
		int k = 1;//Integer.valueOf(args[0]);
		double[] functionValuesArray = {0,2,4,6,8,10,12,14,16.3,18.4,20,22,24.5,26,28,30,32,34,36,38,40};

		
		IAlgorithm algorithm = createCellularAutomataGeneticAlgorithm(k,functionValuesArray);
		
		List<IAlgorithmResult> algorithmResultList = new ArrayList<IAlgorithmResult>(numberOfRuns);
 				
 		for(int i = 0; i < numberOfRuns ; i++){
 			IAlgorithmResult result = algorithm.run();
 			algorithmResultList.add(result);
 		}
 		
 		AlgorithmOverallRunsStatistics statistics = new AlgorithmOverallRunsStatistics(algorithm,algorithmResultList);
 		IStatisticWriter statisticWriter = new CSVStatisticsWriter("\t");
 		statisticWriter.writeOverallAllRunStatistics(baseDirectoryPath, jobId,statistics);
	}

	/**
	 * Creates the cellular automata genetic algorithm.
	 * 
	 * @param k the order of the finite difference function 
	 * @param functionValuesArray the function values array
	 * 
	 * @return the i algorithm
	 * 
	 * @throws Exception the exception
	 * @throws InvalidConfigurationException the invalid configuration exception
	 */
	private static IAlgorithm createCellularAutomataGeneticAlgorithm(int k,double[] functionValuesArray) throws Exception, InvalidConfigurationException {
		CellularGeneticAlgorithmConfiguration configuration =  new CellularGeneticAlgorithmConfiguration();
		//Statistics Configuration
		StatisticsConfiguration statisticsConfiguration = new StatisticsConfiguration();
		StatisticTypeMask screenStatisticsMaks = statisticsConfiguration.getScreenStatisticsMask();
		screenStatisticsMaks.setMaxFitnessValue(false);
		screenStatisticsMaks.setIterationExecutionTime(false);
		screenStatisticsMaks.setNumberOfFunctionEvaluations(false);
		configuration.setStatisticsConfiguration(statisticsConfiguration);
		
		EvaluationFunction<ITreeRepresentation<DataTypeEnum, Double>>  evaluationFunction = new ArraySymbolicRegressionEvaluationGP(k,functionValuesArray);
		configuration.setEvaluationFunction(evaluationFunction);
		
	
		ConstantNodeConstraints constantNodeConstraint = new ConstantNodeConstraints();
	    List<String> variableNameList = createVariableNameList(k); //Creates the names of variables allowed to be used in GP
	    VariableNodeConstraints variableNodeConstraints = new VariableNodeConstraints(variableNameList);//Specifies constraints for the nodes 
		//Object whose main purpose is to create a set of nodes to use
		INodeFactory<DataTypeEnum,Double> nodeFactory = new LanguageFactory(constantNodeConstraint,variableNodeConstraints);
		
		ITreeRepresentationFactory solutionFactory = new TreeRepresentationFactory<DataTypeEnum,Double>(DataTypeEnum.DOUBLE,nodeFactory,3);//Expression factory
		configuration.setSolutionFactory(solutionFactory);
		
		configuration.setWidth(20);
		configuration.setHeight(20);
		configuration.setRadius(1);
		
		configuration.setNeighborhoodType(NeighborhoodType.NEWS);
		configuration.setCellularAutomataType(CellularAutomataType.ASYNCHRONOUS_UNIFORM_CHOICE);

		ITerminationCriteria terminationCriteria = new IterationTerminationCriteria(1000);
		configuration.setTerminationCriteria(terminationCriteria);
		
		configuration.setSelectionOperator(new TournamentSelection(1,2));
		
		IOperatorContainer<IReproductionOperator> reproductionOperatorContainer = new OperatorContainer<IReproductionOperator>();
		
		reproductionOperatorContainer.addOperator(1,new SubTreeMutation<DataTypeEnum,Double>(solutionFactory));
		
		
		configuration.setReproductionOperatorContainer(reproductionOperatorContainer);
	
		return new CellularGeneticAlgorithm(configuration); 

	}

	/**
	 * Creates the variable name list.
	 * 
	 * @param k the order of the finite difference function 
	 * 
	 * @return the list< string>
	 */
	protected static List<String> createVariableNameList(int k) {
		List<String> variableNameList = new ArrayList<String>(k);
		
		for(int i = 0; i < k;i++)
			variableNameList.add("X"+i);
		
		return variableNameList;
	}
	
}

Personal tools