
What's under your ctrl + v?

DeletedUser21356

Guest
12 points: *country name*
10 points: *country name*
8 points: *country name*
7 points: *country name*
6 points: *country name*
5 points: *country name*
 

AriChibi

Hydra
package nl.ru.ai.vroon.mdp;

import java.util.Random;

/**
* @author TES Installatie
*
* The Algorithms class, which implements both of the algorithms for this
* exercise.
*/

public class Algorithms {

private final double gamma = 0.1;
private double eta = 0.8;
private final double alpha = 0.9;
private final double side;
private final double fwd;
private final double bck;
private MarkovDecisionProblem mdp;
private VField vf;
private VField qf;

/**
* The algorithm class will keep track of two value fields for the value
* iteration and Q-learning algorithms respectively.
*
* @param mdp
*/
public Algorithms(MarkovDecisionProblem mdp) {
this.mdp = mdp;
this.vf = new VField(mdp.getWidth(), mdp.getHeight());
this.qf = new VField(mdp.getWidth(), mdp.getHeight());
vf.initialize();
qf.initialize();

// The transition probabilities.
side = mdp.getErrorProbability();
fwd = mdp.getProbability();
bck = mdp.getBackProbability();
}

/**
* The Value Iterator algorithm. This algorithm will fill up its respective
* (V)Field with its found values and policies.
*/
public void ValueIterator() {
double[] values = new double[4]; // 0=N,1=E,2=S,3=W
Action[] actions = { Action.UP, Action.RIGHT, Action.DOWN, Action.LEFT };
int x = mdp.getStateXPosition();
int y = mdp.getStateYPostion();
double rN = 0.0, rS = 0.0, rW = 0.0, rE = 0.0;
double vNorth = 0.0, vEast = 0.0, vSouth = 0.0, vWest = 0.0;

// V(NORTH) and reward of the northern state
if (!mdp.getField(x, y + 1).equals(Field.OUTOFBOUNDS) && !mdp.getField(x, y + 1).equals(Field.OBSTACLE)) {
vNorth = qf.getValue(x, y + 1);
rN = mdp.getNextReward(x, y + 1);
} else
values[0] = Double.NEGATIVE_INFINITY;

// V(SOUTH) and reward of the southern state
if (!mdp.getField(x, y - 1).equals(Field.OUTOFBOUNDS) && !mdp.getField(x, y - 1).equals(Field.OBSTACLE)) {
vSouth = qf.getValue(x, y - 1);
rS = mdp.getNextReward(x, y - 1);
} else
values[2] = Double.NEGATIVE_INFINITY;

// V(EAST) and reward of the eastern state
if (!mdp.getField(x + 1, y).equals(Field.OUTOFBOUNDS) && !mdp.getField(x + 1, y).equals(Field.OBSTACLE)) {
vEast = qf.getValue(x + 1, y);
rE = mdp.getNextReward(x + 1, y);
} else
values[1] = Double.NEGATIVE_INFINITY;

// V(WEST) and reward of the western state
if (!mdp.getField(x - 1, y).equals(Field.OUTOFBOUNDS) && !mdp.getField(x - 1, y).equals(Field.OBSTACLE)) {
vWest = qf.getValue(x - 1, y);
rW = mdp.getNextReward(x - 1, y);
} else
values[3] = Double.NEGATIVE_INFINITY;

// Calculating the values for the different actions
double[] temp = new double[4];
temp[0] = (fwd * (rN + gamma * vNorth) + side * (rE + gamma * vEast) + bck * (rS + gamma * vSouth)
+ side * (rW + gamma * vWest));
temp[2] = (fwd * (rS + gamma * vSouth) + side * (rE + gamma * vEast) + bck * (rN + gamma * vNorth)
+ side * (rW + gamma * vWest));
temp[1] = (fwd * (rE + gamma * vEast) + side * (rN + gamma * vNorth) + bck * (rW + gamma * vWest)
+ side * (rS + gamma * vSouth));
temp[3] = (fwd * (rW + gamma * vWest) + side * (rN + gamma * vNorth) + bck * (rE + gamma * vEast)
+ side * (rS + gamma * vSouth));
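// Worked example of the lines above (numbers assumed purely for illustration):
// with fwd = 0.7, side = 0.1, bck = 0.1, rN = 1, vNorth = 2 and all other
// neighbouring rewards/values equal to 0, the UP entry becomes
// temp[0] = 0.7 * (1 + 0.1 * 2) = 0.84, while the other three entries each get
// only the 0.1 * 1.2 = 0.12 that leaks in through the side/back probabilities,
// so UP wins the maximisation below.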

// Filter out the impossible actions: if, for example, the northern state is an
// obstacle, its entry keeps Double.NEGATIVE_INFINITY and is therefore excluded
// from the maximisation below
for (int v = 0; v < 4; v++) {
if (values[v] != Double.NEGATIVE_INFINITY) {
values[v] = temp[v];
}
}

Action action = Action.UP;

// Selecting the highest value and the corresponding action
double maxi = Double.NEGATIVE_INFINITY;
for (int i = 0; i < values.length; i++) {
if (values[i] > maxi) {
maxi = values[i];
action = actions[i];
}
}
//System.out.println("Max: " + maxi + " Action:" + action.toString());

// Setting the newly found V value and policy
vf.setValue(x, y, maxi);
vf.setAction(x, y, action);
}

/**
* The Q-Learning algorithm which fills up its respective (Q)Field with its
* found values and policies.
*/
public void qLearning() {
double[] values = new double[4];
int x = mdp.getStateXPosition();
int y = mdp.getStateYPostion();
double rN = 0.0, rS = 0.0, rW = 0.0, rE = 0.0;
double qNorth = 0.0, qEast = 0.0, qSouth = 0.0, qWest = 0.0;

// Q(NORTH,UP) and reward of the northern state
if (!mdp.getField(x, y + 1).equals(Field.OUTOFBOUNDS) && !mdp.getField(x, y + 1).equals(Field.OBSTACLE)) {
qNorth = qf.getValue(x, y + 1);
rN = mdp.getNextReward(x, y + 1);
} else
values[0] = Double.NEGATIVE_INFINITY;

// Q(SOUTH,DOWN) and reward of the southern state
if (!mdp.getField(x, y - 1).equals(Field.OUTOFBOUNDS) && !mdp.getField(x, y - 1).equals(Field.OBSTACLE)) {
qSouth = qf.getValue(x, y - 1);
rS = mdp.getNextReward(x, y - 1);
} else
values[2] = Double.NEGATIVE_INFINITY;

// Q(EAST,RIGHT) and reward of the eastern state
if (!mdp.getField(x + 1, y).equals(Field.OUTOFBOUNDS) && !mdp.getField(x + 1, y).equals(Field.OBSTACLE)) {
qEast = qf.getValue(x + 1, y);
rE = mdp.getNextReward(x + 1, y);
} else
values[1] = Double.NEGATIVE_INFINITY;

// Q(WEST,LEFT) and reward of the western state
if (!mdp.getField(x - 1, y).equals(Field.OUTOFBOUNDS) && !mdp.getField(x - 1, y).equals(Field.OBSTACLE)) {
qWest = qf.getValue(x - 1, y);
rW = mdp.getNextReward(x - 1, y);
} else
values[3] = Double.NEGATIVE_INFINITY;


Action[] actions = { Action.UP, Action.RIGHT, Action.DOWN, Action.LEFT };
// 0=N,1=E,2=S,3=W

// Here the Q[S',A'] values are calculated
double[] temp = new double[4];
temp[0] = qCalc(mdp, Action.UP, vf, qf, qNorth, qSouth, qWest, qEast, rN);
temp[2] = qCalc(mdp, Action.DOWN, vf, qf, qNorth, qSouth, qWest, qEast, rS);
temp[1] = qCalc(mdp, Action.RIGHT, vf, qf, qNorth, qSouth, qWest, qEast, rE);
temp[3] = qCalc(mdp, Action.LEFT, vf, qf, qNorth, qSouth, qWest, qEast, rW);

Action action = Action.UP;
Random rand = new Random();
double prob = rand.nextDouble();
double maxi = Double.NEGATIVE_INFINITY;

// The impossible states are filtered out i.e. obstacles
for (int v = 0; v < 4; v++) {
if (values[v] != Double.NEGATIVE_INFINITY) {
values[v] = temp[v];
//System.out.println(v + " " + values[v]);
}
}

// E-Greedy
// Decide randomly whether to choose the optimal policy or to perform a
// random action
if (prob < eta) {
// Choose a random action out of all four, and remember which one was taken
System.out.println(prob + "<" + eta);
int idx = rand.nextInt(4);
maxi = temp[idx];
action = actions[idx];
} else {
// Select the highest Q[S',A']
for (int i = 0; i < values.length; i++) {
if (values[i] > maxi) {
maxi = values[i];
action = actions[i];
}
}
}
double r = mdp.getReward();
//System.out.println("Max: " + maxi + " Action:" + action.toString());

// Q[S,A] <- Q[S,A] + alpha(r + gamma * max(Q[S',A']) - Q[S,A])
double q = qf.getValue(x, y) + alpha * (r + gamma * maxi - qf.getValue(x, y));
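// Worked example of the update above, using the class constants alpha = 0.9 and
// gamma = 0.1 and assuming Q[S,A] = 0.5, r = 1 and max Q[S',A'] = 2:
// the new value is 0.5 + 0.9 * (1 + 0.1 * 2 - 0.5) = 1.13.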

// Set the new value and policy, and decay eta to reduce the amount of
// exploration over time.
qf.setValue(x, y, q);
qf.setAction(x, y, action);
eta -= 0.05;

}

/**
* A separate function to calculate the Q[S',A'] value such that the
* original function is not cluttered by all of these repeated lines.
*
* @param mdp
* @param a
* @param vf
* @param qf
* @param qNorth
* @param qSouth
* @param qWest
* @param qEast
* @return the corresponding Q[S',A'] values
*/
private double qCalc(MarkovDecisionProblem mdp, Action a, VField vf, VField qf, double qNorth, double qSouth,
double qWest, double qEast, double r) {
if (a.equals(Action.UP))
return r + gamma * (fwd * qNorth + side * qEast + bck * qSouth + side * qWest);
if (a.equals(Action.DOWN))
return r + gamma * (fwd * qSouth + side * qEast + bck * qNorth + side * qWest);
if (a.equals(Action.RIGHT))
return r + gamma * (fwd * qEast + side * qSouth + side * qNorth + bck * qWest);
if (a.equals(Action.LEFT))
return r + gamma * (fwd * qWest + bck * qEast + side * qNorth + side * qSouth);

return 0.0;
}

/**
* @return the current Value Iteration Field
*/
public VField getVfield() {
return vf;
}

/**
* @return the current Q-Learning Field
*/
public VField getQfield() {
return qf;
}

/**
* @param xPos
* @param yPos
* @return the value at the given location in the VField
*/
public double getValue(int xPos, int yPos) {
return vf.getValue(xPos, yPos);
}

}
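For anyone who wants to play with the idea outside the course framework: below is a minimal, self-contained sketch of the same kind of Bellman backup on a toy 2x2 grid. All names and numbers (rewards, discount, neighbour lists, number of sweeps) are made up for illustration and are independent of the nl.ru.ai.vroon.mdp classes above.

public class ToyValueIteration {
    public static void main(String[] args) {
        double gamma = 0.9;                        // discount factor (assumed)
        double[] reward = { 0.0, 0.0, 0.0, 1.0 };  // state 3 is the goal state
        double[] value = new double[4];            // V(s), initialised to zero
        // neighbours[s] lists the states reachable from s on a 2x2 grid
        int[][] neighbours = { { 1, 2 }, { 0, 3 }, { 0, 3 }, { 1, 2 } };

        // Synchronous value-iteration sweeps with deterministic toy transitions
        // (unlike the noisy fwd/side/bck model above):
        // V(s) <- max over s' of [ R(s') + gamma * V(s') ]
        for (int sweep = 0; sweep < 50; sweep++) {
            double[] next = new double[4];
            for (int s = 0; s < 4; s++) {
                double best = Double.NEGATIVE_INFINITY;
                for (int sPrime : neighbours[s])
                    best = Math.max(best, reward[sPrime] + gamma * value[sPrime]);
                next[s] = best;
            }
            value = next;
        }
        for (int s = 0; s < 4; s++)
            System.out.printf("V(%d) = %.3f%n", s, value[s]);
    }
}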
 

DeletedUser28406

Guest
Unless you want to be scantily clad (or not clad at all) ladies, I don't think I'll be joining this game.
 