package nl.ru.ai.vroon.mdp;
import java.util.Random;
/**
 * @author TES Installatie
 *
 * The Algorithms class implements the two algorithms of this exercise:
 * Value Iteration and Q-learning, each maintaining its own value field
 * over the grid of the given Markov decision problem.
 */
public class Algorithms {
    /** Discount factor applied to future state values. */
    private final double gamma = 0.1;
    /** Exploration rate for the e-greedy policy; decays after every Q-learning step. */
    private double eta = 0.8;
    /** Learning rate for the Q-learning update. */
    private final double alpha = 0.9;
    /** Probability of slipping sideways relative to the intended action. */
    private final double side;
    /** Probability of moving in the intended direction. */
    private final double fwd;
    /** Probability of moving backwards relative to the intended action. */
    private final double bck;
    private final MarkovDecisionProblem mdp;
    private final VField vf;
    private final VField qf;
    /** Single RNG instance, reused across e-greedy decisions. */
    private final Random rand = new Random();

    // Action index convention used throughout: 0=N(UP), 1=E(RIGHT), 2=S(DOWN), 3=W(LEFT).
    private static final Action[] ACTIONS = { Action.UP, Action.RIGHT, Action.DOWN, Action.LEFT };
    // Neighbour offsets in the same N/E/S/W order as ACTIONS.
    private static final int[] DX = { 0, 1, 0, -1 };
    private static final int[] DY = { 1, 0, -1, 0 };

    /**
     * The algorithm class keeps track of two value fields, for the value
     * iteration and Q-learning algorithms respectively.
     *
     * @param mdp the Markov decision problem to solve
     */
    public Algorithms(MarkovDecisionProblem mdp) {
        this.mdp = mdp;
        this.vf = new VField(mdp.getWidth(), mdp.getHeight());
        this.qf = new VField(mdp.getWidth(), mdp.getHeight());
        vf.initialize();
        qf.initialize();
        // The transition probabilities.
        side = mdp.getErrorProbability();
        fwd = mdp.getProbability();
        bck = mdp.getBackProbability();
    }

    /**
     * Collects, for the four neighbours of (x, y) in N/E/S/W order, the stored
     * value from {@code field} and the one-step reward. Neighbours that are out
     * of bounds or obstacles keep value/reward 0.0 (so they still contribute a
     * neutral term to the side/back slip components of other actions, exactly
     * as the per-direction code did) and are flagged NEGATIVE_INFINITY in the
     * returned mask so the corresponding action is excluded from maximization.
     *
     * @param field the value field to read neighbour values from
     * @param x     current state x coordinate
     * @param y     current state y coordinate
     * @param nv    out: neighbour values, indexed by direction
     * @param nr    out: neighbour rewards, indexed by direction
     * @return mask with NEGATIVE_INFINITY at each impossible direction
     */
    private double[] neighbours(VField field, int x, int y, double[] nv, double[] nr) {
        double[] mask = new double[4];
        for (int d = 0; d < 4; d++) {
            int nx = x + DX[d];
            int ny = y + DY[d];
            Field f = mdp.getField(nx, ny);
            if (!f.equals(Field.OUTOFBOUNDS) && !f.equals(Field.OBSTACLE)) {
                nv[d] = field.getValue(nx, ny);
                nr[d] = mdp.getNextReward(nx, ny);
            } else {
                mask[d] = Double.NEGATIVE_INFINITY;
            }
        }
        return mask;
    }

    /**
     * One sweep step of the Value Iteration algorithm for the current state:
     * computes the expected value of each possible action under the slip model
     * (fwd/side/bck), then stores the maximum value and the greedy policy in
     * the V field.
     */
    public void ValueIterator() {
        int x = mdp.getStateXPosition();
        int y = mdp.getStateYPostion();
        double[] nv = new double[4];
        double[] nr = new double[4];
        // FIX: value iteration must bootstrap from its own field (vf); the
        // original read neighbour values from the Q-learning field (qf).
        double[] mask = neighbours(vf, x, y, nv, nr);
        Action action = Action.UP;
        double maxi = Double.NEGATIVE_INFINITY;
        for (int a = 0; a < 4; a++) {
            if (mask[a] == Double.NEGATIVE_INFINITY) {
                continue; // impossible action (obstacle / out of bounds ahead)
            }
            // Relative directions for action a: forward=a, right/left=(a+-1)%4, back=(a+2)%4.
            int r1 = (a + 1) % 4;
            int bk = (a + 2) % 4;
            int r2 = (a + 3) % 4;
            double v = fwd * (nr[a] + gamma * nv[a])
                    + side * (nr[r1] + gamma * nv[r1])
                    + bck * (nr[bk] + gamma * nv[bk])
                    + side * (nr[r2] + gamma * nv[r2]);
            // FIX: the original compared/assigned whole arrays (values > maxi),
            // which does not compile; index the candidate explicitly.
            if (v > maxi) {
                maxi = v;
                action = ACTIONS[a];
            }
        }
        // Setting the newly found V value and policy.
        vf.setValue(x, y, maxi);
        vf.setAction(x, y, action);
    }

    /**
     * One step of the Q-learning algorithm for the current state: estimates
     * Q[s', a'] for each action, chooses e-greedily between a random action
     * and the greedy one, and applies the update
     * Q[s,a] &lt;- Q[s,a] + alpha * (r + gamma * max Q[s',a'] - Q[s,a]).
     * The exploration rate eta decays after every call (floored at 0).
     */
    public void qLearning() {
        int x = mdp.getStateXPosition();
        int y = mdp.getStateYPostion();
        double[] nq = new double[4];
        double[] nr = new double[4];
        double[] mask = neighbours(qf, x, y, nq, nr);
        // Candidate Q[s', a'] values, one per action (N/E/S/W indexing).
        double[] q = new double[4];
        for (int a = 0; a < 4; a++) {
            q[a] = qCalc(mdp, ACTIONS[a], vf, qf, nq[0], nq[2], nq[3], nq[1], nr[a]);
        }
        Action action = Action.UP;
        double maxi = Double.NEGATIVE_INFINITY;
        // E-Greedy: with probability eta explore, otherwise exploit.
        if (rand.nextDouble() < eta) {
            // FIX: the original used nextInt(3), which could never pick LEFT,
            // and never recorded the explored action as the policy.
            int a = rand.nextInt(4);
            maxi = q[a];
            action = ACTIONS[a];
        } else {
            // Greedy: highest Q[s', a'] among the possible actions only.
            for (int a = 0; a < 4; a++) {
                if (mask[a] == Double.NEGATIVE_INFINITY) {
                    continue; // impossible action filtered out
                }
                // FIX: original array-vs-scalar comparison did not compile.
                if (q[a] > maxi) {
                    maxi = q[a];
                    action = ACTIONS[a];
                }
            }
        }
        double r = mdp.getReward();
        // Q[S,A] <- Q[S,A] + alpha * (r + gamma * max(Q[S',A']) - Q[S,A])
        double old = qf.getValue(x, y);
        qf.setValue(x, y, old + alpha * (r + gamma * maxi - old));
        qf.setAction(x, y, action);
        // Lessen the exploration rate, but never let it go negative
        // (the original decremented without a floor).
        eta = Math.max(0.0, eta - 0.05);
    }

    /**
     * Computes the slip-weighted Q[S', A'] estimate for a single action, so the
     * main routine is not cluttered by four near-identical expressions.
     *
     * @param mdp    the problem (unused directly; kept for signature stability)
     * @param a      the intended action
     * @param vf     the value-iteration field (unused; kept for signature stability)
     * @param qf     the Q-learning field (unused; kept for signature stability)
     * @param qNorth Q value of the northern neighbour
     * @param qSouth Q value of the southern neighbour
     * @param qWest  Q value of the western neighbour
     * @param qEast  Q value of the eastern neighbour
     * @param r      reward of the state the action moves towards
     * @return the corresponding Q[S', A'] value, or 0.0 for an unknown action
     */
    private double qCalc(MarkovDecisionProblem mdp, Action a, VField vf, VField qf, double qNorth, double qSouth,
            double qWest, double qEast, double r) {
        if (a.equals(Action.UP))
            return r + gamma * (fwd * qNorth + side * qEast + bck * qSouth + side * qWest);
        if (a.equals(Action.DOWN))
            return r + gamma * (fwd * qSouth + side * qEast + bck * qNorth + side * qWest);
        if (a.equals(Action.RIGHT))
            return r + gamma * (fwd * qEast + side * qSouth + side * qNorth + bck * qWest);
        if (a.equals(Action.LEFT))
            return r + gamma * (fwd * qWest + bck * qEast + side * qNorth + side * qSouth);
        return 0.0;
    }

    /**
     * @return the current Value Iteration field
     */
    public VField getVfield() {
        return vf;
    }

    /**
     * @return the current Q-learning field
     */
    public VField getQfield() {
        return qf;
    }

    /**
     * @param xPos x coordinate of the queried state
     * @param yPos y coordinate of the queried state
     * @return the value at the given location in the Value Iteration field
     */
    public double getValue(int xPos, int yPos) {
        return vf.getValue(xPos, yPos);
    }
}