What's under your ctrl + v?

Discussion in 'Praatcafé' started by RabbidRobin, 21 Jun 2012.


  1. AriChibi

    Joined:
    7 Mar 2014
    Messages:
    1,391
    Likes Received:
    749
  2. Gekkootje63

    Joined:
    12 Jul 2013
    Messages:
    747
    Likes Received:
    105
    12 points: *Country name*
    10 points: *Country name*
    8 points: *Country name*
    7 points: *Country name*
    6 points: *Country name*
    5 points: *Country name*
     
  3. danielRV

    Joined:
    25 Feb 2017
    Messages:
    348
    Likes Received:
    142
    Terrorappel
     
  4. Corinthe

    Joined:
    25 Dec 2012
    Messages:
    681
    Likes Received:
    0
  5. AriChibi

    Joined:
    7 Mar 2014
    Messages:
    1,391
    Likes Received:
    749
  6. danielRV

    Joined:
    25 Feb 2017
    Messages:
    348
    Likes Received:
    142
  7. xxSwordsmanxx

    Joined:
    29 Jun 2012
    Messages:
    393
    Likes Received:
    35
    26/05/17: Was this the 'MA'?
     
  8. malista?

    Joined:
    5 Jan 2017
    Messages:
    33
    Likes Received:
    10
    [report]25678f3c8ba666705e6ea20fd9d4c197[/report]
     
  9. Hertog Takkie

    Joined:
    6 Apr 2017
    Messages:
    48
    Likes Received:
    4
    megashare film
     
  10. danielRV

    Joined:
    25 Feb 2017
    Messages:
    348
    Likes Received:
    142
  11. AriChibi

    Joined:
    7 Mar 2014
    Messages:
    1,391
    Likes Received:
    749
    package nl.ru.ai.vroon.mdp;

    import java.util.Random;

    /**
    * @author TES Installatie
    *
    * The Algorithms class, which implements both of the algorithms for this
    * exercise: value iteration and Q-learning.
    */

    public class Algorithms {

    private final double gamma = 0.1;
    private double eta = 0.8;
    private final double alpha = 0.9;
    private final double side;
    private final double fwd;
    private final double bck;
    private MarkovDecisionProblem mdp;
    private VField vf;
    private VField qf;

    /**
    * The algorithm class will keep track of two value fields for the value
    * iteration and Q-learning algorithms respectively.
    *
    * @param mdp
    */
    public Algorithms(MarkovDecisionProblem mdp) {
    this.mdp = mdp;
    this.vf = new VField(mdp.getWidth(), mdp.getHeight());
    this.qf = new VField(mdp.getWidth(), mdp.getHeight());
    vf.initialize();
    qf.initialize();

    // The transition probabilities.
    side = mdp.getErrorProbability();
    fwd = mdp.getProbability();
    bck = mdp.getBackProbability();
    }

    /**
    * The Value Iterator algorithm. This algorithm will fill up its respective
    * (V)Field with its found values and policies.
    */
    public void ValueIterator() {
    double[] values = new double[4]; // 0=N,1=E,2=S,3=W
    Action[] actions = { Action.UP, Action.RIGHT, Action.DOWN, Action.LEFT };
    int x = mdp.getStateXPosition();
    int y = mdp.getStateYPostion();
    double rN = 0.0, rS = 0.0, rW = 0.0, rE = 0.0;
    double vNorth = 0.0, vEast = 0.0, vSouth = 0.0, vWest = 0.0;

    // V(NORTH) and reward of the northern state
    if (!mdp.getField(x, y + 1).equals(Field.OUTOFBOUNDS) && !mdp.getField(x, y + 1).equals(Field.OBSTACLE)) {
    vNorth = vf.getValue(x, y + 1);
    rN = mdp.getNextReward(x, y + 1);
    } else
    values[0] = Double.NEGATIVE_INFINITY;

    // V(SOUTH) and reward of the southern state
    if (!mdp.getField(x, y - 1).equals(Field.OUTOFBOUNDS) && !mdp.getField(x, y - 1).equals(Field.OBSTACLE)) {
    vSouth = vf.getValue(x, y - 1);
    rS = mdp.getNextReward(x, y - 1);
    } else
    values[2] = Double.NEGATIVE_INFINITY;

    // V(EAST) and reward of the eastern state
    if (!mdp.getField(x + 1, y).equals(Field.OUTOFBOUNDS) && !mdp.getField(x + 1, y).equals(Field.OBSTACLE)) {
    vEast = vf.getValue(x + 1, y);
    rE = mdp.getNextReward(x + 1, y);
    } else
    values[1] = Double.NEGATIVE_INFINITY;

    // V(WEST) and reward of the western state
    if (!mdp.getField(x - 1, y).equals(Field.OUTOFBOUNDS) && !mdp.getField(x - 1, y).equals(Field.OBSTACLE)) {
    vWest = vf.getValue(x - 1, y);
    rW = mdp.getNextReward(x - 1, y);
    } else
    values[3] = Double.NEGATIVE_INFINITY;

    // Calculating the values for the different actions
    double[] temp = new double[4];
    temp[0] = (fwd * (rN + gamma * vNorth) + side * (rE + gamma * vEast) + bck * (rS + gamma * vSouth)
    + side * (rW + gamma * vWest));
    temp[2] = (fwd * (rS + gamma * vSouth) + side * (rE + gamma * vEast) + bck * (rN + gamma * vNorth)
    + side * (rW + gamma * vWest));
    temp[1] = (fwd * (rE + gamma * vEast) + side * (rN + gamma * vNorth) + bck * (rW + gamma * vWest)
    + side * (rS + gamma * vSouth));
    temp[3] = (fwd * (rW + gamma * vWest) + side * (rN + gamma * vNorth) + bck * (rE + gamma * vEast)
    + side * (rS + gamma * vSouth));

    // Filtering out the impossible actions i.e. if the northern state is an
    // obstacle its value is filtered out of the arrays for the maximizing
    // part
    for (int v = 0; v < 4; v++) {
    if (values[v] != Double.NEGATIVE_INFINITY) {
    values[v] = temp[v];
    }
    }

    Action action = Action.UP;

    // Selecting the highest value and the corresponding action
    double maxi = Double.NEGATIVE_INFINITY;
    for (int i = 0; i < values.length; i++) {
    if (values[i] > maxi) {
    maxi = values[i];
    action = actions[i];
    }
    }
    //System.out.println("Max: " + maxi + " Action:" + action.toString());

    // Setting the newly found V value and policy
    vf.setValue(x, y, maxi);
    vf.setAction(x, y, action);
    }

    /**
    * The Q-Learning algorithm which fills up its respective (Q)Field with its
    * found values and policies.
    */
    public void qLearning() {
    double[] values = new double[4];
    int x = mdp.getStateXPosition();
    int y = mdp.getStateYPostion();
    double rN = 0.0, rS = 0.0, rW = 0.0, rE = 0.0;
    double qNorth = 0.0, qEast = 0.0, qSouth = 0.0, qWest = 0.0;

    // Q(NORTH,UP) and reward of the northern state
    if (!mdp.getField(x, y + 1).equals(Field.OUTOFBOUNDS) && !mdp.getField(x, y + 1).equals(Field.OBSTACLE)) {
    qNorth = qf.getValue(x, y + 1);
    rN = mdp.getNextReward(x, y + 1);
    } else
    values[0] = Double.NEGATIVE_INFINITY;

    // Q(SOUTH,DOWN) and reward of the southern state
    if (!mdp.getField(x, y - 1).equals(Field.OUTOFBOUNDS) && !mdp.getField(x, y - 1).equals(Field.OBSTACLE)) {
    qSouth = qf.getValue(x, y - 1);
    rS = mdp.getNextReward(x, y - 1);
    } else
    values[2] = Double.NEGATIVE_INFINITY;

    // Q(EAST,RIGHT) and reward of the eastern state
    if (!mdp.getField(x + 1, y).equals(Field.OUTOFBOUNDS) && !mdp.getField(x + 1, y).equals(Field.OBSTACLE)) {
    qEast = qf.getValue(x + 1, y);
    rE = mdp.getNextReward(x + 1, y);
    } else
    values[1] = Double.NEGATIVE_INFINITY;

    // Q(WEST,LEFT) and reward of the western state
    if (!mdp.getField(x - 1, y).equals(Field.OUTOFBOUNDS) && !mdp.getField(x - 1, y).equals(Field.OBSTACLE)) {
    qWest = qf.getValue(x - 1, y);
    rW = mdp.getNextReward(x - 1, y);
    } else
    values[3] = Double.NEGATIVE_INFINITY;


    Action[] actions = { Action.UP, Action.RIGHT, Action.DOWN, Action.LEFT };
    // 0=N,1=E,2=S,3=W

    // Here the Q[S',A'] values are calculated
    double[] temp = new double[4];
    temp[0] = qCalc(mdp, Action.UP, vf, qf, qNorth, qSouth, qWest, qEast, rN);
    temp[2] = qCalc(mdp, Action.DOWN, vf, qf, qNorth, qSouth, qWest, qEast, rS);
    temp[1] = qCalc(mdp, Action.RIGHT, vf, qf, qNorth, qSouth, qWest, qEast, rE);
    temp[3] = qCalc(mdp, Action.LEFT, vf, qf, qNorth, qSouth, qWest, qEast, rW);

    Action action = Action.UP;
    Random rand = new Random();
    double prob = rand.nextDouble();
    double maxi = Double.NEGATIVE_INFINITY;

    // The impossible states are filtered out i.e. obstacles
    for (int v = 0; v < 4; v++) {
    if (values[v] != Double.NEGATIVE_INFINITY) {
    values[v] = temp[v];
    //System.out.println(v + " " + values[v]);
    }
    }

    // E-Greedy
    // Decide randomly whether to choose the optimal policy or to perform a
    // random action
    if (prob < eta) {
    // Choose a random action; nextInt(4) covers all four directions (0-3)
    System.out.println(prob + "<" + eta);
    int randomIndex = rand.nextInt(4);
    maxi = temp[randomIndex];
    action = actions[randomIndex];
    } else {
    // Select the highest Q[S',A'] and the corresponding action
    for (int i = 0; i < values.length; i++) {
    if (values[i] > maxi) {
    maxi = values[i];
    action = actions[i];
    }
    }
    }
    double r = mdp.getReward();
    //System.out.println("Max: " + maxi + " Action:" + action.toString());

    // Q[S,A] <- Q[S,A] + alpha(r + gamma * max(Q[S',A']) - Q[S,A])
    double q = qf.getValue(x, y) + alpha * (r + gamma * maxi - qf.getValue(x, y));

    // Set the new value and policy, and decrease eta so that the amount of
    // random exploration lessens over time.
    qf.setValue(x, y, q);
    qf.setAction(x, y, action);
    eta -= 0.05;

    }

    /**
    * A separate function to calculate the Q[S',A'] value such that the
    * original function is not cluttered by all of these repeated lines.
    *
    * @param mdp
    * @param a
    * @param vf
    * @param qf
    * @param qNorth
    * @param qSouth
    * @param qWest
    * @param qEast
    * @return the corresponding Q[S',A'] values
    */
    private double qCalc(MarkovDecisionProblem mdp, Action a, VField vf, VField qf, double qNorth, double qSouth,
    double qWest, double qEast, double r) {
    if (a.equals(Action.UP))
    return r + gamma * (fwd * qNorth + side * qEast + bck * qSouth + side * qWest);
    if (a.equals(Action.DOWN))
    return r + gamma * (fwd * qSouth + side * qEast + bck * qNorth + side * qWest);
    if (a.equals(Action.RIGHT))
    return r + gamma * (fwd * qEast + side * qSouth + side * qNorth + bck * qWest);
    if (a.equals(Action.LEFT))
    return r + gamma * (fwd * qWest + bck * qEast + side * qNorth + side * qSouth);

    return 0.0;
    }

    /**
    * @return the current Value Iteration Field
    */
    public VField getVfield() {
    return vf;
    }

    /**
    * @return the current Q-Learning Field
    */
    public VField getQfield() {
    return qf;
    }

    /**
    * @param xPos
    * @param yPos
    * @return the value at the given location in the VField
    */
    public double getValue(int xPos, int yPos) {
    return vf.getValue(xPos, yPos);
    }

    }
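
    Below is a minimal driver sketch for the pasted Algorithms class. It is not part of the original paste: the no-argument MarkovDecisionProblem constructor and the performAction(Action) method are assumptions made for illustration, and only the getters that Algorithms itself calls are known to exist.

    package nl.ru.ai.vroon.mdp;

    public class AlgorithmsDemo {

        public static void main(String[] args) {
            // Assumed default constructor; the paste only shows the getters it uses.
            MarkovDecisionProblem mdp = new MarkovDecisionProblem();
            Algorithms algo = new Algorithms(mdp);

            // Each call updates only the state the agent currently occupies, so the
            // agent has to be moved between calls for the value fields to fill up.
            for (int step = 0; step < 1000; step++) {
                algo.ValueIterator();         // value-iteration backup for the current state
                algo.qLearning();             // Q-learning update for the current state
                mdp.performAction(Action.UP); // assumed method: advance the agent one step
            }

            System.out.println("V(0,0) = " + algo.getValue(0, 0));
        }
    }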
     
  12. Martecus

    Joined:
    13 Jul 2015
    Messages:
    116
    Likes Received:
    0
    I think I'll spare you all the gay porn.
     
  13. danielRV

    Joined:
    25 Feb 2017
    Messages:
    348
    Likes Received:
    142
  14. AriChibi

    Joined:
    7 Mar 2014
    Messages:
    1,391
    Likes Received:
    749
  15. Agesilaüs II van Sparta

    Joined:
    Monday
    Messages:
    61
    Likes Received:
    16
    Unless you want to see scantily clad or even unclad ladies, I don't think I'll be taking part in this game.