import numpy as np
import random
import os
import time
import pyprind
import matplotlib.pyplot as plt

class Racetrack:

"""
A class that implements reinforcement learning and applies it to the racetrack problem.
The class implements different algorithms that solve the problem:
1) Value Iteration
2) Q-learning
3) SARSA
4) TD- with function approximation
"""

def __init__(self, v_min=-5, v_max=5, gamma=0.9, action_probab=0.8, acceleration = (-1,0,1), learning_rate=0.2):
    """
    Store the hyper-parameters and initialise the bookkeeping attributes.

    v_min, v_max  -- smallest and largest value used to build the list of velocities
    gamma         -- discount rate
    action_probab -- probability of accelerating
    acceleration  -- values a single acceleration component may take
    learning_rate -- learning rate (for Q-learning and SARSA)
    """
    # all possible actions: every ordered pair of acceleration components,
    # with the first component of the pair varying fastest
    action_pairs = []
    for second in acceleration:
        for first in acceleration:
            action_pairs.append((first, second))
    self.actions = action_pairs

    # learning parameters
    self.gamma = gamma
    self.action_probab = action_probab
    self.learning_rate = learning_rate
    # if the change of a Q-value is below the threshold, it is assumed to be stabilized
    self.threshold = 0.02
    self.number_of_iterations = 50

    # velocity limits and the list of all velocities between them (inclusive)
    self.v_min = v_min  # minimum velocity
    self.v_max = v_max  # maximum velocity
    self.velocities = np.arange(v_min, v_max + 1, 1)

    # last recorded position and counter used to detect a stuck car
    self.y_stuck, self.x_stuck = 0, 0
    self.stuck_counter = 0

    self.print_racetrack_ = False
    self.number_of_steps = []


def load_track(self):
    """
    Read the racetrack file at self.track_path into self.track.

    The first line of the file is skipped. Every following line, with its
    newline removed, becomes one row of single characters; the rows are
    stored together as a numpy array.
    """
    self.track = []
    with open(self.track_path) as handle:
        next(handle, None)  # skip the first line
        rows = [list(raw_line.strip('\n')) for raw_line in handle]
    self.track = np.asarray(rows)

def start_position(self):
    """
    Put the car on a randomly chosen start cell.

    Start cells are the cells of self.track equal to 'S'; the chosen
    coordinates are written to self.y and self.x.
    """
    start_rows, start_cols = np.where(self.track == 'S')
    candidates = [(row, col) for row, col in zip(start_rows, start_cols)]
    self.y, self.x = random.choice(candidates)

def final_positions(self):
    """
    Collect the coordinates of all finish cells into self.final.

    Finish cells are the cells of self.track equal to 'F'. self.final is an
    integer array with one (y, x) row per finish cell, in row-major order.
    For a 2D track its shape is always (n, 2), even when the track has no
    finish cell, so column indexing such as self.final[:, 0] keeps working
    for n == 0.
    """
    # np.argwhere keeps the (n, 2) shape for n == 0; building the array from
    # a list of tuples collapsed the empty case to a 1D float array
    self.final = np.argwhere(self.track == 'F')

def update_velocities(self, action):
    """
    Apply an acceleration to the velocity of the car.

    action is a pair whose first element is added to self.v_y and whose
    second element is added to self.v_x. The two components are handled
    independently: a component is updated only when its new value stays
    within [self.v_min, self.v_max], otherwise it keeps its old value.
    """
    new_v_y = self.v_y + action[0]
    new_v_x = self.v_x + action[1]

    # check against both limits; comparing abs(v) with v_max alone ignored
    # v_min and allowed velocities below it when the limits are not symmetric
    if self.v_min <= new_v_x <= self.v_max:
        self.v_x = new_v_x
    if self.v_min <= new_v_y <= self.v_max:
        self.v_y = new_v_y

def within_track(self):
    """
    Check whether the current coordinates of the car are inside the environment.

    Returns True when self.y and self.x are valid non-negative indices into
    self.track, and False otherwise.
    """
    inside = (self.y < self.track.shape[0] and self.x < self.track.shape[1]
              and self.y >= 0 and self.x >= 0)
    return bool(inside)


def update_state(self, action, probability):
    """
    Advance the environment by one step: update the velocity and the position of the car.

    action      -- acceleration handed to update_velocities
    probability -- chance that the acceleration is applied; otherwise the
                   velocity is left as it is

    The car moves by its velocity. If it lands on an open cell but a wall
    ("#") lies between the old and the new position, the move is undone and
    the velocity is set to zero. If it lands on a wall or outside the track,
    return_to_track is called.
    """
    # accelerate only with the given probability
    if np.random.uniform() < probability:
        self.update_velocities(action)

    prev_y, prev_x = self.y, self.x

    # tentative move
    self.x += self.v_x
    self.y += self.v_y

    # prevent the car from going through a wall: inspect the cells between
    # the previous and the new position (the new position itself is open here)
    if self.within_track() and self.track[self.y, self.x] != "#":
        row_span = slice(min(self.y, prev_y), max(self.y, prev_y))
        col_span = slice(min(self.x, prev_x), max(self.x, prev_x))

        if self.v_y == 0:
            # horizontal move: cells of the current row
            crossed = self.track[prev_y, col_span]
        elif self.v_x == 0:
            # vertical move: cells of the current column
            crossed = self.track[row_span, self.x]
        else:
            # any other move: the rectangle spanned by both positions
            crossed = self.track[row_span, col_span]

        if "#" in crossed:
            self.y, self.x = prev_y, prev_x
            self.v_y, self.v_x = 0, 0

    # the car crashed into a wall or left the track
    if not self.within_track() or self.track[self.y, self.x] == "#":
        self.return_to_track()


def return_to_track(self):
    """
    Put the car back on the racetrack after a crash and stop it.

    What happens depends on self.start_from:
    1) "starting_position" -- start_position is called
    2) "nearest_position"  -- the car goes back to the position before the
       crash and retraces the move one cell at a time until the next cell
       would be a wall ("#") or would lie outside the track; it stays on the
       last cell reached before that
    For any other value the position is left unchanged.

    In every case the velocity is set to zero.
    """
    if self.start_from == "nearest_position":
        # go back to the position before the crash
        self.x -= self.v_x
        self.y -= self.v_y

        step_x, step_y = np.sign(self.v_x), np.sign(self.v_y)

        # order of the single-cell moves: 1 is a step along x, 0 a step along y;
        # the y steps are woven in between the x steps
        moves = [1 for _ in range(abs(self.v_x))]
        for k in range(abs(self.v_y)):
            moves.insert(2 * k + 1, 0)

        for along_x in moves:
            if along_x:
                self.x += step_x
            else:
                self.y += step_y

            # leaving the grid counts as an obstacle as well; otherwise the
            # car could end up outside the track
            if not self.within_track() or self.track[self.y, self.x] == "#":
                # undo the last step and stay on the last valid cell
                if along_x:
                    self.x -= step_x
                else:
                    self.y -= step_y
                break

    elif self.start_from == "starting_position":
        self.start_position()

    # set car velocity to zero
    self.v_y, self.v_x = 0, 0

def is_stuck(self):
    """
    Check whether the car has gotten stuck.

    The current position is compared with the one recorded by the previous
    call. Every consecutive call that finds the car on the recorded cell
    raises self.stuck_counter; any movement resets it to zero. Returns True
    once the counter has reached 4, otherwise False. The current position is
    recorded for the next call.
    """
    unmoved = bool(self.y_stuck == self.y and self.x_stuck == self.x)

    self.stuck_counter = self.stuck_counter + 1 if unmoved else 0
    self.y_stuck, self.x_stuck = self.y, self.x

    return unmoved and self.stuck_counter >= 4


def value_iteration_train(self):
   """
   method that implements the Value Iteration algorithm

   Performs self.episodes sweeps over every state (y, x, v_y, v_x), where
   (y, x) runs over all cells of self.track and v_y, v_x over self.velocities.

   For a wall cell ("#") the value is set to -10. For every other state and
   every action a:

       Q[state, a] = reward + gamma * (p * V[next] + (1 - p) * V[next_failed])

   where p is self.action_probab, next is the state reached from `state` by
   update_state(a, 1), next_failed is the state reached by
   update_state((0, 0), 1), and reward is 0 on a finish cell ("F") and -1
   elsewhere. V[state] is then set to the largest Q[state, a].

   After each sweep, Q and V of all cells listed in self.final are set to 0.

   self.y, self.x, self.v_y, self.v_x and self.reward are overwritten as
   scratch variables while sweeping.
   """
   num_of_iterations = 50 # NOTE(review): not used in the visible part of this method
   print("Algorithm: Value Iteration")
   print("Number of iterations:", self.episodes)
   print("\nProgress:\n")

   # create a progress bar object sized to the number of sweeps
   # NOTE(review): the bar is not advanced anywhere in the lines visible here
   bar = pyprind.ProgBar(self.episodes)

   for iteration in range(self.episodes):
         # iterate over all possible states
         for y in range(self.track.shape[0]):
            for x in range(self.track.shape[1]):
               for v_y in self.velocities:
                     for v_x in self.velocities:
                        # velocities are used directly as indices into V and Q
                        # NOTE(review): negative velocities therefore index from the end;
                        # assumes V and Q are arrays sized for that -- TODO confirm
                        if self.track[y, x] == '#':
                           # wall cells get a fixed value and are not expanded
                           self.V[y, x, v_y, v_x] = -10
                           continue

                        self.y, self.x, self.v_y, self.v_x = y, x, v_y, v_x

                        for a_index, a in enumerate(self.actions):
                           # no cost on a finish cell, a cost of 1 per step elsewhere
                           if self.track[y, x] == "F":
                                 self.reward = 0
                           else:
                                 self.reward = -1

                           # restore the state under evaluation, because update_state modifies it
                           self.y, self.x, self.v_y, self.v_x = y, x, v_y, v_x
                           # outcome when the acceleration is applied (probability argument 1)
                           self.update_state(a, 1)
                           new_state = self.V[self.y, self.x, self.v_y, self.v_x]

                           # restore again and take the outcome with zero acceleration,
                           # i.e. the case in which the acceleration fails
                           self.y, self.x, self.v_y, self.v_x = y, x, v_y, v_x
                           self.update_state((0, 0), 1)
                           new_state_failed = self.V[self.y, self.x, self.v_y, self.v_x]

                           # expectation over the two outcomes, weighted by the probability of accelerating
                           expected_value = self.action_probab*new_state +\
                           (1-self.action_probab)*new_state_failed
                           self.Q[y, x, v_y, v_x, a_index] = self.reward + self.gamma*expected_value

                        # the value of a state is the value of its best action
                        self.V[y, x, v_y, v_x] = np.max(self.Q[y, x, v_y, v_x])

         # finish cells are reset to zero after every sweep
         self.Q[self.final[:, 0], self.final[:, 1], :, :, :] = 0
         self.V[self.final[:, 0], self.final[:, 1], :, :] = 0

Statistical Programming Programming Assignment 5 – Data Prepar...