Question

Please see Question.pdf

Solution Preview

This material may consist of step-by-step explanations on how to solve a problem or examples of proper writing, including the use of citations, references, bibliographies, and formatting. This material is made available for the sole purpose of studying and learning - misuse is strictly forbidden.

import numpy as np
import random
import os
import time
import pyprind
import matplotlib.pyplot as plt


class Racetrack:
   
    """
    A class that implements reinforcement learning and applies it to the racetrack problem.
    The class implements different algorithms that solve the problem:
    1) Value Iteration
    2) Q-learning
    3) SARSA
    4) TD- with function approximation
    """
   
    def __init__(self, v_min=-5, v_max=5, gamma=0.9, action_probab=0.8, acceleration = (-1,0,1), learning_rate=0.2):
      
       self.actions = [(i,j) for j in acceleration for i in acceleration] # a list of all possible actions
       self.gamma = gamma # discount rate
       self.v_min = v_min # maximum velocity
       self.v_max = v_max # minimum velocity
       self.velocities = np.arange(v_min, v_max+1, 1) # list of all possible velocities
       self.action_probab = action_probab # the probability of accelerating
       self.learning_rate = learning_rate # learning rate (for Q-learning and SARSA)
       self.threshold = 0.02 # if the change of Q-value is below the threshold, we can assume that it is stabilized
      
       self.number_of_iterations = 50
      
       # keep track if the car gets stuck
       self.y_stuck = 0
       self.x_stuck = 0
       self.stuck_counter = 0
      
       self.print_racetrack_ = False
       self.number_of_steps = []
      
      
    def load_track(self):
       """
       method that reads the reactrack
       racetrack is stored as 2D numpy array
       """
       self.track = []
       # open the file
       with open(self.track_path) as file:
            track_lines = file.readlines()[1:] # skip the first line
            # iterate over all lines
            for line in track_lines:
                line = line.strip('\n')
                self.track.append(list(line))
       self.track = np.asarray(self.track)
   
    def start_position(self):
       """
       method that randomly selects starting position
       """
       start_positions = list(zip(*np.where(self.track == 'S')))
       self.y, self.x = random.choice(start_positions)
   
    def final_positions(self):
       """
       method that creates a list of final positions
       """
       positions = list(zip(*np.where(self.track == 'F')))
       self.final = np.asarray(positions)
   
    def update_velocities(self, action):
       """
       method that updates velocities
       """
       v_y_temp = self.v_y + action[0]
       v_x_temp = self.v_x + action[1]
      
       # velocity of the car is limited
       # update the velocity only if it is within limit
       if abs(v_x_temp) <= self.v_max:
            self.v_x = v_x_temp
       if abs(v_y_temp) <= self.v_max:
            self.v_y = v_y_temp
            
    def within_track(self):
       """
       function that checks if the current coordinates of the car are within the environment
       """
       if ((self.y >= self.track.shape[0] or self.x >= self.track.shape[1]) or
            (self.y<0 or self.x<0)):
            return False
       return True
      
      
    def update_state(self, action, probability):
       """
       method that updates the state state of the environment, i.e updates position and velocity of the car
       """
      
       # the probability of accelerating is 0.8
       if np.random.uniform() < probability:
            self.update_velocities(action) # update velocity
      
      
       y_temp, x_temp = self.y, self.x
      
       # update position
       self.x += self.v_x
       self.y += self.v_y
      
       """"
       prevent the car to go through the wall, so that if "#" character (wall) is between
       the current and the next position of the car, do not update position of the car
       """
       if self.within_track() and self.track[self.y, self.x] != "#":
            if self.v_y == 0:
                if "#" in self.track[y_temp, min(self.x, x_temp):max(self.x, x_temp)].ravel():
                   self.x = x_temp
                   self.v_y, self.v_x = 0, 0
                  
            elif self.v_x == 0:
                if "#" in self.track[min(self.y, y_temp):max(self.y, y_temp), self.x].ravel():
                   self.y = y_temp
                   self.v_y, self.v_x = 0, 0
                  
            elif self.v_x == self.v_y:
                if "#" in self.track[min(self.y, y_temp):max(self.y, y_temp), min(self.x, x_temp):max(self.x, x_temp)]:
                   self.x, self.y = x_temp, y_temp
                   self.v_y, self.v_x = 0, 0
            else:
                if "#" in self.track[min(self.y, y_temp):max(self.y, y_temp), min(self.x, x_temp):max(self.x, x_temp)].ravel():
                   self.x, self.y = x_temp, y_temp
                   self.v_y, self.v_x = 0, 0
               
       # if the car crashes into the wall, call method return_to_track
       if not self.within_track() or self.track[self.y, self.x] == "#":
            self.return_to_track()

      
    def return_to_track(self):
       """
       method that returns the car to the racetrack when it crashes into the wall
       there are two scenarios:
       1) return the car to the starting position
       2) return the car to the nearest open cell (where it crashed)
      
       """
      
       open_cells = ".FS"
      
       # return track to the nearest open cell
       if self.start_from == "nearest_position":
            # go back to the position before crash
            self.x += -self.v_x
            self.y += -self.v_y
            
            
            L = []
            for k in range(abs(self.v_x)):
                L.append(1)
            for k in range(abs(self.v_y)):
                L.insert(2*k+1, 0)
            
            for i in L:
                if i:
                   self.x += np.sign(self.v_x)
                   if self.within_track...
$48.00 for this solution

PayPal, G Pay, ApplePay, Amazon Pay, and all major credit cards accepted.

Find A Tutor

View available Computer Science - Other Tutors

Get College Homework Help.

Are you sure you don't want to upload any files?

Fast tutor response requires as much info as possible.

Decision:
Upload a file
Continue without uploading

SUBMIT YOUR HOMEWORK
We couldn't find that subject.
Please select the best match from the list below.

We'll send you an email right away. If it's not in your inbox, check your spam folder.

  • 1
  • 2
  • 3
Live Chats