Primeira vez aqui? Seja bem vindo e cheque o FAQ!
x

Como implementar o problema da locadora usando Programação Dinâmica?

0 votos
66 visitas
perguntada Jun 17, 2016 em Ciência da Computação por danielcajueiro (5,376 pontos)  

Aninha administra duas locadoras de automóveis para o aluguel
de carros. Todo dia, um número de clientes chega em cada locadora
para alugar carros. Se Aninha tem um carro disponível, ela aluga
este carro na locação e recebe 10 reais da matriz. Se ela não tem
carro disponível, então o negócio é perdido. Carros ficam
disponíveis para serem alugados no dia seguinte em que eles são
retornados. Para ajudar na sua disponibilidade de carros, Aninha
pode mover carros de uma locação a outra no meio da noite ao custo
de 1 real por carro movido. Assume-se que o número de carros
procurados e retornados em cada locadora são variáveis aleatórias
do tipo poisson, significando que a probabilidade é \(\frac{\lambda^n}{n!}\exp{(-\lambda)}\), onde
\(\lambda\) é o valor esperado e \(n\) é o número de carros. Suponha que \(\lambda\) é,respectivamente,
3
para o aluguel de carros nas primeira e segunda locadoras e 3
para o retorno dos carros nas primeira e segunda locadoras. Para
simplificar o problema assuma que não pode haver mais que 5
carros em nenhuma das locadoras (Como isso simplifica o problema?)
-- qualquer carro adicional será retornado a matriz e no máximo 3
carros podem ser transferidos por noite de uma locadora para a
outra (Como isso simplifica o problema?).

Compartilhe

1 Resposta

0 votos
respondida Jun 17, 2016 por danielcajueiro (5,376 pontos)  

Abaixo eu apresento duas soluções do problema. Uma utilizando iterações da função valor e a outra usando iterações da função política.

Função Valor:

#For Plot
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import matplotlib.pyplot as plt
import numpy as np

#For RL
import math
import numpy as np
from scipy.stats import poisson

def poisson_prob(n,lamb): # It evaluates the poisson probability
    #return poisson.pmf(n,lamb)
    return (math.pow(lamb,1.0*n)/(1.0*math.factorial(n)))*math.exp(-lamb)



def eval_probability_and_reward(maxNumberCars1,maxNumberCars2,profitPerCarRental,maxCarMoves,
    costPerCarMove,lambdaRequest1,lambdaRequest2,lambdaReturn1,lambdaReturn2):
    P=np.zeros([maxNumberCars1+1,maxNumberCars2+1,maxNumberCars1+1,maxNumberCars2+1,2*maxCarMoves+1])
    R=np.zeros([maxNumberCars1+1,maxNumberCars2+1,maxNumberCars1+1,maxNumberCars2+1,2*maxCarMoves+1])
    print "maxNumberCars1",maxNumberCars1
    print "maxNumberCars2",maxNumberCars2
    for state1 in range(maxNumberCars1+1):
        for state2 in range(maxNumberCars2+1):
            print "state1",state1
            print "state2",state2            
            for action in range(-maxCarMoves,maxCarMoves+1):
           #     print "action",action


                theAction=True 

                if(action>=0):
                    if(state1-action<0):
                        theAction=False
                    if(state2+action>maxNumberCars2):
                        theAction=False
                else:
                    if(state2+action<0):
                        theAction=False
                    if(state1-action>maxNumberCars1):
                        theAction=False
                # At night the cars are moved
                # These primeStates are the cars available in the morning

                if(not theAction):
                    for primeState1 in range(maxNumberCars1+1):
                        for primeState2 in range(maxNumberCars2+1):
                            P[state1,state2,primeState1,primeState2,action+maxCarMoves]=-1
                else:
                    # Returned cars are only available in the next day
                    for requests1 in range(maxNumberCars1+1):
                        for requests2 in range(maxNumberCars2+1):
                            for returns1 in range(maxNumberCars1+1):
                                for returns2 in range(maxNumberCars2+1):

                                    primeState1=state1-action
                                    primeState2=state2+action
                                    attendedRequests1=min(requests1,primeState1)
                                    attendedRequests2=min(requests2,primeState2)
                                    primeState1=min(primeState1-attendedRequests1+returns1,maxNumberCars1)
                                    primeState2=min(primeState2-attendedRequests2+returns2,maxNumberCars2)
                                    p=poisson_prob(requests1,lambdaRequest1)*\
                                    poisson_prob(requests2,lambdaRequest2)*\
                                    poisson_prob(returns1,lambdaReturn1)*\
                                    poisson_prob(returns2,lambdaReturn2)
                                    #print "p",p
                                    r=profitPerCarRental*(attendedRequests1+attendedRequests2)-\
                                    costPerCarMove*np.abs(action)

                                    P[state1,state2,primeState1,primeState2,action+maxCarMoves]=\
                                    P[state1,state2,primeState1,primeState2,action+maxCarMoves]+p

                                    R[state1,state2,primeState1,primeState2,action+maxCarMoves]=\
                                    R[state1,state2,primeState1,primeState2,action+maxCarMoves]+p*r
    return P,R



def value_iteration(maxNumberCars1,maxNumberCars2,profitPerCarRental,maxCarMoves,
    costPerCarMove,lambdaRequest1,lambdaRequest2,lambdaReturn1,lambdaReturn2,gamma):
    v=np.zeros([maxNumberCars1+1,maxNumberCars2+1])
    u=np.zeros([maxNumberCars1+1,maxNumberCars2+1])
    [P,R]=eval_probability_and_reward(maxNumberCars1,maxNumberCars2,profitPerCarRental,maxCarMoves,
    costPerCarMove,lambdaRequest1,lambdaRequest2,lambdaReturn1,lambdaReturn2)
    theta=0.000000001
    delta=theta+0.001
    while(delta>theta):
        # These first 2 for loops are related to the state
        delta=0
        oldV=v.copy()
        for state1 in range(maxNumberCars1+1):
            for state2 in range(maxNumberCars2+1):
                value=-np.inf
                for action in range(-maxCarMoves,maxCarMoves+1):
                    tentativeValue=0                    
                    for primeState1 in range(maxNumberCars1+1):
                        for primeState2 in range(maxNumberCars2+1):    
                            if(P[state1,state2,primeState1,primeState2,action+maxCarMoves]!=-1):
                                tentativeValue=tentativeValue+\
                                R[state1,state2,primeState1,primeState2,action+maxCarMoves]+\
                                (P[state1,state2,primeState1,primeState2,action+maxCarMoves]*\
                                gamma*v[primeState1,primeState2])
                    if(tentativeValue>value):
                        value=tentativeValue
                        optimalPolicy=action
                        #print optimalPolicy
                v[state1,state2]=value
                u[state1,state2]=optimalPolicy
                #print u
                delta=max(delta,np.abs(value-oldV[state1,state2]))
                #print "delta",delta
    return v,u,P,R

def plot_bidimensional_function(v):
    [m,n]=np.shape(v)
    fig = plt.figure()
    ax = fig.gca(projection='3d')
    X = range(m)
    Y = range(n)
    X, Y = np.meshgrid(X, Y)
#R = np.sqrt(X**2 + Y**2)
#Z = np.sin(R)
    surf = ax.plot_surface(X, Y, v, rstride=1, cstride=1, cmap=cm.coolwarm,
                       linewidth=0, antialiased=False)

    #ax.set_zlim(-1.01, 1.01)

    ax.zaxis.set_major_locator(LinearLocator(10))
    #ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))


    fig.colorbar(surf, shrink=0.5, aspect=5)

plt.show()


if __name__ == '__main__':

    # max number of cars
    maxNumberCars1=5
    maxNumberCars2=5

    # Costs and profits
    profitPerCarRental=10.0
    costPerCarMove=2
    # Parameters of the Poisson Distribution

    lambdaRequest1=3.0
    lambdaRequest2=4.0
    lambdaReturn1=3.0
    lambdaReturn2=2.0


    # Max number of car that can be transported at night
    maxCarMoves=3

    # discountFactor
    gamma=0.9


    [v,u,P,R]=value_iteration(maxNumberCars1,maxNumberCars2,profitPerCarRental,maxCarMoves,
    costPerCarMove,lambdaRequest1,lambdaRequest2,lambdaReturn1,lambdaReturn2,gamma)
    print v
    print u

    plot_bidimensional_function(v)
    plot_bidimensional_function(u)    

Função Política:

# For Plot
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import matplotlib.pyplot as plt
import numpy as np


#For RL
import math
import numpy as np
from scipy.stats import poisson

def poisson_prob(n,lamb): # It evaluates the poisson probability
    #return poisson.pmf(n,lamb)
    return (math.pow(lamb,1.0*n)/(1.0*math.factorial(n)))*math.exp(-lamb)



def eval_probability_and_reward(maxNumberCars1,maxNumberCars2,profitPerCarRental,maxCarMoves,
    costPerCarMove,lambdaRequest1,lambdaRequest2,lambdaReturn1,lambdaReturn2):
    P=np.zeros([maxNumberCars1+1,maxNumberCars2+1,maxNumberCars1+1,maxNumberCars2+1,2*maxCarMoves+1])
    R=np.zeros([maxNumberCars1+1,maxNumberCars2+1,maxNumberCars1+1,maxNumberCars2+1,2*maxCarMoves+1])
    print "maxNumberCars1",maxNumberCars1
    print "maxNumberCars2",maxNumberCars2
    for state1 in range(maxNumberCars1+1):
        for state2 in range(maxNumberCars2+1):
            #print "state1",state1
            #print "state2",state2            
            for action in range(-maxCarMoves,maxCarMoves+1):
           #     print "action",action


                theAction=True 

                if(action>=0):
                    if(state1-action<0):
                        theAction=False
                    if(state2+action>maxNumberCars2):
                        theAction=False
                else:
                    if(state2+action<0):
                        theAction=False
                    if(state1-action>maxNumberCars1):
                        theAction=False
                # At night the cars are moved
                # These primeStates are the cars available in the morning

                if(not theAction):
                    for primeState1 in range(maxNumberCars1+1):
                        for primeState2 in range(maxNumberCars2+1):
                            P[state1,state2,primeState1,primeState2,action+maxCarMoves]=-1
                else:
                    # Returned cars are only available in the next day
                    for requests1 in range(maxNumberCars1+1):
                        for requests2 in range(maxNumberCars2+1):
                            for returns1 in range(maxNumberCars1+1):
                                for returns2 in range(maxNumberCars2+1):

                                    primeState1=state1-action
                                    primeState2=state2+action
                                    attendedRequests1=min(requests1,primeState1)
                                    attendedRequests2=min(requests2,primeState2)
                                    primeState1=min(primeState1-attendedRequests1+returns1,maxNumberCars1)
                                    primeState2=min(primeState2-attendedRequests2+returns2,maxNumberCars2)
                                    p=poisson_prob(requests1,lambdaRequest1)*\
                                    poisson_prob(requests2,lambdaRequest2)*\
                                    poisson_prob(returns1,lambdaReturn1)*\
                                    poisson_prob(returns2,lambdaReturn2)
                                    #print "p",p
                                    r=profitPerCarRental*(attendedRequests1+attendedRequests2)-\
                                    costPerCarMove*np.abs(action)

                                    P[state1,state2,primeState1,primeState2,action+maxCarMoves]=\
                                    P[state1,state2,primeState1,primeState2,action+maxCarMoves]+p

                                    R[state1,state2,primeState1,primeState2,action+maxCarMoves]=\
                                    R[state1,state2,primeState1,primeState2,action+maxCarMoves]+p*r
    return P,R



def policy_iteration(maxNumberCars1,maxNumberCars2,profitPerCarRental,maxCarMoves,
    costPerCarMove,lambdaRequest1,lambdaRequest2,lambdaReturn1,lambdaReturn2,gamma):
    v=np.zeros([maxNumberCars1+1,maxNumberCars2+1])
    u=np.zeros([maxNumberCars1+1,maxNumberCars2+1])
    [P,R]=eval_probability_and_reward(maxNumberCars1,maxNumberCars2,profitPerCarRental,maxCarMoves,
    costPerCarMove,lambdaRequest1,lambdaRequest2,lambdaReturn1,lambdaReturn2)
    theta=0.000000001
    policyStable=False

    while(not policyStable):
        # PolicyEvaluation
        delta=theta+0.001
        while(delta>theta):
            delta=0
            oldV=v.copy()
            for state1 in range(maxNumberCars1+1):
                for state2 in range(maxNumberCars2+1):
                    value=-np.inf
                    tentativeValue=0                    
                    for primeState1 in range(maxNumberCars1+1):
                        for primeState2 in range(maxNumberCars2+1):    
                            action=u[state1,state2]
                            if(P[state1,state2,primeState1,primeState2,action+maxCarMoves]!=-1):
                                tentativeValue=tentativeValue+\
                                R[state1,state2,primeState1,primeState2,action+maxCarMoves]+\
                                (P[state1,state2,primeState1,primeState2,action+maxCarMoves]*\
                                gamma*v[primeState1,primeState2])
                    if(tentativeValue>value):
                        value=tentativeValue
                    v[state1,state2]=value
                    delta=max(delta,np.abs(value-oldV[state1,state2]))

        # Policy Improvement       
        policyStable=True
        for state1 in range(maxNumberCars1+1):
            for state2 in range(maxNumberCars2+1):
                value=-np.inf
                policy=u[state1,state2]
                for action in range(-maxCarMoves,maxCarMoves+1):
                    tentativeValue=0                    
                    for primeState1 in range(maxNumberCars1+1):
                        for primeState2 in range(maxNumberCars2+1):    
                            if(P[state1,state2,primeState1,primeState2,action+maxCarMoves]!=-1):
                                tentativeValue=tentativeValue+\
                                R[state1,state2,primeState1,primeState2,action+maxCarMoves]+\
                                (P[state1,state2,primeState1,primeState2,action+maxCarMoves]*\
                                gamma*v[primeState1,primeState2])
                    if(tentativeValue>value):
                        value=tentativeValue
                        optimalPolicy=action

                if(policy!=optimalPolicy):        
                    u[state1,state2]=optimalPolicy
                    policyStable=False


    return v,u,P,R        

def plot_bidimensional_function(v):
    [m,n]=np.shape(v)
    fig = plt.figure()
    ax = fig.gca(projection='3d')
    X = range(m)
    Y = range(n)
    X, Y = np.meshgrid(X, Y)
#R = np.sqrt(X**2 + Y**2)
#Z = np.sin(R)
    surf = ax.plot_surface(X, Y, v, rstride=1, cstride=1, cmap=cm.coolwarm,
                       linewidth=0, antialiased=False)

    #ax.set_zlim(-1.01, 1.01)

    ax.zaxis.set_major_locator(LinearLocator(10))
    #ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))


    fig.colorbar(surf, shrink=0.5, aspect=5)

plt.show()


if __name__ == '__main__':

    # max number of cars
    maxNumberCars1=5
    maxNumberCars2=5

    # Costs and profits
    profitPerCarRental=10.0
    costPerCarMove=2
    # Parameters of the Poisson Distribution

    lambdaRequest1=3.0
    lambdaRequest2=3.0
    lambdaReturn1=3.0
    lambdaReturn2=3.0


    # Max number of car that can be transported at night
    maxCarMoves=3

    # discountFactor
    gamma=0.9


    [v,u,P,R]=policy_iteration(maxNumberCars1,maxNumberCars2,profitPerCarRental,maxCarMoves,
    costPerCarMove,lambdaRequest1,lambdaRequest2,lambdaReturn1,lambdaReturn2,gamma)
    print v
    print u

    plot_bidimensional_function(v)
    plot_bidimensional_function(u)    
...