title:- T=['% A prolog program of dynamic allocation index --- a provisional version ' ,'% By Kenryo Indo (Kanto Gakuen University)' ,'% 4-11 Aug 2004' ,'% file: dai0.pl' ,'References:' ,'[1] Gittins, J.C. (1979). Bandit processes and dynamic allocation indicies.' ,' J. R. Stat. Soc. B 41:148-77.' ,'[2] Gittins, J.C. and Jones, D.M. (1979). A dynamic alloction index for the discounted multiarmed bandit problem.' ,' Biometrika 66(3): 561-5.'], forall(member(X,T),(nl,write(X))),nl. :- title. %-------------------------------------- % model of bandit process %-------------------------------------- :- dynamic max_of_alpha_plus_beta/1. max_of_alpha_plus_beta(141). state(Alpha,Beta):- (var(Alpha)->Alpha=A; A is Alpha), (var(Beta)->Beta=B; B is Beta), max_of_alpha_plus_beta(T), state_0(A + B = T-_K). state_0(Alpha + Beta = T-K):- max_of_alpha_plus_beta(T0), (T=0;natural_number_up_to(T0,T)), (K=0;natural_number_up_to(T,K)), T1 is T -K, (Alpha=0;natural_number_up_to(T1,Alpha)), Beta is T1 - Alpha. initial_state(A,B):- natural_number_up_to(10,A), natural_number_up_to(10,B). bandit(time(0),control,1). bandit(time(0),state,(A,B)):-initial_state(A,B). bandit(time(0),number_of_continue_before,0). bandit(time(0),number_of_stop_before,0). discount_factor(0.75). expected_reward(V,state(A,B),control(1)):- state(A,B), V is (A+1)/(A+B+2). expected_reward(0,state(_,_),control(0)). stop_criteria(U):- K is 100,natural_number_up_to(K,N),U is N/K. %--------------- % The index %--------------- % dai (or V-value) for a bandit process is the maxmum over (constant) stopping times of % averaged expected reward weighted by continuation probability for each state and trials. :- dynamic dai_0/3. dai(time(T-A-B=K),stop_by(U),R/W=V):- max_of_alpha_plus_beta(T0), natural_number_up_to(T0,T), T0>T, state_0(A+B=T-K), (clause(dai_0(time(T-A-B=K),stop_by(U),R/W=V),true)->true; (max(U,max(V, (stop_criteria(U), total_discounted_reward(R/W=V,T-A-B=K,stop_by(U)) ) )), retractall(dai_0(time(T-A-B=K),stop_by(_),_=V0)),(V0=V->true;read(y)), assert(dai_0(time(T-A-B=K),stop_by(U),R/W=V)) ) ). % tests: N=21, for a handy (about a day) simulation on my PC for the present. test_dai(Ttrue;!,fail), natural_number_up_to(N,T), state_0(A+B=T-K),A=<9,B=<9, dai(time(T-A-B=K),_,_),tell_goal('dai_out.txt',forall,dai_0(_,_,_)),fail. test_dai_0(T-A-B=K,S,D):- state_0(A+B=20-_),A=<9,B=<9,max(K,dai_0(time(T-A-B=K),S,D)). test_dai_0(T-A-B=K,S,D,true:D1,diff:X):- dai_0_140(time(140-A-B=_),_,_=D1),test_dai_0(T-A-B=K,S,_=D), X is D1 - D. test_dai_N(T):- init_cnt,forall(test_dai_0(T-A-B=K,_,D,E,F),(update_cnt(N),X=([N],T-A-B=K,D,E,F),write(X),nl)),nl,write(complete). test_dai_N(T,K):- init_cnt,forall(test_dai_0(T-A-B=K,_,D,E,F),(update_cnt(N),X=([N],T-A-B=K,D,E,F),write(X),nl)),nl,write(complete). inspect_growth_from_other_window(T-A-B=K):- ['dai_out.txt'], max(B,max(A,max(K,(test_dai_0(T-A-B=K,_,_))))). inspect_growth_1(T-A-B=K):- ['z:/Prolog/dai_out.txt'], max(A,max(K,(test_dai_0(T-A-B=K,_,_)))). %:- write(tamesi), %%%% about line 85 here. % averaged expected reward weighted by continuation probability %--------------------------------------------------- total_discounted_reward(R/1=R,T-A-B=1,stop_by(U)):- state_0(A+B=T-1), stop_criteria(U), expected_reward(R,state(A,B),control(1)). total_discounted_reward(R/W=E,T-A-B=2,stop_by(U)):- prob(Q0,success(0/1),state(A+B=T-2),stop_time(U)), prob(Q1,success(1/1),state(A+B=T-2),stop_time(U)), discount_factor(D), W = 1 + D * (Q0 + Q1), A1 is A + 1, B1 is B + 1, expected_reward(V0,state(A,B1),control(1)), expected_reward(V1,state(A1,B),control(1)), expected_reward(R1,state(A,B),control(1)), R = R1+ D * (Q1 * V1 + Q0 * V0), E is R/W. total_discounted_reward(R/W=E,T-A-B=3,stop_by(U)):- state_0(A+B=T-3), T1 is T -1, total_discounted_reward(R1/W1=_E1,T1-A-B=2,stop_by(U)), prob(Q0,success(0/2),state(A+B=T-3),stop_time(U)), prob(Q1,success(1/2),state(A+B=T-3),stop_time(U)), prob(Q2,success(2/2),state(A+B=T-3),stop_time(U)), discount_factor(D), W = W1 + D^2 * (Q0 + Q1 + Q2), A1 is A + 1, B1 is B + 1, A2 is A + 2, B2 is B + 2, expected_reward(V0,state(A,B2),control(1)), expected_reward(V1,state(A1,B1),control(1)), expected_reward(V2,state(A2,B),control(1)), R = R1 + D^2 * (Q2*V2 + Q1*V1 + Q0*V0), E is R/W. %:- write(tamesi), %%%% about line 130 here. % a generalization. total_discounted_reward(R/W=E,T-A-B=K,stop_by(U)):- state_0(A+B=T-K), K > 3, T1 is T -1, K1 is K -1, total_discounted_reward(R1/W1=_E1,T1-A-B=K1,stop_by(U)), findall(Q * V, ( (X=0;natural_number_up_to(K1,X)), A1 is A + X, B1 is B + K1 - X, expected_reward(V,state(A1,B1),control(1)), prob(Q,success(X/K1),state(A+B=T-K),stop_time(U)) ), LQV), sum_eq(LQV,EV,_), findall(Q, member(Q * V, LQV), LQ), sum_eq(LQ,QS,_), discount_factor(D), R = R1 + D^K1 * EV, W = W1 + D^K1 * QS, E is R/W. %:- write(tamesi), %%%% about line 157 here. % continuation probability (Q-value) %--------------------------------------------------- % after 2 trials with X successful pulls. prob(Q,success(X/1),state(A+B=T-2),stop_time(U)):- member((X,Y),[(1,0),(0,1)]), state_0(A+B=T-2), A1 is A + X, B1 is B + Y, dai(time(T-A1-B1=1),stop_by(U),V/1=V), P is (A+1)/(A+B+2), (V < U -> Q = 0; Q = P). % after 3 trials with X successful pulls. prob(Q,success(X/2),state(A+B=T-3),stop_time(U)):- state_0(A+B=T-3), stop_criteria(U), natural_number_up_to(3,X0), X is X0 - 1, findall(Q1*Q2, prob_item(Q1*Q2,success(X/2),state(A+B=T-3),stop_time(U)), LQ), sum(LQ,Q). %:- write(tamesi), %%%% about line 185 here. % a generalization. % after K trials with X successful pulls. prob(Q,success(X/K1),state(A+B=T-K),stop_time(U)):- state_0(A+B=T-K), K>3, K1 is K - 1, stop_criteria(U), natural_number_up_to(K,X0), X is X0 - 1, findall(ProductQ, prob_item(ProductQ,success(X/K1),state(A+B=T-K),stop_time(U)), LQ), sum(LQ,Q). prob_item(Q1*Q2,success(X/2),state(A+B=T-3),stop_time(U)):- member((X,X1,X2),[(0,0,0),(1,1,0),(1,0,1),(2,1,1)]), A1 is A + X, B1 is B + 2 -X, P1 is A1/(A1+B1), dai(time(T-A1-B1=1),stop_by(_U1),_=V1), %nl,write(dai(time(T-A1-B1=1),stop_by(U),_=V1)), (V1 < U -> Q1 = 0; Q1 = P1), A2 is A + X - X2, B2 is B + 1 - X1, P2 is (A2)/(A2+B2), dai(time(T-A2-B2=2),stop_by(_U2),_=V2), %nl,write(dai(time(T-A2-B2=2),stop_by(U),_=V2)), (V2 < U -> Q2 = 0; Q2 = P2). %:- write(tamesi), %%%% about line 219 here. % The terms of generalized prob/4. prob_item(Q,success(X/K1),state(A+B=T-K),stop_time(U)):- K>3, distribution_0(K1,X,_,D,0,[0,1]), findall(Qi, prob_item_0(Qi,[A+B=T-K,X,D,U],_V,_), QL), product(QL,Q). prob_item_0(Qi,[A+B=T-K,X,D,U],Vi,T-Ai-Bi=I):- nth1(I,D,Zi), Xi is X - Zi, Ai is A + Xi, Yi is K - I - Xi, Bi is B + Yi, Pi is (Ai)/(Ai+Bi), dai(time(T-Ai-Bi=I),stop_by(_Ui),_E=Vi), (Vi < U -> Qi = 0; Qi = Pi). %-------------------------------------- % common programs (utilities) %-------------------------------------- natural_number_up_to(M,N):- (var(M)->max_of_alpha_plus_beta(M);true), M1 is integer(M), length(L,M1), nth1(N,L,_). %distribution(length:K,sum_up_to:X,send:O,inventory:H,remain:R,allow:A):- % distribution_0(K,X,O,H,R,A). distribution_0(0,R,[],[],R,_A). distribution_0(K,M,[Y|O],[M1|H],R,A):- (var(A) -> (Y=0;natural_number_up_to(M,Y)) ; (member(Y,A),Y= X ). ranking(X,Goal,Ranking,ascend):- % X: the objective variable, % Goal: the objective function and constraints, setof((X,Goal),Goal,Z), sort(Z,Ranking), Ranking=[X|_]. ranking(X,Goal,Ranking,descend):- % X: the objective variable, % Goal: the objective function and constraints, ranking(X,Goal,Ranking0,ascend), reverse(Ranking0,Ranking). sum([],0). sum([X|Members],Sum):- sum(Members, Sum1), Sum is Sum1 + X. product([],1). product([X|Members], Prod):- product(Members, Prod1), Prod is Prod1 * X. sum_eq([],0,0). sum_eq([X|Members],E + X, Sum):- sum_eq(Members,E, Sum1), Sum is Sum1 + X. product_eq([],1,1). product_eq([X|Members],E*X, Prod):- product_eq(Members,E, Prod1), Prod is Prod1 * X. product_sum([],[],[],0). product_sum([P|Q],[A|B],[E|F],V):- length(Q,N), length(B,N), product_sum(Q,B,F,V1), E is P * A, V is V1 + E. average(U,G,A):- findall(U,G,B), length(B,N), sum(B,S), A is S/N. stdev(U,G,Y):- average(U,G,A), findall((U-A)^2,G,B), length(B,N), sum(B,S), Y is S/(N-1). bag0([],_A,0). bag0([C|B],A,N):- length([C|B],N), bag0(B,A,_N1), member(C,A). zeros(Zero,N):-bag0(Zero,[0],N). ones(One,N):-bag0(One,[1],N). binary(Bin,N):-bag0(Bin,[0,1],N). forall_write(X,G):- forall(G,(nl,write(X))). % redirect to file %-------------------------------- tell_goal(File,G):- (current_stream(File,write,S0)->close(S0);true), open(File,write,S), tell(File), nl, time_stamp('% file output start time ',_), nl, write('%---------- start from here ------------%'), nl, G, nl, write('%---------- end of data ------------%'), nl, time_stamp('% file output end time ',_), tell(user), close(S), % The following is to cope with the duplicated stream problem. (current_stream(File,write,S1)->close(S1);true). % save all successful goals to a file %-------------------------------- tell_goal(File,forall,G):- G0 = (nl,write(G),write('.')), G1 = forall(G,G0), tell_goal(File,G1). tell_goal(File,forall_such_that,G,Condition):- % G should be occurred in the Condition. WRITE = (nl,write(G),write('.')), G1 = forall(Condition,WRITE), tell_goal(File,G1). % time stamp %-------------------------------- time_stamp(no,T):- get_time(U), convert_time(U,A,B,C,D,E,F,_G), T = [date(A/B/C), time(D:E:F)], nl. time_stamp(Word,T):- \+ var(Word), Word \= no, get_time(U), convert_time(U,A,B,C,D,E,F,_G), T = [date(A/B/C), time(D:E:F)], % format('~`.t~t~a~30|~`.t~t~70|~n',[Word]), write((Word,T)), nl. %counter %---------------------- :- dynamic cnt/1. cnt(0). init_cnt:- abolish(cnt/1),assert(cnt(0)). update_cnt(N):- retract(cnt(B)),N is B+1,assert(cnt(N)). % Table 1 in Gittins and Jones (1979). %-------------------------------------------- % note: Each parameter in the table 1 cooresponds to that of Gittins(1979) % who describes the algorithm above modeled minus 1. dai_0_140(time(140-A-B=K),stop_by(U),_=U):- dai_140(B1,A1,U),A is A1-1, B is B1 -1, K is 140 - A - B. dai_140(1,1,0.6211). dai_140(1,2,0.7465). dai_140(1,3,0.8062). dai_140(1,4,0.8419). dai_140(1,5,0.8659). dai_140(1,6,0.8833). dai_140(1,7,0.8965). dai_140(1,8,0.9069). dai_140(1,9,0.9153). dai_140(1,10,0.9223). dai_140(2,1,0.4256). dai_140(2,2,0.5760). dai_140(2,3,0.6607). dai_140(2,4,0.7159). dai_140(2,5,0.7548). dai_140(2,6,0.7841). dai_140(2,7,0.8068). dai_140(2,8,0.8251). dai_140(2,9,0.8401). dai_140(2,10,0.8526). dai_140(3,1,0.3182). dai_140(3,2,0.4641). dai_140(3,3,0.5554). dai_140(3,4,0.6191). dai_140(3,5,0.6659). dai_140(3,6,0.7023). dai_140(3,7,0.7312). dai_140(3,8,0.7548). dai_140(3,9,0.7745). dai_140(3,10,0.7912). dai_140(4,1,0.2519). dai_140(4,2,0.3871). dai_140(4,3,0.4773). dai_140(4,4,0.5436). dai_140(4,5,0.5946). dai_140(4,6,0.6348). dai_140(4,7,0.6673). dai_140(4,8,0.6946). dai_140(4,9,0.7176). dai_140(4,10,0.7372). dai_140(5,1,0.2073). dai_140(5,2,0.3307). dai_140(5,3,0.4182). dai_140(5,4,0.4838). dai_140(5,5,0.5360). dai_140(5,6,0.5784). dai_140(5,7,0.6134). dai_140(5,8,0.6428). dai_140(5,9,0.6678). dai_140(5,10,0.6896). dai_140(6,1,0.1755). dai_140(6,2,0.2883). dai_140(6,3,0.3713). dai_140(6,4,0.4359). dai_140(6,5,0.4875). dai_140(6,6,0.5306). dai_140(6,7,0.5669). dai_140(6,8,0.5978). dai_140(6,9,0.6244). dai_140(6,10,0.6476). dai_140(7,1,0.1518). dai_140(7,2,0.2550). dai_140(7,3,0.3334). dai_140(7,4,0.3961). dai_140(7,5,0.4473). dai_140(7,6,0.4899). dai_140(7,7,0.5266). dai_140(7,8,0.5584). dai_140(7,9,0.5860). dai_140(7,10,0.6102). dai_140(8,1,0.1335). dai_140(8,2,0.2285). dai_140(8,3,0.3025). dai_140(8,4,0.3627). dai_140(8,5,0.4129). dai_140(8,6,0.4553). dai_140(8,7,0.4916). dai_140(8,8,0.5236). dai_140(8,9,0.5518). dai_140(8,10,0.5767). dai_140(9,1,0.1190). dai_140(9,2,0.2067). dai_140(9,3,0.2767). dai_140(9,4,0.3343). dai_140(9,5,0.3832). dai_140(9,6,0.4249). dai_140(9,7,0.4611). dai_140(9,8,0.4928). dai_140(9,9,0.5212). dai_140(9,10,0.5465). dai_140(10,1,0.1072). dai_140(10,2,0.1886). dai_140(10,3,0.2547). dai_140(10,4,0.3100). dai_140(10,5,0.3537). dai_140(10,6,0.3983). dai_140(10,7,0.4341). dai_140(10,8,0.4657). dai_140(10,9,0.4937). dai_140(10,10,0.5192). /* % tentatively generated data by dai0.pl, of a goal test_dai/2 % compared with T=140 data in Gittins & Jones (1979). ?- test_dai(T<21,_). [T-A-B=<1], go ahead(y/n):y. [T-A-B=<2], go ahead(y/n):y. [T-A-B=<3], go ahead(y/n):y. [T-A-B=<4], go ahead(y/n):y. % another terminal. ?- inspect_growth_from_other_window(A). % dai_out.txt compiled 0.11 sec, 46,172 bytes A = 13-9-0=4 Yes ?- test_dai_N(_,_). [1], 4-0-0=4, 0.604839, true:0.6211, diff:0.016261 [2], 5-1-0=4, 0.723232, true:0.7465, diff:0.023268 [3], 6-2-0=4, 0.785294, true:0.8062, diff:0.020906 [4], 7-3-0=4, 0.824112, true:0.8419, diff:0.017788 [5], 8-4-0=4, 0.85085, true:0.8659, diff:0.01505 [6], 9-5-0=4, 0.870445, true:0.8833, diff:0.012855 [7], 10-6-0=4, 0.885446, true:0.8965, diff:0.011054 [8], 11-7-0=4, 0.89731, true:0.9069, diff:0.00959 [9], 12-8-0=4, 0.906932, true:0.9153, diff:0.008368 ... [93], 15-2-9=4, 0.245336, true:0.2547, diff:0.009364 [94], 16-3-9=4, 0.301453, true:0.31, diff:0.008547 [95], 17-4-9=4, 0.34918, true:0.3537, diff:0.00452 [96], 17-5-9=3, 0.387594, true:0.3983, diff:0.010706 [97], 18-6-9=3, 0.423882, true:0.4341, diff:0.010218 [98], 19-7-9=3, 0.456111, true:0.4657, diff:0.009589 [99], 20-8-9=3, 0.484839, true:0.4937, diff:0.008861 [100], 21-9-9=3, 0.510619, true:0.5192, diff:0.008581 complete Yes ?- tell_goal('dai_run.txt',forall,(A). test_dai_N(_,_)). Yes ?- % You can analyze read the file and analyze it by using spreadsheet and so on. */ % end of program