Upload
others
View
1
Download
0
Embed Size (px)
Citation preview
𝑠0𝑇𝑝𝑟𝑒𝑐𝑖𝑠𝑒𝑇𝑓𝑎𝑠𝑡
𝑠0𝑇𝑝𝑟𝑒𝑐𝑖𝑠𝑒𝑇𝑓𝑎𝑠𝑡
u
𝑠0𝑇𝑝𝑟𝑒𝑐𝑖𝑠𝑒𝑇𝑓𝑎𝑠𝑡
u
𝑠0𝑇𝑝𝑟𝑒𝑐𝑖𝑠𝑒𝑇𝑓𝑎𝑠𝑡
𝑎𝑡
𝑟𝑡+1
𝑠𝑡+1
𝑟𝑡
𝑠𝑡
𝑎𝑡
𝑟𝑡+1
𝑠𝑡+1
𝑟𝑡
𝑠𝑡
𝑟𝑡+1
𝑠𝑡+1
𝑟𝑡
𝑠𝑡
𝑎𝑡
$Q : \mathcal{S} \times \mathcal{A} \to \mathbb{R}$
$Q(s, a) = \sum_{j=1}^{l} \theta_j \, \phi_j(s, a)$
$\phi_j$, $\theta_j$
$r(s_t, a_t, s_{t+1})$
$a_t = \operatorname{argmax}_{a \in \mathcal{A}} Q(s_t, a)$
$\{\, x_1 - x_2 + x_3 \le 0,\; x_2 + x_3 + x_4 \le 0,\; x_2 - x_3 \le 0,\; x_3 + x_4 \le 0,\; x_4 - x_5 \le 0,\; x_4 - x_6 \le 0 \,\}$
$\{\, x_1 - x_2 + x_3 \le 0,\; x_2 - x_3 \le 0 \,\}$
$\{\, x_4 - x_5 \le 0,\; x_4 - x_6 \le 0 \,\}$
$\{\, x_2 + x_3 + x_4 \le 0,\; x_3 + x_4 \le 0 \,\}$
$\{\, x_1 - x_2 + x_3 \le 0,\; x_2 + x_3 + x_4 \le 0,\; x_2 - x_3 \le 0,\; x_3 + x_4 \le 0,\; x_4 - x_5 - x_6 = 0,\; x_4 - x_6 \le 0 \,\}$
$x_5 := x_4 - x_6$
𝑥1 − 𝑥2 + 𝑥3 ≤ 0,𝑥2 − 𝑥3 ≤ 0}
𝑥4 − 𝑥5 − 𝑥6 = 0𝑥4 − 𝑥6 ≤ 0
𝑥1 − 𝑥2 + 𝑥3 ≤ 0,𝑥2 + 𝑥3 + 𝑥4 ≤ 0,𝑥2 − 𝑥3 ≤ 0𝑥3 + 𝑥4 ≤ 0}
𝑥1 − 𝑥2 + 𝑥3 ≤ 0,𝑥2 + 𝑥3 + 𝑥4 ≤ 0,𝑥2 − 𝑥3 ≤ 0𝑥3 + 𝑥4 ≤ 0𝑥4 − 𝑥5 − 𝑥6 = 0}
𝑥5: = 𝑥4 − 𝑥6
𝒜
𝑥5: = 𝑥4 − 𝑥6
𝑠 𝑎
{𝑥1 − 𝑥2 + 𝑥3 ≤ 0,𝑥2 − 𝑥3 ≤ 0}
{𝑥4 − 𝑥5 ≤ 0,𝑥4 − 𝑥6 ≤ 0}
{𝑥7 = 0,𝑥8 + 𝑥7 ≤ 0}
𝑥7
𝑥1 𝑥8
𝜙𝑗 𝑠, 𝑎
𝑠
𝒏𝒔
𝒏𝒃
𝒏𝒉𝒃
𝒄𝒚𝒄
𝑟 𝑠𝑡, 𝑎𝑡, 𝑠𝑡+1
𝑠𝑡+1 𝑎𝑡
𝑥1 − 𝑥2 + 𝑥3 ≤ 0,𝑥2 − 𝑥3 ≤ 0}
𝑥4 − 𝑥5 − 𝑥6 = 0𝑥5 = 0𝑥6 ≤ −𝑥6 ≤
𝑠𝑡+1𝑠𝑡+1
𝑛𝑠 𝑥5
𝑛𝑏 𝑥4 𝑥6
𝑛ℎ𝑏 𝑥1
𝑐𝑦𝑐
$r(s_t, a_t, s_{t+1})$
$r(s_t, a_t, s_{t+1}) = 3 \cdot n_s + 2 \cdot n_b + n_{hb} - \log_{10}(\mathit{cyc})$
𝜙𝑗 𝑟(𝑠𝑡 , 𝑎𝑡 , 𝑠𝑡+1)
𝒜 𝜃𝑗
$Q(s, a) = \sum_{j=1}^{l} \theta_j \, \phi_j(s, a)$
$a_t = \operatorname{argmax}_{a \in \mathcal{A}} Q(s_t, a)$
•
•
•
≈ ≈
≈ ≈
u
u
𝑠0
𝑇𝑝𝑟𝑒𝑐𝑖𝑠𝑒𝑇𝑓𝑎𝑠𝑡
𝑟𝑡+1
𝑠𝑡+1
𝑟𝑡
𝑠𝑡
𝑎𝑡
𝒜
𝜙𝑗 𝑠, 𝑎
𝑟 𝑠𝑡 , 𝑎𝑡 , 𝑠𝑡+1
$a_t = \operatorname{argmax}_{a \in \mathcal{A}} Q(s_t, a)$
$Q(s, a) = \sum_{j=1}^{l} \theta_j \, \phi_j(s, a)$