집단지성프로그래밍 05. 최적화(optimization.ipynb) 김지은_20150522

Section 01. 단체 여행

In [1]:

In [2]:

# optimization.py# schedule.txt (http://kiwitobes.com/optimize/schedule.txt)

# ppt

# 단체여행계획 수립에 있어서 최적화된 방법을 제시

import timeimport randomimport math

# 가족 구성원 정보people = [('Seymour','BOS'), ('Franny','DAL'), ('Zooey','CAK'), ('Walt','MIA'), ('Buddy','ORD'), ('Les','OMA')]# 라구아디아 공항destination='LGA'

# people, 가족구성원들의 정보('이름', '각자의 출발지 공항')

# 비행편데이터 샘플파일(schedule.txt)출발지,도착지,출발시간,도착시간,가격정보

LGA,OMA,6:19,8:13,239OMA,LGA,6:11,8:31,249LGA,OMA,8:04,10:59,136OMA,LGA,7:39,10:24,219LGA,OMA,9:31,11:43,210...

flights={} # 배열생성# for line in file('schedule.txt'): origin,dest,depart,arrive,price=line.strip().split(',') flights.setdefault((origin,dest),[]) # key 넣고

# Add details to the list of possible flights flights[(origin,dest)].append((depart,arrive,int(price))) # append로 value 붙이고

In [3]:

Section 02. 해답 표현하기

In [4]:

# 키는 출발지와 목적지 데이터로 # Key(origin,dest)# 상세 비행편 정보를 리스트값으로 하여, 딕셔너리에 로드# value(depart,arrive,int(price))

def getminutes(t): x=time.strptime(t,'%H:%M') return x[3]*60+x[4]

# getminutes() : 특정 일시에서 경과한 시간을 분 단위로 계산하는 유틸리티 함수# Next, 가족 구성원 각자가 탑승해야하는 비행기 정하기

# ppt# 해답 표현방식 결정하기# 다른 문제유형에도 잘 동작할 수 있도록 표현방식 선택

def printschedule(r): for d in range(len(r)/2): name=people[d][0] origin=people[d][1] out=flights[(origin,destination)][int(r[d*2])] ret=flights[(destination,origin)][int(r[d*2+1])] print '%10s%10s %5s-%5s $%3s %5s-%5s $%3s' % (name,origin, out[0],out[1],out[2], ret[0],ret[1],ret[2])

# 해답표현방식# 숫자리스트로 표현 s=[1,4,3,2,7,3,6,3,2,4,5,3] 그날의, 첫번째 비행기 : 0 두번재 비행기 : 1 세번째 비행기 : 2 네번째 비행기 : 3 ... ('Seymour','BOS')..., [1,4,...] 세이모어는 보스턴에서 뉴욕으로가는 두번째 비행기를 타고, 그날 보스턴으로 돌아오는 다섯번째 비행기를 타고,

In [5]:

Section 03. 비용함수

Seymour BOS 8:04-10:11 $ 95 12:08-14:05 $142 Franny DAL 10:30-14:57 $290 9:49-13:51 $229 Zooey CAK 17:08-19:08 $262 10:32-13:16 $139 Walt MIA 15:34-18:11 $326 11:08-14:38 $262 Buddy ORD 9:42-11:32 $169 12:08-14:47 $231 Les OMA 13:37-15:08 $250 11:07-13:24 $171

# import optimizations=[1,4,3,2,7,3,6,3,2,4,5,3]

# optimization.printschedle(s)printschedule(s)

# 몇가지 문제점1. 가격을 고려하지 않음2. 레스(Les)가 오마하(Omaha)로 돌아가는 비행기편인 오전 6시까지만 놀 수 있음 (책의 예제값에서는)

# 다양한 속성들에게 가중치를 부여하는 방법과 어떤 일정이 최적인지 판단하는 방법이 필요

# 비용? 해당 문제를 푸는데 소비되는 값, 돈이 많이들수록 안 좋은 거임.# 리턴되는 비용함수가 작을수록 최적의 함수# 해답이 나쁠수록 큰 값을 리턴하기

# PPT

# 비용부가변수티켓의 가격, 총 비행시간, 탑승 �기시간, 예약된 비행기를 놓쳐 발생하는 추가 비용, 자동차렌탈 추가비용

# 변수들의 비교: 결합하여 하나의 숫자로 만들기최적화 함수에는 가치를 비용으로 판단즉, 슈퍼에서 바나나, 시금치, 두부, 계란, 우유등을 "돈"이라는 단위로 환산하여 싸고 좋은것을 구매하는 것 처럼변수들도 "비용"이라는 단위로 환산하여 가치를 판단

추가지출 항목(가정)- 항공여행에서 시간을 줄여주는 서비스로 1분당 1달러의 비용이 소모- 공항에서 기다리는 시간을 줄이기 위해 0.5달러 비용이 소모- 자동차 렌탈 연장시 추가비용 소모

In [6]:

def schedulecost(sol): totalprice=0 latestarrival=0 # 가장 늦은 도착시간이함 = 0, 0부터 시작해서 가장 늦은 도착시간을 찾기 earliestdep=24*60 # 가장빠른 출발시간 하루24시간*60 = 총 하루를 분단위로 표현, 하루의 끝자락부터 시작해서 가장 빠른 항공편을 찾기

for d in range(len(sol)/2): # 출발, 도착 비행편을 얻음 origin=people[d][1] # d는 0부터 시작, 가족 구성원들의 출발지 정보를 얻음 outbound=flights[(origin,destination)][int(sol[d*2])] # 떠나는 항공편, d*2로 2자리씩 묶음 returnf=flights[(destination,origin)][int(sol[d*2+1])] # 되돌아오는 항공편, d*2+1로 outbound와 쌍으로 생각하여 처리 # 전체 가격은 모든 출발, 도착 비행편의 가격... 다 합치기 totalprice+=outbound[2] totalprice+=returnf[2] # 가장 늦은 도착 시간과 가장 이른 출발 시간을 추적하기, 변수로 인한 추가 지출을 생각해보기 위해 "추적"이라는 것을 수행 if latestarrival<getminutes(outbound[1]): latestarrival=getminutes(outbound if earliestdep>getminutes(returnf[0]): earliestdep=getminutes(returnf[0]) # 모든 사람들은 가장 늦게 도착하는 사람들을 공항에서 기다려야 함 # 또 이들은 도착하는 동시에 출발 비행편을 기다려야 함 totalwait=0 for d in range(len(sol)/2): origin=people[d][1] outbound=flights[(origin,destination)][int(sol[d*2])] returnf=flights[(destination,origin)][int(sol[d*2+1])] totalwait+=latestarrival-getminutes(outbound[1]) # 가족들 중 가장 늦게 출발하는 사람의 항공편 - 현재 출발시간 totalwait+=getminutes(returnf[0])-earliestdep # 되돌아오는 시간 - 가족들 중 가장 빨리 되돌아가는 사람의 항공권

# 렌탈한 자동차에 �해 추가로 비용이 지불되어야 하는지 점검. 있다면 $50 추가부담! if latestarrival>earliestdep: totalprice+=50 # 총 비용 리턴 return totalprice+totalwait

# 비용부가변수티켓의 가격, 총 비행시간, 탑승 �기시간, 예약된 비행기를 놓쳐 발생하는 추가 비용, 자동차렌탈 추가비용

# 변수들의 비교: 결합하여 하나의 숫자로 만들기최적화 함수에는 가치를 비용으로 판단즉, 슈퍼에서 바나나, 시금치, 두부, 계란, 우유등을 "돈"이라는 단위로 환산하여 싸고 좋은것을 구매하는 것 처럼변수들도 "비용"이라는 단위로 환산하여 가치를 판단

추가지출 항목(가정)- 항공여행에서 시간을 줄여주는 서비스로 1분당 1달러의 비용이 소모- 공항에서 기다리는 시간을 줄이기 위해 0.5달러 비용이 소모- 자동차 렌탈 연장시 추가비용 소모

In [7]:

Section 04. 무작위 검색

In [8]:

Out[7]:

4635

# reload(optimization)# optimization.schedulecost(s)schedulecost(s)

# Next, 비용 최소화# 비용 최소화를 위한 알고리즘 소개# ppt

# 정한 횟수만큼 무작위로 반복하여 결과를 도출하고 그 중 가장 효율적인 경우를 찾는 알고리즘# domain [(0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8)]- 출발 비행편 9개, 도착 비행편 9개- 사람별로 2번씩 반복# costf(): schedulecost()- 함수가 인자로 전달되어 재활용이 용이- 1000번 수행- 많이 돌리면 돌릴 수록 경우의 수가 많아지므로 좀 더 최적화된 결과를 얻을 수 있음

def randomoptimize(domain,costf): best=999999999 # 임의의 값으로 변수 초기화, 표현가능한 범위에서 젤 큰값 bestr=None for i in range(0,1000): # 1000번 반복 # 무작위 해답을 생성 r=[float(random.randint(domain[i][0],domain[i][1])) for i in range(len(domain))] # 비용 구하기 cost=costf(r) # random값을 넣은 매개변수를 가지고 costf(schedulecost)함수를 실행 # 이제까지의 최적 값과 비교 if cost<best: # 가장 cost가 적은 경우를 저장 best=cost bestr=r return r

In [9]:

In [11]:

Section 05. 언덕등반

Out[9]:

6104

Seymour BOS 17:11-18:30 $108 15:25-16:58 $ 62 Franny DAL 9:08-12:12 $364 9:49-13:51 $229 Zooey CAK 18:35-20:28 $204 10:32-13:16 $139 Walt MIA 12:05-15:30 $330 9:25-12:46 $295 Buddy ORD 8:25-10:34 $157 18:33-20:22 $143 Les OMA 15:03-16:42 $135 15:07-17:21 $129

# reload(optimization)# domain=[(0,8)] * (len(optimization.people)*2)# s=optimizationrandomoptimize(domain, optimization.schedulecost)# optimization.schedulecost(s)

domain=[(0,8)] * (len(people)*2)s=randomoptimize(domain,schedulecost)schedulecost(s)

# optimization.printschedule(s)printschedule(s)

# 일반 무작위 검색방법은 이 경우에서는 비효율적# ppt

# 무작위로 시작1. 해답의 이웃해답을 찾음2. 이웃해답 cost < 해답 cost - 해답 = 이웃해답 - goto 13. 이웃해답 == 해답 - 해답 return

In [10]:

In [11]:

In [12]:

Out[11]:

5383

Seymour BOS 18:34-19:36 $136 18:24-20:49 $124 Franny DAL 18:26-21:29 $464 9:49-13:51 $229 Zooey CAK 18:35-20:28 $204 6:58- 9:01 $238 Walt MIA 11:28-14:40 $248 12:37-15:05 $170 Buddy ORD 15:58-18:40 $173 7:50-10:08 $164 Les OMA 12:18-14:56 $172 15:07-17:21 $129

def hillclimb(domain,costf): # 무작위 해답 생성 sol=[random.randint(domain[i][0],domain[i][1]) # 무작위로 숫자 목록을 생성하여 초기해답 만들기 for i in range(len(domain))] # Main loop while 1: # 이웃 해법의 리스트를 생성 neighbors=[] for j in range(len(domain)): # 각 도메인의 항목마다 모든 이웃을 찾아 append() # 각 방향별로 한 개씩 선택 if sol[j]>domain[j][0]: neighbors.append(sol[0:j]+[sol[j]+1]+sol[j+1:]) if sol[j]<domain[j][1]: neighbors.append(sol[0:j]+[sol[j]-1]+sol[j+1:])

# 이웃 중에서 최적 해답을 찾기 current=costf(sol) # 무작위 초기해답을 현재상태로 가정 best=current for j in range(len(neighbors)): # 이웃을 돌아보면서 cost=costf(neighbors[j]) # 각 이웃들의 cost를 계산한다. if cost<best: best=cost # 더 나은 경우를 저장한다. sol=neighbors[j]

# 개선되지 않으면, 그게바로 최적해답 if best==current: # 메인 Loop가 1이르로 계속하여 반복하다가 더 이상 current가 생신되지 않으면 break return sol

# s=optimization.hillclimb(domain, optimization.schedulecost)# optimization.schedulecost(s)# optimization.printschedule(s)

s=hillclimb(domain, schedulecost)schedulecost(s)

printschedule(s)

Section 06. 시뮬레이티드 어닐링

In [13]:

# Hill Climb 알고리즘의 단점

# 국소최소점(local minimum) vs 전역최소점(global minimum)# 경사를 따라가는 hillclimb알고리즘의 경우 마지막 해답은 local minimum이 된다는 한계가 있음# 궁극적으로 최적화는 전체 어느 위치에서나 해답이 최적화된 global minimum을 지향해야 함

# ppt

# 국소최소(local minimum)를 회피하기# 어닐링(Annealing), 합금을 가열한 후 천천히 냉각시키는 과정 - 무작위 해답부터 시작 - 온도를 표현하는 변수로 고온에서 시작하여 점차 내려간다. # p=e(-(highcost-lowcost)/temperature)

def annealingoptimize(domain,costf,T=10000.0,cool=0.95,step=1): # T(temperature), cool은 옵션 # 무작위 값으로 초기화하기 vec=[float(random.randint(domain[i][0],domain[i][1])) # 무작위로 숫자 목록을 생성하여 초기해답 만들기 for i in range(len(domain))] while T>0.1: # 최소 온도가 Ý상 0.1 정도는 되어야 한다. 온도가 거의 0에 가까울때까지의 반복 # 해답내의 무작위(0~len(domain)-1) 인덱스 중 한개를 선택 i=random.randint(0,len(domain)-1)

# 변경할 방향을 선택. -step~step 사이의 랜덤값 dir=random.randint(-step,step)

# 변경할 값 중 하나로 새로운 리스트 생성 vecb=vec[:] vecb[i]+=dir if vecb[i]<domain[i][0]: vecb[i]=domain[i][0] # 새로운 값 넣기 elif vecb[i]>domain[i][1]: vecb[i]=domain[i][1]

# 현재 비용과 새로운 비용을 계산 ea=costf(vec) # 현재 해답(vec)의 비용 계산 eb=costf(vecb) # 새로운 해답(vecb)의 비용 계산 p=pow(math.e,(-eb-ea)/T) # 확률계산식

# 더 좋은지, 또는 확률이 차단 기준 이하인지 판단 if (eb<ea or random.random()<p): # 새로운 해답의 비용이 더 좋거나, 확률이 차단기준 이하라면, vec=vecb # 현재 해답으로

# 온도 내리기 T=T*cool return vec

In [14]:

In [15]:

Section 07. 유전자 알고리즘

Out[14]:

3798

Seymour BOS 17:11-18:30 $108 10:33-12:03 $ 74 Franny DAL 13:54-18:02 $294 17:14-20:59 $277 Zooey CAK 8:27-10:45 $139 10:32-13:16 $139 Walt MIA 11:28-14:40 $248 12:37-15:05 $170 Buddy ORD 15:58-18:40 $173 10:33-13:11 $132 Les OMA 9:15-12:03 $ 99 11:07-13:24 $171

# p=pow(math.e,(-eb-ea)/T) (pow : 파이썬 제곱함수)- T(온도)가 올라갈수록 데이터의 양이 많아짐- 온도가 거의 0이 될때까지 반복

# reload(optimization)# domain=[(0,8)] * (len(optimization.people)*2)# s=optimizationrandomoptimize(domain, optimization.schedulecost)# optimization.schedulecost(s)

s=annealingoptimize(domain, schedulecost)schedulecost(s)

printschedule(s)

# 초기온도와 냉각속도를 달리하며 테스트

# ppt

# 기존과 동일한 크기인 새로운 해답은,1. 최적 해답을 무작위로 돌연변이시키거나, 번식시켜 만듬2. 새로운 개체군에 순서를 매긴 후 다른 갲체군을 만드는 과정을 계속반복3. 정해진 횟수만큼 계속하거나 몇 세�에 걸쳐 개선이 없을 때까지 반복

# 인자- popsize : 개체군의 크기- mutprob : 개체군의 새로운 멤버에 교배보다 돌연변이가 발생할 확률- elite : 좋은 해답으로 간주되어 다음 세�로 전달될 개체군의 비율- maxiter : 진화시킬 세� 수

In [16]:

In [17]:

def geneticoptimize(domain,costf,popsize=50,step=1, mutprob=0.2,elite=0.2,maxiter=100): # 진화시킬 새� 수가 100 # Mutation Operation, 돌연변이 연산 def mutate(vec): i=random.randint(0,len(domain)-1) # i, 해답내의 무작위(0~len(domain)-1) 인덱스 중에서 하나를 선택함 if random.random()<0.5 and vec[i]>domain[i][0]: return vec[0:i]+[vec[i]-step]+vec[i+1:] elif vec[i]<domain[i][1]: return vec[0:i]+[vec[i]+step]+vec[i+1:] # Crossover Operation, 교배연산 def crossover(r1,r2): i=random.randint(1,len(domain)-2) # i, 해답내의 무작위(1~len(domain)-2) 인덱스 중에서 하나를 선택함 return r1[0:i]+r2[i:]

# Build the initial population, 초기 개체군 생성 pop=[] for i in range(popsize): vec=[random.randint(domain[i][0],domain[i][1]) for i in range(len(domain))] pop.append(vec) # append로 추가 # How many winners from each generation?, 각 세�의 선발된 엘리트 개체군 수를 계산 topelite=int(elite*popsize) # toplite = 0.2*50 = 10 # Main loop for i in range(maxiter): # 진화시킬 새� 수가 100, 100번 반복 scores=[(costf(v),v) for v in pop] scores.sort() ranked=[v for (s,v) in scores] # Start with the pure winners, 생존자들로 시작 pop=ranked[0:topelite] # Add mutated and bred forms of the winners, 생존자에 돌연변이와 번식을 수행 while len(pop)<popsize: if random.random()<mutprob:

# Mutation, 돌연변이 c=random.randint(0,topelite) pop.append(mutate(ranked[c])) else: # Crossover, 교배 c1=random.randint(0,topelite) c2=random.randint(0,topelite) pop.append(crossover(ranked[c1],ranked[c2])) # Print current best score print scores[0][0] return scores[0][1]

421241193960376637663654361535153421342133793356335633563356319331933193318831873104309830933093309330933093309330933093309330933093309330933093309330933093309330933093309330933093309330933093309330933093309330933093

# s=optimization.geneticoptimize(domain,optimization.schedulecost)s=geneticoptimize(domain,schedulecost)

3093309330933093309330933093309330933093309330933093309330933093309330933093309330933093309330933093309330933093309330933093309330933093309330933093309330933093309330933093309330933093

In [18]:

Seymour BOS 15:27-17:18 $151 10:33-12:03 $ 74 Franny DAL 10:30-14:57 $290 9:49-13:51 $229 Zooey CAK 15:23-17:25 $232 10:32-13:16 $139 Walt MIA 14:01-17:24 $338 8:23-11:07 $143 Buddy ORD 14:22-16:32 $126 7:50-10:08 $164 Les OMA 12:18-14:56 $172 8:04-10:59 $136

# optimization.printschedule(s)printschedule(s)

# ppt

Science

집단지성프로그래밍 05. 최적화(optimization.ipynb) 김지은_20150522