39
Using Graphics Cards to Break Passwords Andrey Belenko [email protected]

Using Graphics Cards to Break Passwords

Embed Size (px)

Citation preview

Page 1: Using Graphics Cards to Break Passwords

Using Graphics Cards to Break Passwords

Andrey [email protected]

Introducing

A 2-day Conference On Passwords & PINs

December 7-8, 2010

The Selmer Center, University of Bergen, Norway

–

We can’t really think of anybody we know who doesn’t use passwords or PIN codes frequently.

Almost every day in fact. Passwords & PINs are everywhere. Many years have passed since better

authentication technologies were made available, such as 2-factor authentication and biometrics.

Still we have more and more passwords and PINs to remember. Why?

That question probably won’t be answered here.

What we will do is to present ongoing research, technologies and techniques that aids in the

recovery of passwords. Technologies that can be used for good - and for evil. With technologies

such as Graphics Processing Units and Rainbow Tables being utilized to greatly improve recovery

speeds of passwords, researchers are now talking about 12-character length passwords as the

minimum for being “secure”.

That’s a big step from what people are using today. 123456 just won’t do anymore as your password.

The usability aspect of passwords and PINs are becoming increasingly important. Many years ago

the Internet was a safe place to be. We didn’t really do much business or secret stuff there.

Nowadays we pay our bills, purchase new gadgets and talk to our family, colleagues and secret

lovers – right there on the Internet. Still, security at many sites is almost entirely left in the hands of

the end-user – no guarantees attached. Speaking of which; when did you last change your PINs?

–

We would like to welcome you to Passwords^10.

Covering attacks, defenses and usability of Passwords and PINs.

–

The conference is sponsored by NISNet.no

Twitter hashtag: #passwords10

Page 2: Using Graphics Cards to Break Passwords

Why use GPUs?

Page 3: Using Graphics Cards to Break Passwords

Core i7 die layout

Transistor count: 1.17B

Page 4: Using Graphics Cards to Break Passwords

Memory Controller

IO &

QPI

IO &

QPI L3 Cache L3 Cache

Queue

Core Core Core Core Core Core

Core i7 die layout

Transistor count: 1.17B

Page 5: Using Graphics Cards to Break Passwords
Page 6: Using Graphics Cards to Break Passwords

L2

L1

Exec

Paging

Branch pred.

Fetch & L1

Sched.

Decode & μ-code

Mem.

Page 7: Using Graphics Cards to Break Passwords

Core i7 die layout

Transistor count: 1.17B

Page 8: Using Graphics Cards to Break Passwords

10%

90%

CPU dedicates 1/10 of resources to calculations

Page 9: Using Graphics Cards to Break Passwords

GTX 480 die layout

Transistor count: 3B

Page 10: Using Graphics Cards to Break Passwords

GTX 480 die layout

Transistor count: 3B

Page 11: Using Graphics Cards to Break Passwords

30%

70%

• GPU dedicates 1/3 of resources to calculations

• 2.5x more transistors than CPU

• 7x more computing power overall

Page 12: Using Graphics Cards to Break Passwords

PBKDF2-SHA1 with 2000 iterations

i7-970

GTX 480

GTX 580

HD 5970

0K 50K 100K 150K 200K

195K

68K

60K

15.5K

Page 13: Using Graphics Cards to Break Passwords

How to use GPUs?

Page 14: Using Graphics Cards to Break Passwords

Basics

• GPUs are SIMD and excel at data-parallel tasks

• Program for GPU is called ‘kernel’

• Kernel runs in instances called threads

• Hardware takes care of thread scheduling

• Typical GPU has 100s of processors

• Need 1000s of threads to fully utilize GPU

Page 15: Using Graphics Cards to Break Passwords

Example: C=A+B

void sum (int c[], int a[], int b[]) {
    int Index = getThreadId();
    c[Index] = a[Index] + b[Index];
}

Kernel:

int A[10], B[10], C[10];
sum<<10>> (C, A, B);

Adding vectors:

Page 16: Using Graphics Cards to Break Passwords

Example: MD5

void md5 (uint8 *dataIn, uint8 *dataOut) {
    int Index = getThreadId();
    uint8 *in = dataIn + MD5_BLOCK_SIZE * Index;
    uint8 *out = dataOut + MD5_HASH_SIZE * Index;
    MD5( out, in, MD5_BLOCK_SIZE );
}

Kernel:

uint8 Src[10 * MD5_BLOCK_SIZE];
uint8 Dst[10 * MD5_HASH_SIZE];
md5<<10>> (Src, Dst);

Computing hashes:

Page 17: Using Graphics Cards to Break Passwords

GPU Computing Stack

GPU Hardware

High-level Language

Intermediate Language

ISA

Optimization goes here

Translation, no optimizations

Page 18: Using Graphics Cards to Break Passwords

GPU Computing Stack: GPU world is bipolar

NVIDIA ATI

CUDA C, OpenCL OpenCL

PTX IL

Not documented Documented for RV700 (48xx)

G80 (8xxx) and up RV670 (38xx) and up HW

HLL

IL

ISA

Page 19: Using Graphics Cards to Break Passwords

Breaking passwords: the CPU way

Generate password

H(p) Verify hash

Computing H(p) takes the most time, so offload it to the GPU

Page 20: Using Graphics Cards to Break Passwords

Breaking passwords: the GPU way

CPU CPUGPU

Generate passwords

H(p)

Verify hashesH(p)

H(p)

...

Page 21: Using Graphics Cards to Break Passwords

Breaking passwords: the GPU way

Generate passwords

Verify hashesH(p)

CPU CPUGPU

•If H(p) is fast, PCIe data transfers are the bottleneck

•E.g. if H(p) is SHA-1, theoretical peak is ~200M p/s

Solution is to offload everything to GPU

Page 22: Using Graphics Cards to Break Passwords

Breaking passwords: the GPU way

Generate passwords

Verify hashesH(p)

GPU GPUGPU

•If H(p) is fast, PCIe data transfers are the bottleneck

•E.g. if H(p) is SHA-1, theoretical peak is ~200M p/s

Solution is to offload everything to GPU

Page 23: Using Graphics Cards to Break Passwords

How to use GPUs? Implementation considerations

Page 24: Using Graphics Cards to Break Passwords

GPU Computing Stack

NVIDIA ATI

CUDA C, OpenCL OpenCL

PTX IL

Not documented Documented for RV700 (48xx)

G80 (8xxx) and up RV670 (38xx) and up HW

HLL

IL

ISA

Page 25: Using Graphics Cards to Break Passwords

Choosing language: CUDA C vs. PTX

• C code translates into PTX without optimizations

• Optimization is done when compiling PTX

• Intrinsics for device-specific instructions

No real reason for developing in PTX

Page 26: Using Graphics Cards to Break Passwords

Choosing language: OpenCL

• Portability requires compilation at runtime

• May take significant time and resources

• Compiler is part of driver ➯ testing hell

• Requires source code in HLL ➯ IP issues

• Implementations are not complete and vary across vendors

Not mature enough

Page 27: Using Graphics Cards to Break Passwords

Choosing language: ATI IL

• The only viable option if you love your users

• Access to device-specific instructions

• Best performance

• Not an option if you love your developers

• Poor documentation, poor samples

• Meaningless compiler errors, no debugger

Page 28: Using Graphics Cards to Break Passwords

Achieving performance

•Minimize data transfers

•Minimize memory accesses

•Or at least plan them carefully

•Minimize number of registers used

•Fewer registers used means more threads will run simultaneously

•Schedule enough threads to keep GPU processors busy

•Avoid thread divergence

Page 29: Using Graphics Cards to Break Passwords

Porting crypto to GPU

• Usually pretty straightforward

• MD5, SHA1 and the like require little to no changes

• Can be tricky sometimes

• RC4 requires many memory accesses, so careful layout is needed

• DES requires table lookups which are very expensive

Page 30: Using Graphics Cards to Break Passwords

Porting crypto to GPU: The DES

• Table lookups (s-boxes) are the bottleneck

• Avoid them by using bitslicing

• S-boxes replaced with logic functions

• 32 encryptions in parallel

• Requires many registers

• Performance depends on compiler heuristics

Page 31: Using Graphics Cards to Break Passwords

How to use GPUs? Real-world problems

Page 32: Using Graphics Cards to Break Passwords

Scalability: Not all GPUs created equal

1. Program should scale nicely with the number of processors on GPU

• Query processor count from the driver

• Partition task accordingly: numThreads = F(numProcessors)

• Also helps to avoid triggering watchdog and freezing screen

Page 33: Using Graphics Cards to Break Passwords

Scalability: 8 GPUs in system are not uncommon

2. Program should scale nicely with the number of GPUs

• Query device count from the driver

• Spawn CPU threads to control each device

• Partition task accordingly

Speedup should be linear unless you hit PCIe limits

Page 34: Using Graphics Cards to Break Passwords

Compatibility: Not everyone’s got Fermi. Yet.

• New hardware offers great new features

• Cache on Fermi

• bitalign instruction on RV770

• May require different optimization strategy

• May require separate codebase

• Support for legacy hardware shouldn’t be dropped

Be prepared to handle this sort of complexity

Page 35: Using Graphics Cards to Break Passwords

Including GPU code — Option 1: include PTX/IL code in your program

Pros

•Recommended way

•Forward compatibility

•No hardware required

Cons

•Compilation at runtime

•Can’t test all hardware

•IP issues

Page 36: Using Graphics Cards to Break Passwords

Including GPU code — Option 2: include pre-compiled GPU binaries

Pros

•No dependency on users’ driver

•No compilation at runtime

•Better IP protection

Cons

•May not work with future devices

•Need to precompile for every supported GPU

•No precompiled binary for GPU = no support

Page 37: Using Graphics Cards to Break Passwords

Questions?

Page 38: Using Graphics Cards to Break Passwords

Thank you

Page 39: Using Graphics Cards to Break Passwords

Using Graphics Cards to Break Passwords

Andrey [email protected]

Introducing

A 2-day Conference On Passwords & PINs

December 7-8, 2010

The Selmer Center, University of Bergen, Norway

–

We can’t really think of anybody we know who doesn’t use passwords or PIN codes frequently.

Almost every day in fact. Passwords & PINs are everywhere. Many years have passed since better

authentication technologies were made available, such as 2-factor authentication and biometrics.

Still we have more and more passwords and PINs to remember. Why?

That question probably won’t be answered here.

What we will do is to present ongoing research, technologies and techniques that aids in the

recovery of passwords. Technologies that can be used for good - and for evil. With technologies

such as Graphics Processing Units and Rainbow Tables being utilized to greatly improve recovery

speeds of passwords, researchers are now talking about 12-character length passwords as the

minimum for being “secure”.

That’s a big step from what people are using today. 123456 just won’t do anymore as your password.

The usability aspect of passwords and PINs are becoming increasingly important. Many years ago

the Internet was a safe place to be. We didn’t really do much business or secret stuff there.

Nowadays we pay our bills, purchase new gadgets and talk to our family, colleagues and secret

lovers – right there on the Internet. Still, security at many sites is almost entirely left in the hands of

the end-user – no guarantees attached. Speaking of which; when did you last change your PINs?

–

We would like to welcome you to Passwords^10.

Covering attacks, defenses and usability of Passwords and PINs.

–

The conference is sponsored by NISNet.no

Twitter hashtag: #passwords10