Files

Challenge

I ran this program but it never finished… maybe my computer is too slow. Maybe yours is faster?

Inizialmente non avevo letto che la challenge fosse di categoria math e quindi ho subito pensato a un’ottimizzazione del codice. Ho iniziato a riscriverlo in Python cachando le varie operazioni, ottenendo così le 3 operazioni fondamentali che vengono utilizzate all’interno del <main>:

@cached(cache={})
def pshufd(src,order):
    """Shuffle the four 32-bit lanes of a 128-bit integer (PSHUFD-style).

    ``src`` is treated as four dwords, lane 0 being the least significant.
    ``order`` is an 8-bit immediate made of four 2-bit selectors: the two
    lowest bits pick the source lane for result lane 0, the next two bits
    pick the source for result lane 1, and so on.

    Returns the shuffled value as an integer.
    """
    lane_width = 32
    selector_width = 2

    # 128-character big-endian bit string, cut into dwords and flipped so
    # that lanes[0] is the least-significant dword.
    src_bits = bin(src)[2:].rjust(128, "0")
    lanes = [
        src_bits[pos:pos + lane_width]
        for pos in range(0, len(src_bits), lane_width)
    ]
    lanes.reverse()

    # Selectors are read from the most-significant pair downwards, which is
    # also the order in which the result dwords are concatenated.
    order_bits = bin(order)[2:].rjust(8, "0")
    selectors = [
        int(order_bits[pos:pos + selector_width], 2)
        for pos in range(0, len(order_bits), selector_width)
    ]

    return int("".join(lanes[choice] for choice in selectors), 2)

@cached(cache={})
def pmulld(val1,val2):
    """Multiply two 128-bit values lane by lane (PMULLD-style).

    Both operands are treated as four packed 32-bit lanes. Each pair of
    lanes is multiplied and only the low 32 bits of the product are kept,
    so a lane can never overflow into its neighbour.

    The previous implementation rendered every product as a *decimal*
    string, padded it to 32 characters and parsed the concatenation as
    hexadecimal, which gave wrong results for any lane product >= 10.

    Returns the packed result as an integer.
    """
    lane_bits = 32
    lane_mask = (1 << lane_bits) - 1

    result = 0
    for lane in range(4):
        shift = lane * lane_bits
        left = (val1 >> shift) & lane_mask
        right = (val2 >> shift) & lane_mask
        # Keep only the low dword of the product, as the instruction does.
        result |= ((left * right) & lane_mask) << shift
    return result

@cached(cache={})
def paddd(val1,val2):
    """Add two 128-bit values lane by lane (PADDD-style).

    Both operands are treated as four packed 32-bit lanes. Each pair of
    lanes is added modulo 2**32, so a carry never leaks into the next lane.

    The previous implementation rendered every sum as a *decimal* string,
    padded it to 32 characters and parsed the concatenation as hexadecimal,
    which gave wrong results for any lane sum >= 10 and did not wrap.

    Returns the packed result as an integer.
    """
    lane_bits = 32
    lane_mask = (1 << lane_bits) - 1

    result = 0
    for lane in range(4):
        shift = lane * lane_bits
        left = (val1 >> shift) & lane_mask
        right = (val2 >> shift) & lane_mask
        # Wrap each lane independently; the carry out of a lane is dropped.
        result |= ((left + right) & lane_mask) << shift
    return result

Successivamente ho individuato che le funzioni venivano sempre chiamate con una sequenza ben precisa, quindi le ho rese delle funzioni:

@cached(cache={})
def m_fun(s1, s2, s3):
    """Lane-wise multiply-accumulate: ``paddd(pmulld(s1, s2), s3)``."""
    return paddd(pmulld(s1, s2), s3)


@cached(cache={})
def fun(s1, s2, s3, s4):
    """Return ``m_fun(s1, s2, s3)`` with ``s4`` added lane-wise on top."""
    return paddd(m_fun(s1, s2, s3), s4)

Ho copiato gli 80 byte di dati in reverse_data.
Ricostruendo il main:

def main():
    """Replay the binary's main loop step by step (first, brute-force attempt).

    Loads the 80-byte blob saved in ``reverse_data``, reads it as five
    16-byte little-endian integers, broadcasts every 32-bit lane of the
    first four with ``pshufd`` and then replays the loop body using
    ``pmulld``/``paddd`` through ``m_fun``/``fun``.

    NOTE(review): this is a debugging snapshot. The loop prints the first
    recomputed ``i3`` and calls ``exit(0)``, so every statement after that
    call is currently unreachable.
    """
    start_time = time.time()
    # Read the blob through a context manager so the handle is not leaked.
    with open('reverse_data', 'rb') as blob:
        data = blob.read()

    res = int.from_bytes(data[64:80], byteorder='little')
    i0 = pshufd(res, 0x15)
    i1 = pshufd(res, 0x45)
    i2 = pshufd(res, 0x51)
    i3 = pshufd(res, 0x54)

    # Author's note on the starting i0..i3 values:
    #    i = [
    #            [0, 0, 0, 1],
    #            [0, 0, 1, 0],
    #            [0, 1, 0, 0],
    #            [1, 0, 0, 0]
    #        ]

    counter = 0x112210f47de98115
    rax = 0

    d9 = int.from_bytes(data[:16], byteorder='little')
    d10 = int.from_bytes(data[16:32], byteorder='little')
    d13 = int.from_bytes(data[32:48], byteorder='little')
    d15 = int.from_bytes(data[48:64], byteorder='little')

    # s<row><col>: lane <col> of the <row>-th 16-byte value, broadcast to all
    # four lanes. The d9/0xaa and d10/0x55 broadcasts used to be called
    # ``sss5`` and ``s5``; the loop below refers to ``s11``, which was never
    # defined, so they now follow the same naming grid as the others.
    s00 = pshufd(d9, 0)
    s01 = pshufd(d9, 0x55)
    s02 = pshufd(d9, 0xaa)
    s03 = pshufd(d9, 0xff)

    s10 = pshufd(d10, 0)
    s11 = pshufd(d10, 0x55)
    s12 = pshufd(d10, 0xaa)
    s13 = pshufd(d10, 0xff)

    s20 = pshufd(d13, 0)
    s21 = pshufd(d13, 0x55)
    s22 = pshufd(d13, 0xaa)
    s23 = pshufd(d13, 0xff)

    s30 = pshufd(d15, 0)
    s31 = pshufd(d15, 0x55)
    s32 = pshufd(d15, 0xaa)
    s33 = pshufd(d15, 0xff)

    print(hex(s00), hex(s01), hex(s02), hex(s03))
    print(hex(s10), hex(s11), hex(s12), hex(s13))
    print(hex(s20), hex(s21), hex(s22), hex(s23))
    print(hex(s30), hex(s31), hex(s32), hex(s33))

    # Author's note on the matrix held in the data blob:
    #    A1 = [[16,15,14,13],
    #          [12,11,10, 9],
    #          [8 , 7, 6, 5],
    #          [4 , 3, 2, 1]]

    sres = int.from_bytes(data[72:76], byteorder='little')

    while rax != counter:
        # first column
        m6 = pmulld(s00, i3)
        m8 = pmulld(s10, i3)
        m11 = pmulld(s20, i3)
        m14 = pmulld(s30, i3)

        # --------------------
        m12 = m_fun(s01, i2, m6)  # xmm12s * xmm2 * xmm6 = xmm12
        # NOTE(review): the other rows multiply s12/s22/s32 by i1 at this
        # step, so this operand may be meant to be s02 -- confirm against
        # the disassembly.
        m5 = pmulld(s11, i1)  # xmm5s * xmm1 = xmm5
        i3 = fun(s03, i0, m5, m12)
        # Debug stop: dump the first recomputed i3 and quit.
        print(hex(i3))
        exit(0)

        mm5 = m_fun(s11, i2, m8)
        # NOTE(review): m11 is computed above but never used; the parallel
        # rows pass m6/m8/m14 here, so s20 may be meant to be m11 -- confirm.
        m7 = m_fun(s21, i2, s20)
        mm6 = pmulld(s12, i1)       #64c
        i2 = fun(s13, i0, mm6, mm5) #662

        mmm5 = pmulld(s22, i1)
        mmm6 = pmulld(s32, i1)      #680
        i1 = fun(s23, i0, mmm5, m7)

        # NOTE(review): i2 has already been overwritten at this point,
        # unlike in the three m_fun calls above -- confirm this is intended.
        m4 = m_fun(s31, i2, m14)
        i0 = fun(s33, i0, mmm6, m4)

        # inner-loop variables
        m4 = pshufd(sres, 0xaa)
        m5 = pshufd(i0, 0)
        edx = 0x3e8
        # internalCycle()   ignoring the overflow-handling inner cycle

        rax += 1
        if rax % 10000000 == 0:  # print the time taken for 10.000.000 cycles
            print(time.time()-start_time)  # 150 seconds every 10.000.000 cycles... mmm... wrong way

Dopo aver perso qualche ora cercando di risolverlo nel modo sbagliato, ho realizzato che la challenge era MATH!!! Ho analizzato come veniva modificata la matrice alla fine di ogni ciclo, ed era una semplice moltiplicazione:

A1 = [[16,15,14,13],
      [12,11,10, 9],
      [8 , 7, 6, 5],
      [4 , 3, 2, 1]]

A2 = A1*A1 = [[600 542 484 426]
            [440 398 356 314]
            [280 254 228 202]
            [120 110 100 90]]

A questo punto ho cercato il numero di volte che il ciclo doveva essere eseguito (fibonacci), scomponendolo in potenze di 2:

def calculate_exponentiation(exponent=1234567890123456789):
    """Decompose ``exponent`` into the powers of two that sum to it.

    Args:
        exponent: Non-negative integer to decompose. Defaults to
            1234567890123456789 (0x112210f47de98115), the value that was
            previously hard-coded in the function body.

    Returns:
        The positions of the set bits of ``exponent``, highest first, so
        that ``sum(2**b for b in result) == exponent``. Zero or a negative
        value yields an empty list.
    """
    bits = []
    remaining = exponent
    while remaining > 0:
        # Highest set bit of what is left; found directly rather than by
        # re-scanning from 2**0 after every subtraction.
        top = remaining.bit_length() - 1
        bits.append(top)
        remaining -= 1 << top
    return bits

Ho trovato che il ciclo interno serviva a gestire l’overflow:

def overflow(a):
    """Reduce the entries of a 4x4 matrix modulo 0x96433d, in place.

    Only entries strictly greater than the modulus are reduced; an entry
    equal to the modulus is left as it is. The same list object that was
    passed in is returned.
    """
    modulus = 0x96433d
    for row in range(4):
        cells = a[row]
        for col in range(4):
            # A single reduction always lands below the modulus, so one
            # conditional step is enough.
            if cells[col] > modulus:
                cells[col] %= modulus
    return a

Infine ho emulato la risoluzione della flag:

def emulation(tot):
    """Run the binary's flag-decoding snippet under Unicorn and print the result.

    ``tot`` is indexed as ``tot[r][c]`` for r, c in 0..3. Row ``r`` is packed
    into register XMM<r> with column 0 in the highest 32 bits and column 3 in
    the lowest. (Presumably ``tot`` is the final matrix; entries are expected
    to fit in 32 bits -- verify against the caller.)

    The machine code below, per my reading of the opcodes, spills XMM3..XMM0
    to the stack and XORs the 0x20 bytes at 0x601090 with bytes taken from
    that spill area. The 0x20 bytes are printed after emulation.
    """
    BASE = 0x400000
    STACK = 0x7ffcaf000000
    FLAG = 0x00600000
    flag_offset = 0x1090
    page = 1024*4

    code = bytes([
        0x66,0x0f,0x7f,0x1c,0x24,0x66,0x0f,0x7f,0x54,0x24,0x10,
        0x66,0x0f,0x7f,0x4c,0x24,0x20,0x66,0x0f,0x7f,0x44,0x24,
        0x30,0x31,0xc0,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,
        0x00,0x00,0x0f,0x1f,0x00,0x0f,0xb6,0x0c,0x44,0x30,0x88,
        0x90,0x10,0x60,0x00,0x0f,0xb6,0x4c,0x44,0x01,0x30,0x88,
        0x91,0x10,0x60,0x00,0x48,0x83,0xc0,0x02,0x48,0x83,0xf8,
        0x20,0x75,0xe1])
    flag = bytes([
        0xfc,0x14,0xeb,0x09,0xbc,0xae,0xe7,0x47,0x4f,0xe3,0x7c,
        0xc1,0x52,0xa5,0x02,0x8e,0x89,0x71,0xc8,0x8d,0x96,0x23,
        0x01,0x6d,0x71,0x40,0x5a,0xea,0xfd,0x46,0x1d,0x23,0x00,
        0x00,0x00,0x00,0x00,0x00,0x00,0x00])

    mu = Uc(UC_ARCH_X86, UC_MODE_64)
    for address, size in ((BASE, page), (STACK, page), (FLAG, 1024*1024)):
        mu.mem_map(address, size)

    mu.reg_write(UC_X86_REG_RSP, STACK)
    xmm_registers = (
        UC_X86_REG_XMM0,
        UC_X86_REG_XMM1,
        UC_X86_REG_XMM2,
        UC_X86_REG_XMM3,
    )
    for index, register in enumerate(xmm_registers):
        row = tot[index]
        packed = (row[0]<<96) + (row[1]<<64) + (row[2]<<32) + (row[3])
        mu.reg_write(register, packed)

    # Encrypted flag bytes go where the snippet expects them.
    mu.mem_write(FLAG + flag_offset, flag)

    mu.mem_write(BASE, code)
    mu.emu_start(BASE, BASE + len(code), 2 * UC_SECOND_SCALE)
    print(mu.mem_read(FLAG + flag_offset, 0x20))