archgame is a reversing challenge that was part of W&M CTF 2022. The challenge owes its name to the fact that it features a bunch of binaries of different architectures. For those interested, an archived version is available here.

Static Analysis

On opening the archive, we see a 28 MB ELF binary named ArchGame and a chall folder containing 50 smaller files named chall0 through chall50. One of these files, chall14, resembles valid aarch64 shellcode, while all other chall files contain seemingly random data.

ArchGame

While opening the ELF binary in a decompiler of our choice, we notice that the challenge authors thankfully provided debugging symbols for the binary. These symbols tell us that the binary is containing a statically compiled version of Unicorn, a multi-architecture CPU emulator.

When running the ELF binary, an init() function is called, which sets up std::map instance with entries for each of the 50 chall files. Each entry contains the challenges fileidx, arch*, mode, and a magic value, with the latter also beings the entry’s key.

We are then continuously asked to input a fake flag as a hexadecimal string. The entered fake flag is then converted to binary and stored in a global variable:

char g_input[16384];

int main() {
  puts("WMCTF --- ArchGame");
  puts("YOU MUST INPUT ALL fake flag!");
  init();
  char input[16384];
  vector<int> tracks;

  while ( 1 )
  {
    memset(g_input, 0, sizeof(g_input));
    printf("input your fake flag:");
    __isoc99_scanf("%s", input);
    hexs2bin(input, g_input);
    load_round();
    round_key = round();
    tracks.push_back(round_key);
    if ( round_key == 0xB7620858 )
      break;
    check_round();
    switch_round();
  }
  show_flag();
  exit(0);
}

Next, the load_round function is called. This function is responsible for preparing the execution of the next chall file:

void __cdecl load_round()
{
  code_info *code = code_info[round_key];
  g_mode = code->mode;
  g_arch = code->arch;

  std::string path = "challs/" + std::to_string(code->fileidx) + ".bin";
  load_code(path.c_str());
}

The load_code function is responsible for loading the binary file into memory. The loaded binary is xored with a four byte char array named global_key, explaining why we werent able to disassemble all but one of the chall files:

void load_code(const char *path)
{
  FILE *fp = fopen(path, "rb");
  if ( !fp )
  {
    printf("Failed to open file %s\n", path);
    exit(1);
  }
  fseek(fp, 0LL, SEEK_END);
  __int64 size = ftell(fp);
  if ( size > 0xA0000 )
  {
    printf("File %s is too big\n", path);
    fclose(fp);
    exit(1);
  }
  fseek(fp, 0LL, SEEK_SET);
  fread(g_code_data, size, 1uLL, fp);
  fclose(fp);
  for ( size_t i = 0LL; i < size; ++i )
    g_code_data[i] ^= *((_BYTE *)&global_key + (i & 3));
}

Finally, the round function is called, bringing the bundled Unicorn engine to life.

int __cdecl round()
{
  unsigned int next_key; // [rsp+0h] [rbp-30h] BYREF
  uc_err_0 err; // [rsp+4h] [rbp-2Ch]
  uc_hook trace1; // [rsp+10h] [rbp-20h] BYREF
  uint64_t stack_top64; // [rsp+18h] [rbp-18h] BYREF
  uint64_t stop_addr[2]; // [rsp+20h] [rbp-10h] BYREF

  uc_engine *uc;
  uc_err_0 err;
  
  // Initialize the emulator with the given arch and mode
  err = uc_open(g_arch, g_mode, &uc);
  if ( err )
  {
    printf(
      "Failed on uc_open() with error returned: %u (%s)\n",
      err, uc_strerror(err)
    );
    exit(1);
  }

  // Map the unxored binary into emulated memory.
  err = uc_mem_map(uc, 0LL, 0xA0000LL, 7LL);
  if ( err )
  {
    printf(
      "1Failed on uc_mem_map() with error returned: %u (%s)\n",
      (unsigned int)err, uc_strerror(err)
    );
    exit(1);
  }
  uc_mem_write(uc, 0LL, g_code_data, 655360LL);

  // Setup emulated stack.
  err = (unsigned int)uc_mem_map(uc, 0x70000000LL, 0x4000LL, 7LL);
  if ( err )
  {
    printf(
      "2Failed on uc_mem_map() with error returned: %u (%s)\n",
      err, uc_strerror(err)
    );
    exit(1);
  }

  // Write the fake flag to the emulated stack.
  uc_mem_write(uc, 0x70000000LL, g_input, 0x4000LL);
  uc_mem_map(uc, 0x20000000LL, 0x8000LL, 7LL);
  stack_top64 = 0x20007F00LL;
  stop_addr[0] = 0x70000100LL;
  switch ( g_arch )
  {
    case UC_ARCH_ARM:
      uc_reg_write(uc, 12LL, &stack_top64);
      uc_reg_write(uc, 10LL, stop_addr);
      break;
    case UC_ARCH_ARM64:
      uc_reg_write(uc, 4LL, &stack_top64);
      uc_reg_write(uc, 2LL, stop_addr);
      break;
    case UC_ARCH_MIPS:
      uc_reg_write(uc, 31LL, &stack_top64);
      uc_reg_write(uc, 33LL, stop_addr);
      break;
    case UC_ARCH_PPC:
      uc_reg_write(uc, 3LL, &stack_top64);
      uc_reg_write(uc, 74LL, stop_addr);
      break;
    case UC_ARCH_RISCV:
      uc_reg_write(uc, 3LL, &stack_top64);
      uc_reg_write(uc, 2LL, stop_addr);
      break;
    default:
      break;
  }

  // Setup the hook to stop the emulation when the execution reaches the stop_addr.
  uc_hook_add(uc, &trace1, 1008, hook_mem, 0, 1, 0LL);
  err = uc_emu_start(uc, 0LL, stack_top64, 0LL, 0LL);

  // After the emulation is finished, fetch the next_key from a fixed register.
  switch ( g_arch )
  {
    case UC_ARCH_ARM:
      uc_reg_read(uc, 66LL, &next_key);
      break;
    case UC_ARCH_ARM64:
      uc_reg_read(uc, 199LL, &next_key);
      break;
    case UC_ARCH_MIPS:
      uc_reg_read(uc, 4LL, &next_key);
      break;
    case UC_ARCH_PPC:
      uc_reg_read(uc, 5LL, &next_key);
      break;
    case UC_ARCH_RISCV:
      uc_reg_read(uc, 11LL, &next_key);
      break;
    default:
      break;
  }
  uc_close(uc);
  return next_key;
}

After the emulation is finished, next_key is returned. It is then validated using the check_round function:

void check_round()
{
  if ( code_info.count(round_key) != 1 )
  {
    puts("wrong flag.");
    exit(1);
  }
}

If the returned key points to an entry in the code_info map, the program updates the global_key variable used to decrypt the next chall file (pointed by the round_key) and asks for the next fake flag. It’s also keeping track of all rounds executed in an std::vector.

void __cdecl switch_round()
{
  global_key ^= round_key;
}

When we finally encounter a chall-binary that returns the magic value 0xB7620858, the program will calculate the flag from the rounds ran:

void __cdecl show_flag()
{
  std::string flag;
  char buffer[100];
  for (auto &round : tracks) {
    sprintf(buffer, "%08x", round);
    flag += buffer;
  }
  printf("your flag is wmctf{%s}", flag);
}

The Challenges

Our next job is to figure out which values the chall-binaries might return (for a given fake flag input). Since the first chall-binary, chall14, is xored with the global key 0x0, we can simply open it in our decompiler and figure things out. For the sake of completeness, here is the decompiled code for chall14:

uint64_t sub_0() {
void* input_area = 0x70000000
void** input_area_ptr = &input_area
void*** input_area_ptr_ptr = &input_area_ptr
uint32_t x8_5 = zx.d(***input_area_ptr_ptr)
int32_t result
if (x8_5 != 'f')
  int32_t* result_ptr = &result
  *result_ptr = 0x552859b4
else if (zx.d(*(input_area + 1)) != 'l')
  result = 0x552859b4
else if (zx.d(*(input_area + 2)) != 'a')
  result = 0x552859b4
else if (zx.d(*(input_area + 3)) != 'g')
  result = 0x552859b4
else
  void** input_area_ptr = &input_area
  void** input_area_ptr = input_area_ptr
  void*** input_area_ptr_ptr = &input_area_ptr
  if (zx.d(*(**input_area_ptr_ptr + 4)) != '{')
    int32_t* result_ptr = &result
    *result_ptr = 0x552859b4
    else
      void* input_area_6_ptr = input_area + 6
      if (zx.d(*(input_area + 5)) + zx.d(*input_area_6_ptr) == 0xdc)
        result = 0x76eab3e1
      else
        char input_area_6 = *(input_area + 6)
        int64_t var_190
        int64_t* var_78 = &var_190
        int64_t** var_80 = &var_78
        **var_80 = &input_area
        int64_t var_90 = var_190
        int64_t* var_98 = &var_90
        if (zx.d(input_area_6 - *(**var_98 + 7)) == 0xb3)
          result = 0x9b13d8a6
          else
            void var_198
            void* var_a8 = &var_198
            void** var_b0 = &var_a8
            **var_b0 = input_area + 7
            void* var_1a0 = &var_198
            void** var_c0 = &var_1a0
            void*** var_c8 = &var_c0
            uint32_t x8_53 = zx.d(****var_c8)
            void** var_1b0 = &input_area
            void*** var_1b8 = &var_1b0
            int64_t* var_1c0
            int64_t* var_d8 = &var_1c0
            *var_d8 = &var_1b8
            int64_t var_e8 = *var_1c0
            uint32_t x9_26 = zx.d(*(**var_e8 + 8))
            int32_t x13_1 = ((0x7da18f8 & not.d(x8_53))
                            | (x8_53 & 0xf825e707)) ^ 0xf825e707
            uint32_t x11_2 = not.d(x9_26)
            int32_t x8_59 =
              ((((x8_53 & 2) | (x13_1 & 0x80)) ^ 0x80)
              | (((x8_53 & 4 & 0xfffffffe) | (x13_1 & 1) u>> 0) ^ 4)
              | (x8_53 & 0x78)) ^ ((((x9_26 & 2) | (x11_2 & 0x80)) ^ 0x80) 
              | (((x11_2 & 4 & 0xfffffffe) | (x9_26 & 1) u>> 0) ^ 1)
              | (x9_26 & 0x78))
            if (x8_59 == 0x51)
              result = 0xe1797ab6
            else if (zx.d(*(input_area + 9) + *(input_area + 8)) == 0x2f)
              result = 0xb03250e5
            else
              void** var_f8 = &input_area
              void* var_108 = *var_f8 + 0xa
              if (zx.d(*(input_area + 9) - *var_108) == 0xf6)
                result = 0xb842f275
              else if (zx.d(*(input_area + 0xa) + *(input_area + 0xb)) == 0x7c)
                int32_t* var_118 = &result
                *var_118 = 0x38f646e6
              else
                result = 0x552859b4
return zx.q(result)
}

The resulting output is quite messy, but we can easily see that the binary might return one of several intermediate values. By comparing these intermediates with the keys of the std::map contained in the ELF binary, we can figure out that some of these intermediates are actually the keys of the std::map. Other intermediates are not, and we can safely ignore them.

Automation

With 50 binaries of different architectures in play, it is not feasible to manually figure out the possible keys for each. Instead, we might disassemble the binary and extract the intermediate values.

Being lazy I opted for another approach: Starting with a global_key of 0, we know that one of the intermediates returned by chall14 will decrypt one of the 49 other binaries. How do we know which one? We can simply emulate all of them and see which one runs without crashing. If we find a binary that does not crash, we assume that we found the correct round_key and update the global_key accordingly. We repeat this process until no more binaries can be decrypted, which means that we hopefully found our flag.

The script for this pretty much resembles the implementation of the challenge itself:

import json
import struct
from unicorn import Uc, UcError
from unicorn import arm_const, arm64_const, mips_const, ppc_const, riscv_const, unicorn_const as uc

def xor_data(data, key):
    return bytes([data[i] ^ key[i % len(key)] for i in range(len(data))])

def hook_code(uc, address, size, user_data):
    print(f'[+] Tracing instruction at 0x{address:x}')
    return


def simulate_challenge(challenge, xor_key):
    # Load the challenge binary
    challenge_path = f"./challs/chall{challenge['fileidx']}.bin"
    challenge_data = open(challenge_path, 'rb').read()
    print(f'Loading challenge: {challenge_path}) with key {xor_key.hex()}')
    challenge_data = xor_data(challenge_data, xor_key)

    # Fetch simulation parameters
    arch = challenge['arch']
    mode = challenge['mode']

    # Initialize the emulator
    emu = Uc(arch, mode)
    
    # Map the challenge binary into memory
    emu.mem_map(0x0, 0xA0000, uc.UC_PROT_ALL)
    emu.mem_write(0x0, challenge_data)

    # Map the input area into memory
    input_data = bytes('flag{asd}', 'utf-8')
    emu.mem_map(0x70000000, 0x4000, uc.UC_PROT_ALL)
    emu.mem_write(0x70000000, input_data)

    # Map the stack area into memory
    emu.mem_map(0x20000000, 0x8000, uc.UC_PROT_ALL)

    stack_top64 = 0x20007F00
    stop_addr = 0x70000100

    # Initialize registers
    if arch == uc.UC_ARCH_ARM:
        emu.reg_write(arm_const.UC_ARM_REG_SP, stack_top64)
        emu.reg_write(arm_const.UC_ARM_REG_LR, stop_addr)
    elif arch == uc.UC_ARCH_ARM64:
        emu.reg_write(arm64_const.UC_ARM64_REG_SP, stack_top64)
        emu.reg_write(arm64_const.UC_ARM64_REG_X30, stop_addr)
    elif arch == uc.UC_ARCH_MIPS:
        emu.reg_write(mips_const.UC_MIPS_REG_29, stack_top64)
        emu.reg_write(mips_const.UC_MIPS_REG_31, stop_addr)
    elif arch == uc.UC_ARCH_PPC:
        emu.reg_write(ppc_const.UC_PPC_REG_1, stack_top64)
        emu.reg_write(ppc_const.UC_PPC_REG_LR, stop_addr)
    elif arch == uc.UC_ARCH_RISCV:
        emu.reg_write(riscv_const.UC_RISCV_REG_X2, stack_top64)
        emu.reg_write(riscv_const.UC_RISCV_REG_X1, stop_addr)
    else:
        raise Exception(f"Unsupported architecture {arch}")

    # Start the simulation
    try:
        emu.emu_start(0x0, stop_addr, 0)
    except UcError as e:
        return False

    print(f'[+] Simulation finished')
    # Get the simulation result
    if arch == uc.UC_ARCH_ARM:
        next_key = emu.reg_read(arm_const.UC_ARM_REG_R0)
    elif arch == uc.UC_ARCH_ARM64:
        next_key = emu.reg_read(arm64_const.UC_ARM64_REG_X0)
    elif arch == uc.UC_ARCH_MIPS:
        next_key = emu.reg_read(mips_const.UC_MIPS_REG_2)
    elif arch == uc.UC_ARCH_PPC:
        next_key = emu.reg_read(ppc_const.UC_PPC_REG_3)
    elif arch == uc.UC_ARCH_RISCV:
        next_key = emu.reg_read(riscv_const.UC_RISCV_REG_X10)
    else:
        raise Exception(f"Unsupported architecture {arch}")

    return True


def key_from_chain(chain):
    key = struct.pack('<I', 0x0)
    for challenge in chain:
        round_key = struct.pack('<I', int(challenge['magic']))
        key = xor_data(key, round_key)
    return key


def solve():
    with open('challenges.json', 'r') as f:
        challenges = json.load(f)

    challenge_chain = [challenges['0']]

    while True:
        print(f'[+] Current key: {key_from_chain(challenge_chain).hex()}')

        for challenge in challenges.values():
            possible_key = key_from_chain(challenge_chain + [challenge])
            if (challenge['fileidx'] == 25 and possible_key == b'\x0d\xad\xce\x42'):
                # Skip looping challenge
                continue


            if simulate_challenge(challenge, possible_key):
                print(f'[+] Found next challenge: {challenge["fileidx"]}')
                challenge_chain.append(challenge)
                break
        else:
            print(f'[-] No next challenge found')
            flag_bytes = 'wmctf{'
            for challenge in challenge_chain[1:]:
                flag_bytes += f"{challenge['magic']:08x}"
            flag_bytes += f"{0xB7620858:08x}"
            flag_bytes += '}'

            print(f'[+] Flag: {flag_bytes}')
            return


if __name__ == '__main__':
    solve()