Mixing x86 with x64 code (混合编写x86和x64代码)

几个月前我小小的研究了在WOW64下的32位进程中运行native x64代码。

第二个设想是在64位进程下运行x86代码。它们都是可以的，如我google的一样，

已经有人在使用这两种方法了：

当我研究的时候还没有看上面搜索到的结果，所以下面仅仅代表我自己的见解；)

x86 <-> x64 Transition（x86和x64之间的转换）

要了解x86和x64之间的转换是如何完成的，最简单的方法是对比32位Windows和64位Windows中32位版本ntdll.dll里的任意一个syscall：

32-bits ntdll from Win7 x86:

	mov     eax, X
	mov     edx, 7FFE0300h
	call    dword ptr [edx]          ;ntdll.KiFastSystemCall
	retn    Z

32-bits ntdll from Win7 x64:

	mov     eax, X
	mov     ecx, Y
	lea     edx, [esp+4]
	call    dword ptr fs:[0C0h]      ;wow64cpu!X86SwitchTo64BitMode
	add     esp, 4
	ret     Z

正如你所见，在64位系统上，对 fs:[0xC0](wow64cpu!X86SwitchTo64BitMode) 的调用
取代了标准的 ntdll.KiFastSystemCall 调用。

wow64cpu!X86SwitchTo64BitMode 执行了一个简单的远跳转到64位的段中了：

	wow64cpu!X86SwitchTo64BitMode:
	748c2320 jmp     0033:748C271E   ;wow64cpu!CpupReturnFromSimulatedCode

这就是64位Windows系统上转换x64和x86后面的魔术。

此外，它也能在非WoW64进程(标准的native 64位应用程序)中工作，所以32位代码也能运行在64位应用程序中。

总结一下，运行在64位Windows中的每个进程(x86和x64)，都分配了两个代码段：

cs = 0x23 -> x86 mode
cs = 0x33 -> x64 mode

Running x64 code inside 32-bits process（在32位进程中运行x64代码）

首先，我准备了一些宏，将用它来标记64位代码的开始和结尾：

// Emits the single raw byte `a` into the instruction stream through the
// inline assembler's __emit; the macros below use it to hand-encode opcodes.
#define EM(a) __asm __emit (a)
 
// Far-returns into the code segment selected by _cs, continuing execution at
// the instruction that follows the macro (X64_Start passes 0x33 to enter
// x64 mode).
// Sequence: push the selector, push the address of the `add` via call $+5,
// advance that address by 5 (4-byte add + 1-byte retf) so it points just past
// the retf, then retf pops offset:selector and resumes there under _cs.
// The target is computed from the pushed return address, so the sequence is
// position independent.
#define X64_Start_with_CS(_cs) \
{ \
	EM(0x6A) EM(_cs)                     /*  push   _cs                   */ \
	EM(0xE8) EM(0) EM(0) EM(0) EM(0)     /*  call   $+5                   */ \
	EM(0x83) EM(4) EM(0x24) EM(5)        /*  add    dword [esp], 5        */ \
	EM(0xCB)                             /*  retf                         */ \
}
 
// Counterpart of X64_Start_with_CS, meant to be executed while in x64 mode:
// far-returns into the code segment selected by _cs (X64_End passes 0x23 to go
// back to x86 mode) and resumes at the instruction that follows the macro.
// Sequence: call $+5 pushes the address of the `mov`; the mov stores _cs in
// the dword at [rsp + 4], next to the low dword of that address; the add
// advances the offset by 0xD (8-byte mov + 4-byte add + 1-byte retf) so it
// points just past the retf; retf then pops offset:selector.
#define X64_End_with_CS(_cs) \
{ \
	EM(0xE8) EM(0) EM(0) EM(0) EM(0)     /*  call   $+5                   */ \
	EM(0xC7) EM(0x44) EM(0x24) EM(4)     /*                               */ \
	EM(_cs) EM(0) EM(0) EM(0)            /*  mov    dword [rsp + 4], _cs  */ \
	EM(0x83) EM(4) EM(0x24) EM(0xD)      /*  add    dword [rsp], 0xD      */ \
	EM(0xCB)                             /*  retf                         */ \
}
 
// Convenience wrappers with the code segment selectors used on 64-bit Windows:
// 0x33 selects x64 mode (start of a 64-bit code region), 0x23 selects x86 mode
// (end of the region).
#define X64_Start() X64_Start_with_CS(0x33)
#define X64_End() X64_End_with_CS(0x23)

执行完X64_Start()宏后，CPU直接转换到x64模式，执行完X64_End()宏后立即回到x86模式。

由于远返回的opcode，以上宏都是位置独立的。

能够调用x64版本的APIs是非常有用的。我尝试加载过x64版本的kernel32.dll，他不是一个微不足道的任务，

并且我失败了，所以我需要坚持使用Native API。x64版本的kernel32.dll的主要问题是在已经加载x86版本的

kernel32.dll的情况下，x64 kernel32.dll 有一些额外的检查来阻止正常的加载。我相信通过一些猥琐的hook

来拦截kernel32!BaseDllInitialize能达到目的，但是这是非常复杂的任务。当我开始研究的时候，我是在Windows

Vista上，并且我能加载(用一些hacks)64位版本的kernel32和user32库，但是他们没有完整的功能，同时我又

转换到Windows7，使用在Vista上的方法不能够正常工作了。

让我们回到主题上，为了使用Native APIs，我需要在内存中定位x64版本的ntdll.dll。为了完成这个任务，

我需要解析_PEB_LDR_DATA结构中的InLoadOrderModuleList。64位的_PEB可以从64位的_TEB中获得，而获取64位_TEB

的方式与x86平台类似(在x64上我们需要使用gs段代替fs)：

mov   eax, gs:[0x30]

他甚至可以更简单，因为 wow64cpu!CpuSimulate(负责转换CPU到x86模式的函数)将gs:[0x30]的值移动到r12寄存器中，

所以我们的getTEB64()版本看起来像这样:

// 64-bit value that can be accessed as a whole (v) or as two 32-bit halves.
// To fool the M$ inline asm compiler, 2 DWORDs are used instead of a DWORD64:
// use of DWORD64 would generate a wrong 'pop word ptr[]' and break the stack.
union reg64
{
	DWORD dw[2];	// dw[0] = low 32 bits, dw[1] = high 32 bits of v
	DWORD64 v;
};
 
// Macro that simplifies pushing x64 registers: emits the two-byte encoding of
// `push r64` for register number r.
//   byte 1: 0x48 | (r >> 3)  - REX prefix; its low bit is set for r >= 8
//   byte 2: 0x50 | (r & 7)   - push opcode carrying the low 3 bits of r
// The register constants passed as r (e.g. _R12, _RAX) are defined outside
// this excerpt.
#define X64_Push(r) EM(0x48 | ((r) >> 3)) EM(0x50 | ((r) & 7))
 
// Returns a pointer to the 64-bit TEB, read from the R12 register while
// temporarily running in x64 mode.
// Returns 0 if the value read has any of its upper 32 bits set, i.e. it cannot
// be represented as a 32-bit pointer.
WOW64::TEB64* getTEB64()
{
	reg64 reg;
	reg.v = 0;

	X64_Start();
	//R12 register should always contain pointer to TEB64 in WoW64 processes
	X64_Push(_R12);
	//below pop will pop QWORD from stack, as we're in x64 mode now,
	//so it fills both reg.dw[0] and reg.dw[1]
	__asm pop reg.dw[0]
	X64_End();

	//upper 32 bits should be always 0 in WoW64 processes
	if (reg.dw[1] != 0)
		return 0;

	return (WOW64::TEB64*)reg.dw[0];
}

WOW64名字空间定义在"os_structs.h"文件中，随后将会和其他示例代码添加到文章尾部。

负责定位64位ntdll.dll函数定义如下：

// Locates the 64-bit ntdll.dll loaded in the current process.
//
// Walks InLoadOrderModuleList of the 64-bit PEB loader data, printing every
// module name, and records the DllBase of the entry named exactly "ntdll.dll".
// The result is cached in a function-local static, so once a non-zero base has
// been found later calls return it without walking (or printing) again.
//
// Returns the module base truncated to 32 bits, or 0 when the 64-bit TEB could
// not be obtained or no matching module was found.
DWORD getNTDLL64()
{
	static DWORD ntdll64 = 0;
	if (ntdll64 != 0)
		return ntdll64;

	WOW64::TEB64* teb64 = getTEB64();
	//getTEB64() returns 0 when R12 did not hold a 32-bit representable pointer
	if (teb64 == 0)
		return 0;

	WOW64::PEB64* peb64 = teb64->ProcessEnvironmentBlock;
	WOW64::PEB_LDR_DATA64* ldr = peb64->Ldr;

	printf("TEB: %08X\n", (DWORD)teb64);
	printf("PEB: %08X\n", (DWORD)peb64);
	printf("LDR: %08X\n", (DWORD)ldr);

	//BaseDllName.Length is used as a byte count without the terminating NUL
	static const wchar_t ntdllName[] = L"ntdll.dll";
	const size_t ntdllNameBytes = sizeof(ntdllName) - sizeof(wchar_t);

	printf("Loaded modules:\n");
	WOW64::LDR_DATA_TABLE_ENTRY64* head =
		(WOW64::LDR_DATA_TABLE_ENTRY64*)ldr->InLoadOrderModuleList.Flink;
	do
	{
		printf("  %ws\n", head->BaseDllName.Buffer);
		//compare lengths first: using only the entry's own length would let an
		//empty or prefix name match, and a longer name would make memcmp read
		//past the end of the literal
		if (head->BaseDllName.Length == ntdllNameBytes &&
			memcmp(head->BaseDllName.Buffer, ntdllName, ntdllNameBytes) == 0)
		{
			ntdll64 = (DWORD)head->DllBase;
		}
		head = (WOW64::LDR_DATA_TABLE_ENTRY64*)head->InLoadOrderLinks.Flink;
	}
	while (head != (WOW64::LDR_DATA_TABLE_ENTRY64*)&ldr->InLoadOrderModuleList);
	printf("NTDLL x64: %08X\n", ntdll64);
	return ntdll64;
}

为了完整支持x64 Native API调用，我们还需要一个等价于GetProcAddress的函数，它可以很容易地用

ntdll!LdrGetProcedureAddress来代替。下面的代码负责获取LdrGetProcedureAddress的地址：

// Finds the address of LdrGetProcedureAddress in the 64-bit ntdll.dll by
// scanning the module's PE export directory by name.
//
// Returns the function address truncated to 32 bits, or 0 when the 64-bit
// ntdll.dll was not located, it has no export directory, or the name is not
// exported.
DWORD getLdrGetProcedureAddress()
{
	BYTE* modBase = (BYTE*)getNTDLL64();
	//getNTDLL64() returns 0 when the module was not found
	if (modBase == 0)
		return 0;

	IMAGE_NT_HEADERS64* inh =
		(IMAGE_NT_HEADERS64*)(modBase + ((IMAGE_DOS_HEADER*)modBase)->e_lfanew);
	IMAGE_DATA_DIRECTORY& idd =
		inh->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT];
	if (idd.VirtualAddress == 0)
		return 0;

	IMAGE_EXPORT_DIRECTORY* ied =
		(IMAGE_EXPORT_DIRECTORY*)(modBase + idd.VirtualAddress);

	DWORD* rvaTable = (DWORD*)(modBase + ied->AddressOfFunctions);
	WORD* ordTable = (WORD*)(modBase + ied->AddressOfNameOrdinals);
	DWORD* nameTable = (DWORD*)(modBase + ied->AddressOfNames);
	//lazy search, there is no need to use binsearch for just one function
	//the name and ordinal tables hold NumberOfNames entries (not
	//NumberOfFunctions), so that is the bound for indexing them
	for (DWORD i = 0; i < ied->NumberOfNames; i++)
	{
		if (strcmp((char*)modBase + nameTable[i], "LdrGetProcedureAddress") == 0)
			return (DWORD)(modBase + rvaTable[ordTable[i]]);
	}
	return 0;
}

为了锦上添花，我将介绍有用的函数，能让我在x86的C/C++代码中直接的调用x64 Native APIs：

// Calls a 64-bit function from 32-bit code and returns the value it leaves in
// RAX.
//
// func  - address of the x64 function to call, given as a 32-bit value
// argC  - number of variadic arguments that follow
// ...   - the arguments; each one is read with va_arg as a DWORD64, so callers
//         have to cast every argument to DWORD64
//
// The first four arguments go to RCX, RDX, R8 and R9; the remaining ones are
// pushed on the stack from last to first, followed by 0x20 bytes of spill
// space. After the call the stack is unwound and ESP is restored from
// back_esp.
//
// NOTE(review): X64_Pop and the _RCX/_RDX/... register constants are not
// defined in this excerpt; presumably X64_Pop is the `pop r64` counterpart of
// X64_Push.
// NOTE(review): ESP is aligned to 8 bytes only, while the Microsoft x64
// calling convention asks for 16-byte alignment at the call site - confirm
// this is sufficient for the functions being called.
// NOTE(review): va_end(args) is never called.
DWORD64 X64Call(DWORD func, int argC, ...)
{
	va_list args;
	va_start(args, argC);
	//take up to four arguments for the registers; argC is decremented for each
	//one taken, so afterwards it holds the number of stack arguments
	DWORD64 _rcx = (argC > 0) ? argC--, va_arg(args, DWORD64) : 0;
	DWORD64 _rdx = (argC > 0) ? argC--, va_arg(args, DWORD64) : 0;
	DWORD64 _r8 = (argC > 0) ? argC--, va_arg(args, DWORD64) : 0;
	DWORD64 _r9 = (argC > 0) ? argC--, va_arg(args, DWORD64) : 0;
	reg64 _rax;
	_rax.v = 0;

	//address of the next variadic slot, i.e. of the first stack argument (if any)
	DWORD64 restArgs = (DWORD64)&va_arg(args, DWORD64);

	//conversion to QWORD for easier use in inline assembly
	DWORD64 _argC = argC;
	DWORD64 _func = func;

	DWORD back_esp = 0;

	__asm
	{
		;//keep original esp in back_esp variable
		mov    back_esp, esp

		;//align esp to 8, without aligned stack some syscalls
		;//may return errors !
		and    esp, 0xFFFFFFF8

		X64_Start();

		//from here to X64_End() the CPU runs in x64 mode: push/pop move
		//QWORDs, as noted in getTEB64()

		;//fill first four arguments
		push   _rcx
		X64_Pop(_RCX);
		push   _rdx
		X64_Pop(_RDX);
		push   _r8
		X64_Pop(_R8);
		push   _r9
		X64_Pop(_R9);

		//save edi, it is used below as the argument cursor
		push   edi

		push   restArgs
		X64_Pop(_RDI);

		push   _argC
		X64_Pop(_RAX);

		;//put rest of arguments on the stack
		//edi is moved to the last argument, then walks backwards 8 bytes at a
		//time while eax counts the remaining arguments down to zero
		test   eax, eax
		jz     _ls_e
		lea    edi, dword ptr [edi + 8*eax - 8]

		_ls:
		test   eax, eax
		jz     _ls_e
		push   dword ptr [edi]
		sub    edi, 8
		sub    eax, 1
		jmp    _ls
		_ls_e:

		;//create stack space for spilling registers
		sub    esp, 0x20

		call   _func

		;//cleanup stack
		//drop the stack arguments (8 bytes each) plus the 0x20 spill bytes
		push   _argC
		X64_Pop(_RCX);
		lea    esp, dword ptr [esp + 8*ecx + 0x20]

		pop    edi

		;//set return value
		X64_Push(_RAX);
		pop    _rax.dw[0]

		X64_End();

		mov    esp, back_esp
	}
	return _rax.v;
}

函数有一点长，但是有注释，并且整个想法也是非常简单的。第一个参数是我们想调用的x64函数地址，第二个参数是指定函数

需要的参数个数，其他的参数依赖于被调用的函数，所有的参数都应该转换成DWORD64。调用X64Call的一个小例子：

// GetProcAddress equivalent for 64-bit modules: looks up funcName in the
// module at `module` by calling the x64 ntdll!LdrGetProcedureAddress through
// X64Call.
//
// module   - base address of the 64-bit module
// funcName - NUL-terminated name of the export to look up
//
// Returns the value LdrGetProcedureAddress stored in its output argument, or 0
// if LdrGetProcedureAddress itself could not be located (the output argument
// also starts as 0). Prints the resolved addresses as diagnostics.
DWORD64 GetProcAddress64(DWORD module, char* funcName)
{
	//address of the x64 LdrGetProcedureAddress, resolved on first use and
	//retried on later calls for as long as it is still 0
	static DWORD ldrGetProcAddr = 0;
	if (ldrGetProcAddr == 0)
	{
		ldrGetProcAddr = getLdrGetProcedureAddress();
		printf("LdrGetProcedureAddress: %08X\n", ldrGetProcAddr);
	}
	if (ldrGetProcAddr == 0)
		return 0;

	//counted string describing funcName for the native call
	WOW64::ANSI_STRING64 name64 = { 0 };
	name64.Buffer = funcName;
	name64.Length = strlen(funcName);
	name64.MaximumLength = name64.Length + 1;

	//receives the looked-up address; every argument is widened to DWORD64 as
	//X64Call requires
	DWORD64 resolved = 0;
	X64Call(ldrGetProcAddr, 4,
		(DWORD64)module,
		(DWORD64)&name64,
		(DWORD64)0,
		(DWORD64)&resolved);

	printf("%s: %08X\n", funcName, (DWORD)resolved);
	return resolved;
}

Running x86 code inside 64-bits process（在64位进程中运行x86代码）

; Switches from x64 mode to x86 mode: far-returns to label rt with code
; segment selector 23h. The target is built on the stack from the return
; address pushed by call $+5, so the macro is position independent.
X86_Start MACRO
	LOCAL  xx, rt
	; push the address of xx (the next instruction)
	call   $+5
	xx     equ $
	; store selector 23h in the dword above the low half of that address
	mov    dword ptr [rsp + 4], 23h
	; move the return offset forward from xx to rt
	add    dword ptr [rsp], rt - xx
	; pop offset:selector and continue at rt
	retf
	rt:
ENDM
; Switches back from x86 mode to x64 mode (code segment selector 33h) and
; continues right after the macro. The instructions are given as raw bytes;
; the comment on each line shows the instruction those bytes encode.
; The constant 5 is the size of the add (4 bytes) plus the retf (1 byte).
X86_End MACRO
	db 6Ah, 33h			; push  33h
	db 0E8h, 0, 0, 0, 0		; call  $+5
	db 83h, 4, 24h, 5		; add   dword ptr [esp], 5
	db 0CBh				; retf
ENDM

Ending notes

文章中使用到的源码链接:

http://download.csdn.net/detail/u014249041/7074553

http://download.csdn.net/detail/u014249041/7074555

原文地址：

http://blog.rewolf.pl/blog/?p=102#.UysCxM4pCqS