// compiled with : gcc -o memcpy memcpy.c -m32 -lm
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <signal.h>
#include <unistd.h>
#include <sys/mman.h>
#include <math.h> unsigned long long rdtsc(){
asm("rdtsc");
} char* slow_memcpy(char* dest, const char* src, size_t len){
int i;
for (i=; i<len; i++) {
dest[i] = src[i];
}
return dest;
} char* fast_memcpy(char* dest, const char* src, size_t len){
size_t i;
// 64-byte block fast copy
if(len >= ){
i = len / ;
len &= (-);
while(i-- > ){
__asm__ __volatile__ (
"movdqa (%0), %%xmm0\n"
"movdqa 16(%0), %%xmm1\n"
"movdqa 32(%0), %%xmm2\n"
"movdqa 48(%0), %%xmm3\n"
"movntps %%xmm0, (%1)\n"
"movntps %%xmm1, 16(%1)\n"
"movntps %%xmm2, 32(%1)\n"
"movntps %%xmm3, 48(%1)\n"
::"r"(src),"r"(dest):"memory");
dest += ;
src += ;
}
} // byte-to-byte slow copy
if(len) slow_memcpy(dest, src, len);
return dest;
} int main(void){ setvbuf(stdout, , _IONBF, );
setvbuf(stdin, , _IOLBF, ); printf("Hey, I have a boring assignment for CS class.. :(\n");
printf("The assignment is simple.\n"); printf("-----------------------------------------------------\n");
printf("- What is the best implementation of memcpy? -\n");
printf("- 1. implement your own slow/fast version of memcpy -\n");
printf("- 2. compare them with various size of data -\n");
printf("- 3. conclude your experiment and submit report -\n");
printf("-----------------------------------------------------\n"); printf("This time, just help me out with my experiment and get flag\n");
printf("No fancy hacking, I promise :D\n"); unsigned long long t1, t2;
int e;
char* src;
char* dest;
unsigned int low, high;
unsigned int size;
// allocate memory
char* cache1 = mmap(, 0x4000, , MAP_PRIVATE|MAP_ANONYMOUS, -, );
char* cache2 = mmap(, 0x4000, , MAP_PRIVATE|MAP_ANONYMOUS, -, );
src = mmap(, 0x2000, , MAP_PRIVATE|MAP_ANONYMOUS, -, ); size_t sizes[];
int i=; // setup experiment parameters
for(e=; e<; e++){ // 2^13 = 8K
low = pow(,e-);
high = pow(,e);
printf("specify the memcpy amount between %d ~ %d : ", low, high);
scanf("%d", &size);
if( size < low || size > high ){
printf("don't mess with the experiment.\n");
exit();
}
sizes[i++] = size;
} sleep();
printf("ok, lets run the experiment with your configuration\n");
sleep(); // run experiment
for(i=; i<; i++){
size = sizes[i];
printf("experiment %d : memcpy with buffer size %d\n", i+, size);
dest = malloc( size ); memcpy(cache1, cache2, 0x4000); // to eliminate cache effect
t1 = rdtsc();
slow_memcpy(dest, src, size); // byte-to-byte memcpy
t2 = rdtsc();
printf("ellapsed CPU cycles for slow_memcpy : %llu\n", t2-t1); memcpy(cache1, cache2, 0x4000); // to eliminate cache effect
t1 = rdtsc();
fast_memcpy(dest, src, size); // block-to-block memcpy
t2 = rdtsc();
printf("ellapsed CPU cycles for fast_memcpy : %llu\n", t2-t1);
printf("\n");
} printf("thanks for helping my experiment!\n");
printf("flag : ----- erased in this source code -----\n");
return ;
}

分析源码:

    size_t sizes[];
int i=; // setup experiment parameters
for(e=; e<; e++){ // 2^13 = 8K
low = pow(,e-);
high = pow(,e);
printf("specify the memcpy amount between %d ~ %d : ", low, high);
scanf("%d", &size);
if( size < low || size > high ){
printf("don't mess with the experiment.\n");
exit();
}
sizes[i++] = size;
}

从上代码中分析得到,需要输入2的n次幂和2的n+1次幂之间

// run experiment
for(i=; i<; i++){
size = sizes[i];
printf("experiment %d : memcpy with buffer size %d\n", i+, size);
dest = malloc( size );

这段代码分析得到,输入size后malloc分配空间,分配的空间大小就是我们输入的size大小。

memcpy(cache1, cache2, 0x4000);        // to eliminate cache effect
t1 = rdtsc();
slow_memcpy(dest, src, size); // byte-to-byte memcpy
t2 = rdtsc();
printf("ellapsed CPU cycles for slow_memcpy : %llu\n", t2-t1); memcpy(cache1, cache2, 0x4000); // to eliminate cache effect
t1 = rdtsc();
fast_memcpy(dest, src, size); // block-to-block memcpy
t2 = rdtsc();
printf("ellapsed CPU cycles for fast_memcpy : %llu\n", t2-t1);
printf("\n");
}

分配空间后,分别用slow_memcpy和fast_memcpy两种方式,对堆块内的数据向另外一个内存地址拷贝,并比较二者时间。那么分析一下slow_memcpy和fast_memcpy:

char* slow_memcpy(char* dest, const char* src, size_t len){
int i;
for (i=; i<len; i++) {
dest[i] = src[i];
}
return dest;
}
char* fast_memcpy(char* dest, const char* src, size_t len){
size_t i;
// 64-byte block fast copy
if(len >= 64){
i = len / 64;
len &= (64-1); while(i-- > 0){
__asm__ __volatile__ (
"movdqa (%0), %%xmm0\n"
"movdqa 16(%0), %%xmm1\n"
"movdqa 32(%0), %%xmm2\n"
"movdqa 48(%0), %%xmm3\n"
"movntps %%xmm0, (%1)\n"
"movntps %%xmm1, 16(%1)\n"
"movntps %%xmm2, 32(%1)\n"
"movntps %%xmm3, 48(%1)\n"
::"r"(src),"r"(dest):"memory");
dest += 64;
src += 64;
}
}
 

slow_memcpy是循环赋值,fast_memcpy是用asm汇编指令movdqa进行拷贝。拷贝结束后输入flag。

根据提示生成可执行程序,然后执行程序看一下:

那么我们运行程序来看一下:

随便输入发现出错了:

我们用gdb来看,发现了出错的位置:

出错的位置,也就是movntps的执行出了问题,百度了一下movntps的用法:

movntps m128,XMM
m128 <== XMM 直接把XMM中的值送入m128,不经过cache,必须对齐16字节。再参考别人的wp:
malloc分配的堆块大小是以8字节对其的。

假设用户申请的堆块大小是a的话,malloc(a)分配的堆块大小为 8*(int((a+4)/8)+1)。

因此假设第一个malloc分配地址是16字节对齐的,则每次请求大小为16字节对齐的数据块即可成功运行结束。可以用脚本来算一下:

# coidng  = utf-8
while(1):
a = raw_input()
a = int(a)
if ((a+4)%16>=9) or ((a+4)%16==0):
print a," is true"
else:
print a," is false"

根据脚本算出来的数,我们输入得到flag:

memcpy@ubuntu:~$ ls
memcpy.c readme
memcpy@ubuntu:~$ cat readme
the compiled binary of "memcpy.c" source code (with real flag) will be executed under memcpy_pwn privilege if you connect to port 9022.
execute the binary by connecting to daemon(nc 0 9022). memcpy@ubuntu:~$ nc o 9022
nc: getaddrinfo: Name or service not known
memcpy@ubuntu:~$ nc 0 9022
Hey, I have a boring assignment for CS class.. :(
The assignment is simple.
-----------------------------------------------------
- What is the best implementation of memcpy? -
- 1. implement your own slow/fast version of memcpy -
- 2. compare them with various size of data -
- 3. conclude your experiment and submit report -
-----------------------------------------------------
This time, just help me out with my experiment and get flag
No fancy hacking, I promise :D
specify the memcpy amount between 8 ~ 16 : 9
specify the memcpy amount between 16 ~ 32 : 21
specify the memcpy amount between 32 ~ 64 : 40
specify the memcpy amount between 64 ~ 128 : 70
specify the memcpy amount between 128 ~ 256 : 135
specify the memcpy amount between 256 ~ 512 : 265
specify the memcpy amount between 512 ~ 1024 : 520
specify the memcpy amount between 1024 ~ 2048 : 1030
specify the memcpy amount between 2048 ~ 4096 : 2055
specify the memcpy amount between 4096 ~ 8192 : 5210
ok, lets run the experiment with your configuration
experiment 1 : memcpy with buffer size 9
ellapsed CPU cycles for slow_memcpy : 1497
ellapsed CPU cycles for fast_memcpy : 438 experiment 2 : memcpy with buffer size 21
ellapsed CPU cycles for slow_memcpy : 384
ellapsed CPU cycles for fast_memcpy : 411 experiment 3 : memcpy with buffer size 40
ellapsed CPU cycles for slow_memcpy : 636
ellapsed CPU cycles for fast_memcpy : 672 experiment 4 : memcpy with buffer size 70
ellapsed CPU cycles for slow_memcpy : 1134
ellapsed CPU cycles for fast_memcpy : 288 experiment 5 : memcpy with buffer size 135
ellapsed CPU cycles for slow_memcpy : 1938
ellapsed CPU cycles for fast_memcpy : 237 experiment 6 : memcpy with buffer size 265
ellapsed CPU cycles for slow_memcpy : 3633
ellapsed CPU cycles for fast_memcpy : 291 experiment 7 : memcpy with buffer size 520
ellapsed CPU cycles for slow_memcpy : 7287
ellapsed CPU cycles for fast_memcpy : 342 experiment 8 : memcpy with buffer size 1030
ellapsed CPU cycles for slow_memcpy : 13860
ellapsed CPU cycles for fast_memcpy : 441 experiment 9 : memcpy with buffer size 2055
ellapsed CPU cycles for slow_memcpy : 27561
ellapsed CPU cycles for fast_memcpy : 984 experiment 10 : memcpy with buffer size 5210
ellapsed CPU cycles for slow_memcpy : 72930
ellapsed CPU cycles for fast_memcpy : 2628 thanks for helping my experiment!
flag : 1_w4nn4_br34K_th3_m3m0ry_4lignm3nt

pwnable.kr memcpy之write up的更多相关文章

  1. 【pwnable.kr】 memcpy

    pwnable的新一题,和堆分配相关. http://pwnable.kr/bin/memcpy.c ssh memcpy@pwnable.kr -p2222 (pw:guest) 我觉得主要考察的是 ...

  2. pwnable.kr simple login writeup

    这道题是pwnable.kr Rookiss部分的simple login,需要我们去覆盖程序的ebp,eip,esp去改变程序的执行流程   主要逻辑是输入一个字符串,base64解码后看是否与题目 ...

  3. 【pwnable.kr】 asm

    一道写shellcode的题目, #include <stdio.h> #include <string.h> #include <stdlib.h> #inclu ...

  4. pwnable.kr之simple Login

    pwnable.kr之simple Login 懒了几天,一边看malloc.c的源码,一边看华庭的PDF.今天佛系做题,到pwnable.kr上打开了simple Login这道题,但是这道题个人觉 ...

  5. pwnable.kr的passcode

    前段时间找到一个练习pwn的网站,pwnable.kr 这里记录其中的passcode的做题过程,给自己加深印象. 废话不多说了,看一下题目, 看到题目,就ssh连接进去,就看到三个文件如下 看了一下 ...

  6. pwnable.kr bof之write up

    这一题与前两题不同,用到了静态调试工具ida 首先题中给出了源码: #include <stdio.h> #include <string.h> #include <st ...

  7. pwnable.kr col之write up

    Daddy told me about cool MD5 hash collision today. I wanna do something like that too! ssh col@pwnab ...

  8. pwnable.kr brainfuck之write up

    I made a simple brain-fuck language emulation program written in C. The [ ] commands are not impleme ...

  9. pwnable.kr login之write up

    main函数如下: auth函数如下: 程序的流程如下: 输入Authenticate值,并base64解码,将解码的值代入md5_auth函数中 mad5_auth()生成其MD5值并与f87cd6 ...

随机推荐

  1. 前端自动化测试python+webdriver

    前言:很多做测试的朋友的就知道,python+webdriver  可以做自动化测试,这对前端开发是非常有用的.  python 入门我就不讲了  ,推荐学习 廖雪峰老师的python入门3.5新版哈 ...

  2. EBS系统启动&停止&增加表空间&替换首页图片

    EBS系统启动&停止&增加表空间&替换首页图片 数据库启动 使用oraprod账号登陆 [root@htdb data]# su oraprod [oraprod@htdb d ...

  3. [图形学] 习题8.12 NLN二维线段裁剪算法实现

    Nicholl-Lee-Nicholl二维线段裁剪算法相对于Cohen-Sutherland和Liang-Barsky算法来说,在求交点之前进行了线段端点相对于几个区域的判断,可以确切的知道要求交点的 ...

  4. ubuntu12.0.4安装启动后无法进入图形操作界面

    在VMware10.0.4虚拟机上安装ubuntu12.0.4版本后,启动linux后,无法进入图形界面,但是可以进入字符界面.通过查阅网上资料,有人说是VMware的3D图形加速没有关闭,于是通过查 ...

  5. IT小天博客APP版本

    今天弄了一个博客的APP版本,感觉还不错,欢迎下载体验. 共 1 张图片 APP名称:[IT小天博客APP] APP版本:1.3 APP上线时间:2017-06-29 下载地址:点击下载

  6. React--JSX语法

    JSX语法,它是js语言的语法拓展. 比如2+2 , 对象.属性 , 函数的调用都可以在jsx中书写. import React from "react"; export defa ...

  7. 移动端300ms的点击延迟以及解决方案

    [今天做在移动端的一些效果时,我选择使用动画而不是用过渡,这个300ms的点击延迟是我为什么使用动画而不使用过渡最主要的一个原因] 动画和过渡 共同点:都是css控制DOM运动, 不同点: 1.过渡: ...

  8. C# 哈希表(Hashtable)用法笔记

    一.什么是Hashtable? Hashtable 类代表了一系列基于键的哈希代码组织起来的键/值对.它使用键来访问集合中的元素. 当您使用键访问元素时,则使用哈希表,而且您可以识别一个有用的键值.哈 ...

  9. PhpStorm配置PHP解释器(wampServer版)

    PHPStorm(以下简称为PS)和wampServer集成环境安装简单,不再赘述. 本人使用PhpStrom版本为2017.1.4版本. PS刚开始使用会使用自带服务器,但是有几率不能自动匹配到PH ...

  10. easyui点击搜索的时候获取不要文本框里面的值的问题

    jsp的代码 <div id="tb"> <input id="AppID" placeholder="请根据申请人ID搜索&quo ...