pwnable.kr memcpy之write up

 // compiled with : gcc -o memcpy memcpy.c -m32 -lm

 #include <stdio.h>

 #include <string.h>

 #include <stdlib.h>

 #include <signal.h>

 #include <unistd.h>

 #include <sys/mman.h>

 #include <math.h>

 /*
  * Read the x86 time-stamp counter and return it as a 64-bit value.
  *
  * RDTSC places the low 32 bits in EAX and the high 32 bits in EDX. The
  * two halves are captured through explicit output constraints and
  * combined here, instead of falling off the end of a non-void function
  * and relying on EDX:EAX happening to be the return registers.
  */
 unsigned long long rdtsc(){
     unsigned int lo, hi;

     __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
     return ((unsigned long long)hi << 32) | lo;
 }

 /*
  * Copy len bytes from src to dest one byte at a time.
  *
  * dest : destination buffer, at least len bytes
  * src  : source buffer, at least len bytes
  * len  : number of bytes to copy (0 copies nothing)
  *
  * Returns dest.
  */
 char* slow_memcpy(char* dest, const char* src, size_t len){
     size_t i;   /* same type as len, so the comparison is never signed/unsigned */

     for (i = 0; i < len; i++) {
         dest[i] = src[i];
     }
     return dest;
 }

 /*
  * Copy len bytes from src to dest, moving 64 bytes per iteration through
  * xmm0-xmm3 and handing whatever is left (< 64 bytes) to slow_memcpy().
  *
  * The block loop loads with movdqa and stores with movntps; both require
  * their memory operand to be 16-byte aligned, so whenever len >= 64 both
  * src and dest must be 16-byte aligned or the instruction faults.
  *
  * Returns the value of dest after the block loop, i.e. the original dest
  * advanced by 64 * (len / 64) bytes when len >= 64, otherwise dest itself.
  *
  * NOTE(review): the asm statement declares only "memory" as clobbered;
  * xmm0-xmm3 are overwritten without being listed.
  */
 char* fast_memcpy(char* dest, const char* src, size_t len){
     size_t i;

     // 64-byte block fast copy
     if(len >= 64){
         i = len / 64;       /* number of whole 64-byte blocks */
         len &= (64-1);      /* bytes left over for the byte-wise tail */
         while(i-- > 0){
             __asm__ __volatile__ (
             "movdqa (%0), %%xmm0\n"
             "movdqa 16(%0), %%xmm1\n"
             "movdqa 32(%0), %%xmm2\n"
             "movdqa 48(%0), %%xmm3\n"
             "movntps %%xmm0, (%1)\n"
             "movntps %%xmm1, 16(%1)\n"
             "movntps %%xmm2, 32(%1)\n"
             "movntps %%xmm3, 48(%1)\n"
             ::"r"(src),"r"(dest):"memory");
             dest += 64;
             src += 64;
         }
     }

     // byte-to-byte slow copy
     if(len) slow_memcpy(dest, src, len);
     return dest;
 }

 int main(void){

     setvbuf(stdout, , _IONBF, );

     setvbuf(stdin, , _IOLBF, );

     printf("Hey, I have a boring assignment for CS class.. :(\n");

     printf("The assignment is simple.\n");

     printf("-----------------------------------------------------\n");

     printf("- What is the best implementation of memcpy?        -\n");

     printf("- 1. implement your own slow/fast version of memcpy -\n");

     printf("- 2. compare them with various size of data         -\n");

     printf("- 3. conclude your experiment and submit report     -\n");

     printf("-----------------------------------------------------\n");

     printf("This time, just help me out with my experiment and get flag\n");

     printf("No fancy hacking, I promise :D\n");

     unsigned long long t1, t2;

     int e;

     char* src;

     char* dest;

     unsigned int low, high;

     unsigned int size;

     // allocate memory

     char* cache1 = mmap(, 0x4000, , MAP_PRIVATE|MAP_ANONYMOUS, -, );

     char* cache2 = mmap(, 0x4000, , MAP_PRIVATE|MAP_ANONYMOUS, -, );

     src = mmap(, 0x2000, , MAP_PRIVATE|MAP_ANONYMOUS, -, );

     size_t sizes[];

     int i=;

     // setup experiment parameters

     for(e=; e<; e++){    // 2^13 = 8K

         low = pow(,e-);

         high = pow(,e);

         printf("specify the memcpy amount between %d ~ %d : ", low, high);

         scanf("%d", &size);

         if( size < low || size > high ){

             printf("don't mess with the experiment.\n");

             exit();

         }

         sizes[i++] = size;

     }

     sleep();

     printf("ok, lets run the experiment with your configuration\n");

     sleep();

     // run experiment

     for(i=; i<; i++){

         size = sizes[i];

         printf("experiment %d : memcpy with buffer size %d\n", i+, size);

         dest = malloc( size );

         memcpy(cache1, cache2, 0x4000);        // to eliminate cache effect

         t1 = rdtsc();

         slow_memcpy(dest, src, size);        // byte-to-byte memcpy

         t2 = rdtsc();

         printf("ellapsed CPU cycles for slow_memcpy : %llu\n", t2-t1);

         memcpy(cache1, cache2, 0x4000);        // to eliminate cache effect

         t1 = rdtsc();

         fast_memcpy(dest, src, size);        // block-to-block memcpy

         t2 = rdtsc();

         printf("ellapsed CPU cycles for fast_memcpy : %llu\n", t2-t1);

         printf("\n");

     }

     printf("thanks for helping my experiment!\n");

     printf("flag : ----- erased in this source code -----\n");

     return ;

 }

分析源码：

    size_t sizes[];

    int i=;

    // setup experiment parameters

    for(e=; e<; e++){    // 2^13 = 8K

        low = pow(,e-);

        high = pow(,e);

        printf("specify the memcpy amount between %d ~ %d : ", low, high);

        scanf("%d", &size);

        if( size < low || size > high ){

            printf("don't mess with the experiment.\n");

            exit();

        }

        sizes[i++] = size;

    }

从上面的代码可以看出：程序会依次给出 10 个区间（8~16、16~32 …… 4096~8192），每次需要输入一个介于 2 的 n 次幂和 2 的 n+1 次幂之间的数（n 从 3 到 12）。

// run experiment

    for(i=0; i<10; i++){
        size = sizes[i];
        printf("experiment %d : memcpy with buffer size %d\n", i+1, size);
        // the destination is a fresh heap block of exactly the requested size
        dest = malloc( size );

这段代码分析得到，输入size后malloc分配空间，分配的空间大小就是我们输入的size大小。

memcpy(cache1, cache2, 0x4000);        // to eliminate cache effect

        t1 = rdtsc();

        slow_memcpy(dest, src, size);        // byte-to-byte memcpy

        t2 = rdtsc();

        printf("ellapsed CPU cycles for slow_memcpy : %llu\n", t2-t1);

        memcpy(cache1, cache2, 0x4000);        // to eliminate cache effect

        t1 = rdtsc();

        fast_memcpy(dest, src, size);        // block-to-block memcpy

        t2 = rdtsc();

        printf("ellapsed CPU cycles for fast_memcpy : %llu\n", t2-t1);

        printf("\n");

    }

分配空间后，分别用slow_memcpy和fast_memcpy两种方式，把src缓冲区中的数据拷贝到刚分配的堆块dest中，并比较二者所用的时间。那么分析一下slow_memcpy和fast_memcpy：

/*
 * Copy len bytes from src to dest one byte at a time and return dest.
 * A len of 0 copies nothing.
 */
char* slow_memcpy(char* dest, const char* src, size_t len){
    size_t i;   /* same type as len, so the comparison is never signed/unsigned */

    for (i = 0; i < len; i++) {
        dest[i] = src[i];
    }
    return dest;
}

char* fast_memcpy(char* dest, const char* src, size_t len){

    size_t i;

    // 64-byte block fast copy

    if(len >= 64){

        i = len / 64;

        len &= (64-1);

        while(i-- > 0){

            __asm__ __volatile__ (

            "movdqa (%0), %%xmm0\n"

            "movdqa 16(%0), %%xmm1\n"

            "movdqa 32(%0), %%xmm2\n"

            "movdqa 48(%0), %%xmm3\n"

            "movntps %%xmm0, (%1)\n"

            "movntps %%xmm1, 16(%1)\n"

            "movntps %%xmm2, 32(%1)\n"

            "movntps %%xmm3, 48(%1)\n"

            ::"r"(src),"r"(dest):"memory");

            dest += 64;

            src += 64;

        }

    }

slow_memcpy是逐字节循环赋值；fast_memcpy则以64字节为一块，用汇编指令movdqa把数据读入xmm寄存器，再用movntps写入目标地址，不足64字节的剩余部分仍交给slow_memcpy处理。全部实验完成后程序会输出flag。

根据提示生成可执行程序，然后执行程序看一下：

那么我们运行程序来看一下：

随便输入发现出错了：

我们用gdb来看，发现了出错的位置：

出错的位置，也就是movntps的执行出了问题，百度了一下movntps的用法：

movntps m128,XMM

m128 <== XMM：直接把XMM寄存器中的值写入m128指向的内存，不经过cache；目标地址必须按16字节对齐，否则指令会触发异常（也就是上面看到的崩溃）。再参考别人的wp:

malloc分配的堆块大小是以8字节对齐的。

假设用户申请的大小是a，malloc(a)实际分配的堆块大小为 (a+4) 向上取整到8的倍数，即 8*ceil((a+4)/8)。

因此假设第一个malloc返回的地址是16字节对齐的，则只要每次申请得到的堆块大小都是16的倍数，后续malloc返回的地址就都保持16字节对齐，程序即可成功运行结束。可以用脚本来算一下：

# coding: utf-8
"""Check which malloc request sizes keep the following heap block 16-byte aligned.

Following the write-up's reasoning, a request of ``a`` bytes occupies
``a + 4`` bytes rounded up to a multiple of 8. The next block stays 16-byte
aligned only when that rounded size is a multiple of 16, which is the case
when ``(a + 4) % 16`` is 0 or lies in 9..15.
"""


def is_aligned_request(a):
    """Return True if a request of ``a`` bytes rounds up to a multiple of 16."""
    return ((a + 4) % 16 >= 9) or ((a + 4) % 16 == 0)


if __name__ == "__main__":
    # Read one candidate size per line until end of input.
    while True:
        try:
            a = int(input())
        except EOFError:
            break
        if is_aligned_request(a):
            print(a, " is true")
        else:
            print(a, " is false")

根据脚本算出来的数，我们输入得到flag：

memcpy@ubuntu:~$ ls

memcpy.c  readme

memcpy@ubuntu:~$ cat readme

the compiled binary of "memcpy.c" source code (with real flag) will be executed under memcpy_pwn privilege if you connect to port 9022.

execute the binary by connecting to daemon(nc 0 9022).

memcpy@ubuntu:~$ nc o 9022

nc: getaddrinfo: Name or service not known

memcpy@ubuntu:~$ nc 0 9022

Hey, I have a boring assignment for CS class.. :(

The assignment is simple.

-----------------------------------------------------

- What is the best implementation of memcpy?        -

- 1. implement your own slow/fast version of memcpy -

- 2. compare them with various size of data         -

- 3. conclude your experiment and submit report     -

-----------------------------------------------------

This time, just help me out with my experiment and get flag

No fancy hacking, I promise :D

specify the memcpy amount between 8 ~ 16 : 9

specify the memcpy amount between 16 ~ 32 : 21

specify the memcpy amount between 32 ~ 64 : 40

specify the memcpy amount between 64 ~ 128 : 70

specify the memcpy amount between 128 ~ 256 : 135

specify the memcpy amount between 256 ~ 512 : 265

specify the memcpy amount between 512 ~ 1024 : 520

specify the memcpy amount between 1024 ~ 2048 : 1030

specify the memcpy amount between 2048 ~ 4096 : 2055

specify the memcpy amount between 4096 ~ 8192 : 5210

ok, lets run the experiment with your configuration

experiment 1 : memcpy with buffer size 9

ellapsed CPU cycles for slow_memcpy : 1497

ellapsed CPU cycles for fast_memcpy : 438

experiment 2 : memcpy with buffer size 21

ellapsed CPU cycles for slow_memcpy : 384

ellapsed CPU cycles for fast_memcpy : 411

experiment 3 : memcpy with buffer size 40

ellapsed CPU cycles for slow_memcpy : 636

ellapsed CPU cycles for fast_memcpy : 672

experiment 4 : memcpy with buffer size 70

ellapsed CPU cycles for slow_memcpy : 1134

ellapsed CPU cycles for fast_memcpy : 288

experiment 5 : memcpy with buffer size 135

ellapsed CPU cycles for slow_memcpy : 1938

ellapsed CPU cycles for fast_memcpy : 237

experiment 6 : memcpy with buffer size 265

ellapsed CPU cycles for slow_memcpy : 3633

ellapsed CPU cycles for fast_memcpy : 291

experiment 7 : memcpy with buffer size 520

ellapsed CPU cycles for slow_memcpy : 7287

ellapsed CPU cycles for fast_memcpy : 342

experiment 8 : memcpy with buffer size 1030

ellapsed CPU cycles for slow_memcpy : 13860

ellapsed CPU cycles for fast_memcpy : 441

experiment 9 : memcpy with buffer size 2055

ellapsed CPU cycles for slow_memcpy : 27561

ellapsed CPU cycles for fast_memcpy : 984

experiment 10 : memcpy with buffer size 5210

ellapsed CPU cycles for slow_memcpy : 72930

ellapsed CPU cycles for fast_memcpy : 2628

thanks for helping my experiment!

flag : 1_w4nn4_br34K_th3_m3m0ry_4lignm3nt

pwnable.kr memcpy之write up的更多相关文章

【pwnable.kr】 memcpy
pwnable的新一题,和堆分配相关. http://pwnable.kr/bin/memcpy.c ssh memcpy@pwnable.kr -p2222 (pw:guest) 我觉得主要考察的是 ...
pwnable.kr simple login writeup
这道题是pwnable.kr Rookiss部分的simple login,需要我们去覆盖程序的ebp,eip,esp去改变程序的执行流程主要逻辑是输入一个字符串,base64解码后看是否与题目 ...
【pwnable.kr】 asm
一道写shellcode的题目, #include <stdio.h> #include <string.h> #include <stdlib.h> #inclu ...
pwnable.kr之simple Login
pwnable.kr之simple Login 懒了几天,一边看malloc.c的源码,一边看华庭的PDF.今天佛系做题,到pwnable.kr上打开了simple Login这道题,但是这道题个人觉 ...
pwnable.kr的passcode
前段时间找到一个练习pwn的网站,pwnable.kr 这里记录其中的passcode的做题过程,给自己加深印象. 废话不多说了,看一下题目, 看到题目,就ssh连接进去,就看到三个文件如下看了一下 ...
pwnable.kr bof之write up
这一题与前两题不同,用到了静态调试工具ida 首先题中给出了源码: #include <stdio.h> #include <string.h> #include <st ...
pwnable.kr col之write up
Daddy told me about cool MD5 hash collision today. I wanna do something like that too! ssh col@pwnab ...
pwnable.kr brainfuck之write up
I made a simple brain-fuck language emulation program written in C. The [ ] commands are not impleme ...
pwnable.kr login之write up
main函数如下: auth函数如下: 程序的流程如下: 输入Authenticate值,并base64解码,将解码的值代入md5_auth函数中 mad5_auth()生成其MD5值并与f87cd6 ...

随机推荐

前端自动化测试python+webdriver
前言:很多做测试的朋友的就知道,python+webdriver 可以做自动化测试,这对前端开发是非常有用的. python 入门我就不讲了 ,推荐学习廖雪峰老师的python入门3.5新版哈 ...
EBS系统启动&停止&增加表空间&替换首页图片
EBS系统启动&停止&增加表空间&替换首页图片数据库启动使用oraprod账号登陆 [root@htdb data]# su oraprod [oraprod@htdb d ...
[图形学] 习题8.12 NLN二维线段裁剪算法实现
Nicholl-Lee-Nicholl二维线段裁剪算法相对于Cohen-Sutherland和Liang-Barsky算法来说,在求交点之前进行了线段端点相对于几个区域的判断,可以确切的知道要求交点的 ...
ubuntu12.0.4安装启动后无法进入图形操作界面
在VMware10.0.4虚拟机上安装ubuntu12.0.4版本后,启动linux后,无法进入图形界面,但是可以进入字符界面.通过查阅网上资料,有人说是VMware的3D图形加速没有关闭,于是通过查 ...
IT小天博客APP版本
今天弄了一个博客的APP版本,感觉还不错,欢迎下载体验. 共 1 张图片 APP名称:[IT小天博客APP] APP版本:1.3 APP上线时间:2017-06-29 下载地址:点击下载
React--JSX语法
JSX语法,它是js语言的语法拓展. 比如2+2 , 对象.属性 , 函数的调用都可以在jsx中书写. import React from "react"; export defa ...
移动端300ms的点击延迟以及解决方案
[今天做在移动端的一些效果时,我选择使用动画而不是用过渡,这个300ms的点击延迟是我为什么使用动画而不使用过渡最主要的一个原因] 动画和过渡共同点:都是css控制DOM运动, 不同点: 1.过渡: ...
C# 哈希表（Hashtable）用法笔记
一.什么是Hashtable? Hashtable 类代表了一系列基于键的哈希代码组织起来的键/值对.它使用键来访问集合中的元素. 当您使用键访问元素时,则使用哈希表,而且您可以识别一个有用的键值.哈 ...
PhpStorm配置PHP解释器（wampServer版）
PHPStorm(以下简称为PS)和wampServer集成环境安装简单,不再赘述. 本人使用PhpStrom版本为2017.1.4版本. PS刚开始使用会使用自带服务器,但是有几率不能自动匹配到PH ...
easyui点击搜索的时候获取不要文本框里面的值的问题
jsp的代码 <div id="tb"> <input id="AppID" placeholder="请根据申请人ID搜索&quo ...

pwnable.kr memcpy之write up

pwnable.kr memcpy之write up的更多相关文章

随机推荐

热门专题