Just read about pahole. That's a cool tool!

You can find pahole here: http://git.kernel.org/?p=linux/kernel/git/acme/pahole.git :

git clone git://git.kernel.org/pub/scm/linux/kernel/git/acme/pahole.git
cd pahole
mkdir build
cd build
cmake ..
make

I tested it on ikmalloc:

$ pahole libikmalloc.so
 
struct chunk {
        struct chunk *             main_next;            /*     0     8 */
        struct chunk *             main_prev;            /*     8     8 */
        struct chunk *             free_next;            /*    16     8 */
        struct chunk *             free_prev;            /*    24     8 */
        size_t                     size;                 /*    32     8 */
        size_t                     type;                 /*    40     8 */
        size_t                     asked_size;           /*    48     8 */
        size_t                     alignment;            /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
 
        /* size: 64, cachelines: 1, members: 8 */
};

Ok, everything is aligned =).

$ pahole fs/nandfs/nandfs.o --reorganize --show_reorg_steps -C nandfs_io_ops
struct nandfs_io_ops {
        struct nandfs_object *     obj;                  /*     0     8 */
        uint8_t *                  buf;                  /*     8     8 */
        uint32_t                   ofs;                  /*    16     4 */
 
        /* XXX 4 bytes hole, try to pack */
 
        size_t                     len;                  /*    24     8 */
        size_t                     retlen;               /*    32     8 */
        bool                       direct;               /*    40     1 */
 
        /* size: 48, cachelines: 1, members: 6 */
        /* sum members: 37, holes: 1, sum holes: 4 */
        /* padding: 7 */
        /* last cacheline: 48 bytes */
};
 
/* Moving 'direct' from after 'retlen' to after 'ofs' */
struct nandfs_io_ops {
        struct nandfs_object *     obj;                  /*     0     8 */
        uint8_t *                  buf;                  /*     8     8 */
        uint32_t                   ofs;                  /*    16     4 */
        bool                       direct;               /*    20     1 */
 
        /* XXX 3 bytes hole, try to pack */
 
        size_t                     len;                  /*    24     8 */
        size_t                     retlen;               /*    32     8 */
 
        /* size: 40, cachelines: 1, members: 6 */
        /* sum members: 37, holes: 1, sum holes: 3 */
        /* last cacheline: 40 bytes */
}
 
/* Final reorganized struct: */
struct nandfs_io_ops {
        struct nandfs_object *     obj;                  /*     0     8 */
        uint8_t *                  buf;                  /*     8     8 */
        uint32_t                   ofs;                  /*    16     4 */
        bool                       direct;               /*    20     1 */
 
        /* XXX 3 bytes hole, try to pack */
 
        size_t                     len;                  /*    24     8 */
        size_t                     retlen;               /*    32     8 */
 
        /* size: 40, cachelines: 1, members: 6 */
        /* sum members: 37, holes: 1, sum holes: 3 */
        /* last cacheline: 40 bytes */
};   /* saved 8 bytes! */

Wow, cool, 8 bytes saved :P