Author Topic: Decompiling (Read 7724 times)

Exoduz · « **on:** June 27, 2017, 03:39:34 PM »

Has anyone ever tried to decompile op2 and in that way maybe get a close resemblance to what the source code was?

I have decompiled one of the DLLs, and am wondering whether the output is correct C++ or not, and whether it could be compiled back and be playable.

I'll add what I got from cep1.dll.

Please do check and see if it would be useful, and if so then I can decompile the rest of the game.
I tried decompiling the exe too and got a shitload of code...

Due to the limit of 20000 characters per message I could only add part of what I got from the DLL, but I put it in a txt if you want to examine it closer!

Code: [Select]

/*
 * ?CreateBeacon@TethysGame@@SIHW4map_id@@HHHHH@Z
 * public: static int __fastcall TethysGame::CreateBeacon(enum map_id,int,int,int,int,int)
 */
// Decompiler placeholder for the symbol named above. It is declared as an
// int32_t holding 0xd5ba, yet the functions in this listing use it with call
// syntax, so the listing does not compile as C++ without redeclaring it as a
// function with the signature shown in the comment.
// NOTE(review): 0xd5ba is presumably an import-table reference emitted by the
// decompiler, not a meaningful integer value -- confirm against the DLL.
int32_t CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z = 0xd5ba;

/*
 * fun_11001de0 -- raw decompiler output; not valid C++ as written.
 *
 * Dispatches on `ecx` with a switch:
 *   - cases 0..17 invoke the CreateBeacon placeholder with the single
 *     argument 81 and then `goto` an integer literal. An integer is not a
 *     label, so these jumps do not compile; presumably they are jump targets
 *     the decompiler failed to resolve.
 *   - case 18 invokes it with (81, 87, 0, eax6, 0xff), stores the result in
 *     eax6 and jumps to the label in front of `default`, which returns eax6.
 *   - any other value of `ecx` returns eax6 directly.
 *
 * Parameters a2..a5 are never read. The demangled signature in the comment
 * on the CreateBeacon placeholder lists six parameters, while the calls here
 * pass one or five arguments.
 * NOTE(review): the missing arguments were presumably passed in registers or
 * pushed in code the decompiler did not attribute to the call -- confirm in
 * a disassembly.
 */
uint32_t fun_11001de0(int32_t ecx, int32_t a2, int32_t a3, int32_t a4, int32_t a5) {
    // Register placeholders invented by the decompiler. None of eax7..edx11 is
    // ever assigned in this function, so every read below is of an
    // indeterminate value.
    uint32_t eax6;
    int32_t eax7;
    int32_t eax8;
    uint32_t eax9;
    int32_t eax10;
    uint32_t edx11;

    // Parses as (eax7 - (eax8 + (eax9 < eax10 + (edx11 < 1)))) & 2, since `-`
    // and `+` bind tighter than `<`, which binds tighter than `&`.
    // `uint1_t` is not a standard type; it is decompiler notation.
    eax6 = eax7 - (eax8 + reinterpret_cast<uint1_t>(eax9 < eax10 + reinterpret_cast<uint1_t>(edx11 < 1))) & 2;
    switch (ecx) {
        // Shared exit: case 18 jumps here and falls through into `default`.
        addr_0x11001fdd_3:
    default:
        return eax6;
    case 0:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 4;
    case 1:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 34;
    case 2:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0x8e;
    case 3:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 94;
    case 4:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 18;
    case 5:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 66;
    case 6:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0x71;
    case 7:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 72;
    case 8:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0x9d;
    case 9:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xd7;
    case 10:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0x9b;
    case 11:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xbf;
    case 12:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xc6;
    case 13:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xed;
    case 14:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xe5;
    case 15:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xe3;
    case 16:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xbe;
    case 17:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 61;
    case 18:
        // Only case with a full argument list; its result becomes the return value.
        eax6 = reinterpret_cast<uint32_t>(CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81, 87, 0, eax6, 0xff));
        goto addr_0x11001fdd_3;
    }
}

/*
 * fun_11001c70 -- raw decompiler output; not valid C++ as written.
 *
 * Dispatches on `ecx` with a switch:
 *   - cases 0..9 invoke the CreateBeacon placeholder with the single argument
 *     81 and then `goto` an integer literal. An integer is not a label, so
 *     these jumps do not compile; presumably they are jump targets the
 *     decompiler failed to resolve.
 *   - case 10 invokes it with (81, 0xc9, 1, 2, 0xff) and jumps to the label
 *     in front of `default`, which returns.
 *   - any other value of `ecx` returns immediately.
 *
 * Parameters a2..a5 are never read.
 */
void fun_11001c70(int32_t ecx, int32_t a2, int32_t a3, int32_t a4, int32_t a5) {
    switch (ecx) {
        // Shared exit: case 10 jumps here and falls through into `default`.
        addr_0x11001db3_3:
    default:
        return;
    case 0:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xcf;
    case 1:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xdf;
    case 2:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xe5;
    case 3:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xfb;
    case 4:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xfb;
    case 5:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xdd;
    case 6:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xf6;
    case 7:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xe9;
    case 8:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xe9;
    case 9:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xd7;
    case 10:
        // Only case with a full argument list.
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81, 0xc9, 1, 2, 0xff);
        goto addr_0x11001db3_3;
    }
}

/*
 * fun_11001af0 -- raw decompiler output; not valid C++ as written.
 *
 * Dispatches on `ecx` with a switch:
 *   - cases 0..10 invoke the CreateBeacon placeholder with the single
 *     argument 81 and then `goto` an integer literal. An integer is not a
 *     label, so these jumps do not compile; presumably they are jump targets
 *     the decompiler failed to resolve.
 *   - case 11 invokes it with (81, 0xf7, edx6, 1, 0xff) and jumps to the
 *     label in front of `default`, which returns.
 *   - any other value of `ecx` returns immediately.
 *
 * Parameters a2..a5 are never read.
 */
void fun_11001af0(int32_t ecx, int32_t a2, int32_t a3, int32_t a4, int32_t a5) {
    // Register placeholder; never assigned before it is passed in case 11, so
    // its value there is indeterminate.
    int32_t edx6;

    switch (ecx) {
        // Shared exit: case 11 jumps here and falls through into `default`.
        addr_0x11001c31_3:
    default:
        return;
    case 0:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xa4;
    case 1:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0xbc;
    case 2:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0x87;
    case 3:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0x80;
    case 4:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 0x9b;
    case 5:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 78;
    case 6:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 27;
    case 7:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 12;
    case 8:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 17;
    case 9:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 55;
    case 10:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto 74;
    case 11:
        // Only case with a full argument list.
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81, 0xf7, edx6, 1, 0xff);
        goto addr_0x11001c31_3;
    }
}

/*
 * fun_11001930 -- raw decompiler output; not valid C++ as written.
 *
 * Dispatches on `ecx` with a switch:
 *   - cases 0..10 invoke the CreateBeacon placeholder with the single
 *     argument 81 and then `goto` one of the locals edx7..edx17. A variable
 *     is not a label, so these jumps do not compile.
 *   - case 11 invokes it with (81, 0x8c, edx18, a2, 0xff) and jumps to the
 *     label in front of `default`, which returns.
 *   - any other value of `ecx` returns immediately.
 *
 * Of the parameters, only `ecx` and `a2` are read; a3..a6 are unused.
 */
void fun_11001930(int32_t ecx, int32_t a2, int32_t a3, int32_t a4, int32_t a5, int32_t a6) {
    // Register placeholders invented by the decompiler. None of them is ever
    // assigned in this function, so every use below reads an indeterminate
    // value.
    int32_t edx7;
    int32_t edx8;
    int32_t edx9;
    int32_t edx10;
    int32_t edx11;
    int32_t edx12;
    int32_t edx13;
    int32_t edx14;
    int32_t edx15;
    int32_t edx16;
    int32_t edx17;
    int32_t edx18;

    switch (ecx) {
        // Shared exit: case 11 jumps here and falls through into `default`.
        addr_0x11001ab7_3:
    default:
        return;
    case 0:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto edx7;
    case 1:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto edx8;
    case 2:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto edx9;
    case 3:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto edx10;
    case 4:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto edx11;
    case 5:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto edx12;
    case 6:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto edx13;
    case 7:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto edx14;
    case 8:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto edx15;
    case 9:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto edx16;
    case 10:
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81);
        goto edx17;
    case 11:
        // Only case with a full argument list; the fourth argument is the caller's a2.
        CreateBeacon_TethysGame_SIHW4map_id_HHHHH_Z(81, 0x8c, edx18, a2, 0xff);
        goto addr_0x11001ab7_3;
    }
}

// Structure synthesized by the decompiler; it is the pointee type of the
// Player_3PAV_Player_A placeholder and of fun_11002030's first parameter.
// NOTE(review): the field names presumably encode byte offsets (f12 at
// offset 12, and so on), which matches 12 bytes of padding followed by
// 4-byte fields -- confirm against the real _Player layout.
struct s0 {
    // Decompiler notation for 12 bytes of leading padding. Valid C++ would be
    // `signed char pad12[12];`.
    signed char[12] pad12;
    int32_t f12;
    uint32_t f16;
    int32_t f20;
};

/* ?Player@@3PAV_Player@@A */
// Decompiler placeholder for the symbol named above: a pointer to the
// synthesized struct s0, initialized from the raw integer 0xd4ba.
// NOTE(review): 0xd4ba is presumably an import-table reference, not a usable
// address -- confirm against the DLL.
struct s0* Player_3PAV_Player_A = reinterpret_cast<struct s0*>(0xd4ba);

/*
 * ?Difficulty@_Player@@QBEHXZ
 * public: int __thiscall _Player::Difficulty(void)const
 */
// Placeholder declared as an int32_t holding 0xd636, although the comment
// above names a member function. It would have to be redeclared as a
// function before it could be called from valid C++.
int32_t Difficulty__Player_QBEHXZ = 0xd636;

/*
 * ?GetRand@TethysGame@@SIHH@Z
 * public: static int __fastcall TethysGame::GetRand(int)
 */
// Placeholder declared as an int32_t holding 0xd696, although the comment
// above names a static member function. It would have to be redeclared as a
// function before it could be called from valid C++.
int32_t GetRand_TethysGame_SIHH_Z = 0xd696;

void fun_11002030(struct s0* ecx, int32_t a2, int32_t a3, int32_t a4, int32_t a5) {
    struct s0* ecx6;
    int32_t edi7;
    int32_t esi8;
    int32_t eax9;
    int32_t esi10;
    int32_t edi11;
    int32_t eax12;
    int32_t eax13;
    int32_t eax14;
    int32_t eax15;
    int32_t eax16;
    int32_t eax17;
    int32_t eax18;
    int32_t eax19;
    int32_t eax20;
    int32_t eax21;
    int32_t eax22;
    int32_t eax23;
    int32_t eax24;
    int32_t eax25;

Sirbomber · « **Reply #1 on:** June 27, 2017, 04:50:33 PM »

Quote from: Exoduz on June 27, 2017, 03:39:34 PM

Has anyone ever tried to decompile op2 and in that way maybe get a close resemblance to what the source code was?

Short answer: Yes. It's highly impractical. To give you an idea, it took someone with decades of experience several months to decompile one of the demo missions that play out if you idle on the main menu.

Hooman · « **Reply #2 on:** June 29, 2017, 12:46:40 PM »

Decompiled code is typically of low quality. It often won't recompile without edits. For such a large project, probably a massive number of edits. Though I suggest you give it a try and see for sure.

The code output by most decompilers is barely readable. It's often on par with reading a raw disassembly.

There are some hacks in the EXE that likely won't decompile properly. In particular the self modifying code. That might not even be handled well by a disassembler, which is a simpler problem than a decompiler must solve.

Arklon also had a project that might help with efforts of a similar nature.

Arklon · « **Reply #3 on:** June 29, 2017, 04:17:12 PM »

Quote from: Hooman on June 29, 2017, 12:46:40 PM

Arklon also had a project that might help with efforts of a similar nature.

The reverse linker? That'd be more an alternative (if I ever decide to go back and finish it), which would be much more practical but would still involve a ton of manual work fixing things up.

If you think about how the build toolchain works, compiler -> assembler -> linker, reversing the linking step makes so much more sense than disassembling or decompiling, since you go back 1 step instead of 2 or 3. And it serves the same purpose in the end, you get something that you can compile against as if you had the source code to the game, and can start replacing functions with ones rewritten in C++ seamlessly (taking advantage of weak externals) and incrementally rewrite everything.

Hooman · « **Reply #4 on:** June 30, 2017, 04:25:57 AM »

It's a beautiful idea really. And yes, simpler. Though I've always viewed it as pretty close to disassembling. Seems like you'd need to know a lot of the same information to do it correctly. The one difference I see is not needing to distinguish between code and data. Considering that problem is not in the general case solvable, that could actually be a pretty big deal.

How far along was the reverse linker? Was it usable in certain cases? What was unfinished?

Exoduz · « **Reply #5 on:** June 30, 2017, 02:43:51 PM »

Well, I don't really know which decompilers give the most exact decompilation, but I used the Snowman decompiler and it decompiled every single file in the op2 catalogue without trouble for me.
If you want to check it out, the address of its website is: https://derevenets.com/

Arklon · « **Reply #6 on:** June 30, 2017, 04:05:44 PM »

Quote from: Hooman on June 30, 2017, 04:25:57 AM

It's a beautiful idea really. And yes, simpler. Though I've always viewed it as pretty close to disassembling. Seems like you'd need to know a lot of the same information to do it correctly. The one difference I see is not needing to distinguish between code and data. Considering that problem is not in the general case solvable, that could actually be a pretty big deal.

Right, and it would still take a ton of manual work. A lot of it would be defining the sizes of all global objects in the .(r)data section, which is needed in order to be able to associate base relocations within the object but past its base address, which are pretty common. And, of course, defining decorated symbol names, at least for things you actually need to use or replace in your own code. And your own code needs to have headers written for all this stuff.

Quote from: Hooman on June 30, 2017, 04:25:57 AM

How far along was the reverse linker? Was it usable in certain cases? What was unfinished?

I had a pretty decent auto-analysis scheme working enough to use on Outpost2.exe. That was intended to be used to output a text representation of the exe with the reconstructed symbol and relocation tables, so it can then be manually curated before feeding that back in to produce the obj. LLVM (which is what I'm basing it on) already has well-defined APIs for YAML, including a COFF YAML representation, so that'd be the most convenient format to use. Outputting that YAML file after auto analysis is all it does right now.

So, obviously it needs to write out the obj still, which would actually not be nearly as much work as parsing the input exe/auto analysis/etc. The auto-analysis needs to support some exe features I haven't bothered with yet like SEH and delayed imports, Outpost2.exe doesn't use those though so I can probably just not care; there's also still some bugs like my code to deal with common symbols (meaning, references either to the base address or inside the exe headers, before the first section) isn't working for some reason, but again that's not a concern with Outpost2.exe. I need to write an IDA plugin to also output a YAML file, so you can use IDA as a much better curation tool than editing the YAML by hand, or use its auto-analysis which is better than anything I'd ever have time to write myself. Or, just forget about even using my auto-analysis at all, and just make the plugin.

It needed to have code written to handle loading in YAML contents. I'd imagine it being able to have multiple YAMLs used as inputs at once, so you could have manual symbol definitions be overlayed with auto analysis definitions, i.e. a symbol (address) defined in file 1 would take precedence over definitions of it in 2, 3, etc.

To address having to manually define decorated symbol names, and make headers for your code to use anything, since I was already working off of LLVM already, I might as well use libclang to make a tool to parse source code for specially formatted comments or whatever that define where the symbol in Outpost2.exe is, which outputs another YAML file with MS-compatible decorated symbol names etc. So your workflow to do both things just involves writing the headers, and the tool deals with generating decorated symbol names in the right format for use with the reverse-linker. Instead of, you know, dealing with the insanity of ForcedExports.asm.

It seems like it should be pretty easy to just parse C++ comments using libclang, and just use COFF YAML, just not as easy as my decision to be lazy instead.

The reason I shelved it was because the linker I was basing this on, LLD, had a rewrite of most of its codebase. Which did actually look like a huge improvement in code simplicity and speed, but I was just too lazy to feel like doing code archaeology all over again, considering I already spent 3 weeks doing nothing but reading over LLVM's codebase before I even started writing any code.

Arklon · « **Reply #7 on:** June 30, 2017, 05:23:20 PM »

Quote from: Exoduz on June 30, 2017, 02:43:51 PM

Well, I don't really know which decompilers give the most exact decompilation, but I used the Snowman decompiler and it decompiled every single file in the op2 catalogue without trouble for me.
If you want to check it out, the address of its website is: https://derevenets.com/

Yeah, but good luck actually compiling it, and even if it somehow miraculously does you'd be lucky that it correctly translated the code functionality so it'd run correctly. And even then, you still haven't made rewriting the game that much easier of a task, since almost everything has no labels, and decompiler output is generally pretty cryptic. I actually do use IDA's decompiler all the time to give me a somewhat better visualization of what the assembly I'm looking at does, which is still pretty cryptic but it makes it a lot easier/quicker to understand. When it doesn't break, anyway, which it does a lot of the time. Though I might actually give Snowman a shot since it has an IDA plugin, it'd be interesting to see if it works on functions IDA's decompiler fails with.

Exoduz · « **Reply #8 on:** July 01, 2017, 12:45:05 AM »

Yes, to compile it again would obviously not work

Before I read up on the subject of decompiling to get source code, I thought that decompiled output would be easy to read.
My first thought was that it couldn't differ that much; boy, was I wrong.

Yeah, Snowman has an IDA plugin that you can try; you might be able to get some more readable code from that!
If you feel tired of writing your tool, why not share it via GitHub and let others have a go at completing it?
As of now, most coding is cryptic to me and I am still learning C++ (I chose to start with C++ as my first language), so I am not really of any help; I am just interested in learning.

Hooman · « **Reply #9 on:** July 01, 2017, 04:02:01 PM »

Arklon, your project idea is pretty good. You could write a proper paper on your project. It's on par with other projects I've seen research papers on. A simpler general interest article could also be a good read.

Interesting to note the IDA plugin. I never really did give IDA all that much attention.

News:

Author Topic: Decompiling (Read 7724 times)

Exoduz

Decompiling

Sirbomber

Re: Decompiling

Hooman

Re: Decompiling

Arklon

Re: Decompiling

Hooman

Re: Decompiling

Exoduz

Re: Decompiling

Arklon

Re: Decompiling

Arklon

Re: Decompiling

Exoduz

Re: Decompiling

Hooman

Re: Decompiling