﻿/*
 * Name: Dump v1.04
 * Author: Thomas M. James
 * Date: Aug 2011
 * System: Pelles C for Windows, C99 standard.
 *
 * License: This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or (at your
 * option) any later version. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
 * Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * © 2011 Thomas James
 * tmj2005@gmail.com
 */

#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <ctype.h>
#include <io.h>

/* Scratch file that piped stdin data is written to so it can be seeked and measured. */
#define TEMP_FILE "_stdin"

/* Process exit codes returned by main() on failure. */
#define EXIT_INVALID_SYNTAX 2       /* unknown switch, or missing/invalid switch operand */
#define EXIT_AMBIGUOUS_SWITCH 3     /* abbreviated switch matches more than one name */
#define EXIT_TOO_MANY_FILENAMES 4   /* more than one non-switch argument was given */
#define EXIT_FILE_OPEN_FAILURE 5    /* input file or temp file could not be opened */
#define EXIT_EMPTY_FILE 6           /* input has a length of zero */
#define EXIT_OUTSIDE_BOUNDS 7       /* /skip address lies beyond the data */
#define EXIT_NOTHING_TO_DUMP 8      /* no filename given and stdin is a terminal */

/*
 * Table of recognised switch names, stored without the leading / or -.
 * swName() matches user input against these entries and returns a pointer
 * into this table; main() then compares the returned name against the same
 * literals. Each row is 9 bytes: the longest names ("condense", "compress")
 * are 8 characters plus the terminating NUL.
 */
char switches[][9] = {"?", "skip", "keep", "type", "address", "words", "condense",
"compress", "noascii", "replace", "divider", "width", "head", "tail", "lower" };

/*
 * Converts a NUL-terminated string to lower case in place.
 *
 * str  The string to convert; must not be NULL.
 *
 * Each character is passed to tolower() as an unsigned char, because
 * tolower() is only defined for values representable as unsigned char (or
 * EOF) and plain char may be negative for bytes >= 0x80.
 */
void toLower(char *str)
{   for (size_t i = 0; str[i] != '\0'; i++)
        str[i] = (char)tolower((unsigned char)str[i]);
}

/*
 * Gets the fully qualified name of a (possibly abbreviated) switch argument.
 *
 * sw  The switch text without its leading / or -. It is compared
 *     case-sensitively against the entries of switches[].
 *
 * An exact match is returned immediately. Otherwise sw is treated as a
 * prefix: if exactly one entry starts with it, that entry is returned.
 * Returns NULL if the switch is invalid, or (char*)-1 if it is ambiguous
 * (a prefix of more than one entry).
 */
char* swName(char *sw)
{   /* Derive the entry count from the table so the two cannot drift apart. */
    const size_t count = sizeof switches / sizeof switches[0];
    const size_t swLen = strlen(sw);
    int found = 0;
    char* rv = NULL;
    for (size_t i = 0; i < count; i++) {
        if (!strcmp( switches[i], sw ))
            return switches[i];
        if (!strncmp( switches[i], sw, swLen )) {
            found++;
            rv = switches[i];
        }
    }
    return found>1 ? (char*)-1 : rv;
}

/*
 * Reports whether str finishes with the characters of suffix.
 *
 * Returns false when either pointer is NULL or when suffix is longer than
 * str; an empty suffix matches any non-NULL string.
 */
bool endsWith(const char *str, const char *suffix)
{
    if (str == NULL || suffix == NULL)
        return false;

    const size_t strLength = strlen(str);
    const size_t suffixLength = strlen(suffix);
    if (strLength < suffixLength)
        return false;

    /* The tail of str is exactly as long as suffix, so compare them whole. */
    const char *tail = str + (strLength - suffixLength);
    return strcmp(tail, suffix) == 0;
}

/*
 * Converts a switch operand to a number. Operand may be prefixed
 * with 0x for hex, or 0 for octal, or appended with kb/mb/gb to obtain
 * the number of bytes for those units. A leading minus sign is accepted.
 *
 * operand  The operand text; it is lower-cased in place as a side effect.
 *
 * Returns the parsed value scaled by the unit, or 0 if no digits could be
 * parsed. If scaling by the unit would overflow int64_t the result is
 * clamped to INT64_MAX / INT64_MIN, mirroring how strtoll saturates,
 * instead of invoking signed-overflow undefined behaviour.
 */
int64_t numOp(char operand[])
{   toLower(operand);

    /* Look past an optional minus sign so the radix prefix is inspected once. */
    const char *digits = operand[0] == '-' ? operand + 1 : operand;
    int radix = 10;
    if (digits[0] == '0')
        radix = digits[1] == 'x' ? 16 : 8;

    int64_t rv = strtoll(operand, NULL, radix);

    int64_t unit = 1;
    if (endsWith(operand, "kb"))
        unit = 0x400;
    else if (endsWith(operand, "mb"))
        unit = 0x100000;
    else if (endsWith(operand, "gb"))
        unit = 0x40000000;

    if (unit > 1) {
        /* Check the bounds before multiplying; the product itself must never overflow. */
        if (rv > INT64_MAX / unit)
            rv = INT64_MAX;
        else if (rv < INT64_MIN / unit)
            rv = INT64_MIN;
        else
            rv *= unit;
    }
    return rv;
}

/*
 * Program entry point. Parses the command-line switches, opens the input
 * (a named file, or piped stdin spooled to TEMP_FILE), seeks to the requested
 * offset and prints the selected bytes as formatted lines with an optional
 * address marker and ASCII column.
 *
 * Returns EXIT_SUCCESS on success (including after printing the help text),
 * or one of the EXIT_* codes on failure.
 */
int main(int argc, char *argv[])
{
    /* Switches */
    int64_t sw_skip = 0;
    int64_t sw_keep = 0;
    char sw_type = 'h';    // [hoduc]
    char sw_address = 'h'; // [houn]
    bool sw_words = false;
    bool sw_condense = false;
    bool sw_compress = false;
    bool sw_noascii = false;
    char sw_replace = '.';
    char sw_divider = 'n'; // [ns-]
    int sw_width = 16;
    bool sw_lower = false;
    char *fileName = NULL;
    bool usingTemp = false; // set once piped stdin has been spooled to TEMP_FILE

    /* Process arguments */
    for (int i = 1; i < argc; i++) {
        if ((argv[i][0] == '/' || argv[i][0] == '-')) {
            if (strlen(argv[i]) < 2) {
                puts("Invalid syntax. Use /? for help.\n");
                return EXIT_INVALID_SYNTAX;
            }
            toLower( argv[i] );
            char* name = swName(&argv[i][1]);
            if (name == (char*)-1) {
                printf("Ambiguous switch %s. Use /? for help.\n", argv[i]);
                return EXIT_AMBIGUOUS_SWITCH;
            } else if (name == NULL ) {
                printf("Invalid switch %s. Use /? for help.\n", argv[i]);
                return EXIT_INVALID_SYNTAX;
            } else {
                int next = i+1; // index of this switch's operand, if it takes one
                if (!strcmp(name, "?")) {
                    /* NOTE(review): banner says v1.03 while the file header says v1.04 - confirm which is current. */
                    puts("Dump v1.03. Prints binary data in byte form to the console or an output file.\n"
                         "\n"
                         "DUMP filename [/skip n] [/keep n] [/type c] [/address c] [/words] [/condense]\n"
                         "              [/compress] [/noascii] [/replace] [/width n] [/divider c]\n"
                         "              [/head] [/tail] [/lower]\n"
                         "\n"
                         "  filename    Specifies the file to be dumped. If no file is specified then\n"
                         "              piped data is read from stdin.\n"
                         "  /skip n     Offset or address where dumping begins. Use a negative value to\n"
                         "              set an offset from the end of the file.\n"
                         "  /keep n     Number of bytes to be dumped.\n"
                         "  /type c     Format for displayed bytes (default hex). Code may be:\n"
                         "              h - hex, o - octal, d - decimal, u - unsigned decimal, c - char.\n"
                         "              When displaying in octal or decimal formats it will be necessary\n"
                         "              to increase the console width buffer or reduce the number of\n"
                         "              bytes per line with /width, to avoid line-wrap.\n"
                         "  /address c  Format for offset address marker (default hex). Code may be:\n"
                         "              h - hex, o - octal, u - unsigned decimal, n - no address.\n"
                         "  /words      Display byte data grouped in 2-byte words.\n"
                         "  /condense   Display bytes contiguously without spaces or divider.\n"
                         "  /compress   Display duplicate lines as an asterisk.\n"
                         "  /noascii    ASCII column will not be displayed.\n"
                         "  /replace c  Sets the replacement for non printable characters in the ASCII\n"
                         "              column, or the bytes area when using /type c. Use sp for space.\n"
                         "  /width n    Number of bytes to be displayed per line (default 16, max 128).\n"
                         "  /divider c  Specifies a divider character to be displayed in the middle of\n"
                         "              the line of bytes, when the number of bytes or 2-byte words on\n"
                         "              the line is even. Code may be: s - double space, - - hyphen.\n"
                         "  /head       Display first 64 bytes of the file only. Equivalent to /keep 64.\n"
                         "  /tail       Display last 64 bytes of the file only. Equivalent to /skip -64.\n"
                         "  /lower      Display alphabetic hex symbols in lowercase.\n"
                         "\n"
                         "Bytes are dumped to the console. Use redirection to output to a file:\n"
                         "dump filename > output.txt\n"
                         "\n"
                         "When no filename is specified, piped data from stdin will be dumped instead:\n"
                         "echo hello world! | dump\n"
                         "type main.c | dump\n"
                         "\n"
                         "Numeric operands may be prefixed with 0x or 0X for a hex value or 0 for octal,\n"
                         "and appended with kb/mb/gb for kilobytes/megabytes/gigabytes respectively.\n"
                         "Syntax is case insensitive. Note that switches may be typed in shorthand:\n"
                         "/r instead of /replace. Where the typed switch is ambiguous, a warning will be\n"
                         "displayed and further characters will need to be typed to disambiguate.\n"
                         "You may precede the switch name with either / or -.\n");
                    return EXIT_SUCCESS;
                } else if (!strcmp(name, "skip")) {
                    if (next >= argc) {
                        printf("Operand of %s must be a value greater than 0. Use /? for help.\n", argv[i]);
                        return EXIT_INVALID_SYNTAX;
                    } else {
                        sw_skip=numOp(argv[next]);
                        i++;
                    }
                } else if (!strcmp(name, "keep")) {
                    if (next >= argc || (sw_keep=numOp(argv[next])) <= 0) {
                        printf("Operand of %s must be a value greater than 0. Use /? for help.\n", argv[i]);
                        return EXIT_INVALID_SYNTAX;
                    } else
                        i++;
                } else if (!strcmp(name, "type")) {
                    /* tolower() needs an unsigned char value; plain char may be negative */
                    if (next >= argc || (sw_type=(char)tolower((unsigned char)argv[next][0])) != 'h' && sw_type != 'o' && sw_type != 'd' && sw_type != 'u' && sw_type != 'c') {
                        printf("Operand of %s must be a character in the range [hoduc]. Use /? for help.\n", argv[i]);
                        return EXIT_INVALID_SYNTAX;
                    } else
                        i++;
                } else if (!strcmp(name, "address")) {
                    if (next >= argc || (sw_address=(char)tolower((unsigned char)argv[next][0])) != 'h' && sw_address != 'o' && sw_address != 'u' && sw_address != 'n') {
                        printf("Operand of %s must be a character in the range [houn]. Use /? for help.\n", argv[i]);
                        return EXIT_INVALID_SYNTAX;
                    } else
                        i++;
                } else if (!strcmp(name, "words"))
                    sw_words = true;
                else if (!strcmp(name, "condense"))
                    sw_condense = true;
                else if (!strcmp(name, "compress"))
                    sw_compress = true;
                else if (!strcmp(name, "noascii"))
                    sw_noascii = true;
                else if (!strcmp(name, "replace")) {
                    if (next >= argc) {
                        printf("Operand of %s must be a character or sp. Use /? for help.\n", argv[i]);
                        return EXIT_INVALID_SYNTAX;
                    } else {
                        /*
                         * A two-character operand is only honoured when it is "sp";
                         * any other two-character operand leaves sw_replace unchanged.
                         */
                        if (strlen(argv[next]) == 2) {
                            toLower(argv[next]);
                            if (!strcmp(argv[next], "sp"))
                                sw_replace = ' ';
                        } else
                            sw_replace = argv[next][0];
                        i++;
                    }
                }
                else if (!strcmp(name, "divider")) {
                    if (next >= argc || (sw_divider=(char)tolower((unsigned char)argv[next][0])) != '-' && sw_divider != 's') {
                        printf("Operand of %s must be a character in the range [s-]. Use /? for help.\n", argv[i]);
                        return EXIT_INVALID_SYNTAX;
                    } else
                        i++;
                } else if (!strcmp(name, "width")) {
                    /* Range-check in 64 bits before narrowing, so a huge value cannot wrap into range */
                    int64_t width = next < argc ? numOp(argv[next]) : 0;
                    if (width <= 0 || width > 128) {
                        printf("Operand of %s must be a value between 1 and 128. Use /? for help.\n", argv[i]);
                        return EXIT_INVALID_SYNTAX;
                    }
                    sw_width = (int)width;
                    i++;
                } else if (!strcmp(name, "head")) {
                    sw_keep = 64;
                    sw_skip = 0;
                } else if (!strcmp(name, "tail")) {
                    sw_keep = 0;
                    sw_skip = -64;
                } else if (!strcmp(name, "lower"))
                    sw_lower = true;
            }
        } else {
            if (fileName != NULL) {
                puts("Too many filenames specified. Use /? for help.\n");
                return EXIT_TOO_MANY_FILENAMES;
            } else
                fileName = argv[i];
        }
    }

    /*
     * If no filename specified read piped data from stdin
     */
    if (fileName == NULL) {
        if (_isatty(_fileno(stdin))) {
            puts("Nothing to dump.");
            return EXIT_NOTHING_TO_DUMP;
        }
        /*
         * Save stdin to a temporary file, allowing
         * seek and pre-calculation of data length.
         *
         * NOTE(review): stdin is read in whatever mode the runtime opened it
         * in; if that is text mode, piped binary data may be altered before
         * it reaches the temp file - confirm, and switch stdin to binary if so.
         */
        FILE *out;
        if ((out = fopen(TEMP_FILE, "wb")) == NULL) {
            printf("Could not open file %s\n", TEMP_FILE);
            return EXIT_FILE_OPEN_FAILURE;
        }
        int c;
        while ((c = fgetc(stdin)) != EOF)
            fputc(c, out);
        fclose(out);
        fclose(stdin);
        fileName = TEMP_FILE;
        usingTemp = true;
    }

    FILE *fp;
    if ((fp = fopen(fileName, "rb")) == NULL) {
        printf("Could not open file %s.\n", fileName);
        if (usingTemp)
            remove(TEMP_FILE);
        return EXIT_FILE_OPEN_FAILURE;
    }

    /*
     * Get length of file to be dumped. Used for bounds checking with /skip and
     * to enable calculation of padding length for address marker. Length is held
     * in a 64-bit integer, allowing seek through files larger than 4 gigabytes
     * on platforms where a long int would only be 32 bits wide. Maximum file
     * size is 8589934591 gigabytes. Probably enough.
     */
    int64_t len = 0;
    _fseek64(fp, 0, SEEK_END);
    if ((len = _ftell64(fp)) == 0) {
        printf("File %s is empty.\n", fileName);
        /* Release the stream and the spooled copy of stdin before bailing out */
        fclose(fp);
        if (usingTemp)
            remove(TEMP_FILE);
        return EXIT_EMPTY_FILE;
    }

    /* Seek to right location */
    if ((sw_skip<0?-sw_skip:sw_skip+1) > len) {
        printf("Address %lld is outside the bounds of the %s.\n", sw_skip, usingTemp?"data":"file");
        fclose(fp);
        if (usingTemp)
            remove(TEMP_FILE);
        return EXIT_OUTSIDE_BOUNDS;
    }
    int64_t offset = sw_skip<0 ? len+sw_skip : sw_skip;
    _fseek64(fp, sw_skip, sw_skip<0 ? SEEK_END : SEEK_SET );

    int ch;
    char line[1024] = "";      // the output line currently being assembled
    char address[32] = "";     // formatted address of the current line, kept for /compress
    char asciiCh[2] = "";
    char ascii[129] = "";      // ASCII column for the current line (max width 128 + NUL)
    char bytes[128] = "";      // raw bytes of the current line
    char lastBytes[128] = "";  // raw bytes of the previous line, for /compress comparison
    char *p = line;            // write cursor into line
    int lineLen = 0;           // length of the first full line printed; 0 until then
    int64_t count = 0;         // total bytes dumped so far
    bool groupDiv = false;
    bool even = true;

    /*
     * Pre-generate address format string. Calculates how many characters are needed
     * to represent the file length in the chosen radix and pads to that amount.
     * The "ll" length modifier is used because the length and offsets are 64-bit;
     * the scratch buffers are sized for the longest 64-bit rendering (octal).
     */
    char addresst = sw_address=='h' ? sw_lower?'x':'X' : sw_address;
    char addressf[16] = "";
    if (sw_address != 'n') {
        char lenf[8];
        char lenText[32];
        snprintf(lenf, sizeof lenf, "%%ll%c", addresst);                            // e.g. "%llX"
        int addressw = snprintf(lenText, sizeof lenText, lenf, (unsigned long long)len);
        snprintf(addressf, sizeof addressf, "%%0%dll%c: ", addressw, addresst);      // e.g. "%08llX: "
    }

    /* Pre-generate byte format string */
    char bytet = sw_type=='h' ? sw_lower?'x':'X' : sw_type;
    char bytef[5] = "%0  ";
    switch (sw_type) {
        case 'c': bytef[2] = '1'; break; // char needs no padding
        case 'h': bytef[2] = '2'; break; // 0 - FF, pad 2
        case 'u':                        // 0 - 255, pad 3
        case 'o': bytef[2] = '3'; break; // 0 - 377, pad 3
        case 'd': bytef[2] = '4'; break; // pad one more for minus sign
    }
    bytef[3] = bytet;

    /*
     * Loop until end of file, or the number of bytes
     * specified by the keep switch is reached.
     * i is the byte's position within the current line; it is reset to -1
     * at the end of each full line so the loop increment brings it back to 0.
     */
    for (int i = 0; (ch = fgetc(fp)) != EOF && (count<sw_keep||sw_keep==0); i++, count++) {

        /* Print address */
        if (i == 0 && sw_address != 'n') {
            if (sw_compress)
                snprintf(address, sizeof address, addressf, (unsigned long long)offset); // save for print on compressed line
            p += sprintf(p, addressf, (unsigned long long)offset);
        }

        /* Print byte */
        if (sw_type == 'd') {
            /* signed char, not plain char, so the value is signed on every implementation */
            p += sprintf(p, bytef, (signed char)ch);
        } else
            p += sprintf(p, bytef, sw_type=='c'&&!isprint(ch) ? sw_replace : ch);
        bytes[i] = ch;

        /* Build ascii column */
        if (! sw_noascii) {
            sprintf(asciiCh, "%c", isprint(ch) ? ch : sw_replace);
            strcat(ascii, asciiCh);
        }

        /*
         * Print divider between bytes. This will be either a space separating each byte/2-byte word, or
         * an optional hyphen/double space divider to be shown down the middle. Optional divider will
         * only be shown if the number of bytes or 2-byte words on the line is even.
         */
        if (!sw_condense) {

            /* Check if line is of even length */
            even = !(sw_width&1);
            if (sw_words)
                even = even && !((sw_width/2)&1);

            /* If line is even and we're at middle of it */
            if (even && i+1==sw_width/2) {
                /* Print the optional divider */
                switch (sw_divider) {
                    case '-': p += sprintf(p, "-"); break;
                    case 's': p += sprintf(p, "  "); break;
                    case 'n': p += sprintf(p, " ");
                }
                groupDiv = false;
            } else {
                /* Print the regular separator, unless we're at the end of the line */
                if (i+1 != sw_width) {
                    if (sw_words) {
                        if (groupDiv)
                            p += sprintf(p, " ");
                    } else
                        p += sprintf(p, " ");
                }
                if (sw_words)
                    groupDiv = !groupDiv;
            }
        }

        /* Print whole line if we've reached the last byte */
        if (i+1 == sw_width) {
            /*
             * If compressing, print an asterisk if this line equals
             * the previous one, otherwise just print the line.
             * The first line is always printed in full (lineLen is still 0).
             */
            if (sw_compress && !memcmp(lastBytes, bytes, 128) && lineLen) {
                printf("%s*\n", address);
            } else {
                if (! sw_noascii)
                    p += sprintf(p, "  %s", ascii);
                if (!lineLen)
                    lineLen = strlen(line); // save line length for pad on final line
                puts(line);
            }
            p = line;
            line[0] = '\0';
            ascii[0] = '\0';
            offset += sw_width;
            if (sw_compress) {
                memcpy(lastBytes, bytes, 128);
                memset(bytes, 0, 128);
            }
            i = -1;
        }
    }

    /* Print final (partial) line */
    if( strlen(line) > 0 ) {
        if (sw_compress && !memcmp(lastBytes, bytes, 128) && lineLen) {
            printf("%s*\n", address);
        } else {
            if (! sw_noascii) {
                if (count < sw_width ) {
                    /* Only one short line in total: nothing to align the ASCII column with */
                    if (sw_condense)
                        p += sprintf(p, " ");
                    sprintf(p, " %s", ascii);
                }
                else {
                    /* Pad so the ASCII column lines up with the full lines above */
                    int gap = lineLen-(int)strlen(line)-sw_width;
                    for (int i = 0; i < gap; i++)
                        p += sprintf(p, " ");
                    sprintf(p, "%s", ascii);
                }
            }
            puts(line);
        }
    }

    fclose(fp);
    if (usingTemp)
        remove(TEMP_FILE);
    return EXIT_SUCCESS;

}
