/*
Copyright (c) 2000-2004, 2007 Tony Garnock-Jones <tonyg@kcbbs.gen.nz>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <assert.h>

#include "ubf_a.h"

static char const * const ubf_a_reserved_chars = "%\"'`~#&$>-0123456789{}, \n\r\t";

/* EOF is -1, obviously, and: */
#define CHARNOTAVAILABLE -2

typedef enum UBF_A_MachineState_ {
  UBF_A_Machine_Mainloop = 0,
  UBF_A_Machine_CollectingQuoted,
  UBF_A_Machine_CollectingBinary,
  UBF_A_Machine_CollectingInteger,
  UBF_A_Machine_Binding
} UBF_A_MachineState;

typedef void (*string_continuation_t)(UBF_A_DecoderState, char *, size_t, void *);

struct UBF_A_DecoderState_ {
  UBF_A_Pool statepool;

  UBF_A_Pool *mainpool;

  UBF_A_Object *stackcache;
  UBF_A_Object *stacktop;

  size_t stringbufferlength;
  char *stringbuffer;

  UBF_A_CharSource source;
  void *source_arg;

  void (*dispatch[256])(struct UBF_A_DecoderState_ *, int, void *);
  void *dispatch_arg[256];

  int machine_finished;
  UBF_A_MachineState machine_state;
  UBF_A_Error machine_status;

  struct {
    int sentinel;
    int in_escape;
    size_t length;
    string_continuation_t continuation;
    void *continuation_arg;
  } quoted;

  struct {
    size_t length;
    UBF_A_Object *bin;
    size_t index;
  } binary;

  struct {
    signed long long sign;
    signed long long acc;
  } num;
};

static void push(UBF_A_DecoderState p, UBF_A_Object *x) {
  UBF_A_Object *cell;

  if (p->stackcache == NULL) {
    cell = ubf_a_pair(&p->statepool, NULL, p->stacktop);
  } else {
    cell = p->stackcache;
    p->stackcache = cell->body.pair.tail;
    cell->body.pair.tail = p->stacktop;
  }

  cell->body.pair.head = x;
  p->stacktop = cell;
}

static int isempty(UBF_A_DecoderState p) {
  return (p->stacktop == NULL);
}

static UBF_A_Object *peek(UBF_A_DecoderState p) {
  assert(p->stacktop != NULL);
  return p->stacktop->body.pair.head;
}

static UBF_A_Object *pop(UBF_A_DecoderState p) {
  UBF_A_Object *result, *cell;

  assert(p->stacktop != NULL);

  cell = p->stacktop;
  p->stacktop = cell->body.pair.tail;

  result = cell->body.pair.head;
  cell->body.pair.head = NULL;

  cell->body.pair.tail = p->stackcache;
  p->stackcache = cell;

  return result;
}

static void finish_with(UBF_A_DecoderState p, UBF_A_Error err) {
  p->machine_finished = 1;
  p->machine_status = err;
}

static void startCollectingQuoted(UBF_A_DecoderState p,
				  int sentinel,
				  string_continuation_t continuation,
				  void *continuation_arg)
{
  p->quoted.sentinel = sentinel;
  p->quoted.in_escape = 0;
  p->quoted.length = 0;
  p->quoted.continuation = continuation;
  p->quoted.continuation_arg = continuation_arg;
  p->machine_state = UBF_A_Machine_CollectingQuoted;
}

static void commentContinuation(UBF_A_DecoderState p, char *str, size_t len, void *arg) {
  /* Ignore the comment text */
}

static void handleComment(UBF_A_DecoderState p, int c, void *arg) {
  startCollectingQuoted(p, '%', commentContinuation, NULL);
}

static void stringContinuation(UBF_A_DecoderState p, char *str, size_t len, void *arg) {
  push(p, ubf_a_string(p->mainpool, str));
}

static void handleString(UBF_A_DecoderState p, int c, void *arg) {
  startCollectingQuoted(p, '"', stringContinuation, NULL);
}

static void symbolContinuation(UBF_A_DecoderState p, char *str, size_t len, void *arg) {
  push(p, ubf_a_symbol(p->mainpool, str));
}

static void handleSymbol(UBF_A_DecoderState p, int c, void *arg) {
  startCollectingQuoted(p, '\'', symbolContinuation, NULL);
}

static void semanticTagContinuation(UBF_A_DecoderState p, char *str, size_t len, void *arg) {
  if (isempty(p)) {
    finish_with(p, UBF_A_OrphanSemanticTag);
    return;
  }

  push(p, ubf_a_tag(p->mainpool, str, pop(p)));
}

static void handleSemanticTag(UBF_A_DecoderState p, int c, void *arg) {
  startCollectingQuoted(p, '`', semanticTagContinuation, NULL);
}

static void handleBinary(UBF_A_DecoderState p, int c, void *arg) {
  if (isempty(p) || peek(p)->kind != UBF_A_NUMBER) {
    finish_with(p, UBF_A_MissingBinaryLength);
    return;
  }

  p->binary.length = (size_t) pop(p)->body.num;
  p->binary.bin = ubf_a_binary_nocopy(p->mainpool, p->binary.length);
  p->binary.index = 0;
  p->machine_state = UBF_A_Machine_CollectingBinary;
}

static void handleNull(UBF_A_DecoderState p, int c, void *arg) {
  push(p, NULL);
}

static void handleCons(UBF_A_DecoderState p, int c, void *arg) {
  UBF_A_Object *head, *tail;

  if (isempty(p)) {
    finish_with(p, UBF_A_StackUnderflow);
    return;
  }
  head = pop(p);
  if (isempty(p)) {
    finish_with(p, UBF_A_StackUnderflow);
    return;
  }
  tail = pop(p);

  push(p, ubf_a_pair(p->mainpool, head, tail));
}

static void handleEom(UBF_A_DecoderState p, int c, void *arg) {
  if (isempty(p)) {
    finish_with(p, UBF_A_StackUnderflow);
    return;
  }

  p->mainpool->root = pop(p);

  if (!isempty(p)) {
    finish_with(p, UBF_A_StackOverflow);
    return;
  }

  p->machine_finished = 1;
}

static void handleBoundReference(UBF_A_DecoderState p, int c, void *arg) {
  push(p, (UBF_A_Object *) arg);
}

static void handleBind(UBF_A_DecoderState p, int c, void *arg) {
  p->machine_state = UBF_A_Machine_Binding;
}

static void handleInt(UBF_A_DecoderState p, int c, void *arg) {
  p->num.sign = (c == '-' ? -1 : 1);
  p->num.acc = (c == '-' ? 0 : (c - '0'));
  p->machine_state = UBF_A_Machine_CollectingInteger;
}

static void handleOpenStruct(UBF_A_DecoderState p, int c, void *arg) {
  UBF_A_Object *marker = ubf_a_pool_alloc(&p->statepool, sizeof(UBF_A_Object));
  marker->kind = UBF_A_UNKNOWN;
  push(p, marker);
}

static void handleCloseStruct(UBF_A_DecoderState p, int c, void *arg) {
  size_t length = 0;
  int i;
  UBF_A_Object *x = p->stacktop;

  while (x != NULL && x->body.pair.head->kind != UBF_A_UNKNOWN) {
    length++;
    x = x->body.pair.tail;
  }

  if (x == NULL) {
    finish_with(p, UBF_A_MissingOpenStruct);
    return;
  }

  x = ubf_a_vector(p->mainpool, length);
  for (i = (int) length - 1; i >= 0; i--) {
    x->body.vector.vec[i] = pop(p);
  }

  if (pop(p)->kind != UBF_A_UNKNOWN) {
    finish_with(p, UBF_A_MissingOpenStruct);
    return;
  }

  push(p, x);
}

static void ignore(UBF_A_DecoderState p, int c, void *arg) {
}

UBF_A_Error ubf_a_start_decode(UBF_A_DecoderState *stateptr,
			       UBF_A_Pool *mainpool,
			       UBF_A_CharSource source, void *source_arg)
{
  UBF_A_DecoderState state = malloc(sizeof(struct UBF_A_DecoderState_));

  assert(state != NULL);
  assert(mainpool != NULL);
  assert(source != NULL);
  /* don't care about source_arg */

  assert(EOF != CHARNOTAVAILABLE);

  *stateptr = state;

  init_ubf_a_pool(&state->statepool, mainpool->pagesize);
  state->mainpool = mainpool;
  state->stackcache = NULL;
  state->stacktop = NULL;
  state->stringbufferlength = 1024;
  state->stringbuffer = malloc(state->stringbufferlength);
  state->source = source;
  state->source_arg = source_arg;
  {
    int i;
    for (i = 0; i < 256; i++) {
      state->dispatch[i] = NULL;
      state->dispatch_arg[i] = NULL;
    }
  }
  state->machine_finished = 0;
  state->machine_state = UBF_A_Machine_Mainloop;
  state->machine_status = UBF_A_NoError;
  state->quoted.sentinel = 0;
  state->quoted.in_escape = 0;
  state->quoted.length = 0;
  state->quoted.continuation = NULL;
  state->quoted.continuation_arg = NULL;
  state->binary.length = 0;
  state->binary.bin = NULL;
  state->binary.index = 0;
  state->num.sign = 0;
  state->num.acc = 0;

  state->dispatch['%'] = handleComment;
  state->dispatch['"'] = handleString;
  state->dispatch['\''] = handleSymbol;
  state->dispatch['`'] = handleSemanticTag;
  state->dispatch['~'] = handleBinary;
  state->dispatch['#'] = handleNull;
  state->dispatch['&'] = handleCons;
  state->dispatch['$'] = handleEom;
  state->dispatch['>'] = handleBind;
  state->dispatch['-'] = handleInt;
  {
    int i;
    for (i = '0'; i <= '9'; i++) state->dispatch[i] = handleInt;
  }
  state->dispatch['{'] = handleOpenStruct;
  state->dispatch['}'] = handleCloseStruct;
  state->dispatch[' '] = ignore;
  state->dispatch['\n'] = ignore;
  state->dispatch['\r'] = ignore;
  state->dispatch['\t'] = ignore;
  state->dispatch[','] = ignore;

  return state->machine_status;
}

UBF_A_Error ubf_a_update_decode(UBF_A_DecoderState state, int *finished) {
  int c = EOF;

 next_iteration:
  if (state->machine_finished)
    goto leave_loop;

  c = state->source(state->source_arg);

  if (c == EOF) {
    finish_with(state, UBF_A_EarlyEOF);
    goto leave_loop;
  }

  if (c == CHARNOTAVAILABLE)
    goto leave_loop;

  if (c < 0 || c >= 256) {
    finish_with(state, UBF_A_UnhandledCharacter);
    goto leave_loop;
  }

 next_iteration_without_read:
  switch (state->machine_state) {
    case UBF_A_Machine_Mainloop:
      if (state->dispatch[c] == NULL) {
	finish_with(state, UBF_A_UnhandledCharacter);
	goto leave_loop;
      } else {
	state->dispatch[c](state, c, state->dispatch_arg[c]);
	goto next_iteration;
      }

    case UBF_A_Machine_CollectingQuoted:
      if (state->quoted.in_escape) {
	if (c != '\\' && c != state->quoted.sentinel) {
	  finish_with(state, UBF_A_UnsupportedQuotedCharacter);
	  goto leave_loop;
	}
	state->quoted.in_escape = 0;
      } else {
	if (c == state->quoted.sentinel) {
	  state->machine_state = UBF_A_Machine_Mainloop;
	  state->stringbuffer[state->quoted.length] = '\0';
	  state->quoted.continuation(state,
				     state->stringbuffer,
				     state->quoted.length,
				     state->quoted.continuation_arg);
	  goto next_iteration;
	}
	if (c == '\\') {
	  state->quoted.in_escape = 1;
	  goto next_iteration;
	}
      }

      while (state->quoted.length >= state->stringbufferlength) {
	state->stringbufferlength *= 2;
	state->stringbuffer = realloc(state->stringbuffer, state->stringbufferlength);
      }

      state->stringbuffer[state->quoted.length++] = c;
      goto next_iteration;

    case UBF_A_Machine_CollectingBinary:
      if (state->binary.index < state->binary.length) {
	state->binary.bin->body.data.vec[state->binary.index++] = c;
      } else {
	if (c != '~') {
	  finish_with(state, UBF_A_MissingBinaryTilde);
	  goto leave_loop;
	}
	push(state, state->binary.bin);
	state->machine_state = UBF_A_Machine_Mainloop;
      }
      goto next_iteration;

    case UBF_A_Machine_CollectingInteger:
      if (c >= '0' && c <= '9') {
	state->num.acc = state->num.acc * 10 + (c - '0');
	goto next_iteration;
      } else {
	push(state, ubf_a_number(state->mainpool, state->num.sign * state->num.acc));
	state->machine_state = UBF_A_Machine_Mainloop;
	goto next_iteration_without_read;
      }

    case UBF_A_Machine_Binding:
      if (strchr(ubf_a_reserved_chars, c) != NULL) {
	finish_with(state, UBF_A_ReservedCharacter);
	goto leave_loop;
      }

      if (isempty(state)) {
	finish_with(state, UBF_A_StackUnderflow);
	goto leave_loop;
      }

      state->dispatch_arg[c] = pop(state);
      state->dispatch[c] = handleBoundReference;
      state->machine_state = UBF_A_Machine_Mainloop;
      goto next_iteration;
  }

 leave_loop:
  *finished = state->machine_finished;
  return state->machine_status;
}

void ubf_a_finish_decode(UBF_A_DecoderState state) {
  empty_ubf_a_pool(&state->statepool);
  free(state->stringbuffer);
  free(state);
}

UBF_A_Error ubf_a_decode(UBF_A_Pool *mainpool, UBF_A_CharSource source, void *source_arg) {
  UBF_A_DecoderState state;
  UBF_A_Error status = UBF_A_NoError;
  int finished = 0;

  status = ubf_a_start_decode(&state, mainpool, source, source_arg);
  if (status) {
    ubf_a_finish_decode(state);
    return status;
  }

  status = ubf_a_update_decode(state, &finished);
  if (status) {
    ubf_a_finish_decode(state);
    return status;
  }

  if (!finished) {
    /* Essentially, EAGAIN. In here, there's nothing we can do, since
       we don't know anything about how to fill the buffers our source
       is using. */
    ubf_a_finish_decode(state);
    return UBF_A_EarlyEOF;
  }

  ubf_a_finish_decode(state);
  return UBF_A_NoError;
}

