/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can
 * be found in the LICENSE file.
 *
 */

//
//
//

#include <assert.h>
#include <string.h>

#include "runtime_cl_12.h"
#include "scheduler.h"

//
//
//

#ifndef NDEBUG

#include <stdio.h>

#define SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,subbuf_id,ss)        \
  fprintf(stderr,                                                      \
          "suballocator %s : [ %4u ] : alloc( %9u ) @ %4u = %u\n",     \
          suballocator->name,                                          \
          suballocator->rem.avail,                                     \
          (skc_uint)ss,                                                \
          subbuf_id,                                                   \
          (skc_uint)suballocator->total);

#define SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,ss)         \
  fprintf(stderr,                                                      \
          "suballocator %s : [ %4u ] : free ( %9u ) @ %4u = %u\n",     \
          suballocator->name,                                          \
          suballocator->rem.avail,                                     \
          (skc_uint)ss,                                                \
          subbuf_id,                                                   \
          (skc_uint)suballocator->total);

#else

#define SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,subbuf_id,ss)
#define SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,ss)

#endif

//
//
//

void
skc_suballocator_create(struct skc_runtime      * const runtime,
                        struct skc_suballocator * const suballocator,
                        char              const * const name,
                        skc_uint                  const subbufs,
                        size_t                    const align,
                        size_t                    const size)
{
  size_t const subbufs_size = sizeof(*suballocator->subbufs) * subbufs;

  // allocate array of subbuf records
  suballocator->subbufs = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,subbufs_size);

  // zero subbufs
  memset(suballocator->subbufs,0,subbufs_size);

  // initialize starting subbuf
  suballocator->subbufs[0].size = (skc_subbuf_size_t)size;

  // allocate array of ids
  suballocator->ids = skc_runtime_host_perm_alloc(runtime,
                                                  SKC_MEM_FLAGS_READ_WRITE,
                                                  sizeof(*suballocator->ids) * subbufs);
  for (skc_uint ii=0; ii<subbufs; ii++)
    suballocator->ids[ii] = ii;

  suballocator->rem.avail = 1;
  suballocator->rem.spare = subbufs - 1;

  suballocator->align     = (skc_uint)align;
  suballocator->count     = subbufs;

  suballocator->size      = (skc_subbuf_size_t)size;
  suballocator->total     = 0;

  suballocator->name      = name;
}

void
skc_suballocator_dispose(struct skc_runtime      * const runtime,
                         struct skc_suballocator * const suballocator)
{
  skc_runtime_host_perm_free(runtime,suballocator->ids);
  skc_runtime_host_perm_free(runtime,suballocator->subbufs);
}

//
// Sets id and returns origin
//

size_t
skc_suballocator_subbuf_alloc(struct skc_suballocator * const suballocator,
                              struct skc_scheduler    * const scheduler,
                              size_t                    const size,
                              skc_subbuf_id_t         * const subbuf_id,
                              size_t                  * const subbuf_size)
{
  //
  // Note that we can't deadlock here because everything allocated is
  // expected to be freed within msecs.  Worst case, we wait for
  // resources to become available while a fully utilized GPU makes
  // forward progress on its kernels.
  //
  // This behavior should guide the sizing of the suballocator's
  // number of subbuffers and extent.
  //
  // We want to allocate a large enough extent and enough subbuffer
  // records so that the CPU/GPU is never starved.
  //

  // round up the size
  skc_subbuf_size_t const size_ru = (skc_subbuf_size_t)SKC_ROUND_UP_POW2(size,suballocator->align);

  // save it
  if (subbuf_size != NULL)
    *subbuf_size = size_ru;

  //
  // We precheck that there is at least one region of memory available
  // but do not check to see if there is a spare.  Instead, we simply
  // keep looking for an exact fit.
  //
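  //
  // (Layout note, inferred from create/alloc/free in this file:
  // ids[0 .. rem.avail-1] hold the ids of free, linked-in subbuffers,
  // and each such subbuf records its slot in its idx field;
  // ids[count - rem.spare .. count-1] form a stack of spare, unlinked
  // records; ids of in-use subbuffers appear in neither region.)
  //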
  skc_subbuf_id_t * const ids = suballocator->ids;

  while (true)
    {
      skc_uint avail_rem = suballocator->rem.avail;
      skc_uint spare_rem = suballocator->rem.spare;

      for (skc_uint avail_idx=0; avail_idx<avail_rem; avail_idx++)
        {
          skc_subbuf_id_t     const avail_id = ids[avail_idx];
          struct skc_subbuf * const avail    = suballocator->subbufs + avail_id;

          assert(avail->inuse == 0);

          if (avail->size == size_ru) // size matches exactly
            {
              suballocator->total += size_ru;

              // return this id
              *subbuf_id = avail_id;

              SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,avail_id,size_ru);

              // mark the subbuffer as in use
              avail->inuse += 1;

              assert(avail->inuse == 1);

              // update rem avail count
              suballocator->rem.avail = --avail_rem;

              // replace now inuse id with last avail id
              if ((avail_rem > 0) && (avail_idx != avail_rem))
                {
                  skc_subbuf_id_t     const last_id = ids[avail_rem];
                  struct skc_subbuf * const last    = suballocator->subbufs + last_id;

                  ids[avail_idx] = last_id;   // move id
                  last->idx      = avail_idx; // update idx[]
                }

              assert(suballocator->rem.avail > 0);

              // return origin
              return avail->origin;
            }
          else if ((avail->size > size_ru) && (spare_rem > 0)) // requested is less than available so split it
            {
              suballocator->total += size_ru;

              skc_uint                  spare_idx = suballocator->count - spare_rem;
              skc_subbuf_id_t     const spare_id  = ids[spare_idx];
              struct skc_subbuf * const spare     = suballocator->subbufs + spare_id;

              assert(spare->inuse == 0);

              // simple -- we're popping the top-of-stack of spares
              suballocator->rem.spare -= 1;

              // return id
              *subbuf_id = spare_id;

              SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,spare_id,size_ru);

              // get prev
              struct skc_subbuf * const prev = avail->prev;

              if (prev != NULL)
                prev->next = spare;

              // init spare
              spare->prev    = prev;
              spare->next    = avail;
              spare->size    = size_ru;
              spare->origin  = avail->origin;

              spare->idx     = SKC_UINT_MAX; // defensive
              spare->inuse  += 1;

              // update curr
              avail->prev    = spare;
              avail->size   -= size_ru;
              avail->origin += size_ru;

              assert(suballocator->rem.avail > 0);

              return spare->origin;
            }
        }

      // uh oh... couldn't find enough memory
      skc_scheduler_wait(scheduler);
    }
}
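//
// Illustrative only -- a minimal sketch of the calling pattern the
// comments above assume: a subbuffer is grabbed, handed to enqueued
// work, and released within milliseconds.  The function name and the
// byte count below are hypothetical placeholders, not part of the
// runtime.
//

#if 0

static
void
skc_example_subbuf_round_trip(struct skc_suballocator * const suballocator,
                              struct skc_scheduler    * const scheduler)
{
  skc_subbuf_id_t id;
  size_t          size_ru;

  // may block in skc_scheduler_wait() until in-flight work frees memory
  size_t const origin = skc_suballocator_subbuf_alloc(suballocator,scheduler,65536,&id,&size_ru);

  // ... enqueue kernels that use [origin, origin + size_ru) ...
  (void)origin;

  // release promptly so other grids aren't starved
  skc_suballocator_subbuf_free(suballocator,id);
}

#endif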
//
// FIXME -- simplify this with a merge-with-prev() primitive
//

void
skc_suballocator_subbuf_free(struct skc_suballocator * const suballocator,
                             skc_subbuf_id_t                 subbuf_id)
{
  // get subbuf for id
  struct skc_subbuf * const subbuf = suballocator->subbufs + subbuf_id;

  assert(subbuf->inuse == 1);

  suballocator->total -= subbuf->size;

  SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,subbuf->size);

  //
  // try to merge subbuf with left and maybe right and then dispose
  //
  struct skc_subbuf * prev;
  struct skc_subbuf * next;

  if (((prev = subbuf->prev) != NULL) && !prev->inuse)
    {
      next = subbuf->next;

      if ((next != NULL) && !next->inuse)
        {
          subbuf->inuse -= 1;

          assert(next->inuse == 0);

          // increment size
          prev->size += (subbuf->size + next->size);

          struct skc_subbuf * const nextnext = next->next;

          // update next link
          prev->next = nextnext;

          // update prev link
          if (nextnext != NULL)
            nextnext->prev = prev;

          //
          // both subbuf and next are now spare which means we need to
          // move final available subbuffer into next's old position
          // unless they're the same
          //
          skc_uint const last_idx = --suballocator->rem.avail;
          skc_uint const next_idx = next->idx;

          assert(suballocator->rem.avail > 0);

          if (last_idx != next_idx)
            {
              skc_subbuf_id_t     const last_id = suballocator->ids[last_idx];
              struct skc_subbuf * const last    = suballocator->subbufs + last_id;

              suballocator->ids[next_idx] = last_id;
              last->idx                   = next_idx;
            }

          skc_subbuf_id_t const next_id   = (skc_subbuf_id_t)(next - suballocator->subbufs);

          skc_uint        const spare_rem = suballocator->rem.spare + 2;
          skc_uint        const spare_idx = suballocator->count - spare_rem;

          suballocator->rem.spare          = spare_rem;
          suballocator->ids[spare_idx + 0] = subbuf_id;
          suballocator->ids[spare_idx + 1] = next_id;
        }
      else
        {
          prev->size += subbuf->size;
          prev->next  = next;

          if (next != NULL)
            next->prev = prev;

          subbuf->inuse -= 1;

          assert(subbuf->inuse == 0);
          assert(suballocator->rem.avail > 0);

          suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
        }
    }
  //
  // try to merge with right
  //
  else if (((next = subbuf->next) != NULL) && !next->inuse)
    {
      subbuf->inuse -= 1;

      assert(subbuf->inuse == 0);
      assert(suballocator->rem.avail > 0);

      next->prev    = prev;
      next->origin  = subbuf->origin;
      next->size   += subbuf->size;

      if (prev != NULL)
        prev->next = next;

      // subbuf is now spare
      suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
    }
  else // couldn't merge with a neighbor
    {
      skc_uint avail_idx = suballocator->rem.avail++;

      // subbuf is now available
      subbuf->idx    = avail_idx;
      subbuf->inuse -= 1;

      assert(subbuf->inuse == 0);
      assert(suballocator->rem.avail > 0);

      suballocator->ids[avail_idx] = subbuf_id;
    }
}

//
//
//

#if 0

//
// At some point there might be a reason to sort the available
// subbuffers into some useful order -- presumably to binary search
// for the closest match or to chip away at the largest available
// subbuffer
//

static
void
skc_suballocator_optimize(struct skc_suballocator * const suballocator)
{
  ;
}

#endif

//
//
//
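//
// A possible shape for the merge-with-prev() primitive mentioned in
// the FIXME above -- sketch only, not wired in.  It covers just the
// linked-list and extent bookkeeping; the ids[] avail/spare stack
// updates, which are what actually differ between the three cases in
// skc_suballocator_subbuf_free(), would remain the caller's job.
//

#if 0

static
void
skc_subbuf_merge_with_prev(struct skc_subbuf * const prev,
                           struct skc_subbuf * const curr)
{
  // both records are expected to be free (or being freed) and adjacent
  assert(prev->origin + prev->size == curr->origin);

  // prev absorbs curr's extent
  prev->size += curr->size;

  // unlink curr
  prev->next  = curr->next;

  if (curr->next != NULL)
    curr->next->prev = prev;
}

#endif

//
//
//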