/*
 * hdf52hdf5.cpp
 *
 *  Created on: Jan 31, 2023
 *      Author: felix
 */

#include <H5Cpp.h>

#include <cstring>
#include <iostream>
#include <fstream>
#include <vector>
#include <algorithm>
#include <cassert>
#include <memory>
#include <sys/stat.h>

// should get this from file, not store it here. Use an include
const std::vector<std::string> CELL_TYPES = { "granule_cell", "glomerulus", "golgi_cell", "purkinje_cell",
                                              "stellate_cell", "basket_cell", "mossy_fibers" };
void printUsage(const char *filename)
{
  const std::string name = (filename ? std::string(filename) : "TXTTOCSV");

  std::cout << name << " - Generates HDF5 with filtered ids." << '\n';
  std::cout << "Usage: " << name << " <hdf5_source_file> <hdf5_target_file>" << std::endl;
  std::exit(-1);
}

// #include "toFilter.h"
// modify to filter ids, maybe use another input for this? If empty all ids goes in target.
std::vector<uint32_t> toFilter;

const char CELLS_TAG[]="cells/positions";
const char MAPS_TAG[]="cells/type_maps";
const char CONNECTIONS_TAG[]="cells/connections";
const char MORPHOLOGIES_TAG[] ="morphologies";

// to store cell ids, type and coordinates. Has to be double to write to HDF5 dataset.
struct Coords
{
    double id;
    double type;
    double x;
    double y;
    double z;

    Coords():id{0}, type{0}, x{0}, y{0}, z{0} {};
    Coords(double i, double t, double _x, double _y, double _z): id{i}, type{t}, x{_x}, y{_y}, z{_z} {};
};

auto checkError = [](const int value) { if(value < 0) { std::cerr << "ERROR: " << __FILE__ << ":" << __LINE__ << std::endl; exit(1); } };

void H5WritePositions(hid_t to, const std::vector<Coords> &coords)
{
  if(coords.empty()) return;

  hsize_t dims[2]{coords.size(), 5};

  // Create a dataset
  auto group_id = H5Gopen2(to, "/cells", H5P_DEFAULT);
  auto dspace_id = H5Screate_simple(2, dims, NULL);
  auto dset_id = H5Dcreate2(group_id, "positions", H5T_NATIVE_DOUBLE, dspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);

  // Write the data
  auto status = H5Dwrite(dset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, coords.data());
  checkError(status);

  // Close dataset after writing
  status = H5Dclose(dset_id);
  checkError(status);

  status = H5Gclose(group_id);
  checkError(status);
}

void H5CopyAttribute(hid_t from, hid_t to, const char* propertyName)
{
  hid_t attr = H5Aopen(from, propertyName, H5P_DEFAULT);
  auto aSpace = H5Aget_space(attr);

  H5A_info_t aInfo;
  auto status = H5Aget_info(attr, &aInfo);
  checkError(status);
  auto aType = H5Aget_type(attr);

  unsigned char *buffer = new unsigned char[aInfo.data_size];
  memset(buffer, 0, aInfo.data_size);
  status = H5Aread(attr, aType, buffer);
  checkError(status);

  auto newAttr = H5Acreate2(to, propertyName, aType, aSpace,H5P_DEFAULT, H5P_DEFAULT);
  checkError(newAttr);
  status = H5Awrite(newAttr, aType, buffer);
  checkError(status);

  H5free_memory(buffer);
  H5Sclose(aSpace);
  H5Tclose(aType);
  H5Aclose(attr);
  H5Aclose(newAttr);
}

void H5copy(hid_t from, const char *from_tag, hid_t to, const char *to_tag)
{
  auto status = H5Ocopy(from, from_tag, to, to_tag, H5P_DEFAULT, H5P_DEFAULT);
  checkError(status);
}

void H5writeMap(hid_t to, std::string name, std::vector<uint32_t> &gids)
{
  hsize_t dims[1]{gids.size()};

  auto group_id = H5Gopen2(to, "/cells/type_maps", H5P_DEFAULT);

  // Create a dataset
  auto dspace_id = H5Screate_simple(1, dims, NULL);
  auto dset_id = H5Dcreate2( group_id, name.c_str(), H5T_NATIVE_INT, dspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);

  // Write the data
  auto status = H5Dwrite(dset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, gids.data());
  checkError(status);

  // Close dataset after writing
  status = H5Dclose(dset_id);
  checkError(status);

  status = H5Gclose(group_id);
  checkError(status);
}

void H5writeConnections(hid_t to, std::string name, std::vector<std::pair<uint32_t, uint32_t>> connections)
{
  std::vector<std::pair<double, double>> others;
  for(size_t i = 0; i < connections.size(); ++i)
  {
    auto value = connections.at(i);

    if(!toFilter.empty())
    {
      auto it1 = std::find(toFilter.cbegin(), toFilter.cend(), value.first);
      auto it2 = std::find(toFilter.cbegin(), toFilter.cend(), value.second);
      if(toFilter.cend() == it1 || toFilter.cend() == it2)
      {
        continue;
      }
    }

    others.emplace_back(value.first, value.second);
  }

  if(others.empty()) return;

  hsize_t dims[2]{others.size(), 2};

  auto group_id = H5Gopen2(to, "/cells/connections", H5P_DEFAULT);
  // Create a dataset
  auto dspace_id = H5Screate_simple(2, dims, NULL);
  auto dset_id = H5Dcreate2( group_id, name.c_str(), H5T_NATIVE_DOUBLE, dspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);

  // Write the data
  auto status = H5Dwrite(dset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, others.data());
  checkError(status);

  // Close dataset after writing
  status = H5Dclose(dset_id);
  checkError(status);

  status = H5Gclose(group_id);
  checkError(status);
}

int main(int argc, char* argv[])
{
  if(argc < 3) printUsage(argv[0]);

  struct stat buf;
  if(stat(argv[1], &buf) != 0)
  {
    std::cerr << "Unable to find file: " << argv[1] << std::endl;
    exit(-1);
  }

  if(!S_ISREG(buf.st_mode))
  {
    std::cerr << "Parameter " << argv[1] << " is not a file." << std::endl;
    exit(-1);
  }

  if(stat(argv[2], &buf) == 0)
  {
    std::cerr << "Destination file: " << argv[2] << " already exists." << std::endl;
    exit(-1);
  }

  // Check whether file referenced by the path is an Hdf5 format file or not.
  if( !H5::H5File::isHdf5( argv[1] ))
  {
    std::cerr << "File " << argv[1] << " is not a HDF5 file!" << std::endl;
    return 0;
  }

  // open files
  auto sourceFile = H5::H5File( argv[1], H5F_ACC_RDONLY );
  auto targetFile = H5Fcreate( argv[2], H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);

  for(const auto &tag: { CELLS_TAG, CONNECTIONS_TAG, MORPHOLOGIES_TAG, MAPS_TAG })
  {
    if(!H5Lexists(sourceFile.getLocId(), tag, H5P_DEFAULT) > 0)
    {
      std::cerr << "File " << argv[1] << " has not the correct format, "
                << tag << " tag is missing or misplaced!" << std::endl;
      return -1;
    }
  }

  // copy morphologies
  H5copy(sourceFile.getLocId(), MORPHOLOGIES_TAG, targetFile, MORPHOLOGIES_TAG);

  // open cells and copy/filter
  H5Gcreate2(targetFile, "/cells", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  auto targetCellId = H5Gopen2(targetFile, "/cells", H5P_DEFAULT);
  auto sourceCellId = H5Gopen2(sourceFile.getLocId(), "cells", H5P_DEFAULT);
  H5CopyAttribute(sourceCellId, targetCellId, "types");
  H5Gcreate2(targetCellId, "type_maps", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  H5Gcreate2(targetCellId, "connections", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  H5Gclose(targetCellId);

  auto dataSet = sourceFile.openDataSet(CELLS_TAG);

  hsize_t dims[2];
  dataSet.getSpace().getSimpleExtentDims( dims );
  assert(dataSet.getSpace().getSimpleExtentNdims() == 2);
  assert(dataSet.getTypeClass() == H5T_FLOAT);

  auto buffer = new float[dims[0]*dims[1]];

  // load positions
  dataSet.read( reinterpret_cast<void*>(buffer), H5::PredType::IEEE_F32LE );

  std::vector<Coords> subset;
  std::vector<uint32_t> types;
  for(size_t idx = 0; idx < dims[0]*dims[1]; )
  {
    const auto id = static_cast<uint32_t>(buffer[idx]);
    bool hasId = toFilter.empty();
    if(!hasId)
    {
      auto it = std::find(toFilter.cbegin(), toFilter.cend(), id);
      hasId = it != toFilter.cend();
    }

    if(hasId)
    {
      const auto type = static_cast<uint32_t>(buffer[idx+1]);
      idx += dims[1]-3;
      subset.emplace_back(id, type, buffer[idx+0],buffer[idx+1],buffer[idx+2]);
      idx+=3;
    }
  }

  delete [] buffer;
  buffer = nullptr;

  dataSet.close();

  auto sortCoords = [](Coords &lhs, Coords &rhs){ return lhs.id < rhs.id; };
  std::sort(subset.begin(), subset.end(), sortCoords);

  H5WritePositions(targetFile, subset);

  // group maps.
  if(H5Lexists(sourceFile.getLocId(), MAPS_TAG, H5P_DEFAULT) > 0)
  {
    auto group = sourceFile.openGroup(MAPS_TAG);
    const unsigned int objNum = group.getNumObjs( );

    for( unsigned int i = 0; i < objNum; i++ )
    {
      // Get object type.
      H5G_obj_t type = group.getObjTypeByIdx( i );

      // Get object name.
      const std::string name = group.getObjnameByIdx( i );

      // Check if type is a dataset.
      if( type != H5G_obj_t::H5G_DATASET )
        continue;

      // Open dataset.
      H5::DataSet innerDs = group.openDataSet( name );
      memset(dims, 0, 2*sizeof(hsize_t));
      innerDs.getSpace().getSimpleExtentDims( dims );
      assert(innerDs.getSpace().getSimpleExtentNdims() == 1);

      assert(innerDs.getTypeClass() == H5T_INTEGER);
      auto integerType = innerDs.getIntType();

      if(dims[0] == 0)
        continue;

      buffer = new float[dims[0]];

      innerDs.read( reinterpret_cast<void*>(buffer), H5::PredType::IEEE_F32LE );

      std::vector<uint32_t> gids;
      for(size_t idx = 0; idx < dims[0]; )
      {
        const auto id = static_cast<uint32_t>(buffer[idx]);
        bool hasId = toFilter.empty();
        if(!hasId)
          hasId = std::find(toFilter.cbegin(), toFilter.cend(), id) != toFilter.cend();

        if(hasId)
          gids.emplace_back(id);

        ++idx;
      }

      delete [] buffer;
      buffer = nullptr;
      const auto groupName = "group " + name;
      innerDs.close();

      H5writeMap(targetFile, name, gids);
    }

    group.close();
  }

  // groups by connections
  if(H5Lexists(sourceFile.getLocId(), CONNECTIONS_TAG, H5P_DEFAULT) > 0)
  {
    auto group = sourceFile.openGroup(CONNECTIONS_TAG);
    const unsigned int objNum = group.getNumObjs( );

    for( unsigned int i = 0; i < objNum; i++ )
    {
      // Get object type.
      H5G_obj_t type = group.getObjTypeByIdx( i );

      // Get object name.
      const std::string name = group.getObjnameByIdx( i );

      // Check if type is a dataset.
      if( type != H5G_obj_t::H5G_DATASET )
        continue;

      // Open dataset.
      H5::DataSet innerDs = group.openDataSet( name );
      memset(dims, 0, 2*sizeof(hsize_t));
      innerDs.getSpace().getSimpleExtentDims( dims );
      assert(innerDs.getSpace().getSimpleExtentNdims() == 2);

      assert(innerDs.getTypeClass() == H5T_FLOAT);

      if(dims[0] == 0 || dims[1] == 0)
        continue;

      buffer = new float[dims[0]*dims[1]];

      innerDs.read( reinterpret_cast<void*>(buffer), H5::PredType::IEEE_F32LE );

      std::vector<std::pair<uint32_t, uint32_t>> connections;

      std::vector<uint32_t> gids;
      for(size_t idx = 0; idx < dims[0]*dims[1]; )
      {
        auto sou = static_cast<uint32_t>(buffer[idx]);
        auto tar = static_cast<uint32_t>(buffer[idx+1]);
        bool hasIds = toFilter.empty();
        if(!hasIds)
        {
          auto it1 = std::find(toFilter.cbegin(), toFilter.cend(), sou);
          auto it2 = std::find(toFilter.cbegin(), toFilter.cend(), tar);

          hasIds = (it1 != toFilter.cend() && it2 != toFilter.cend());
        }

        if(hasIds)
          connections.emplace_back(sou, tar);

        idx += dims[1];
      }
      delete [] buffer;
      buffer = nullptr;
      const auto groupName = "connection " + name;
      innerDs.close();

      H5writeConnections(targetFile, name, connections);
    }

    group.close();
  }

  sourceFile.close();

  H5Fflush(targetFile, H5F_SCOPE_GLOBAL);
  H5Fclose(targetFile);

  return 0;
}