/*
 *  Copyright (c) 2008 Cyrille Berger <cberger@cberger.net>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2 of the License.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

#include "CodeGenerator_p.h"

#include <vector>

#include <llvm/CallingConv.h>
#include <llvm/Constants.h>
#include <llvm/Constant.h>
#include <llvm/DerivedTypes.h>
#include <llvm/Function.h>
#include <llvm/Instructions.h>
#include <llvm/Module.h>
#include <llvm/Value.h>
#include <llvm/ParameterAttributes.h>

#include "GTLCore/CodeGenerator_p.h"
#include "GTLCore/ExpressionResult_p.h"
#include "GTLCore/Function.h"
#include "GTLCore/Function_p.h"
#include "GTLCore/GenerationContext_p.h"
#include "GTLCore/ModuleData_p.h"
#include "GTLCore/Parameter.h"
#include "GTLCore/PixelDescription.h"
#include "GTLCore/Type.h"
#include "GTLCore/TypeManager_p.h"
#include "GTLCore/Value.h"
#include "GTLCore/VariableNG_p.h"

#include "Wrapper_p.h"
#include "wrappers/ImageWrap_p.h"
#include "wrappers/PixelWrap_p.h"

#include "Debug.h"
#include "Kernel.h"

using namespace OpenShiva;

// Counter used to build a unique name for each generated "evaluatePixeles"
// function; it is incremented by generateEvaluatePixeles before being used.
int CodeGenerator::s_evaluatePixelesId = 0;

// Declares "llvm.memcpy.i32" in _module (or fetches the existing declaration)
// with the signature void( i8*, i8*, i32, i32 ), sets the C calling
// convention on it and returns it. The function has no body.
llvm::Function* CodeGenerator::createMemCpyFunction( llvm::Module* _module )
{
  // TODO this was copied from llvm-gcc output; can't we use the memcpy intrinsic directly?
  const llvm::Type* bytePointerType = llvm::PointerType::get( llvm::IntegerType::get( 8 ), 0 );
  const llvm::Type* int32Type = llvm::IntegerType::get( 32 );

  // Parameter types, in declaration order: two i8* followed by two i32.
  std::vector<const llvm::Type*> parameterTypes;
  parameterTypes.reserve( 4 );
  parameterTypes.push_back( bytePointerType );
  parameterTypes.push_back( bytePointerType );
  parameterTypes.push_back( int32Type );
  parameterTypes.push_back( int32Type );
  llvm::FunctionType* signature = llvm::FunctionType::get( llvm::Type::VoidTy, parameterTypes, false );

  // External declaration only.
  llvm::Function* memcpyFunction = static_cast<llvm::Function*>(
      _module->getOrInsertFunction( "llvm.memcpy.i32", signature ) );
  memcpyFunction->setCallingConv( llvm::CallingConv::C );

  // The NoUnwind parameter attribute setup below is currently disabled:
/*  const llvm::ParamAttrsList *func_llvm_memcpy_i32_PAL = 0;
  {
    llvm::ParamAttrsVector Attrs;
    llvm::ParamAttrsWithIndex PAWI;
    PAWI.index = 0; PAWI.attrs = 0  | llvm::ParamAttr::NoUnwind;
    Attrs.push_back(PAWI);
    func_llvm_memcpy_i32_PAL = llvm::ParamAttrsList::get(Attrs);
  }
  func_llvm_memcpy_i32->setParamAttrs(func_llvm_memcpy_i32_PAL);*/
  return memcpyFunction;
}

// Appends to _currentBlock a getelementptr that addresses the
// PixelWrap::INDEX_DATA member of the structure pointed to by _pixel, and
// returns the resulting pointer value.
llvm::Value* CodeGenerator::accessPixelDataPtr( GTLCore::GenerationContext& _gc, llvm::BasicBlock* _currentBlock, llvm::Value* _pixel)
{
  SHIVA_DEBUG( *_pixel );
  // GEP path: dereference the pointer (0), then select the data member.
  std::vector<llvm::Value*> gepPath( 2 );
  gepPath[0] = _gc.codeGenerator()->integerToConstant( 0 );
  gepPath[1] = _gc.codeGenerator()->integerToConstant( PixelWrap::INDEX_DATA );
  return llvm::GetElementPtrInst::Create( _pixel, gepPath.begin(), gepPath.end(), "", _currentBlock );
}

// Returns the address of the pixel's data member (see accessPixelDataPtr)
// bit-cast to an i8*; the instructions are appended to _currentBlock.
llvm::Value* CodeGenerator::accessPixelDataAsU8Ptr( GTLCore::GenerationContext& _gc, llvm::BasicBlock* _currentBlock, llvm::Value* _pixel)
{
  llvm::Value* dataPointer = accessPixelDataPtr( _gc, _currentBlock, _pixel );
  // Reinterpret the pointer to the pixel vector as a byte pointer ("char*").
  const llvm::Type* bytePointerType = llvm::PointerType::get( llvm::IntegerType::get( 8 ), 0 );
  return new llvm::BitCastInst( dataPointer, bytePointerType, "", _currentBlock );
}

// Returns the address of the pixel's data member (see accessPixelDataPtr)
// bit-cast to a float*; the instructions are appended to _currentBlock.
llvm::Value* CodeGenerator::accessPixelDataAsF32Ptr( GTLCore::GenerationContext& _gc, llvm::BasicBlock* _currentBlock, llvm::Value* _pixel)
{
  llvm::Value* dataPointer = accessPixelDataPtr( _gc, _currentBlock, _pixel );
  // Reinterpret the pointer to the pixel vector as a "float*".
  const llvm::Type* floatPointerType = llvm::PointerType::get( llvm::Type::FloatTy, 0 );
  return new llvm::BitCastInst( dataPointer, floatPointerType, "", _currentBlock );
}

// Appends to _currentBlock a call to "llvm.memcpy.i32"( _dst, _src, _n, 1 )
// and returns the call instruction. The trailing constant 1 is the fourth
// (i32) argument of the declaration made by createMemCpyFunction.
llvm::Value* CodeGenerator::callMemcpy( GTLCore::GenerationContext& _gc, llvm::BasicBlock* _currentBlock, llvm::Value* _dst, llvm::Value* _src, llvm::Value* _n )
{
  SHIVA_DEBUG( "memcpy( dst = " << *_dst << ", src = " << *_src << ", _n = " << *_n );
  // Make sure the memcpy declaration exists in the module being generated.
  llvm::Function* memcpyFunction = createMemCpyFunction( _gc.llvmModule() );

  // Arguments, in call order.
  std::vector<llvm::Value*> arguments;
  arguments.reserve( 4 );
  arguments.push_back( _dst );
  arguments.push_back( _src );
  arguments.push_back( _n );
  arguments.push_back( GTLCore::CodeGenerator::integerToConstant( 1 ) );

  llvm::CallInst* memcpyCall = llvm::CallInst::Create(
      memcpyFunction, arguments.begin(), arguments.end(), "", _currentBlock );
  memcpyCall->setCallingConv( llvm::CallingConv::C );
  memcpyCall->setTailCall( false );
  return memcpyCall;
}

// Emits, in _currentBlock, a call to the ImageWrap::INDEX_MEM_TO_VEC function
// pointer stored in _image, passing ( pointer to the data of _pixel,
// _dataPointer ). Returns _currentBlock, since no new block is created.
llvm::BasicBlock* CodeGenerator::memToPixel( GTLCore::GenerationContext& _gc, llvm::BasicBlock* _currentBlock, llvm::Value* _dataPointer, llvm::Value* _pixel, llvm::Value* _image )
{
  // Destination first (the pixel's data member), then the source memory.
  std::vector<llvm::Value*> callArguments( 2 );
  callArguments[0] = accessPixelDataPtr( _gc, _currentBlock, _pixel );
  callArguments[1] = _dataPointer;
  callVirtualMember( _gc, _currentBlock, _image, ImageWrap::INDEX_MEM_TO_VEC, callArguments );

  return _currentBlock;
}

// Emits, in _currentBlock, a call to the ImageWrap::INDEX_VEC_TO_MEM function
// pointer stored in _image, passing ( _dataPointer, pointer to the data of
// _pixel ). Returns _currentBlock, since no new block is created.
llvm::BasicBlock* CodeGenerator::pixelToMem( GTLCore::GenerationContext& _gc, llvm::BasicBlock* _currentBlock, llvm::Value* _pixel, llvm::Value* _dataPointer, llvm::Value* _image )
{
  // The pixel data pointer is computed first so that its GEP precedes the call.
  llvm::Value* pixelData = accessPixelDataPtr( _gc, _currentBlock, _pixel );

  // Destination memory first, then the source (the pixel's data member).
  std::vector<llvm::Value*> callArguments( 2 );
  callArguments[0] = _dataPointer;
  callArguments[1] = pixelData;
  callVirtualMember( _gc, _currentBlock, _image, ImageWrap::INDEX_VEC_TO_MEM, callArguments );

  return _currentBlock;
}

// Stores the coordinates ( _iVal, _jVal ) into the PixelWrap::INDEX_COORD
// member of the pixel structure pointed to by _pixel.
//
// _iVal is converted to float from _iValType and _jVal from _jValType; the two
// values are inserted as elements 0 and 1 of a two-element float vector, which
// is then stored. All instructions are appended to _currentBlock.
void CodeGenerator::setPixelCoordinates( GTLCore::GenerationContext& _gc, llvm::BasicBlock* _currentBlock, llvm::Value* _pixel, llvm::Value* _iVal, const GTLCore::Type* _iValType, llvm::Value* _jVal, const GTLCore::Type* _jValType )
{
  SHIVA_DEBUG("setPixelCoordinates");
  // Create the coord vector
  llvm::Value* result = llvm::ConstantVector::get( static_cast<const llvm::VectorType*>( GTLCore::TypeManager::getVector( GTLCore::Type::Float, 2)->d->type()), std::vector<llvm::Constant*>() );
  SHIVA_DEBUG(*result << " " << *_iVal << " " << *_jVal);
  // Element 0 <- i. The explicit unsigned cast keeps the literal 0 from being
  // read as a null pointer when the overload is selected.
  result = llvm::InsertElementInst::Create( result, GTLCore::CodeGenerator::convertValueTo( _currentBlock, _iVal, _iValType, GTLCore::Type::Float), (unsigned int)0, "", _currentBlock );
  // Element 1 <- j, converted from its own type (which may differ from i's).
  result = llvm::InsertElementInst::Create( result, GTLCore::CodeGenerator::convertValueTo( _currentBlock, _jVal, _jValType, GTLCore::Type::Float), 1, "", _currentBlock );
  
  // Address of the coordinate member: dereference the pointer (0), then select
  // PixelWrap::INDEX_COORD.
  std::vector<llvm::Value*> indexes;
  indexes.push_back( _gc.codeGenerator()->integerToConstant(0));
  indexes.push_back( _gc.codeGenerator()->integerToConstant(PixelWrap::INDEX_COORD));
  new llvm::StoreInst( result,
        llvm::GetElementPtrInst::Create( _pixel, indexes.begin(), indexes.end(), "", _currentBlock ),
        "", _currentBlock );
}

// Generates the "evaluatePixeles<N>" function (N is a process-wide counter)
// which runs the "evaluatePixel" function of _kernel over a range of pixels
// and writes each result into the output image.
//
// The generated function has the signature
//   void( i32 _x, i32 _y, i32 _width, i32 _height, image** _sources, image* _result )
// where "image" is the structure registered under that name in the type
// manager of _moduleData.
//
// _inputTypes, _outputType and _channels are not used by this implementation.
// Returns the generated llvm::Function.
llvm::Function* CodeGenerator::generateEvaluatePixeles( const std::vector<const GTLCore::Type*>& _inputTypes, const GTLCore::Type* _outputType, Kernel* _kernel, GTLCore::ModuleData* _moduleData, int _channels)
{
  GTLCore::CodeGenerator codeGenerator( _moduleData );
  
  // Pseudo-code explaining step-by-step what an evaluatePixeles function does :
  //
  //  AbstractImage* are seen as a void*.
  //
  //  template< int nbSources >
  //  void evaluatePixeles( int _x, int _y, int _width, int _height, AbstractImage** _sources, AbstractImage* _result)
  //  {
  //    pixel result;
  //    source[k] = _sources[k];   // for k in [0, nbSources), loaded once
  //    for( int j = _y; j < _height; ++j )
  //    {
  //      for( int i = _x; i < _width; ++i )
  //      {
  //        result.coord = ( i, j );
  //        evaluatePixel( source[0], source[1], ..., source[nbSources-1], result );
  //        vecToMem( image_wrap_data( _result, i, j ), result.data );
  //      }
  //    }
  //  }
  //
  // NOTE(review): the loop bounds are _width/_height exactly as passed in; the
  // comparison itself is emitted by createIterationForStatement. Whether the
  // caller means them as extents or as end coordinates when _x/_y are not 0
  // should be confirmed against the caller.
  
  // Create the function
  std::vector<const llvm::Type*> params;
  params.push_back( llvm::Type::Int32Ty );
  params.push_back( llvm::Type::Int32Ty );
  params.push_back( llvm::Type::Int32Ty );
  params.push_back( llvm::Type::Int32Ty );
  SHIVA_ASSERT( _moduleData->typeManager()->getStructure( "image" ) );
  params.push_back( 
          llvm::PointerType::get( 
                   llvm::PointerType::get( _moduleData->typeManager()->getStructure( "image" )->d->type(), 0 ), 0 ) );
  params.push_back( llvm::PointerType::get( _moduleData->typeManager()->getStructure( "image" )->d->type(), 0 ) );
  llvm::FunctionType* definitionType = llvm::FunctionType::get( llvm::Type::VoidTy, params, false );
  int evaluatePixelesId = ++CodeGenerator::s_evaluatePixelesId;
  llvm::Function* func = codeGenerator.createFunction( definitionType, "evaluatePixeles" + GTLCore::String::number(evaluatePixelesId));
  // Initialise a generation context
  GTLCore::GenerationContext generationContext( &codeGenerator, func, 0, _moduleData );
  
  // {
  llvm::BasicBlock* initialBlock = llvm::BasicBlock::Create();
  func->getBasicBlockList().push_back( initialBlock );
  // Get the args.
  llvm::Function::arg_iterator arg_it = func->arg_begin();
  //   int _x = first arg;
  llvm::Value* arg_x = arg_it;
  //   int _y = second arg;
  ++arg_it;
  llvm::Value* arg_y = arg_it;
  //   int _width = third arg;
  ++arg_it;
  llvm::Value* arg_width = arg_it;
  //   int _height = fourth arg;
  ++arg_it;
  llvm::Value* arg_height = arg_it;
  //   void** _sources = fifth arg;
  ++arg_it;
  llvm::Value* arg_sources = arg_it;
  //   void* _result = sixth arg;
  ++arg_it;
  llvm::Value* arg_result = arg_it;
  // Create the pixel that receives the result of each evaluatePixel call
  GTLCore::VariableNG* resultVar = new GTLCore::VariableNG( _moduleData->typeManager()->getStructure( "pixel" ), false );
  resultVar->initialise( generationContext, initialBlock, GTLCore::ExpressionResult(), std::list<llvm::Value*>());
  // Get the evaluatePixel function
  GTLCore::Function* ePFunction = _moduleData->function( _kernel->name(), "evaluatePixel" );
  SHIVA_ASSERT( ePFunction );
  unsigned int countArguments = ePFunction->parameters().size();
  // The last parameter is the result pixel, so there must be at least one;
  // this also keeps the unsigned "countArguments - 1" below from wrapping.
  SHIVA_ASSERT( countArguments > 0 );
    
  // Create the list of parameters for the evaluatePixel function:
  // every source image pointer (loaded once, in the initial block), followed
  // by the pointer to the result pixel.
  std::vector<llvm::Value*> evaluatePixel_params;
  for(unsigned int i = 0; i < countArguments - 1; ++i )
  {
    evaluatePixel_params.push_back(
        new llvm::LoadInst(
            llvm::GetElementPtrInst::Create( arg_sources, GTLCore::CodeGenerator::integerToConstant( i ), "", initialBlock ),
            "", initialBlock ) );
  }
  evaluatePixel_params.push_back( resultVar->pointer() );
  SHIVA_ASSERT( evaluatePixel_params.size() == countArguments );
  // Construct the "conditions" of the first loop
  // int j = _y;   (j is the row counter: it is bounded by _height and used as
  //                the y coordinate, so it starts from the y offset)
  GTLCore::VariableNG* incJ = new GTLCore::VariableNG( GTLCore::Type::Integer32, false);
  incJ->initialise( generationContext, initialBlock,
                    GTLCore::ExpressionResult( arg_y, GTLCore::Type::Integer32),
                    std::list<llvm::Value*>());
    
  // {
    llvm::BasicBlock* firstBlockJLoop = llvm::BasicBlock::Create();
    func->getBasicBlockList().push_back( firstBlockJLoop );
      
    // int i = _x;   (i is the column counter: it is bounded by _width and used
    //                as the x coordinate; it is re-initialised for every row)
    GTLCore::VariableNG* incI = new GTLCore::VariableNG( GTLCore::Type::Integer32, false);
    incI->initialise( generationContext, firstBlockJLoop,
                      GTLCore::ExpressionResult( arg_x, GTLCore::Type::Integer32),
                      std::list<llvm::Value*>());
      
    // {
      llvm::BasicBlock* firstBlockILoop = llvm::BasicBlock::Create();
      func->getBasicBlockList().push_back( firstBlockILoop );
      llvm::Value* jVal = incJ->get( generationContext, firstBlockILoop );
      llvm::Value* iVal = incI->get( generationContext, firstBlockILoop );
        
      // Set the coordinates of the pixel
      setPixelCoordinates( generationContext, firstBlockILoop, resultVar->pointer(), iVal, GTLCore::Type::Integer32, jVal, GTLCore::Type::Integer32 );
        
      // Call evaluatePixel
      llvm::Function* llvmEPFunction = ePFunction->d->data->function( countArguments );
      SHIVA_ASSERT( llvmEPFunction );
      SHIVA_DEBUG( evaluatePixel_params.size() );
      llvm::CallInst::Create( llvmEPFunction, evaluatePixel_params.begin(), evaluatePixel_params.end(), "", firstBlockILoop );
        
      // Synchronize the output pixel with input
      // Call image_wrap_data on the result to get the pointer on destination data
      llvm::Value* pointer = callImageWrapData( generationContext, firstBlockILoop, generationContext.module()->typeManager()->getStructure( "image" ), arg_result, iVal, jVal);
      llvm::BasicBlock* lastBlockILoop = pixelToMem( generationContext, firstBlockILoop, resultVar->pointer(), pointer, arg_result );
    // }
      
    // Inner loop: iterate i up to _width over the body built above.
    llvm::BasicBlock* lastBlockJLoop = GTLCore::CodeGenerator::createIterationForStatement( generationContext, firstBlockJLoop, incI, arg_width, GTLCore::Type::Integer32, firstBlockILoop, lastBlockILoop);
  // }
    
  // Outer loop: iterate j up to _height over the inner loop.
  llvm::BasicBlock* lastBlock = GTLCore::CodeGenerator::createIterationForStatement( generationContext, initialBlock, incJ, arg_height, GTLCore::Type::Integer32, firstBlockJLoop, lastBlockJLoop);
  llvm::ReturnInst::Create(lastBlock);
  // Cleanup of the code generation helpers (the generated IR is kept)
  
  delete resultVar;
  delete incJ;
  delete incI;
  return func;
}

// Counter used to build a unique name for each generated
// "image_wrap_memToVec" function (pre-incremented by generateMemToVec).
int memToVecId = 0;

// Generates an "image_wrap_memToVec<N>" function that reads one pixel from
// raw image memory, laid out as described by _pixelDescription, into a float
// vector.
//
// The generated function takes ( vec* _dst, char* _imgData ). When every
// channel is a float the pixel is copied with a single memcpy; otherwise each
// channel is loaded in its native type, converted to float, scaled and
// inserted into the vector, which is then stored to _dst.
// Returns the generated llvm::Function.
llvm::Function* CodeGenerator::generateMemToVec( GTLCore::ModuleData* _moduleData, const GTLCore::PixelDescription& _pixelDescription )
{
  int channelsNb = _pixelDescription.channels();
  // Check if all channels are floats
  bool allFloat = true;
  for( int i = 0; i < channelsNb; ++i)
  {
    if( _pixelDescription.channelTypes()[i]->dataType() != GTLCore::Type::FLOAT )
    {
      allFloat = false;
      break;
    }
  }
  //
  // If all channels are float, then do a direct memcpy.
  // Otherwise convert each channel to float and store the resulting vector.
  //
  //  template<bool _TAllFloat_, int _TChannels_>
  //  void memToVec( vec4* _dst, char* _imgData)
  //  {
  //    if( _TAllFloat_ )
  //    {
  //      memcpy( (char*)_dst, _imgData, _TChannels_ * sizeof(float) );
  //    } else {
  //      for(int i = 0; i < _TChannels_; ++i)
  //      {
  //        _dst[i] = convert(_imgData + pos(i) )
  //      }
  //    }
  //  }
  
  GTLCore::CodeGenerator codeGenerator( _moduleData );

  llvm::Function* func = codeGenerator.createFunction( 
      Wrapper::image_wrap_mem_to_vec_float_type( _moduleData->typeManager(), _pixelDescription.channels()) ,
      "image_wrap_memToVec" + GTLCore::String::number( ++memToVecId) );
  // Initialise a generation context
  GTLCore::GenerationContext generationContext( &codeGenerator, func, 0, _moduleData );
  
  // Get the args.
  llvm::Function::arg_iterator arg_it = func->arg_begin();
  //   vec4* _dst = first arg;
  llvm::Value* arg_dst = arg_it;
  //   char* _imgData= second arg;
  ++arg_it;
  llvm::Value* arg_imgData = arg_it;
    
  // {
  llvm::BasicBlock* currentBlock = llvm::BasicBlock::Create();
  func->getBasicBlockList().push_back( currentBlock );
    
  //    if( _TAllFloat_ )
  if( allFloat )
  //    {
  {
  //    memcpy( (char*)_dst, _imgData, _TChannels_ * sizeof(float) );
    // Copy exactly one float per channel, so that no byte beyond the source
    // pixel is read (same size computation as generateVecToMem).
    callMemcpy( generationContext, currentBlock, 
                GTLCore::CodeGenerator::convertPointerToCharP( currentBlock, arg_dst),
                arg_imgData, GTLCore::CodeGenerator::integerToConstant( sizeof(float) * channelsNb ) );
  //    } else {
  } else {
  //      for(int i = 0; i < _TChannels_; ++i)
    // Byte offset of the current channel inside the pixel.
    int currentPos = 0;
    // Start from the current content of *_dst and overwrite one element per channel.
    llvm::Value* floatVec = new llvm::LoadInst( arg_dst, "", currentBlock );
    for( int i = 0; i < channelsNb; ++i)
  //      {
    {
  //        _dst[i] = convert(_imgData + pos(i) )
      // _imgData + pos(i);
      const GTLCore::Type* channelType = _pixelDescription.channelTypes()[i];
      llvm::Value* posInP = llvm::GetElementPtrInst::Create( arg_imgData, GTLCore::CodeGenerator::integerToConstant( currentPos ), "", currentBlock );
      llvm::Value* posInPNative = GTLCore::CodeGenerator::convertPointerTo( currentBlock, posInP, channelType->d->type() );
      llvm::Value* nativeValue = new llvm::LoadInst( posInPNative, "", currentBlock );
      // convert(_imgData + pos(i) )
      llvm::Value* floatValue = GTLCore::CodeGenerator::convertValueTo( currentBlock, nativeValue, channelType, GTLCore::Type::Float );
      // Scale the value: signed types first get an offset added, then fall
      // through to the division shared with the unsigned type of same width.
      // NOTE(review): the offsets (127, 32767, 2147483647) and the 0.5 used by
      // generateVecToMem do not look like exact inverses of each other -
      // confirm the intended mapping of signed channels.
      switch( channelType->dataType() )
      {
        case GTLCore::Type::INTEGER8:
          floatValue = GTLCore::CodeGenerator::createAdditionExpression( currentBlock, floatValue, GTLCore::Type::Float, GTLCore::CodeGenerator::floatToConstant( 127.0 ), GTLCore::Type::Float);
          // intentional fall through
        case GTLCore::Type::UNSIGNED_INTEGER8:
          floatValue = GTLCore::CodeGenerator::createDivisionExpression( currentBlock, floatValue, GTLCore::Type::Float, GTLCore::CodeGenerator::floatToConstant( 255.0 ), GTLCore::Type::Float);
          break;
        case GTLCore::Type::INTEGER16:
          floatValue = GTLCore::CodeGenerator::createAdditionExpression( currentBlock, floatValue, GTLCore::Type::Float, GTLCore::CodeGenerator::floatToConstant( 32767.0 ), GTLCore::Type::Float);
          // intentional fall through
        case GTLCore::Type::UNSIGNED_INTEGER16:
          floatValue = GTLCore::CodeGenerator::createDivisionExpression( currentBlock, floatValue, GTLCore::Type::Float, GTLCore::CodeGenerator::floatToConstant( 65535.0 ), GTLCore::Type::Float);
          break;
        case GTLCore::Type::INTEGER32:
          floatValue = GTLCore::CodeGenerator::createAdditionExpression( currentBlock, floatValue, GTLCore::Type::Float, GTLCore::CodeGenerator::floatToConstant( 2147483647.0 ), GTLCore::Type::Float);
          // intentional fall through
        case GTLCore::Type::UNSIGNED_INTEGER32:
          floatValue = GTLCore::CodeGenerator::createDivisionExpression( currentBlock, floatValue, GTLCore::Type::Float, GTLCore::CodeGenerator::floatToConstant( 4294967295.0 ), GTLCore::Type::Float);
          break;
        default:
          // Any other channel type (including a float channel mixed with
          // non-float ones) is not handled here.
          GTL_ABORT("unimplemented");
      }
        
      // _dst[i] = convert(_imgData + pos(i) )
      floatVec = llvm::InsertElementInst::Create( floatVec, floatValue, i, "", currentBlock);
      // Advance to the next channel; channels must be a whole number of bytes.
      GTL_ASSERT(channelType->bitsSize() % 8 == 0);
      currentPos += channelType->bitsSize() / 8;
  //      }
    }
    new llvm::StoreInst( floatVec, arg_dst, "", currentBlock);
  //    }
  }
  //  }
  llvm::ReturnInst::Create( currentBlock );
  return func;
}

// Counter used to build a unique name for each generated
// "image_wrap_vecToMem" function (pre-incremented by generateVecToMem).
int vecToMemId = 0;

// Generates an "image_wrap_vecToMem<N>" function that writes a float vector
// back to raw image memory laid out as described by _pixelDescription.
//
// The generated function takes ( char* _imgData, vec* _src ):
//
//  template<bool _TAllFloat_, int _TChannels_>
//  void vecToMem( char* _imgData, vec4* _src )
//  {
//    if( _TAllFloat_ )
//    {
//      memcpy( _imgData, (char*)_src, _TChannels_ * sizeof(float) );
//    } else {
//      for(int i = 0; i < _TChannels_; ++i)
//      {
//        *(_imgData + pos(i) ) = convert( _src[i] );
//      }
//    }
//  }
//
// Returns the generated llvm::Function.
llvm::Function* CodeGenerator::generateVecToMem( GTLCore::ModuleData* _moduleData, const GTLCore::PixelDescription& _pixelDescription )
{
  const int channelCount = _pixelDescription.channels();

  // A direct memcpy is only possible when every channel is stored as a float;
  // stop scanning at the first channel that is not.
  bool everyChannelIsFloat = true;
  for( int c = 0; everyChannelIsFloat && c < channelCount; ++c )
  {
    everyChannelIsFloat = ( _pixelDescription.channelTypes()[c]->dataType() == GTLCore::Type::FLOAT );
  }

  GTLCore::CodeGenerator codeGenerator( _moduleData );

  llvm::Function* generated = codeGenerator.createFunction(
      Wrapper::image_wrap_vec_float_to_mem_type( _moduleData->typeManager(), _pixelDescription.channels() ),
      "image_wrap_vecToMem" + GTLCore::String::number( ++vecToMemId ) );
  // Generation context handed to the helpers below
  GTLCore::GenerationContext generationContext( &codeGenerator, generated, 0, _moduleData );

  // Arguments of the generated function: ( char* _imgData, vec* _src )
  llvm::Function::arg_iterator argument = generated->arg_begin();
  llvm::Value* imgDataArg = argument;
  ++argument;
  llvm::Value* srcVecArg = argument;

  // The whole body lives in a single basic block.
  llvm::BasicBlock* body = llvm::BasicBlock::Create();
  generated->getBasicBlockList().push_back( body );

  if( everyChannelIsFloat )
  {
    // memcpy( _imgData, (char*)_src, _TChannels_ * sizeof(float) );
    llvm::Value* srcAsBytes = GTLCore::CodeGenerator::convertPointerToCharP( body, srcVecArg );
    llvm::Value* byteCount = GTLCore::CodeGenerator::integerToConstant( sizeof(float) * channelCount );
    callMemcpy( generationContext, body, imgDataArg, srcAsBytes, byteCount );
    llvm::ReturnInst::Create( body );
    return generated;
  }

  // Per-channel path: *(_imgData + pos(c)) = convert( _src[c] )
  llvm::Value* sourceVector = new llvm::LoadInst( srcVecArg, "", body );
  int byteOffset = 0; // position of the current channel inside the pixel
  for( int c = 0; c < channelCount; ++c )
  {
    const GTLCore::Type* channelType = _pixelDescription.channelTypes()[c];
    // _src[c]
    llvm::Value* channelValue = new llvm::ExtractElementInst( sourceVector, c, "", body );

    // Pick the scaling for this channel type: signed types get 0.5 subtracted
    // before the multiplication shared with the unsigned type of same width.
    bool isSupported = true;
    bool isSigned = false;
    double fullScale = 0.0;
    switch( channelType->dataType() )
    {
      case GTLCore::Type::INTEGER8:
        isSigned = true;
        // intentional fall through
      case GTLCore::Type::UNSIGNED_INTEGER8:
        fullScale = 255.0;
        break;
      case GTLCore::Type::INTEGER16:
        isSigned = true;
        // intentional fall through
      case GTLCore::Type::UNSIGNED_INTEGER16:
        fullScale = 65535.0;
        break;
      case GTLCore::Type::INTEGER32:
        isSigned = true;
        // intentional fall through
      case GTLCore::Type::UNSIGNED_INTEGER32:
        fullScale = 4294967295.0;
        break;
      default:
        isSupported = false;
        GTL_ABORT("unimplemented");
    }
    if( isSupported )
    {
      if( isSigned )
      {
        channelValue = GTLCore::CodeGenerator::createSubstractionExpression( body, channelValue, GTLCore::Type::Float, GTLCore::CodeGenerator::floatToConstant( 0.5 ), GTLCore::Type::Float );
      }
      channelValue = GTLCore::CodeGenerator::createMultiplicationExpression( body, channelValue, GTLCore::Type::Float, GTLCore::CodeGenerator::floatToConstant( fullScale ), GTLCore::Type::Float );
    }

    // Convert back to the native channel type ...
    llvm::Value* nativeValue = GTLCore::CodeGenerator::convertValueTo( body, channelValue, GTLCore::Type::Float, channelType );
    // ... and store it at _imgData + pos(c).
    llvm::Value* channelAddress = llvm::GetElementPtrInst::Create( imgDataArg, GTLCore::CodeGenerator::integerToConstant( byteOffset ), "", body );
    llvm::Value* typedChannelAddress = GTLCore::CodeGenerator::convertPointerTo( body, channelAddress, channelType->d->type() );
    new llvm::StoreInst( nativeValue, typedChannelAddress, "", body );

    // Advance to the next channel; channels must be a whole number of bytes.
    GTL_ASSERT(channelType->bitsSize() % 8 == 0);
    byteOffset += channelType->bitsSize() / 8;
  }

  llvm::ReturnInst::Create( body );
  return generated;
}

// Loads the function pointer stored in member number _member_index of the
// structure pointed to by _pointer and emits a call to it with _arguments.
// All instructions are appended to _currentBlock; the call instruction is
// returned.
llvm::Value* CodeGenerator::callVirtualMember( GTLCore::GenerationContext& _gc, llvm::BasicBlock* _currentBlock, llvm::Value* _pointer, int _member_index , std::vector<llvm::Value*> _arguments)
{
  // GEP path: dereference the pointer (0), then select the requested member.
  std::vector<llvm::Value*> gepPath( 2 );
  gepPath[0] = _gc.codeGenerator()->integerToConstant( 0 );
  gepPath[1] = _gc.codeGenerator()->integerToConstant( _member_index );

  llvm::Value* memberAddress = llvm::GetElementPtrInst::Create( _pointer, gepPath.begin(), gepPath.end(), "", _currentBlock );
  llvm::Value* callee = new llvm::LoadInst( memberAddress, "", _currentBlock );
  return llvm::CallInst::Create( callee, _arguments.begin(), _arguments.end(), "", _currentBlock );
}

// Appends to _currentBlock a call to the function returned by
// Wrapper::image_wrap_dataFunction for _imageType, with the arguments
// ( _imageWrap, _x, _y ), and returns the call instruction.
//
// Unless _NDEBUG_ is defined, the arguments are first checked against the
// function type: the argument count is asserted and every argument is logged
// together with the parameter type it is matched against.
llvm::Value* CodeGenerator::callImageWrapData( GTLCore::GenerationContext& _gc, llvm::BasicBlock* _currentBlock, const GTLCore::Type* _imageType, llvm::Value* _imageWrap, llvm::Value* _x, llvm::Value* _y )
{
  GTL_ASSERT( _imageType );
  std::vector<llvm::Value*> image_wrap_data_params;
  image_wrap_data_params.push_back( _imageWrap );
  image_wrap_data_params.push_back( _x );
  image_wrap_data_params.push_back( _y );
  
  llvm::Function* func = Wrapper::image_wrap_dataFunction( _gc.llvmModule(), _imageType );
#ifndef _NDEBUG_
  {
    const llvm::FunctionType *FTy =
      llvm::cast<llvm::FunctionType>(llvm::cast<llvm::PointerType>(func->getType())->getElementType());
    
    GTL_ASSERT( image_wrap_data_params.size() == FTy->getNumParams() or
            (FTy->isVarArg() and image_wrap_data_params.size() > FTy->getNumParams()) );
    
    const unsigned declaredParams = FTy->getNumParams();
    for (unsigned i = 0; i < image_wrap_data_params.size(); ++i) {
      if( i >= declaredParams )
      {
        // Extra argument of a vararg function: there is no declared parameter
        // type to compare with, and asking for one would be out of range.
        GTL_DEBUG( "Parameter " << i << " : (vararg) got " << image_wrap_data_params[i]->getType() << " => " << *image_wrap_data_params[i]->getType() );
      } else if( FTy->getParamType(i) != image_wrap_data_params[i]->getType() )
      {
        GTL_DEBUG( "Wrong parameter " << i << " : " << FTy->getParamType(i) << " => " << *FTy->getParamType(i) << " but got " << image_wrap_data_params[i] << " => " << *image_wrap_data_params[i]->getType() );
      } else {
        GTL_DEBUG( "Parameter " << i << " : " << FTy->getParamType(i) << " => " << *FTy->getParamType(i) << " but got " << image_wrap_data_params[i]->getType() << " => " << *image_wrap_data_params[i]->getType() );
      }
    }
  }
#endif

  
  return llvm::CallInst::Create(
      func, image_wrap_data_params.begin(), image_wrap_data_params.end(), "", _currentBlock ); // TODO get the real type of the output image (even if it doesn't matter much currently)
  
}

// Counter used to build a unique name for each generated
// "image_wrap_sample_nearest" function (pre-incremented by
// generateImageSampleNearest).
int imageSampleNearestId = 0;

// Generates an "image_wrap_sample_nearest<N>" function for the given image
// and pixel types. In pseudo-code the generated function is:
//
//  pixelX* sampleNearest( ImageWrap* self, float2 pt )
//  {
//    int x = round( pt[0] );
//    int y = round( pt[1] );
//    pixelX* px = malloc( pixelX );
//    memToPixel( image_wrap_data( self, x, y ), px, self );
//    px.coord = ( pt[0], pt[1] );
//    return px;
//  }
//
// The pixel is allocated with a malloc instruction and returned by the
// generated function. NOTE(review): nothing here frees it - presumably the
// caller of the generated function owns it; confirm.
// Returns the generated llvm::Function.
llvm::Function* CodeGenerator::generateImageSampleNearest(GTLCore::ModuleData* _moduleData, const GTLCore::Type* _imageType, const GTLCore::Type* _pixelType )
{
  GTLCore::CodeGenerator codeGenerator( _moduleData );

  llvm::Function* generated = codeGenerator.createFunction(
      Wrapper::image_wrap_sample_nearest_type( _moduleData->typeManager(), _imageType, _pixelType ),
      "image_wrap_sample_nearest" + GTLCore::String::number( ++imageSampleNearestId ) );
  GTLCore::GenerationContext generationContext( &codeGenerator, generated, 0, _moduleData );

  // Arguments of the generated function: ( ImageWrap* self, float2 pt )
  llvm::Function::arg_iterator argument = generated->arg_begin();
  llvm::Value* selfArg = argument;
  ++argument;
  llvm::Value* pointArg = argument;

  llvm::BasicBlock* body = llvm::BasicBlock::Create();
  generated->getBasicBlockList().push_back( body );

  // x: element 0 of pt, kept as float and also rounded for the lookup
  llvm::Value* xAsFloat = GTLCore::CodeGenerator::vectorValueAt( body, pointArg,
                              GTLCore::CodeGenerator::integerToConstant( 0 ) );
  llvm::Value* xRounded = GTLCore::CodeGenerator::createRound( body, xAsFloat );
  // y: element 1 of pt, same treatment
  llvm::Value* yAsFloat = GTLCore::CodeGenerator::vectorValueAt( body, pointArg,
                              GTLCore::CodeGenerator::integerToConstant( 1 ) );
  llvm::Value* yRounded = GTLCore::CodeGenerator::createRound( body, yAsFloat );

  // pixelX* px = malloc( pixelX );
  llvm::Value* pixel = new llvm::MallocInst( _pixelType->d->type(), GTLCore::CodeGenerator::integerToConstant( 1 ), "", body );

  // Fill the pixel from the image data found at the rounded coordinates.
  llvm::Value* imageData = callImageWrapData( generationContext, body, _imageType, selfArg, xRounded, yRounded );
  body = memToPixel( generationContext, body, imageData, pixel, selfArg );

  // The pixel keeps the original (unrounded) sampling coordinates.
  setPixelCoordinates( generationContext, body, pixel, xAsFloat, GTLCore::Type::Float, yAsFloat, GTLCore::Type::Float );
  llvm::ReturnInst::Create( pixel, body );
  return generated;
}
