
/***************************************************************************
                           chromakill.cpp
                           --------------
    begin                : Sat Jul 31 2004
    copyright            : (C) 2004 by Dirk Ziegelmeier
    email                : dziegel@gmx.de
 ***************************************************************************/

/*
 * ChromaKill algorithm taken from DScaler.
 * Copyright (c) 2003 Laurent Garnier. All rights reserved.
 *
 * Ported by: Dirk Ziegelmeier
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 */

#include <stdlib.h>

#include <kdebug.h>
#include <klocale.h>
#include <kconfig.h>

#include "chromakill.h"
#include "kdetvimagefilter.h"
#include "kdetvimagefiltercontext.h"
#include "kdetvcpudetection.h"
#include "x86-64_macros.inc"

/*
 * Image filter that removes colour from a YUYV frame.
 *
 * Every 8-byte group of each line is ANDed with 0x00FF00FF00FF00FF and ORed
 * with 0x8000800080008000.  As x86 is little endian, this keeps the bytes at
 * even offsets and overwrites the bytes at odd offsets with 0x80.  In the
 * YUYV layout those are the luma and the chroma bytes respectively, so the
 * result is a grey-scale picture (0x80 being the neutral chroma value).
 *
 * The work is done in place on ctx->out using MMX instructions.
 */
class ChromaKillImageFilter : public KdetvImageFilter
{
public:
    ChromaKillImageFilter()
        : KdetvImageFilter(i18n("ChromaKill"))
    {
    }

    virtual ~ChromaKillImageFilter()
    {
    }

    /*
     * Applies the filter to ctx->out in place.
     *
     * ctx:     filter context; ctx->out is the image that gets modified.
     * returns: the same ctx pointer that was passed in.
     *
     * The image is left untouched when the CPU reports no MMX support or
     * when a line is shorter than 8 bytes.  Bytes of a line beyond the last
     * complete 8-byte group (bytesPerLine() % 8) are not processed.
     */
    virtual KdetvImageFilterContext* operator<< (KdetvImageFilterContext* ctx)
    {
        // Return if we don't have MMX
        if(!(KdetvCpuDetection::capabilities() & KdetvCpuDetection::Cap_MMX)) {
            return ctx;
        }

        int64_t        qwLumiMask       = 0x00FF00FF00FF00FFull;
        int64_t        qwChromaHalfMask = 0x8000800080008000ull;
        long           Cycles           = ctx->out->bytesPerLine() / 8;
        unsigned char* Pixels           = ctx->out->buffer();
        int            fieldHeight      = ctx->out->size().height();
        // Distance in bytes from the start of one line to the start of the next.
        // NOTE(review): stride() is presumably the padding after each line - confirm.
        unsigned int   outputpitch      = ctx->out->bytesPerLine() + ctx->out->stride();

        // The assembly loop below decrements the counter *before* testing it,
        // so a count of zero would wrap around and write far beyond the
        // buffer.  Bail out if there is not a single full 8-byte group.
        if (Cycles <= 0) {
            return ctx;
        }

        for (int y=0; y<fieldHeight; y++) {
            // MOVX/ADDX/DECX and XAX/XCX are string macros from
            // x86-64_macros.inc.  The literals are separated from the macro
            // names by whitespace because C++11 and later would otherwise
            // parse "..."XAX as a user-defined literal suffix.
            __asm__ __volatile__ (
                // XAX = current position in the line, XCX = remaining 8-byte groups
                MOVX "    %[Pixels],           %%" XAX "\n\t"
                MOVX "    %[Cycles],           %%" XCX "\n\t"

                "movq    %[qwLumiMask],       %%mm1\n\t"
                "movq    %[qwChromaHalfMask], %%mm2\n\t"

                "1:\n\t"
                "movq    (%%" XAX "),           %%mm0\n\t"

                // keep the even bytes, force the odd bytes to 0x80
                "pand    %%mm1,               %%mm0\n\t"
                "por     %%mm2,               %%mm0\n\t"

                "movq    %%mm0,               (%%" XAX ")\n\t"

                ADDX "    $8,                  %%" XAX "\n\t"
                DECX "    %%" XCX "\n\t"
                "jne     1b\n\t"

                : /* no outputs */

                : [qwLumiMask]       "m"(qwLumiMask),
                  [qwChromaHalfMask] "m"(qwChromaHalfMask),
                  [Pixels]           "g"(Pixels),
                  [Cycles]           "g"(Cycles)

                : XAX, XCX,
#ifdef ARCH_386
                  "st", "st(1)", "st(2)",
#endif
                  "mm0", "mm1", "mm2",
                  "memory", "cc"
            );

            Pixels += outputpitch;
        }

        // The MMX registers alias the x87 register stack in 32-bit and in
        // 64-bit mode alike, so the FPU state has to be reset after MMX code
        // regardless of the architecture the plugin is built for.
        __asm__ __volatile__ ("emms\n\t");

        return ctx;
    }

    /* Pixel format this filter accepts: YUYV only. */
    virtual KdetvImage::ImageFormat inputFormats()
    {
        return KdetvImage::FORMAT_YUYV;
    }
};

// -----------------------------------------------------------------------

/*
 * Forwards all arguments to the KdetvFilterPlugin base class and creates the
 * ChromaKillImageFilter instance that is kept in _filter.
 */
ChromaKillPlugin::ChromaKillPlugin(Kdetv *ktv, const QString& cfgkey, QObject *parent, const char* name)
    : KdetvFilterPlugin(ktv, cfgkey, parent, name)
{
    // Heap allocated here; the destructor deletes it again.
    _filter = new ChromaKillImageFilter;
}

/*
 * Destroys the filter object that the constructor stored in _filter.
 */
ChromaKillPlugin::~ChromaKillPlugin()
{
    delete _filter;
}

extern "C" {
    /*
     * C-linkage factory function for the plugin.
     *
     * ktv:     passed on unchanged to the ChromaKillPlugin constructor.
     * returns: a newly allocated ChromaKillPlugin; the caller receives the
     *          raw pointer.
     */
    ChromaKillPlugin* create_chromakill(Kdetv* ktv)
    {
        ChromaKillPlugin* plugin =
            new ChromaKillPlugin(ktv, "chromakill", 0, "Kill image chroma");
        return plugin;
    }
}

#include "chromakill.moc"
