/*
 * Copyright 2020 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "bench/Benchmark.h"
#include "include/gpu/GrContext.h"
#include "src/core/SkPathPriv.h"
#include "src/gpu/GrContextPriv.h"
#include "src/gpu/GrOpFlushState.h"
#include "src/gpu/tessellate/GrMiddleOutPolygonTriangulator.h"
#include "src/gpu/tessellate/GrResolveLevelCounter.h"
#include "src/gpu/tessellate/GrTessellatePathOp.h"
#include "src/gpu/tessellate/GrWangsFormula.h"
#include "tools/ToolUtils.h"

// This is the number of cubics in desk_chalkboard.skp. (There are no quadratics in the chalkboard.)
constexpr static int kNumCubicsInChalkboard = 47182;

static SkPath make_cubic_path() {
    SkRandom rand;
    SkPath path;
    for (int i = 0; i < kNumCubicsInChalkboard/2; ++i) {
        float x = std::ldexp(rand.nextF(), (i % 18)) / 1e3f;
        path.cubicTo(111.625f*x, 308.188f*x, 764.62f*x, -435.688f*x, 742.63f*x, 85.187f*x);
        path.cubicTo(764.62f*x, -435.688f*x, 111.625f*x, 308.188f*x, 0, 0);
    }
    return path;
}

// This is a dummy GrMeshDrawOp::Target implementation that just gives back pointers into
// pre-allocated CPU buffers, rather than allocating and mapping GPU buffers.
class BenchmarkTarget : public GrMeshDrawOp::Target {
public:
    BenchmarkTarget() {
        GrMockOptions mockOptions;
        mockOptions.fDrawInstancedSupport = true;
        mockOptions.fMaxTessellationSegments = 64;
        mockOptions.fMapBufferFlags = GrCaps::kCanMap_MapFlag;
        mockOptions.fConfigOptions[(int)GrColorType::kAlpha_8].fRenderability =
                GrMockOptions::ConfigOptions::Renderability::kMSAA;
        mockOptions.fConfigOptions[(int)GrColorType::kAlpha_8].fTexturable = true;
        mockOptions.fIntegerSupport = true;

        GrContextOptions ctxOptions;
        ctxOptions.fGpuPathRenderers = GpuPathRenderers::kTessellation;

        fMockContext = GrContext::MakeMock(&mockOptions, ctxOptions);
    }
    const GrContext* mockContext() const { return fMockContext.get(); }
    const GrCaps& caps() const override { return *fMockContext->priv().caps(); }
    GrResourceProvider* resourceProvider() const override {
        return fMockContext->priv().resourceProvider();
    }
    void resetAllocator() { fAllocator.reset(); }
    SkArenaAlloc* allocator() override { return &fAllocator; }
    void putBackVertices(int vertices, size_t vertexStride) override { /* no-op */ }

    void* makeVertexSpace(size_t vertexSize, int vertexCount, sk_sp<const GrBuffer>*,
                          int* startVertex) override {
        if (vertexSize * vertexCount > sizeof(fStaticVertexData)) {
            SK_ABORT("FATAL: wanted %zu bytes of static vertex data; only have %zu.\n",
                     vertexSize * vertexCount, SK_ARRAY_COUNT(fStaticVertexData));
        }
        *startVertex = 0;
        return fStaticVertexData;
    }

    GrDrawIndexedIndirectCommand* makeDrawIndexedIndirectSpace(
            int drawCount, sk_sp<const GrBuffer>* buffer, size_t* offsetInBytes) override {
        int staticBufferCount = (int)SK_ARRAY_COUNT(fStaticDrawIndexedIndirectData);
        if (drawCount > staticBufferCount) {
            SK_ABORT("FATAL: wanted %i static drawIndexedIndirect elements; only have %i.\n",
                     drawCount, staticBufferCount);
        }
        return fStaticDrawIndexedIndirectData;
    }

#define UNIMPL(...) __VA_ARGS__ override { SK_ABORT("unimplemented."); }
    UNIMPL(void recordDraw(const GrGeometryProcessor*, const GrSimpleMesh[], int,
                           const GrSurfaceProxy* const[], GrPrimitiveType))
    UNIMPL(uint16_t* makeIndexSpace(int, sk_sp<const GrBuffer>*, int*))
    UNIMPL(void* makeVertexSpaceAtLeast(size_t, int, int, sk_sp<const GrBuffer>*, int*, int*))
    UNIMPL(uint16_t* makeIndexSpaceAtLeast(int, int, sk_sp<const GrBuffer>*, int*, int*))
    UNIMPL(GrDrawIndirectCommand* makeDrawIndirectSpace(int, sk_sp<const GrBuffer>*, size_t*))
    UNIMPL(void putBackIndices(int))
    UNIMPL(GrRenderTargetProxy* proxy() const)
    UNIMPL(const GrSurfaceProxyView* writeView() const)
    UNIMPL(const GrAppliedClip* appliedClip() const)
    UNIMPL(GrAppliedClip detachAppliedClip())
    UNIMPL(const GrXferProcessor::DstProxyView& dstProxyView() const)
    UNIMPL(GrStrikeCache* strikeCache() const)
    UNIMPL(GrAtlasManager* atlasManager() const)
    UNIMPL(SkTArray<GrSurfaceProxy*, true>* sampledProxyArray())
    UNIMPL(GrDeferredUploadTarget* deferredUploadTarget())
#undef UNIMPL

private:
    sk_sp<GrContext> fMockContext;
    SkPoint fStaticVertexData[(kNumCubicsInChalkboard + 2) * 8];
    GrDrawIndexedIndirectCommand fStaticDrawIndexedIndirectData[32];
    SkSTArenaAlloc<1024 * 1024> fAllocator;
};

// This serves as a base class for benchmarking individual methods on GrTessellatePathOp.
class GrTessellatePathOp::TestingOnly_Benchmark : public Benchmark {
public:
    TestingOnly_Benchmark(const char* subName, SkPath path, const SkMatrix& m)
            : fOp(m, path, GrPaint(), GrAAType::kMSAA, GrTessellationPathRenderer::OpFlags::kNone) {
        fName.printf("tessellate_%s", subName);
    }

    const char* onGetName() override { return fName.c_str(); }
    bool isSuitableFor(Backend backend) final { return backend == kNonRendering_Backend; }

    class prepareMiddleOutStencilGeometry;
    class prepareMiddleOutStencilGeometry_indirect;
    class prepareIndirectOuterCubics;
    class prepareTessellatedOuterCubics;
    class prepareTessellatedCubicWedges;
    class wangs_formula_cubic_log2;
    class wangs_formula_cubic_log2_scale;
    class wangs_formula_cubic_log2_affine;
    class middle_out_triangulation;

private:
    void onDraw(int loops, SkCanvas*) final {
        if (!fTarget.mockContext()) {
            SkDebugf("ERROR: could not create mock context.");
            return;
        }
        for (int i = 0; i < loops; ++i) {
            fOp.fTriangleBuffer.reset();
            fOp.fDoStencilTriangleBuffer = false;
            fOp.fDoFillTriangleBuffer = false;
            fOp.fCubicBuffer.reset();
            fOp.fStencilCubicsShader = nullptr;
            this->runBench(&fTarget, &fOp);
            fTarget.resetAllocator();
        }
    }

    virtual void runBench(GrMeshDrawOp::Target*, GrTessellatePathOp*) = 0;

    GrTessellatePathOp fOp;
    BenchmarkTarget fTarget;
    SkString fName;
};

#define DEF_TESS_BENCH(NAME, PATH, MATRIX, TARGET, OP) \
    class GrTessellatePathOp::TestingOnly_Benchmark::NAME \
            : public GrTessellatePathOp::TestingOnly_Benchmark { \
    public: \
        NAME() : TestingOnly_Benchmark(#NAME, (PATH), (MATRIX)) {} \
        void runBench(GrMeshDrawOp::Target* target, GrTessellatePathOp* op) override; \
    }; \
    DEF_BENCH( return new GrTessellatePathOp::TestingOnly_Benchmark::NAME(); ); \
    void GrTessellatePathOp::TestingOnly_Benchmark::NAME::runBench( \
            GrMeshDrawOp::Target* TARGET, GrTessellatePathOp* op)

DEF_TESS_BENCH(prepareMiddleOutStencilGeometry, make_cubic_path(), SkMatrix::I(), target, op) {
    op->prepareMiddleOutTrianglesAndCubics(target);
}

DEF_TESS_BENCH(prepareMiddleOutStencilGeometry_indirect, make_cubic_path(), SkMatrix::I(), target,
               op) {
    GrResolveLevelCounter resolveLevelCounter;
    op->prepareMiddleOutTrianglesAndCubics(target, &resolveLevelCounter, true);
}

DEF_TESS_BENCH(prepareIndirectOuterCubics, make_cubic_path(), SkMatrix::I(), target, op) {
    GrResolveLevelCounter resolveLevelCounter;
    resolveLevelCounter.reset(op->fPath, SkMatrix::I(), 4);
    op->prepareIndirectOuterCubics(target, resolveLevelCounter);
}

DEF_TESS_BENCH(prepareTessellatedOuterCubics, make_cubic_path(), SkMatrix::I(), target, op) {
    op->prepareTessellatedOuterCubics(target, kNumCubicsInChalkboard);
}

DEF_TESS_BENCH(prepareTessellatedCubicWedges, make_cubic_path(), SkMatrix::I(), target, op) {
    op->prepareTessellatedCubicWedges(target);
}

static void benchmark_wangs_formula_cubic_log2(const SkMatrix& matrix, const SkPath& path) {
    int sum = 0;
    GrVectorXform xform(matrix);
    for (auto [verb, pts, w] : SkPathPriv::Iterate(path)) {
        if (verb == SkPathVerb::kCubic) {
            sum += GrWangsFormula::cubic_log2(4, pts, xform);
        }
    }
    // Don't let the compiler optimize away GrWangsFormula::cubic_log2.
    if (sum <= 0) {
        SK_ABORT("sum should be > 0.");
    }
}

DEF_TESS_BENCH(wangs_formula_cubic_log2, make_cubic_path(), SkMatrix::I(), target, op) {
    benchmark_wangs_formula_cubic_log2(op->fViewMatrix, op->fPath);
}

DEF_TESS_BENCH(wangs_formula_cubic_log2_scale, make_cubic_path(), SkMatrix::Scale(1.1f, 0.9f),
               target, op) {
    benchmark_wangs_formula_cubic_log2(op->fViewMatrix, op->fPath);
}

DEF_TESS_BENCH(wangs_formula_cubic_log2_affine, make_cubic_path(),
               SkMatrix::MakeAll(.9f,0.9f,0,  1.1f,1.1f,0, 0,0,1), target, op) {
    benchmark_wangs_formula_cubic_log2(op->fViewMatrix, op->fPath);
}

DEF_TESS_BENCH(middle_out_triangulation,
               ToolUtils::make_star(SkRect::MakeWH(500, 500), kNumCubicsInChalkboard),
               SkMatrix::I(), target, op) {
    int baseVertex;
    auto vertexData = static_cast<SkPoint*>(target->makeVertexSpace(
            sizeof(SkPoint), kNumCubicsInChalkboard, nullptr, &baseVertex));
    GrMiddleOutPolygonTriangulator middleOut(vertexData, 3, kNumCubicsInChalkboard + 2);
    for (auto [verb, pts, w] : SkPathPriv::Iterate(op->fPath)) {
        switch (verb) {
            case SkPathVerb::kMove:
                middleOut.closeAndMove(pts[0]);
                break;
            case SkPathVerb::kLine:
                middleOut.pushVertex(pts[1]);
                break;
            case SkPathVerb::kClose:
                middleOut.close();
                break;
            case SkPathVerb::kQuad:
            case SkPathVerb::kConic:
            case SkPathVerb::kCubic:
                SkUNREACHABLE;
        }
        middleOut.closeAndMove(pts[0]);
    }
}
