import React, { useState, useEffect, useRef } from 'react';
import { Tab } from '@headlessui/react';

import {
  Chart as ChartJS,
  CategoryScale,
  LinearScale,
  BarElement,
  Title,
  Tooltip,
  Legend,
} from 'chart.js';
import { Bar } from 'react-chartjs-2';

import ChartDataLabels from 'chartjs-plugin-datalabels';

import { ScrollWrap } from '../common/scroll-wrap';

import {
  PromptWrapper,
  ModelResponseWrapper,
} from '@/components/hallucination-index-2023/blooper-reel';

import NoRagBlooper from './no-rag-blooper';
import LongTextBlooper from './long-text-blooper';
import WithRagBlooper from './w-rag-blooper';

// Register the Chart.js scales, elements and plugins imported above (plus the
// datalabels plugin) with Chart.js once, when this module is first loaded.
// NOTE(review): the only <Bar> usage visible in this file is commented out, so
// this registration may no longer be needed here — confirm before removing.
ChartJS.register(
  CategoryScale,
  LinearScale,
  BarElement,
  Title,
  Tooltip,
  Legend,
  ChartDataLabels,
);

import {
  useInView,
  InView,
  ObserverInstanceCallback,
} from 'react-intersection-observer';

import { modelsCreators } from '@/utils/hallucination-index-data';

// tii.png
// openai.png
// mosaic.png
// meta.png
// m.png
// hf.png

/**
 * Resolves the `creator` value recorded for a model in `modelsCreators`.
 * This file interpolates the result into a `/creators/<value>` image path.
 *
 * @param {string} name - Model name; matched against each entry's `Model`
 *   field with strict (case-sensitive) equality.
 * @returns {*} The first matching entry's `creator`, or `''` when no entry
 *   matches.
 */
const findLogoByName = (name) => {
  const hasSameModel = (entry) => entry.Model === name;
  const match = modelsCreators.find(hasSameModel);

  if (!match) {
    return '';
  }
  return match.creator;
};

// Explainer for the "Correctness" metric. Two task types are scored with this
// metric, so the copy lives in one place to keep both tabs in sync.
const correctnessMetricHtml = `
<p class="font-bold">Correctness:</p>
<p class="leading-loose">Measures whether a given model response is factual or not. Correctness uncovers open-domain hallucinations - factual errors that do not relate to any specific documents or context. </p>
<p class="leading-loose">The higher the Correctness score (i.e., it has a value of 1 or close to 1), the higher the probability the response is accurate.</p>
<p class="leading-loose">The lower the Correctness score (i.e., it has a value of 0 or close to 0), the higher the probability of hallucination and factual errors</p>
  `;

// "Metric Used" copy per task key. Values are static HTML strings that the
// component injects with dangerouslySetInnerHTML, so they must stay trusted,
// hand-written markup (never user input).
const chartMetrics = {
  Long_form_text_generation: correctnessMetricHtml,
  QA_with_RAG: `
<p class="font-bold">Context Adherence:</p>
<p class="leading-loose">Context Adherence evaluates the degree to which a model's response aligns strictly with the given context, serving as a metric to gauge closed-domain hallucinations, wherein the model generates content that deviates from the provided context.</p>
<p class="leading-loose">The higher the Context Adherence score (i.e., it has a value of 1 or close to 1), the more likely the response contains information from the context provided to the model. </p>
<p class="leading-loose">The lower the Context Adherence score (i.e., it has a value of 0 or close to 0), the more likely the response contains information not included in the context provided to the model.</p>
  `,
  QA_without_RAG: correctnessMetricHtml,
};

// "Datasets Used" copy per task key. Values are static HTML strings injected
// with dangerouslySetInnerHTML. Every link opens in a new tab, so each one
// carries rel="noopener noreferrer" to keep the opened page from getting a
// handle on this window via window.opener.
const chartUsedDataSets = {
  Long_form_text_generation: `
  <p>
    <a class="text-hi-100 font-medium hover:underline" href="https://huggingface.co/datasets/OpenAssistant/oasst1" target="_blank" rel="noopener noreferrer">Open assistant</a>
  </p>
  `,
  QA_with_RAG: `
  <p>
  <a class="text-hi-100 font-medium hover:underline" href="https://huggingface.co/datasets/narrativeqa" target="_blank" rel="noopener noreferrer">NarrativeQA</a>,
  <a class="text-hi-100 font-medium hover:underline" href="https://huggingface.co/datasets/drop" target="_blank" rel="noopener noreferrer">DROP</a>,
  <a class="text-hi-100 font-medium hover:underline" href="https://huggingface.co/datasets/ms_marco" target="_blank" rel="noopener noreferrer">MS Marco</a>,
  <a class="text-hi-100 font-medium hover:underline" href="https://huggingface.co/datasets/hotpot_qa/viewer/distractor/validation" target="_blank" rel="noopener noreferrer">HotpotQA distractor test</a>
  </p>
  `,
  QA_without_RAG: `
  <p>
  <a class="text-hi-100 font-medium hover:underline" href="https://huggingface.co/datasets/truthful_qa" target="_blank" rel="noopener noreferrer">TruthfulQA</a>,
  <a class="text-hi-100 font-medium hover:underline" href="https://huggingface.co/datasets/trivia_qa" target="_blank" rel="noopener noreferrer">TriviaQA</a>
  </p>
  `,
};

// Heading shown above the score column of the results table, keyed by task.
// Both Correctness-scored tasks share a single label.
const correctnessLegend = 'Correctness Score';

const tableLegend = {
  Long_form_text_generation: correctnessLegend,
  QA_with_RAG: 'Context Adherence Score',
  QA_without_RAG: correctnessLegend,
};

const blooperReels = {
  Long_form_text_generation: <LongTextBlooper />,
  QA_with_RAG: <WithRagBlooper />,
  QA_without_RAG: <NoRagBlooper />,
};

// "Insights" copy per task key. Values are static HTML strings injected with
// dangerouslySetInnerHTML, so they must remain trusted, hand-written markup.
const chartInsights = {
  Long_form_text_generation: `
  <ul class="list-decimal pl-4 leading-10">
    <li>Open AI’s GPT-4-0613 performed the best and was least likely to hallucinate for Long-form Text Generation</li>
    <li>Open AI's GPT-3.5-turbo-1106 and GPT-3.5-turbo-0613 both performed on par with GPT-4, with potential cost savings and performance improvement over GPT-4.</li>
    <li>Surprisingly, Meta's open-source Llama-2-70b-chat was on par with GPT-4, offering a cost-efficient solution for this task type. </li>
    <li>We found TII UAE's Falcon-40b and Mosaic ML's MPT-7b performed worst for this task type.</li>
    <li class="font-bold">Recommendation: Llama-2-70b-chat</li>
  </ul>
  `,
  QA_with_RAG: `
  <ul class="list-decimal pl-4 leading-10">
    <li>Open AI's GPT-4-0613 performed the best and was least likely to hallucinate for Question & Answer with RAG.</li>
    <li>While GPT-4-0613 performed the best, the faster and more affordable GPT-3.5-turbo-0613/-1106 models performed nearly identically to GPT-4-0613.</li>
    <li>Huggingface's Zephyr-7b was the best-performing open-source model, outperforming Meta's 10x larger Llama-2-70b, proving larger models are not always better.</li>
    <li>We found TII UAE's Falcon-40b and Mosaic ML's MPT-7b performed worst for this task type.</li>
    <li class="font-bold">Recommendation: GPT-3.5-turbo-0613</li>
  </ul>
  `,
  QA_without_RAG: `
  <ul class="list-decimal pl-4 leading-10">
    <li>Open AI’s GPT-4 performed the best and was least likely to hallucinate for Question & Answer without RAG.</li>
    <li>OpenAI’s models ranked highest for this task type, highlighting their prowess in general knowledge use cases.</li>
    <li>Of the open-source models in the Index, Meta’s largest model, Llama 2 (70b) performed best.</li>
    <li>Meta’s Llama-2-7b-chat and Mosaic ML’s MPT-7b-instruct models performed poorly and were most likely to hallucinate for this task type.</li>
    <li class="font-bold">Recommendation: GPT-4-0613</li>
  </ul>
  `,
};

// Plain-text description of each task type, shown as the intro paragraph of
// the selected tab. Each description is named first, then keyed by task.
const qaWithoutRagDescription =
  'This task type refers to a model that, when presented with a question, relies on the internal knowledge and understanding that the AI model has already acquired during its training. It generates answers based on patterns, facts, and relationships it has learned during training, without referencing any external sources of information.';

const qaWithRagDescription =
  'A model that, when presented with a question, can retrieve relevant information from a given dataset, database, or set of documents to provide an accurate answer. This approach is akin to looking up information in a reference book or searching a database before responding, making it well suited to tasks that require domain-specific information.';

const longFormDescription =
  'Using generative AI to create extensive and coherent pieces of text such as reports, articles, essays, or stories. For this use-case, AI models are trained on large datasets to understand context, maintain subject relevance, and mimic a natural writing style over longer passages.';

const taskType = {
  QA_without_RAG: qaWithoutRagDescription,
  QA_with_RAG: qaWithRagDescription,
  Long_form_text_generation: longFormDescription,
};

// Tab definitions, in display order. `name` is the task key used to index the
// per-task lookup tables in this file and the score fields on each data row;
// `label` is the visible tab caption; `text` is a short summary of the task's
// results. The summaries for the two Q&A tasks are attached to the task they
// actually describe: RAG answers come from supplied, domain-specific data,
// while non-RAG answers come from the model's general knowledge.
const tabItems = [
  {
    name: 'QA_with_RAG',
    label: 'Q&A with RAG',
    text: "In cases where customers ask the LLM a question and the LLM provides a response based on domain-specific data, OpenAI's GPT-3.5 Turbo performed equally well as its much costlier counterpart, GPT-4. What we found surprising is Zephyr from Huggingface performed the best in open-source models, outranking 10x larger Llama 2 model! On the other side we saw worse performance from Falcon (40b) and MPT models.",
    image: '',
  },
  {
    name: 'QA_without_RAG',
    label: 'Q&A without RAG',
    text: 'In cases where customers ask the LLM a question and the LLM provides a response based on generally available knowledge, Open AI’s GPT-4 performed the best but was only marginally better than GPT3.5. Of the open-source options, the largest Llama 2 (70b) from Meta performed the best. We saw poor performance from smallest Llama 2 (7b) and MPT 7b model.',
    image: '',
  },
  {
    name: 'Long_form_text_generation',
    label: 'Long-Form Text Generation',
    text: "For generating extensive text, Meta's open-source Llama 2 performs on par with OpenAI's GPT-3.5 Turbo and GPT-4, offering a cost-efficient solution for this application. We saw a low performance from Mistral 7b and MPT 7b models.",
    image: '',
  },
];

const PerformanceTabs = ({ performanceData, taskInsights }) => {
  const [selectedIndex, setSelectedIndex] = useState(0);
  const [currentData, setCurrentData] = useState('QA_without_RAG');

  const insights = taskInsights.edges.filter(
    (item) => item.node.Task === currentData.replace(/_/g, ' '),
  );

  performanceData.sort(
    (a, b) => parseFloat(b[currentData]) - parseFloat(a[currentData]),
  );

  const [barChartData, setBarChartData] = useState(performanceData);

  const chartOptions = {
    // maintainAspectRatio: false
    indexAxis: 'y',
    background: 'red',

    responsive: true,
    borderRadius: 3,
    elements: {
      bar: {
        borderWidth: 2,
      },
    },
    tooltips: {
      enabled: false,
    },
    scales: {
      x: {
        ticks: {
          callback: function (value, index, values) {
            if ((value * 10) % 2 == 0 && index < values.length - 2)
              return value;
            else return null;
          },
        },
        scaleLabel: {
          display: true,
        },
        max: 0.9,
        border: {
          display: false,
        },
        grid: {
          display: false,
          offset: true,
        },
      },
      y: {
        scaleLabel: {
          display: true,
        },
        grid: {
          display: false,
        },
      },
    },
    plugins: {
      datalabels: {
        display: true,
        color: 'black',
        align: 'end',
        anchor: 'end',
        formatter: (value, ctx) => {
          console.log(ctx);
          return value;
        },
      },
      legend: {
        display: false,
      },
      title: {
        display: true,
        align: 'center',
        text: `Model Performance ${tabItems[selectedIndex].label}`,
        font: {
          size: 16,
        },
      },
    },
    animation: {
      duration: 0,
    },
    transitions: {
      from: 1,
    },
  };

  // Extract the array of model names and make them lowercase
  const modelNames = performanceData.map((item) => item.Model.toLowerCase());

  // Extract the result arrays
  const longFormTextGenerationResults = performanceData.map((item) =>
    parseFloat(item.Long_form_text_generation),
  );
  const qaWithRagResults = performanceData.map((item) =>
    parseFloat(item.QA_with_RAG),
  );
  const qaWithoutRagResults = performanceData.map((item) =>
    parseFloat(item.QA_without_RAG),
  );

  const chartBackgroundColors = [
    'rgba(0,0,0,1)',
    'rgba(0,0,0,1)',
    '#FFF5F0',
    '#FEE0D2',
    '#FCBBA1',
    '#FC9272',
    '#FC6A4A',
    '#EF3B2C',
    '#CB181D',
    '#A50E15',
    '#67000D',
  ];

  const hiBgs = ['#5C5CFF'];

  const chartDataSets = [
    {
      label: 'Long form text generation',
      name: 'Long_form_text_generation',
      backgroundColor: hiBgs,
      borderColor: hiBgs,
      borderWidth: 1,
      // hoverBackgroundColor: 'rgba(75,192,192,0.4)',
      // hoverBorderColor: 'rgba(75,192,192,1)',
      data: longFormTextGenerationResults,
    },
    {
      label: 'QA with RAG',
      name: 'QA_with_RAG',
      backgroundColor: hiBgs,
      borderColor: hiBgs,
      borderWidth: 1,
      // hoverBackgroundColor: 'rgba(255,99,132,0.4)',
      // hoverBorderColor: 'rgba(255,99,132,1)',
      data: qaWithRagResults,
    },
    {
      label: 'QA without RAG',
      name: 'QA_without_RAG',
      backgroundColor: hiBgs,
      borderColor: hiBgs,
      borderWidth: 1,
      // hoverBackgroundColor: 'rgba(255,159,64,0.4)',
      // hoverBorderColor: 'rgba(255,159,64,1)',
      data: qaWithoutRagResults,
    },
  ];

  // Prepare the dataset for the chart
  const chartData = {
    labels: modelNames,
    datasets: chartDataSets.filter((item) => item.name === currentData),
  };

  useEffect(() => {
    setCurrentData(tabItems[selectedIndex].name);
    const sortedData = performanceData.sort(
      (a, b) => parseFloat(b[currentData]) - parseFloat(a[currentData]),
    );

    setBarChartData(sortedData);
  }, [selectedIndex]);

  return (
    <div className="relative">
      {/* {JSON.stringify(performanceData, null, 2)} */}

      <Tab.Group selectedIndex={selectedIndex} onChange={setSelectedIndex}>
        <Tab.List className={'sticky top-14 z-20 bg-white lg:top-20'}>
          <div className="hide-scroll-bar mx-auto flex h-full max-w-[1280px] space-x-4 overflow-scroll px-4 lg:space-x-6">
            {tabItems.map((item, index) => (
              <Tab
                key={index}
                onClick={() => {
                  setSelectedIndex(index);
                }}
                className={`relative border-b-4 py-2 hover:border-hi-100 hover:opacity-100 lg:py-2.5 ${
                  selectedIndex === index
                    ? 'border-hi-100 text-hi-100'
                    : 'border-transparent opacity-80'
                }`}
              >
                <div className="whitespace-nowrap text-sm lg:text-base">
                  {item.label}
                </div>
              </Tab>
            ))}
          </div>
        </Tab.List>
        <div className="bg-hi-2">
          <div className="mx-auto h-full max-w-[1280px] px-4">
            <div className="xl:py-15 py-10 text-lg md:py-12 lg:py-14 lg:text-xl xl:pr-64">
              <p className="leading-loose">{taskType[currentData]}</p>
            </div>

            <div className="border border-hi-3 bg-white p-4 md:p-8 lg:p-10 xl:p-12">
              {/* <div className=''>
                  <Bar data={chartData} options={chartOptions} />
                </div> */}
              {/* // CODED */}

              <div className="pl divide-y-[1px]">
                <div className="flex items-center justify-start py-2">
                  <div className="flex w-[72px] shrink justify-start text-xs opacity-60">
                    Developer
                  </div>
                  <div className="text- w-[160px] text-sm text-xs opacity-60">
                    Model
                  </div>
                  {/* <div className='w-1/3 text-sm'>{item[currentData]}</div> */}
                  <div className="w-auto grow text-right text-sm text-xs opacity-60">
                    {tableLegend[currentData]}
                  </div>
                </div>

                {barChartData.map((item, index) => {
                  return (
                    <div className="flex flex-wrap items-center justify-start py-2">
                      <div className="flex w-[72px] shrink justify-start">
                        <img
                          src={`/creators/${findLogoByName(item.Model)}`}
                          width={24}
                          height={24}
                        />
                      </div>
                      <div className="text- w-[160px] text-sm">
                        {item.Model.toLowerCase()}
                      </div>
                      {/* <div className='w-1/3 text-sm'>{item[currentData]}</div> */}
                      <div className="mt-2 w-full grow text-sm lg:mt-0 lg:w-auto">
                        <div className="flex items-center">
                          <div className="w-full">
                            <div className="relative h-6 w-full">
                              <div
                                className="relative h-6 bg-hi-60 transition-all delay-100 duration-500 ease-out"
                                style={{
                                  width: `${
                                    parseFloat(item[currentData]) * 100
                                  }%`,
                                  opacity: parseFloat(item[currentData]),
                                }}
                              ></div>
                              <div className="absolute right-2 top-1 w-[20px] font-normal">
                                {parseFloat(item[currentData]).toFixed(2)}
                              </div>
                            </div>
                          </div>
                        </div>
                      </div>
                    </div>
                  );
                })}
              </div>
            </div>

            <div className="-mt-px border border-hi-3 bg-white p-4 md:p-8 lg:p-10 xl:p-12">
              <p className="text-[22px] md:text-[24px] lg:text-[28px]">
                🪄 Insights
              </p>

              <div className="mt-5">
                <div
                  className="space-y-4"
                  dangerouslySetInnerHTML={{
                    __html: chartInsights[currentData],
                  }}
                />
              </div>
            </div>

            <div className="-mt-px grid grid-cols-1 gap-8 border border-hi-3 p-4 md:p-8 lg:grid-cols-2 lg:p-10 xl:p-12">
              <div>
                <p className="text-[22px] md:text-[24px] lg:text-[28px]">
                  Metric Used
                </p>
                <div className="my-6 border-t border-hi-3 lg:my-8 xl:my-9" />
                <div
                  className="space-y-4"
                  dangerouslySetInnerHTML={{
                    __html: chartMetrics[currentData],
                  }}
                />
              </div>
              <div>
                <p className="text-[22px] md:text-[24px] lg:text-[28px]">
                  Datasets Used
                </p>
                <div className="my-6 border-t border-hi-3 lg:my-8 xl:my-9" />
                <div
                  className="space-y-4"
                  dangerouslySetInnerHTML={{
                    __html: chartUsedDataSets[currentData],
                  }}
                />
              </div>
            </div>

            <div className="py-20">
              <h3 className="font-serif text-[20px] font-normal md:text-[24px] lg:text-[28px] xl:text-[32px]">
                🤦🏽‍♀️ Hallucination Blooper Reel
              </h3>

              {blooperReels[currentData]}
            </div>

            {/* <div className="flex flex-wrap -mx-4">
              <div className="px-4 w-1/3">
                <Tab.Panels>
                  {tabItems.map((item, index) => (
                    <Tab.Panel key={index}>
                      <p className="text-[22px] md:text-[24px] lg:text-[28px] font-serif">
                        {item.label}
                      </p>
                      <hr className="my-3" />
                      <p className="text-sm opacity-70">{item.text}</p>
                    </Tab.Panel>
                  ))}
                </Tab.Panels>
              </div>
            </div> */}
          </div>
        </div>
      </Tab.Group>
    </div>
  );
};

export { PerformanceTabs };
