import { TranscriptUpdate } from "party/types"; // Adjust this import path as needed

/**
 * One sentence reconstructed from a single speaker's transcript words,
 * as produced by `processTranscriptItems`.
 */
export interface TranscriptSentence {
  /** Identifier of the form `<speaker_id>-<start_time>-<end_time>`, times formatted to one decimal place. */
  id: string;
  /** Numeric speaker identifier, parsed from the key the items were grouped under. */
  speaker_id: number;
  /** Speaker label copied from the originating transcript item. */
  speaker: string;
  /** Sentence text: the words' text joined by single spaces and trimmed. */
  sentence: string;
  /** `start_time` of the sentence's first word (unit is whatever the source words use — TODO confirm). */
  start_time: number;
  /** `end_time` of the sentence's last word (same unit as `start_time`). */
  end_time: number;
  /** Ids of the transcript items that contributed words to this sentence. */
  item_ids: string[];
}

/**
 * Matches word text that closes a sentence: one or more of `.`, `!`, `?`,
 * optionally followed by closing quotes/brackets and trailing whitespace.
 * Punctuation in the middle of a token ("3.5", "a.m") does not match.
 */
const SENTENCE_END = /[.!?]+["'\u201D\u2019)\]]*\s*$/;

/** A single timed word as carried by a transcript item. */
type TranscriptWord = TranscriptUpdate["words"][number];

/**
 * Builds a sentence record from a non-empty run of consecutive words.
 * Timing comes from the first and last word of the run.
 */
function buildSentence(
  speakerKey: string,
  speaker: string,
  words: TranscriptWord[],
  itemIds: string[]
): TranscriptSentence {
  const startTime = words[0].start_time;
  const endTime = words[words.length - 1].end_time;
  return {
    id: `${speakerKey}-${startTime.toFixed(1)}-${endTime.toFixed(1)}`,
    speaker_id: parseInt(speakerKey, 10),
    speaker,
    sentence: words
      .map((w) => w.text)
      .join(" ")
      .trim(),
    start_time: startTime,
    end_time: endTime,
    item_ids: itemIds,
  };
}

/**
 * Groups transcript items by speaker and stitches each speaker's words into
 * sentences.
 *
 * Items without words are ignored. The remaining items are ordered by the
 * start time of their first word and grouped by `speaker_id`; within a
 * speaker, words are accumulated (across item boundaries) until a word ends
 * with sentence-terminating punctuation. Words left over at the end of a
 * speaker's input are emitted as a final, unterminated sentence.
 *
 * Sentence boundaries are decided per word rather than by re-scanning the
 * joined text, so a sentence's timing and `item_ids` always come from exactly
 * the words it contains, and tokens with interior punctuation (e.g. "3.5")
 * do not split a sentence.
 *
 * @param items Transcript updates in any order; not mutated.
 * @returns All sentences from all speakers, sorted by ascending `end_time`.
 */
export function processTranscriptItems(
  items: TranscriptUpdate[]
): TranscriptSentence[] {
  // `filter` returns a fresh array, so the in-place sort never touches `items`.
  const sortedItems = items
    .filter((item) => item.words && item.words.length > 0)
    .sort((a, b) => a.words[0].start_time - b.words[0].start_time);

  const groupedBySpeaker: Record<number, TranscriptUpdate[]> = {};
  for (const item of sortedItems) {
    if (!groupedBySpeaker[item.speaker_id]) {
      groupedBySpeaker[item.speaker_id] = [];
    }
    groupedBySpeaker[item.speaker_id].push(item);
  }

  const sentences: TranscriptSentence[] = [];

  for (const [speakerKey, speakerItems] of Object.entries(groupedBySpeaker)) {
    let pendingWords: TranscriptWord[] = [];
    let pendingItemIds: string[] = [];

    for (const item of speakerItems) {
      for (const word of item.words) {
        pendingWords.push(word);
        if (!pendingItemIds.includes(item.id)) {
          pendingItemIds.push(item.id);
        }

        if (SENTENCE_END.test(word.text)) {
          sentences.push(
            buildSentence(speakerKey, item.speaker, pendingWords, pendingItemIds)
          );
          // Fresh arrays: the emitted sentence keeps ownership of the old ones.
          pendingWords = [];
          pendingItemIds = [];
        }
      }
    }

    // Flush trailing words that never reached terminal punctuation.
    if (pendingWords.length > 0) {
      const trailing = buildSentence(
        speakerKey,
        speakerItems[0].speaker,
        pendingWords,
        pendingItemIds
      );
      // Skip a remainder made up solely of whitespace-only tokens.
      if (trailing.sentence) {
        sentences.push(trailing);
      }
    }
  }

  // Interleave speakers chronologically; the sort is stable, so ties keep
  // their per-speaker order.
  return sentences.sort((a, b) => a.end_time - b.end_time);
}
