import { CharacterTextSplitterParams, CharacterTextSplitter } from 'langchain/text_splitter';

/**
 * Splits an array into consecutive sub-arrays of at most `chunkSize` elements.
 * The last sub-array holds the remainder and may be shorter; an empty input
 * yields an empty result. The input array is not modified.
 *
 * @param arrayData - Items to partition.
 * @param chunkSize - Maximum number of items per sub-array; must be a positive integer.
 * @returns The sub-arrays, in the original element order.
 * @throws RangeError when `chunkSize` is not a positive integer.
 */
export const chunk = <T>(arrayData: T[], chunkSize: number): T[][] => {
  // Without this guard a negative or infinite size silently returns [] (dropping
  // every element), a fractional size yields uneven or empty slices, and 0 fails
  // with an opaque "Invalid array length" error.
  if (!Number.isInteger(chunkSize) || chunkSize <= 0) {
    throw new RangeError(`chunkSize must be a positive integer, received ${chunkSize}`);
  }

  return Array.from({ length: Math.ceil(arrayData.length / chunkSize) }, (_, index) =>
    arrayData.slice(index * chunkSize, (index + 1) * chunkSize)
  );
};
/**
 * Breaks Markdown text into chunks by splitting on progressively finer
 * separators: level-2 headings first, then level-3, then level-4, and
 * finally single newlines. Every pass is given a chunk size of 1000.
 *
 * @param text - The Markdown source to split.
 * @returns The chunks produced by the final (newline) pass.
 */
export const getMarkdownChunks = async (text: string): Promise<string[]> => {
  const maxChunkSize = 1000;
  // Applied in order to the output of the initial level-2 heading split.
  const finerSeparators = ['\n### ', '\n#### ', '\n'];

  let pieces = await splitText(text, maxChunkSize, '\n## ');
  for (const separator of finerSeparators) {
    pieces = await getChunks(pieces, maxChunkSize, separator);
  }

  return pieces;
};
/**
 * Splits `text` with a `CharacterTextSplitter` configured for the given
 * separator and chunk size, with zero chunk overlap and `keepSeparator`
 * enabled.
 *
 * @param text - The text to split.
 * @param chunkSize - Chunk size handed to the splitter.
 * @param separator - Separator string handed to the splitter.
 * @returns Whatever the splitter's `splitText` resolves to for `text`.
 */
export const splitText = async (text: string, chunkSize: number, separator: string) => {
  const options: Partial<CharacterTextSplitterParams> = {
    separator,
    chunkSize,
    keepSeparator: true,
    chunkOverlap: 0,
  };

  const pieces = await new CharacterTextSplitter(options).splitText(text);
  return pieces;
};
/**
 * Runs `splitText` over each entry of `chunks`, one at a time and in order,
 * and returns all resulting pieces as a single flat array.
 *
 * @param chunks - Texts to split further.
 * @param chunkSize - Chunk size forwarded to `splitText`.
 * @param separator - Separator forwarded to `splitText`.
 * @returns The pieces from every input entry, preserving input order.
 */
export const getChunks = async (chunks: string[], chunkSize: number, separator: string) => {
  const pieces: string[] = [];

  for (const source of chunks) {
    const split = await splitText(source, chunkSize, separator);
    for (const piece of split) {
      pieces.push(piece);
    }
  }

  return pieces;
};
