import { MatrixTypes2 } from '../../../../types/data';
import { PaperFieldLevel } from '../../../../types/data.global';
import _ from 'lodash';
import * as dataForge from 'data-forge';


/**
 * Shannon entropy (base 2, i.e. in bits) of a probability list.
 *
 * Zero probabilities contribute nothing, following the usual convention
 * `0 * log2(0) = 0`. Evaluated naively that term is `-0 * -Infinity = NaN`,
 * which would poison the whole sum. Any other invalid input (NaN, negative)
 * still propagates as NaN so bad data is not silently hidden.
 */
const shannonEntropy = (probList: number[]): number =>
  probList.reduce((acc, p) => (p === 0 ? acc : acc - p * Math.log2(p)), 0);

/**
 * Normalises raw counts into a probability distribution and returns its
 * Shannon entropy. A total of zero has no defined distribution; it is
 * reported as zero diversity instead of dividing 0 by 0.
 */
const entropyOfCounts = (counts: number[]): number => {
  const total = counts.reduce((sum, count) => sum + count, 0);
  if (total === 0) {
    return 0;
  }
  return shannonEntropy(counts.map(count => count / total));
};

/**
 * Computes citation-diversity scores (Shannon entropy, base 2) from flow data,
 * in both directions.
 *
 * - `paperField`: one entry per distinct `colName`. Rows of that paper field
 *   are grouped by `cpcSubsection`, `numPatent` is summed per group, and the
 *   entropy of the resulting shares is the field's `diversityScore`.
 * - `patentCPCSubsection`: one entry per distinct `cpcSubsection`. Rows of
 *   that subsection are grouped by `colName`, the rows in each group are
 *   counted, and the entropy of the resulting shares is the `diversityScore`.
 *
 * @param flowData Flow units to analyse.
 * @returns An object holding the two score lists described above.
 */
const getPaperFieldPatentCitationDiversity = (flowData: MatrixTypes2.FlowUnit[]) => {
  // 0. Transform the JSON rows into a dataframe.
  const flowDataframe: dataForge.DataFrame<number, MatrixTypes2.FlowUnit> = new dataForge.DataFrame(flowData);

  // 1. Paper diversity: how the patents citing one paper field spread over CPC subsections.
  const paperFieldGroupList = flowDataframe
    .groupBy(item => item.colName)
    .select(paperFieldDF => {
      const patentGroupList = paperFieldDF
        .groupBy(item => item.cpcSubsection)
        .select(patentGroup => {
          return {
            cpcSubsection: patentGroup.first().cpcSubsection,
            numPatent: patentGroup.deflate(row => row.numPatent).sum(),
          };
        }).inflate().toArray();
      return {
        colName: paperFieldDF.first().colName,
        colIdx: paperFieldDF.first().colIdx,
        diversityScore: entropyOfCounts(patentGroupList.map(group => group.numPatent)),
      };
    }).inflate().toArray();

  // 2. Patent diversity: how the rows of one CPC subsection spread over paper fields.
  const patentCPCSubsectionGroupList = flowDataframe
    .groupBy(item => item.cpcSubsection)
    .select(cpcSubsectionDF => {
      const fieldGroupList = cpcSubsectionDF
        .groupBy(item => item.colName)
        .select(paperFieldGroup => {
          return {
            colName: paperFieldGroup.first().colName,
            colIdx: paperFieldGroup.first().colIdx,
            // NOTE(review): this weights by number of rows, whereas section 1
            // weights by summed numPatent - confirm the asymmetry is intended.
            numPaper: paperFieldGroup.count(),
          };
        }).inflate().toArray();
      return {
        cpcSubsection: cpcSubsectionDF.first().cpcSubsection,
        diversityScore: entropyOfCounts(fieldGroupList.map(group => group.numPaper)),
      };
    }).inflate().toArray();

  return {
    paperField: paperFieldGroupList,
    patentCPCSubsection: patentCPCSubsectionGroupList,
  };
};

export default getPaperFieldPatentCitationDiversity;