/*************************************************************************/
/*                                                                       */
/*                Centre for Speech Technology Research                  */
/*                     University of Edinburgh, UK                       */
/*                         Copyright (c) 1996                            */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission to use, copy, modify, distribute this software and its    */
/*  documentation for research, educational and individual use only, is  */
/*  hereby granted without fee, subject to the following conditions:     */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*  This software may not be used for commercial purposes without        */
/*  specific prior written permission from the authors.                  */
/*                                                                       */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*                     Author :  Alan W Black                            */
/*                     Date   :  July 1996                               */
/*-----------------------------------------------------------------------*/
/*                                                                       */
/* Simple statistics (for discrete probability distributions)            */
/*                                                                       */
/*=======================================================================*/
#ifndef __EST_SIMPLESTATS_H__
#define __EST_SIMPLESTATS_H__

#include "EST_String.h"
#include "EST_StringTrie.h"
#include "EST_TList.h"
#include "EST_KV.h"
#include "EST_types.h"

// A discrete vocabulary: a bidirectional mapping between names (strings)
// and integer indices.
class EST_Discrete {

private:
    // index -> name table
    EST_StrVector namevector;

    // name -> index trie
    EST_StringTrie nametrie;
    // index reported by index() for names missing from the trie
    int p_def_val;

public:
    // Builds an empty vocabulary whose default index is -1.
    EST_Discrete() : p_def_val(-1)
    {
        nametrie.clear();
    }
    EST_Discrete(const EST_StrList &vocab);
    //EST_Discrete(const EST_StrVector &vocab); not implemented
    ~EST_Discrete();
    bool init(const EST_StrList &vocab);

    // Number of entries held in the index -> name table.
    const int size(void) const
    {
        return namevector.n();
    }

    // Index stored in the trie for name n, or the default value when the
    // trie has no entry for n.
    const int index(const EST_String &n) const
    {
        const int *found = (int *)nametrie.lookup(n);
        if (found == NULL)
            return p_def_val;
        return *found;
    }

    // Name stored at position n of the index -> name table.
    const EST_String &name(const int n) const
    {
        return namevector(n);
    }

    // Makes the index of v the new default value.  Since the lookup goes
    // through index(), an unknown v leaves the default unchanged.
    void def_val(const EST_String &v)
    {
        p_def_val = index(v);
    }

    // backwards compatibility: older spelling of index()
    int name(const EST_String &n) const
    {
        return index(n);
    }
    bool operator == (const EST_Discrete &d);
    bool operator != (const EST_Discrete &d);

    friend ostream& operator <<(ostream& s, const EST_Discrete &d);

};

/* Sufficient Statistics
 *
 * Accumulates the number of samples, their sum and the sum of their
 * squares, and derives mean, variance and standard deviation from
 * those three values on demand.
 */
class EST_SuffStats {
private:
    int n;		// number of samples accumulated so far
    double sum;		// sum of the samples
    double sumx;	// sum of the squared samples
public:
    EST_SuffStats() : n(0), sum(0.0), sumx(0.0) {}

    // Discards everything accumulated so far.
    void reset(void) { n = 0; sum = sumx = 0.0; }

    // Number of samples accumulated.
    int samples(void) const { return n; }

    // Arithmetic mean of the samples.  With no samples this divides by
    // zero, so the result is not a meaningful number.
    double mean(void) const { return sum / n; }

    // Sample variance, (n*sumx - sum^2) / (n*(n-1)).  With fewer than two
    // samples the denominator is zero and the result is not meaningful.
    double variance(void) const
    {
	// Work in double: the product n*(n-1) overflows a 32-bit int as
	// soon as n exceeds 46341.
	const double count = n;
	return ((count*sumx)-(sum*sum))/(count*(count-1.0));
    }

    // Square root of variance().
    double stddev(void) const { return sqrt(variance()); }

    // Adds one sample to the statistics.
    EST_SuffStats &operator +=(double a) { n++; sum+=a; sumx+=a*a; return *this; }

    // NOTE: like +=, this modifies the object itself and returns a
    // reference to it rather than producing a new value.
    EST_SuffStats &operator + (double a) { return *this += a; }
};

// Kind of storage an EST_DiscreteProbDistribution is using.
enum EST_tprob_type {
    tprob_string,	// unknown vocabulary: counts kept per string
    tprob_int,		// NOTE(review): usage not visible in this header
    tprob_discrete	// known vocabulary: counts tied to an EST_Discrete
};
// A probability distribution over a discrete set of items, held either
// against a known vocabulary (an EST_Discrete) or keyed directly by string.
class EST_DiscreteProbDistribution {
private:
    EST_tprob_type type;
    // total sample count; a double as frequencies need not be whole numbers
    double num_samples;

    /* Known vocabularies (tprob_discrete) */
    const EST_Discrete *discrete;
    int size;
    // counts; once int, now double as frequencies need not be whole numbers
    double *icounts;

    /* Unknown vocabularies (tprob_string) */
    EST_StrD_KVL scounts;

public:
    EST_DiscreteProbDistribution()
    {
	init();
    }
    EST_DiscreteProbDistribution(const EST_DiscreteProbDistribution &b);
    // Runs the argument-less init() first, then initialises from vocab
    // (the bool result of that second step is deliberately ignored).
    EST_DiscreteProbDistribution(const EST_TList<EST_String> &vocab)
    {
	init();
	(void)init(vocab);
    }
    // Runs the argument-less init() first, then initialises from d.
    EST_DiscreteProbDistribution(const EST_Discrete *d)
    {
	init();
	init(d);
    }
    EST_DiscreteProbDistribution(const EST_Discrete *d,const double n_samples, double *counts);

    // Destruction simply delegates to clear().
    ~EST_DiscreteProbDistribution()
    {
	clear();
    }

    void clear(void);
    bool init(const EST_StrList &vocab);
    void init(const EST_Discrete *d);
    void init();

    // Current value of the total sample count.
    double samples(void) const
    {
	return num_samples;
    }

    // Accumulation and queries, by item name or by item index
    // (implemented outside this header).
    void cumulate(const EST_String &s,double count=1);
    void cumulate(const int i,double count=1);
    const EST_String &most_probable(double *prob = NULL) const;
    double entropy(void) const;
    double probability(const EST_String &s) const;
    double probability(const int i) const;
    double frequency(const EST_String &s) const;
    double frequency(const int i) const;

    // Iteration over the members of the distribution
    int item_start() const;
    int item_next(int idx) const;
    int item_end(int idx) const;
    const EST_String &item_name(int idx) const;
    void item_freq(int idx,EST_String &s,double &freq) const;
    void item_prob(int idx,EST_String &s,double &prob) const;

    // The EST_Discrete pointer currently held by this distribution.
    const EST_Discrete *const get_discrete() const
    {
	return discrete;
    }

    /* Used when smoothing distributions */
    void set_frequency(const EST_String &s,double c);
    void set_frequency(int i,double c);

    // same as set_frequency, except that num_samples is left alone
    void override_frequency(const EST_String &s,double c);
    void override_frequency(int i,double c);

    // Overwrites the total sample count with c.
    void set_num_samples(const double c)
    {
	num_samples = c;
    }

    friend ostream & operator <<(ostream &s, const EST_DiscreteProbDistribution &p);
    EST_DiscreteProbDistribution &operator=(const EST_DiscreteProbDistribution &a);
};

#endif				// __EST_SIMPLESTATS_H__
