Main Page | Namespace List | Compound List | File List | Namespace Members | Compound Members | File Members

UnigramTextClassifier.h

Go to the documentation of this file.
#ifndef _UnigramTextClassifier_H_
#define _UnigramTextClassifier_H_
#include <map>
#include <iostream>
#include <fstream>
#include <string>  // the class stores and returns `string`; do not rely on a transitive include

// NOTE(review): adding declarations to namespace std is undefined behaviour
// per the C++ standard. The placement is kept as-is because the out-of-line
// member definitions (not visible from this header) and existing callers
// presumably depend on the current qualified names -- confirm before moving.
namespace std
{
  /// Table keyed by a single byte value, holding an unsigned long per key
  /// (presumably the number of occurrences of that byte -- confirm against
  /// the implementation).
  typedef map<unsigned char,unsigned long> frequency_map;

  /**
   * Declares a classifier that keeps a per-byte frequency table, two running
   * totals and a classification label. Only the accessors and the setter are
   * defined in this header; every other member is defined elsewhere, so the
   * descriptions below that go beyond the signatures are assumptions and are
   * marked as such.
   */
  class UnigramTextClassifier
  {
  public:

    /// Default constructor (defined out of line).
    UnigramTextClassifier();

    /// Constructs from a classification label (defined out of line;
    /// presumably stores it in the label member -- confirm).
    UnigramTextClassifier(const string classification);

    /// Returns a copy of the frequency table.
    frequency_map freqs() const { return _freqs; }

    /// Returns the stored corpus total.
    unsigned long corpus_total() const { return _corpus_total; }

    /// Returns the stored total.
    unsigned long total() const { return _total; }

    /// Returns a copy of the classification label.
    string classification() const { return _classification; }

    /// Replaces the classification label with a copy of @p classification.
    /// Taken by const reference so const strings and temporaries are accepted.
    void setClassification(const string& classification) { _classification = classification; }

    /// Presumably trains the frequency table from the stream @p in -- confirm.
    void learn(istream& in);

    /// Overload taking a C string (presumably a file name or a text buffer --
    /// confirm against the implementation).
    void learn(char* in);

    /// Presumably writes the classifier state to the stream @p out -- confirm.
    void dump(ostream& out);

    /// Overload taking a C string (presumably an output file name -- confirm).
    void dump(char* out);

    /// Presumably restores classifier state from the stream @p in -- confirm.
    void read(istream& in);

    /// Overload taking a C string (presumably an input file name -- confirm).
    void read(char* in);

    /// Returns a score for the text available from @p in. A former inline
    /// definition, retained below as a comment, computed
    /// (total()*8) - bits_required(in); whether the current out-of-line
    /// definition still does so cannot be seen from this header.
    float score(istream& in);
//    {return ( (total()*8)-(bits_required(in)) ); }

    /// Overload of score() taking a C string; the same retained comment
    /// applies.
    float score(char* in);
//    {return ( (total()*8)-(bits_required(in)) ); }

    /// Returns a bit count for the single byte @p ch (presumably the number of
    /// bits needed to encode it under the learned frequencies -- confirm).
    float bits_required(unsigned char ch);

    /// Returns a bit count for the text available from @p in (presumably the
    /// sum over its bytes -- confirm).
    float bits_required(istream& in);

    /// Overload of bits_required() taking a C string.
    float bits_required(char* in);

  private:
    /// Per-byte table returned by freqs().
    frequency_map _freqs;
    /// Value returned by corpus_total().
    unsigned long _corpus_total;
    /// Value returned by total().
    unsigned long _total;
    /// Label returned by classification().
    string _classification;

    /// Helper defined out of line (presumably a base-2 logarithm -- confirm).
    float lg (float n);
    /// Helper defined out of line (presumably an information measure for
    /// @p n -- confirm).
    float info_value(float n);
    /// Helper defined out of line (presumably a formatted current-time
    /// string -- confirm).
    string ctime_string();
  };

}
// NOTE(review): a using-directive at header scope leaks every std name into
// each including translation unit. It is kept because existing code may refer
// to UnigramTextClassifier and std names unqualified through it.
using namespace std;

#endif /* _UnigramTextClassifier_H_ */

Generated on Fri Aug 8 15:44:40 2003 for UnigramTextClassifier by doxygen 1.3.3