using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using PanGu.Dict; using PanGu.Framework; using PanGu.Match; using PanGu.Setting; using PanGu.HighLight; using Lucene.Net.Analysis; using System.IO; using PanGu; using System.Configuration;
namespace Pangu.Helper
{ public class PanguHelper {
// Lazily created shared instance; volatile because ShareHelper reads it outside the lock.
private static volatile PanguHelper _instance;

// Monitor object that serializes creation of _instance in ShareHelper.
private static readonly object lockHelper = new object();

/// <summary>
/// Private so that instances are only obtained through <see cref="ShareHelper"/>.
/// </summary>
private PanguHelper()
{
}
/// <summary>
/// Gets the shared <see cref="PanguHelper"/> instance, creating it on first access.
/// </summary>
/// <remarks>
/// On first access the <c>PANGU_CONFIG</c> app setting is read; when it is non-empty
/// it is passed to <see cref="Use(string)"/>. When the setting is missing or empty no
/// initialization call is made here (the original note says PanGu then uses the
/// pangu.xml profile under the same path; that is not verifiable from this file).
/// If initialization throws, nothing is published and the next access tries again.
/// </remarks>
public static PanguHelper ShareHelper
{
    get
    {
        if (_instance == null)
        {
            lock (lockHelper)
            {
                if (_instance == null)
                {
                    // Configure a local and publish it last: the outer check runs without
                    // the lock, so assigning _instance before Use() completes would let
                    // another thread obtain a helper whose Segment is not initialized yet.
                    PanguHelper helper = new PanguHelper();
                    string configFile = ConfigurationManager.AppSettings["PANGU_CONFIG"] ?? string.Empty;
                    if (configFile.Length != 0)
                    {
                        helper.Use(configFile);
                    }

                    _instance = helper;
                }
            }
        }

        return _instance;
    }
}
/// <summary>
/// Initializes PanGu segmentation from the given configuration file.
/// </summary>
/// <param name="configFile">Path of the PanGu configuration file, forwarded unchanged to <c>Segment.Init</c>.</param>
public void Use(string configFile)
{
    // Segment resolves to PanGu.Segment through the file's "using PanGu;" directive.
    Segment.Init(configFile);
}
/// <summary>
/// Gets a human-readable description for a part-of-speech value.
/// </summary>
/// <param name="pos">Part-of-speech enumeration value.</param>
/// <returns>
/// The description for <paramref name="pos"/>, or "unknown part of speech" when the
/// value is <c>POS.POS_UNK</c> or is not one of the values listed below.
/// </returns>
public string GetPosDescript(POS pos)
{
    switch (pos)
    {
        case POS.POS_D_K: return "trailing component";
        case POS.POS_D_H: return "preceding component";
        case POS.POS_A_NZ: return "other proper name";
        case POS.POS_A_NX: return "foreign character";
        case POS.POS_A_NR: return "person's name";
        case POS.POS_D_Z: return "status word";
        case POS.POS_A_NT: return "body of institutions";
        case POS.POS_A_NS: return "place name";
        case POS.POS_D_Y: return "mood word mood morpheme";
        case POS.POS_D_X: return "non-morphemic word";
        case POS.POS_D_W: return "punctuation";
        case POS.POS_D_T: return "word of time";
        case POS.POS_D_S: return "place word";
        case POS.POS_D_V: return "verb morpheme";
        case POS.POS_D_U: return "particle particle morpheme";
        case POS.POS_D_R: return "pronoun pronoun morpheme";
        case POS.POS_A_Q: return "quantifier quantifier morpheme";
        case POS.POS_D_P: return "preposition";
        case POS.POS_D_MQ: return "quantifier";
        case POS.POS_A_M: return "numeral, numeral, numeral";
        case POS.POS_D_O: return "onomatopoeia";
        case POS.POS_D_N: return "noun morpheme";
        case POS.POS_D_F: return "azimuth word azimuth morpheme";
        case POS.POS_D_E: return "interjection morpheme";
        case POS.POS_D_L: return "idiom";
        case POS.POS_D_I: return "idiom";
        case POS.POS_D_D: return "adverb adverb morpheme";
        case POS.POS_D_C: return "conjunctive conjunction morpheme";
        case POS.POS_D_B: return "to distinguish words to distinguish morphemes";
        case POS.POS_D_A: return "adjective morpheme";

        // POS_UNK and every value without an entry above share the same text.
        case POS.POS_UNK:
        default:
            return "unknown part of speech";
    }
}
/// <summary>
/// Segments <paramref name="content"/> and returns the raw <c>WordInfo</c> results.
/// </summary>
/// <param name="content">Text to segment.</param>
/// <param name="matchOptions">Match options forwarded to <c>Segment.DoSegment</c>; null when omitted.</param>
/// <param name="matchParameter">Match parameters forwarded to <c>Segment.DoSegment</c>; null when omitted.</param>
/// <returns>The collection returned by <c>Segment.DoSegment</c>, unmodified.</returns>
public ICollection<WordInfo> GetSegmentWords(string content, MatchOptions matchOptions = null, MatchParameter matchParameter = null)
{
    // A new Segment is created for every call.
    return new Segment().DoSegment(content, matchOptions, matchParameter);
}
/// <summary>
/// Segments <paramref name="content"/> and returns the text of every resulting word.
/// </summary>
/// <param name="content">Text to segment.</param>
/// <param name="matchOptions">Match options forwarded to <c>Segment.DoSegment</c>; null when omitted.</param>
/// <param name="matchParameter">Match parameters forwarded to <c>Segment.DoSegment</c>; null when omitted.</param>
/// <returns>The <c>Word</c> of each segmented item, in the order the segmenter produced them.</returns>
public List<string> GetSplitWords(string content, MatchOptions matchOptions = null, MatchParameter matchParameter = null)
{
    ICollection<WordInfo> segmented = new Segment().DoSegment(content, matchOptions, matchParameter);

    List<string> result = new List<string>();
    foreach (WordInfo info in segmented)
    {
        result.Add(info.Word);
    }

    return result;
}
/// <summary>
/// Segments <paramref name="content"/> and returns the words whose frequency equals
/// <paramref name="frequency"/>.
/// </summary>
/// <param name="content">Text to segment.</param>
/// <param name="frequency">Word frequency to select; compared with exact equality.</param>
/// <param name="matchOptions">Match options forwarded to <c>Segment.DoSegment</c>; null when omitted.</param>
/// <param name="matchParameter">Match parameters forwarded to <c>Segment.DoSegment</c>; null when omitted.</param>
/// <returns>The <c>Word</c> of each matching item, in segmenter order.</returns>
public List<string> GetSplitWordsByFrequency(string content, double frequency, MatchOptions matchOptions = null, MatchParameter matchParameter = null)
{
    ICollection<WordInfo> segmented = new Segment().DoSegment(content, matchOptions, matchParameter);

    List<string> result = new List<string>();
    foreach (WordInfo info in segmented)
    {
        // Exact comparison on purpose: only words carrying precisely this frequency are kept.
        if (frequency != info.Frequency)
        {
            continue;
        }

        result.Add(info.Word);
    }

    return result;
}
/// <summary>
/// Segments <paramref name="content"/> and returns the words whose rank equals
/// <paramref name="rank"/>.
/// </summary>
/// <param name="content">Text to segment.</param>
/// <param name="rank">Rank (weight) to select.</param>
/// <param name="matchOptions">Match options forwarded to <c>Segment.DoSegment</c>; null when omitted.</param>
/// <param name="matchParameter">Match parameters forwarded to <c>Segment.DoSegment</c>; null when omitted.</param>
/// <returns>The <c>Word</c> of each matching item, in segmenter order.</returns>
public List<string> GetSplitWordsByRank(string content, int rank, MatchOptions matchOptions = null, MatchParameter matchParameter = null)
{
    ICollection<WordInfo> segmented = new Segment().DoSegment(content, matchOptions, matchParameter);

    List<string> result = new List<string>();
    foreach (WordInfo info in segmented)
    {
        if (info.Rank != rank)
        {
            continue;
        }

        result.Add(info.Word);
    }

    return result;
}
/// <summary>
/// Highlights occurrences of <paramref name="keyword"/> in <paramref name="content"/>.
/// </summary>
/// <param name="keyword">The string to highlight.</param>
/// <param name="content">The text to search and excerpt.</param>
/// <param name="fragmentSize">Number of characters for each summary fragment.</param>
/// <returns>
/// The best fragment produced by the highlighter, or the original
/// <paramref name="content"/> when the highlighter returns null or an empty string.
/// </returns>
public static string HighLight(string keyword, string content, int fragmentSize)
{
    // Formatter that wraps each highlighted term in a red <font> tag.
    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");

    // The highlighter is built from the HTML formatter and a PanGu Segment object.
    Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new Segment());

    // Set the number of characters for each summary fragment.
    highlighter.FragmentSize = fragmentSize;

    string highlightStr = highlighter.GetBestFragment(keyword, content);

    // Fall back to the unmodified content when no fragment was produced.
    if (string.IsNullOrEmpty(highlightStr))
    {
        return content;
    }

    return highlightStr;
}
}
}
|