利用PostGreSQL数据库进行文本输入和检索

发表时间: 2022-12-13 08:58

中文分词

ChineseParse.cs

using System;using System.Collections;using System.IO;using System.Text.RegularExpressions;namespace FullTextSearch.Common{    /// <summary>    ///     中文分词器。    /// </summary>    public class ChineseParse    {        private static readonly ChineseWordsHashCountSet _countTable;        static ChineseParse()        {            _countTable = new ChineseWordsHashCountSet();            InitFromFile("ChineseDictionary.txt");        }        /// <summary>        ///     从指定的文件中初始化中文词语字典和字符串次数字典。        /// </summary>        /// <param name="fileName">文件名</param>        private static void InitFromFile(string fileName)        {            string path = Path.Combine(Directory.GetCurrentDirectory(), @"..\..\Common\", fileName);            if (File.Exists(path))            {                using (StreamReader sr = File.OpenText(path))                {                    string s = "";                    while ((s = sr.ReadLine()) != null)                    {                        ChineseWordUnit _tempUnit = InitUnit(s);                        _countTable.InsertWord(_tempUnit.Word);                    }                }            }        }        /// <summary>        ///     将一个字符串解析为ChineseWordUnit。        /// </summary>        /// <param name="s">字符串</param>        /// <returns>解析得到的ChineseWordUnit</returns>        /// 4        /// 0        private static ChineseWordUnit InitUnit(string s)        {            var reg = new Regex(@"\s+");            string[] temp = reg.Split(s);            //if (temp.Length != 2)            //{            //    throw new Exception("字符串解析错误:" + s);            //}            if (temp.Length != 1)            {                throw new Exception("字符串解析错误:" + s);            }            return new ChineseWordUnit(temp[0], Int32.Parse("1"));        }        /// <summary>        ///     分析输入的字符串,将其切割成一个个的词语。        /// </summary>        /// <param name="s">待切割的字符串</param>        /// <returns>所切割得到的中文词语数组</returns>        public static string[] ParseChinese(string s)        {            int _length = s.Length;            string _temp = String.Empty;            var _words = new ArrayList();            for (int i = 0; i < s.Length;)            {                _temp = s.Substring(i, 1);                if (_countTable.GetCount(_temp) > 1)                {                    int j = 2;                    for (; i + j < s.Length + 1 && _countTable.GetCount(s.Substring(i, j)) > 0; j++)                    {                    }                    _temp = s.Substring(i, j - 1);                    i = i + j - 2;                }                i++;                _words.Add(_temp);            }            var _tempStringArray = new string[_words.Count];            _words.CopyTo(_tempStringArray);            return _tempStringArray;        }    }}

ChineseWordsHashCountSet.cs

using System.Collections;namespace FullTextSearch.Common{    /// <summary>    ///     记录字符串出现在中文字典所录中文词语的前端的次数的字典类。如字符串"中"出现在"中国"的前端,则在字典中记录一个次数。    /// </summary>    public class ChineseWordsHashCountSet    {        /// <summary>        ///     记录字符串在中文词语中出现次数的Hashtable。键为特定的字符串,值为该字符串在中文词语中出现的次数。        /// </summary>        private readonly Hashtable _rootTable;        /// <summary>        ///     类型初始化。        /// </summary>        public ChineseWordsHashCountSet()        {            _rootTable = new Hashtable();        }        /// <summary>        ///     查询指定字符串出现在中文字典所录中文词语的前端的次数。        /// </summary>        /// <param name="s">指定字符串</param>        /// <returns>字符串出现在中文字典所录中文词语的前端的次数。若为-1,表示不出现。</returns>        public int GetCount(string s)        {            if (!_rootTable.ContainsKey(s.Length))            {                return -1;            }            var _tempTable = (Hashtable) _rootTable[s.Length];            if (!_tempTable.ContainsKey(s))            {                return -1;            }            return (int) _tempTable[s];        }        /// <summary>        ///     向次数字典中插入一个词语。解析该词语,插入次数字典。        /// </summary>        /// <param name="s">所处理的字符串。</param>        public void InsertWord(string s)        {            for (int i = 0; i < s.Length; i++)            {                string _s = s.Substring(0, i + 1);                InsertSubString(_s);            }        }        /// <summary>        ///     向次数字典中插入一个字符串的次数记录。        /// </summary>        /// <param name="s">所插入的字符串。</param>        private void InsertSubString(string s)        {            if (!_rootTable.ContainsKey(s.Length) && s.Length > 0)            {                var _newHashtable = new Hashtable();                _rootTable.Add(s.Length, _newHashtable);            }            var _tempTable = (Hashtable) _rootTable[s.Length];            if (!_tempTable.ContainsKey(s))            {                _tempTable.Add(s, 1);            }            else            {                _tempTable[s] = (int) _tempTable[s] + 1;            }        }    }}

ChineseWordUnit.cs

namespace FullTextSearch.Common{    public struct ChineseWordUnit    {        private readonly int _power;        private readonly string _word;        /// <summary>        ///     结构初始化。        /// </summary>        /// <param name="word">中文词语</param>        /// <param name="power">该词语的权重</param>        public ChineseWordUnit(string word, int power)        {            _word = word;            _power = power;        }        /// <summary>        ///     中文词语单元所对应的中文词。        /// </summary>        public string Word        {            get { return _word; }        }        /// <summary>        ///     该中文词语的权重。        /// </summary>        public int Power        {            get { return _power; }        }    }}

#ChineseDictionary.txt


主窗体界面

MainManager.cs

using System;using System.Collections.Generic;using System.Data;using System.Drawing;using System.Windows.Forms;using FullTextSearch.Common;using Npgsql;namespace FullTextSearch{    public partial class MainManager : Form    {        private readonly PostgreSQL pg = new PostgreSQL();        private readonly SQLquerys sqlQuerys = new SQLquerys();        private char analysisType;        private string createConnString = "";        private DataSet dataSet = new DataSet();        private DataTable dataTable = new DataTable();        private char odabirAndOr;        private char vrstaPretrazivanja;        public MainManager()        {            InitializeComponent();            rbtn_AND.Checked = true;            rbtnNeizmjenjeni.Checked = true;            odabirAndOr = '*';            radioButton_Day.Checked = true;            radioButton_Day.Checked = true;        }        private void Form1_Load(object sender, EventArgs e)        {            gb_unosPodataka.Enabled = false;            groupBox_Search.Enabled = false;            groupBox_Analysis.Enabled = false;            button_Disconnect.Enabled = false;            button_Pretrazi.BackColor = Color.WhiteSmoke;            button_Disconnect.BackColor = Color.WhiteSmoke;            button_unosTekstaUBazu.BackColor = Color.WhiteSmoke;            button1.BackColor = Color.WhiteSmoke;        }        private void button_unosTekstaUBazu_Click(object sender, EventArgs e)        {            string searchTextBoxString = rTB_unosTextaUBazu.Text;            if (searchTextBoxString != "")            {                pg.insertIntoTable(searchTextBoxString, pg.conn);                MessageBox.Show(searchTextBoxString + " 添加到数据库!");                rTB_unosTextaUBazu.Clear();            }            else            {                MessageBox.Show("不允许空数据!");            }        }        private void button_Pretrazi_Click(object sender, EventArgs e)        {            string stringToSearch;            string sql;            string highlitedText;            string rank;            string check;            stringToSearch = txt_Search.Text.Trim();            var list = new List<string>(ChineseParse.ParseChinese(stringToSearch));            ;            sql = sqlQuerys.createSqlString(list, odabirAndOr, vrstaPretrazivanja);            richTextBox1.Text = sql;            check = sqlQuerys.testIfEmpty(stringToSearch);            pg.insertIntoAnalysisTable(stringToSearch, pg.conn);            pg.openConnection();            var command = new NpgsqlCommand(sql, pg.conn);            NpgsqlDataReader reader = command.ExecuteReader();            int count = 0;            linkLabel_Rezultat.Text = " ";            while (reader.Read())            {                highlitedText = reader[1].ToString();                rank = reader[3].ToString();                linkLabel_Rezultat.Text += highlitedText + "[" + rank + "]\n";                count++;            }            labelBrojac.Text = "找到的文件数量: " + count;            pg.closeConnection();        }        private void rbtn_AND_CheckedChanged(object sender, EventArgs e)        {            odabirAndOr = '*';        }        private void rbtn_OR_CheckedChanged(object sender, EventArgs e)        {            odabirAndOr = '+';        }        private void rbtnNeizmjenjeni_CheckedChanged(object sender, EventArgs e)        {            vrstaPretrazivanja = 'A';        }        private void rbtn_Rijecnici_CheckedChanged(object sender, EventArgs e)        {            vrstaPretrazivanja = 'B';        }        private void rbtn_Fuzzy_CheckedChanged(object sender, EventArgs e)        {            vrstaPretrazivanja = 'C';        }        private void button_Connect_Click(object sender, EventArgs e)        {            if (connectMe())            {                gb_unosPodataka.Enabled = true;                groupBox_Search.Enabled = true;                groupBox_Analysis.Enabled = true;                textBox_Database.Enabled = false;                textBox_IP.Enabled = false;                textBox_Port.Enabled = false;                textBox_Password.Enabled = false;                textBox_UserID.Enabled = false;                button_Connect.Enabled = false;                button_Disconnect.Enabled = true;                button_Pretrazi.BackColor = Color.SkyBlue;                button_Disconnect.BackColor = Color.IndianRed;                button_unosTekstaUBazu.BackColor = Color.MediumSeaGreen;                button1.BackColor = Color.MediumSeaGreen;                button_Connect.BackColor = Color.WhiteSmoke;            }        }        private void button_Disconnect_Click(object sender, EventArgs e)        {            gb_unosPodataka.Enabled = false;            groupBox_Search.Enabled = false;            groupBox_Analysis.Enabled = false;            textBox_Database.Enabled = true;            textBox_IP.Enabled = true;            textBox_Port.Enabled = true;            textBox_Password.Enabled = true;            textBox_UserID.Enabled = true;            button_Connect.Enabled = true;            button_Disconnect.Enabled = false;            button_Pretrazi.BackColor = Color.WhiteSmoke;            button_Disconnect.BackColor = Color.WhiteSmoke;            button_unosTekstaUBazu.BackColor = Color.WhiteSmoke;            button1.BackColor = Color.WhiteSmoke;            button_Connect.BackColor = Color.MediumSeaGreen;            txt_Search.Text = "";            linkLabel_Rezultat.Text = "";            richTextBox1.Text = "";            labelBrojac.Text = "";        }        private bool connectMe()        {            createConnString += "Server=" + textBox_IP.Text + ";Port=" + textBox_Port.Text + ";User Id=" +                                textBox_UserID.Text + ";Password=" + textBox_Password.Text + ";Database=" +                                textBox_Database.Text + ";";            sqlQuerys.setTheKey(createConnString);            pg.setConnectionString();            pg.setConnection();            if (pg.openConnection())            {                MessageBox.Show("您已成功连接!");                pg.closeConnection();                return true;            }            return false;        }        private void button1_Click(object sender, EventArgs e)        {            string selectedTimestamp;            selectedTimestamp = dateTimePicker_From.Value.ToString("dd-MM-yyyy hh:mm:ss") + " " +                                dateTimePicker_To.Value.ToString("dd-MM-yyyy hh:mm:ss");            var analize = new Analysis(selectedTimestamp, analysisType);            analize.Show();        }        private void radioButton_Day_CheckedChanged(object sender, EventArgs e)        {            analysisType = 'D';        }        private void radioButton_Hour_CheckedChanged(object sender, EventArgs e)        {            analysisType = 'H';        }    }}

SQLquerys.cs代码:

using System.Collections.Generic;namespace FullTextSearch{    internal class SQLquerys    {        private static string giveMeTheKey;        private static int tempInt = 1;        //设置连接字符串        public void setTheKey(string connString)        {            giveMeTheKey = connString;            giveMeTheKey += "";        }        //将连接字符串存储在静态变量中        public string getTheKey()        {            giveMeTheKey += "";            return giveMeTheKey;        }        public void setCounter()        {            tempInt = 1;        }        //根据AND和OR的选择分析字符串进行搜索        public string createFunctionString(List<string> searchList, char selector)        {            string TempString = "";            string[] TempField = null;            int i = 0;            int j = 0;            foreach (string searchStringInList in searchList)            {                if (j != 0)                {                    if (selector == '+')                        TempString = TempString + " | ";                    else if (selector == '*')                        TempString = TempString + " & ";                }                j = 1;                TempField = splitListForInput(searchStringInList);                TempString = TempString + "(";                foreach (string justTempString in TempField)                {                    if (i != 0)                    {                        TempString = TempString + " & ";                    }                    TempString = TempString + justTempString;                    i = 1;                }                TempString = TempString + ")";                i = 0;            }            return TempString;        }        //帮助方法        public List<string> splitInputField(string[] inputField)        {            var unfinishedList = new List<string>();            foreach (string splitString in inputField)            {                unfinishedList.Add(splitString);            }            return unfinishedList;        }        //帮助方法        public string[] splitListForInput(string inputString)        {            string[] parsedList = null;            parsedList = inputString.Split(' ');            return parsedList;        }        //在PostgreSQL中创建ts功能的功能,用于字典搜索        public string createTsFunction(string tsString)        {            string tsHeadline = "";            string tsRank = "";            string tsFunction = "";            tsHeadline = ",\n ts_headline(\"content\", to_tsquery('" + tsString + "')), \"content\"";            tsRank = ",\n ts_rank(to_tsvector(\"content\"), to_tsquery('" + tsString + "')) rank";            tsFunction = tsHeadline + tsRank;            return tsFunction;        }        //创建SQL查询依赖于选择哪种类型的搜索,也取决于AND或OR选择器        public string createSqlString(List<string> searchList, char selector, char vrstaPretrazivanja)        {            string selectString = "";            string myTempString = "";            string TempString = "";            int i = 0;            TempString = createFunctionString(searchList, selector);            TempString = createTsFunction(TempString);            selectString = "SELECT \"id\"" + TempString + "\nFROM \"texttable\" \nWHERE ";            if (vrstaPretrazivanja == 'A')            {                foreach (string myString in searchList)                {                    if (i == 0)                    {                        myTempString = myTempString + "\"content\" LIKE '%" + myString + "%' ";                        i++;                    }                    else                    {                        if (selector == '*')                            myTempString = myTempString + "\nAND \"content\" LIKE '%" + myString + "%' ";                        else if (selector == '+')                            myTempString = myTempString + "\nOR \"content\" LIKE '%" + myString + "%' ";                    }                }            }            else if (vrstaPretrazivanja == 'B')            {                foreach (string myString in searchList)                {                    string temporalString = "";                    string[] testingString = myString.Split(' ');                    for (int k = 0; k < testingString.Length; k++)                    {                        if (k != testingString.Length - 1)                        {                            temporalString += testingString[k] + " & ";                        }                        else                        {                            temporalString += testingString[k];                        }                    }                    if (i == 0)                    {                        myTempString = myTempString + "to_tsvector(\"content\") @@ to_tsquery('english', '" +                                       temporalString + "')";                        i++;                    }                    else                    {                        if (selector == '*')                            myTempString = myTempString + "\nAND to_tsvector(\"content\") @@ to_tsquery('english', '" +                                           temporalString + "')";                        else if (selector == '+')                            myTempString = myTempString + "\nOR to_tsvector(\"content\") @@ to_tsquery('english', '" +                                           temporalString + "')";                    }                }            }            if (vrstaPretrazivanja == 'C')            {                foreach (string myString in searchList)                {                    if (i == 0)                    {                        myTempString = myTempString + "\"content\" % '" + myString + "' ";                        i++;                    }                    else                    {                        if (selector == '*')                            myTempString = myTempString + "\nAND \"content\" % '" + myString + "' ";                        else if (selector == '+')                            myTempString = myTempString + "\nOR \"content\" % '" + myString + "' ";                    }                }            }            selectString = selectString + myTempString + "\nORDER BY rank DESC";            return selectString;        }        public string testIfEmpty(string searchedText)        {            string checkingIfEmpty = "SELECT * FROM \"analysisTable\" WHERE \"searchedtext\" =' " + searchedText + "'";            return checkingIfEmpty;        }        public string queryForAnalysis(char analysisChoice)        {            string myTestsql = "";            if (analysisChoice == 'H')            {                //这个查询是这样写的只是为了测试的目的,它需要改变                myTestsql = "SELECT * FROM crosstab('SELECT CAST((\"searchedtext\") AS text) searchedText,"                            +                            " CAST(EXTRACT(HOUR FROM \"timeOfSearch\") AS int) AS sat, CAST(COUNT(*) AS int) AS broj FROM \"analysisTable\" GROUP BY \"searchedText\", sat"                            +                            " ORDER BY \"searchedtext\", sat', 'SELECT rbrSata FROM sat ORDER BY rbrSata') AS pivotTable (\"searchedText\" TEXT, t0_1 INT, t1_2 INT"                            +                            ", t2_3 INT, t3_4 INT, t4_5 INT, t5_6 INT, t6_7 INT, t7_8 INT, t8_9 INT, t9_10 INT, t10_11 INT, t11_12 INT, t12_13 INT"                            +                            ", t13_14 INT, t14_15 INT, t15_16 INT, t16_17 INT, t17_18 INT, t18_19 INT, t19_20 INT, t20_21 INT, t21_22 INT, t22_23 INT, t23_00 INT) ORDER BY \"searchedText\"";                return myTestsql;            }            if (analysisChoice == 'D')            {                //这个查询是这样写的只是为了测试的目的,它需要改变                myTestsql += "SELECT *FROM crosstab ('SELECT CAST((\"searchedtext\") AS text) AS searchedText, CAST(EXTRACT(DAY FROM \"dateOfSearch\") AS int) AS dan"                             + ", CAST(COUNT(*) AS int) AS broj FROM \"analysisTable\" GROUP BY \"searchedText\", "                             +                             "dan ORDER BY \"searchedtext\", dan', 'SELECT rbrDana FROM dan ORDER BY rbrDana') AS pivotTable(\"searchedtext\" TEXT";                return myTestsql;            }            return myTestsql;        }        //此方法用于解析日期        public int[] parseForDates(string date)        {            string[] temp;            var tempInt = new int[3];            temp = date.Split('-');            for (int i = 0; i < 3; i++)            {                tempInt[i] = int.Parse(temp[i]);            }            return tempInt;        }        //此代码用于创建分析,它执行一些日期/时间操作,以便能够为选定的日期/时间创建分析。        public string createSqlForDayAnalysis(string dateFrom, string dateTo)        {            string insertIntoTempTable = "";            string dateTimeForAnalysis = "";            int[] tempFrom = parseForDates(dateFrom);            int[] tempTo = parseForDates(dateTo);            //月份变更算法            while (tempFrom[0] != tempTo[0] || tempFrom[1] != tempTo[1])            {                if (tempFrom[1] == tempTo[1])                {                    if (tempFrom[0] != tempTo[0])                    {                        for (int i = tempInt + 1; tempFrom[0] + 2 < tempTo[0] + 2; i++)                        {                            insertIntoTempTable += "INSERT INTO \"dan\" VALUES (" + i + ");";                            dateTimeForAnalysis += ",dd" + tempFrom[0] + tempFrom[1] + tempFrom[2] + " INT";                            tempInt = i;                            tempFrom[0]++;                        }                    }                }                if (tempFrom[1] != tempTo[1])                {                    if (tempFrom[1]%2 == 0 || tempFrom[1] == 7 || tempFrom[1] == 1)                    {                        for (int i = tempInt; tempFrom[0] < 31 && tempFrom[1] != tempTo[1]; i++)                        {                            insertIntoTempTable += "INSERT INTO \"dan\" VALUES (" + i + ");";                            dateTimeForAnalysis += ", dd" + tempFrom[0] + tempFrom[1] + tempFrom[2] + " INT";                            tempInt = i;                            tempFrom[0]++;                            if (tempFrom[0] == 31)                            {                                tempFrom[1]++;                                tempFrom[0] = 1;                            }                        }                    }                }            }            dateTimeForAnalysis += ") ORDER BY \"searchedtext\"";            return dateTimeForAnalysis + "#" + insertIntoTempTable;        }    }}

PostgreSQL.cs代码:

using System;using System.Windows.Forms;using Npgsql;using NpgsqlTypes;namespace FullTextSearch{    public class PostgreSQL    {        private static int tempInt = 1;        private readonly SQLquerys sql = new SQLquerys();        public NpgsqlConnection conn;        public string connectionstring;        private string newConnString;        public PostgreSQL()        {            setConnectionString();            setConnection();        }        public void setConnectionString()        {            newConnString = sql.getTheKey();            connectionstring = String.Format(newConnString);            setConnection();        }        public void setConnection()        {            conn = new NpgsqlConnection(connectionstring);        }        public bool openConnection()        {            try            {                conn.Open();                return true;            }            catch            {                MessageBox.Show("Unable to connect! Check parameters!");                return false;            }        }        public void closeConnection()        {            conn.Close();        }        public void insertIntoTable(string textToInsert, NpgsqlConnection nsqlConn)        {            string mySqlString = "INSERT INTO \"texttable\" (\"content\") VALUES (@Param1)";            var myParameter = new NpgsqlParameter("@Param1", NpgsqlDbType.Text);            myParameter.Value = textToInsert;            openConnection();            var myCommand = new NpgsqlCommand(mySqlString, nsqlConn);            myCommand.Parameters.Add(myParameter);            myCommand.ExecuteNonQuery();            closeConnection();        }        public void insertIntoAnalysisTable(string textToInsert, NpgsqlConnection nsqlConn)        {            string dateTime = DateTime.Now.ToString();            string[] temp;            temp = dateTime.Split(' ');            string mySqlString =                "INSERT INTO \"analysistable\" (\"searchedtext\", \"dateofsearch\", \"timeofsearch\") VALUES ('" +                textToInsert + "', '" + temp[0] + "'" + ", '" + temp[1] + "');";            openConnection();            var myCommand = new NpgsqlCommand(mySqlString, nsqlConn);            myCommand.ExecuteNonQuery();            closeConnection();        }        public void executeQuery(string queryText, NpgsqlConnection nsqlConn)        {            openConnection();            var myCommand = new NpgsqlCommand(queryText, nsqlConn);            myCommand.ExecuteNonQuery();            closeConnection();        }        public void createTempTable(NpgsqlConnection nsqlConn, char analysisType, string dateFrom, string dateTo,            string splitMe)        {            if (analysisType == 'H')            {                string dropIfExists = "DROP TABLE IF EXISTS \"sat\";";                string createTempTable = "CREATE TABLE IF NOT EXISTS \"sat\" (rbrSata INT);";                string insertIntoTempTable = "";                for (int i = 0; i < 24; i++)                {                    insertIntoTempTable += "INSERT INTO \"sat\" VALUES (" + i + ");";                }                openConnection();                var commandDrop = new NpgsqlCommand(dropIfExists, nsqlConn);                commandDrop.ExecuteNonQuery();                var commandCreate = new NpgsqlCommand(createTempTable, nsqlConn);                commandCreate.ExecuteNonQuery();                var commandInsert = new NpgsqlCommand(insertIntoTempTable, nsqlConn);                commandInsert.ExecuteNonQuery();                closeConnection();            }            else if (analysisType == 'D')            {                string dropIfExists = "DROP TABLE IF EXISTS \"dan\";";                string createTempTable = "CREATE TABLE IF NOT EXISTS \"dan\" (rbrDana INT);";                string insertIntoTempTable = splitMe;                openConnection();                var commandDrop = new NpgsqlCommand(dropIfExists, nsqlConn);                commandDrop.ExecuteNonQuery();                var commandCreate = new NpgsqlCommand(createTempTable, nsqlConn);                commandCreate.ExecuteNonQuery();                var commandInsert = new NpgsqlCommand(insertIntoTempTable, nsqlConn);                commandInsert.ExecuteNonQuery();                closeConnection();            }        }    }}

PostGreSQL sql脚本:

CREATE TABLE public.analysistable(    id integer NOT NULL DEFAULT nextval('analysistable_id_seq'::regclass),    searchedtext text COLLATE pg_catalog."default" NOT NULL,    dateofsearch date NOT NULL,    timeofsearch time without time zone NOT NULL,    CONSTRAINT analysistable_pkey PRIMARY KEY (id))WITH (    OIDS = FALSE)TABLESPACE pg_default;ALTER TABLE public.analysistable    OWNER to king;
CREATE TABLE public.texttable(    id integer NOT NULL DEFAULT nextval('texttable_id_seq'::regclass),    content text COLLATE pg_catalog."default" NOT NULL,    CONSTRAINT texttable_pkey PRIMARY KEY (id))WITH (    OIDS = FALSE)TABLESPACE pg_default;ALTER TABLE public.texttable    OWNER to king;


运行结果如图: