using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;

namespace tfidf
{
    /// <summary>
    /// Computes per-term document frequencies: for every term listed in the
    /// dictionary file, counts how many lines of the corpus file contain that
    /// term at least once, writes the counts to an output file, and records
    /// the elapsed run time in milliseconds to a separate file.
    /// </summary>
    class Program
    {
        // Input and output locations, relative to the working directory.
        private const string DictionaryPath = @"..\..\Dictionary225.txt";
        private const string CorpusPath = @"..\..\corpus4.txt";
        private const string FrequencyOutputPath = @"..\..\idf4.txt";
        private const string TimingOutputPath = @"..\..\timespend.txt";

        /// <summary>
        /// Entry point: loads the dictionary, counts document frequencies over
        /// the corpus, writes the result, then writes the elapsed time.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Stopwatch is monotonic, so the measurement is not affected by
            // wall-clock adjustments the way DateTime.Now differences are.
            Stopwatch timer = Stopwatch.StartNew();

            // Dictionary terms mapped to their document-frequency counts.
            Dictionary<string, int> df = LoadDictionary(DictionaryPath);
            CountDocumentFrequencies(CorpusPath, df);
            WriteDocumentFrequencies(FrequencyOutputPath, df);

            // Timing stops before the timing file itself is written.
            timer.Stop();
            string elapsedMilliseconds = timer.Elapsed.TotalMilliseconds.ToString();

            using (StreamWriter writer = new StreamWriter(TimingOutputPath))
            {
                writer.WriteLine(elapsedMilliseconds);
            }
        }

        /// <summary>
        /// Reads the dictionary file, one term per line, and returns a map
        /// from each term to an initial count of zero.
        /// </summary>
        /// <param name="path">Path of the dictionary file.</param>
        /// <returns>Every distinct line of the file mapped to 0.</returns>
        private static Dictionary<string, int> LoadDictionary(string path)
        {
            Dictionary<string, int> df = new Dictionary<string, int>();

            using (StreamReader reader = new StreamReader(path))
            {
                string term;
                while ((term = reader.ReadLine()) != null)
                {
                    // Indexer assignment rather than Add: a repeated line in
                    // the dictionary file must not abort the run.
                    df[term] = 0;
                }
            }

            return df;
        }

        /// <summary>
        /// Scans the corpus line by line and, for each line, increments the
        /// count of every dictionary term that occurs in it. A term is counted
        /// at most once per line, however often it repeats on that line.
        /// </summary>
        /// <param name="path">Path of the corpus file; tokens are separated by single spaces.</param>
        /// <param name="df">Term counts to update in place; tokens that are not keys are ignored.</param>
        private static void CountDocumentFrequencies(string path, Dictionary<string, int> df)
        {
            using (StreamReader reader = new StreamReader(path))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Tokens already seen on the current line.
                    HashSet<string> seen = new HashSet<string>();

                    foreach (string word in line.Split(' '))
                    {
                        // Add returns false for a token already seen on this line.
                        int count;
                        if (seen.Add(word) && df.TryGetValue(word, out count))
                        {
                            df[word] = count + 1;
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Writes one "term count" line per dictionary entry. Note that the
        /// values written are raw document-frequency counts.
        /// </summary>
        /// <param name="path">Path of the output file (overwritten if it exists).</param>
        /// <param name="df">Term counts to write.</param>
        private static void WriteDocumentFrequencies(string path, Dictionary<string, int> df)
        {
            using (StreamWriter writer = new StreamWriter(path))
            {
                foreach (KeyValuePair<string, int> item in df)
                {
                    writer.WriteLine(item.Key + " " + item.Value);
                }
            }
        }
    }
}
Direct link: https://paste.plurk.com/show/656610