1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Collections;

namespace tfidf
{
class Program
{
    // Input and output files. The relative paths are resolved against the
    // process's current working directory.
    private const string DictionaryPath = @"..\..\Dictionary225.txt";
    private const string CorpusPath = @"..\..\corpus4.txt";
    private const string FrequencyOutputPath = @"..\..\idf4.txt";
    private const string TimingOutputPath = @"..\..\timespend.txt";

    /// <summary>
    /// Counts, for every term listed in the dictionary file, the number of
    /// corpus lines in which that term appears at least once, writes the
    /// "term count" pairs to the output file, and records the elapsed time
    /// in milliseconds in a separate file.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        // Stopwatch is monotonic and high-resolution, unlike subtracting two
        // DateTime.Now readings, which is coarse and follows wall-clock changes.
        System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

        Dictionary<string, int> df = LoadDictionary(DictionaryPath);
        CountDocumentFrequencies(CorpusPath, df);
        WriteDocumentFrequencies(FrequencyOutputPath, df);

        stopwatch.Stop();
        string elapsedMilliseconds = stopwatch.Elapsed.TotalMilliseconds.ToString();

        using (StreamWriter timingWriter = new StreamWriter(TimingOutputPath))
        {
            timingWriter.WriteLine(elapsedMilliseconds);
        }
    }

    /// <summary>
    /// Reads the dictionary file (one term per line) and returns a map from
    /// each term to an initial document-frequency count of zero.
    /// </summary>
    /// <param name="path">Path of the dictionary file.</param>
    /// <returns>A dictionary keyed by term, with every value set to 0.</returns>
    private static Dictionary<string, int> LoadDictionary(string path)
    {
        Dictionary<string, int> df = new Dictionary<string, int>();

        // The using block closes the reader even if reading throws.
        using (StreamReader reader = new StreamReader(path))
        {
            string term;
            while ((term = reader.ReadLine()) != null)
            {
                // Indexer assignment rather than Add: a term repeated in the
                // dictionary file must not abort the run with ArgumentException.
                df[term] = 0;
            }
        }

        return df;
    }

    /// <summary>
    /// Reads the corpus file, treating each line as one document of
    /// space-separated words, and increments the count of every dictionary
    /// term once per line in which it occurs.
    /// </summary>
    /// <param name="path">Path of the corpus file.</param>
    /// <param name="df">Term-to-count map to update in place; words that are
    /// not already keys are ignored.</param>
    private static void CountDocumentFrequencies(string path, Dictionary<string, int> df)
    {
        using (StreamReader reader = new StreamReader(path))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                // Words already seen on this line, so that each term is
                // counted at most once per document.
                HashSet<string> seenWords = new HashSet<string>();

                foreach (string word in line.Split(' '))
                {
                    int count;

                    // HashSet.Add returns false for a repeat, and TryGetValue
                    // fetches the current count in a single lookup.
                    if (seenWords.Add(word) && df.TryGetValue(word, out count))
                    {
                        df[word] = count + 1;
                    }
                }
            }
        }
    }

    /// <summary>
    /// Writes one "term count" line per dictionary entry to the given file.
    /// </summary>
    /// <remarks>
    /// The values written are the raw document-frequency counts; no further
    /// transformation is applied to them here.
    /// </remarks>
    /// <param name="path">Path of the output file.</param>
    /// <param name="df">Term-to-count map to write out.</param>
    private static void WriteDocumentFrequencies(string path, Dictionary<string, int> df)
    {
        using (StreamWriter writer = new StreamWriter(path))
        {
            foreach (KeyValuePair<string, int> item in df)
            {
                writer.WriteLine(item.Key + " " + item.Value);
            }
        }
    }
}
}