// IncidenceMatrix.java
import java.io.*;
import java.util.*;
/**
 * 612 LBE01 IncidenceMatrix
 * Khavya Seshadri
 *
 * <p>Term-document incidence matrix over a small in-memory corpus. Each distinct
 * term owns one row of 0/1 flags, one flag per document; a flag of 1 means the
 * term occurs in that document. Queries are conjunctive: a document matches only
 * if it contains every query term. Matching is case-sensitive.
 */
public class IncidenceMatrix {
    // Documents of the corpus, in the order they were supplied.
    private final String[] myDocs;
    // Dictionary terms in order of first appearance (this is the row order of the matrix).
    private final ArrayList<String> termList;
    // Row i holds the 0/1 incidence flags of termList.get(i), one slot per document.
    private final ArrayList<int[]> docLists;
    // Term -> row number, so lookups do not have to scan termList linearly.
    private final Map<String, Integer> termIndex;

    /**
     * Builds the incidence matrix for the given corpus.
     *
     * @param docs the documents; each one is split into terms on whitespace
     * @throws NullPointerException if {@code docs} or any of its elements is null
     */
    public IncidenceMatrix(String[] docs) {
        Objects.requireNonNull(docs, "docs");
        myDocs = docs.clone();
        termList = new ArrayList<>();
        docLists = new ArrayList<>();
        termIndex = new HashMap<>();
        for (int docId = 0; docId < myDocs.length; docId++) {
            Objects.requireNonNull(myDocs[docId], "docs[" + docId + "]");
            for (String term : tokenize(myDocs[docId])) {
                Integer row = termIndex.get(term);
                if (row == null) {
                    // First sighting of this term: append a fresh all-zero row.
                    row = termList.size();
                    termIndex.put(term, row);
                    termList.add(term);
                    docLists.add(new int[myDocs.length]);
                }
                // The row array is stored by reference, so updating it in place is enough.
                docLists.get(row)[docId] = 1;
            }
        }
    }

    /**
     * Splits text into terms on runs of whitespace, dropping empty tokens so that
     * leading, trailing or repeated spaces never produce an empty-string term.
     */
    private static List<String> tokenize(String text) {
        List<String> tokens = new ArrayList<>();
        for (String token : text.split("\\s+")) {
            if (!token.isEmpty()) {
                tokens.add(token);
            }
        }
        return tokens;
    }

    /**
     * Finds the documents that contain every term of the query (logical AND).
     * Also prints the incidence rows of the query terms found in the dictionary
     * to standard output.
     *
     * @param query whitespace-separated query terms
     * @return 1-based document numbers in ascending order; empty when the query
     *         has no terms or when any term is absent from the dictionary
     * @throws NullPointerException if {@code query} is null
     */
    public ArrayList<Integer> search(String query) {
        Objects.requireNonNull(query, "query");
        List<String> words = tokenize(query);

        // Collect the incidence row of every query term known to the dictionary.
        List<int[]> queryRows = new ArrayList<>();
        for (String word : words) {
            Integer row = termIndex.get(word);
            if (row != null) {
                queryRows.add(docLists.get(row));
            }
        }
        // Print the row contents; printing the list directly would only show array identities.
        System.out.println("the list to be processed based on query "
                + Arrays.deepToString(queryRows.toArray(new int[0][])));

        ArrayList<Integer> resultDocs = new ArrayList<>();
        // An empty query, or a query with an unknown term, cannot match any document.
        if (words.isEmpty() || queryRows.size() != words.size()) {
            return resultDocs;
        }
        for (int docId = 0; docId < myDocs.length; docId++) {
            boolean inEveryRow = true;
            for (int[] row : queryRows) {
                if (row[docId] != 1) {
                    inEveryRow = false;
                    break;
                }
            }
            if (inEveryRow) {
                resultDocs.add(docId + 1); // document numbers are reported 1-based
            }
        }
        return resultDocs;
    }

    /**
     * Renders the matrix one term per line: the term left-justified and padded to
     * 15 characters, followed by its tab-separated 0/1 flags.
     */
    @Override
    public String toString() {
        StringBuilder out = new StringBuilder();
        for (int row = 0; row < termList.size(); row++) {
            out.append(String.format("%-15s", termList.get(row)));
            for (int flag : docLists.get(row)) {
                out.append(flag).append("\t");
            }
            out.append("\n");
        }
        return out.toString();
    }

    /**
     * Demo driver: builds the matrix for a fixed corpus, prints it, reads one
     * query line from standard input and prints the matching document numbers.
     */
    public static void main(String[] args) throws IOException {
        // input document collection: corpus
        String[] docs = {"text data warehousing over big data",
                "dimensional data warehousing over big data",
                "nlp before text mining",
                "nlp before text classification"};
        IncidenceMatrix im = new IncidenceMatrix(docs);
        System.out.println(im);
        // System.in is deliberately left open: it is the process-wide input stream.
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        String query = br.readLine();
        if (query == null) {
            // readLine() returns null at end of input; treat that as an empty query.
            query = "";
        }
        // Query search
        ArrayList<Integer> result = im.search(query);
        System.out.println("Search Results found in the following Documents "+ result);
    }
}