dragon.ir.index
Class AbstractIndexReader

java.lang.Object
  |
  +--dragon.ir.index.AbstractIndexReader
All Implemented Interfaces:
IndexReader
Direct Known Subclasses:
BasicIndexReader, OnlineIndexReader, OnlineSentenceIndexReader

public abstract class AbstractIndexReader
extends java.lang.Object
implements IndexReader

AbstractIndexReader implements the functions defined in the interface IndexReader, such as getting the termdoc and docterm matrices, getting the termkey and dockey, getting the termindex and docindex, getting all the indexed terms for a given document, and getting all the indexed documents for a given term.

Copyright: Copyright (c) 2005

Company: IST, Drexel University

Version:
1.0
Author:
Davis Zhou

Field Summary
protected  IRCollection collection
           
protected  CollectionReader collectionReader
           
protected  IRDocIndexList docIndexList
           
protected  SimpleElementList docKeyList
           
protected  IntSparseMatrix docrelationMatrix
           
protected  IntSparseMatrix doctermMatrix
           
protected  boolean initialized
           
protected  IntSparseMatrix relationdocMatrix
           
protected  IRRelationIndexList relationIndexList
           
protected  boolean relationSupported
           
protected  IntSparseMatrix termdocMatrix
           
protected  IRTermIndexList termIndexList
           
protected  SimpleElementList termKeyList
           
 
Constructor Summary
AbstractIndexReader(boolean relationSupported)
           
AbstractIndexReader(boolean relationSupported, CollectionReader collectionReader)
           
 
Method Summary
 void close()
          This method releases all occupied resources.
 IRCollection getCollection()
           
 IRDoc getDoc(int index)
           
 IRDoc getDoc(java.lang.String key)
           
 java.lang.String getDocKey(int index)
           
 IntSparseMatrix getDocRelationMatrix()
           
 IntSparseMatrix getDocTermMatrix()
           
 IRRelation getIRRelation(int index)
           
 IRRelation getIRRelation(int relationIndex, int docIndex)
          If the given document does not contain the given relation, this method returns null.
 IRTerm getIRTerm(int index)
           
 IRTerm getIRTerm(int termIndex, int docIndex)
          If the given document does not contain the given term, this method returns null.
 IRTerm getIRTerm(java.lang.String key)
           
 Article getOriginalDoc(int index)
           
 Article getOriginalDoc(java.lang.String key)
           
 IntSparseMatrix getRelaitonDocMatrix()
           
 int[] getRelationDocFrequencyList(int relationIndex)
           
 int[] getRelationDocIndexList(int relationIndex)
           
 IRDoc[] getRelationDocList(int relationIndex)
           
 int[] getRelationFrequencyList(int docIndex)
          To know what relation the frequency corresponds to, call the method getRelationIndexList.
 int[] getRelationIndexList(int docIndex)
           
 IRRelation[] getRelationList(int docIndex)
           
 int[] getTermDocFrequencyList(int termIndex)
           
 int[] getTermDocIndexList(int termIndex)
           
 IRDoc[] getTermDocList(int termIndex)
           
 IntSparseMatrix getTermDocMatrix()
           
 int[] getTermFrequencyList(int docIndex)
          To know what term the frequency corresponds to, call the method getTermIndexList.
 int[] getTermIndexList(int docIndex)
           
 java.lang.String getTermKey(int index)
           
 IRTerm[] getTermList(int docIndex)
           
 boolean isRelationSupported()
           
 void setIRDocKeyList(SimpleElementList keyList)
           
 void setIRTermKeyList(SimpleElementList keyList)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 
Methods inherited from interface dragon.ir.index.IndexReader
initialize
 

Field Detail

collectionReader

protected CollectionReader collectionReader

termKeyList

protected SimpleElementList termKeyList

termIndexList

protected IRTermIndexList termIndexList

relationIndexList

protected IRRelationIndexList relationIndexList

docKeyList

protected SimpleElementList docKeyList

docIndexList

protected IRDocIndexList docIndexList

termdocMatrix

protected IntSparseMatrix termdocMatrix

doctermMatrix

protected IntSparseMatrix doctermMatrix

relationdocMatrix

protected IntSparseMatrix relationdocMatrix

docrelationMatrix

protected IntSparseMatrix docrelationMatrix

relationSupported

protected boolean relationSupported

initialized

protected boolean initialized

collection

protected IRCollection collection
Constructor Detail

AbstractIndexReader

public AbstractIndexReader(boolean relationSupported)

AbstractIndexReader

public AbstractIndexReader(boolean relationSupported,
                           CollectionReader collectionReader)
Method Detail

getDocTermMatrix

public IntSparseMatrix getDocTermMatrix()

getTermDocMatrix

public IntSparseMatrix getTermDocMatrix()

getDocRelationMatrix

public IntSparseMatrix getDocRelationMatrix()

getRelaitonDocMatrix

public IntSparseMatrix getRelaitonDocMatrix()

close

public void close()
Description copied from interface: IndexReader
This method releases all occupied resources.

Specified by:
close in interface IndexReader

isRelationSupported

public boolean isRelationSupported()
Specified by:
isRelationSupported in interface IndexReader
Returns:
true if the indexing contains relationship information

setIRDocKeyList

public void setIRDocKeyList(SimpleElementList keyList)

setIRTermKeyList

public void setIRTermKeyList(SimpleElementList keyList)

getIRTerm

public IRTerm getIRTerm(int index)
Specified by:
getIRTerm in interface IndexReader
Parameters:
index - the index of the term
Returns:
the IRTerm object

getTermKey

public java.lang.String getTermKey(int index)
Specified by:
getTermKey in interface IndexReader
Returns:
the name of the given term

getIRTerm

public IRTerm getIRTerm(java.lang.String key)
Specified by:
getIRTerm in interface IndexReader
Parameters:
key - the name of the term
Returns:
the IRTerm object

getIRTerm

public IRTerm getIRTerm(int termIndex,
                        int docIndex)
Description copied from interface: IndexReader
If the given document does not contain the given term, this method returns null.

Specified by:
getIRTerm in interface IndexReader
Parameters:
termIndex - the index of the term
docIndex - the index of the document
Returns:
the IRTerm object

getTermList

public IRTerm[] getTermList(int docIndex)
Specified by:
getTermList in interface IndexReader
Parameters:
docIndex - the index of the document
Returns:
a list of unique terms in the given document

getTermFrequencyList

public int[] getTermFrequencyList(int docIndex)
Description copied from interface: IndexReader
To know what term the frequency corresponds to, call the method getTermIndexList.

Specified by:
getTermFrequencyList in interface IndexReader
Parameters:
docIndex - the index of the document
Returns:
an integer array, each element of which is the frequency of a unique term in the given document

getTermIndexList

public int[] getTermIndexList(int docIndex)
Specified by:
getTermIndexList in interface IndexReader
Parameters:
docIndex - the index of the document
Returns:
an integer array, each element of which is the index of a unique term in the given document

getIRRelation

public IRRelation getIRRelation(int index)
Specified by:
getIRRelation in interface IndexReader
Parameters:
index - the index of the relation
Returns:
the IRRelation object

getIRRelation

public IRRelation getIRRelation(int relationIndex,
                                int docIndex)
Description copied from interface: IndexReader
If the given document does not contain the given relation, this method returns null.

Specified by:
getIRRelation in interface IndexReader
Parameters:
relationIndex - the index of the relation
docIndex - the index of the document
Returns:
the IRRelation object

getRelationIndexList

public int[] getRelationIndexList(int docIndex)
Specified by:
getRelationIndexList in interface IndexReader
Parameters:
docIndex - the index of the document
Returns:
an integer array, each element of which is the index of a unique relation in the given document

getRelationFrequencyList

public int[] getRelationFrequencyList(int docIndex)
Description copied from interface: IndexReader
To know what relation the frequency corresponds to, call the method getRelationIndexList.

Specified by:
getRelationFrequencyList in interface IndexReader
Parameters:
docIndex - the index of the document
Returns:
an integer array, each element of which is the frequency of a unique relation in the given document

getRelationList

public IRRelation[] getRelationList(int docIndex)
Specified by:
getRelationList in interface IndexReader
Parameters:
docIndex - the index of the document
Returns:
a list of unique relations in the given document

getCollection

public IRCollection getCollection()
Specified by:
getCollection in interface IndexReader
Returns:
IRCollection object which contains the statistics of the indexed collection

getDoc

public IRDoc getDoc(int index)
Specified by:
getDoc in interface IndexReader
Parameters:
index - the index of the document
Returns:
the IRDoc object

getDocKey

public java.lang.String getDocKey(int index)
Specified by:
getDocKey in interface IndexReader
Parameters:
index - the index of the document
Returns:
the unique entry number of the index-th document

getDoc

public IRDoc getDoc(java.lang.String key)
Specified by:
getDoc in interface IndexReader
Parameters:
key - the unique entry number of the document
Returns:
the IRDoc object

getOriginalDoc

public Article getOriginalDoc(java.lang.String key)
Specified by:
getOriginalDoc in interface IndexReader
Parameters:
key - the unique entry number of the document
Returns:
the raw content of the indexed document

getOriginalDoc

public Article getOriginalDoc(int index)
Specified by:
getOriginalDoc in interface IndexReader
Parameters:
index - the index of the document
Returns:
the raw content of the indexed document

getTermDocIndexList

public int[] getTermDocIndexList(int termIndex)
Specified by:
getTermDocIndexList in interface IndexReader
Parameters:
termIndex - the index of the term
Returns:
a list of indices of documents containing this term

getTermDocList

public IRDoc[] getTermDocList(int termIndex)
Specified by:
getTermDocList in interface IndexReader
Parameters:
termIndex - the index of the term
Returns:
a list of IRDoc objects which contain the specified term

getTermDocFrequencyList

public int[] getTermDocFrequencyList(int termIndex)
Specified by:
getTermDocFrequencyList in interface IndexReader
Parameters:
termIndex - the index of the term
Returns:
a list of the frequencies with which the given term occurs in each of the documents containing this term

getRelationDocFrequencyList

public int[] getRelationDocFrequencyList(int relationIndex)
Specified by:
getRelationDocFrequencyList in interface IndexReader
Parameters:
relationIndex - the index of the relation
Returns:
a list of the frequencies with which the given relation occurs in each of the documents containing this relation

getRelationDocList

public IRDoc[] getRelationDocList(int relationIndex)
Specified by:
getRelationDocList in interface IndexReader
Parameters:
relationIndex - the index of the relation
Returns:
a list of IRDoc objects which contain the specified relation

getRelationDocIndexList

public int[] getRelationDocIndexList(int relationIndex)
Specified by:
getRelationDocIndexList in interface IndexReader
Parameters:
relationIndex - the index of the relation
Returns:
a list of indices of documents containing this relation