How do you implement a custom filter with Lucene.net?
下面的代码来自《Lucene in Action》一书(原文为Java)。它用于构建"允许访问的"文档列表(从用户权限的角度出发),以过滤搜索结果。问题在于termDocs.Read()方法似乎没有按引用接收'docs'和'freq'数组,因此在设置位数组中的位时,这两个数组仍然是空的。
有人可以帮忙吗?使用Lucene自定义过滤器(尤其是在.NET中)的示例似乎很少。谢谢。
/// <summary>
/// Search filter that admits only documents whose "luceneId" term matches
/// one of the ids supplied at construction time.
/// </summary>
public class LuceneCustomFilter : Lucene.Net.Search.Filter
{
    // Ids of the documents the current user is allowed to see.
    private readonly string[] _luceneIds;

    /// <summary>Creates a filter for the given set of allowed ids.</summary>
    /// <param name="luceneIds">Values of the "luceneId" field to admit; null or empty entries are skipped.</param>
    public LuceneCustomFilter(string[] luceneIds)
    {
        _luceneIds = luceneIds;
    }

    /// <summary>
    /// Builds the bit set of admitted documents for the given reader.
    /// </summary>
    /// <param name="indexReader">Reader over the index being searched.</param>
    /// <returns>
    /// A <see cref="BitArray"/> of length <c>indexReader.MaxDoc()</c> in which the bit
    /// for each admitted document number is set.
    /// </returns>
    public override BitArray Bits(Lucene.Net.Index.IndexReader indexReader)
    {
        BitArray bitarray = new BitArray(indexReader.MaxDoc());

        // One-element buffers: only the first posting of each id term is read.
        // Arrays are reference types, so Read() fills these very instances.
        int[] docs = new int[1];
        int[] freq = new int[1];

        for (int i = 0; i < _luceneIds.Length; i++)
        {
            if (string.IsNullOrEmpty(_luceneIds[i]))
            {
                continue;
            }

            Lucene.Net.Index.TermDocs termDocs = indexReader.TermDocs(
                new Lucene.Net.Index.Term(@"luceneId", _luceneIds[i]));
            try
            {
                int count = termDocs.Read(docs, freq);
                if (count == 1)
                {
                    bitarray.Set(docs[0], true);
                }
            }
            finally
            {
                // Release the enumerator even if Read() throws; the original never closed it.
                termDocs.Close();
            }
        }

        return bitarray;
    }
}
我正在使用Lucene.net 2.0.0.4,但TermDocs接口在最新的主干(trunk)分支中仍然相同:https://svn.apache.org/repos/asf/incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermDocs.cs
这是使用自定义过滤器的Lucene.NET的工作示例,您可以看一下:
using System;
using System.Collections;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;

/// <summary>
/// Minimal end-to-end example: indexes two documents in memory, then runs a
/// match-all query restricted by <see cref="LuceneCustomFilter"/>.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        // Build a two-document in-memory index.
        Directory index = new RAMDirectory();
        Analyzer analyzer = new KeywordAnalyzer();
        IndexWriter writer = new IndexWriter(index, analyzer, true);

        Document doc = new Document();
        doc.Add(new Field("title", "t1", Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc);

        doc = new Document();
        doc.Add(new Field("title", "t2", Field.Store.YES, Field.Index.TOKENIZED));
        writer.AddDocument(doc);

        writer.Close();

        Searcher searcher = new IndexSearcher(index);
        try
        {
            Query query = new MatchAllDocsQuery();
            Filter filter = new LuceneCustomFilter();
            Sort sort = new Sort("title", true);

            // The query matches every document; the filter decides which ones survive.
            Hits hits = searcher.Search(query, filter, sort);

            IEnumerator hitsEnumerator = hits.Iterator();
            while (hitsEnumerator.MoveNext())
            {
                Hit hit = (Hit)hitsEnumerator.Current;
                Console.WriteLine(hit.GetDocument().GetField("title").StringValue());
            }
        }
        finally
        {
            // Release the searcher (and its underlying reader); the original left it open.
            searcher.Close();
        }
    }
}

/// <summary>
/// Filter that admits only the first document containing the term title:"t1".
/// </summary>
public class LuceneCustomFilter : Filter
{
    /// <summary>
    /// Builds the bit set of admitted documents for the given reader.
    /// </summary>
    /// <param name="indexReader">Reader over the index being searched.</param>
    /// <returns>
    /// A <see cref="BitArray"/> of length <c>indexReader.MaxDoc()</c> with the bit of the
    /// matching document set, or all bits clear when the term has no postings.
    /// </returns>
    public override BitArray Bits(IndexReader indexReader)
    {
        BitArray bitarray = new BitArray(indexReader.MaxDoc());

        // Arrays are reference types, so Read() writes into these very instances.
        int[] docs = new int[1];
        int[] freq = new int[1];

        TermDocs termDocs = indexReader.TermDocs(new Term(@"title", "t1"));
        try
        {
            int count = termDocs.Read(docs, freq);
            if (count == 1)
            {
                bitarray.Set(docs[0], true);
            }
        }
        finally
        {
            // Release the enumerator even if Read() throws; the original never closed it.
            termDocs.Close();
        }

        return bitarray;
    }
}
这里似乎有些误解:数组是引用类型,把数组作为参数传递时,传递的是对同一个数组的引用,因此被调用方法对数组元素的修改在调用方也是可见的。
例如,下面的代码片段将打印10101010(四个元素都变成了10),表示数组中的值已被更新。
我是不是漏掉了什么?
/// <summary>
/// Demonstrates that a callee's writes to an array parameter are visible to the caller:
/// every element is 5 before the call to <see cref="Add"/> and 10 afterwards.
/// </summary>
public void TestPassing()
{
    int[] stuff = { 5, 5, 5, 5 };

    Add(stuff, 5);

    // Written back-to-back with no separator.
    foreach (int value in stuff)
    {
        Console.Write(value);
    }
}

/// <summary>Adds <paramref name="x"/> to every element of <paramref name="stuff"/> in place.</summary>
/// <param name="stuff">Array whose elements are updated.</param>
/// <param name="x">Amount added to each element.</param>
public void Add(int[] stuff, int x)
{
    int index = 0;
    while (index < stuff.Length)
    {
        stuff[index] += x;
        index++;
    }
}