代码很简单, 直接看就明白了, 可以在实际工作中借鉴, 原文在这里。
这个例子使用两种方式来演示如何生成全量索引:
1. 从db中通过sql生成全量索引;
2. 通过tika解析文件生成全量索引。
package SolrJExample;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.StreamingUpdateSolrServer;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.sql.*;
import java.util.ArrayList;
import java.util.Collection;
/**
 * Example class showing the skeleton of using Tika and SQL on the client to
 * index documents from both files on disk and a SQL database.
 *
 * NOTE: The SQL example and the Tika example are entirely orthogonal.
 * Both are included here to make a more interesting example, but you can
 * omit either of them.
 *
 * Documents from both sources are buffered in a single collection and sent to
 * Solr in batches; {@code endIndexing()} sends whatever is left and commits.
 */
public class SqlTikaExample {
    // Number of buffered documents that triggers a send to Solr.
    // Completely arbitrary, just batch up more than one document for throughput!
    private static final int BATCH_SIZE = 1000;
    // commitWithin value passed with every add: 5 minutes, in milliseconds.
    private static final int COMMIT_WITHIN_MS = 300000;

    private StreamingUpdateSolrServer _server;
    private long _start = System.currentTimeMillis();
    private AutoDetectParser _autoParser;
    private int _totalTika = 0;
    private int _totalSql = 0;
    // Documents waiting to be sent to Solr; shared by the Tika and SQL paths.
    private Collection<SolrInputDocument> _docs = new ArrayList<SolrInputDocument>();

    /**
     * Indexes a hard-coded directory tree via Tika, then a hard-coded MySQL
     * table, then commits. Any failure is printed and ends the run.
     */
    public static void main(String[] args) {
        try {
            SqlTikaExample idxer = new SqlTikaExample("http://localhost:8983/solr");
            idxer.doTikaDocuments(new File("/Users/Erick/testdocs"));
            idxer.doSqlDocuments();
            idxer.endIndexing();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Sets up the Solr connection and the Tika parser.
     *
     * @param url base URL of the Solr server to index into
     */
    private SqlTikaExample(String url) throws IOException, SolrServerException {
        // Create a multi-threaded communications channel to the Solr server.
        // Could be CommonsHttpSolrServer as well.
        _server = new StreamingUpdateSolrServer(url, 10, 4);
        _server.setSoTimeout(1000); // socket read timeout
        _server.setConnectionTimeout(1000);
        _server.setMaxRetries(1); // defaults to 0. > 1 not recommended.
        // binary parser is used by default for responses
        _server.setParser(new XMLResponseParser());
        // One of the ways Tika can be used to attempt to parse arbitrary files.
        _autoParser = new AutoDetectParser();
    }

    /**
     * Just a convenient place to wrap things up: sends any left-over buffered
     * documents, commits, and logs the totals and the elapsed time.
     */
    private void endIndexing() throws IOException, SolrServerException {
        flushDocs(); // Are there any documents left over?
        // Only needs to be done at the end, commitWithin should do the rest.
        // Could even be omitted assuming commitWithin was specified.
        _server.commit();
        long endTime = System.currentTimeMillis();
        log("Total Time Taken: " + (endTime - _start) +
                " milliseconds to index " + _totalSql +
                " SQL rows and " + _totalTika + " documents");
    }

    /**
     * Sends the buffered documents to Solr (commit within 5 minutes), logs a
     * non-zero response status, and empties the buffer. Does nothing when the
     * buffer is empty.
     */
    private void flushDocs() throws IOException, SolrServerException {
        if (_docs.isEmpty()) {
            return;
        }
        UpdateResponse resp = _server.add(_docs, COMMIT_WITHIN_MS);
        if (resp.getStatus() != 0) {
            log("Some horrible error has occurred, status is: " +
                    resp.getStatus());
        }
        _docs.clear();
    }

    // I hate writing System.out.println() everyplace,
    // besides this gives a central place to convert to true logging
    // in a production system.
    private static void log(String msg) {
        System.out.println(msg);
    }

    /**
     * ***************************Tika processing here
     */

    /**
     * Recursively traverses the filesystem below {@code root}, parsing
     * everything found with Tika and buffering one Solr document per file.
     * Files that fail to parse are logged and skipped.
     *
     * @param root directory to walk; a path that cannot be listed is logged
     *             and skipped
     */
    private void doTikaDocuments(File root) throws IOException, SolrServerException {
        // listFiles() returns null (not an empty array) when root is not a
        // directory or cannot be read; guard so the loop cannot NPE.
        File[] children = root.listFiles();
        if (children == null) {
            log(String.format("Skipping %s: not a directory or not readable",
                    root.getPath()));
            return;
        }
        for (File file : children) {
            if (file.isDirectory()) {
                doTikaDocuments(file);
                continue;
            }
            // Get ready to parse the file.
            // NOTE(review): per the Tika Javadoc the no-arg BodyContentHandler
            // has a bounded buffer (100k characters), so larger documents end
            // up in the failure branch below; pass -1 to lift the limit if
            // that is unwanted -- confirm against the Tika version in use.
            ContentHandler textHandler = new BodyContentHandler();
            Metadata metadata = new Metadata();
            ParseContext context = new ParseContext();
            InputStream input = new FileInputStream(file);
            // Try parsing the file. Note we haven't checked at all to
            // see whether this file is a good candidate.
            try {
                _autoParser.parse(input, textHandler, metadata, context);
            } catch (Exception e) {
                // Needs better logging of what went wrong in order to
                // track down "bad" documents.
                log(String.format("File %s failed", file.getCanonicalPath()));
                e.printStackTrace();
                continue;
            } finally {
                // The parser consumes the stream but does not close it, so
                // close it here on both the success and the failure path.
                try {
                    input.close();
                } catch (IOException closeFailure) {
                    // A failed close must not abort the whole traversal.
                    log("Could not close " + file.getPath() + ": " + closeFailure);
                }
            }
            // Just to show how much meta-data and what form it's in.
            dumpMetadata(file.getCanonicalPath(), metadata);
            // Index just a couple of the meta-data fields.
            SolrInputDocument doc = new SolrInputDocument();
            doc.addField("id", file.getCanonicalPath());
            // Crude way to get known meta-data fields.
            // Also possible to write a simple loop to examine all the
            // metadata returned and selectively index it and/or
            // just get a list of them.
            // One can also use the LucidWorks field mapping to
            // accomplish much the same thing.
            String author = metadata.get("Author");
            if (author != null) {
                doc.addField("author", author);
            }
            doc.addField("text", textHandler.toString());
            _docs.add(doc);
            ++_totalTika;
            if (_docs.size() >= BATCH_SIZE) {
                flushDocs();
            }
        }
    }

    /**
     * Logs every metadata name/value pair Tika extracted for a file,
     * just to show all the metadata that's available.
     *
     * @param fileName path printed in the heading line
     * @param metadata metadata populated by the Tika parse
     */
    private void dumpMetadata(String fileName, Metadata metadata) {
        log("Dumping metadata for file: " + fileName);
        for (String name : metadata.names()) {
            log(name + ":" + metadata.get(name));
        }
        log("\n\n");
    }

    /**
     * ***************************SQL processing here
     */

    /**
     * Reads every row of the {@code test} table and buffers one Solr document
     * per row (fields {@code id}, {@code title}, {@code text}).
     *
     * Failures while loading the driver, querying, or sending to Solr are
     * printed and end this method early (best effort, as in the original
     * example); only a failure while closing the JDBC resources propagates.
     *
     * @throws SQLException if closing the JDBC resources fails
     */
    private void doSqlDocuments() throws SQLException {
        Connection con = null;
        Statement st = null;
        ResultSet rs = null;
        try {
            Class.forName("com.mysql.jdbc.Driver").newInstance();
            log("Driver Loaded......");
            // NOTE(review): host and credentials are hard-coded for the
            // example; externalize them before using this anywhere real.
            con = DriverManager.getConnection("jdbc:mysql://192.168.1.103:3306/test?"
                    + "user=testuser&password=test123");
            st = con.createStatement();
            rs = st.executeQuery("select id,title,text from test");
            while (rs.next()) {
                // DO NOT move this outside the while loop
                // or be sure to call doc.clear()
                SolrInputDocument doc = new SolrInputDocument();
                String id = rs.getString("id");
                String title = rs.getString("title");
                String text = rs.getString("text");
                doc.addField("id", id);
                doc.addField("title", title);
                doc.addField("text", text);
                _docs.add(doc);
                ++_totalSql;
                // Same threshold as the Tika path.
                if (_docs.size() >= BATCH_SIZE) {
                    flushDocs();
                }
            }
        } catch (Exception ex) {
            log("SQL indexing failed after " + _totalSql + " rows");
            ex.printStackTrace();
        } finally {
            // Close in reverse order of creation; the nesting guarantees the
            // connection is closed even if an earlier close() throws.
            try {
                if (rs != null) {
                    rs.close();
                }
            } finally {
                try {
                    if (st != null) {
                        st.close();
                    }
                } finally {
                    if (con != null) {
                        con.close();
                    }
                }
            }
        }
    }
}
分享到:
相关推荐
用户可以通过http请求,向搜索引擎服务器提交一定格式的XML文件,生成索引;也可以通过Http Get操作提出查找请求,并得到XML格式的返回结果。 Solrj 是访问 Solr 的 Java 客户端,它提供添加、更新和查询Solr ...
里面有非常详细的说明, 教你怎么使用solrj客户端来操作solr API
主要讲解了solr客户端如何调用带账号密码的solr服务器,实现添加索引和查询索引,以及分组查询
solrj使用教程
商城项目的搜索功能示例代码-使用solrj压缩包中是在商场项目中使用solrj访问solr服务的代码例子,需要时可以参考。。。
solr详细配置教程与solrj的使用
solrj工具类封装,包括条件批量查询,批量增删改,分段修改。
简单的SolrJ使用示例,包括文件:News.java(PO对应的class),SolrService.java(对solrJ的包装),SolrServiceHook.java(在查询前修改SolrQuery的行为)
放了SolrJ6.3.0所有web工程下的Jar包。solr-solrj6.3.0.jar等。放了SolrJ6.3.0所有web工程下的Jar包。放了SolrJ6.3.0所有web工程下的Jar包。
该文档主要是对solr1.4的配置,包含服务器的复制,分发,和分片
solrJ是Java连接solr进行查询检索和索引更新维护的jar包。
solr-solrj 5.0.0 和自己搭建的solr服务交互
压缩文件里面有solr-solrj-4.10.3.jar和solr-solrj-5.0.0.jar两个jar
solr-solrj-4.9.0.jar
Lucidworks Spark / Solr集成该项目包括用于从Solr作为Spark DataFrame / RDD读取数据以及使用SolrJ将对象从Spark索引到Solr的工具。 索引编制例子索引和查询Twitter数据索引和查询纽约市黄色出租车CSV数据配置和...
NULL 博文链接:https://747017186.iteye.com/blog/2111497
solrj实现索引根据id进行更新,可以添加field、更新field、在原有多值field上增加索引。
solr-solrj-4.10.3.jar。
1、站内搜索的技术选型 2、什么是solr 3、solr的安装及配置 ...4、使用solr维护索引 a)添加 b)删除 c)修改 5、使用solr查询索引 6、Solr的客户端SolrJ a)solrJ维护索引 b)SolrJ查询索引 7、综合案例
solrj的facet查询总结