Commit a8eda8e0 authored by Itamar Syn-Hershko's avatar Itamar Syn-Hershko
Browse files

Finishing upgrade to 5.x

parent 9824af54
......@@ -2,7 +2,7 @@
jvm=true
name=analysis-hebrew
description=Hebrew analyzer powered by HebMorph
classname=com.code972.elasticsearch.plugins.AnalysisHebrewPlugin
classname=com.code972.elasticsearch.HebrewAnalysisPlugin
elasticsearch.version=ES-PLUGIN-VERSION
java.version=1.7
java.version=1.8
version=ES-PLUGIN-VERSION
......@@ -2,7 +2,7 @@
jvm=true
name=analysis-hebrew
description=Hebrew analyzer powered by HebMorph
classname=com.code972.elasticsearch.plugins.AnalysisPlugin
classname=com.code972.elasticsearch.HebrewAnalysisPlugin
elasticsearch.version=ES-PLUGIN-VERSION
java.version=1.7
java.version=1.8
version=ES-PLUGIN-VERSION
grant {
grant {
permission java.lang.RuntimePermission "accessClassInPackage.sun.reflect.generics.reflectiveObjects";
permission java.io.FilePermission "/var/lib/hebmorph/dictionary.dict", "read";
permission java.io.FilePermission "/var/lib/hspell-data-files", "read";
permission java.io.FilePermission "/var/lib/hspell-data-files/*", "read";
......
......@@ -9,7 +9,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<lucene.version>6.2.1</lucene.version>
<lucene.version>6.2.0</lucene.version>
<elasticsearch.version>5.0.0</elasticsearch.version>
<hebmorph.version>6.0.0</hebmorph.version>
</properties>
......@@ -178,6 +178,12 @@
<version>${elasticsearch.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.elasticsearch.test</groupId>
<artifactId>framework</artifactId>
<version>${elasticsearch.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.code972.hebmorph</groupId>
<artifactId>hebmorph-lucene</artifactId>
......@@ -190,6 +196,14 @@
<version>17.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.6.2</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
......
......@@ -9,9 +9,9 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<lucene.version>5.5.2</lucene.version>
<elasticsearch.version>ES-PLUGIN-VERSION</elasticsearch.version>
<hebmorph.version>2.4.0</hebmorph.version>
<lucene.version>6.2.0</lucene.version>
<elasticsearch.version>5.0.0</elasticsearch.version>
<hebmorph.version>6.0.0</hebmorph.version>
</properties>
<groupId>com.code972.hebmorph</groupId>
......@@ -87,8 +87,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>3.2</version>
<configuration>
<source>1.7</source>
<target>1.7</target>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
......@@ -178,6 +178,12 @@
<version>${elasticsearch.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.elasticsearch.test</groupId>
<artifactId>framework</artifactId>
<version>${elasticsearch.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.code972.hebmorph</groupId>
<artifactId>hebmorph-lucene</artifactId>
......@@ -190,6 +196,14 @@
<version>17.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.6.2</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
......
......@@ -3,9 +3,9 @@ set -e
echo "Releasing elasticsearch-analysis-hebrew version $1"
export HEBMORPH_LUCENE_VERSION=2.4.0
export RELEASE_PATH="./releases/elasticsearch-analysis-hebrew-$1/"
mkdir -p ${RELEASE_PATH}
export HEBMORPH_LUCENE_VERSION=6.2.0
export RELEASE_PATH="./releases/elasticsearch-analysis-hebrew-$1"
mkdir -p ${RELEASE_PATH}/elasticsearch/hspell-data-files
cp plugin-descriptor.properties.template plugin-descriptor.properties
cp pom.xml.template pom.xml
......@@ -14,17 +14,39 @@ sed -i '.bak' "s/ES-PLUGIN-VERSION/$1/" pom.xml
mvn clean
mvn package
cp "target/elasticsearch-analysis-hebrew-${1}.jar" plugin-descriptor.properties plugin-security.policy ${RELEASE_PATH}
# Prepare binaries
wget "http://central.maven.org/maven2/com/code972/hebmorph/hebmorph-lucene/$HEBMORPH_LUCENE_VERSION/hebmorph-lucene-$HEBMORPH_LUCENE_VERSION.jar" -P ${RELEASE_PATH}/elasticsearch
cp "target/elasticsearch-analysis-hebrew-${1}.jar" plugin-descriptor.properties plugin-security.policy ${RELEASE_PATH}/elasticsearch
# Package open-source plugin with hspell dictionary
pushd ${RELEASE_PATH}/elasticsearch/hspell-data-files
wget https://github.com/synhershko/HebMorph/raw/master/hspell-data-files/hebrew.wgz
wget https://github.com/synhershko/HebMorph/raw/master/hspell-data-files/hebrew.wgz.desc
wget https://github.com/synhershko/HebMorph/raw/master/hspell-data-files/hebrew.wgz.prefixes
wget https://github.com/synhershko/HebMorph/raw/master/hspell-data-files/hebrew.wgz.sizes
wget https://github.com/synhershko/HebMorph/raw/master/hspell-data-files/hebrew.wgz.stems
wget https://github.com/synhershko/HebMorph/raw/master/hspell-data-files/prefixes.c
wget https://github.com/synhershko/HebMorph/raw/master/hspell-data-files/dmask.c
wget https://github.com/synhershko/HebMorph/raw/master/hspell-data-files/prefix_h.gz
popd
pushd ${RELEASE_PATH}
wget "http://central.maven.org/maven2/com/code972/hebmorph/hebmorph-lucene/$HEBMORPH_LUCENE_VERSION/hebmorph-lucene-$HEBMORPH_LUCENE_VERSION.jar"
zip "elasticsearch-analysis-hebrew-$1.zip" "elasticsearch-analysis-hebrew-$1.jar" "hebmorph-lucene-$HEBMORPH_LUCENE_VERSION.jar" plugin-descriptor.properties plugin-security.policy
zip -r "elasticsearch-analysis-hebrew-$1.zip" ./elasticsearch
popd
cp ~/packaging/* .
zip "elasticsearch-analysis-hebrew-$1-commercial.zip" "elasticsearch-analysis-hebrew-$1.jar" plugin-descriptor.properties plugin-security.policy dictionary.dict "hebmorph-lucene-commercial-$HEBMORPH_LUCENE_VERSION.jar"
# Package the commercial plugin
rm -r ${RELEASE_PATH}/elasticsearch/hspell-data-files
cp ./../hebmorph.dictionary/release/* ${RELEASE_PATH}/elasticsearch
pushd ${RELEASE_PATH}
zip -r "elasticsearch-analysis-hebrew-commercial-$1.zip" ./elasticsearch
popd
# Reset the staging directory so the next packaging step starts clean
rm -r ${RELEASE_PATH}/elasticsearch
cp plugin-descriptor.properties.template plugin-descriptor.properties
# publish to bintray
pushd ${RELEASE_PATH}
curl -T elasticsearch-analysis-hebrew-$1.zip -usynhershko:$BINTRAY_API_KEY "https://api.bintray.com/content/synhershko/elasticsearch-analysis-hebrew/elasticsearch-analysis-hebrew-plugin/$1/elasticsearch-analysis-hebrew-$1?publish=1"
popd
package com.code972.elasticsearch;
import com.code972.elasticsearch.plugins.index.analysis.*;
import com.code972.elasticsearch.plugins.rest.action.RestHebrewAnalyzerCheckWordAction;
import com.code972.hebmorph.DictionaryLoader;
import com.code972.hebmorph.datastructures.DictHebMorph;
import com.code972.hebmorph.hspell.HSpellDictionaryLoader;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.SpecialPermission;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.plugins.ActionPlugin;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.rest.RestHandler;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static java.util.Collections.*;
/**
* The Hebrew analysis plugin entry point, locating and loading the dictionary and configuring
* the tokenizer, token filters and analyzers
*/
/**
 * The Hebrew analysis plugin entry point, locating and loading the dictionary and configuring
 * the tokenizer, token filters and analyzers
 */
public final class HebrewAnalysisPlugin extends Plugin implements ActionPlugin, AnalysisPlugin {

    private final Logger log = LogManager.getLogger(this.getClass());

    // Optional commercial dictionary loader; resolved reflectively so the OSS build works without it
    private final static String commercialDictionaryLoaderClass = "com.code972.hebmorph.dictionary.impl.HebMorphDictionaryLoader";

    private static DictHebMorph dict;

    /** Returns the dictionary loaded during plugin construction, or null if construction failed. */
    public static DictHebMorph getDictionary() {
        return dict;
    }

    /**
     * Attempts to load a dictionary from paths specified in elasticsearch.yml.
     * If hebrew.dict.path is defined, try loading that first, then fall back to the
     * loader's default search paths.
     *
     * @param settings node settings, read for the {@code hebrew.dict.path} key
     * @throws IllegalArgumentException if no dictionary could be loaded from any path
     */
    public HebrewAnalysisPlugin(final Settings settings) {
        super();

        final SecurityManager sm = System.getSecurityManager();
        if (sm != null) {
            // unprivileged code such as scripts do not have SpecialPermission
            sm.checkPermission(new SpecialPermission());
        }

        // Figure out which DictionaryLoader class to use for loading the dictionary
        DictionaryLoader dictLoader = (DictionaryLoader) AccessController.doPrivileged((PrivilegedAction<Object>) () -> {
            try {
                // Class.forName either loads the class or throws; no null check needed
                final Class<?> clz = Class.forName(commercialDictionaryLoaderClass);
                log.info("Dictionary loader available ({})", clz.getSimpleName());
                try {
                    // Reuse the already-resolved class rather than calling Class.forName a second time
                    final Constructor<?> ctor = clz.getConstructor();
                    return (DictionaryLoader) ctor.newInstance();
                } catch (NoSuchMethodException | IllegalAccessException | InstantiationException | InvocationTargetException e) {
                    log.error("Unable to load the HebMorph dictionary", e);
                }
            } catch (ClassNotFoundException ignored) {
                // If external dictionary loaders are not present, we default to the one provided with OSS HebMorph
            }
            return null;
        });
        if (dictLoader == null) {
            log.info("Defaulting to HSpell dictionary loader");
            dictLoader = new HSpellDictionaryLoader();
        }

        // If path was specified in settings, try that path first
        final String pathFromSettings = settings.get("hebrew.dict.path");
        if (pathFromSettings != null && !pathFromSettings.isEmpty()) {
            // Log before attempting so a crash inside the loader is attributable to the path
            log.info("Trying to load {} dictionary from path {}", dictLoader.dictionaryLoaderName(), pathFromSettings);
            final DictHebMorph tmp = AccessController.doPrivileged(new LoadDictAction(pathFromSettings, dictLoader));
            if (tmp != null) {
                dict = tmp;
                log.info("Dictionary '{}' loaded successfully from path {}", dictLoader.dictionaryLoaderName(), pathFromSettings);
                return;
            }
        }

        // Fall back to the loader's well-known locations; remember what we tried for the error message
        final StringBuilder triedPaths = new StringBuilder();
        for (final String path : dictLoader.dictionaryPossiblePaths()) {
            log.info("Trying to load {} from path {}", dictLoader.dictionaryLoaderName(), path);
            triedPaths.append(' ').append(path);
            final DictHebMorph tmp = AccessController.doPrivileged(new LoadDictAction(path, dictLoader));
            if (tmp != null) {
                dict = tmp;
                log.info("Dictionary '{}' loaded successfully from path {}", dictLoader.dictionaryLoaderName(), path);
                return;
            }
        }

        throw new IllegalArgumentException("Could not load any dictionary. Aborting! Tried paths:" + triedPaths);
    }

    /**
     * Privileged load of a dictionary from a single filesystem path.
     * Returns null when the file does not exist or fails to load.
     */
    private class LoadDictAction implements PrivilegedAction<DictHebMorph> {

        private final String path;
        private final DictionaryLoader loader;

        public LoadDictAction(final String path, DictionaryLoader dictLoader) {
            this.path = path;
            this.loader = dictLoader;
        }

        @Override
        public DictHebMorph run() {
            final File file = new File(path);
            if (file.exists()) {
                try {
                    return loader.loadDictionaryFromPath(path);
                } catch (IOException e) {
                    // Include the failing path so the log entry is actionable
                    log.error("Failed to load dictionary from path {}", path, e);
                }
            }
            return null;
        }
    }

    /** Registers the REST endpoint used to check a single word against the analyzer. */
    @Override
    public List<Class<? extends RestHandler>> getRestHandlers() {
        return singletonList(RestHebrewAnalyzerCheckWordAction.class);
    }

    /** Registers the hebrew_lemmatizer, niqqud and add_suffix token filters. */
    @Override
    public Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
        final Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> extra = new HashMap<>();
        extra.put("hebrew_lemmatizer", (indexSettings, env, name, settings) -> new HebrewLemmatizerTokenFilterFactory(indexSettings, env, name, settings, dict));
        extra.put("niqqud", NiqqudFilterTokenFilterFactory::new);
        extra.put("add_suffix", AddSuffixTokenFilterFactory::new);
        return unmodifiableMap(extra);
    }

    /** Registers the "hebrew" tokenizer backed by the loaded dictionary. */
    @Override
    public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
        return singletonMap("hebrew", (indexSettings, env, name, settings) -> new HebrewTokenizerFactory(indexSettings, env, name, settings, dict));
    }

    /** Registers the hebrew, hebrew_query, hebrew_query_light and hebrew_exact analyzers. */
    @Override
    public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
        final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> extra = new HashMap<>();
        extra.put("hebrew", (indexSettings, env, name, settings) -> new HebrewIndexingAnalyzerProvider(indexSettings, env, name, settings, dict));
        extra.put("hebrew_query", (indexSettings, env, name, settings) -> new HebrewQueryAnalyzerProvider(indexSettings, env, name, settings, dict));
        extra.put("hebrew_query_light", (indexSettings, env, name, settings) -> new HebrewQueryLightAnalyzerProvider(indexSettings, env, name, settings, dict));
        extra.put("hebrew_exact", (indexSettings, env, name, settings) -> new HebrewExactAnalyzerProvider(indexSettings, env, name, settings, dict));
        return unmodifiableMap(extra);
    }
}
package com.code972.elasticsearch.plugins;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.plugins.*;
import java.util.HashMap;
import java.util.Map;
/**
 * Legacy (pre-5.x package layout) Hebrew analysis plugin entry point.
 * NOTE(review): the languageAnalyzers map is populated but never read anywhere in this
 * class — confirm it is not accessed reflectively before removing it entirely.
 */
public class AnalysisHebrewPlugin extends Plugin implements AnalysisPlugin {

    // Name -> provider class lookup for the Hebrew analyzers shipped by this plugin
    private final static HashMap<String, Class<? extends AnalyzerProvider>> languageAnalyzers = new HashMap<>();

    static {
        languageAnalyzers.put("hebrew", HebrewIndexingAnalyzerProvider.class);
        languageAnalyzers.put("hebrew_query", HebrewQueryAnalyzerProvider.class);
        languageAnalyzers.put("hebrew_query_light", HebrewQueryLightAnalyzerProvider.class);
        languageAnalyzers.put("hebrew_exact", HebrewExactAnalyzerProvider.class);
    }

    /** No extra token filters are registered by this legacy entry point. */
    @Override
    public Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
        return new HashMap<>();
    }
}
package com.code972.elasticsearch.plugins;
import com.code972.elasticsearch.rest.action.RestHebrewAnalyzerCheckWordAction;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.analysis.AnalysisModule;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.rest.RestModule;
public class AnalysisPlugin extends Plugin {

    /**
     * Attempts to load a dictionary from paths specified in elasticsearch.yml.
     * If hebrew.dict.path is defined, try loading that.
     *
     * @throws IllegalArgumentException if no explicit path is set and no default
     *         dictionary could be located
     */
    public AnalysisPlugin(Settings settings) {
        final String dictPath = settings.get("hebrew.dict.path");
        final boolean hasExplicitPath = dictPath != null && !dictPath.isEmpty();
        if (hasExplicitPath) {
            DictReceiver.setDictionary(dictPath);
        } else if (DictReceiver.getDictionary() == null) {
            throw new IllegalArgumentException("Could not load any dictionary. Aborting!");
        }
    }

    /** Plugin identifier reported to Elasticsearch. */
    @Override
    public String name() {
        return "elasticsearch-analysis-hebrew";
    }

    /** Human-readable plugin summary. */
    @Override
    public String description() {
        return "Hebrew analyzer powered by HebMorph";
    }

    /* Invoked on component assembly. */
    public void onModule(AnalysisModule analysisModule) {
        analysisModule.addProcessor(new HebrewAnalysisBinderProcessor());
    }

    /* Invoked on component assembly. */
    public void onModule(RestModule restModule) {
        restModule.addRestAction(RestHebrewAnalyzerCheckWordAction.class);
    }
}
package com.code972.elasticsearch.plugins;
import com.code972.hebmorph.datastructures.DictHebMorph;
import com.code972.hebmorph.hspell.HSpellDictionaryLoader;
import org.elasticsearch.SpecialPermission;
import java.io.File;
import java.io.IOException;
import java.security.AccessController;
import java.security.PrivilegedAction;
/**
* This class will try to locate the dictionary to load, and call the DictionaryLoader class with the files it found
* to initialize loading them and initializing the HebMorph analyzers.
*/
/**
 * This class will try to locate the dictionary to load, and call the DictionaryLoader class with the files it found
 * to initialize loading them and initializing the HebMorph analyzers.
 */
public class DictReceiver {
    // Default search locations, probed in order: commercial .dict files first, then hspell data folders
    private static String[] filePaths = {"plugins/analysis-hebrew/dictionary.dict", "/var/lib/hebmorph/dictionary.dict",
            "plugins/analysis-hebrew/hspell-data-files/", "/var/lib/hspell-data-files/"};

    private static DictHebMorph dict = null;

    /**
     * Returns the loaded dictionary, lazily falling back to the default search
     * paths on first access.
     *
     * @throws IllegalArgumentException if no dictionary can be found anywhere
     */
    public static DictHebMorph getDictionary() {
        if (dict == null) {
            dict = setDefaultDictionary();
        }
        return dict;
    }

    /** Privileged load of a dictionary from one path; null when missing or unreadable. */
    private static class LoadDictAction implements PrivilegedAction<DictHebMorph> {

        private final String path;
        private final HSpellDictionaryLoader loader;

        public LoadDictAction(final String path) {
            this.path = path;
            this.loader = new HSpellDictionaryLoader();
        }

        @Override
        public DictHebMorph run() {
            final File file = new File(path);
            if (file.exists()) {
                try {
                    return loader.loadDictionaryFromPath(path);
                } catch (IOException e) {
                    // Best effort: report and let the caller try the next candidate path
                    e.printStackTrace();
                }
            }
            return null;
        }
    }

    /**
     * Attempts to load and install the dictionary at the given path.
     *
     * @param path candidate dictionary path; may be null
     * @return true if a dictionary was successfully loaded from the path
     */
    public static boolean setDictionary(String path) {
        SecurityManager sm = System.getSecurityManager();
        if (sm != null) {
            // unprivileged code such as scripts do not have SpecialPermission
            sm.checkPermission(new SpecialPermission());
        }
        if (path != null) {
            final DictHebMorph tmp = AccessController.doPrivileged(new LoadDictAction(path));
            // Bug fix: the original tested `dict != null`, so a fresh load was discarded when no
            // dictionary was previously set, and a FAILED load (tmp == null) could clobber a good
            // dictionary with null. The success of the load is what `tmp` tells us.
            if (tmp != null) {
                dict = tmp;
                return true;
            }
        }
        return false;
    }

    /**
     * Tries each default path in order and returns the first dictionary that loads.
     *
     * @throws IllegalArgumentException if none of the default paths yields a dictionary
     */
    private static DictHebMorph setDefaultDictionary() {
        SecurityManager sm = System.getSecurityManager();
        if (sm != null) {
            // unprivileged code such as scripts do not have SpecialPermission
            sm.checkPermission(new SpecialPermission());
        }
        for (final String path : filePaths) {
            // Renamed local (was `dict`) to avoid shadowing the static field
            final DictHebMorph loaded = AccessController.doPrivileged(new LoadDictAction(path));
            if (loaded != null)
                return loaded;
        }
        throw new IllegalArgumentException("Could not load any dictionary. Aborting!");
    }
}
package com.code972.elasticsearch.plugins.index.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hebrew.TokenFilters.AddSuffixTokenFilter;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
public class AddSuffixTokenFilterFactory extends AbstractTokenFilterFactory {

    // Character appended to each token; configurable via the "suffix" setting (default "$")
    private final char suffix;

    @Inject
    public AddSuffixTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
        super(indexSettings, name, settings);
        final String configured = settings.get("suffix", "$");
        this.suffix = configured.charAt(0);
    }

    /** Wraps the stream with a filter that appends the configured suffix character to every token. */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        return new AddSuffixTokenFilter(tokenStream, suffix);
    }
}
\ No newline at end of file
package com.code972.elasticsearch.plugins;
package com.code972.elasticsearch.plugins.index.analysis;
import com.code972.hebmorph.datastructures.DictHebMorph;
import org.apache.lucene.analysis.hebrew.HebrewExactAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.settings.IndexSettingsService;
import java.io.IOException;
......@@ -15,9 +14,10 @@ public class HebrewExactAnalyzerProvider extends AbstractIndexAnalyzerProvider<H
private final HebrewExactAnalyzer analyzer;
@Inject
public HebrewExactAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) throws IOException {
super(index, indexSettingsService.getSettings(), name, settings);
analyzer = new HebrewExactAnalyzer(DictReceiver.getDictionary());
public HebrewExactAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings,
final DictHebMorph dict) throws IOException {
super(indexSettings, name, settings);
analyzer = new HebrewExactAnalyzer(dict);
analyzer.setVersion(this.version);
}
......
package com.code972.elasticsearch.plugins;
package com.code972.elasticsearch.plugins.index.analysis;
import com.code972.hebmorph.datastructures.DictHebMorph;
import org.apache.lucene.analysis.hebrew.HebrewIndexingAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.settings.IndexSettingsService;
import java.io.IOException;
......@@ -15,9 +14,10 @@ public class HebrewIndexingAnalyzerProvider extends AbstractIndexAnalyzerProvide
private final HebrewIndexingAnalyzer analyzer;