Skip to content

Commit

Permalink
Update codecs to Apache Lucene 9.12.0 (#198)
Browse files Browse the repository at this point in the history
* Update codecs to Apache Lucene 9.12.0

Signed-off-by: Andriy Redko <[email protected]>

* Address code review comments

Signed-off-by: Andriy Redko <[email protected]>

* Address code review comments

Signed-off-by: Andriy Redko <[email protected]>

* Fix javadoc comments

Signed-off-by: Andriy Redko <[email protected]>

---------

Signed-off-by: Andriy Redko <[email protected]>
  • Loading branch information
reta authored Oct 24, 2024
1 parent 5b5d693 commit 6d74a67
Show file tree
Hide file tree
Showing 33 changed files with 1,150 additions and 201 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
run: |
# https://github.com/opensearch-project/opensearch-build/issues/4191
chown -R ci-runner:ci-runner `pwd`
su ci-runner -c "source /etc/profile.d/java_home.sh && ./gradlew check -Dorg.gradle.java.home=/opt/java/openjdk-${{ matrix.java }}"
su ci-runner -c "source /etc/profile.d/java_home.sh && ./gradlew test check -Dorg.gradle.java.home=/opt/java/openjdk-${{ matrix.java }}"
- name: Run Gradle (assemble)
run: |
# https://github.com/opensearch-project/opensearch-build/issues/4191
Expand All @@ -53,7 +53,7 @@ jobs:
cache: gradle
- name: Run Gradle (check)
run: |
./gradlew check
./gradlew test check
- name: Run Gradle (assemble)
run: |
./gradlew assemble
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;
import org.opensearch.core.common.Strings;
import org.opensearch.index.codec.customcodecs.Lucene99QatCodec;
import org.opensearch.index.codec.customcodecs.Lucene912QatCodec;
import org.opensearch.index.codec.customcodecs.QatZipperFactory;
import org.opensearch.test.rest.OpenSearchRestTestCase;

Expand Down Expand Up @@ -103,7 +103,10 @@ public void testCreateIndexWithQatSPICodecWithQatHardwareUnavailable() throws IO
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put("index.codec", randomFrom(Lucene99QatCodec.Mode.QAT_LZ4.getCodec(), Lucene99QatCodec.Mode.QAT_DEFLATE.getCodec()))
.put(
"index.codec",
randomFrom(Lucene912QatCodec.Mode.QAT_LZ4.getCodec(), Lucene912QatCodec.Mode.QAT_DEFLATE.getCodec())
)
.put("index.codec.compression_level", randomIntBetween(1, 6))
.build()
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.opensearch.common.settings.Setting;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.codec.CodecServiceFactory;
import org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99QatCodec;
import org.opensearch.index.engine.EngineConfig;
import org.opensearch.plugins.EnginePlugin;
import org.opensearch.plugins.Plugin;
Expand Down Expand Up @@ -49,12 +50,11 @@ public Optional<CodecServiceFactory> getCustomCodecServiceFactory(final IndexSet
|| codecName.equals(CustomCodecService.QAT_DEFLATE_CODEC)) {
return Optional.of(new CustomCodecServiceFactory());
} else {
if (!QatZipperFactory.isQatAvailable()
&& (codecName.equals(Lucene99QatCodec.Mode.QAT_LZ4.getCodec())
|| codecName.equals(Lucene99QatCodec.Mode.QAT_DEFLATE.getCodec()))) {
throw new IllegalArgumentException("QAT codecs are not supported. Please create indices with a different codec.");
if (!QatZipperFactory.isQatAvailable() && isQatCodec(codecName)) {
throw new IllegalArgumentException(
"QAT codecs are not supported (QAT is not available). Please create indices with a different codec."
);
}

}
return Optional.empty();
}
Expand All @@ -63,4 +63,11 @@ public Optional<CodecServiceFactory> getCustomCodecServiceFactory(final IndexSet
public List<Setting<?>> getSettings() {
return Arrays.asList(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
}

private static boolean isQatCodec(String codecName) {
return codecName.equals(Lucene99QatCodec.Mode.QAT_LZ4.getCodec())
|| codecName.equals(Lucene99QatCodec.Mode.QAT_DEFLATE.getCodec())
|| codecName.equals(Lucene912QatCodec.Mode.QAT_LZ4.getCodec())
|| codecName.equals(Lucene912QatCodec.Mode.QAT_DEFLATE.getCodec());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.util.Map;
import java.util.stream.Stream;

import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING;
import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING;

/** CustomCodecService provides ZSTD, ZSTD_NO_DICT, QAT_LZ4, and QAT_DEFLATE compression codecs. */
Expand Down Expand Up @@ -49,25 +50,26 @@ public CustomCodecService(MapperService mapperService, IndexSettings indexSettin
int compressionLevel = indexSettings.getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING);
final MapBuilder<String, Codec> codecs = MapBuilder.<String, Codec>newMapBuilder();
if (mapperService == null) {
codecs.put(ZSTD_CODEC, new Zstd99Codec(compressionLevel));
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict99Codec(compressionLevel));
codecs.put(ZSTD_CODEC, new Zstd912Codec(compressionLevel));
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict912Codec(compressionLevel));
if (QatZipperFactory.isQatAvailable()) {
codecs.put(QAT_LZ4_CODEC, new QatLz499Codec(compressionLevel, () -> {
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
}));
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate99Codec(compressionLevel, () -> {
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
codecs.put(
QAT_LZ4_CODEC,
new QatLz4912Codec(compressionLevel, () -> { return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING); })
);
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate912Codec(compressionLevel, () -> {
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
}));
}
} else {
codecs.put(ZSTD_CODEC, new Zstd99Codec(mapperService, logger, compressionLevel));
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict99Codec(mapperService, logger, compressionLevel));
codecs.put(ZSTD_CODEC, new Zstd912Codec(mapperService, logger, compressionLevel));
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict912Codec(mapperService, logger, compressionLevel));
if (QatZipperFactory.isQatAvailable()) {
codecs.put(QAT_LZ4_CODEC, new QatLz499Codec(mapperService, logger, compressionLevel, () -> {
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
codecs.put(QAT_LZ4_CODEC, new QatLz4912Codec(mapperService, logger, compressionLevel, () -> {
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
}));
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate99Codec(mapperService, logger, compressionLevel, () -> {
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate912Codec(mapperService, logger, compressionLevel, () -> {
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
}));
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec.customcodecs;

import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.opensearch.index.codec.PerFieldMappingPostingFormatCodec;
import org.opensearch.index.mapper.MapperService;

import java.util.Set;

import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99CustomCodec.DEFAULT_COMPRESSION_LEVEL;

/**
*
* Extends {@link FilterCodec} to reuse the functionality of Lucene Codec.
* Supports two modes zstd and zstd_no_dict.
* Uses Lucene912 as the delegate codec
*
* @opensearch.internal
*/
public abstract class Lucene912CustomCodec extends FilterCodec {

/** Each mode represents a compression algorithm. */
public enum Mode {
/**
* ZStandard mode with dictionary
*/
ZSTD("ZSTD912", Set.of("zstd")),
/**
* ZStandard mode without dictionary
*/
ZSTD_NO_DICT("ZSTDNODICT912", Set.of("zstd_no_dict"));

private final String codec;
private final Set<String> aliases;

Mode(String codec, Set<String> aliases) {
this.codec = codec;
this.aliases = aliases;
}

/**
* Returns the Codec that is registered with Lucene
*/
public String getCodec() {
return codec;
}

/**
* Returns the aliases of the Codec
*/
public Set<String> getAliases() {
return aliases;
}
}

private final StoredFieldsFormat storedFieldsFormat;

/**
* Creates a new compression codec with the default compression level.
*
* @param mode The compression codec (ZSTD or ZSTDNODICT).
*/
public Lucene912CustomCodec(Mode mode) {
this(mode, DEFAULT_COMPRESSION_LEVEL);
}

/**
* Creates a new compression codec with the given compression level. We use
* lowercase letters when registering the codec so that we remain consistent with
* the other compression codecs: default, lucene_default, and best_compression.
*
* @param mode The compression codec (ZSTD or ZSTDNODICT).
* @param compressionLevel The compression level.
*/
public Lucene912CustomCodec(Mode mode, int compressionLevel) {
super(mode.getCodec(), new Lucene912Codec());
this.storedFieldsFormat = new Lucene912CustomStoredFieldsFormat(mode, compressionLevel);
}

/**
* Creates a new compression codec with the given compression level. We use
* lowercase letters when registering the codec so that we remain consistent with
* the other compression codecs: default, lucene_default, and best_compression.
*
* @param mode The compression codec (ZSTD or ZSTDNODICT).
* @param compressionLevel The compression level.
* @param mapperService The mapper service.
* @param logger The logger.
*/
public Lucene912CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) {
super(mode.getCodec(), new PerFieldMappingPostingFormatCodec(Lucene912Codec.Mode.BEST_SPEED, mapperService, logger));
this.storedFieldsFormat = new Lucene912CustomStoredFieldsFormat(mode, compressionLevel);
}

@Override
public StoredFieldsFormat storedFieldsFormat() {
return storedFieldsFormat;
}

@Override
public String toString() {
return getClass().getSimpleName();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec.customcodecs;

import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.compressing.CompressionMode;
import org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingStoredFieldsFormat;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;

import java.io.IOException;
import java.util.Objects;

import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99CustomCodec.DEFAULT_COMPRESSION_LEVEL;

/** Stored field format used by pluggable codec */
public class Lucene912CustomStoredFieldsFormat extends StoredFieldsFormat {

/** A key that we use to map to a mode */
public static final String MODE_KEY = Lucene912CustomStoredFieldsFormat.class.getSimpleName() + ".mode";

protected static final int ZSTD_BLOCK_LENGTH = 10 * 48 * 1024;
protected static final int ZSTD_MAX_DOCS_PER_BLOCK = 4096;
protected static final int ZSTD_BLOCK_SHIFT = 10;

private final CompressionMode zstdCompressionMode;
private final CompressionMode zstdNoDictCompressionMode;

private final Lucene912CustomCodec.Mode mode;
private final int compressionLevel;

/** default constructor */
public Lucene912CustomStoredFieldsFormat() {
this(Lucene912CustomCodec.Mode.ZSTD, DEFAULT_COMPRESSION_LEVEL);
}

/**
* Creates a new instance.
*
* @param mode The mode represents ZSTD or ZSTDNODICT
*/
public Lucene912CustomStoredFieldsFormat(Lucene912CustomCodec.Mode mode) {
this(mode, DEFAULT_COMPRESSION_LEVEL);
}

/**
* Creates a new instance with the specified mode and compression level.
*
* @param mode The mode represents ZSTD or ZSTDNODICT
* @param compressionLevel The compression level for the mode.
*/
public Lucene912CustomStoredFieldsFormat(Lucene912CustomCodec.Mode mode, int compressionLevel) {
this.mode = Objects.requireNonNull(mode);
this.compressionLevel = compressionLevel;
zstdCompressionMode = new ZstdCompressionMode(compressionLevel);
zstdNoDictCompressionMode = new ZstdNoDictCompressionMode(compressionLevel);
}

/**
* Returns a {@link StoredFieldsReader} to load stored fields.
* @param directory The index directory.
* @param si The SegmentInfo that stores segment information.
* @param fn The fieldInfos.
* @param context The IOContext that holds additional details on the merge/search context.
*/
@Override
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
if (si.getAttribute(MODE_KEY) != null) {
String value = si.getAttribute(MODE_KEY);
Lucene912CustomCodec.Mode mode = Lucene912CustomCodec.Mode.valueOf(value);
return impl(mode).fieldsReader(directory, si, fn, context);
} else {
throw new IllegalStateException("missing value for " + MODE_KEY + " for segment: " + si.name);
}
}

/**
* Returns a {@link StoredFieldsReader} to write stored fields.
* @param directory The index directory.
* @param si The SegmentInfo that stores segment information.
* @param context The IOContext that holds additional details on the merge/search context.
*/
@Override
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
String previous = si.putAttribute(MODE_KEY, mode.name());
if (previous != null && previous.equals(mode.name()) == false) {
throw new IllegalStateException(
"found existing value for " + MODE_KEY + " for segment: " + si.name + " old = " + previous + ", new = " + mode.name()
);
}
return impl(mode).fieldsWriter(directory, si, context);
}

StoredFieldsFormat impl(Lucene912CustomCodec.Mode mode) {
switch (mode) {
case ZSTD:
return getCustomCompressingStoredFieldsFormat("CustomStoredFieldsZstd", this.zstdCompressionMode);
case ZSTD_NO_DICT:
return getCustomCompressingStoredFieldsFormat("CustomStoredFieldsZstdNoDict", this.zstdNoDictCompressionMode);
default:
throw new IllegalStateException("Unsupported compression mode: " + mode);
}
}

private StoredFieldsFormat getCustomCompressingStoredFieldsFormat(String formatName, CompressionMode compressionMode) {
return new Lucene90CompressingStoredFieldsFormat(
formatName,
compressionMode,
ZSTD_BLOCK_LENGTH,
ZSTD_MAX_DOCS_PER_BLOCK,
ZSTD_BLOCK_SHIFT
);
}

public Lucene912CustomCodec.Mode getMode() {
return mode;
}

/**
* Returns the compression level.
*/
public int getCompressionLevel() {
return compressionLevel;
}

public CompressionMode getCompressionMode() {
return mode == Lucene912CustomCodec.Mode.ZSTD_NO_DICT ? zstdNoDictCompressionMode : zstdCompressionMode;
}

}
Loading

0 comments on commit 6d74a67

Please sign in to comment.