s

solr-langid

This module is intended for use while indexing documents. It is implemented as an UpdateProcessor to be placed in an UpdateChain. Its purpose is to identify the language of each document and tag the document with the corresponding language code.
Files download
File Operation
solr-langid-8.2.0.jar download
solr-langid-8.2.0.pom download
solr-langid-8.2.0-sources.jar download
Apache Maven
<dependency>
  <groupId>org.apache.solr</groupId>
  <artifactId>solr-langid</artifactId>
  <version>8.2.0</version>
</dependency>
Gradle Groovy
implementation 'org.apache.solr:solr-langid:8.2.0'
Gradle Kotlin
implementation("org.apache.solr:solr-langid:8.2.0")
Scala SBT
libraryDependencies += "org.apache.solr" % "solr-langid" % "8.2.0"
Groovy Grape
@Grapes(
  @Grab(group='org.apache.solr', module='solr-langid', version='8.2.0')
)
Apache Ivy
<dependency org="org.apache.solr" name="solr-langid" rev="8.2.0" />
Leiningen
[org.apache.solr/solr-langid "8.2.0"]
Apache Buildr
'org.apache.solr:solr-langid:jar:8.2.0'
Dependencies
<parent>
  <groupId>org.apache.solr</groupId>
  <artifactId>solr-parent</artifactId>
  <version>8.2.0</version>
</parent>

compile

|-- org.apache.lucene:lucene-analyzers-common

|-- org.apache.lucene:lucene-analyzers-kuromoji

|-- org.apache.lucene:lucene-analyzers-nori

|-- org.apache.lucene:lucene-analyzers-phonetic

|-- org.apache.lucene:lucene-backward-codecs

|-- org.apache.lucene:lucene-classification

|-- org.apache.lucene:lucene-codecs

|-- org.apache.lucene:lucene-core

|-- org.apache.lucene:lucene-expressions

|-- org.apache.lucene:lucene-grouping

|-- org.apache.lucene:lucene-highlighter

|-- org.apache.lucene:lucene-join

|-- org.apache.lucene:lucene-memory

|-- org.apache.lucene:lucene-misc

|-- org.apache.lucene:lucene-queries

|-- org.apache.lucene:lucene-queryparser

|-- org.apache.lucene:lucene-sandbox

|-- org.apache.lucene:lucene-spatial-extras

|-- org.apache.lucene:lucene-spatial3d

|-- org.apache.lucene:lucene-suggest

|-- org.apache.solr:solr-core

|-- org.apache.solr:solr-solrj

|-- com.adobe.xmp:xmpcore

|-- com.carrotsearch:hppc

|-- com.cybozu.labs:langdetect

|-- com.drewnoakes:metadata-extractor

|-- com.epam:parso

|-- com.fasterxml.jackson.core:jackson-annotations

|-- com.fasterxml.jackson.core:jackson-core

|-- com.fasterxml.jackson.core:jackson-databind

|-- com.fasterxml.jackson.dataformat:jackson-dataformat-smile

|-- com.github.ben-manes.caffeine:caffeine

|-- com.github.virtuald:curvesapi

|-- com.google.guava:guava

|-- com.google.protobuf:protobuf-java

|-- com.google.re2j:re2j

|-- com.googlecode.juniversalchardet:juniversalchardet

|-- com.googlecode.mp4parser:isoparser

|-- com.healthmarketscience.jackcess:jackcess

|-- com.healthmarketscience.jackcess:jackcess-encrypt

|-- com.ibm.icu:icu4j

|-- com.jayway.jsonpath:json-path

|-- com.lmax:disruptor

|-- com.pff:java-libpst

|-- com.rometools:rome

|-- com.rometools:rome-utils

|-- com.tdunning:t-digest

|-- commons-beanutils:commons-beanutils

|-- commons-cli:commons-cli

|-- commons-codec:commons-codec

|-- commons-collections:commons-collections

|-- commons-fileupload:commons-fileupload

|-- commons-io:commons-io

|-- de.l3s.boilerpipe:boilerpipe

|-- io.dropwizard.metrics:metrics-core

|-- io.dropwizard.metrics:metrics-graphite

|-- io.dropwizard.metrics:metrics-jetty9

|-- io.dropwizard.metrics:metrics-jmx

|-- io.dropwizard.metrics:metrics-jvm

|-- io.opentracing:opentracing-api

|-- io.opentracing:opentracing-noop

|-- io.opentracing:opentracing-util

|-- io.sgr:s2-geometry-library-java

|-- javax.servlet:javax.servlet-api

|-- net.arnx:jsonic

|-- net.hydromatic:eigenbase-properties

|-- org.antlr:antlr4-runtime

|-- org.apache.calcite:calcite-core

|-- org.apache.calcite:calcite-linq4j

|-- org.apache.calcite.avatica:avatica-core

|-- org.apache.commons:commons-collections4

|-- org.apache.commons:commons-compress

|-- org.apache.commons:commons-configuration2

|-- org.apache.commons:commons-exec

|-- org.apache.commons:commons-lang3

|-- org.apache.commons:commons-math3

|-- org.apache.commons:commons-text

|-- org.apache.curator:curator-client

|-- org.apache.curator:curator-framework

|-- org.apache.curator:curator-recipes

|-- org.apache.hadoop:hadoop-annotations

|-- org.apache.hadoop:hadoop-auth

|-- org.apache.hadoop:hadoop-common

|-- org.apache.hadoop:hadoop-hdfs-client

|-- org.apache.htrace:htrace-core4

|-- org.apache.httpcomponents:httpclient

|-- org.apache.httpcomponents:httpcore

|-- org.apache.httpcomponents:httpmime

|-- org.apache.james:apache-mime4j-core

|-- org.apache.james:apache-mime4j-dom

|-- org.apache.kerby:kerb-core

|-- org.apache.kerby:kerb-util

|-- org.apache.kerby:kerby-asn1

|-- org.apache.kerby:kerby-pkix

|-- org.apache.logging.log4j:log4j-1.2-api

|-- org.apache.logging.log4j:log4j-api

|-- org.apache.logging.log4j:log4j-core

|-- org.apache.logging.log4j:log4j-slf4j-impl

|-- org.apache.logging.log4j:log4j-web

|-- org.apache.opennlp:opennlp-tools

|-- org.apache.pdfbox:fontbox

|-- org.apache.pdfbox:jempbox

|-- org.apache.pdfbox:pdfbox

|-- org.apache.pdfbox:pdfbox-tools

|-- org.apache.poi:poi

|-- org.apache.poi:poi-ooxml

|-- org.apache.poi:poi-ooxml-schemas

|-- org.apache.poi:poi-scratchpad

|-- org.apache.tika:tika-core

|-- org.apache.tika:tika-java7

|-- org.apache.tika:tika-parsers

|-- org.apache.tika:tika-xmp

|-- org.apache.xmlbeans:xmlbeans

|-- org.apache.zookeeper:zookeeper

|-- org.apache.zookeeper:zookeeper-jute

|-- org.aspectj:aspectjrt

|-- org.bitbucket.b_c:jose4j

|-- org.bouncycastle:bcmail-jdk15on

|-- org.bouncycastle:bcpkix-jdk15on

|-- org.bouncycastle:bcprov-jdk15on

|-- org.brotli:dec

|-- org.ccil.cowan.tagsoup:tagsoup

|-- org.codehaus.janino:commons-compiler

|-- org.codehaus.janino:janino

|-- org.codehaus.woodstox:stax2-api

|-- org.codehaus.woodstox:woodstox-core-asl

|-- org.eclipse.jetty:jetty-alpn-client

|-- org.eclipse.jetty:jetty-alpn-java-client

|-- org.eclipse.jetty:jetty-alpn-java-server

|-- org.eclipse.jetty:jetty-alpn-server

|-- org.eclipse.jetty:jetty-client

|-- org.eclipse.jetty:jetty-continuation

|-- org.eclipse.jetty:jetty-deploy

|-- org.eclipse.jetty:jetty-http

|-- org.eclipse.jetty:jetty-io

|-- org.eclipse.jetty:jetty-jmx

|-- org.eclipse.jetty:jetty-rewrite

|-- org.eclipse.jetty:jetty-security

|-- org.eclipse.jetty:jetty-server

|-- org.eclipse.jetty:jetty-servlet

|-- org.eclipse.jetty:jetty-servlets

|-- org.eclipse.jetty:jetty-util

|-- org.eclipse.jetty:jetty-webapp

|-- org.eclipse.jetty:jetty-xml

|-- org.eclipse.jetty.http2:http2-client

|-- org.eclipse.jetty.http2:http2-common

|-- org.eclipse.jetty.http2:http2-hpack

|-- org.eclipse.jetty.http2:http2-http-client-transport

|-- org.eclipse.jetty.http2:http2-server

|-- org.gagravarr:vorbis-java-core

|-- org.gagravarr:vorbis-java-tika

|-- org.jdom:jdom2

|-- org.locationtech.spatial4j:spatial4j

|-- org.ow2.asm:asm

|-- org.ow2.asm:asm-commons

|-- org.restlet.jee:org.restlet

|-- org.restlet.jee:org.restlet.ext.servlet

|-- org.rrd4j:rrd4j

|-- org.slf4j:jcl-over-slf4j

|-- org.slf4j:jul-to-slf4j

|-- org.slf4j:slf4j-api

|-- org.tallison:jmatio

|-- org.tukaani:xz

|-- xerces:xercesImpl

test

|-- org.apache.lucene:lucene-test-framework

|-- org.apache.solr:solr-test-framework