File languagetool-hunspell.patch of Package languagetool

Overview Repositories Revisions Requests Users Attributes Meta

File languagetool-hunspell.patch of Package languagetool

--- languagetool-4.8/languagetool-core/pom.xml	2019-12-27 11:17:28.000000000 +0100
+++ languagetool-4.8/languagetool-core/pom.xml	2020-01-07 09:32:01.278033500 +0100
@@ -106,6 +106,11 @@
             <version>28.1-jre</version>
         </dependency>
         <dependency>
+            <groupId>net.java.dev.jna</groupId>
+            <artifactId>jna</artifactId>
+            <version>4.5.2</version>
+        </dependency>
+        <dependency>
             <groupId>org.carrot2</groupId>
             <artifactId>morfologik-fsa</artifactId>
             <version>${morfologik.version}</version>
@@ -218,13 +223,6 @@
             <artifactId>slf4j-api</artifactId>
             <version>1.7.25</version>
         </dependency>
-
-        <dependency>
-            <groupId>com.gitlab.dumonts</groupId>
-            <artifactId>hunspell</artifactId>
-            <version>1.1.0</version>
-        </dependency>
-        
         <dependency>
             <groupId>ch.qos.logback</groupId>
             <artifactId>logback-classic</artifactId>
--- languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/CompoundAwareHunspellRule.java	2019-12-27 11:17:28.000000000 +0100
+++ languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/CompoundAwareHunspellRule.java	2020-01-07 09:32:01.278033500 +0100
@@ -143,7 +143,7 @@
     int partCount = 0;
     List<String> candidates = new ArrayList<>();
     for (String part : parts) {
-      if (!hunspell.spell(part)) {
+      if (hunspellDict.misspelled(part)) {
         // assume noun, so use uppercase:
         boolean doUpperCase = partCount > 0 && !StringTools.startsWithUppercase(part);
         List<String> suggestions = morfoSpeller.getSuggestions(doUpperCase ? StringTools.uppercaseFirstChar(part) : part);
@@ -213,7 +213,7 @@
       String[] words = tokenizeText(wordOrPhrase);
       boolean wordIsOkay = true;
       for (String word : words) {
-        if (!hunspell.spell(word)) {
+        if (hunspellDict.misspelled(word)) {
           wordIsOkay = false;
           break;
         }
--- languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/Hunspell.java	2019-12-27 11:17:28.000000000 +0100
+++ languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/Hunspell.java	2020-01-07 09:32:01.278033500 +0100
@@ -1,132 +1,418 @@
 package org.languagetool.rules.spelling.hunspell;
 
-import dumonts.hunspell.bindings.HunspellLibrary;
-import org.bridj.Pointer;
-
-import java.io.Closeable;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.nio.charset.Charset;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardCopyOption;
-import java.util.*;
-import java.util.stream.Collectors;
-
-public class Hunspell implements Closeable {
-  private final Pointer<HunspellLibrary.Hunhandle> handle;
-  private final Charset charset;
-  
-  private static final Map<LanguageAndPath, Hunspell> map = new HashMap<>();
-  
-  static class LanguageAndPath {
-    private final Path dictionary;
-    private final Path affix;
-    LanguageAndPath(Path dictionary, Path affix) {
-      this.dictionary = Objects.requireNonNull(dictionary);
-      this.affix = Objects.requireNonNull(affix);
-    }
-    @Override
-    public boolean equals(Object o) {
-      if (this == o) return true;
-      if (o == null || getClass() != o.getClass()) return false;
-      LanguageAndPath that = (LanguageAndPath) o;
-      return Objects.equals(dictionary, that.dictionary) &&
-        Objects.equals(affix, that.affix);
-    }
-    @Override
-    public int hashCode() {
-      return Objects.hash(dictionary, affix);
-    }
+import java.io.UnsupportedEncodingException;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Scanner;
+
+import com.sun.jna.Native;
+import com.sun.jna.Pointer;
+import com.sun.jna.ptr.PointerByReference;
+
+/**
+ * The simple hunspell library frontend which takes care of creating
+ * and singleton'ing the library instance (no need to load it more than once
+ * per process).
+ *
+ * The Hunspell java bindings are licensed under the same terms as Hunspell itself (GPL/LGPL/MPL tri-license),
+ * see the file COPYING.txt in the root of the distribution for the exact terms.
+ *
+ * @author Flemming Frandsen (flfr at stibo dot com)
+ */
+
+public class Hunspell {
+
+    /**
+     * The Singleton instance of Hunspell
+     */
+    private static Hunspell hunspell = null;
+
+    /**
+     * The native library instance, created by JNA.
+     */
+    private HunspellLibrary hsl = null;
+
+    /**
+     * The library file that was loaded.
+     */
+    private String libFile;
+
+    /**
+     * Returns the singleton Hunspell instance, looking for the native lib in
+     * the default directories.
+     */
+    public static Hunspell getInstance() throws UnsatisfiedLinkError, UnsupportedOperationException { 
+        return getInstance(null);
+    }
+
+    /**
+     * Returns the singleton Hunspell instance, looking for the native lib in
+     * the directory specified (only honoured by the call that creates it).
+     *
+     * @param libDir Optional absolute directory where the native lib can be found. 
+     */
+    public static synchronized Hunspell getInstance(String libDir) throws UnsatisfiedLinkError, UnsupportedOperationException {
+        if (hunspell != null) {
+            return hunspell;
   }
 
-  public Hunspell(Path dictionary, Path affix) {
-    Pointer<Byte> aff = Pointer.pointerToCString(affix.toString());
-    Pointer<Byte> dic = Pointer.pointerToCString(dictionary.toString());
-    handle = HunspellLibrary.Hunspell_create(aff, dic);
-    charset = Charset.forName(HunspellLibrary.Hunspell_get_dic_encoding(handle).getCString());
-    if (this.handle == null) {
-      throw new RuntimeException("Unable to create Hunspell instance");
-    }
+        hunspell = new Hunspell(libDir);
+        return hunspell;
   }
   
-  public synchronized static Hunspell getInstance(Path dictionary, Path affix) {
-    LanguageAndPath key = new LanguageAndPath(dictionary, affix);
-    Hunspell hunspell = map.get(key);
-    if (hunspell != null) {
-      return hunspell;
+    protected void tryLoad(String libFile) throws UnsupportedOperationException {
+        hsl = (HunspellLibrary)Native.loadLibrary(libFile, HunspellLibrary.class);
     }
-    Hunspell newHunspell = new Hunspell(dictionary, affix);
-    map.put(key, newHunspell);
-    return newHunspell;
+
+
+    /**
+     * Constructor for the library, loads the native lib.
+     *
+     * Loading is done in the first of the following ways that works:
+     * 1) From libDir, if given, using the full platform file name (see libName()),
+     *    otherwise by letting JNA look up the bare name (see libNameBare()).
+     * 2) The library is searched for in the classpath, extracted to disk and loaded.
+     *
+     * @param libDir Optional absolute directory where the native lib can be found. 
+     * @throws UnsupportedOperationException if the OS or architecture is simply not supported.
+     */
+    protected Hunspell(String libDir) throws UnsatisfiedLinkError, UnsupportedOperationException {
+
+        libFile = libDir != null ? libDir+"/"+libName() : libNameBare();
+        try {	   
+            hsl = (HunspellLibrary)Native.loadLibrary(libFile, HunspellLibrary.class);
+        } catch (UnsatisfiedLinkError urgh) {
+
+            // Oh dear, the library was not found in the file system, let's try the classpath
+            libFile = libName();
+            InputStream is = Hunspell.class.getResourceAsStream("/"+libFile);
+            if (is == null) {
+                throw new UnsatisfiedLinkError("Can't find "+libFile+
+                        " in the filesystem nor in the classpath\n"+
+                        urgh);
   }
 
-  public static Hunspell forDictionaryInResources(String language, String resourcePath) {
+            // Extract the library from the classpath into a temp file.
+            File lib;
+            FileOutputStream fos = null;
     try {
-      ClassLoader loader = Hunspell.class.getClassLoader();
-      InputStream dictionaryStream = loader.getResourceAsStream(resourcePath + language + ".dic");
-      InputStream affixStream = loader.getResourceAsStream(resourcePath + language + ".aff");
-      if (dictionaryStream == null || affixStream == null) {
-        throw new RuntimeException("Could not find dictionary for language \"" + language + "\" in classpath");
-      }
-      Path dictionary = Files.createTempFile(language, ".dic");
-      Path affix = Files.createTempFile(language, ".aff");
-      Files.copy(dictionaryStream, dictionary, StandardCopyOption.REPLACE_EXISTING);
-      Files.copy(affixStream, affix, StandardCopyOption.REPLACE_EXISTING);
-      return new Hunspell(dictionary, affix);
+                lib = File.createTempFile("jna", "."+libFile);
+                lib.deleteOnExit();
+                fos = new FileOutputStream(lib);
+                int count;
+                byte[] buf = new byte[1024];
+                while ((count = is.read(buf, 0, buf.length)) > 0) {
+                    fos.write(buf, 0, count);
+                }
+
     } catch (IOException e) {
-      throw new RuntimeException("Could not create temporary dictionaries for language \"" + language + "\"", e);
+                throw new Error("Failed to create temporary file for "+libFile, e);
+
+            } finally {
+                try { is.close(); } catch(IOException e) { }
+                if (fos != null) {
+                    try { fos.close(); } catch(IOException e) { }
+                }
+            }
+            //System.out.println("Loading temp lib: "+lib.getAbsolutePath());
+            hsl = (HunspellLibrary)Native.loadLibrary(lib.getAbsolutePath(), HunspellLibrary.class);
+        }
+    }
+
+    public String getLibFile() {
+        return libFile;
+    }
+
+    /**
+     * Calculate the filename of the native hunspell lib.
+     * The files have completely different names to allow them to live
+     * in the same directory and avoid confusion.
+     */
+    public static String libName() throws UnsupportedOperationException {
+        String os = System.getProperty("os.name").toLowerCase();
+        if (os.startsWith("windows")) {
+            return libNameBare()+".dll";
+
+        } else if (os.startsWith("mac os x")) {
+            //	    return libNameBare()+".dylib";
+            return libNameBare()+".jnilib";
+
+        } else {
+            return "lib"+libNameBare()+".so";
+        }  
+    }
+
+    public static String libNameBare() throws UnsupportedOperationException {
+        String os = System.getProperty("os.name").toLowerCase();
+        String arch = System.getProperty("os.arch").toLowerCase();
+
+        // Annoying that Java doesn't have consistent names for the arch types:
+        boolean x86  = arch.equals("x86")    || arch.equals("i386")  || arch.equals("i686");
+        boolean amd64= arch.equals("x86_64") || arch.equals("amd64") || arch.equals("ia64n");
+
+        if (os.startsWith("windows")) {
+            if (x86) {
+                return "hunspell-win-x86-32";
+            }
+            if (amd64) { 
+                return "hunspell-win-x86-64";
+            }
+
+        } else if (os.startsWith("mac os x")) {
+            if (x86) {
+                return "hunspell-darwin-x86-32";
+            }
+            if (amd64) {
+                return "hunspell-darwin-x86-64";
+            }
+            if (arch.equals("ppc")) {		    
+                return "hunspell-darwin-ppc-32";
+            }
+
+        } else if (os.startsWith("linux")) {
+            if (x86) {
+                return "hunspell-linux-x86-32";
+            }
+            if (amd64) {
+                return "hunspell-linux-x86-64";
+            }
+
+        } else if (os.startsWith("sunos")) {
+            //if (arch.equals("sparc")) { 
+            //	return "hunspell-sunos-sparc-64";
+            //}		
+            
+        } else if (os.startsWith("freebsd")) {
+            // Patch by Koen Vervloesem - FreeBSD is not supported yet, but: "... not a real solution, but
+            // having this fixed makes it easier for me to build new LanguageTool releases without always
+            // having to apply a local patch first."
+            if (x86) {
+                return "hunspell-freebsd-x86-32";
+            }
+            if (amd64) {
+                return "hunspell-freebsd-x86-64";
+            }
+
+        } else if (os.startsWith("aix")) {
+            // added by Martin Kallinger (https://github.com/languagetool-org/languagetool/pull/1090)
+            return "hunspell-ppc64"; 
     }
+
+        throw new UnsupportedOperationException("Unknown OS/arch: "+os+"/"+arch);
   }
 
-  public static Hunspell forDictionaryInResources(String language) {
-    return forDictionaryInResources(language, "");
+    /**
+     * This is the cache where we keep the already loaded dictionaries around
+     */
+    private HashMap<String, Dictionary> map = new HashMap<>();
+
+    
+    private static CharBuffer ensureCapacity(CharBuffer buffer, int capacity) {
+        if (buffer == null || buffer.capacity() < capacity) {
+            buffer = CharBuffer.allocate(capacity);
+        }
+        return buffer;
   }
 
-  public boolean spell(String word) {
-    if (handle == null) {
-      throw new RuntimeException("Attempt to use hunspell instance after closing");
+    /**
+     * Gets an instance of the dictionary. 
+     *
+     * @param baseFileName the base name of the dictionary, 
+     * passing /dict/da_DK means that the files /dict/da_DK.dic
+     * and /dict/da_DK.aff get loaded
+     */
+    public synchronized Dictionary getDictionary(String baseFileName)
+            throws IOException {
+        // Synchronized: the cache is a plain HashMap shared by every user of the
+        // singleton, so unsynchronized concurrent calls could corrupt it or
+        // create the same native dictionary twice.
+        Dictionary cached = map.get(baseFileName);
+        if (cached == null) {
+            cached = new Dictionary(baseFileName);
+            map.put(baseFileName, cached);
+        }
+        return cached;
+    }
+
+    /**
+     * Removes a dictionary from the internal cache
+     *
+     * @param baseFileName the base name of the dictionary, as passed to
+     * getDictionary()
+     */
+    public void destroyDictionary(String baseFileName) {
+        if (map.containsKey(baseFileName)) {
+            map.remove(baseFileName);
+        }
     }
-    @SuppressWarnings("unchecked")
-    Pointer<Byte> str = (Pointer<Byte>) Pointer.pointerToString(word, Pointer.StringType.C, charset);
-    int result = HunspellLibrary.Hunspell_spell(handle, str);
-    return result != 0;
+
+    /**
+     * Class representing a single dictionary.
+     */
+    public class Dictionary {
+        /**
+         * The pointer to the hunspell object as returned by the hunspell
+         * constructor.
+         */
+        private Pointer hunspellDict = null;
+
+        /**
+         * The encoding used by this dictionary
+         */
+        private String encoding;
+
+        /**
+         * The tokenization characters (the WORDCHARS value of the affix file)
+         */
+        private final String wordChars;
+
+        /**
+         * Creates an instance of the dictionary.
+         * @param baseFileName the base name of the dictionary, 
+         */
+        Dictionary(String baseFileName) throws IOException {
+            File dic = new File(baseFileName + ".dic");
+            File aff = new File(baseFileName + ".aff");
+
+            if (!dic.canRead() || !aff.canRead()) {
+                throw new FileNotFoundException("The dictionary files "+
+                        baseFileName+
+                        "(.aff|.dic) could not be read");
   }
 
-  public void add(String word) {
-    if (handle == null) {
-      throw new RuntimeException("Attempt to use hunspell instance after closing");
+            hunspellDict = hsl.Hunspell_create(aff.toString(), dic.toString());
+            encoding = hsl.Hunspell_get_dic_encoding(hunspellDict);
+
+            //hunspell uses non-standard names of charsets 
+            if ("microsoft1251".equals(encoding)) {
+                encoding = "windows-1251";
+            } else if ("ISCII-DEVANAGARI".equals(encoding)) {
+                encoding = "ISCII91";
     }
-    @SuppressWarnings("unchecked")
-    Pointer<Byte> str = (Pointer<Byte>) Pointer.pointerToString(word, Pointer.StringType.C, charset);
-    HunspellLibrary.Hunspell_add(handle, str);
+
+            wordChars = getWordCharsFromFile(aff);
   }
 
-  public List<String> suggest(String word) {
-    // Create pointer to native string
-    @SuppressWarnings("unchecked")
-    Pointer<Byte> str = (Pointer<Byte>) Pointer.pointerToString(word, Pointer.StringType.C, charset);
-    // Create pointer to native string array
-    Pointer<Pointer<Pointer<Byte>>> nativeSuggestionArray = Pointer.allocatePointerPointer(Byte.class);
-    // Hunspell will allocate the array and fill it with suggestions
-    int suggestionCount = HunspellLibrary.Hunspell_suggest(handle, nativeSuggestionArray, str);
-    if (suggestionCount == 0) {
-      // Return early and don't try to free the array
-      return new ArrayList<>();
+        /**
+         * Deallocate the dictionary.
+         */
+        public void destroy() {
+            if (hsl != null && hunspellDict != null) {
+                hsl.Hunspell_destroy(hunspellDict);
+                hunspellDict = null;
+            }
     }
-    // Ask bridj for a `java.util.List` that wraps `nativeSuggestionArray`
-    List<Pointer<Byte>> nativeSuggestionList = nativeSuggestionArray.get().validElements(suggestionCount).asList();
-    // Convert C Strings to java strings
-    List<String> suggestions = nativeSuggestionList.stream().map((p) -> p.getStringAtOffset(0, Pointer.StringType.C, charset)).collect(Collectors.toList());
 
-    // We can free the underlying buffer now because Java's `String` owns it's own memory
-    HunspellLibrary.Hunspell_free_list(handle, nativeSuggestionArray, suggestionCount);
-    return suggestions;
+        /**
+         * Used to query what are word-characters
+         * @return A string composed of characters that are parts of words,
+         * even if they are not alphabetic.
+         */
+        public String getWordChars() {
+            return wordChars;
   }
 
-  public void close() {
-    if (handle != null) {
-      HunspellLibrary.Hunspell_destroy(handle);
+        /**
+         * Check if a word is spelled correctly
+         *
+         * @param word The word to check.
+         * @return true if the <code>word</code> is not correctly spelled
+         */
+        public boolean misspelled(String word) {
+            try {
+                final byte[] wordAsBytes = stringToBytes(word);
+                if (wordAsBytes.length == 0 && word.length() > 0) {
+                    return true;
+                }
+                return (hsl.Hunspell_spell(hunspellDict, wordAsBytes) == 0);
+            } catch (UnsupportedEncodingException e) {
+                return true;
     }
   }
+
+        /**
+         * Convert a Java string to a zero terminated byte array, in the
+         * encoding of the dictionary, as expected by the hunspell functions.
+         */
+        protected byte[] stringToBytes(String str) throws UnsupportedEncodingException {
+          byte[] strBytes = str.getBytes(encoding);
+          byte[] zeroTerminated = Arrays.copyOf(strBytes, strBytes.length + 1);
+          zeroTerminated[zeroTerminated.length - 1] = '\u0000';
+          return zeroTerminated;
+        }
+
+        /**
+         * Returns a list of suggestions 
+         *
+         * @param word The word to check and offer suggestions for
+         */
+        public List<String> suggest(String word) throws CharacterCodingException {
+            List<String> res = new ArrayList<>();
+            try {
+                PointerByReference suggestions = new PointerByReference();
+                // Encode the word once and hand the same buffer to the native call.
+                final byte[] wordAsBytes = stringToBytes(word);
+                if (wordAsBytes.length == 0 && word.length() > 0) {
+                    return res;
+                }
+                final int suggestionsCount = hsl.Hunspell_suggest(
+                        hunspellDict, suggestions, wordAsBytes);
+                if (suggestionsCount <= 0 || suggestions.getValue() == null) {
+                    return res;
+                }
+
+                // Get each of the suggestions out of the pointer array.
+                Pointer[] pointerArray = suggestions.getValue().
+                        getPointerArray(0, suggestionsCount);
+
+                for (int i = 0; i < suggestionsCount; i++) {
+                    long len = pointerArray[i].indexOf(0, (byte) 0);
+                    if (len != -1) {
+                        if (len > Integer.MAX_VALUE) {
+                            throw new RuntimeException(
+                                    "String improperly terminated: " + len);
+                        }
+                        byte[] data = pointerArray[i].getByteArray(0, (int) len);
+
+                        res.add(new String(data, encoding));
+                    }
+                }
+                // NOTE(review): the native list is never freed; HunspellLibrary has no Hunspell_free_list binding.
+            } catch (UnsupportedEncodingException ignored) { } // best effort: return what was decoded so far
+
+            return res;
+        }
+        
+        private String getWordCharsFromFile(final File affixFile) throws IOException {
+            // Returns the value of the first WORDCHARS line, or "" if there is none.
+            try (Scanner scanner = new Scanner(affixFile, encoding)) {
+                while (scanner.hasNextLine()) {
+                    final String line = scanner.nextLine().trim();
+                    if (line.startsWith("WORDCHARS ")) {
+                        return line.substring("WORDCHARS ".length());
+                    }
+                }
+            }
+            return "";
+        }
+        
+        /**
+         * Adds a word to the runtime dictionary.
+         * @param word Word to be added.
+         */
+        public void addWord(final String word) throws UnsupportedEncodingException {
+            hsl.Hunspell_add(hunspellDict, stringToBytes(word));
+        }
+                
+    }
+
 }
--- languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/HunspellLibrary.java	1970-01-01 01:00:00.000000000 +0100
+++ languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/HunspellLibrary.java	2020-01-07 09:32:01.278033500 +0100
@@ -0,0 +1,67 @@
+package org.languagetool.rules.spelling.hunspell;
+
+import com.sun.jna.Library;
+import com.sun.jna.Pointer;
+import com.sun.jna.ptr.PointerByReference;
+
+/**
+ * Functions from $hunspell/src/hunspell/hunspell.h
+ *
+ * The Hunspell java bindings are licensed under the same terms as Hunspell itself (GPL/LGPL/MPL tri-license),
+ * see the file COPYING.txt in the root of the distribution for the exact terms.
+ *
+ * @author Flemming Frandsen (flfr at stibo dot com)
+ */
+
+public interface HunspellLibrary extends Library {
+
+    /**
+     * Create the hunspell instance
+     * @param affpath The affix file
+     * @param dpath The dictionary file
+     * @return The hunspell object
+     */
+    public Pointer Hunspell_create(String affpath, String dpath);
+
+    /**
+     * Destroy the hunspell instance.
+     * @param pHunspell The Hunspell object returned by Hunspell_create
+     */
+    public void Hunspell_destroy(Pointer pHunspell);
+
+    /**
+     * spell(word) - spellcheck word
+     * @param pHunspell The Hunspell object returned by Hunspell_create
+     * @param word The word to spellcheck.
+     * @return 0 = bad word, not 0 = good word
+     */
+    public int Hunspell_spell(Pointer pHunspell, byte[] word);
+
+    /**
+     * Get the dictionary encoding
+     * @param pHunspell : The Hunspell object returned by Hunspell_create
+     * @return The encoding name
+     */
+    public String Hunspell_get_dic_encoding(Pointer pHunspell);
+
+    /**
+     * Search suggestions
+     * @param pHunspell The Hunspell object returned by Hunspell_create
+     * @param slst
+     * input: pointer to an array of string pointers; the array itself
+     *   (here *slst) need not be initialized
+     * output: number of suggestions in the string array, and the suggestions
+     *   in a newly allocated array of strings (*slst will be NULL when the
+     *   number of suggestions equals 0.)
+     * @param word The word to offer suggestions for.
+     */
+    public int Hunspell_suggest(Pointer pHunspell, PointerByReference slst, byte[] word);
+    
+    /**
+     * Add a word to the run-time dictionary.
+     * @param pHunspell The Hunspell object returned by Hunspell_create
+     * @param word The word added to the runtime dictionary.
+     */
+    public int Hunspell_add(Pointer pHunspell, byte[] word);
+            
+}
--- languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java	2019-12-27 11:17:28.000000000 +0100
+++ languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java	2020-01-07 09:32:01.278033500 +0100
@@ -27,9 +27,12 @@
 import java.net.URISyntaxException;
 import java.net.URL;
 import java.nio.charset.StandardCharsets;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Queue;
+import java.util.ResourceBundle;
 import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
@@ -70,7 +73,7 @@
 
   protected final SuggestionsOrderer suggestionsOrderer;
   protected boolean needsInit = true;
-  protected Hunspell hunspell = null;
+  protected Hunspell.Dictionary hunspellDict = null;
 
   private static final ConcurrentLinkedQueue<String> activeChecks = new ConcurrentLinkedQueue<>();
   private static final String NON_ALPHABETIC = "[^\\p{L}]";
@@ -141,7 +144,7 @@
     if (needsInit) {
       init();
     }
-    if (hunspell == null) {
+    if (hunspellDict == null) {
       // some languages might not have a dictionary, be silent about it
       return toRuleMatchArray(ruleMatches);
     }
@@ -297,7 +300,7 @@
       }
       return (
               isAlphabetic && !"--".equals(word)
-              && (hunspell != null && !hunspell.spell(word))
+              && (hunspellDict != null && hunspellDict.misspelled(word))
               && !ignoreWord(word)
              )
              || isProhibited(cutOffDot(word));
@@ -310,7 +313,7 @@
     if (needsInit) {
       init();
     }
-    return hunspell.suggest(word);
+    return hunspellDict.suggest(word);
   }
 
   protected List<String> sortSuggestionByQuality(String misspelling, List<String> suggestions) {
@@ -368,33 +371,20 @@
     String shortDicPath = getDictFilenameInResources(langCountry);
     String wordChars = "";
     // set dictionary only if there are dictionary files:
-    Path affPath = null;
     if (JLanguageTool.getDataBroker().resourceExists(shortDicPath)) {
       String path = getDictionaryPath(langCountry, shortDicPath);
       if ("".equals(path)) {
-        hunspell = null;
+        hunspellDict = null;
       } else {
-        affPath = Paths.get(path + ".aff");
-        hunspell = Hunspell.getInstance(Paths.get(path + ".dic"), affPath);
+        hunspellDict = Hunspell.getInstance().getDictionary(path);
         addIgnoreWords();
       }
     } else if (new File(shortDicPath + ".dic").exists()) {
       // for dynamic languages
-      affPath = Paths.get(shortDicPath + ".aff");
-      hunspell = Hunspell.getInstance(Paths.get(shortDicPath + ".dic"), affPath);
+      hunspellDict = Hunspell.getInstance().getDictionary(shortDicPath);
     }
-    if (affPath != null) {
-      Scanner sc = new Scanner(affPath);
-      while (sc.hasNextLine()) {
-        String line = sc.nextLine();
-        if (line.startsWith("WORDCHARS ")) {
-          String wordCharsFromAff = line.substring("WORDCHARS ".length());
-          //System.out.println("#" + wordCharsFromAff+ "#");
-          wordChars = "(?![" + wordCharsFromAff.replace("-", "\\-") + "])";
-          break;
-        }
-      }
-      
+    if (hunspellDict != null && !hunspellDict.getWordChars().isEmpty()) {
+      wordChars = "(?![" + hunspellDict.getWordChars().replace("-", "\\-") + "])";
     }
     nonWordPattern = Pattern.compile(wordChars + NON_ALPHABETIC);
     needsInit = false;
@@ -406,13 +396,13 @@
   }
 
   private void addIgnoreWords() throws IOException {
-    wordsToBeIgnored.add(SpellingCheckRule.LANGUAGETOOL);
-    wordsToBeIgnored.add(SpellingCheckRule.LANGUAGETOOLER);
+    hunspellDict.addWord(SpellingCheckRule.LANGUAGETOOL);
+    hunspellDict.addWord(SpellingCheckRule.LANGUAGETOOLER);
     URL ignoreUrl = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(getIgnoreFileName());
     List<String> ignoreLines = Resources.readLines(ignoreUrl, StandardCharsets.UTF_8);
     for (String ignoreLine : ignoreLines) {
       if (!ignoreLine.startsWith("#")) {
-        wordsToBeIgnored.add(ignoreLine);
+        hunspellDict.addWord(ignoreLine);
       }
     }
   }
--- languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/SpellingCheckRule.java	2019-12-27 11:17:28.000000000 +0100
+++ languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/SpellingCheckRule.java	2020-01-07 09:32:01.278033500 +0100
@@ -81,6 +81,7 @@
   private static final Comparator<String> STRING_LENGTH_COMPARATOR = Comparator.comparingInt(String::length);
 
   private final UserConfig userConfig;
+  private final Set<String> wordsToBeIgnored = new HashSet<>();
   private final Set<String> wordsToBeProhibited = new HashSet<>();
   private final List<RuleWithLanguage> altRules;
 
@@ -90,7 +91,6 @@
   private List<DisambiguationPatternRule> antiPatterns = new ArrayList<>();
   private boolean considerIgnoreWords = true;
   private boolean convertsCase = false;
-  protected final Set<String> wordsToBeIgnored = new HashSet<>();
   protected int ignoreWordsWithLength = 0;
 
   public SpellingCheckRule(ResourceBundle messages, Language language, UserConfig userConfig) {
--- languagetool-4.8/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java	2019-12-27 11:17:28.000000000 +0100
+++ languagetool-4.8/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java	2020-01-07 09:32:01.282033523 +0100
@@ -1132,107 +1132,107 @@
       return Collections.singletonList("Std.");
     } else if (word.matches(".*ibel[hk]eit$")) {
       suggestion = word.replaceFirst("el[hk]eit$", "ilität");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.endsWith("aquise")) {
       suggestion = word.replaceFirst("aquise$", "akquise");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.endsWith("standart")) {
       suggestion = word.replaceFirst("standart$", "standard");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.endsWith("standarts")) {
       suggestion = word.replaceFirst("standarts$", "standards");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.endsWith("tips")) {
       suggestion = word.replaceFirst("tips$", "tipps");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.endsWith("tip")) {
       suggestion = word + "p";
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.endsWith("entfehlung")) {
       suggestion = word.replaceFirst("ent", "emp");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.endsWith("oullie")) {
       suggestion = word.replaceFirst("oullie$", "ouille");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.startsWith("[dD]urschnitt")) {
       suggestion = word.replaceFirst("^urschnitt", "urchschnitt");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.startsWith("Bundstift")) {
       suggestion = word.replaceFirst("^Bundstift", "Buntstift");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.matches("[aA]llmähll?i(g|ch)(e[mnrs]?)?")) {
       suggestion = word.replaceFirst("llmähll?i(g|ch)", "llmählich");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.matches(".*[mM]a[jy]onn?[äe]se.*")) {
       suggestion = word.replaceFirst("a[jy]onn?[äe]se", "ayonnaise");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.matches(".*[rR]es(a|er)[vw]i[he]?rung(en)?")) {
       suggestion = word.replaceFirst("es(a|er)[vw]i[he]?rung", "eservierung");
-      if (hunspell.spell(suggestion)) { // suggest e.g. 'Ticketreservierung', but not 'Blödsinnsquatschreservierung'
+      if (!hunspellDict.misspelled(suggestion)) { // suggest e.g. 'Ticketreservierung', but not 'Blödsinnsquatschreservierung'
         return Collections.singletonList(suggestion);
       }
     } else if (word.matches("[rR]eschaschier.+")) {
       suggestion = word.replaceFirst("schaschier", "cherchier");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.matches(".*[lL]aborants$")) {
       suggestion = word.replaceFirst("ts$", "ten");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.matches("[pP]roff?ess?ion([äe])h?ll?(e[mnrs]?)?")) {
       suggestion = word.replaceFirst("roff?ess?ion([äe])h?l{1,2}", "rofessionell");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.matches("[vV]erstehendniss?(es?)?")) {
       suggestion = word.replaceFirst("[vV]erstehendnis", "Verständnis");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.matches("koregier.+")) {
       suggestion = word.replaceAll("reg", "rrig");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.matches("diagno[sz]ier.*")) {
       suggestion = word.replaceAll("gno[sz]ier", "gnostizier");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.matches(".*eiss.*")) {
       suggestion = word.replaceAll("eiss", "eiß");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.matches(".*uess.*")) {
       suggestion = word.replaceAll("uess", "üß");
-      if (hunspell.spell(suggestion)) {
+      if (!hunspellDict.misspelled(suggestion)) {
         return Collections.singletonList(suggestion);
       }
     } else if (word.equals("gin")) {
@@ -1286,17 +1286,17 @@
         return Collections.singletonList("Ladys");
       } else if (word.endsWith("derbies")) {
         suggestion = word.replaceFirst("derbies$", "derbys");
-        if (hunspell.spell(suggestion)) {
+        if (!hunspellDict.misspelled(suggestion)) {
           return Collections.singletonList(suggestion);
         }
       } else if (word.endsWith("stories")) {
         suggestion = word.replaceFirst("stories$", "storys");
-        if (hunspell.spell(suggestion)) {
+        if (!hunspellDict.misspelled(suggestion)) {
           return Collections.singletonList(suggestion);
         }
       } else if (word.endsWith("parties")) {
         suggestion = word.replaceFirst("parties$", "partys");
-        if (hunspell.spell(suggestion)) {
+        if (!hunspellDict.misspelled(suggestion)) {
           return Collections.singletonList(suggestion);
         }
       }
@@ -1334,8 +1334,8 @@
       return Collections.singletonList("Zynismus");
     } else if (word.matches("Email[a-zäöü]{5,}")) {
       String suffix = word.substring(5);
-      if (!hunspell.spell(suffix)) {
-        List<String> suffixSuggestions = hunspell.suggest(StringTools.uppercaseFirstChar(suffix));
+      if (hunspellDict.misspelled(suffix)) {
+        List<String> suffixSuggestions = hunspellDict.suggest(StringTools.uppercaseFirstChar(suffix));  // keep upstream's capitalization of the suffix before asking for suggestions
         suffix = suffixSuggestions.isEmpty() ? suffix : suffixSuggestions.get(0);
       }
       return Collections.singletonList("E-Mail-"+Character.toUpperCase(suffix.charAt(0))+suffix.substring(1));
@@ -1352,7 +1352,7 @@
     }
     if (!StringTools.startsWithUppercase(word)) {
       String ucWord = StringTools.uppercaseFirstChar(word);
-      if (!suggestions.contains(ucWord) && hunspell.spell(ucWord) && !ucWord.endsWith(".")) {
+      if (!suggestions.contains(ucWord) && !hunspellDict.misspelled(ucWord) && !ucWord.endsWith(".")) {
         // Hunspell doesn't always automatically offer the most obvious suggestion for compounds:
         return Collections.singletonList(ucWord);
       }
@@ -1386,7 +1386,7 @@
           stopAt = words.length-2;
         }
         for (int idx = startAt; idx < stopAt; idx++) {
-          if (!hunspell.spell(words[idx])) {
+          if (hunspellDict.misspelled(words[idx])) {
             List<String> list = sortSuggestionByQuality(words[idx], super.getSuggestions(words[idx]));
             suggestionLists.add(list);
           } else {
@@ -1473,7 +1473,7 @@
   private String getParticipleForBaseform(String baseform) throws IOException {
     AnalyzedToken token = new AnalyzedToken(baseform, null, baseform);
     String[] forms = synthesizer.synthesize(token, "VER:PA2:.*", true);
-    if (forms.length > 0 && hunspell.spell(forms[0])) {
+    if (forms.length > 0 && !hunspellDict.misspelled(forms[0])) {
       return forms[0];
     }
     return null;
@@ -1498,12 +1498,12 @@
     boolean isCompound = nextWord != null && (compoundTokenizer.tokenize(nextWord).size() > 1 || nextWord.indexOf('-') > 0);
     if (isCompound) {
       word = StringUtils.removeEnd(word, "-");
-      boolean isMisspelled = !hunspell.spell(word);  // "Stil- und Grammatikprüfung" or "Stil-, Text- und Grammatikprüfung"
+      boolean isMisspelled = hunspellDict.misspelled(word);  // "Stil- und Grammatikprüfung" or "Stil-, Text- und Grammatikprüfung"
       if (isMisspelled && (super.ignoreWord(word) || wordsToBeIgnoredInCompounds.contains(word))) {
         isMisspelled = false;
       } else if (isMisspelled && word.endsWith("s") && isNeedingFugenS(StringUtils.removeEnd(word, "s"))) {
         // Vertuschungs- und Bespitzelungsmaßnahmen: remove trailing "s" before checking "Vertuschungs" so that the spell checker finds it
-        isMisspelled = !hunspell.spell(StringUtils.removeEnd(word, "s"));
+        isMisspelled = hunspellDict.misspelled(StringUtils.removeEnd(word, "s"));
       }
       return !isMisspelled;
     }
@@ -1556,10 +1556,10 @@
       boolean isCandidateForNonHyphenatedCompound = !StringUtils.isAllUpperCase(ignoredWord) && (StringUtils.isAllLowerCase(partialWord) || ignoredWord.endsWith("-"));
       boolean needFugenS = isNeedingFugenS(ignoredWord);
       if (isCandidateForNonHyphenatedCompound && !needFugenS && partialWord.length() > 2) {
-        return hunspell.spell(partialWord) || hunspell.spell(StringUtils.capitalize(partialWord));
+        return !hunspellDict.misspelled(partialWord) || !hunspellDict.misspelled(StringUtils.capitalize(partialWord));
       } else if (isCandidateForNonHyphenatedCompound && needFugenS && partialWord.length() > 2) {
         partialWord = partialWord.startsWith("s") ? partialWord.substring(1) : partialWord;
-        return hunspell.spell(partialWord) || hunspell.spell(StringUtils.capitalize(partialWord));
+        return !hunspellDict.misspelled(partialWord) || !hunspellDict.misspelled(StringUtils.capitalize(partialWord));
       }
       return false;
     }
@@ -1591,7 +1591,7 @@
 
     if (hasIgnoredWord) {
       for (String w : toSpellCheck) {
-        if (!hunspell.spell(w)) {
+        if (hunspellDict.misspelled(w)) {
           return false;
         }
       }
--- languagetool-4.8/languagetool-wikipedia/src/main/java/org/languagetool/dev/RareWordsFinder.java	2019-12-27 11:17:28.000000000 +0100
+++ languagetool-4.8/languagetool-wikipedia/src/main/java/org/languagetool/dev/RareWordsFinder.java	2020-01-07 09:32:01.282033523 +0100
@@ -25,7 +25,6 @@
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.nio.charset.CharacterCodingException;
-import java.nio.file.Paths;
 import java.util.List;
 import java.util.Scanner;
 
@@ -39,10 +38,11 @@
 
   private static final String dictInClassPath = "/en/hunspell/en_US.dict";
   
-  private final Hunspell hunspell;
+  private final Hunspell.Dictionary hunspellDict;
   
   private RareWordsFinder(String hunspellBase) throws IOException {
-    hunspell = new Hunspell(Paths.get(hunspellBase + ".dic"), Paths.get(hunspellBase + ".aff"));
+    Hunspell hunspell = Hunspell.getInstance();
+    hunspellDict = hunspell.getDictionary(hunspellBase);
   }
   
   private void run(File input, int minimum) throws FileNotFoundException, CharacterCodingException {
@@ -60,7 +60,7 @@
             boolean isMisspelled = speller.isMisspelled(word);
             if (!isMisspelled) {
               //List<String> suggestions = speller.getSuggestions(word);  // seems to work only for words that are actually misspellings
-              List<String> suggestions = hunspell.suggest(word);
+              List<String> suggestions = hunspellDict.suggest(word);
               suggestions.remove(word);
               if (suggestionsMightBeUseful(word, suggestions)) {
                 System.out.println(word + "\t" + count + " -> " + String.join(", ", suggestions));

Places

File languagetool-hunspell.patch of Package languagetool

Places