Skip to content

Fix https://bz.apache.org/bugzilla/show_bug.cgi?id=63235 #146

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 5 additions & 16 deletions java/org/apache/tomcat/util/buf/B2CConverter.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import org.apache.tomcat.util.res.StringManager;

Expand All @@ -38,23 +36,14 @@ public class B2CConverter {

private static final StringManager sm = StringManager.getManager(B2CConverter.class);

private static final Map<String, Charset> encodingToCharsetCache =
new HashMap<>();

// Protected so unit tests can use it
protected static final int LEFTOVER_SIZE = 9;
private static final CharsetCache charsetCache;

static {
for (Charset charset: Charset.availableCharsets().values()) {
encodingToCharsetCache.put(
charset.name().toLowerCase(Locale.ENGLISH), charset);
for (String alias : charset.aliases()) {
encodingToCharsetCache.put(
alias.toLowerCase(Locale.ENGLISH), charset);
}
}
charsetCache = new CharsetCache();
}

// Protected so unit tests can use it
protected static final int LEFTOVER_SIZE = 9;

/**
* Obtain the Charset for the given encoding
Expand All @@ -71,7 +60,7 @@ public static Charset getCharset(String enc) throws UnsupportedEncodingException
// Encoding names should all be ASCII
String lowerCaseEnc = enc.toLowerCase(Locale.ENGLISH);

Charset charset = encodingToCharsetCache.get(lowerCaseEnc);
Charset charset = charsetCache.getCharset(lowerCaseEnc);

if (charset == null) {
// Pre-population of the cache means this must be invalid
Expand Down
134 changes: 134 additions & 0 deletions java/org/apache/tomcat/util/buf/CharsetCache.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;

import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.util.Locale;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class CharsetCache {

private static final String[] INITIAL_CHARSETS = new String[] { "iso-8859-1", "utf-8" };

/*
* Tested with:
* - Oracle JDK 8 u192
* - OpenJDK 13 EA 4
*/
private static final String[] LAZY_CHARSETS = new String[] {
"big5", "big5-hkscs", "cesu-8", "euc-jp", "euc-kr", "gb18030", "gb2312", "gbk", "ibm-thai", "ibm00858",
"ibm01140", "ibm01141", "ibm01142", "ibm01143", "ibm01144", "ibm01145", "ibm01146", "ibm01147", "ibm01148",
"ibm01149", "ibm037", "ibm1026", "ibm1047", "ibm273", "ibm277", "ibm278", "ibm280", "ibm284", "ibm285",
"ibm290", "ibm297", "ibm420", "ibm424", "ibm437", "ibm500", "ibm775", "ibm850", "ibm852", "ibm855",
"ibm857", "ibm860", "ibm861", "ibm862", "ibm863", "ibm864", "ibm865", "ibm866", "ibm868", "ibm869",
"ibm870", "ibm871", "ibm918", "iso-2022-cn", "iso-2022-jp", "iso-2022-jp-2", "iso-2022-kr", "iso-8859-13",
"iso-8859-15", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7",
"iso-8859-8", "iso-8859-9", "iso-8859-16", "jis_x0201", "jis_x0212-1990", "koi8-r", "koi8-u", "shift_jis",
"tis-620", "us-ascii", "utf-16", "utf-16be", "utf-16le", "utf-32", "utf-32be", "utf-32le", "x-utf-32be-bom",
"x-utf-32le-bom", "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254",
"windows-1255", "windows-1256", "windows-1257", "windows-1258", "windows-31j", "x-big5-hkscs-2001",
"x-big5-solaris", "x-compound_text", "x-euc-tw", "x-ibm1006", "x-ibm1025", "x-ibm1046", "x-ibm1097",
"x-ibm1098", "x-ibm1112", "x-ibm1122", "x-ibm1123", "x-ibm1124", "x-ibm1129", "x-ibm1166", "x-ibm1364",
"x-ibm1381", "x-ibm1383", "x-ibm300", "x-ibm33722", "x-ibm737", "x-ibm833", "x-ibm834", "x-ibm856",
"x-ibm874", "x-ibm875", "x-ibm921", "x-ibm922", "x-ibm930", "x-ibm933", "x-ibm935", "x-ibm937", "x-ibm939",
"x-ibm942", "x-ibm942c", "x-ibm943", "x-ibm943c", "x-ibm948", "x-ibm949", "x-ibm949c", "x-ibm950",
"x-ibm964", "x-ibm970", "x-iscii91", "x-iso-2022-cn-cns", "x-iso-2022-cn-gb", "x-jis0208",
"x-jisautodetect", "x-johab", "x-ms932_0213", "x-ms950-hkscs", "x-ms950-hkscs-xp", "x-macarabic",
"x-maccentraleurope", "x-maccroatian", "x-maccyrillic", "x-macdingbat", "x-macgreek", "x-machebrew",
"x-maciceland", "x-macroman", "x-macromania", "x-macsymbol", "x-macthai", "x-macturkish", "x-macukraine",
"x-pck", "x-sjis_0213", "x-utf-16le-bom", "x-euc-jp-linux", "x-eucjp-open", "x-iso-8859-11", "x-mswin-936",
"x-windows-50220", "x-windows-50221", "x-windows-874", "x-windows-949", "x-windows-950",
"x-windows-iso2022jp"
};

private static final Charset DUMMY_CHARSET = new DummyCharset("Dummy", null);

private ConcurrentMap<String,Charset> cache = new ConcurrentHashMap<>();

public CharsetCache() {
// Pre-populate the cache
for (String charsetName : INITIAL_CHARSETS) {
Charset charset = Charset.forName(charsetName);
addToCache(charsetName, charset);
}

for (String charsetName : LAZY_CHARSETS) {
addToCache(charsetName, DUMMY_CHARSET);
}
}


private void addToCache(String name, Charset charset) {
cache.put(name, charset);
for (String alias : charset.aliases()) {
cache.put(alias.toLowerCase(Locale.ENGLISH), charset);
}
}


public Charset getCharset(String charsetName) {
String lcCharsetName = charsetName.toLowerCase(Locale.ENGLISH);

Charset result = cache.get(lcCharsetName);

if (result == DUMMY_CHARSET) {
// Name is known but the Charset is not in the cache
Charset charset = Charset.forName(lcCharsetName);
if (charset == null) {
// Charset not available in this JVM - remove cache entry
cache.remove(lcCharsetName);
result = null;
} else {
// Charset is available - populate cache entry
addToCache(lcCharsetName, charset);
result = charset;
}
}

return result;
}


/*
* Placeholder Charset implementation for entries that will be loaded lazily
* into the cache.
*/
private static class DummyCharset extends Charset {

protected DummyCharset(String canonicalName, String[] aliases) {
super(canonicalName, aliases);
}

@Override
public boolean contains(Charset cs) {
return false;
}

@Override
public CharsetDecoder newDecoder() {
return null;
}

@Override
public CharsetEncoder newEncoder() {
return null;
}
}
}
65 changes: 65 additions & 0 deletions test/org/apache/tomcat/util/buf/TestCharsetCache.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;

import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

import org.junit.Assert;
import org.junit.Test;

public class TestCharsetCache {

@Test
public void testAllKnownCharsets() {
CharsetCache cache = new CharsetCache();

List<String> cacheMisses = new ArrayList<>();

for (Charset charset: Charset.availableCharsets().values()) {
if (cache.getCharset(charset.name()) == null) {
cacheMisses.add(charset.name());
} else {
for (String alias : charset.aliases()) {
if (cache.getCharset(alias) == null) {
cacheMisses.add(alias);
}
}
}
}

if (cacheMisses.size() != 0) {
StringBuilder sb = new StringBuilder();
Collections.sort(cacheMisses);
for (String name : cacheMisses) {
if (sb.length() == 0) {
sb.append('"');
} else {
sb.append(", \"");
}
sb.append(name.toLowerCase(Locale.ENGLISH));
sb.append('"');
}
System.out.println(sb.toString());
}

Assert.assertTrue(cacheMisses.size() == 0);
}
}
141 changes: 141 additions & 0 deletions test/org/apache/tomcat/util/buf/TestCharsetCachePerformance.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;

import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import org.junit.Test;

public class TestCharsetCachePerformance {

@Test
public void testNoCsCache() throws Exception {
doTest(new NoCsCache());
}


@Test
public void testFullCsCache() throws Exception {
doTest(new FullCsCache());
}


@Test
public void testLazyCsCache() throws Exception {
doTest(new LazyCsCache());
}


private void doTest(CsCache cache) throws Exception {
int threadCount = 10;
int iterations = 10000000;
String[] lookupNames = new String[] {
"ISO-8859-1", "ISO-8859-2", "ISO-8859-3", "ISO-8859-4", "ISO-8859-5" };

Thread[] threads = new Thread[threadCount];

for (int i = 0; i < threadCount; i++) {
threads[i] = new TestCsCacheThread(iterations, cache, lookupNames);
}

long startTime = System.nanoTime();

for (int i = 0; i < threadCount; i++) {
threads[i].start();
}

for (int i = 0; i < threadCount; i++) {
threads[i].join();
}

long endTime = System.nanoTime();

System.out.println(cache.getClass().getName() + ": " + (endTime - startTime) + "ns");
}


private static interface CsCache {
Charset getCharset(String charsetName);
}


private static class NoCsCache implements CsCache {

@Override
public Charset getCharset(String charsetName) {
return Charset.forName(charsetName);
}
}


private static class FullCsCache implements CsCache {

private static final Map<String,Charset> cache = new HashMap<>();

static {
for (Charset charset: Charset.availableCharsets().values()) {
cache.put(charset.name().toLowerCase(Locale.ENGLISH), charset);
for (String alias : charset.aliases()) {
cache.put(alias.toLowerCase(Locale.ENGLISH), charset);
}
}
}


@Override
public Charset getCharset(String charsetName) {
return cache.get(charsetName.toLowerCase(Locale.ENGLISH));
}
}


private static class LazyCsCache implements CsCache {

private CharsetCache cache = new CharsetCache();

@Override
public Charset getCharset(String charsetName) {
return cache.getCharset(charsetName);
}
}


private static class TestCsCacheThread extends Thread {

private final int iterations;
private final CsCache cache;
private final String[] lookupNames;
private final int lookupNamesCount;

public TestCsCacheThread(int iterations, CsCache cache, String[] lookupNames) {
this.iterations = iterations;
this.cache = cache;
this.lookupNames = lookupNames;
this.lookupNamesCount = lookupNames.length;
}

@Override
public void run() {
for (int i = 0; i < iterations; i++) {
cache.getCharset(lookupNames[i % lookupNamesCount]);
}
}
}
}
3 changes: 3 additions & 0 deletions webapps/docs/changelog.xml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@
the <code>protocolHeader</code> attribute of the
<code>RemoteIpFilter</code> and <code>RemoteIpValve</code>. (markt)
</fix>
<fix>
<bug>63235</bug>: Refactor Charset cache to reduce start time. (markt)
</fix>
<fix>
<bug>63249</bug>: Use a consistent log level (<code>WARN</code>) when
logging the failure to register or deregister a JMX Bean. (markt)
Expand Down