001/* 002 * (C) Copyright 2006-2019 Nuxeo (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Florent Guillaume 018 * 019 * $Id$ 020 */ 021 022package org.nuxeo.common.utils; 023 024import static java.nio.charset.StandardCharsets.UTF_8; 025import static org.nuxeo.common.utils.UserAgentMatcher.isMSIE6or7; 026 027/** 028 * RFC-2231 specifies how a MIME parameter value, like {@code Content-Disposition}'s {@code filename}, can be encoded to 029 * contain arbitrary character sets. 030 * 031 * @author Florent Guillaume 032 */ 033public class RFC2231 { 034 035 // RFC 2045 036 private static final String MIME_SPECIALS = "()<>@,;:\\\"/[]?=\t "; 037 038 // RFC 2231 039 private static final String RFC2231_SPECIALS = "*'%" + MIME_SPECIALS; 040 041 private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray(); 042 043 // Utility class 044 private RFC2231() { 045 } 046 047 /** 048 * Does a simple %-escaping of the UTF-8 bytes of the value. Keep only some know safe characters. 049 * 050 * @param sb the builder to which escaped chars are appended 051 * @param value the value to escape 052 */ 053 public static void percentEscape(StringBuilder sb, String value) { 054 byte[] bytes = value.getBytes(UTF_8); 055 for (byte b : bytes) { 056 if (b < '+' || b == ';' || b == ',' || b == '\\' || b > 'z') { 057 sb.append('%'); 058 String s = Integer.toHexString(b & 0xff).toUpperCase(); 059 if (s.length() < 2) { 060 sb.append('0'); 061 } 062 sb.append(s); 063 } else { 064 sb.append((char) b); 065 } 066 } 067 } 068 069 /** 070 * Encodes a MIME parameter per RFC 2231. 071 * <p> 072 * This implementation always uses UTF-8 and no language. 073 * 074 * @param sb the buffer to fill 075 * @param value the value to encode 076 */ 077 protected static void encodeRFC2231(StringBuilder sb, String value) { 078 int originalLength = sb.length(); 079 sb.append("*=UTF-8''"); // no language 080 byte[] bytes = value.getBytes(UTF_8); 081 boolean encoded = false; 082 for (int i = 0; i < bytes.length; i++) { 083 int c = bytes[i] & 0xff; 084 if (c <= 32 || c >= 127 || RFC2231_SPECIALS.indexOf(c) != -1) { 085 sb.append('%'); 086 sb.append(HEX_DIGITS[c >> 4]); 087 sb.append(HEX_DIGITS[c & 0xf]); 088 encoded = true; 089 } else { 090 sb.append((char) c); 091 } 092 } 093 if (!encoded) { 094 // undo and use basic format 095 sb.setLength(originalLength); 096 sb.append('='); 097 sb.append(value); 098 } 099 } 100 101 /** 102 * Encodes a {@code Content-Disposition} header. For some user agents the full RFC-2231 encoding won't be performed 103 * as they don't understand it. 104 * 105 * @param filename the filename 106 * @param inline {@code true} for an inline disposition, {@code false} for an attachment 107 * @param userAgent the userAgent 108 * @return a full string to set as value of a {@code Content-Disposition} header 109 */ 110 public static String encodeContentDisposition(String filename, boolean inline, String userAgent) { 111 StringBuilder sb = new StringBuilder(); 112 sb.append(inline ? "inline" : "attachment"); 113 sb.append("; filename"); 114 if (userAgent == null) { 115 userAgent = ""; 116 } 117 if (isMSIE6or7(userAgent)) { 118 // MSIE understands straight %-encoding 119 sb.append("="); 120 percentEscape(sb, filename); 121 } else { 122 // proper RFC2231 123 encodeRFC2231(sb, filename); 124 } 125 return sb.toString(); 126 } 127 128}