/*
 * (C) Copyright 2006-2010 Nuxeo SA (http://nuxeo.com/) and contributors.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser General Public License
 * (LGPL) version 2.1 which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/lgpl.html
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * Contributors:
 *     Florent Guillaume
 */
package org.nuxeo.ecm.platform.htmlsanitizer;

import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.DocumentModel;
import org.nuxeo.ecm.core.api.model.Property;
import org.nuxeo.ecm.core.api.model.PropertyNotFoundException;
import org.nuxeo.runtime.model.ComponentInstance;
import org.nuxeo.runtime.model.DefaultComponent;
import org.owasp.validator.html.AntiSamy;
import org.owasp.validator.html.CleanResults;
import org.owasp.validator.html.Policy;
import org.owasp.validator.html.PolicyException;
import org.owasp.validator.html.ScanException;

/**
 * Service that sanitizes some HTML fields to remove potential cross-site scripting attacks in them.
041 */ 042public class HtmlSanitizerServiceImpl extends DefaultComponent implements HtmlSanitizerService { 043 044 private static final Log log = LogFactory.getLog(HtmlSanitizerServiceImpl.class); 045 046 public static final String ANTISAMY_XP = "antisamy"; 047 048 public static final String SANITIZER_XP = "sanitizer"; 049 050 /** All policies registered. */ 051 public LinkedList<HtmlSanitizerAntiSamyDescriptor> allPolicies = new LinkedList<HtmlSanitizerAntiSamyDescriptor>(); 052 053 /** Effective policy. */ 054 public Policy policy; 055 056 /** All sanitizers registered. */ 057 public List<HtmlSanitizerDescriptor> allSanitizers = new ArrayList<HtmlSanitizerDescriptor>(1); 058 059 /** Effective sanitizers. */ 060 public List<HtmlSanitizerDescriptor> sanitizers = new ArrayList<HtmlSanitizerDescriptor>(1); 061 062 @Override 063 public void registerContribution(Object contribution, String extensionPoint, ComponentInstance contributor) { 064 if (ANTISAMY_XP.equals(extensionPoint)) { 065 if (!(contribution instanceof HtmlSanitizerAntiSamyDescriptor)) { 066 log.error("Contribution " + contribution + " is not of type " 067 + HtmlSanitizerAntiSamyDescriptor.class.getName()); 068 return; 069 } 070 HtmlSanitizerAntiSamyDescriptor desc = (HtmlSanitizerAntiSamyDescriptor) contribution; 071 log.info("Registering AntiSamy policy: " + desc.policy); 072 addAntiSamy(desc); 073 } else if (SANITIZER_XP.equals(extensionPoint)) { 074 if (!(contribution instanceof HtmlSanitizerDescriptor)) { 075 log.error("Contribution " + contribution + " is not of type " + HtmlSanitizerDescriptor.class.getName()); 076 return; 077 } 078 HtmlSanitizerDescriptor desc = (HtmlSanitizerDescriptor) contribution; 079 log.info("Registering HTML sanitizer: " + desc); 080 addSanitizer(desc); 081 } else { 082 log.error("Contribution extension point should be '" + SANITIZER_XP + "' but is: " + extensionPoint); 083 } 084 } 085 086 @Override 087 public void unregisterContribution(Object contribution, String 
extensionPoint, ComponentInstance contributor) { 088 if (ANTISAMY_XP.equals(extensionPoint)) { 089 if (!(contribution instanceof HtmlSanitizerAntiSamyDescriptor)) { 090 return; 091 } 092 HtmlSanitizerAntiSamyDescriptor desc = (HtmlSanitizerAntiSamyDescriptor) contribution; 093 log.info("Unregistering AntiSamy policy: " + desc.policy); 094 removeAntiSamy(desc); 095 } else if (SANITIZER_XP.equals(extensionPoint)) { 096 if (!(contribution instanceof HtmlSanitizerDescriptor)) { 097 return; 098 } 099 HtmlSanitizerDescriptor desc = (HtmlSanitizerDescriptor) contribution; 100 log.info("Unregistering HTML sanitizer: " + desc); 101 removeSanitizer(desc); 102 } 103 } 104 105 protected void addAntiSamy(HtmlSanitizerAntiSamyDescriptor desc) { 106 if (Thread.currentThread().getContextClassLoader().getResourceAsStream(desc.policy) == null) { 107 log.error("Cannot find AntiSamy policy: " + desc.policy); 108 return; 109 } 110 allPolicies.add(desc); 111 refreshPolicy(); 112 } 113 114 protected void removeAntiSamy(HtmlSanitizerAntiSamyDescriptor desc) { 115 allPolicies.remove(desc); 116 refreshPolicy(); 117 } 118 119 protected void refreshPolicy() { 120 if (allPolicies.isEmpty()) { 121 policy = null; 122 } else { 123 HtmlSanitizerAntiSamyDescriptor desc = allPolicies.removeLast(); 124 InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream(desc.policy); 125 try { 126 policy = Policy.getInstance(is); 127 } catch (PolicyException e) { 128 policy = null; 129 throw new RuntimeException("Cannot parse AntiSamy policy: " + desc.policy, e); 130 } 131 } 132 } 133 134 protected Policy getPolicy() { 135 return policy; 136 } 137 138 protected void addSanitizer(HtmlSanitizerDescriptor desc) { 139 if (desc.fields.isEmpty()) { 140 log.error("Sanitizer has no fields: " + desc); 141 return; 142 } 143 allSanitizers.add(desc); 144 refreshSanitizers(); 145 } 146 147 protected void removeSanitizer(HtmlSanitizerDescriptor desc) { 148 allSanitizers.remove(desc); 149 
refreshSanitizers(); 150 } 151 152 protected void refreshSanitizers() { 153 // not very efficient algorithm but who cares? 154 sanitizers.clear(); 155 for (HtmlSanitizerDescriptor sanitizer : allSanitizers) { 156 // remove existing with same name 157 for (Iterator<HtmlSanitizerDescriptor> it = sanitizers.iterator(); it.hasNext();) { 158 HtmlSanitizerDescriptor s = it.next(); 159 if (s.name.equals(sanitizer.name)) { 160 it.remove(); 161 break; 162 } 163 } 164 // add new one if enabled 165 if (sanitizer.enabled) { 166 sanitizers.add(sanitizer); 167 } 168 } 169 } 170 171 protected List<HtmlSanitizerDescriptor> getSanitizers() { 172 return sanitizers; 173 } 174 175 // ----- HtmlSanitizerService ----- 176 177 @Override 178 public void sanitizeDocument(DocumentModel doc) { 179 if (policy == null) { 180 log.error("Cannot sanitize, no policy registered"); 181 return; 182 } 183 for (HtmlSanitizerDescriptor sanitizer : sanitizers) { 184 if (!sanitizer.types.isEmpty() && !sanitizer.types.contains(doc.getType())) { 185 continue; 186 } 187 for (FieldDescriptor field : sanitizer.fields) { 188 String fieldName = field.getContentField(); 189 String filterField = field.getFilterField(); 190 if (filterField != null) { 191 Property filterProp; 192 try { 193 filterProp = doc.getProperty(filterField); 194 } catch (PropertyNotFoundException e) { 195 continue; 196 } 197 if (field.match(String.valueOf(filterProp.getValue())) != field.doSanitize()) { 198 continue; 199 } 200 } 201 Property prop; 202 try { 203 prop = doc.getProperty(fieldName); 204 } catch (PropertyNotFoundException e) { 205 continue; 206 } 207 Serializable value = prop.getValue(); 208 if (value == null) { 209 continue; 210 } 211 if (!(value instanceof String)) { 212 log.debug("Cannot sanitize non-string field: " + field); 213 continue; 214 } 215 String info = "doc " + doc.getPathAsString() + " (" + doc.getId() + ") field " + field; 216 String newValue = sanitizeString((String) value, info); 217 if (!newValue.equals(value)) 
{ 218 prop.setValue(newValue); 219 } 220 } 221 } 222 } 223 224 @Override 225 public String sanitizeString(String string, String info) { 226 if (policy == null) { 227 log.error("Cannot sanitize, no policy registered"); 228 return string; 229 } 230 try { 231 CleanResults cr = new AntiSamy().scan(string, policy); 232 for (Object err : cr.getErrorMessages()) { 233 log.debug(String.format("Sanitizing %s: %s", info == null ? "" : info, err)); 234 } 235 return cr.getCleanHTML(); 236 } catch (ScanException | PolicyException e) { 237 log.error(String.format("Cannot sanitize %s: %s", info == null ? "" : info, e)); 238 return string; 239 } 240 } 241 242}