001/*
002 * (C) Copyright 2006-2010 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019package org.nuxeo.ecm.platform.htmlsanitizer;
020
021import java.io.IOException;
022import java.io.Serializable;
023import java.net.URL;
024import java.util.ArrayList;
025import java.util.Iterator;
026import java.util.LinkedList;
027import java.util.List;
028
029import org.apache.commons.logging.Log;
030import org.apache.commons.logging.LogFactory;
031import org.nuxeo.ecm.core.api.DocumentModel;
032import org.nuxeo.ecm.core.api.NuxeoException;
033import org.nuxeo.ecm.core.api.model.Property;
034import org.nuxeo.ecm.core.api.model.PropertyNotFoundException;
035import org.nuxeo.runtime.model.ComponentInstance;
036import org.nuxeo.runtime.model.DefaultComponent;
037import org.owasp.html.HtmlPolicyBuilder;
038import org.owasp.html.PolicyFactory;
039
040/**
041 * Service that sanitizes some HMTL fields to remove potential cross-site scripting attacks in them.
042 */
043public class HtmlSanitizerServiceImpl extends DefaultComponent implements HtmlSanitizerService {
044
045    private static final Log log = LogFactory.getLog(HtmlSanitizerServiceImpl.class);
046
047    public static final String ANTISAMY_XP = "antisamy";
048
049    public static final String SANITIZER_XP = "sanitizer";
050
051    /** All policies registered. */
052    public LinkedList<HtmlSanitizerAntiSamyDescriptor> allPolicies = new LinkedList<>();
053
054    /** Effective policy. */
055    public PolicyFactory policy;
056
057    /** All sanitizers registered. */
058    public List<HtmlSanitizerDescriptor> allSanitizers = new ArrayList<>(1);
059
060    /** Effective sanitizers. */
061    public List<HtmlSanitizerDescriptor> sanitizers = new ArrayList<>(1);
062
063    @Override
064    public void registerContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
065        if (ANTISAMY_XP.equals(extensionPoint)) {
066            if (!(contribution instanceof HtmlSanitizerAntiSamyDescriptor)) {
067                log.error("Contribution " + contribution + " is not of type "
068                        + HtmlSanitizerAntiSamyDescriptor.class.getName());
069                return;
070            }
071            HtmlSanitizerAntiSamyDescriptor desc = (HtmlSanitizerAntiSamyDescriptor) contribution;
072            log.info("Registering AntiSamy policy: " + desc.policy);
073            addAntiSamy(desc);
074        } else if (SANITIZER_XP.equals(extensionPoint)) {
075            if (!(contribution instanceof HtmlSanitizerDescriptor)) {
076                log.error("Contribution " + contribution + " is not of type " + HtmlSanitizerDescriptor.class.getName());
077                return;
078            }
079            HtmlSanitizerDescriptor desc = (HtmlSanitizerDescriptor) contribution;
080            log.info("Registering HTML sanitizer: " + desc);
081            addSanitizer(desc);
082        } else {
083            log.error("Contribution extension point should be '" + SANITIZER_XP + "' but is: " + extensionPoint);
084        }
085    }
086
087    @Override
088    public void unregisterContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
089        if (ANTISAMY_XP.equals(extensionPoint)) {
090            if (!(contribution instanceof HtmlSanitizerAntiSamyDescriptor)) {
091                return;
092            }
093            HtmlSanitizerAntiSamyDescriptor desc = (HtmlSanitizerAntiSamyDescriptor) contribution;
094            log.info("Unregistering AntiSamy policy: " + desc.policy);
095            removeAntiSamy(desc);
096        } else if (SANITIZER_XP.equals(extensionPoint)) {
097            if (!(contribution instanceof HtmlSanitizerDescriptor)) {
098                return;
099            }
100            HtmlSanitizerDescriptor desc = (HtmlSanitizerDescriptor) contribution;
101            log.info("Unregistering HTML sanitizer: " + desc);
102            removeSanitizer(desc);
103        }
104    }
105
106    protected void addAntiSamy(HtmlSanitizerAntiSamyDescriptor desc) {
107        if (Thread.currentThread().getContextClassLoader().getResourceAsStream(desc.policy) == null) {
108            log.error("Cannot find AntiSamy policy: " + desc.policy);
109            return;
110        }
111        allPolicies.add(desc);
112        refreshPolicy();
113    }
114
115    protected void removeAntiSamy(HtmlSanitizerAntiSamyDescriptor desc) {
116        allPolicies.remove(desc);
117        refreshPolicy();
118    }
119
120    protected void refreshPolicy() {
121        if (allPolicies.isEmpty()) {
122            policy = null;
123        } else {
124            HtmlSanitizerAntiSamyDescriptor desc = allPolicies.removeLast();
125            URL url = Thread.currentThread().getContextClassLoader().getResource(desc.policy);
126            HtmlPolicyBuilder builder = new HtmlPolicyBuilder();
127            try {
128                builder.loadAntiSamyPolicy(url);
129                initializeBuilder(builder);
130                policy = builder.toFactory();
131            } catch (IOException e) {
132                policy = null;
133                throw new NuxeoException("Cannot parse AntiSamy policy: " + desc.policy, e);
134            }
135        }
136    }
137
138    protected void initializeBuilder(HtmlPolicyBuilder builder) {
139        builder.allowStandardUrlProtocols();
140        builder.allowUrlProtocols("data"); // still enforces regex matchers from policy
141        builder.allowStyling();
142        builder.disallowElements("script");
143    }
144
145    protected void addSanitizer(HtmlSanitizerDescriptor desc) {
146        if (desc.fields.isEmpty()) {
147            log.error("Sanitizer has no fields: " + desc);
148            return;
149        }
150        allSanitizers.add(desc);
151        refreshSanitizers();
152    }
153
154    protected void removeSanitizer(HtmlSanitizerDescriptor desc) {
155        allSanitizers.remove(desc);
156        refreshSanitizers();
157    }
158
159    protected void refreshSanitizers() {
160        // not very efficient algorithm but who cares?
161        sanitizers.clear();
162        for (HtmlSanitizerDescriptor sanitizer : allSanitizers) {
163            // remove existing with same name
164            for (Iterator<HtmlSanitizerDescriptor> it = sanitizers.iterator(); it.hasNext();) {
165                HtmlSanitizerDescriptor s = it.next();
166                if (s.name.equals(sanitizer.name)) {
167                    it.remove();
168                    break;
169                }
170            }
171            // add new one if enabled
172            if (sanitizer.enabled) {
173                sanitizers.add(sanitizer);
174            }
175        }
176    }
177
178    protected List<HtmlSanitizerDescriptor> getSanitizers() {
179        return sanitizers;
180    }
181
182    // ----- HtmlSanitizerService -----
183
184    @Override
185    public void sanitizeDocument(DocumentModel doc) {
186        if (policy == null) {
187            log.error("Cannot sanitize, no policy registered");
188            return;
189        }
190        for (HtmlSanitizerDescriptor sanitizer : sanitizers) {
191            if (!sanitizer.types.isEmpty() && !sanitizer.types.contains(doc.getType())) {
192                continue;
193            }
194            for (FieldDescriptor field : sanitizer.fields) {
195                String fieldName = field.getContentField();
196                String filterField = field.getFilterField();
197                if (filterField != null) {
198                    Property filterProp;
199                    try {
200                        filterProp = doc.getProperty(filterField);
201                    } catch (PropertyNotFoundException e) {
202                        continue;
203                    }
204                    if (field.match(String.valueOf(filterProp.getValue())) != field.doSanitize()) {
205                        continue;
206                    }
207                }
208                Property prop;
209                try {
210                    prop = doc.getProperty(fieldName);
211                } catch (PropertyNotFoundException e) {
212                    continue;
213                }
214                Serializable value = prop.getValue();
215                if (value == null) {
216                    continue;
217                }
218                if (!(value instanceof String)) {
219                    log.debug("Cannot sanitize non-string field: " + field);
220                    continue;
221                }
222                String info = "doc " + doc.getPathAsString() + " (" + doc.getId() + ") field " + field;
223                String newValue = sanitizeString((String) value, info);
224                if (!newValue.equals(value)) {
225                    prop.setValue(newValue);
226                }
227            }
228        }
229    }
230
231    @Override
232    public String sanitizeString(String string, String info) {
233        if (policy == null) {
234            log.error("Cannot sanitize, no policy registered");
235            return string;
236        }
237        return policy.sanitize(string);
238    }
239
240}