001/* 002 * (C) Copyright 2016 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Nuxeo 018 */ 019 020package org.nuxeo.apidoc.worker; 021 022import java.io.IOException; 023import java.util.HashSet; 024import java.util.Set; 025import java.util.stream.IntStream; 026 027import javax.xml.parsers.ParserConfigurationException; 028import javax.xml.parsers.SAXParser; 029import javax.xml.parsers.SAXParserFactory; 030 031import org.apache.commons.lang3.StringUtils; 032import org.nuxeo.apidoc.listener.AttributesExtractorStater; 033import org.nuxeo.ecm.core.api.Blob; 034import org.nuxeo.ecm.core.api.DocumentModel; 035import org.nuxeo.ecm.core.api.DocumentNotFoundException; 036import org.nuxeo.ecm.core.api.DocumentRef; 037import org.nuxeo.ecm.core.api.NuxeoException; 038import org.nuxeo.ecm.core.api.blobholder.BlobHolder; 039import org.nuxeo.ecm.core.work.AbstractWork; 040import org.nuxeo.ecm.platform.dublincore.listener.DublinCoreListener; 041import org.xml.sax.Attributes; 042import org.xml.sax.SAXException; 043import org.xml.sax.helpers.DefaultHandler; 044 045/** 046 * @author <a href="mailto:ak@nuxeo.com">Arnaud Kervern</a> 047 * @since 8.3 048 */ 049public class ExtractXmlAttributesWorker extends AbstractWork { 050 051 private static final long serialVersionUID = 1L; 052 053 public static final String CATEGORY = "apidoc-xml-extractor"; 054 055 protected ExtractXmlAttributesWorker(String repositoryName, String docId) { 056 super(String.format("%s:%s:xml:extractor", repositoryName, docId)); 057 setDocument(repositoryName, docId); 058 } 059 060 public ExtractXmlAttributesWorker(String repositoryName, String originatingUsername, String docId) { 061 this(repositoryName, docId); 062 setOriginatingUsername(originatingUsername); 063 } 064 065 @Override 066 public void work() { 067 setStatus("Extracting"); 068 openSystemSession(); 069 070 try { 071 DocumentModel doc = loadDocument(); 072 BlobHolder adapter = doc.getAdapter(BlobHolder.class); 073 String attributes = extractAttributes(adapter.getBlob()); 074 doc.setPropertyValue(AttributesExtractorStater.ATTRIBUTES_PROPERTY, attributes); 075 076 session.saveDocument(doc); 077 078 setStatus("Done"); 079 } catch (DocumentNotFoundException cause) { 080 ; 081 } catch (IOException | ParserConfigurationException | SAXException e) { 082 setStatus("Failed"); 083 throw new NuxeoException(e); 084 } 085 } 086 087 protected DocumentModel loadDocument() throws DocumentNotFoundException { 088 final DocumentRef docRef = getDocument().getDocRef(); 089 DocumentModel doc = session.getDocument(docRef); 090 doc.putContextData(DublinCoreListener.DISABLE_DUBLINCORE_LISTENER, true); 091 return doc; 092 } 093 094 public String extractAttributes(Blob blob) throws ParserConfigurationException, SAXException, IOException { 095 if (blob == null) { 096 return null; 097 } 098 099 SAXParserFactory factory = SAXParserFactory.newInstance(); 100 SAXParser saxParser = factory.newSAXParser(); 101 102 Set<String> attributes = new HashSet<>(); 103 saxParser.parse(blob.getStream(), new Handler(attributes)); 104 105 return StringUtils.join(attributes, ' '); 106 } 107 108 @Override 109 public String getCategory() { 110 return CATEGORY; 111 } 112 113 @Override 114 public String getTitle() { 115 return "XML Attributes extractor for fulltext search"; 116 } 117 118 protected static class Handler extends DefaultHandler { 119 private Set<String> attributesSet; 120 121 public Handler(Set<String> attributesSet) { 122 this.attributesSet = attributesSet; 123 } 124 125 @Override 126 public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { 127 super.startElement(uri, localName, qName, attributes); 128 IntStream.range(0, attributes.getLength()).forEach(i -> attributesSet.add(attributes.getValue(i))); 129 } 130 } 131 132}