/*
 * (C) Copyright 2002-2015 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 */

package org.nuxeo.ecm.platform.importer.xml.parser;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.InvalidXPathException;
import org.dom4j.Node;
import org.dom4j.Text;
import org.dom4j.io.SAXReader;
import org.dom4j.tree.DefaultText;
import org.mvel2.MVEL;

import org.nuxeo.common.Environment;
import org.nuxeo.common.utils.ZipUtils;
import org.nuxeo.ecm.automation.AutomationService;
import org.nuxeo.ecm.automation.OperationContext;
import org.nuxeo.ecm.automation.OperationException;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.CoreSession;
import org.nuxeo.ecm.core.api.DocumentModel;
import org.nuxeo.ecm.core.api.NuxeoException;
import org.nuxeo.ecm.core.api.model.Property;
import org.nuxeo.ecm.core.api.model.impl.primitives.BlobProperty;
import org.nuxeo.ecm.core.schema.types.ListType;
import org.nuxeo.runtime.api.Framework;

/**
 * Main implementation class for delivering the Import logic.
 * <p>
 * An instance walks a dom4j {@link Document} depth-first. For every element it looks up the matching
 * {@link DocConfigDescriptor} (document creation) and {@link AttributeConfigDescriptor}s (property mapping) in the
 * {@link ParserConfigRegistry} and creates / updates documents below the root document accordingly.
 * <p>
 * Every created document is pushed on {@link #docsStack} and stays there for the rest of the import; the document on
 * top of the stack is the one that property mappings are applied to.
 * <p>
 * The instance keeps mutable per-import state (stack, MVEL context, element map) in plain fields.
 *
 * @author <a href="mailto:tdelprat@nuxeo.com">Tiry</a>
 */
public class XMLImporterServiceImpl {

    private static final String MSG_NO_ELEMENT_FOUND = "**CREATION**\n"
            + "No element \"%s\" found in %s, use the DOC_TYPE-INDEX value";

    private static final String MSG_CREATION = "**CREATION**\n"
            + "Try to create document in %s with name %s based on \"%s\" fragment " + "with the following conf: %s\n";

    private static final String MSG_UPDATE = "**DOCUMENT UPDATE**\n"
            + "Try to update document in %s with name %s based on \"%s\" fragment " + "with the following conf: %s\n";

    private static final String MSG_UPDATE_PROPERTY_TRACE = "**PROPERTY UPDATE**\n"
            + "Value found for %s in %s is \"%s\". With the following conf: %s";

    private static final String MSG_UPDATE_PROPERTY = "**PROPERTY UPDATE**\n"
            + "Try to set value into %s property based on %s element on document \"%s\" (%s). Conf activated: %s";

    public static final Log log = LogFactory.getLog(XMLImporterServiceImpl.class);

    /** Context data key set to {@code Boolean.TRUE} on every document right before it is saved by the importer. */
    public static final String XML_IMPORTER_INITIALIZATION = "org.nuxeo.xml.importer.initialization";

    /** Session of the root document; used for all document creations, lookups and saves. */
    protected CoreSession session;

    /** Document under which the import happens; also the fallback parent path in {@link #resolvePath}. */
    protected DocumentModel rootDoc;

    /** Root document followed by every document created so far, most recent on top. */
    protected Stack<DocumentModel> docsStack;

    /** Per existing document id: names of the properties that were already emptied for overwrite. */
    protected Map<String, List<String>> deletedAttributes = new HashMap<>();

    /** Variables exposed to MVEL expressions and to the automation chain context. */
    protected Map<String, Object> mvelCtx = new HashMap<>();

    /** Maps each XML element that triggered a document creation to the resulting document. */
    protected Map<Element, DocumentModel> elToDoc = new HashMap<>();

    protected ParserConfigRegistry registry;

    /** When true, documents are only saved at the end of {@link #parse(Document)}. */
    protected Boolean deferSave = false;

    /** Directory used to resolve blob content as relative file paths; {@code null} when parsing a stream. */
    protected File workingDirectory;

    /** Lazily looked up by {@link #getAutomationService()}. */
    private AutomationService automationService;

    /**
     * Creates an importer with no extra MVEL variables that saves documents as it goes.
     *
     * @param rootDoc the document under which the import happens
     * @param registry the registry providing document and attribute configurations
     */
    public XMLImporterServiceImpl(DocumentModel rootDoc, ParserConfigRegistry registry) {
        this(rootDoc, registry, null, false);
    }

    /**
     * Creates an importer.
     *
     * @param rootDoc the document under which the import happens; its session is used for the whole import
     * @param registry the registry providing document and attribute configurations
     * @param mvelContext extra variables copied into the MVEL context, may be {@code null}
     * @param deferSave whether saving documents is deferred to the end of {@link #parse(Document)}
     */
    public XMLImporterServiceImpl(DocumentModel rootDoc, ParserConfigRegistry registry,
            Map<String, Object> mvelContext, boolean deferSave) {
        if (mvelContext != null) {
            mvelCtx.putAll(mvelContext);
        }

        session = rootDoc.getCoreSession();
        this.rootDoc = rootDoc;
        this.deferSave = deferSave;

        docsStack = new Stack<>();
        pushInStack(rootDoc);
        // these entries are put after the caller-provided ones, so they win on key clashes
        mvelCtx.put("root", rootDoc);
        mvelCtx.put("docs", docsStack);
        mvelCtx.put("session", session);

        this.registry = registry;
    }

    protected ParserConfigRegistry getRegistry() {
        return registry;
    }

    /**
     * Tells whether an element is targeted by a configured tag name, either because the names are equal or because
     * the configured value is an XPath pattern matching the element.
     */
    private static boolean matchesTag(Element el, String tagName) {
        if (tagName.equals(el.getName())) {
            return true;
        }
        try {
            return el.matches(tagName);
        } catch (InvalidXPathException ignored) {
            // the configured tag name is not a valid XPath pattern: plain names simply do not match
            return false;
        }
    }

    /**
     * Finds the document creation configuration targeting the given element.
     *
     * @param el the element being processed
     * @return the first matching configuration in registry order, or {@code null} when none matches
     */
    protected DocConfigDescriptor getDocCreationConfig(Element el) {
        for (DocConfigDescriptor conf : getRegistry().getDocCreationConfigs()) {
            if (matchesTag(el, conf.getTagName())) {
                return conf;
            }
        }
        return null;
    }

    /**
     * Collects every attribute configuration targeting the given element.
     *
     * @param el the element being processed
     * @return the matching configurations in registry order, possibly empty
     */
    protected List<AttributeConfigDescriptor> getAttributConfigs(Element el) {
        List<AttributeConfigDescriptor> result = new ArrayList<>();
        for (AttributeConfigDescriptor conf : getRegistry().getAttributConfigs()) {
            if (matchesTag(el, conf.getTagName())) {
                result.add(conf);
            }
        }
        return result;
    }

    /**
     * Parses XML read from a stream and imports it. No working directory is available in this mode, so blob content
     * is never resolved as a file path.
     *
     * @param is the XML stream, also exposed to MVEL as {@code source}
     * @return see {@link #parse(Document)}
     * @throws IOException if the stream cannot be parsed as XML (wrapping the {@link DocumentException})
     */
    public List<DocumentModel> parse(InputStream is) throws IOException {
        mvelCtx.put("source", is);
        try {
            // NOTE(review): SAXReader is used with its default settings; if untrusted XML can reach this
            // importer, consider disabling DTDs / external entities (XXE) on the reader.
            Document doc = new SAXReader().read(is);
            workingDirectory = null;
            return parse(doc);
        } catch (DocumentException e) {
            throw new IOException(e);
        }
    }

    /**
     * Parses an XML file and imports it. If the file cannot be parsed as XML it is treated as a zip archive: the
     * archive is extracted to a temporary directory and the first {@code *.xml} entry found at its top level is
     * imported instead.
     *
     * @param file the XML file or zip archive, also exposed to MVEL as {@code source}
     * @return see {@link #parse(Document)}
     * @throws IOException if the archive cannot be extracted
     * @throws NuxeoException if the archive contains no top-level XML file
     */
    public List<DocumentModel> parse(File file) throws IOException {
        mvelCtx.put("source", file);

        Document doc;
        try {
            // NOTE(review): same XXE remark as in parse(InputStream)
            doc = new SAXReader().read(file);
            workingDirectory = file.getParentFile();
        } catch (DocumentException e) {
            return parseArchive(file, e);
        }
        return parse(doc);
    }

    /**
     * Zip fallback of {@link #parse(File)}: extracts the archive to a temporary directory, imports its first
     * top-level XML file and always removes the temporary directory afterwards.
     */
    private List<DocumentModel> parseArchive(File file, DocumentException cause) throws IOException {
        File tmp = Environment.getDefault().getTemp();
        File directory = new File(tmp, file.getName() + System.currentTimeMillis());
        try {
            directory.mkdir();
            try {
                ZipUtils.unzip(file, directory);
            } catch (IOException e) {
                // keep the XML parse failure visible: it is usually the real problem for a non-zip file
                e.addSuppressed(cause);
                throw e;
            }
            // listFiles() returns null when the directory could not be created or read
            File[] children = directory.listFiles();
            if (children != null) {
                for (File child : children) {
                    if (child.getName().endsWith(".xml")) {
                        return parse(child);
                    }
                }
            }
            throw new NuxeoException("Can not find XML file inside the zip archive", cause);
        } finally {
            FileUtils.deleteQuietly(directory);
        }
    }

    /**
     * Imports an already parsed XML document.
     * <p>
     * When saving is deferred, every document on the stack (root document included) is popped, flagged with
     * {@link #XML_IMPORTER_INITIALIZATION} and saved; the stack is empty afterwards. Otherwise a copy of the stack
     * content is returned and the stack is left untouched.
     *
     * @param doc the XML document, exposed to MVEL as {@code xml}
     * @return the documents held by the stack: in pop order (most recent first) when saving is deferred, in stack
     *         order (root document first) otherwise
     */
    public List<DocumentModel> parse(Document doc) {
        Element root = doc.getRootElement();
        elToDoc = new HashMap<>();
        mvelCtx.put("xml", doc);
        mvelCtx.put("map", elToDoc);
        process(root);

        if (!deferSave) {
            return new ArrayList<>(docsStack);
        }

        // defer saveDocument to end of operation
        List<DocumentModel> saved = new ArrayList<>();
        while (!docsStack.isEmpty()) {
            DocumentModel d = popStack();
            d.putContextData(XML_IMPORTER_INITIALIZATION, Boolean.TRUE);
            saved.add(session.saveDocument(d));
        }
        return saved;
    }

    /**
     * Builds the value of a complex property by evaluating every expression of the configuration mapping against
     * the element.
     *
     * @return a {@code Map<String, Object>} with one entry per mapping key; values may be {@code null}
     */
    protected Object resolveComplex(Element el, AttributeConfigDescriptor conf) {
        Map<String, Object> propValue = new HashMap<>();
        for (Map.Entry<String, String> mapping : conf.getMapping().entrySet()) {
            propValue.put(mapping.getKey(), resolveAndEvaluateXmlNode(el, mapping.getValue()));
        }
        return propValue;
    }

    /**
     * Builds a blob from the {@code content}, {@code mimetype} and {@code filename} mapping entries.
     * <p>
     * When a working directory is known and {@code content} (trimmed) names an existing regular file below it, the
     * blob is created from that file; otherwise the content string itself becomes the blob content.
     *
     * @return the blob, or {@code null} when the mapping has no {@code content} entry
     * @throws NuxeoException if the referenced file cannot be read
     */
    protected Blob resolveBlob(Element el, AttributeConfigDescriptor conf) {
        @SuppressWarnings("unchecked") // resolveComplex always builds a Map<String, Object>
        Map<String, Object> propValues = (Map<String, Object>) resolveComplex(el, conf);

        if (!propValues.containsKey("content")) {
            return null;
        }
        String content = (String) propValues.get("content");
        try {
            Blob blob = null;
            if (content != null && workingDirectory != null) {
                // NOTE(review): the relative path comes from the XML content; "../" segments are not
                // rejected here - confirm that imported files are trusted.
                File file = new File(workingDirectory, content.trim());
                // isFile() rather than exists(): blank content resolves to the working directory itself
                if (file.isFile()) {
                    blob = Blobs.createBlob(file);
                }
            }
            if (blob == null) {
                blob = Blobs.createBlob(content);
            }
            if (propValues.containsKey("mimetype")) {
                blob.setMimeType((String) propValues.get("mimetype"));
            }
            if (propValues.containsKey("filename")) {
                blob.setFilename((String) propValues.get("filename"));
            }
            return blob;
        } catch (IOException e) {
            throw new NuxeoException("Can not create blob from \"" + content + "\"", e);
        }
    }

    /**
     * Applies one attribute configuration: resolves the value from the element and writes it into the target
     * property of the document.
     * <ul>
     * <li>scalar property: the single XPath value replaces the property value;</li>
     * <li>complex property: the mapping result (or a blob for blob properties) replaces the property value;</li>
     * <li>list property: the resolved value is appended to the existing values.</li>
     * </ul>
     *
     * @param doc the document to update (not saved here)
     * @param el the element values are resolved against
     * @param conf the attribute configuration to apply
     */
    protected void processDocAttributes(DocumentModel doc, Element el, AttributeConfigDescriptor conf) {
        String targetDocProperty = conf.getTargetDocProperty();

        if (log.isDebugEnabled()) {
            log.debug(String.format(MSG_UPDATE_PROPERTY, targetDocProperty, el.getUniquePath(), doc.getPathAsString(),
                    doc.getType(), conf.toString()));
        }
        Property property = doc.getProperty(targetDocProperty);

        if (property.isScalar()) {
            Object value = resolveAndEvaluateXmlNode(el, conf.getSingleXpath());
            traceResolvedValue(targetDocProperty, el, value, conf);
            property.setValue(value);

        } else if (property.isComplex()) {
            Object value;
            if (property instanceof BlobProperty) {
                value = resolveBlob(el, conf);
            } else {
                value = resolveComplex(el, conf);
            }
            traceResolvedValue(targetDocProperty, el, value, conf);
            property.setValue(value);

        } else if (property.isList()) {
            ListType lType = (ListType) property.getType();
            Serializable value;

            if (lType.getFieldType().isSimpleType()) {
                value = (Serializable) resolveAndEvaluateXmlNode(el, conf.getSingleXpath());
                if (value != null) {
                    appendSimpleValue(property, targetDocProperty, value);
                }
            } else {
                value = (Serializable) resolveComplex(el, conf);
                if (value != null && !conf.getMapping().isEmpty()) {
                    property.addValue(value);
                }
            }
            traceResolvedValue(targetDocProperty, el, value, conf);
        }
    }

    /** Appends a value to a simple multi-valued property, whose current value is either a List or an array. */
    @SuppressWarnings("unchecked") // the element type of the stored list is not known statically
    private void appendSimpleValue(Property property, String targetDocProperty, Serializable value) {
        Object values = property.getValue();
        if (values instanceof List) {
            ((List<Object>) values).add(value);
            property.setValue(values);
        } else if (values instanceof Object[]) {
            List<Object> valuesList = new ArrayList<>();
            Collections.addAll(valuesList, (Object[]) values);
            valuesList.add(value);
            property.setValue(valuesList.toArray());
        } else {
            log.error("Simple multi value property " + targetDocProperty + " is neither a List nor an Array");
        }
    }

    /** Logs, at trace level, the value resolved for a property. */
    private void traceResolvedValue(String targetDocProperty, Element el, Object value,
            AttributeConfigDescriptor conf) {
        if (log.isTraceEnabled()) {
            log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value,
                    conf.toString()));
        }
    }

    /**
     * Refreshes the element-dependent entries ({@code currentDocument}, {@code currentElement}, {@code Fn}) of the
     * shared MVEL context.
     *
     * @return the shared context map itself, not a copy
     */
    protected Map<String, Object> getMVELContext(Element el) {
        mvelCtx.put("currentDocument", docsStack.peek());
        mvelCtx.put("currentElement", el);
        mvelCtx.put("Fn", new MVELImporterFunction(session, docsStack, elToDoc, el));
        return mvelCtx;
    }

    /**
     * Evaluates a configuration expression against an element. Three syntaxes are supported:
     * <ul>
     * <li><code>#{...}</code>: the content is evaluated as an MVEL expression;</li>
     * <li>a string containing <code>{{xpath}}</code> placeholders: each placeholder is replaced by the text value
     * of its XPath, literal text around the placeholders is kept, and the result is a String;</li>
     * <li>anything else: evaluated as a pure XPath, see {@link #resolveXP}.</li>
     * </ul>
     *
     * @param el the context element
     * @param xpr the expression, may be {@code null}
     * @return the evaluation result, or {@code null} when {@code xpr} is {@code null}
     */
    protected Object resolve(Element el, String xpr) {
        if (xpr == null) {
            return null;
        }

        if (xpr.startsWith("#{") && xpr.endsWith("}")) { // MVEL
            return resolveMVEL(el, xpr.substring(2, xpr.length() - 1));
        }
        if (!xpr.contains("{{")) {
            return resolveXP(el, xpr); // default to pure XPATH
        }

        // String containing XPaths
        StringBuilder sb = new StringBuilder();
        int cursor = 0; // start of the literal text not copied yet
        int open = xpr.indexOf("{{");
        while (open >= 0) {
            int close = xpr.indexOf("}}", open + 2);
            if (close < 0) {
                // unterminated placeholder: the remainder is kept as literal text below
                break;
            }
            sb.append(xpr, cursor, open);
            sb.append(resolveAndEvaluateXmlNode(el, xpr.substring(open + 2, close)));
            // resume after the closing braces, so that they are neither copied nor rescanned
            cursor = close + 2;
            open = xpr.indexOf("{{", cursor);
        }
        // literal text following the last placeholder
        sb.append(xpr, cursor, xpr.length());
        return sb.toString();
    }

    /**
     * Compiles and runs an MVEL expression against a copy of the MVEL context refreshed for the element.
     *
     * @param el the context element
     * @param xpr the MVEL expression, without the surrounding <code>#{ }</code>
     */
    protected Object resolveMVEL(Element el, String xpr) {
        // the expression works on a copy, so variables it defines do not leak into the shared context
        Map<String, Object> ctx = new HashMap<>(getMVELContext(el));
        Serializable compiled = MVEL.compileExpression(xpr);
        return MVEL.executeExpression(compiled, ctx);
    }

    /**
     * Evaluates an XPath against an element.
     *
     * @return {@code null} when nothing is selected, the node itself when exactly one is selected; when several
     *         nodes are selected: for an expression ending with {@code text()} a single text node holding the
     *         concatenated texts, otherwise the list of nodes
     */
    protected Object resolveXP(Element el, String xpr) {
        List<?> nodes = el.selectNodes(xpr);
        if (nodes.isEmpty()) {
            return null;
        }
        if (nodes.size() == 1) {
            return nodes.get(0);
        }
        // Workaround for NXP-11834
        if (xpr.endsWith("text()")) {
            StringBuilder value = new StringBuilder();
            for (Object node : nodes) {
                if (!(node instanceof DefaultText)) {
                    String msg = "Text selector must return a string (expr:\"%s\") element %s";
                    log.error(String.format(msg, xpr, el.getStringValue()));
                    // give back what was gathered so far, as a plain String
                    return value.toString();
                }
                value.append(((DefaultText) node).getText());
            }
            return new DefaultText(value.toString());
        }
        return nodes;
    }

    /**
     * Resolves the path of the parent under which a document must be created.
     * <p>
     * When the expression resolves to nothing, the path of the first folderish document found scanning the stack
     * from index 0 (the root document) is used. A resolved {@link DocumentModel} gives its own path; a resolved
     * element gives the path of the document created from it, or its text when there is none; attributes and text
     * nodes give their value; any other non-node object gives its {@code toString()}. In all remaining cases the
     * root document path is returned.
     *
     * @param el the context element
     * @param xpr the parent expression, see {@link #resolve}
     * @return the parent path
     */
    protected String resolvePath(Element el, String xpr) {
        Object ob = resolve(el, xpr);
        if (ob == null) {
            for (DocumentModel candidate : docsStack) {
                if (candidate.isFolder()) {
                    return candidate.getPathAsString();
                }
            }
            return rootDoc.getPathAsString();
        }
        if (ob instanceof DocumentModel) {
            return ((DocumentModel) ob).getPathAsString();
        }
        if (!(ob instanceof Node)) {
            return ob.toString();
        }
        if (ob instanceof Element) {
            Element targetElement = (Element) ob;
            DocumentModel target = elToDoc.get(targetElement);
            return target != null ? target.getPathAsString() : targetElement.getText();
        }
        if (ob instanceof Attribute) {
            return ((Attribute) ob).getValue();
        }
        if (ob instanceof Text) {
            return ((Text) ob).getText();
        }
        // other kinds of node carry no usable path
        return rootDoc.getPathAsString();
    }

    /**
     * Resolves the name of a document to create.
     *
     * @return the string form of the evaluated expression, or {@code null} when it resolves to nothing
     */
    protected String resolveName(Element el, String xpr) {
        Object ob = resolveAndEvaluateXmlNode(el, xpr);
        return ob == null ? null : ob.toString();
    }

    /**
     * Evaluates an expression like {@link #resolve} and, when the result is an XML node, returns its text instead
     * of the node.
     */
    protected Object resolveAndEvaluateXmlNode(Element el, String xpr) {
        Object ob = resolve(el, xpr);
        if (ob instanceof Node) {
            return ((Node) ob).getText();
        }
        return ob;
    }

    /**
     * Creates the document described by a creation configuration, or reuses the existing one when the
     * configuration is in update mode and a document already exists at the computed path. The resulting document
     * is pushed on the stack and associated with the element.
     * <p>
     * When no name can be resolved, {@code <docType>-<n>} is used, {@code n} being one more than the number of
     * documents of that type currently on the stack.
     * <p>
     * In update mode, the target properties of the overwriting attribute configurations matching the child elements
     * are emptied on the existing document, once per document and property.
     *
     * @param el the element triggering the creation
     * @param conf the matching creation configuration
     * @throws NuxeoException if the session fails, enriched with the creation message
     */
    protected void createNewDocument(Element el, DocConfigDescriptor conf) {
        DocumentModel doc = session.createDocumentModel(conf.getDocType());

        String path = resolvePath(el, conf.getParent());
        String name = resolveName(el, conf.getName());
        if (name == null) {
            if (log.isDebugEnabled()) {
                log.debug(String.format(MSG_NO_ELEMENT_FOUND, conf.getName(), el.getUniquePath()));
            }
            int idx = 1;
            for (DocumentModel stacked : docsStack) {
                if (stacked.getType().equals(conf.getDocType())) {
                    idx++;
                }
            }
            name = conf.getDocType() + "-" + idx;
        }
        doc.setPathInfo(path, name);

        if (log.isDebugEnabled()) {
            String template = conf.getUpdate() ? MSG_UPDATE : MSG_CREATION;
            log.debug(String.format(template, path, name, el.getUniquePath(), conf.toString()));
        }

        try {
            if (conf.getUpdate() && session.exists(doc.getRef())) {
                DocumentModel existingDoc = session.getDocument(doc.getRef());
                emptyOverwrittenProperties(existingDoc, el);
                doc = existingDoc;
            } else {
                doc = session.createDocument(doc);
            }
        } catch (NuxeoException e) {
            e.addInfo(String.format(MSG_CREATION, path, name, el.getUniquePath(), conf.toString()));
            throw e;
        }
        pushInStack(doc);
        elToDoc.put(el, doc);
    }

    /**
     * Empties, on an existing document, the properties that the children of the element are configured to
     * overwrite. Emptied properties are remembered so that they are not emptied a second time.
     */
    private void emptyOverwrittenProperties(DocumentModel existingDoc, Element el) {
        for (Object child : el.elements()) {
            List<AttributeConfigDescriptor> configs = getAttributConfigs((Element) child);
            if (configs == null) { // defensive: an overriding implementation may return null
                continue;
            }
            List<String> emptied = deletedAttributes.get(existingDoc.getId());
            if (emptied == null) {
                emptied = new ArrayList<>();
                deletedAttributes.put(existingDoc.getId(), emptied);
            }
            for (AttributeConfigDescriptor config : configs) {
                String targetDocProperty = config.getTargetDocProperty();
                if (config.overwrite && !emptied.contains(targetDocProperty)) {
                    emptied.add(targetDocProperty);
                    existingDoc.setPropertyValue(targetDocProperty, new ArrayList<>());
                }
            }
        }
    }

    /**
     * Processes an element and, recursively, its children.
     * <p>
     * A document is created first when a creation configuration matches. The matching attribute configurations are
     * then applied to the document on top of the stack, which is flagged with {@link #XML_IMPORTER_INITIALIZATION}
     * and saved unless saving is deferred. Finally, if the element created a document whose configuration names an
     * automation chain, the chain is run with that document as input.
     *
     * @param el the element to process
     * @throws NuxeoException if the automation chain fails
     */
    protected void process(Element el) {
        DocConfigDescriptor createConf = getDocCreationConfig(el);
        if (createConf != null) {
            createNewDocument(el, createConf);
        }
        List<AttributeConfigDescriptor> configs = getAttributConfigs(el);
        if (configs != null) { // defensive: an overriding implementation may return null
            for (AttributeConfigDescriptor config : configs) {
                processDocAttributes(docsStack.peek(), el, config);
            }

            // replace the top of the stack by the saved version of the document
            DocumentModel doc = popStack();
            doc.putContextData(XML_IMPORTER_INITIALIZATION, Boolean.TRUE);
            if (!deferSave) {
                doc = session.saveDocument(doc);
            }
            pushInStack(doc);

            if (createConf != null) {
                runAutomationChain(createConf.getAutomationChain());
            }
        }
        for (Object child : el.elements()) {
            process((Element) child);
        }
    }

    /** Runs the named automation chain, if any, with the document on top of the stack as input. */
    private void runAutomationChain(String chain) {
        if (chain == null || "".equals(chain.trim())) {
            return;
        }
        OperationContext ctx = new OperationContext(session, mvelCtx);
        ctx.setInput(docsStack.peek());
        try {
            getAutomationService().run(ctx, chain);
        } catch (OperationException e) {
            throw new NuxeoException(e);
        }
    }

    private AutomationService getAutomationService() {
        if (automationService == null) {
            automationService = Framework.getLocalService(AutomationService.class);
        }
        return automationService;
    }

    /** Pushes a document on the stack and exposes it to MVEL as {@code changeableDocument}. */
    private void pushInStack(DocumentModel doc) {
        mvelCtx.put("changeableDocument", doc);
        docsStack.push(doc);
    }

    /** Pops the top document of the stack and exposes it to MVEL as {@code changeableDocument}. */
    private DocumentModel popStack() {
        DocumentModel doc = docsStack.pop();
        mvelCtx.put("changeableDocument", doc);
        return doc;
    }

}