1 /* (This is the new BSD license.)
  2 * Copyright (c) 2012-2014, Chris Culy
  3 * All rights reserved.
  4 *
  5 * Redistribution and use in source and binary forms, with or without
  6 * modification, are permitted provided that the following conditions are met:
  7 *     * Redistributions of source code must retain the above copyright
  8 *       notice, this list of conditions and the following disclaimer.
  9 *     * Redistributions in binary form must reproduce the above copyright
 10 *       notice, this list of conditions and the following disclaimer in the
 11 *       documentation and/or other materials provided with the distribution.
 12 *     * Neither the name of the Chris Culy nor the 
 13 *		names of its contributors may be used to endorse or promote 
 14 *		products from this software without specific prior written permission.
 15 *
 16 * THIS SOFTWARE IS PROVIDED BY Chris Culy
 17 * ``AS IS'' AND ANY OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
 18 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 19 * ARE DISCLAIMED. IN NO EVENT SHALL Chris Culy
 20 * BE LIABLE FOR ANY, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
 21 * CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 
 22 * GOODS OR SERVICES; OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
 23 * CAUSED AND ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 
 24 * TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 
 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 26 */
 27 
 28 "use strict";
 29 var doubletree = doubletree || {};
 30 
 31 (function(){
 32     
 33     /**
 34      * @class doubletree.Trie
 35      * This is the class for the specialized trie that is the data for {@link doubletree.DoubleTree}.
 36  * The Trie will get pieces of data that contain fields. Some (possibly all) of those fields will be used for distinguishing among items.
 37  * For example, we might have an "author" field, but not use it when distinguishing among items for the Trie.
 38  * @param caseSensitive are the elements in the first distinguishing field compared in a case sensitive fashion
 39  * @param fldNames the names of the fields
 40  * @param fldDelim the field delimiter
 41  * @distinguishingFldsArray the distinguishing fields
 42  * @undistinguishedRoot true if the root should be calculated without using the distinguishing fields
 43  */
 44 doubletree.Trie = function(caseSensitive, fldNames, fldDelim, distinguishingFldsArray, undistinguishedRoot) {
 45     var endNG = " ";
 46     var rootName = "_root_";
 47     var noCase = (! caseSensitive) && true;
 48     if (! fldNames) {
 49         fldNames = ["item"];
 50     }
 51     var fieldNames = fldNames;
 52     if (! fieldDelim) {
 53         fieldDelim = "\t"; //default
 54     }
 55     var fieldDelim = fldDelim;
 56     if (! distinguishingFieldsArray) {
 57         distinguishingFieldsArray = [ fieldNames[0] ];
 58     }
 59     var distinguishingFieldsArray = distinguishingFldsArray;
 60     var undistinguishedRt = undistinguishedRoot;
 61     if (undefined == undistinguishedRt) {
 62         undistinguishedRt = true; //TBD: check to make sure this doesn't break anything
 63     }
 64     
 65     var trie = new TrieNode(rootName,-1,0);
 66     
 67     /** @private */
 68     function TrieNode(item, id, count) {
 69         this.id = id;
 70         this.count = count;        
 71         this.info = {"count":count, "ids":{}};
 72         
 73         if (item == null) {
 74             this.item = rootName;        
 75         } else {
 76             this.item = item;
 77             this.info.ids = {};
 78             this.info.ids[id] = true;
 79             var flds = item.split(fieldDelim);
 80             for(var i in flds) {
 81                 this.info[ fieldNames[i] ] = [ flds[i] ];
 82             }
 83         }
 84         this.nodes = {};
 85         
 86          /** @private */
 87         this.addNgram = function(itemArray, id, count) {
 88             if (! count) {
 89                 count = 1;
 90             }
 91             var thisItem, thisKey;
 92             if (itemArray.length > 0) {
 93                 thisItem = itemArray.shift();
 94                     
 95                 var theseFlds = thisItem.split(fieldDelim);
 96 
 97                 if (undistinguishedRt && this.item == rootName) {
 98                     thisKey = "";
 99                 } else {
100                     thisKey = theseFlds.filter(function(f,i) {
101                             return distinguishingFieldsArray.indexOf( fieldNames[i] ) > -1;
102                         })
103                     .map(function(f) {
104                             if (noCase) {
105                                 return f.toLocaleLowerCase();
106                             }
107                             return f;
108                         })
109                     .join(fieldDelim);
110                 }
111                 
112             } else {
113                 thisItem = endNG;
114                 thisKey = thisItem;
115             }
116             
117             var subTrie;
118             if (thisKey in this.nodes && this.nodes[thisKey] instanceof TrieNode) { //we need the instanceof TrieNode so we can override Object properties -- hope that none are already arrays
119                 subTrie = this.nodes[thisKey];
120                 subTrie.info.count += count;
121                 subTrie.info.ids[id] = true;
122                 
123                 for(var f in theseFlds) {
124                     var thisFld = theseFlds[f];
125                     if (subTrie.info[ fieldNames[f] ].indexOf( thisFld ) == -1 ){
126                         subTrie.info[ fieldNames[f] ].push(thisFld);
127                     }                
128                 }
129                 
130                 
131             } else {
132                 subTrie = new TrieNode(thisItem, id, count);            
133                 this.nodes[thisKey] = subTrie;
134             }
135             if (thisItem != endNG) {
136                 subTrie.addNgram(itemArray,id, count);
137             }
138         }
139             
140          /** @private */
141         this.getUniqRoot = function() {
142             if (this.item == rootName) {
143                 var children = Object.keys(this.nodes);
144                 if (children.length == 1) {
145                     return this.nodes[ children[0] ];
146                 }
147             }
148         
149             return this;
150         }
151         
152         /** @private */
153         this.toTree = function(filterFuns) {
154             
155             function toTreeHelper(filterFuns, descendentLevel, trieData) {
156                 
157                 var what = {"children":[]};
158                 what.name = trieData.item;
159                 what.info = {};
160                 for(var k in trieData.info) {
161                     if (typeof(trieData.info[k]) === 'Object') {
162                         what.info[k] = {};
163                         for(var k2 in trieData.info[k]) {
164                             what.info[k][k2] = this.info[k][k2];
165                         }
166                     } else {
167                         what.info[k] = trieData.info[k];
168                     }
169                 }
170                 what.pruned = {};
171                 
172                 
173                 for(var item in trieData.nodes) {
174                     var itemNode = trieData.nodes[item];
175                     var thisFilter = filterFuns[descendentLevel];            
176                     if ( ! thisFilter || (thisFilter && thisFilter(itemNode.info)) ) {
177                         what.children.push( toTreeHelper(filterFuns, descendentLevel +1, itemNode) );
178                         if (itemNode.pruned != {}) {
179                             addTo(what.pruned, itemNode.pruned);
180                         }
181                     } else {
182                         addTo(what.pruned, itemNode.info.ids);
183                     }
184                 }
185         
186                 what.info.continuations = what.children.length;
187                 //this is to record info we need for sizing the tree, since D3 automatically scales to fit, which is not what we want
188                 //we also need to keep track of the minimum count (the root always has the max, of course), for scaling
189                 if (what.children.length == 0) {
190                    what.children = null; //the trees expect null if there are no children, not the empty array. Odd, but true.
191                    what.maxChildren = 0;
192                    
193                    if (what.name) {
194                     what.maxLen = what.name.length;
195                    } else {
196                     what.maxLen = 0;
197                    }
198                    
199                    what.minCount = what.info.count;
200                    //what.maxChildren = 0; //new
201                    
202                 } else {
203                     var cMax = d3.max( what.children.map(function(c) {return c.maxChildren;}) );
204                     what.maxChildren = Math.max(what.children.length, cMax);
205                     
206                     var maxLen = d3.max( what.children.map(function(c) {return c.maxLen;}));
207                     what.maxLen = Math.max(maxLen, what.name.length);
208                     
209                     what.minCount = d3.min( what.children.map(function(c) {return c.minCount;})); //the children are always <= the parent
210                 }
211                 return what;
212             }
213             
214             if (! filterFuns ) {
215                 filterFuns = [];
216             }
217             
218             var trieData = JSON.parse(JSON.stringify(this)); //CuC make a copy of the data, to keep the real trie immutable
219         
220             return toTreeHelper(filterFuns, 0, trieData);
221         }
222     }
223     
224     
225     
226     
227     /**
228      * Add an ngram to the Trie
229      * @param itemArray an array of delimited items (the ngrams)
230      * @param id an id for this ngram 
231      * @param count a count for this ngram. Default is 1
232      */
233     this.addNgram = function(itemArray, id, count) {trie.addNgram(itemArray, id, count);};
234     
235     /**
236      * get the unique root of this Trie. Used only by {@link DoubleTree}
237      * @returns a new Trie with a unique item as the root
238      */
239     this.getUniqRoot = function() {
240         var what = new doubletree.Trie((!noCase), fieldNames, fieldDelim, distinguishingFieldsArray);
241         what.trie( trie.getUniqRoot() );
242         return what;
243     };
244     
245     /**
246      * convert the Trie to a tree structure for display. Used only by {@link DoubleTree}
247      * @param filterFuns the filtering functions to apply to the tree see {@link DoubleTree.filters}
248      * @param descendentLevel the current level we are filtering
249      * @returns the tree
250      */
251     this.toTree = function(filterFuns, descendentLevel) {return trie.toTree(filterFuns, descendentLevel);};
252     
253     /**
254      * serialize the Trie as a JSON string
255      * @returns the JSON string representation of the Trie
256      */
257     this.serialize = function() {
258         return JSON.stringify(this);
259     }
260     
261     /**
262      * make this Trie have the values of a previously serialized Trie see {@link #serialize}
263      */
264     this.deserialize = function(serialized) {
265         var obj = JSON.parse(serialized);
266         
267         endNG = obj.endNG();
268         rootName = obj.rootName();
269         noCase = obj.caseSensitive();
270         fieldNames = obj.fieldNames();
271         fieldDelim = obj.fieldDelim();
272         distinguishingFieldsArray = obj.distinguishingFieldsArray();
273         trie = obj.trie();
274         
275     }
276     
277     //getters -- the properties are readonly, set in constructor
278     
279     //private, only used in deserialization
280     /** @private */
281     this.endNG = function() {
282         return endNG;
283     }
284     //private, only used in deserialization
285     /** @private */
286     this.rootName = function() {
287         return rootName;
288     }
289     
290     //private, also a setter, only used in deserialization and getUniqRoot;
291     /** @private */
292     this.trie = function(value) {
293       if (arguments.length > 0) {
294         trie = value;
295       }
296       return trie;
297     }
298     
299     /**
300      * @returns whether this Trie uses case sensitive comparison
301      */
302     this.caseSensitive = function() {
303         return ! noCase;
304     }
305     
306     /**
307      * get the field names in the data
308      * @returns the field names in the data
309      */
310     this.fieldNames = function() {
311         return fieldNames;
312     }
313     
314     /**
315      * get the field delimiter for the data
316      * @returns the field delimiter for the data
317      */
318     this.fieldDelim = function() {
319         return fieldDelim;
320     }
321     
322     /**
323      * get the distinguishing fields for the data
324      * @returns the distinguishing fields for the data
325      */
326     this.distinguishingFieldsArray = function() {
327         return distinguishingFieldsArray;
328     }
329     
330     //add key/vals of o2 to o1 and return o1; (top level key-value only, o2 values maintained over o1)
331     /** @private */
332     function addTo(o1, o2) {
333         for(var k in o2) {
334             o1[k] = o2[k];
335         }
336     }
337 
338     
339 }  
340 
341 })();
342